Submit
The “submit” script is used to “submit” my pages to the various web sites I maintain. The “submit” script performs various checks on the page and then sends them on their way.
Here’s the “submit” script:
#!/bin/sh
#Use this script to submit documents from the "prepsite" to the
# "livesite". Other scripts will be called when you do this.
# Also the information from livesite will be pushed to other
# sites as well that are to be kept in synchronization
#
# Usage: submit [--force|-f] FILENAME
#   FILENAME is relative to $PREPSITE (and to $LIVESITE).
#   --force skips the two HTML tag checks.

#Please do not modify below this line

#Bring in configuration file variables and exit codes
. /scripts/wwwmanage/wwwmanage.conf

#
#For now we have to be root
# Later I'll see how we can get around this.
if [ "$(id -u)" -ne 0 ]; then
  echo "You must be root to run this command"
  exit "$EXIT_NOT_ROOT"
fi

#Set up Filename variable
ARG_FORCE=""
FILENAME=""
while [ $# -gt 0 ]; do
  case $1 in
    --force | -f) ARG_FORCE=1 ;;
    *) FILENAME=$1 ;;
  esac
  shift
done

# Without a filename every later path would silently resolve to the
# prepsite directory itself, so stop early with a clear message.
if [ -z "$FILENAME" ]; then
  echo "Usage: $0 [--force|-f] FILENAME" >&2
  exit 2
fi

if [ "$ARG_FORCE" = 1 ]; then
  #Do not bother checking tags
  :
else
  #Check tags...
  # HTML format
  /scripts/wwwmanage/checkreqtags.sh "${PREPSITE}$FILENAME"
  EXIT=$?
  if [ "$EXIT" -ne 0 ]; then
    echo Check for Required TAGS failed!
    exit "$EXIT"
  fi

  #Check tags...
  # that all tags open and close in proper order
  /scripts/wwwmanage/openclosetags.sh "${PREPSITE}$FILENAME"
  EXIT=$?
  if [ "$EXIT" -ne 0 ]; then
    echo Check for Matching TAGS failed!
    exit "$EXIT"
  fi
fi

# TAR_ADD FILE
#   Append a date-stamped copy of ${LIVESITE}FILE to the $BACKUP tar
#   archive and normalise the live file's owner and mode.
TAR_ADD() {
  chown root:root "${LIVESITE}$1"
  chmod 0644 "${LIVESITE}$1"
  MODIFY=$(find "${LIVESITE}$1" -printf "%TY%Tb%Td-%TH%TM%TS")
  # Newer GNU find prints fractional seconds for %TS; drop them so the
  # archived name stays YYYYMonDD-HHMMSS.
  MODIFY=${MODIFY%%.*}
  cp -rp "${LIVESITE}$1" "${LIVESITE}$1-$MODIFY"
  (cd "$LIVESITE" && tar -rvf "$BACKUP" "$1-$MODIFY")
  rm "${LIVESITE}$1-$MODIFY"
}

# SITE_UPDATE
#   Reads FILENAME and MODIFY_EN (set by the main flow below) and
#   refreshes the "Site Updated" line of ${LIVESITE}index.html when the
#   submitted file is at least as new as the date shown there.
SITE_UPDATE() {
  # It is possible we are only updating the format of the doc
  # and are not changing the content and therefore I would probably
  # want to keep the original modification date so that it is
  # reflective of the last date the content changed, rather
  # than just formatting changes.
  #
  # We must compare between the current "Site Update" line in
  # index.html and the date on the file being submitted.
  #
  # We then use the date contained in the variable
  # Modify_En to place on the index.html page.

  #Grab Date from "Site Update" line in index.html
  VAR_TMP=$(grep "Site Updated" "${LIVESITE}index.html" |
    head -n 1 |
    sed -e 's/Site Updated //' -e 's/,//' -e 's/<BR>//')

  #Do Year
  #There seems to be a trailing Carriage Return that is causing grief
  # Remove the offending CR!
  CMP_DATE=$(printf '%s\n' "$VAR_TMP" | awk '{sub("\r$", ""); print $3}')

  #Do Month
  TMP_MONTH=$(printf '%s\n' "$VAR_TMP" | awk '{print $1}')
  case $TMP_MONTH in
    January) CMP_DATE=${CMP_DATE}01 ;;
    February) CMP_DATE=${CMP_DATE}02 ;;
    March) CMP_DATE=${CMP_DATE}03 ;;
    April) CMP_DATE=${CMP_DATE}04 ;;
    May) CMP_DATE=${CMP_DATE}05 ;;
    June) CMP_DATE=${CMP_DATE}06 ;;
    July) CMP_DATE=${CMP_DATE}07 ;;
    August) CMP_DATE=${CMP_DATE}08 ;;
    September) CMP_DATE=${CMP_DATE}09 ;;
    October) CMP_DATE=${CMP_DATE}10 ;;
    November) CMP_DATE=${CMP_DATE}11 ;;
    December) CMP_DATE=${CMP_DATE}12 ;;
    *)
      #This is from a mangled "Site Updated" line in index.html
      # We should put some sort of method here to repair that line.
      # The line does get fixed; however, when we eventually push
      # index.html
      echo "Unknown Error in Case statement!"
      exit 30
      ;;
  esac

  #Do Day
  # Zero-pad the day: "June 5, 2001" must become 20010605, not 2001065,
  # or the numeric comparison below gives the wrong answer.
  CMP_DATE_INDEX=$CMP_DATE$(printf '%s\n' "$VAR_TMP" | awk '{printf "%02d", $2}')

  #Get Date of File we are updating
  # Note: It is probably much faster to do this "find -printf"
  # command rather than to translate the variable Modify_En
  # to this format so we can compare.
  CMP_DATE_FILE=$(find "${PREPSITE}$FILENAME" -printf "%TY%Tm%Td")

  #echo Date in index.html is $CMP_DATE_INDEX
  #echo Date of file being updated is $CMP_DATE_FILE
  if [ "$CMP_DATE_INDEX" -gt "$CMP_DATE_FILE" ]; then
    #Date in header of index.html is newer than the file being
    # submitted, do nothing
    :
  else
    #Date of file being pushed is newer than that in the header
    # of index.html, update the "Site Updated" line in index.html
    sed -e "s/Site Updated.*<BR>/Site Updated ${MODIFY_EN}<BR>/" \
      "${LIVESITE}index.html" >"${LIVESITE}index.tmp.html"
    #Keep the modification date of index.html: copy it onto the new
    # file before it replaces the old one.
    touch -r "${LIVESITE}index.html" "${LIVESITE}index.tmp.html"
    mv "${LIVESITE}index.tmp.html" "${LIVESITE}index.html"
  fi
}

#Check to see if backup file exists, or to see
# if we can at least create a backup in the
# directory mentioned
if [ -f "$BACKUP" ]; then
  #Backup exists, life is good
  :
elif [ -d "$(dirname "$BACKUP")" ]; then
  #Backup directory exists (no .tarfile exists),
  # life is still good
  :
else
  echo FAILURE:Backup dir does not exist
  exit "$EXIT_BACKUP"
fi

#Check if we have a file, dir, or unknown
if [ -f "${PREPSITE}$FILENAME" ]; then
  #$FILENAME is a file;
  :
elif [ -d "${PREPSITE}$FILENAME" ]; then
  #$FILENAME is a directory
  echo "${PREPSITE}$FILENAME is a directory"
  echo Directories are not currently handled
  exit "$EXIT_DIR"
else
  #$FILENAME is not a file nor a directory
  # boy are we in trouble now.
  echo "${PREPSITE}$FILENAME is of Unknown File Type"
  exit "$EXIT_UNK_FIL_TYP"
fi

#Check to see if directory exists, if not then create it.
# The test must look inside the livesite, not the current directory.
DIRNAME=$(dirname "$FILENAME")
if [ ! -d "${LIVESITE}$DIRNAME" ]; then
  mkdir -p "${LIVESITE}$DIRNAME"
  #work needed here to create directories on ftp sites we cp to.
  #maybe set a flag that we look at later.
  # I probably need to have the ftp expect script generated from this
  # script, rather than being an existing script file. That way I can more
  # easily control how many files I push in one shot, and if any
  # directories are to be created on the remote site.
fi

#
#Copy file from prepsite to livesite
MODIFY_EN=$(find "${PREPSITE}$FILENAME" -printf "%TB %Td, %TY")
sed -e "s/June 19, 2001/$MODIFY_EN/" "${PREPSITE}$FILENAME" \
  >"${LIVESITE}${FILENAME}"

#Reset file modification date to that of PrepSite
touch -r "${PREPSITE}$FILENAME" "${LIVESITE}${FILENAME}"

#Update "Site Update" line in index.html
SITE_UPDATE

#Add file to backup with date appended to filename
TAR_ADD "$FILENAME"

#
#Generate a new datesort.html file
(cd "$LIVESITE" && /scripts/wwwmanage/datesort.sh)

#Add file to backup with date appended to filename
TAR_ADD datesort.html

#Now push all 3 files (datesort.html, index.html and $FILENAME) to riblack6
/scripts/wwwmanage/riblack6.ftp.expect \
  "${LIVESITE}$FILENAME" "$FILENAME" \
  "${LIVESITE}datesort.html" datesort.html \
  "${LIVESITE}index.html" index.html

#Now push all 3 files (datesort.html, index.html and $FILENAME) to
# www.geocities.com/rlcomp_1999
/scripts/wwwmanage/geo-rlcomp.ftp.expect \
  "${LIVESITE}$FILENAME" "$FILENAME" \
  "${LIVESITE}datesort.html" datesort.html \
  "${LIVESITE}index.html" index.html

#Check if file ($Filename) needs to be updated on linux.cca.qcorp.net
# The first column of linux.cca.files must equal the filename exactly;
# the second column is the remote destination.
awk -v f="$FILENAME" '$1 == f { print; exit }' /scripts/wwwmanage/linux.cca.files | (
  read -r SOURCE DESTINATION
  if [ -n "$DESTINATION" ]; then
    /scripts/wwwmanage/linux.cca.ftp.expect "${LIVESITE}$SOURCE" "$DESTINATION"
  fi
)
Datesort.sh Shell Script
datesort.sh is the controlling wrapper: it runs the do_datesort.sh script and packages its output into the finished datesort.html page.
#!/bin/sh
# datesort.sh - build datesort.html in the current directory.
#
# Wraps the listing produced by do_datesort.sh (plus the static
# datesort.old entries) in an HTML page, then drops every line that
# mentions datesort.html itself so the page does not list itself.

FILENAME=datesort.html

# Build the page in a private temp file: this script runs as root from
# "submit", so a predictable name in /tmp would be unsafe.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/datesort.XXXXXX") || exit 1
trap 'rm -f "$TMPFILE"' EXIT

{
  cat <<EOF
<HTML>
<BODY>
<CENTER><H1>Web Pages - Sorted By Date</H1></CENTER>
<HR>
<PRE>
EOF
  /scripts/wwwmanage/do_datesort.sh
  cat /scripts/wwwmanage/datesort.old
  cat <<EOF
</PRE>
<HR>
<CENTER>This file generated at <BR>$(date +"%H:%M:%S on %b %d, %Y")</CENTER>
</BODY>
</HTML>
EOF
} >"$TMPFILE"

# -F: match the file name literally (its "." is not a wildcard).
# Writing through a redirect keeps the normal umask permissions on the
# final page instead of the 0600 mode of the temp file.
grep -v -F -e "$FILENAME" "$TMPFILE" >"$FILENAME"
Do_datesort.sh Shell Script
The do_datesort.sh script does the bulk of the work of generating the datesort.html document.
#!/bin/sh
# do_datesort.sh - print one fixed-width listing line per file under
# the current directory, newest first.  Output goes to stdout and is
# meant to sit inside a <PRE> block (see datesort.sh).
#
# Each line holds: hyperlinked name, modification time, size, page
# title and META description, each padded to the widths below.

NAME_FIELD=48
TIME_PAD=2
SIZE_FIELD=12
TITLE_PAD=2
TITLE_FIELD=80
DESCR_PAD=2
DESCR_FIELD=160

#Please don't modify below this line

TAB=$(printf '\t')

# spaces N - print N blanks (nothing when N is zero or negative).
spaces() {
  if [ "$1" -gt 0 ]; then
    printf "%${1}s" ""
  fi
}

#One find call emits sort key, display time, size and name for every
# file, tab separated.  Sorting numerically on the key (seconds since
# the epoch) puts the newest file first; the loop then formats each
# record.  Reading whole lines keeps names containing spaces intact.
find . -type f -printf '%T@\t%TY %Tb %Td - %TH:%TM:%TS\t%s\t%p\n' |
  sort -rn |
  while IFS="$TAB" read -r STAMP TIME SIZE NAME; do
    #
    #Name: hyperlinked, left justified, padded out to NAME_FIELD
    #
    printf '<A HREF=%s>%s</A>' "$NAME" "$NAME"
    spaces $((NAME_FIELD - ${#NAME}))

    #
    #Time is a fixed field, pad some spaces at the end.
    # Newer GNU find adds fractional seconds to %TS; drop them.
    #
    TIME=${TIME%%.*}
    printf '%s' "$TIME"
    spaces "$TIME_PAD"

    #
    #Size is right justified, padded with spaces on the left.
    #
    printf "%${SIZE_FIELD}s" "$SIZE"

    #Title of the page: space the column over, print, pad, end with "."
    spaces "$TITLE_PAD"
    TITLE=$(grep '<TITLE>' "$NAME" | head -n 1 |
      sed -e 's,^[[:space:]]*,,' -e 's,<TITLE>,,' -e 's,</TITLE>,,' |
      awk '{sub("\r$", ""); print}')
    printf '%s' "$TITLE"
    spaces $((TITLE_FIELD - ${#TITLE}))
    printf '.'

    #Description, handled the same way as the title
    spaces "$DESCR_PAD"
    DESCRIPTION=$(grep '<META name="description"' "$NAME" | head -n 1 |
      sed -e 's,^[[:space:]]*,,' \
        -e 's,<META name="description" content=",,' \
        -e 's/">//' |
      awk '{sub("\r$", ""); print}')
    printf '%s' "$DESCRIPTION"
    spaces $((DESCR_FIELD - ${#DESCRIPTION}))
    printf '.'

    #Last item on this line, give <BR> for line break
    printf '<BR>\n'
  done
Checkreqtags.sh Shell Script
The checkreqtags.sh script checks the html page for a list of tags that I require to be present. The tags are listed one at a time in a separate configuration file.
#!/bin/sh
#This script, named "checktags", is used to check a document for
# specific html tags that should be present. An error condition
# is raised if not enough tags. Zero is returned if the doc.
# contains sufficient tags.
#
# Usage: checkreqtags.sh FILE
# The required tags are read from tags.conf, one tag per line.

FILENAME=$1
TAGS_CONF=/scripts/wwwmanage/tags.conf

if [ -z "$FILENAME" ] || [ ! -r "$FILENAME" ]; then
  echo "Unable to read file to check: $FILENAME" >&2
  exit 1
fi
if [ ! -r "$TAGS_CONF" ]; then
  # Without the tag list nothing can be verified; do not report success.
  echo "Unable to read tag list: $TAGS_CONF" >&2
  exit 1
fi

# HTML format
EXIT=0
while IFS= read -r TAG || [ -n "$TAG" ]; do
  # Blank lines in the configuration carry no tag.
  [ -n "$TAG" ] || continue
  echo "Checking for tag $TAG"
  # -F: the tag is literal text, not a regular expression.
  if ! grep -q -F -e "$TAG" "$FILENAME"; then
    echo Sorry, not enough tags present
    echo "Failed on: $TAG"
    EXIT=1
    break
  fi
done <"$TAGS_CONF"

if [ "$EXIT" -ne 0 ]; then
  echo "Error, exiting, Error Code $EXIT"
  exit "$EXIT"
fi
echo "All Necessary Tags are Present"
Tags.conf Configuration File
This is the tags configuration file for the above checkreqtags.sh shell script. As I need more tags I can just add them to this file, one tag per line.
<HTML>
</HTML>
<TITLE>
</TITLE>
<HEAD>
</HEAD>
<META name="keywords" content="
<META name="description" content="
<BODY BGCOLOR="#FFFFFF" BACKGROUND="">
</BODY>
Openclosetags.sh Shell Script
Check the html file for proper order on opening and closing tags. The tag to be closed should always be the last one that was opened.
#!/bin/sh
# openclosetags.sh - check that HTML tags close in the reverse order
# in which they were opened.
#
# Usage: openclosetags.sh FILE
# Exit status: 0 when every opened tag was closed in order, non-zero
# otherwise.  The file is scanned one byte at a time with xxd, so this
# is slow on large pages.

FILENAME=$1

if [ ! -f "${FILENAME}" ]; then
  echo No File present,
  echo unable to continue
  exit 1
fi

# hex_at OFFSET - two-digit hex value of the byte at OFFSET; prints an
# empty line once OFFSET is past the end of the file.
hex_at() {
  xxd -l 1 -s "$1" "${FILENAME}" | awk '{print $2}'
}

# char_at OFFSET - printable form of the byte at OFFSET, as shown in
# the text column of xxd.
char_at() {
  xxd -l 1 -s "$1" "${FILENAME}" | awk '{print $3}'
}

# gettag - read the next tag starting at byte offset SEEK.
#   Sets CURR_TAG to the tag name (up to the first whitespace or ">"),
#   advances SEEK, and sets EOFSTATUS=1 with HEX empty at end of file.
gettag() {
  #Find start of tag (starts with less than sign)
  while [ -n "$HEX" ]; do
    HEX=$(hex_at "$SEEK")
    if [ "$HEX" = 3c ]; then
      #3c is less than sign
      CURR_TAG=$(char_at "$SEEK")
      break
    fi
    SEEK=$((SEEK + 1))
  done

  #check exit status of above loop - EOF reached?
  if [ -z "$HEX" ]; then
    EOFSTATUS=1
    return 0
  fi

  #Get rest of tag up to white space (important part of tag)
  # or up to greater than sign
  SEEK=$((SEEK + 1))
  while [ -n "$HEX" ]; do
    HEX=$(hex_at "$SEEK")
    case "$HEX" in
      20 | 09 | 0a | 0d)
        #Space, tab, LF or CR: the tag name is complete.  HTML allows
        # any of these after the name, so all count as the separator.
        CURR_TAG="${CURR_TAG} "

        #Special Case - <A NAME=" and <A HREF="
        if [ "$CURR_TAG" = '<A ' ]; then
          SEEK_CHECK=$((SEEK + 1))
          SP_CASE_CHECK=$(xxd -l 6 -s "$SEEK_CHECK" "${FILENAME}" |
            awk '{print $5}')
          case "$SP_CASE_CHECK" in
            'NAME="' | 'HREF="')
              SEEK=$((SEEK_CHECK + 5))
              CURR_TAG="${CURR_TAG}${SP_CASE_CHECK}"
              ;;
          esac
        fi

        #Because we have hit white space, discard the rest of the
        # tag until we get to the greater than sign (end of tag)
        SEEK=$((SEEK + 1))
        while [ -n "$HEX" ]; do
          HEX=$(hex_at "$SEEK")
          if [ "$HEX" = 3e ]; then
            #3e is greater than sign
            break
          fi
          SEEK=$((SEEK + 1))
        done
        break
        ;;
      3e)
        #3e is greater than sign
        CURR_TAG="${CURR_TAG}$(char_at "$SEEK")"
        break
        ;;
      '')
        #End of file; handled after the loop
        ;;
      *)
        CURR_TAG="${CURR_TAG}$(char_at "$SEEK")"
        ;;
    esac
    SEEK=$((SEEK + 1))
  done

  #check exit status of above loop - EOF reached?
  if [ -z "$HEX" ]; then
    EOFSTATUS=1
    echo EOF reached before end of tag
    return 1
  fi
}

# checktag TAG - classify TAG; sets TAGSTATUS to IGNORE, PUSH or PULL.
checktag() {
  TAG_CHECKTAG="$*"
  #Investigate tag (start tag, stop tag, comment with no matching
  # stop tag, ...)
  case "$TAG_CHECKTAG" in
    #Keywords we can ignore: they never have a closing tag
    '<BR>' | '<HR>' | '<A NAME="' | '<IMG ')
      TAGSTATUS=IGNORE
      ;;
    '<!'* | '<LINK'* | '<META'*)
      TAGSTATUS=IGNORE
      ;;
    '<A HREF="')
      CURR_TAG="<A "
      TAGSTATUS=PUSH
      ;;
    '</'*)
      TAGSTATUS=PULL
      ;;
    *)
      TAGSTATUS=PUSH
      ;;
  esac
  return 0
}

# dotag TAG - apply TAG to TAGSTACK.  Returns 1 when a closing tag does
# not match the most recently opened tag.
dotag() {
  TAGSTATUS=
  checktag "$1"
  case "$TAGSTATUS" in
    IGNORE)
      return 0
      ;;
    PUSH)
      if [ "$1" = '<A HREF="' ]; then
        TAGSTACK="${TAGSTACK}<A "
      else
        TAGSTACK="${TAGSTACK}$1"
      fi
      return 0
      ;;
    PULL)
      TAGCOMPARE="$TAGSTACK"
      TAGKILL=$(echo "$1" | sed -e 's,</,<,')
      #Try to remove the modified close tag from tagstack:
      TAGSTACK=$(echo "$TAGSTACK" | sed -e "s,${TAGKILL}$,,")
      if [ "$TAGCOMPARE" = "$TAGSTACK" ]; then
        #If the above removal didn't work, then lets try one more
        # time, this time without the trailing greater than sign.
        TAGKILL=$(echo "$TAGKILL" | sed -e 's,>$,,')
        #TAGSTACK is deliberately left unquoted here: word splitting
        # drops the trailing blank of an entry such as "<A ", which is
        # what lets "<A" match at the end of the stack.
        # shellcheck disable=SC2086
        TAGSTACK=$(echo $TAGSTACK | sed -e "s,${TAGKILL}$,,")
      fi
      if [ "$TAGCOMPARE" = "$TAGSTACK" ]; then
        #Removal of tag failed on both attempts.
        # Tags are out of order
        echo Tagstack is as follows:
        echo "$TAGSTACK"
        echo Failed while attempting to remove "$1"
        echo from the stack.
        echo "Seek count is $SEEK bytes"
        return 1
      fi
      return 0
      ;;
    *)
      echo Unknown error in dotag
      echo TAGSTATUS is "$TAGSTATUS"
      return 1
      ;;
  esac
}

SEEK=0
EOFSTATUS=0
CURR_TAG=
HEX=0
TAGSTACK=

#Go from beginning of file to end of file
while [ -n "$HEX" ]; do
  gettag
  if [ -z "$HEX" ]; then
    break
  fi
  echo "$CURR_TAG"
  dotag "$CURR_TAG"
  EXIT=$?
  if [ "$EXIT" -gt 0 ]; then
    echo Failure, exiting
    # "return" is not valid outside a function; exit so a mismatch
    # really stops the script with a failure status.
    exit "$EXIT"
  fi
  #Debug
  echo Tagstack is "$TAGSTACK"
done

#check exit status of above loop - EOF reached?
if [ "$EOFSTATUS" -eq 1 ]; then
  echo End of File Reached
fi

#Check if tagstack is empty
if [ -n "$TAGSTACK" ]; then
  echo FAILURE: Failed to remove all items from tagstack
  echo Tags do not match.
  echo Tagstack contents are "$TAGSTACK"
  exit 1
fi
echo Successfully cleared the tagstack, all tags match.
echo Tagstack contents are "$TAGSTACK"
exit 0
Wwwmanage.conf Configuration File
# wwwmanage.conf - shared settings for the wwwmanage scripts.
# This file is sourced with "." by the submit script, so it must stay
# plain sh variable assignments, one per line.

#Global Variables

#Livesite is a local version of the live web site. This livesite
# can be running a web server, or not, it doesn't matter.
# Files will be copied to livesite, then other scripts will
# be run to push files from here to other sites you may wish to
# keep synchronized. Also other scripts will be run after files
# are copied here, such as datesort.
# The trailing slash is required: scripts append file names directly.
LIVESITE=/home/httpd/www-geocities-rlcomp_1999/

#Prepsite is your preparation site that is not accessible by others.
# All your work is done in prepsite, once your page is ready
# "submit" it to the "livesite" by using this "submit" script.
# The trailing slash is required here as well.
PREPSITE=/home/ftp/pub/techcd/pub/www_prep_site/www-geocities-rlcomp_1999-prep/

#backup is where you will save backups to. Files will be added
# with their modification date as part of their filename
# files will keep appending to the end of that backup.
BACKUP=/home/ftp/pub/techcd/pub/www_prep_site/www-geo-rlcomp_1999.tar

#DateSort Variables
NAME_FIELD=48
TIME_FIELD=2
SIZE_FIELD=12
TEMPFILE=/tmp/sort.tmp

#Home directory for wwwmanage
WWWMANAGE=/scripts/wwwmanage

#Exit Error Codes
EXIT_NO_TAGS=7
EXIT_NOT_ROOT=6
#File Type is unknown; not a regular file, nor a directory
EXIT_UNK_FIL_TYP=5
EXIT_DIR=4
EXIT_BACKUP=3
Ftp Expect Scripts
#!/usr/bin/expect -f
#
# Push files to riblack6 over ftp.
#
# Usage: riblack6.ftp.expect LOCAL REMOTE [LOCAL REMOTE ...]
#   Every LOCAL/REMOTE pair becomes one ftp "put LOCAL REMOTE".
#
# This Expect script was generated by autoexpect on Wed Jun 6 18:10:55 2001
# Expect and autoexpect were both written by Don Libes, NIST.
#
# Note that autoexpect does not guarantee a working script. It
# necessarily has to guess about certain things. Two reasons a script
# might fail are:
#
# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet,
# etc.) and devices discard or ignore keystrokes that arrive "too
# quickly" after prompts. If you find your new script hanging up at
# one spot, try adding a short sleep just before the previous send.
# Setting "force_conservative" to 1 (see below) makes Expect do this
# automatically - pausing briefly before sending each character. This
# pacifies every program I know of. The -c flag makes the script do
# this in the first place. The -C flag allows you to define a
# character to toggle this mode off and on.

set force_conservative 0  ;# set to 1 to force conservative mode even if
                          ;# script wasn't run conservatively originally
if {$force_conservative} {
    set send_slow {1 .1}
    proc send {ignore arg} {
        sleep .1
        exp_send -s -- $arg
    }
}

#
# 2) differing output - Some programs produce different output each time
# they run. The "date" command is an obvious example. Another is
# ftp, if it produces throughput statistics at the end of a file
# transfer. If this causes a problem, delete these patterns or replace
# them with wildcards. An alternative is to use the -p flag (for
# "prompt") which makes Expect only look for the last line of output
# (i.e., the prompt). The -P flag allows you to define a character to
# toggle this mode off and on.
#
# Read the man page for more info.
#
# -Don

# The arguments must come in LOCAL REMOTE pairs.
if {[llength $argv] == 0 || [llength $argv] % 2 != 0} {
    send_user "usage: $argv0 LOCAL REMOTE ?LOCAL REMOTE ...?\n"
    exit 2
}

# The password can be supplied through the environment so it does not
# have to live in this file; the placeholder is used otherwise.
set password "xxxxxxxxxxxx"
if {[info exists env(WWWMANAGE_FTP_PASSWORD)]} {
    set password $env(WWWMANAGE_FTP_PASSWORD)
}

# No timeout: a large upload may keep ftp silent for a long time.
set timeout -1
spawn ftp riblack6.americas.qcorp.net
match_max 100000
expect "Name (riblack6.americas.qcorp.net:*): "
send -- "geoweb\r"
expect ":"
send -- "$password\r"
expect "ftp> "
send -- "bin\r"
expect "ftp> "
send -- "hash\r"
expect "ftp> "
# foreach walks the argument list two elements at a time, handing over
# each element itself rather than a one-element list.
foreach {local remote} $argv {
    send -- "put $local $remote\r"
    expect "ftp> "
}
send -- "quit\r"
# Wait for ftp to finish instead of matching one exact goodbye text.
expect eof
Ftp Expect Configuration File
I have one server that doesn’t get all the files that I generate. For this one I have created this configuration file so that each time I “submit” a file, it is checked against this file. If it matches something in the first column, it will then be pushed to this special server using the 2nd column as the remote filename.
#filename-exactly-as-we-push-it destination-on-build-server
procedures/kickstart-rh70.html /home/httpd/html/linux/riblack/kickstart-rh70.html
Notes
I misuse variables and variable scope in these scripts. I also hardcode a lot of values where they should really be variables. These errors may or may not be cleaned up in time; for now it works – and to quote the old saying, “If it isn’t broke, don’t fix it!”.
The openclose tags script is very slow (it seems to do about 1 tag per second). I do plan on migrating this one to C or C++. I feel that C or C++ could scan a whole html file in well under 10 seconds where it is taking more than a minute or two with the bash script.
I am currently – December 19, 2002 – not using any of the above scripts. I have converted over to a new set of scripts as shown below. Also, I’m not currently checking for tag order using my own scripts — every once in a while I will copy all my pages and use “weblint” to check the tags on them.
Here are my current scripts:
re-index.sh
site_index.sh
apply_template.sh
TEMPLATE_GUIDEBAR
TEMPLATE_HEAD
TEMPLATE_TAIL
savedates.sh
restoredates.sh
re-index.sh runs as a cron job once each night. Here are its contents:
#!/bin/sh
# re-index.sh - nightly cron job.
# Rebuild the site map pages first, then apply the page templates so
# the freshly generated site map pages receive them too.
site_index.sh
apply_template.sh
site_index.sh now takes the place of datesort.sh and indexer.sh (my two previous methods of creating a site map). Here’s the contents:
#!/bin/sh
# site_index.sh - generate the site map pages in ~/public_html.
#
# For every sort key (filename, date, size, title, description) two
# pages are written: sitemap-KEY.html and sitemap-KEY-r.html (reverse
# order).  Each page is an HTML table with one row per *.html page.
# sitemap-date.html is finally copied to sitemap.html, datesort.html
# and indexer.html.

cd ~/public_html || exit 1

# Prefix placed in front of every generated link.  Empty (the default)
# gives links relative to the site map pages themselves; set it to an
# absolute base URL ending in "/" if the pages are served elsewhere.
LINK_PREFIX=${LINK_PREFIX-}

SORT_KEYS="sitemap-filename sitemap-date sitemap-size sitemap-title sitemap-description"

# title_text FILE - text after the first <TITLE> on each matching line.
title_text() {
  grep '<TITLE>' "$1" | sed -e 's,^.*<TITLE> *,,'
}

# description_text FILE - text after content=" on each description line.
description_text() {
  grep '" *description *"' "$1" | grep content |
    sed -e 's,^.*content *= *",,'
}

# generate_sort_keys - write sitemap-KEY.tmp for every sort key.  Each
# line is "<key> <file>"; pages without a title or description get the
# key "zzz" so they sort last.
generate_sort_keys() {
  for X in $SORT_KEYS; do
    : >"${X}.tmp"
  done
  for X in *.html; do
    [ -f "$X" ] || continue
    # The site map pages themselves are not listed.
    case $X in
      *sitemap-*.html) continue ;;
    esac
    printf '%s %s\n' "$X" "$X" >>sitemap-filename.tmp
    find "$X" -printf "%T@ %f\n" >>sitemap-date.tmp
    find "$X" -printf "%s %f\n" >>sitemap-size.tmp
    # Only the first word is used as the title / description key.
    LINE=$(title_text "$X" | sed -e 's, .*$,,' | head -n 1)
    printf '%s %s\n' "${LINE:-zzz}" "$X" >>sitemap-title.tmp
    LINE=$(description_text "$X" | sed -e 's, .*$,,' | head -n 1)
    printf '%s %s\n' "${LINE:-zzz}" "$X" >>sitemap-description.tmp
  done
}

# print_cell ALIGN MODE VALUE... - emit one <TD>; MODE "nowrap" adds
# the NOWRAP attribute, anything else leaves the cell free to wrap.
print_cell() {
  pc_align=$1
  if [ "${2-}" = "nowrap" ]; then pc_wrap=NOWRAP; else pc_wrap=""; fi
  shift
  [ $# -gt 0 ] && shift
  printf '<TD ALIGN="%s" VALIGN="TOP" %s>%s</TD>' "$pc_align" "$pc_wrap" "$*"
}

# print_left MODE VALUE... - left-aligned cell.
print_left() {
  print_cell left "$@"
}

# print_right MODE VALUE... - right-aligned cell.
print_right() {
  print_cell right "$@"
}

# print_line_formatted - emit one table row from the RECORD_* variables.
print_line_formatted() {
  printf '<TR ALIGN="center">'
  print_left nowrap "$RECORD_FILENAME"
  print_left nowrap "$RECORD_DATE"
  print_right nowrap "$RECORD_SIZE"
  print_left nowrap "$RECORD_TITLE"
  print_left wrap "$RECORD_DESCRIPTION"
  echo '</TR>'
}

# header_link PAGE KEY LABEL - column heading link.  On the page that
# is already sorted by KEY the link points at the reverse-order page,
# everywhere else at the forward page.
header_link() {
  if [ "$1" = "$2.html" ]; then
    hl_target="$2-r.html"
  else
    hl_target="$2.html"
  fi
  printf '<A HREF="%s%s">%s</A>' "$LINK_PREFIX" "$hl_target" "$3"
}

# print_header PAGE - emit the template comment block, the table start
# and the heading row for PAGE (e.g. sitemap-date-r.html).
print_header() {
  RECORD_FILENAME=$(header_link "$1" sitemap-filename Filename)
  RECORD_DATE=$(header_link "$1" sitemap-date Date)
  RECORD_SIZE=$(header_link "$1" sitemap-size Size)
  RECORD_TITLE=$(header_link "$1" sitemap-title Title)
  RECORD_DESCRIPTION=$(header_link "$1" sitemap-description Description)

  # Match the -r pages too, so the label never depends on whatever an
  # earlier call left behind.
  case $1 in
    sitemap-filename*) SORT_REF="Name" ;;
    sitemap-date*) SORT_REF="Date" ;;
    sitemap-size*) SORT_REF="Size" ;;
    sitemap-title*) SORT_REF="Title" ;;
    sitemap-description*) SORT_REF="Description" ;;
  esac

  cat <<EOF
<!--
PLACE_CREATED_DATE_HERE = "December 16, 2002"
PLACE_DESCRIPTION_HERE = "QLINUX Site Map - sorted by $SORT_REF"
PLACE_TITLE_HERE = "qlinux Site Map - Sorted by $SORT_REF"
PLACE_NEXT_ITEM_HERE = "fixme"
PLACE_PREVIOUS_ITEM_HERE = "fixme"
-->
EOF
  echo '<TABLE ALIGN="CENTER">'
  print_line_formatted
}

# print_record FILE - emit the table row describing FILE.
print_record() {
  RECORD_FILENAME=$(printf '<A HREF="%s%s">%s</A>' "$LINK_PREFIX" "$1" "$1")
  RECORD_DATE=$(find "$1" -printf "%TY %Tb %Td - %TH:%TM:%TS")
  # Newer GNU find adds fractional seconds to %TS; drop them.
  RECORD_DATE=${RECORD_DATE%%.*}
  RECORD_SIZE=$(find "$1" -printf "%s")
  RECORD_TITLE=$(title_text "$1" | sed -e 's,</TITLE>.*$,,' | head -n 1)
  RECORD_DESCRIPTION=$(description_text "$1" | sed -e 's,".*$,,' | head -n 1)
  print_line_formatted
}

# print_records - read "<key> <file>" lines on stdin, one row per file.
print_records() {
  while read -r KEY FILE; do
    [ -n "$FILE" ] || continue
    print_record "$FILE"
  done
}

generate_sort_keys

for X in $SORT_KEYS; do
  {
    print_header "${X}.html"
    sort -g "${X}.tmp" | print_records
    echo '</TABLE>'
  } >"${X}.html"
  {
    print_header "${X}-r.html"
    sort -g -r "${X}.tmp" | print_records
    echo '</TABLE>'
  } >"${X}-r.html"
  rm "${X}.tmp"
done

# The date pages are swapped so that sitemap-date.html lists the newest
# page first; the heading links are rewritten to match the swap.
sed -e 's,sitemap-date-r.html">Date</A>,sitemap-date.html">Date</A>,' \
  sitemap-date.html >sitemap-date.html.tmp
sed -e 's,sitemap-date.html">Date</A>,sitemap-date-r.html">Date</A>,' \
  sitemap-date-r.html >sitemap-date.html
mv sitemap-date.html.tmp sitemap-date-r.html

cp -a sitemap-date.html sitemap.html
cp -a sitemap-date.html datesort.html
cp -a sitemap-date.html indexer.html
Here’s the contents of apply_template.sh
#!/bin/sh
# apply_template.sh - wrap every page in $WEBSITE_PATH with the head,
# guidebar and tail templates found in $TEMPLATE_PATH.
#
# Usage: apply_template.sh [NAME ...]
#   With no arguments only pages newer than template.hist are
#   processed (all pages when a template or this script changed).
#   Each NAME (a file name or find pattern) is processed as well.
#
# Templates contain PLACE_<something>_HERE placeholders.  Their values
# come from lines of the form  PLACE_X_HERE = "value"  inside the page.
# Page modification times are preserved.

TEMPLATE_PATH=~/bin
WEBSITE_PATH=~/public_html
TEMPLATE_LIST="TEMPLATE_HEAD TEMPLATE_GUIDEBAR TEMPLATE_TAIL"

FAILURE=0
for X in $TEMPLATE_LIST; do
  if [ ! -f "$TEMPLATE_PATH/$X" ]; then
    echo "unable to find $TEMPLATE_PATH/$X"
    FAILURE=$((FAILURE + 1))
  fi
done
if [ "$FAILURE" -gt 1 ]; then echo "check TEMPLATE_PATH variable in $0"; fi
if [ "$FAILURE" -eq 1 ]; then echo "check TEMPLATE_LIST variable in $0"; fi
if [ "$FAILURE" -gt 0 ]; then exit 1; fi

# See if we have updated this file or any of the template files. If any
# of that has been updated then we need to reprocess _all_ the html
# files -- maybe a template has changed. So we remove template.hist.
if [ -e "$TEMPLATE_PATH/template.hist" ]; then
  for X in $TEMPLATE_LIST "$(basename "$0")"; do
    TEMPLATES_CHANGED=$(find "$TEMPLATE_PATH/$X" -newer "$TEMPLATE_PATH/template.hist")
    if [ -n "$TEMPLATES_CHANGED" ]; then
      rm "$TEMPLATE_PATH/template.hist"
      break
    fi
  done
fi

cd "$WEBSITE_PATH" || exit 1

# Scratch files live in a private directory, not in the web root.
WORK=$(mktemp -d "${TMPDIR:-/tmp}/apply_template.XXXXXX") || exit 1
trap 'rm -rf "$WORK"' EXIT

# list_html_files [NAME ...] - print the pages to process, one per
# line: changed pages plus any explicitly named ones, never index.html.
list_html_files() {
  {
    if [ -e "$TEMPLATE_PATH/template.hist" ]; then
      find . -mindepth 1 -maxdepth 1 -type f -newer "$TEMPLATE_PATH/template.hist"
    else
      find . -mindepth 1 -maxdepth 1 -type f
    fi
    for lh_name in "$@"; do
      find . -mindepth 1 -maxdepth 1 -type f -name "$lh_name"
    done
  } | grep 'html$' | grep -v '^\./index\.html$' | sort -u
}

# strip_template NAME FILE - print FILE without the lines from the one
# containing "NAME BEGIN" through the one containing "NAME END".
strip_template() {
  awk -v name="$1" '
    BEGIN { show = 1 }
    index($0, name " BEGIN") { show = 0 }
    index($0, name " END") { show = 1; next }
    show { print }
  ' "$2"
}

# fill_template NAME OUTFILE USE_FILE_DATE
#   Copy template NAME to OUTFILE with every placeholder replaced by
#   the value found in $HTML_FILE.  A value of "fixme" or a missing
#   value leaves the placeholder empty; missing ones are also appended
#   to OUTFILE as  PLACE_X_HERE = "fixme"  inside an HTML comment so
#   they can be filled in later.  With USE_FILE_DATE=1 the placeholder
#   PLACE_UPDATED_DATE_HERE takes the page's modification date.
#   Sets TEMPLATE_INCOMPLETE=1 when any value was "fixme" or missing.
fill_template() {
  ft_name=$1
  ft_out=$2
  ft_use_date=$3
  ft_comment_open=0
  TEMPLATE_INCOMPLETE=0

  cat "$TEMPLATE_PATH/$ft_name" >"$ft_out"
  for ft_item in $(grep "PLACE_.*_HERE" "$TEMPLATE_PATH/$ft_name" |
    sed -e 's,PLACE_,~&,g' -e 's,_HERE,&~,g' | tr '~' '\n' |
    grep "PLACE_.*_HERE" | sort | uniq); do
    ft_value=$(grep "${ft_item} *= *\"" "$HTML_FILE" | head -n 1 |
      sed -e 's,PLACE_,~&,g' | tr '~' '\n' |
      grep "$ft_item" | head -n 1 |
      sed -e "s,^.*$ft_item *= *\",," -e 's,".*$,,')
    if [ "$ft_use_date" = 1 ] && [ "$ft_item" = "PLACE_UPDATED_DATE_HERE" ]; then
      ft_value=$(find "$HTML_FILE" -mindepth 0 -maxdepth 0 -printf "%TB %Td, %TY")
    fi

    ft_missing=0
    if [ "$ft_value" = "fixme" ]; then
      TEMPLATE_INCOMPLETE=1
      ft_value=
    elif [ -z "$ft_value" ]; then
      TEMPLATE_INCOMPLETE=1
      ft_missing=1
    fi

    # Protect characters that are special in a sed replacement that
    # uses "~" as its delimiter.
    ft_escaped=$(printf '%s' "$ft_value" | sed -e 's/[\\&~]/\\&/g')
    sed -e "s~$ft_item~$ft_escaped~g" "$ft_out" >"$ft_out.1"
    mv "$ft_out.1" "$ft_out"

    # Record the missing item only after the substitution above, so
    # the reminder line itself is not blanked out.
    if [ "$ft_missing" -eq 1 ]; then
      if [ "$ft_comment_open" -eq 0 ]; then
        echo '<!--' >>"$ft_out"
        ft_comment_open=1
      fi
      echo "$ft_item"' = "fixme"' >>"$ft_out"
    fi
  done
  if [ "$ft_comment_open" -eq 1 ]; then
    # Close off the fixme section
    echo '-->' >>"$ft_out"
  fi
}

# assemble_page - print head, guidebar, page body, guidebar and tail.
assemble_page() {
  #How many lines in the guidebar? If it is populated then strip the
  # last HR from the head and the first HR from the tail.
  LINECOUNT=$(grep -c -v '^PLACE_' "$WORK/guidebar")
  if [ "$LINECOUNT" -eq 0 ]; then
    GUIDEBAREMPTY="empty"
  else
    GUIDEBAREMPTY="not_empty"
  fi

  if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
    LASTHR=$(grep -c '<HR>' "$WORK/head")
    awk -v LAST="$LASTHR" '
      /<HR>/ { COUNT = COUNT + 1; if (COUNT == LAST) { next } }
      { print }
    ' "$WORK/head"
  else
    cat "$WORK/head"
  fi

  #Send the guidebar
  cat "$WORK/guidebar"

  #Send the main html body - strip off any CRLF chars while we are at it
  awk '{ gsub("\r$", ""); print }' "$HTML_FILE"

  if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
    #Guidebar again, without the "fixme" reminder lines
    grep -v '^PLACE_' "$WORK/guidebar"
    #Remove off first HR tag from the tail
    awk '
      /<HR>/ { if (FOUND == 0) { FOUND = 1; next } }
      { print }
    ' "$WORK/tail"
  else
    cat "$WORK/tail"
  fi
}

#
# Let's do a file at a time -- here goes a big loop
#
list_html_files "$@" | while IFS= read -r HTML_FILE; do
  echo "$HTML_FILE"

  #Strip out existing templates, keep file date and time
  for TEMPLATE_NAME in $TEMPLATE_LIST; do
    strip_template "$TEMPLATE_NAME" "$HTML_FILE" >"$HTML_FILE.template.tmp"
    touch -r "$HTML_FILE" "$HTML_FILE.template.tmp"
    mv "$HTML_FILE.template.tmp" "$HTML_FILE"
  done

  #Build the three pieces with this page's values filled in
  fill_template TEMPLATE_HEAD "$WORK/head" 1

  fill_template TEMPLATE_GUIDEBAR "$WORK/guidebar" 0
  if [ "$TEMPLATE_INCOMPLETE" -gt 0 ]; then
    # If the guidebar isn't finished, I don't want to display it, so
    # strip it out (any "fixme" reminder comment stays).
    strip_template TEMPLATE_GUIDEBAR "$WORK/guidebar" >"$WORK/guidebar.1"
    mv "$WORK/guidebar.1" "$WORK/guidebar"
  fi

  fill_template TEMPLATE_TAIL "$WORK/tail" 1

  # Put all the pieces together, keep file date and time
  assemble_page >"$HTML_FILE.template.tmp"
  touch -r "$HTML_FILE" "$HTML_FILE.template.tmp"
  mv "$HTML_FILE.template.tmp" "$HTML_FILE"
done
# End HTML_FILE big loop

# Here we keep track of which files we need to process
touch "$TEMPLATE_PATH/template.hist"
Here’s savedates.sh, which I use when modifying formatting rather than content:
# savedates.sh - record the modification time of every *.html page in
# ../public_html so restoredates.sh can put it back after a
# formatting-only edit.
#
# Writes ../allfiles.txt with one "<stamp> <name>" line per page, where
# <stamp> is CCYYMMDDhhmm.SS as accepted by "touch -t".
cd ../public_html || exit 1
for X in *.html; do
  printf '.' >&2
  find "$X" -mindepth 0 -maxdepth 0 -type f \
    -printf "%TY%Tm%Td%TH%TM.%TS %p\n"
done |
  # Newer GNU find prints fractional seconds for %TS; "touch -t" only
  # accepts whole seconds, so cut the fraction off the stamp.
  sed -e 's/^\([0-9]*\.[0-9][0-9]\)\.[0-9]* /\1 /' >../allfiles.txt
echo
And here’s restoredates.sh, which is the complement of savedates.sh:
# restoredates.sh - complement of savedates.sh: set every file listed
# in ../allfiles.txt back to the modification time recorded there.
#
# Each input line is "<stamp> <name>"; the name may contain spaces.
cd ../public_html || exit 1
while read -r DATE FILENAME; do
  [ -n "$FILENAME" ] || continue
  # A stamp saved with fractional seconds (CCYYMMDDhhmm.SS.fraction)
  # is not valid for "touch -t"; drop the fraction.
  case $DATE in
    *.*.*) DATE=${DATE%.*} ;;
  esac
  touch -t "$DATE" "$FILENAME"
  printf '.'
done <../allfiles.txt
echo