Web Site Publishing Submit Scripts


Gaming Laptop Series


Submit

The “submit” script is used to “submit” my pages to the various web sites I maintain. It performs various checks on a page and then sends it on its way.

Here’s the “submit” script:

#!/bin/sh
#submit - publish a document from the "prepsite" to the "livesite".
#    Helper scripts are run along the way, and the livesite copy is
#    then pushed on to the other sites that are kept in
#    synchronization with it.


#Please do not modify below this line


#Load the shared settings (site paths, backup location, exit codes)
. /scripts/wwwmanage/wwwmanage.conf




#
#Only root may run this command for now
#   Later I'll see how we can get around this.
[ "$(whoami)" = "root" ] || {
	echo "You must be root to run this command"
	exit $EXIT_NOT_ROOT
}



#Parse the command line.
#    --force / -f  sets ARG_FORCE=1 (skip the tag checks)
#    anything else is taken as the filename; the last one given wins
ARG_FORCE=""

if [ $# -eq 0 ]; then
	#No arguments at all: FILENAME ends up empty
	FILENAME=$1
else
	until [ $# -eq 0 ]; do
		case $1 in
			-f|--force)	ARG_FORCE=1 ;;
			*)		FILENAME=$1 ;;
		esac
		shift
	done
fi




#Run the tag checks unless --force was given
if [ "$ARG_FORCE" != 1 ]; then
	#First check: HTML format, every required tag must be present
	/scripts/wwwmanage/checkreqtags.sh ${PREPSITE}$FILENAME
	EXIT=$?
	[ $EXIT -eq 0 ] || {
		echo Check for Required TAGS failed!
		exit $EXIT
	}

	#Second check: all tags open and close in proper order
	/scripts/wwwmanage/openclosetags.sh ${PREPSITE}$FILENAME
	EXIT=$?
	[ $EXIT -eq 0 ] || {
		echo Check for Matching TAGS failed!
		exit $EXIT
	}
fi


#TAR_ADD file
#    Append a dated copy of ${LIVESITE}file to the tar archive $BACKUP.
#    Arguments: $1 - file name relative to $LIVESITE
#    Reads:     LIVESITE, BACKUP
#    Writes:    MODIFY (the modification stamp used as the name suffix)
#    The live file is set to root:root / 0644, copied to
#    "file-<stamp>", that copy is appended to the archive and then
#    removed again, so only the original stays in the livesite.
TAR_ADD() {
	chown root:root "${LIVESITE}$1"
	chmod 0644 "${LIVESITE}$1"
	MODIFY=`find "${LIVESITE}$1" -printf "%TY%Tb%Td-%TH%TM%TS"`
	#Make the dated copy (the command name was missing on this line)
	cp -rp "${LIVESITE}$1" "${LIVESITE}$1-$MODIFY"
	#Run tar from inside livesite so the archive holds relative names
	(cd "$LIVESITE" && tar -rvf "$BACKUP" "$1-$MODIFY")
	rm "${LIVESITE}$1-$MODIFY"
}


SITE_UPDATE() {

	#Refresh the "Site Updated" line in ${LIVESITE}index.html when the
	#    file being submitted is at least as new as the date shown there.
	#
	#    Reads:  LIVESITE, PREPSITE, FILENAME and MODIFY_EN (the date
	#            text that is placed on the "Site Updated" line).
	#    Writes: VAR_TMP, TMP_MONTH, CMP_DATE, CMP_DATE_INDEX,
	#            CMP_DATE_FILE and possibly ${LIVESITE}index.html.
	#    Exits the whole script with status 30 when the month name on
	#            the "Site Updated" line is not recognised.
	#
	#    It is possible we are only updating the format of the doc
	#    and are not changing the content and therefore I would probably
	#    want to keep the original modification date so that it is
	#    reflective of the last date the content changed, rather
	#    than just formatting changes.
	#
	#    We must compare between the current "Site Update" line in
	#    index.html and the date on the file being submitted.
	#
	#    We then use the date contained in the variable
	#    Modify_En to place on the index.html page.

	#Grab Date from "Site Update" line in index.html, reduced to
	#    "Month Day Year"
	VAR_TMP=`grep "Site Updated" "${LIVESITE}index.html" | sed \
		-e 's/Site Updated //' \
		-e 's/,//' \
		-e 's/<BR>//'`

	#Do Year
	CMP_DATE=`echo $VAR_TMP | awk '{print $3}'`
	#There seems to be a trailing Carriage Return that is causing grief
	#    Remove the offending CR!
	CMP_DATE=`echo $CMP_DATE | awk '{sub ("\r$", ""); print}'`

	#Do Month
	TMP_MONTH=`echo $VAR_TMP | awk '{print $1}'`
	case $TMP_MONTH in
	  January) CMP_DATE=${CMP_DATE}01;;
	 February) CMP_DATE=${CMP_DATE}02;;
	    March) CMP_DATE=${CMP_DATE}03;;
	    April) CMP_DATE=${CMP_DATE}04;;
	      May) CMP_DATE=${CMP_DATE}05;;
	     June) CMP_DATE=${CMP_DATE}06;;
	     July) CMP_DATE=${CMP_DATE}07;;
	   August) CMP_DATE=${CMP_DATE}08;;
	September) CMP_DATE=${CMP_DATE}09;;
	  October) CMP_DATE=${CMP_DATE}10;;
	 November) CMP_DATE=${CMP_DATE}11;;
	 December) CMP_DATE=${CMP_DATE}12;;
	*) echo "Unknown Error in Case statement!"; exit 30;;
	#This is from a mangled "Site Updated" line in index.html
	#    We should put some sort of method here to repair that line.
	#    The line does get fixed; however, when we eventually push
	#    index.html
	esac

	#Do Day
	#    Zero-pad to two digits so that "June 9" becomes 20010609 and
	#    compares correctly against the YYYYMMDD stamp of the file.
	CMP_DATE_INDEX=$CMP_DATE`echo $VAR_TMP | awk '{printf "%02d", $2}'`

	#Get Date of File we are updating
	#    Note: It is probably much faster to do this "find -printf"
	#    command rather than to translate the variable Modify_En
	#    to this format so we can compare.
	CMP_DATE_FILE=`find "${PREPSITE}$FILENAME" -printf "%TY%Tm%Td"`

	if [ "$CMP_DATE_INDEX" -gt "$CMP_DATE_FILE" ]; then
		#Date in header of index.html is newer than the file being
		#   submitted, do nothing
		:
	else
		#Date of file being pushed is newer than that in the header
		#   of index.html, update the "Site Updated" line in index.html
		sed -e "s/Site Updated.*<BR>/Site Updated ${MODIFY_EN}<BR>/" \
			"${LIVESITE}index.html" >"${LIVESITE}index.tmp.html"

		#Carry the modification date of index.html over to the new
		#   copy before it replaces the original.  touch -r copies
		#   the stamp directly; a "find -printf %TS" stamp can carry
		#   fractional seconds that "touch -t" refuses.
		touch -r "${LIVESITE}index.html" "${LIVESITE}index.tmp.html"
		mv "${LIVESITE}index.tmp.html" "${LIVESITE}index.html"
	fi
}






#Check to see if backup file exists, or to see
#    if we can at least create a backup in the
#    directory mentioned.  Otherwise stop with $EXIT_BACKUP.
if [ -f "$BACKUP" ]; then
	#Backup exists, life is good
	:
elif [ -d "$(dirname "$BACKUP")" ]; then
	#Backup directory exists (no .tarfile exists),
	#    life is still good
	:
else
	echo FAILURE:Backup dir does not exist
	#Was "EXIT", which is not a command, so the script kept going
	exit $EXIT_BACKUP
fi


#Only a regular file can be submitted; stop for a directory
#    ($EXIT_DIR) or for anything else ($EXIT_UNK_FIL_TYP)
[ -f ${PREPSITE}$FILENAME ] || {
	if [ -d ${PREPSITE}$FILENAME ]; then
		#$FILENAME is a directory
		echo ${PREPSITE}$FILENAME is a directory
		echo Directories are not currently handled
		exit $EXIT_DIR
	fi
	#$FILENAME is not a file nor a directory
	#   boy are we in trouble now.
	echo ${PREPSITE}$FILENAME is of Unknown File Type
	exit $EXIT_UNK_FIL_TYP
}



#Check to see if the target directory exists under livesite, if not
#    then create it.  (The test used to look at $DIRNAME relative to
#    the current directory instead of under $LIVESITE.)
DIRNAME=`dirname "$FILENAME"`
if [ ! -d "${LIVESITE}$DIRNAME" ]; then
	mkdir -p "${LIVESITE}$DIRNAME"
#work needed here to create directories on ftp sites we copy to.
#maybe set a flag that we look at later.
#    I probably need to have the ftp expect script generated from this
#    script, rather than being an existing script file.  That way I can more
#    easily control how many files I push in one shot, and if any
#    directories are to be created on the remote site.
fi



#
#Copy file from prepsite to livesite, replacing the placeholder date
#    text "June 19, 2001" with the file's own modification date
MODIFY_EN=`find "${PREPSITE}$FILENAME" -printf "%TB %Td, %TY"`
sed -e "s/June 19, 2001/$MODIFY_EN/" "${PREPSITE}$FILENAME" >"${LIVESITE}${FILENAME}"
#Reset file modification date to that of PrepSite
#    touch -r copies the stamp directly; a "find -printf %TS" stamp can
#    carry fractional seconds that "touch -t" refuses.
touch -r "${PREPSITE}$FILENAME" "${LIVESITE}${FILENAME}"

#Update "Site Update" line in index.html
SITE_UPDATE

#Add file to backup with date appended to filename
TAR_ADD "$FILENAME"

#
#Generate a new datesort.html file
(cd "$LIVESITE" && /scripts/wwwmanage/datesort.sh)
#Add file to backup with date appended to filename
TAR_ADD datesort.html


#Now push all 3 files (datesort.html, index.html and $FILENAME) to riblack6
#    Arguments are pairs: local path, remote name
/scripts/wwwmanage/riblack6.ftp.expect \
     "${LIVESITE}$FILENAME" "$FILENAME" \
     "${LIVESITE}datesort.html" datesort.html \
     "${LIVESITE}index.html" index.html


#Now push all 3 files (datesort.html, index.html and $FILENAME) to www.geocities.com/rlcomp_1999
/scripts/wwwmanage/geo-rlcomp.ftp.expect \
     "${LIVESITE}$FILENAME" "$FILENAME" \
     "${LIVESITE}datesort.html" datesort.html \
     "${LIVESITE}index.html" index.html

#Check if file ($Filename) needs to be updated on linux.cca.qcorp.net
#    Each line of linux.cca.files is "source destination"; only the
#    first matching line is used.  -F makes the name a literal string
#    rather than a regular expression.
grep -F -- "$FILENAME" /scripts/wwwmanage/linux.cca.files | (
read SOURCE DESTINATION
if [ -n "$DESTINATION" ]; then
	/scripts/wwwmanage/linux.cca.ftp.expect "${LIVESITE}$SOURCE" "$DESTINATION"
fi
)


NOTES


Datesort.sh Shell Script

datesort.sh is the controlling wrapper around the do_datesort.sh script: it runs do_datesort.sh and packages the output into datesort.html.

#!/bin/sh
#datesort.sh - build datesort.html in the current directory from a fixed
#    HTML header, the listing printed by do_datesort.sh, the contents of
#    datesort.old and a footer carrying the generation time.
FILENAME=datesort.html
TMPFILE=/tmp/datesort.tmp

{
	#Page header
	cat <<EOF
<HTML>
<BODY>
<CENTER><H1>Web Pages - Sorted By Date</H1></CENTER>
<HR>
<PRE>
EOF

	#Listing, followed by the saved older entries
	/scripts/wwwmanage/do_datesort.sh
	cat /scripts/wwwmanage/datesort.old

	#Page footer
	cat <<EOF
</PRE>
<HR>
<CENTER>This file generated at <BR>$(date +"%H:%M:%S on %b %d, %Y")</CENTER>

</BODY>
</HTML>
EOF
} >$FILENAME

#Drop every line that mentions datesort.html itself
grep -v $FILENAME <$FILENAME >$TMPFILE; mv $TMPFILE $FILENAME


NOTES


Do_datesort.sh Shell Script

The do_datesort.sh script does the bulk of the work of generating the datesort.html document.

#!/bin/sh
#do_datesort.sh - print one formatted listing line per file found below
#    the current directory, newest first.
#
#Column layout (widths in characters)
NAME_FIELD=48
TIME_PAD=2
SIZE_FIELD=12
TITLE_PAD=2
TITLE_FIELD=80
DESCR_PAD=2
DESCR_FIELD=160

#Work file holding the sorted "time name" pairs
TEMPFILE=/tmp/sort.tmp

#Please don't modify below this line

#The following find command will generate
#    a tmpfile with last modification time (seconds since the epoch)
#    and name.  We do not care about the time format at this point.
#    Sort - this will be sorted in order of date and time, newest first.
#    The sort must be numeric (-n): a plain text sort ranks a 9-digit
#    time stamp above a 10-digit one.
find . -type f -printf "%T@ %p\n" | sort -rn >$TEMPFILE

#Now that we have the web pages sorted, lets skip
#    over the first column (time) and pick out the
#    2nd column which are the web page names.
#    Then we will process each web page filename in order
#    of date (because we are already sorted)
#
#    The contents of $TEMPFILE are word-split by the for loop, so X
#    takes the values time, name, time, name, ...  COUNTER is 0 while X
#    is a time token and 1 while X is a name token.
#    NOTE(review): a file name containing whitespace would be split
#    into several tokens and throw COUNTER out of step - confirm that
#    no such names exist.
COUNTER=0
for X in `cat $TEMPFILE`; do
	if [ $COUNTER -eq 1 ]; then
		#
		#Grab name
		#    Left justify, pad with blanks to the right
		#    until end of NAME_FIELD is reached.
		#Also Add in Hyperlink for name
		#
		NAME=`find $X -printf "%p"`
		echo -n '<A HREF='
		echo -n $NAME
		echo -n '>'
		echo -n $NAME
		echo -n '</A>'
		#NAME is lengthened with "." purely to count columns; one
		#    space (octal 040) is printed per missing character
		while [ ${#NAME} -lt $NAME_FIELD ]; do
			NAME="${NAME}."
			echo -ne '\040'
		done
		#
		#Grab time
		#Time is a fixed field, pad some spaces at the end
		#
		TIME=`find $X -printf "%TY %Tb %Td - %TH:%TM:%TS\n"`
		echo -n $TIME
		#Print TIME_PAD spaces after the time
		TIME_COUNT=$TIME_PAD
		while [ $TIME_COUNT -ne 0 ]; do
			echo -ne '\040'
			TIME_COUNT=`expr $TIME_COUNT - 1`
		done
		#
		#Grab size
		#Size should be right justified, pad with
		#    spaces on the left.
		#
		SIZE=`find $X -printf "%s"`
		#SIZE_C is a scratch copy used only to count the padding
		SIZE_C=$SIZE
		while [ ${#SIZE_C} -lt $SIZE_FIELD ]; do
			SIZE_C=".${SIZE_C}"
			echo -ne '\040'
		done
		echo -n $SIZE




		#Lets add on the title of the page
		#First lets space the column over
		TITLE_COUNT=$TITLE_PAD
		while [ $TITLE_COUNT -ne 0 ]; do
			echo -ne '\040'
			TITLE_COUNT=`expr $TITLE_COUNT - 1`
		done

		#display title
		#    Lines containing <TITLE>, with the <TITLE> and </TITLE>
		#    markers and any trailing carriage return removed
		TITLE=`cat $X | grep \<TITLE\> | sed -e \
's,<TITLE>,,' | sed -e \
's,</TITLE>,,'|awk '{sub("\r$","");print}'`
		echo -n $TITLE

		#Have title field pad out with spaces
		while [ ${#TITLE} -lt $TITLE_FIELD ]; do
			TITLE="${TITLE}."
			echo -ne '\040'
		done

#A literal "." is printed after the title field
echo -n "."





		#Lets display the description as well

		#space over just a little
		DESCR_COUNT=$DESCR_PAD
		while [ $DESCR_COUNT -ne 0 ]; do
			echo -ne '\040'
			DESCR_COUNT=`expr $DESCR_COUNT - 1`
		done

		#display description
		#    Lines containing <META name="description", with that
		#    prefix, the first "> and any trailing carriage return
		#    removed
		DESCRIPTION=`cat $X | grep '<META name="description"' | sed -e \
's,<META name="description" content=",,' | sed -e \
's/">//' | awk '{sub("\r$","");print}'`
		echo -n $DESCRIPTION

		#Pad to field size with spaces
		while [ ${#DESCRIPTION} -lt $DESCR_FIELD ]; do
			DESCRIPTION="${DESCRIPTION}."
			echo -ne '\040'
		done

#A literal "." is printed after the description field
echo -n "."

		#Last item on this line, give <BR> for line break
		echo '<BR>'


		#Now reset counter so we can skip
		#    the nextdate and time column.
		COUNTER=0

	else

		#This must be the first column which holds the time
		#    Now that we are sorted by time, we do not
		#    really care about the time field, so just
		#    skip over it.  Do nothing and increment counter.
		COUNTER=`expr $COUNTER + 1`
	fi
done


NOTES


Checkreqtags.sh Shell Script

The checkreqtags.sh script checks the html page for a list of tags that I require to be present. The tags are listed one at a time in a separate configuration file.

#!/bin/sh
#checkreqtags.sh (also known as "checktags") - check a document for the
#    specific html tags that must be present.  The tags are read from
#    /scripts/wwwmanage/tags.conf, one per line.
#    Argument: $1 - the document to check
#    Exit status: zero if every tag was found, otherwise the non-zero
#    status raised for the first missing tag.


FILENAME=$1

# HTML format
EXIT=
cat /scripts/wwwmanage/tags.conf | while read; do

	echo Checking for tag $REPLY

	#Any output from grep means the tag is there; on to the next one
	if [ -n "$(cat $FILENAME | grep "$REPLY")" ]; then
		continue
	fi

	echo Sorry, not enough tags present
	echo Failed on: $REPLY
	#The loop is the tail of a pipeline; this status is picked up below
	exit 1

done

EXIT=$?
[ $EXIT -eq 0 ] || {
	echo Error, exiting, Error Code $EXIT
	exit $EXIT
}

echo "All Necessary Tags are Present"


NOTES


Tags.conf Configuration File

This is the tags configuration file for the above checkreqtags.sh shell script. As I need more tags I can just add them to this file, one tag per line.

<HTML>
</HTML>
<TITLE>
</TITLE>
<HEAD>
</HEAD>
<META name="keywords" content="
<META name="description" content="
<BODY BGCOLOR="#FFFFFF" BACKGROUND="">
</BODY>


NOTES


Openclosetags.sh Shell Script

Check the html file for proper order on opening and closing tags. The tag to be closed should always be the last one that was opened.

#!/bin/sh
#openclosetags.sh - check that the tags of an html file open and close
#    in proper order.
#    Argument: $1 - the html file to check

FILENAME=$1

#Stop right away unless the argument names a regular file
[ ! -f ${FILENAME} ] && {
	echo No File present,
	echo unable to continue
	exit 1
}

#gettag - read the next tag from ${FILENAME}, one byte at a time with xxd,
#    starting at byte offset $SEEK.
#    Reads/updates: SEEK (current offset), HEX (hex value of the last byte
#                   read; empty once xxd prints nothing, which this script
#                   treats as end of file)
#    Sets:          CURR_TAG (text of the tag found), CHAR, EOFSTATUS=1 at EOF
#    Returns:       0 when a tag was read, or when EOF came before any tag;
#                   1 when EOF came in the middle of a tag
gettag() {
#Find start of tag (starts with less than sign)
while [ -n "$HEX" ]; do
	#Field 2 of the xxd line is the byte in hex, field 3 the character
	HEX=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $2}'`
	if [ "$HEX" = 3c ]; then
		#3c is less than sign
		CHAR=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $3}'`
		CURR_TAG=$CHAR
		break
	fi
	SEEK=`expr $SEEK + 1`
done
#check exit status of above loop - EOF reached?
if [ -z "$HEX" ]; then
	EOFSTATUS=1
	return 0
fi







#Get rest of tag up to space bar (important part of tag)
#  or up to greater than sign
SEEK=`expr $SEEK + 1`
while [ -n "$HEX" ]; do
	HEX=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $2}'`
	if [ "$HEX" = 20 ]; then
		#20 is space bar
		#The tag name is kept with one trailing space, e.g. "<FONT "
		CURR_TAG="${CURR_TAG} "

		#Special Case - <A NAME="
		#    Look at the six bytes after the space; when they are
		#    NAME=" or HREF=" they are kept as part of CURR_TAG
		SEEK_CHECK=$SEEK
		TESTCASE='<A '
		if [ "$CURR_TAG" = "$TESTCASE" ]; then
			SEEK_CHECK=`expr $SEEK_CHECK + 1`
			#Field 5 of a six-byte xxd line is the text column
			SP_CASE_CHECK=`xxd -l 6 -s $SEEK_CHECK ${FILENAME} | awk '{print $5}'`
			TESTCASE='NAME="'
			if [ "$SP_CASE_CHECK" = "$TESTCASE" ]; then
				SEEK=`expr $SEEK_CHECK + 5`
				CURR_TAG="${CURR_TAG}${SP_CASE_CHECK}"
			fi
			TESTCASE='HREF="'
			if [ "$SP_CASE_CHECK" = "$TESTCASE" ]; then
				SEEK=`expr $SEEK_CHECK + 5`
				CURR_TAG="${CURR_TAG}${SP_CASE_CHECK}"
			fi
		fi

		#Because the we have hit a space, lets discard the rest
		#    of the tag until we get to the greater than sign
		#Eat up the rest of the input until the greater than sign
		#    is reached (end of tag)
		SEEK=`expr $SEEK + 1`
		while [ -n "$HEX" ]; do
			HEX=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $2}'`
			if [ "$HEX" = 3e ]; then
				#3e is greater than sign
				break
			fi
			SEEK=`expr $SEEK + 1`
		done

		break
	elif [ "$HEX" = 3e ]; then
		#3e is greater than sign
		CHAR=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $3}'`
		CURR_TAG="${CURR_TAG}$CHAR"
		break
	elif [ -n "$HEX" ]; then
		#Any other byte: append its character to the tag name
		CHAR=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $3}'`
		CURR_TAG="${CURR_TAG}$CHAR"
	fi
	SEEK=`expr $SEEK + 1`
done
#check exit status of above loop - EOF reached?
if [ -z "$HEX" ]; then
	EOFSTATUS=1
	echo EOF reached before end of tag
	return 1
fi

}
#checktag tag...
#    Classify a tag (start tag, stop tag, or something with no matching
#    stop tag) and record the verdict in TAGSTATUS:
#        IGNORE - nothing to match up (<BR>, <HR>, <A NAME=", <IMG ,
#                 and anything starting with <!, <LINK or <META)
#        PULL   - a closing tag (starts with </)
#        PUSH   - every other tag; for <A HREF=" CURR_TAG is also
#                 reset to "<A "
#    Also sets TAG_CHECKTAG to the arguments.  Always returns 0.
checktag() {
TAG_CHECKTAG="$*"

case "$TAG_CHECKTAG" in
    '<BR>'|'<HR>'|'<A NAME="'|'<IMG '|\<\!*|\<LINK*|\<META*)
		TAGSTATUS=IGNORE
	;;
    '<A HREF="')
		CURR_TAG="<A "
		TAGSTATUS=PUSH
	;;
    \</*)
		TAGSTATUS=PULL
	;;
    *)
		TAGSTATUS=PUSH
	;;
esac
return 0
}

#dotag tag
#    Apply one tag to the stack of open tags kept in TAGSTACK.
#    Arguments: $1 - the tag as delivered by gettag
#    Uses:      checktag (sets TAGSTATUS), SEEK (for the failure report)
#    Writes:    TAGSTATUS, TAGSTACK, TAGCOMPARE, TAGKILL
#    Returns:   0 when the tag was ignored, pushed, or matched the most
#               recently opened tag; 1 when a closing tag does not match
#               the end of the stack (the stack is then left unchanged)
#               or TAGSTATUS is unknown.
dotag() {
TAGSTATUS=
checktag "$1"
case "$TAGSTATUS" in
    IGNORE)
		return 0
		;;
    PUSH)
		if [ "$1" = '<A HREF="' ]; then
			TAGSTACK="${TAGSTACK}<A "
		else
			TAGSTACK="${TAGSTACK}$1"
		fi
		return 0
		;;
    PULL)
		TAGCOMPARE="$TAGSTACK"
		#Turn the closing tag into its opening form: </B> becomes <B>
		TAGKILL=$(printf '%s\n' "$1" | sed -e 's,</,<,')

		#Try to remove the modified close tag from tagstack:
		TAGSTACK=$(printf '%s\n' "$TAGCOMPARE" | sed -e "s,${TAGKILL}$,,")

		if [ "$TAGCOMPARE" = "$TAGSTACK" ]; then
			#If the above removal didn't work, then lets try one
			#    more time to remove TAGKill.
			#A tag that was opened with attributes is stored as its
			#    name plus one space ("<FONT "), so drop the trailing
			#    greater than sign and look for "name space" at the
			#    end of the stack.  The stack itself is left as it
			#    is: trimming its trailing space first would make a
			#    failed removal look like a successful one.
			TAGKILL=$(printf '%s\n' "$TAGKILL" | sed -e 's,>$,,')
			TAGSTACK=$(printf '%s\n' "$TAGCOMPARE" | sed -e "s,${TAGKILL} \$,,")
		fi

		if [ "$TAGCOMPARE" = "$TAGSTACK" ]; then
			#Removal of tag failed on both attempts.
			#    Tags are out of order
			echo Tagstack is as follows:
			echo "$TAGSTACK"
			echo Failed while attempting to remove "$1"
			echo   from the stack.
			echo Seek count is $SEEK bytes
			return 1
		fi
		return 0
		;;
    *)
		echo Unknown error in dotag
		echo TAGSTATUS is "$TAGSTATUS"
		return 1
		;;
esac
}


#Starting state for the scan
SEEK=0
TAGS=
EOFSTATUS=0
CURR_TAG=
#HEX starts non-empty so that the loops get going; gettag empties it
#    at end of file
HEX=0
TAGSTACK=

#Go from beginning of file to end of file
while [ -n "$HEX" ]; do
	gettag
	if [ -z "$HEX" ]; then
		break
	fi
	echo "$CURR_TAG"
	dotag "$CURR_TAG"
	EXIT=$?
	if [ $EXIT -gt 0 ]; then
		echo Failure, exiting
		#This is top-level code, not a function, so the script has
		#    to be left with "exit"; "return" does not end it here.
		exit $EXIT
	fi
	#Debug
	echo Tagstack is "$TAGSTACK"

done
#check exit status of above loop - EOF reached?
if [ "$EOFSTATUS" -eq 1 ]; then
	echo End of File Reached
fi

#Check if tagstack is empty
if [ -n "$TAGSTACK" ]; then
	echo FAILURE: Failed to remove all items from tagstack
	echo Tags do not match.
	echo Tagstack contents are "$TAGSTACK"
	exit 1
fi
echo Successfully cleared the tagstack, all tags match.
echo Tagstack contents are "$TAGSTACK"
exit 0


NOTES


Wwwmanage.conf Configuration File

#Global Variables
#    This file is read with "." by the submit script, so every line
#    must be a plain shell assignment or a comment.

#Livesite is a local version of the live web site.  This livesite
#    can be running a web server, or not, it doesn't matter.
#    Files will be copied to livesite, then other scripts will
#    be run to push files from here to other sites you may wish to
#    keep synchronized.  Also other scripts will be run after files
#    are copied here, such as datesort.
#    The value is used as a prefix (${LIVESITE}file), so keep the
#    trailing slash.
LIVESITE=/home/httpd/www-geocities-rlcomp_1999/


#Prepsite is your preparation site that is not accessible by others.
#    All your work is done in prepsite, once your page is ready
#    "submit" it to the "livesite" by using this "submit" script.
#    Used as a prefix as well, so keep the trailing slash.
PREPSITE=/home/ftp/pub/techcd/pub/www_prep_site/www-geocities-rlcomp_1999-prep/

#backup is where you will save backups to.  Files will be added
#    with their modification date as part of their filename
#    files will keep appending to the end of that backup.
#    This is the path of the tar archive itself.
BACKUP=/home/ftp/pub/techcd/pub/www_prep_site/www-geo-rlcomp_1999.tar

#DateSort Variables
#    NOTE(review): do_datesort.sh as shown sets its own NAME_FIELD,
#    SIZE_FIELD and TEMPFILE and does not read this file, so these
#    may be unused - confirm before relying on them.
NAME_FIELD=48
TIME_FIELD=2
SIZE_FIELD=12
TEMPFILE=/tmp/sort.tmp


#Home directory for wwwmanage
WWWMANAGE=/scripts/wwwmanage


#Exit Error Codes
#    The submit script uses EXIT_NOT_ROOT, EXIT_UNK_FIL_TYP, EXIT_DIR
#    and EXIT_BACKUP as its exit status for the matching failure.
EXIT_NO_TAGS=7
EXIT_NOT_ROOT=6
#File Type is unknown; not a regular file, nor a directory
EXIT_UNK_FIL_TYP=5
EXIT_DIR=4
EXIT_BACKUP=3


NOTES


Ftp Expect Scripts

#!/usr/bin/expect -f
#
# This Expect script was generated by autoexpect on Wed Jun  6 18:10:55 2001
# Expect and autoexpect were both written by Don Libes, NIST.
#
# Note that autoexpect does not guarantee a working script.  It
# necessarily has to guess about certain things.  Two reasons a script
# might fail are:
#
# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet,
# etc.) and devices discard or ignore keystrokes that arrive "too
# quickly" after prompts.  If you find your new script hanging up at
# one spot, try adding a short sleep just before the previous send.
# Setting "force_conservative" to 1 (see below) makes Expect do this
# automatically - pausing briefly before sending each character.  This
# pacifies every program I know of.  The -c flag makes the script do
# this in the first place.  The -C flag allows you to define a
# character to toggle this mode off and on.

# Conservative-mode switch from the autoexpect boilerplate.  It is off (0)
# here, so the block below is skipped and the normal "send" is used.
set force_conservative 0  ;# set to 1 to force conservative mode even if
			  ;# script wasn't run conservatively originally
if {$force_conservative} {
	# Settings used by the slow-send mode of exp_send -s below
	set send_slow {1 .1}
	# Replace "send" with a version that sleeps 0.1 seconds and then
	# sends its second argument in slow mode; the first argument
	# (the "--" the callers pass) is ignored.
	proc send {ignore arg} {
		sleep .1
		exp_send -s -- $arg
	}
}

#
# 2) differing output - Some programs produce different output each time
# they run.  The "date" command is an obvious example.  Another is
# ftp, if it produces throughput statistics at the end of a file
# transfer.  If this causes a problem, delete these patterns or replace
# them with wildcards.  An alternative is to use the -p flag (for
# "prompt") which makes Expect only look for the last line of output
# (i.e., the prompt).  The -P flag allows you to define a character to
# toggle this mode off and on.
#
# Read the man page for more info.
#
# -Don


# Log in to the ftp server and upload three files.
# The command-line arguments come in pairs - local path, remote name -
# as argv 0/1, 2/3 and 4/5 (this is how the submit script calls it).
# NOTE(review): the login name and password are written into this
# script; keep the file readable by its owner only.

# -1 means wait for each prompt without a time limit
set timeout -1
spawn ftp riblack6.americas.qcorp.net
match_max 100000
expect "Name (riblack6.americas.qcorp.net:*): "
send -- "geoweb\r"
expect ":"
send -- "xxxxxxxxxxxx\r"
expect "ftp> "
# Binary transfers, with hash-mark progress output
send -- "bin\r"
expect "ftp> "
send -- "hash\r"
expect "ftp> "
# lindex yields the argument itself; lrange yields a one-element list,
# which is written with braces when the name has special characters
send -- "put [lindex $argv 0] [lindex $argv 1]\r"
expect "ftp> "
send -- "put [lindex $argv 2] [lindex $argv 3]\r"
expect "ftp> "
send -- "put [lindex $argv 4] [lindex $argv 5]\r"
expect "ftp> "
send -- "quit\r"
expect "221  \r"


NOTES


Ftp Expect Configuration File

I have one server that doesn’t get all the files that I generate. For this one I have created this configuration file so that each time I “submit” a file, it is checked against this file. If it matches something in the first column, it will then be pushed to this special server using the 2nd column as the remote filename.

#filename-exactly-as-we-push-it destination-on-build-server
procedures/kickstart-rh70.html /home/httpd/html/linux/riblack/kickstart-rh70.html


NOTES


Notes

I misuse variables and variable scope in these scripts. I also hardcode a lot of values where they should really be variables. These errors may or may not be cleaned up in time, for now it works – and to quote the old saying “If it isn’t broke, don’t fix it!”.

The openclose tags script is very slow (it seems to do about 1 tag per second). I do plan on migrating this one to C or C++. I feel that C or C++ could scan a whole html file in well under 10 seconds where it is taking more than a minute or two with the bash script.


I am currently – December 19, 2002 – not using any of the above scripts. I have converted over to a new set of scripts as shown below. Also, I’m not currently checking for tag order using my own scripts — every once in a while I will copy all my pages and use “weblint” to check the tags on my pages.


Here are my current scripts:

re-index.sh
site_index.sh
apply_template.sh
TEMPLATE_GUIDEBAR
TEMPLATE_HEAD
TEMPLATE_TAIL
savedates.sh
restoredates.sh


re-index.sh runs as a cron job once each night. Here are its contents:

site_index.sh
apply_template.sh

site_index.sh now takes the place of datesort.sh and indexer.sh (my two previous methods of creating a site map). Here’s the contents:

# site_index.sh - generate the sitemap-*.html pages for ~/public_html.
# Everything below reads and writes files in the current directory, so
# stop if the web directory cannot be entered.
cd ~/public_html || { echo "unable to cd to ~/public_html" >&2; exit 1; }
# One key file (KEY.tmp) and two pages (KEY.html, KEY-r.html) per sort key
SORT_KEYS="sitemap-filename sitemap-date sitemap-size sitemap-title sitemap-description"
# Every *.html file except the generated sitemap-*.html pages themselves
FILES_TO_PROCESS="$(ls *.html | grep -v 'sitemap-.*\.html')"
#FILENAME_FIELD_LENGTH=48
#DATE_FIELD_LENGTH=22
#DATE_FIELD_LENGTH=180
#SIZE_FIELD_LENGTH=12
#TITLE_FIELD_LENGTH=80
#TITLE_FIELD_LENGTH=280
#DESCR_FIELD_LENGTH=180
#DESCR_FIELD_LENGTH=360
generate_sort_keys()
{
	# Write one "KEY FILE" line per file in $FILES_TO_PROCESS into each of
	# sitemap-filename.tmp, sitemap-date.tmp, sitemap-size.tmp,
	# sitemap-title.tmp and sitemap-description.tmp.
	# Reads: FILES_TO_PROCESS.  Writes: X, LINE and the five .tmp files.

	# key = the file name itself
	for X in $FILES_TO_PROCESS; do
		echo "$X $X"
	done > sitemap-filename.tmp

	# key = modification time in seconds
	for X in $FILES_TO_PROCESS; do
		find $X -printf "%T@ %f\n"
	done > sitemap-date.tmp

	# key = size in bytes
	for X in $FILES_TO_PROCESS; do
		find $X -printf "%s %f\n"
	done > sitemap-size.tmp

	# key = first word of the first <TITLE> line, "zzz" when there is none
	for X in $FILES_TO_PROCESS; do
		LINE="$(cat $X | grep '<TITLE>' |
			sed -e 's,^.*<TITLE> *,,' -e 's, .*$,,' |
			head -1 | tr '\n' ' ')"
		if [ -n "$LINE" ]; then
			echo -ne "$LINE"
		else
			echo -ne "zzz "
		fi
		echo $X
	done > sitemap-title.tmp

	# key = first word of the description content, "zzz" when there is none
	for X in $FILES_TO_PROCESS; do
		LINE="$(cat $X | grep '" *description *"' | grep content |
			sed -e 's,^.*content *= *",,' -e 's, .*$,,' |
			head -1 | tr '\n' ' ')"
		if [ -n "$LINE" ]; then
			echo -ne "$LINE"
		else
			echo -ne "zzz "
		fi
		echo $X
	done > sitemap-description.tmp
}
print_left()
{
#$1 = wrap mode: "nowrap" adds NOWRAP to the cell, anything else does not
#$2... = field value (all remaining words, joined by spaces)
#Prints one left-aligned <TD> cell without a trailing newline
	case "$1" in
		nowrap)	WRAP=NOWRAP ;;
		*)	WRAP="" ;;
	esac
	shift
	FIELD_VALUE="$@"
	echo -ne '<TD ALIGN="left" VALIGN="TOP" '"$WRAP"'>'
	echo -ne "$FIELD_VALUE"
	echo -ne '</TD>'
}
print_right()
{
#$1 = wrap mode: "nowrap" adds NOWRAP to the cell, anything else does not
#$2... = field value (all remaining words, joined by spaces)
#Prints one right-aligned <TD> cell without a trailing newline
	case "$1" in
		nowrap)	WRAP=NOWRAP ;;
		*)	WRAP="" ;;
	esac
	shift
	FIELD_VALUE="$@"
	echo -ne '<TD ALIGN="right" VALIGN="TOP" '"$WRAP"'>'
	echo -n $FIELD_VALUE
	echo -ne '</TD>'
}
print_line_formatted()
{
	# Print one table row built from RECORD_FILENAME, RECORD_DATE,
	# RECORD_SIZE, RECORD_TITLE and RECORD_DESCRIPTION.  Only the
	# description cell is allowed to wrap; the size is right-aligned.
	printf '%s' '<TR ALIGN="center">'
	#print_left  $DESCR_FIELD_LENGTH $RECORD_DESCRIPTION
	print_left nowrap $RECORD_FILENAME
	print_left nowrap $RECORD_DATE
	print_right nowrap $RECORD_SIZE
	print_left nowrap $RECORD_TITLE
	print_left  wrap $RECORD_DESCRIPTION
	printf '%s\n' '</TR>'
}
print_header()
{
	# Print the top of one sitemap page: the template comment block,
	# the opening <TABLE> tag and the row of column headings.
	# Arguments: $1 - name of the page being written, e.g.
	#                 sitemap-date.html or sitemap-date-r.html
	# Writes:    RECORD_FILENAME, RECORD_DATE, RECORD_SIZE, RECORD_TITLE,
	#            RECORD_DESCRIPTION (the heading links) and SORT_REF
	# Uses:      print_line_formatted

	# Each heading links to the page sorted by that column.  On the page
	# that is already sorted by the column (forward order) the link
	# points at the reverse-order page instead.
	case "$1" in
		*sitemap-filename.html*)
			RECORD_FILENAME='<A HREF="https://cyanogenmods.moviesx.org/sitemap-filename-r.html">Filename</A>' ;;
		*)
			RECORD_FILENAME='<A HREF="https://cyanogenmods.moviesx.org/sitemap-filename.html">Filename</A>' ;;
	esac
	case "$1" in
		*sitemap-date.html*)
			RECORD_DATE='<A HREF="https://cyanogenmods.moviesx.org/sitemap-date-r.html">Date</A>' ;;
		*)
			RECORD_DATE='<A HREF="https://cyanogenmods.moviesx.org/sitemap-date.html">Date</A>' ;;
	esac
	case "$1" in
		*sitemap-size.html*)
			RECORD_SIZE='<A HREF="https://cyanogenmods.moviesx.org/sitemap-size-r.html">Size</A>' ;;
		*)
			RECORD_SIZE='<A HREF="https://cyanogenmods.moviesx.org/sitemap-size.html">Size</A>' ;;
	esac
	case "$1" in
		*sitemap-title.html*)
			RECORD_TITLE='<A HREF="https://cyanogenmods.moviesx.org/sitemap-title-r.html">Title</A>' ;;
		*)
			RECORD_TITLE='<A HREF="https://cyanogenmods.moviesx.org/sitemap-title.html">Title</A>' ;;
	esac
	case "$1" in
		*sitemap-description.html*)
			RECORD_DESCRIPTION='<A HREF="https://cyanogenmods.moviesx.org/sitemap-description-r.html">Description</A>' ;;
		*)
			RECORD_DESCRIPTION='<A HREF="https://cyanogenmods.moviesx.org/sitemap-description.html">Description</A>' ;;
	esac

	# Name of the sort order for the page title.  All five patterns now
	# match the reverse ("-r") pages too; title and description used to
	# match the forward page only and relied on SORT_REF being left over
	# from the previous call.
	case "$1" in
		*sitemap-filename*)	SORT_REF="Name" ;;
		*sitemap-date*)		SORT_REF="Date" ;;
		*sitemap-size*)		SORT_REF="Size" ;;
		*sitemap-title*)	SORT_REF="Title" ;;
		*sitemap-description*)	SORT_REF="Description" ;;
	esac

	cat <<EOF
<!--
PLACE_CREATED_DATE_HERE = "December 16, 2002"
PLACE_DESCRIPTION_HERE = "QLINUX Site Map - sorted by $SORT_REF"
PLACE_TITLE_HERE = "qlinux Site Map - Sorted by $SORT_REF"
PLACE_NEXT_ITEM_HERE = "fixme"
PLACE_PREVIOUS_ITEM_HERE = "fixme"
-->
EOF
	echo '<TABLE ALIGN="CENTER">'
	print_line_formatted
}
print_record()
{
	# Fill the RECORD_* variables for one page and print its table row.
	# Arguments: $1 - file name of the page
	# Uses:      print_line_formatted

	# Link whose target and text are both the file name
	RECORD_FILENAME="$(
		echo -ne '<A HREF="https://cyanogenmods.moviesx.org/'
		echo -ne "$1"
		echo -ne '">'
		echo -ne "$1"
		echo -ne '</A>'
	)"

	# Modification date and size in bytes, both taken from find
	RECORD_DATE="$(find $1 -printf "%TY %Tb %Td - %TH:%TM:%TS\n")"
	RECORD_SIZE="$(find $1 -printf "%s")"

	# Text between <TITLE> and </TITLE> on the first line carrying <TITLE>
	RECORD_TITLE="$(cat $1 | grep '<TITLE>' |
		sed -e 's,^.*<TITLE> *,,' -e 's,</TITLE>.*$,,' |
		head -1 | tr '\n' ' ')"

	# Quoted content value of the first "description" line
	RECORD_DESCRIPTION="$(cat $1 | grep '" *description *"' | grep content |
		sed -e 's,^.*content *= *",,' -e 's,".*$,,' |
		head -1 | tr '\n' ' ')"

	print_line_formatted
}
# Build the key files, then one forward and one reverse page per sort key
generate_sort_keys

for X in $SORT_KEYS; do
	print_header ${X}.html > ${X}.html
	print_header ${X}-r.html > ${X}-r.html
	sort -g    ${X}.tmp | while read KEY FILE; do print_record $FILE; done >> ${X}.html
	sort -g -r ${X}.tmp | while read KEY FILE; do print_record $FILE; done >> ${X}-r.html
	rm ${X}.tmp
done

# Swap the two date pages, so that sitemap-date.html ends up with the
# reverse-sorted listing, and switch the "Date" heading link in each to
# go with the page's new name.
cat sitemap-date.html | sed -e 's,sitemap-date-r.html">Date</A>,sitemap-date.html">Date</A>,' > sitemap-date.html.tmp
cat sitemap-date-r.html | sed -e 's,sitemap-date.html">Date</A>,sitemap-date-r.html">Date</A>,' > sitemap-date.html
mv sitemap-date.html.tmp sitemap-date-r.html

# Publish the date-sorted page under its other names as well
# (the command name was missing on these three lines)
cp -a sitemap-date.html sitemap.html
cp -a sitemap-date.html datesort.html
cp -a sitemap-date.html indexer.html

Here’s the contents of apply_template.sh

# Where the template files live, and the web pages they are applied to
TEMPLATE_PATH=~/bin
WEBSITE_PATH=~/public_html
TEMPLATE_LIST="TEMPLATE_HEAD TEMPLATE_GUIDEBAR TEMPLATE_TAIL"

# Every template must be present.  One missing file points at
# TEMPLATE_LIST, several missing files point at TEMPLATE_PATH.
FAILURE=0
for X in $TEMPLATE_LIST; do
	if [ ! -f $TEMPLATE_PATH/$X ]; then
		echo unable to find $TEMPLATE_PATH/$X
		FAILURE=`expr $FAILURE + 1`
	fi
done
if [ $FAILURE -gt 1 ]; then echo check TEMPLATE_PATH variable in $0; fi
if [ $FAILURE -eq 1 ]; then echo check TEMPLATE_LIST variable in $0; fi
# Report the failure to the caller; a bare "exit" here returned status 0
if [ $FAILURE -gt 0 ]; then exit 1; fi

# See if we have updated this file or any of the template files.  If any of that has been updated
# then we need to reprocess _all_ the html files -- maybe a template has changed.  So we remove the template.hist file

if [ -e $TEMPLATE_PATH/template.hist ]; then
	# The script itself is looked up by its base name under TEMPLATE_PATH
	for X in $TEMPLATE_LIST `basename $0`; do
		TEMPLATES_CHANGED=`find $TEMPLATE_PATH/$X -newer $TEMPLATE_PATH/template.hist`
		if [ -n "$TEMPLATES_CHANGED" ]; then
			rm $TEMPLATE_PATH/template.hist
			break
		fi
	done
fi




# #
# # Let's do a file at a time -- here goes a big "for" loop
# #
cd $WEBSITE_PATH
for HTML_FILE in `( if [ -e $TEMPLATE_PATH/template.hist ]; then find . -type f -newer $TEMPLATE_PATH/template.hist -mindepth 1 -maxdepth 1; else find . -type f -mindepth 1 -maxdepth 1; fi; if [ -n "$1" ]; then for X in $@; do find . -type f -mindepth 1 -maxdepth 1 -name $X; done; fi) | grep html$ | grep -v "\./index.html"`; do
#echo -ne '.'
echo $HTML_FILE

#Strip out existing templates, keep file date and time
for TEMPLATE_NAME in $TEMPLATE_LIST; do cat $HTML_FILE | awk "BEGIN { PRINTVAR = 1 } /$TEMPLATE_NAME BEGIN/ { PRINTVAR = 0 } /$TEMPLATE_NAME END/ { PRINTVAR = 1; next } { if ( PRINTVAR == 1 ) {print}}" > $HTML_FILE.template.tmp; touch -t `find $HTML_FILE -mindepth 0 -maxdepth 0 -printf "%TY%Tm%Td%TH%TM.%TS"` $HTML_FILE.template.tmp; mv $HTML_FILE.template.tmp $HTML_FILE; done

#Insert new templates, keep file date and time

#Do TEMPLATE_HEAD
FIXME=0
REPLACEMENT_LIST=`cat $TEMPLATE_PATH/TEMPLATE_HEAD | grep "PLACE_.*_HERE" | sed -e 's,PLACE_,~&,g' -e 's,_HERE,&~,g' | tr '~' '\n' | grep "PLACE_.*_HERE" | sort | uniq`

cat $TEMPLATE_PATH/TEMPLATE_HEAD > template_head.tmp
for REPLACEMENT_ITEM in $REPLACEMENT_LIST; do
	VALUE=`cat $HTML_FILE | grep "${REPLACEMENT_ITEM} *= *\"" | head -1 | sed -e 's,PLACE_,~&,g' | tr '~' '\n' | grep $REPLACEMENT_ITEM | head -1 | sed -e "s,^.*$REPLACEMENT_ITEM *= *\",," -e 's,".*$,,'`
	if [ "$REPLACEMENT_ITEM" == "PLACE_UPDATED_DATE_HERE" ]; then
		VALUE=`find $HTML_FILE -mindepth 0 -maxdepth 0 -printf "%TB %Td, %TY"`
	fi
	if [ "$VALUE" == "fixme" ]; then
		cat template_head.tmp | sed -e "s~$REPLACEMENT_ITEM~~g" >template_head.tmp.1
		mv template_head.tmp.1 template_head.tmp
	elif [ -n "$VALUE" ]; then
		cat template_head.tmp | sed -e "s~$REPLACEMENT_ITEM~$VALUE~g" >template_head.tmp.1
		mv template_head.tmp.1 template_head.tmp
	else
		cat template_head.tmp | sed -e "s~$REPLACEMENT_ITEM~~g" >template_head.tmp.1
		mv template_head.tmp.1 template_head.tmp
		if [ "$FIXME" -eq 0 ]; then echo '<!--' >> template_head.tmp; FIXME=1; fi
		echo $REPLACEMENT_ITEM' = "fixme"' >> template_head.tmp
	fi
done
if [ "$FIXME" -eq 1 ]; then echo '-->' >> template_head.tmp; fi



#Do TEMPLATE_GUIDEBAR
FIXME=0
GUIDEBAR_NOT_READY=0
REPLACEMENT_LIST=`cat $TEMPLATE_PATH/TEMPLATE_GUIDEBAR | grep "PLACE_.*_HERE" | sed -e 's,PLACE_,~&,g' -e 's,_HERE,&~,g' | tr '~' '\n' | grep "PLACE_.*_HERE" | sort | uniq`

cat $TEMPLATE_PATH/TEMPLATE_GUIDEBAR > template_guidebar.tmp
for REPLACEMENT_ITEM in $REPLACEMENT_LIST; do
	VALUE=`cat $HTML_FILE | grep "${REPLACEMENT_ITEM} *= *\"" | head -1 | sed -e 's,PLACE_,~&,g' | tr '~' '\n' | grep $REPLACEMENT_ITEM | head -1 | sed -e "s,^.*$REPLACEMENT_ITEM *= *\",," -e 's,".*$,,'`
	if [ "$VALUE" == "fixme" ]; then
		GUIDEBAR_NOT_READY=1
		cat template_guidebar.tmp | sed -e "s~$REPLACEMENT_ITEM~~g" >template_guidebar.tmp.1
		mv template_guidebar.tmp.1 template_guidebar.tmp
	elif [ -n "$VALUE" ]; then
		cat template_guidebar.tmp | sed -e "s~$REPLACEMENT_ITEM~$VALUE~g" >template_guidebar.tmp.1
		mv template_guidebar.tmp.1 template_guidebar.tmp
	else
		cat template_guidebar.tmp | sed -e "s~$REPLACEMENT_ITEM~~g" >template_guidebar.tmp.1
		mv template_guidebar.tmp.1 template_guidebar.tmp
		if [ "$FIXME" -eq 0 ]; then echo '<!--' >> template_guidebar.tmp; FIXME=1; GUIDEBAR_NOT_READY=1; fi
		echo $REPLACEMENT_ITEM' = "fixme"' >> template_guidebar.tmp
	fi
done
if [ "$FIXME" -ne 0 ]; then
	# Close off the fixme section
	echo '-->' >> template_guidebar.tmp
fi
if [ "$GUIDEBAR_NOT_READY" -gt 0 ]; then
	# Also if the guidebar isn't finished, I don't want to display it, so strip it out
	for TEMPLATE_NAME in TEMPLATE_GUIDEBAR; do cat template_guidebar.tmp | awk "BEGIN { PRINTVAR = 1 } /$TEMPLATE_NAME BEGIN/ { PRINTVAR = 0 } /$TEMPLATE_NAME END/ { PRINTVAR = 1; next } { if ( PRINTVAR == 1 ) {print}}" > template_guidebar.tmp.1; mv template_guidebar.tmp.1 template_guidebar.tmp; done
fi





#Do TEMPLATE_TAIL
#    Build template_tail.tmp from $TEMPLATE_PATH/TEMPLATE_TAIL by filling in
#    every PLACE_*_HERE marker with the value that $HTML_FILE assigns to it
#    on a line of the form   PLACE_xxx_HERE = "value".
#    PLACE_UPDATED_DATE_HERE is special: it always gets the modification
#    date of $HTML_FILE.  A value of "fixme" blanks the marker; a missing
#    value blanks the marker and appends a  PLACE_xxx_HERE = "fixme"
#    reminder inside an HTML comment at the end of the file.

#TEMPLATE_SUBST FILE ITEM VALUE
#    Replace every occurrence of ITEM in FILE with the literal text VALUE
#    (an empty VALUE simply removes ITEM).  FILE is rewritten through FILE.1.
TEMPLATE_SUBST() {
	# sed treats "&" and "\" specially in a replacement, and "~" is our
	# s~~~ delimiter: backslash-escape all three so VALUE goes in verbatim
	# (e.g. "&amp;" must not turn into the matched marker followed by "amp;")
	TEMPLATE_SUBST_SAFE=$(printf '%s\n' "$3" | sed -e 's/[\\&~]/\\&/g')
	sed -e "s~$2~$TEMPLATE_SUBST_SAFE~g" "$1" > "$1.1"
	mv "$1.1" "$1"
}

FIXME=0
# One marker per line, duplicates removed
REPLACEMENT_LIST=$(grep "PLACE_.*_HERE" "$TEMPLATE_PATH/TEMPLATE_TAIL" | sed -e 's,PLACE_,~&,g' -e 's,_HERE,&~,g' | tr '~' '\n' | grep "PLACE_.*_HERE" | sort | uniq)

cat "$TEMPLATE_PATH/TEMPLATE_TAIL" > template_tail.tmp
# $REPLACEMENT_LIST is deliberately unquoted: one loop pass per marker
for REPLACEMENT_ITEM in $REPLACEMENT_LIST; do
	if [ "$REPLACEMENT_ITEM" = "PLACE_UPDATED_DATE_HERE" ]; then
		# Date comes from the file itself, e.g. "January 05, 2024"
		VALUE=$(find "$HTML_FILE" -mindepth 0 -maxdepth 0 -printf "%TB %Td, %TY")
	else
		# First line of the page mentioning  ITEM = "  -> text between the quotes
		VALUE=$(grep "${REPLACEMENT_ITEM} *= *\"" "$HTML_FILE" | head -n 1 | sed -e 's,PLACE_,~&,g' | tr '~' '\n' | grep "$REPLACEMENT_ITEM" | head -n 1 | sed -e "s,^.*$REPLACEMENT_ITEM *= *\",," -e 's,".*$,,')
	fi
	if [ "$VALUE" = "fixme" ]; then
		TEMPLATE_SUBST template_tail.tmp "$REPLACEMENT_ITEM" ""
	elif [ -n "$VALUE" ]; then
		TEMPLATE_SUBST template_tail.tmp "$REPLACEMENT_ITEM" "$VALUE"
	else
		TEMPLATE_SUBST template_tail.tmp "$REPLACEMENT_ITEM" ""
		# Open the fixme comment section the first time a value is missing
		if [ "$FIXME" -eq 0 ]; then echo '<!--' >> template_tail.tmp; FIXME=1; fi
		printf '%s = "fixme"\n' "$REPLACEMENT_ITEM" >> template_tail.tmp
	fi
done
# Close off the fixme section
if [ "$FIXME" -eq 1 ]; then echo '-->' >> template_tail.tmp; fi









# Put all the pieces together
#    Output order: template_head.tmp, template_guidebar.tmp, the page body
#    ($HTML_FILE with trailing CRs removed), then - only when the guidebar is
#    populated - the guidebar again without its PLACE_ lines, and finally
#    template_tail.tmp.  When the guidebar is populated, the last <HR> line of
#    the head and the first <HR> line of the tail are dropped.
#    The result replaces $HTML_FILE but keeps its original timestamp.
(
#How many lines in template_guidebar?  If template_guidebar is populated then strip off the last HR from template_head
LINECOUNT=$(grep -v '^PLACE_' template_guidebar.tmp | wc -l)
if [ "$LINECOUNT" -eq 0 ]; then GUIDEBAREMPTY="empty"; else GUIDEBAREMPTY="not_empty"; fi

#Remove off last HR tag from template_head if GUIDEBAR is not empty, else don't strip
if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
	LASTHR=$(grep '<HR>' template_head.tmp | wc -l)
	# Skip only the LASTHR-th (i.e. final) line that contains <HR>
	awk -v LAST="$LASTHR" 'BEGIN {COUNT=0} /<HR>/ {COUNT=COUNT+1; if (COUNT==LAST) {next}} {print}' template_head.tmp
else
	cat template_head.tmp
fi

#Send the guidebar
cat template_guidebar.tmp

#Send the main html body - strip off any CRLF chars while we are at it
awk '{gsub("\r$",""); print}' < "$HTML_FILE"

if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
	#if guidebar is populated, then send it, with no "fixup"'s, and strip HR from template_tail.tmp
	grep -v '^PLACE_' template_guidebar.tmp
	#Remove off first HR tag from template_tail
	awk 'BEGIN {FOUND=0} /<HR>/ {if (FOUND == 0) {FOUND=1; next}} {print}' template_tail.tmp
else
	# else if guidebar is empty, then don't send it and don't strip any HR's from tail.
	cat template_tail.tmp
fi

) > "$HTML_FILE.template.tmp"
# Carry the page's timestamp over to the rebuilt file.  This used to feed
# find's %TS into "touch -t", but current GNU find prints %TS with a
# fractional part, which "touch -t" rejects; "touch -r" copies the time
# straight from the reference file instead.
touch -r "$HTML_FILE" "$HTML_FILE.template.tmp"
mv "$HTML_FILE.template.tmp" "$HTML_FILE"
#cleanup after putting all the pieces together
rm template_head.tmp
rm template_guidebar.tmp
rm template_tail.tmp

done # End HTML_FILE big loop


# Here we keep track of which files we need to process
#    (creates template.hist if missing, otherwise just refreshes its timestamp;
#    quoted so a template path containing spaces still names a single file)
touch "$TEMPLATE_PATH/template.hist"




#Original for strip out templates from existing html file
#for Y in awk.html; do for X in $TEMPLATE_LIST; do cat $Y | awk "BEGIN { PRINTVAR = 1 } /$X BEGIN/ { PRINTVAR = 0 } /$X END/ { PRINTVAR = 1; next } { if ( PRINTVAR == 1 ) {print}}" > $Y.template.tmp; touch -t `find $Y -mindepth 0 -maxdepth 1 -printf "%TY%Tm%Td%TH%TM.%TS"` $Y.template.tmp; mv $Y.template.tmp $Y; done; done

#DO TEMPLATE_HEAD_DATESORT
#cat if.html | awk '{gsub("\r$",""); print}' > if.html.1

Here’s savedates.sh, which I use when modifying formatting rather than content:

# Record the modification time of every *.html file in ../public_html as
# "YYYYMMDDhhmm.SS filename" lines in ../allfiles.txt (the format expected
# by "touch -t").  One progress dot per file goes to stderr.

# Stop if the directory is missing rather than overwrite ../allfiles.txt
# with data from whatever directory we happen to be in
cd ../public_html || exit 1
for X in *.html; do
	# An unmatched glob leaves the literal "*.html" behind - skip it
	[ -e "$X" ] || continue
	printf '.' >&2
	find "$X" -mindepth 0 -maxdepth 0 -type f -printf "%TY%Tm%Td%TH%TM.%TS %p\n"
done | sed -e 's/^\([0-9]*\.[0-9][0-9]\)\.[0-9]*/\1/' > ../allfiles.txt
# (the sed above drops the fractional seconds that current GNU find appends
#  to %TS, since "touch -t" only accepts whole seconds)
echo

And here’s restoredates.sh, which is the complement of savedates.sh:

# Re-apply the modification times listed in ../allfiles.txt (lines of
# "YYYYMMDDhhmm.SS filename") to the files in ../public_html.
# Prints one progress dot per file.

# Stop if the directory is missing rather than touch files somewhere else
cd ../public_html || exit 1
# -r keeps backslashes intact; everything after the first field is the name
while read -r DATE FILENAME; do
	# Skip blank or malformed lines
	[ -n "$FILENAME" ] || continue
	# Tolerate stamps saved with fractional seconds (YYYYMMDDhhmm.SS.nnnnnnnnn):
	# "touch -t" only accepts whole seconds
	case $DATE in
		*.*.*) DATE=${DATE%.*} ;;
	esac
	touch -t "$DATE" "$FILENAME"
	printf '.'
done < ../allfiles.txt
echo

Leave a Comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.