#!/bin/sh
#
# @(#) /u/des/src/check/index2html 1.16 03/05/29 13:02:03
#
# index2html -- make an html file from a `check' style INDEX file
#
# For this all to work the best, directories in a hierarchy should use
# the exact same name for each INDEX file (or use -A).  That way, links
# can be made directly between INDEX.html files automatically.
#
# Attempt is made to link to the INDEX file in the directory above, and
# to INDEX files in any subdirectories.
#
# With the RECURSE option, INDEX.html files are touched on the way down
# the directory structure, then created on the way up.  This insures
# that all forwards and backwards links will be created.
#
# Copyright (c) 1998 by Daniel E. Singer.  All rights reserved.
# Permission is granted to reproduce and distribute this program
# with the following conditions:
#   1) This copyright notice and the author identification below
#      must be left intact in the program and in any copies.
#   2) Any modifications to the program must be clearly identified
#      in the source file.
#
# Availability:
#   http://www.cs.duke.edu/~des/scripts/
#   ftp://ftp.cs.duke.edu/pub/des/scripts/
#   email:des@cs.duke.edu
#
# MODIFICATION (not part of the original distribution):
#   the DOTS glob pattern below is now written with printable characters
#   only ('.[!.]*'); the earlier form depended on raw control characters
#   inside the bracket expression, and once those are dropped the
#   remaining pattern matches ".." and misses ordinary dot-names.
#

# name of this program, used in messages and in the usage text
PROG=`basename "$0"`

#
# defaults, see descriptions below
#
DFLT_INDEX="INDEX"
DFLT_FG_TEXT="#000000"
DFLT_FG_LINK="#0000EE"
DFLT_FG_ALINK="#FF0000"
DFLT_FG_VLINK="#551A8B"
DFLT_BG_TILE=""
DFLT_BG_COLOR="#fbfbfb"
DFLT_INC_HTML_PAT="&<"
DFLT_INC_HTML_REPL="<"

#
# change this for your site; the copyright notice will appear at the
# bottom of each generated HTML page;
# I2H_COPYWHO can be set in the environment for this, also;
#
DFLT_COPYWHO="Duke University Department of Computer Science"

# option synopsis, printed for -h and on option syntax errors
USAGE="
Usage:  $PROG [-ahLq] [-p perms] [-P name] [-x dir[|dir]] [file...]
        $PROG [-ahLqr] [-A alternate_filenames] [-p perms] [-P name] \
                [-x dir[|dir]] [file]

        -a      use alternate INDEX file format (see \"man check\");
        -A      alternative INDEX filenames to try in addition to the named
                file or the default (quoted, space-separated list); useful
                with \"-r\";
        -h      print usage message and exit;
        -L      for symbolic links, link to the reference;
        -p      permissions to set on the INDEX file(s) created;
        -P      use the named protocol for the 'BASE HREF' tag, instead of
                the default 'file:';
        -q      quiet, fewer messages;
        -r      recurse, go down directory tree and make INDEX.html files
                where possible;
        -x      remove specified initial component for all generated
                pathnames; for a substitution, use 'dir1|dir2'
        file    the name of an INDEX file; no more than one should be
                supplied with \"-r\"; default name is \"$DFLT_INDEX\";
                use \"-\" for standard input;
"

# longer description, printed for -h
SUMMARY="
$PROG:
This is an add-on to the \"check\" program.  It will take an INDEX file as
produced by \"check\", and produce an HTML version.  If similarly named
INDEX.html files exist in directories above or below, direct hypertext
links will be made to these other files; use of the \"-r\" (recursive)
option insures that this is done properly.

By default, references are of the \"file:\" type, using absolute, full
paths.  This might not work as desired, for example, when used with an
anonymous FTP server where pathnames are altered.  In such cases, the
\"-P\" and \"-x\" options can be useful.
For example:  $PROG -P \"ftp:\" -x \"/usr/ftp|//ftp.site.net\""

# description of the environment variables that are honored, printed for -h
ENVIRONMENT="\
Environment variables:
        I2H_COPYWHO     name to put in the copyright notice
                        (default = \"$DFLT_COPYWHO\")
        FG_TEXT         foreground text color
                        (default = \"$DFLT_FG_TEXT\")
        FG_LINK         foreground link color
                        (default = \"$DFLT_FG_LINK\")
        FG_ALINK        foreground active link color
                        (default = \"$DFLT_FG_ALINK\")
        FG_VLINK        foreground visited link color
                        (default = \"$DFLT_FG_VLINK\")
        BG_TILE         background tile graphic file
                        (default = \"$DFLT_BG_TILE\")
        BG_COLOR        background color
                        (default = \"$DFLT_BG_COLOR\")
        INC_HTML_PAT    pattern indicating that the current line contains
                        HTML that shouldn't be escaped
                        (default = \"$DFLT_INC_HTML_PAT\")
        INC_HTML_REPL   replacement string for INC_HTML_PAT
                        (default = \"$DFLT_INC_HTML_REPL\")
"

#
# used in the copyright message included at the end of the HTML doc;
# an unset or empty I2H_COPYWHO falls back to the site default
#
COPYRIGHT_BY=${I2H_COPYWHO:-"$DFLT_COPYWHO"}
# add a period
[ -n "$COPYRIGHT_BY" ] && COPYRIGHT_BY="${COPYRIGHT_BY}."

#
# page colors;
# the first five are pretty much the netscape defaults;
# these can be overridden by defining in the environment;
# (the ${VAR=default} form assigns only when VAR is unset, so a variable
# deliberately set to the empty string in the environment stays empty)
#
: ${FG_TEXT="$DFLT_FG_TEXT"}
: ${FG_LINK="$DFLT_FG_LINK"}
: ${FG_ALINK="$DFLT_FG_ALINK"}
: ${FG_VLINK="$DFLT_FG_VLINK"}
: ${BG_TILE="$DFLT_BG_TILE"}
: ${BG_COLOR="$DFLT_BG_COLOR"}

#
# pattern (and replacement string) indicating that the current line
# contains HTML that shouldn't be escaped; this allows explicit HTML
# to be included in an INDEX file;
#
: ${INC_HTML_PAT="$DFLT_INC_HTML_PAT"}
: ${INC_HTML_REPL="$DFLT_INC_HTML_REPL"}

# command names that might need adjustment on a per/OS basis
AWK=nawk

#
# OS specifics:
# add special OS dependencies here;
#
SYS=`uname -sr`		# OS type and release
case "$SYS" in
	"FreeBSD"*)	AWK=awk ;;
	"HP-UX "*)	AWK=awk ;;
	"Linux "*)	AWK=awk ;;
esac

#
# these will not work on some older systems, and will
# need some sort of replacement
#
STDIN="/dev/stdin"
STDOUT="/dev/stdout"

#
# glob pattern for matching file and dir names that start with a dot,
# without ever matching "." or ".." themselves: a dot, then any one
# character that is not a dot, then anything;
# written with printable characters only, so the pattern cannot be
# damaged by tools that drop control characters;
#
DOTS='.[!.]*'
non-printing chars in there ALT_FORMAT=0 # use alternative INDEX file format ALT_NAMES= # alternative INDEX file names to try PERMS= # perms to set created files to CL_PROTO= # command line protocol for BASE HREF PROTO="file:" # protocol for BASE HREF QUIET=0 # fewer messages about progress RECURSE=0 # go thru subdirectories and do this CL_EXCLUDE_DIR= # command line path portion to exclude EXCLUDE_DIR= # path portion to exclude REPLACE_DIR= # replacement path LINKS=0 # make hypertext links to the referenced file SYNTAX="$PROG: option syntax error." # print command line options error message and exit syntax_error() { echo "$SYNTAX" >&2 echo "$USAGE" >&2 exit 1 } # check for valid syntax for an option with an argument arg_syntax_check() { [ "$1" -lt 2 ] && syntax_error } # # process command line options and arguments # while [ "$#" -gt 0 ]; do OPT="$1" case "$OPT" in # option without argument -a) ALT_FORMAT=1 ;; -h) echo "$SUMMARY" echo "$USAGE" echo "$ENVIRONMENT" exit 0 ;; -L) LINKS=1 ;; -q) QUIET=1 ;; -[rR]) RECURSE=1 ;; # option with argument -A) arg_syntax_check "$#" shift ALT_NAMES="$1" ;; -p) arg_syntax_check "$#" shift PERMS="$1" ;; -P) arg_syntax_check "$#" shift CL_PROTO="$1" case "$CL_PROTO" in *:) ;; *) CL_PROTO="$CL_PROTO:" esac PROTO="$CL_PROTO" ;; -x) arg_syntax_check "$#" shift CL_EXCLUDE_DIR="$1" case "$CL_EXCLUDE_DIR" in *'|'*) EXCLUDE_DIR=`expr "$CL_EXCLUDE_DIR" : '\(.*\)|.*'` REPLACE_DIR=`expr "$CL_EXCLUDE_DIR" : '.*|\(.*\)'` ;; *) EXCLUDE_DIR="$CL_EXCLUDE_DIR" esac ;; # ... --) shift break ;; # unknown option -?) 
syntax_error ;; # compound option -??*) # break up a compound option NEW_OPTS=`$AWK 'BEGIN { OPT_STR = "'"$OPT"'"; LEN = length(OPT_STR); NEW_OPTS = ""; STATUS = 0; for (POS=2; POS+0 <= LEN; ++POS) { OPT = substr(OPT_STR,POS,1); if (OPT !~ /[a-zA-Z0-9_]/) STATUS = 1; NEW_OPTS = NEW_OPTS " -" OPT; } print NEW_OPTS; exit STATUS; }' <&-` || { syntax_error } shift if [ "$#" -gt 0 ]; then set -- $NEW_OPTS "$@" else set -- $NEW_OPTS fi continue ;; # end of options, just command arguments left *) break esac shift done # # use default INDEX name? # if [ "$#" = 0 ]; then set -- "$DFLT_INDEX" elif [ "$#" -gt 1 -a "$RECURSE" = 1 ]; then syntax_error elif [ "$#" -gt 1 -a -n "$ALT_NAMES" ]; then syntax_error elif [ "$RECURSE" = 1 -a "$1" = '-' ]; then syntax_error fi STARTING_DIR="`pwd`" # # need names of subdirs for links to subdir/INDEX.html files, # and possibly also for recursion; # for links, we want all subdirs; # for recursion, we don't want any that are symlinks; # SUB_DIRS=`ls -d $DOTS/. */. 2>&- | sed 's/\/\.$//'` #echo "$SUB_DIRS" #exit # # get rid of the "/."s and filter out any symlinks # if [ -n "$SUB_DIRS" ]; then RECURSE_SUB_DIRS=`echo "$SUB_DIRS" | while read NAME; do [ ! -h "$NAME" ] && echo "$NAME" done` fi #echo "$RECURSE_SUB_DIRS" #exit #SUB_DIRS=`ls -d $DOTS/. */. 2>&-` ## ## get rid of the "/."s and filter out any symlinks ## #if [ -n "$SUB_DIRS" ]; then # SUB_DIRS=`echo "$SUB_DIRS" | sed 's/\/\.$//' | # while read NAME; do # [ ! -h "$NAME" ] && echo "$NAME" # done` # fi for INFILE do # read stdin if [ "/$INFILE" = '/-' ]; then INFILE="$STDIN" OUTFILE="$STDOUT" FILE="INDEX" # used in the HTML doc #DIRS_WITH_INDEX= DIA_STR="NUM_DIA = 0;" LINK_STR="NUM_LINKS = 0;" #UP_DIR=".." UP_DIR_INDEX= DO_RECURSE=0 IDATE= else CL_INFILE="$INFILE" # command line file CL_OUTFILE="$INFILE.html" FILE="$INFILE" OUTFILE="$INFILE.html" # # try to find the right INDEX file for the current dir # if [ ! -f "$CL_INFILE" ]; then for ALT_NAME in $ALT_NAMES; do [ ! 
-f "$ALT_NAME" ] && continue INFILE="$ALT_NAME" FILE="$ALT_NAME" OUTFILE="$ALT_NAME.html" break done fi # # try to link to INDEX.html file up one directory # #UP_DIR=".." UP_DIR_INDEX= if [ -f "../$CL_OUTFILE" ]; then UP_DIR_INDEX="../$CL_OUTFILE" else for ALT_NAME in $ALT_NAMES; do if [ -f "../$ALT_NAME.html" ]; then UP_DIR_INDEX="../$ALT_NAME.html" break fi done fi # # get the names of subdirectories if recursing # DO_RECURSE=0 if [ "$RECURSE" = 1 ]; then ##SUB_DIRS=`ls -d .??*/. */. 2>&-` #SUB_DIRS=`ls -d $DOTS/. */. 2>&-` ## ## get rid of the "/."s and filter out any symlinks ## #if [ -n "$SUB_DIRS" ]; then # SUB_DIRS=`echo "$SUB_DIRS" | sed 's/\/\.$//' | # while read NAME; do # [ ! -h "$NAME" ] && echo "$NAME" # done` # fi if [ -n "$RECURSE_SUB_DIRS" ]; then DO_RECURSE=1 fi fi # # get the date/time of the INDEX file # [ -f "$INFILE" ] && IDATE=`ls -l "$INFILE"` fi # # do the recursion # if [ "$DO_RECURSE" = 1 ]; then # # make sure the INDEX.html files are there so that up-links # are created further down the directory tree; # for NAME in "$INFILE" $ALT_NAMES; do [ ! -f "$NAME" ] && continue [ ! -f "$NAME.html" ] && > "$NAME.html" break done # # put the dirs into positional parameters # set -f HOLD_IFS="$IFS" IFS=' ' set -- $RECURSE_SUB_DIRS IFS="$HOLD_IFS" set +f # # make the new command line options # OPTS= [ "$ALT_FORMAT" = 1 ] && OPTS="$OPTS -a" [ "$LINKS" = 1 ] && OPTS="$OPTS -L" [ "$QUIET" = 1 ] && OPTS="$OPTS -q" OPTS="$OPTS -r" [ -n "$ALT_NAMES" ] && OPTS="$OPTS -A \"$ALT_NAMES\"" [ -n "$PERMS" ] && OPTS="$OPTS -p \"$PERMS\"" [ -n "$CL_PROTO" ] && OPTS="$OPTS -P \"$CL_PROTO\"" [ -n "$CL_EXCLUDE_DIR" ] && OPTS="$OPTS -x \"$CL_EXCLUDE_DIR\"" OPTS="$OPTS \"$CL_INFILE\"" [ "$QUIET" = 1 ] || echo " $PROG: recursing at \"$STARTING_DIR\"" >&2 for D do # run this script in the subdirectory cd "$D" && { eval "$PROG$OPTS" cd .. 
} done fi # # check for file access errors; # also check for INDEX.html files in subdirectories, # so that links can be made directly to them; # also check for symbolic links; # if [ "$INFILE" != "$STDIN" ]; then if [ "$QUIET" = 1 ]; then if [ "$RECURSE" = 1 ]; then echo "$PROG: directory \"$STARTING_DIR\"" >&2 fi else echo " $PROG: directory \"$STARTING_DIR\"" >&2 fi if [ ! -f "$INFILE" ]; then [ "$QUIET" = 1 -a "$RECURSE" = 1 ] || echo "$PROG: no file \"$INFILE\"." >&2 continue fi if [ ! -r "$INFILE" ]; then echo "$PROG: error accessing \"$INFILE\"." >&2 continue fi if [ -f "$OUTFILE" ]; then # # if the INDEX.html is not writable, try to remove it # so we can write over it # if [ ! -w "$OUTFILE" ]; then if rm -f "$OUTFILE" && > "$OUTFILE" ; then : else echo "$PROG: cannot write \"$OUTFILE\"." >&2 continue fi fi fi # # find subdirs with INDEX.html files; # had to move this code here so it happens after RECURSE stuff; # #echo "PWD=\"`pwd`\"" #echo "DIRS_WITH_INDEX=\`ls .??*\"/$OUTFILE\" *\"/$OUTFILE\" 2>&-\`" # # seems to be a bug with 'sh', `` */".INDEX" '' does not # seem to work as expected! (without the dot, it works) # #DIRS_WITH_INDEX=`ls -d .??*/"$OUTFILE" */"$OUTFILE" 2>&-` #DIRS_WITH_INDEX=`ls -d .??*/$OUTFILE */$OUTFILE 2>&-` set -f HOLD_IFS="$IFS" IFS=' ' set -- $SUB_DIRS IFS="$HOLD_IFS" set +f #echo $SUB_DIRS #exit DIRS_WITH_INDEX=` for DIR do if [ -f "$DIR/$CL_OUTFILE" ]; then echo "$DIR/$CL_OUTFILE" else for ALT_NAME in $ALT_NAMES; do [ ! 
-f "$DIR/$ALT_NAME.html" ] && continue echo "$DIR/$ALT_NAME.html" break done fi done` #echo "DIRS_WITH_INDEX=\"$DIRS_WITH_INDEX\"" #exit DIA_STR="NUM_DIA = 0;" if [ -n "$DIRS_WITH_INDEX" ]; then ## ## remove the /INDEX.html part ## #DIRS_WITH_INDEX=`echo "$DIRS_WITH_INDEX" | sed 's/\/'"$OUTFILE"'$//'` # # now go thru alot of trouble to make the string # palatable to AWK, with each name preceded and # followed by a newline; # set -f HOLD_IFS="$IFS" IFS=' ' set -- $DIRS_WITH_INDEX IFS="$HOLD_IFS" set +f # # this doesnt even work, since the line can get too # long for brain-damaged AWK # # DIRS_WITH_INDEX="$1" # shift # for D do # DIRS_WITH_INDEX="$DIRS_WITH_INDEX\\n$D" # done # # so instead, well make an AWK array # DIA_STR="NUM_DIA = $#;" for D do DIA_STR="$DIA_STR DIA[$#] = \"$D\";" shift done #echo "$DIA_STR" #exit fi # # get lists of any symbolic links and their referenced files # LINK_STR="NUM_LINKS = 0;" if [ "$LINKS" = 1 ]; then set -f _IFS="$IFS" IFS= LINK_LIST=`ls -A | while read NAME; do [ -h "$NAME" ] && echo "$NAME" done` IFS=' ' set -- X $LINK_LIST shift LINK_STR="NUM_LINKS = $#;" for L do LINK_STR="$LINK_STR LINK_LIST[\"$L\"] = \"`ls -l \"$L\" | sed 's/.* -> //'`\";" shift done IFS="$_IFS" set +f fi fi [ "$QUIET" = 1 ] || echo "$PROG: converting \"$INFILE\" to \"$OUTFILE\"..." >&2 # # process the INDEX file (finally!) and create the HTML document; # $AWK 'BEGIN { PROG = "'"$PROG"'"; DATE = "'"`date '+%h %e %R, %Y'`"'"; #DATE = "'"`date`"'"; IDATE = "'"$IDATE"'"; YEAR = "'"`date '+%Y'`"'"; MONTH = "'"`date '+%h'`"'"; STARTING_DIR = "'"$STARTING_DIR"'"; FILE = "'"$FILE"'"; OUTFILE = "'"$OUTFILE"'"; PROTO = "'"$PROTO"'"; EXCLUDE_DIR = "'"$EXCLUDE_DIR"'"; REPLACE_DIR = "'"$REPLACE_DIR"'"; #DIRS_WITH_INDEX = "''"$DIRS_WITH_INDEX"''"; # # this string contains an AWK array, and its size: # NUM_DIA = n; # DIA[1] = "dir1"; # ... # '"$DIA_STR"' # # this string contains an AWK array, and its size: # NUM_LINKS = n; # LINK_LIST[name] = "reference"; # ... 
# '"$LINK_STR"' ALT_FORMAT = "'"$ALT_FORMAT"'"; #UP_DIR = "'"$UP_DIR"'"; UP_DIR_INDEX = "'"$UP_DIR_INDEX"'"; COPYRIGHT_BY = "'"$COPYRIGHT_BY"'"; BG_COLOR = "'"$BG_COLOR"'"; BG_TILE = "'"$BG_TILE"'"; FG_TEXT = "'"$FG_TEXT"'"; FG_LINK = "'"$FG_LINK"'"; FG_ALINK = "'"$FG_ALINK"'"; FG_VLINK = "'"$FG_VLINK"'"; INC_HTML_PAT = "'"$DFLT_INC_HTML_PAT"'"; INC_HTML_REPL = "'"$DFLT_INC_HTML_REPL"'"; # this will help to avoid some RE side effects #if (DIRS_WITH_INDEX != "") { # N = split(DIRS_WITH_INDEX,DIA,"\n"); # for (I=1; I+0 <= N; ++I) { # DIR = substr(DIA[I],1,index(DIA[I],"/")-1); # DAA[DIR] = DIA[I]; # } #} for (I=1; I+0 <= NUM_DIA; ++I) { DIR = substr(DIA[I],1,index(DIA[I],"/")-1); DAA[DIR] = DIA[I]; } # # which field to use as the item names # FIELD = (ALT_FORMAT == 0) ? 1 : 2; # # Mod. date of the INDEX file # MM["Jan"] = 1; MM["Apr"] = 4; MM["Jul"] = 7; MM["Oct"] = 10; MM["Feb"] = 2; MM["May"] = 5; MM["Aug"] = 8; MM["Nov"] = 11; MM["Mar"] = 3; MM["Jun"] = 6; MM["Sep"] = 9; MM["Dec"] = 12; N = split(IDATE,IA); for (I=1; I+0 <= N; ++I) if (IA[I] ~ /^(Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec)$/) break; IYEAR = IA[I+2]; if (IYEAR ~ /:/) { ITIME = " " IYEAR; IYEAR = (YEAR - (MM[MONTH]+0 < MM[IA[I]]+0)); } else { ITIME = ""; #IYEAR = ((IYEAR+0 >= 50 ? "19" : "20") IYEAR); } #IYEAR = (IYEAR !~ /:/) ? ((IYEAR+0 >= 50 ? "19" : "20") IYEAR) : (YEAR - (MM[MONTH]+0 < MM[IA[I]]+0)); IDATE = sprintf("%s %s%s, %s",IA[I],IA[I+1],ITIME,IYEAR); # # make HREF path substitutions # STARTING_DIR_TITLE = STARTING_DIR; sub("^"EXCLUDE_DIR,REPLACE_DIR,STARTING_DIR); sub("^"EXCLUDE_DIR,"",STARTING_DIR_TITLE); # # print out the HTML header info # print ""; print "
"; print "" STARTING_DIR_TITLE "
";
print "";
print ""; } { # # substitute (escape) any special HTML characters, # unless supressed for this line; # if ($0 !~ INC_HTML_PAT) { gsub(/&/,"\\&"); gsub(/,"\\<"); gsub(/>/,"\\>"); } else sub(INC_HTML_PAT,INC_HTML_REPL); # # see if INDEX file uses alternate format # if (NR == 1) { if ($1 == "#" && $2 == "$@") { if ($0 ~ /[ \t]-[^- \t]*a/) { ALT_FORMAT = 1; FIELD = 2; } } } if ($1 ~ /^[ \t]*$/) { print ""; next; } # # find in item name in the line; # see "man check" for more details; # if (ALT_FORMAT == 0) { if ($1 == "." || $1 == ".." || $1 == "#") { print; next; } } else { if ($1 != "." && $1 != "..") { print; next; } } ITEM = $FIELD; # # don"t add a link if this is the entry for the INDEX.html # file itself # if (ITEM != OUTFILE) { # # if this is a directory with an INDEX.html file, # link directly to the INDEX.html; # otherwise, just link to the item; # #if (DIRS_WITH_INDEX ~ "\n" ITEM "\n") if (ITEM in DAA) { #LINK = ITEM "/" OUTFILE; if (NUM_LINKS != 0 && ITEM in LINK_LIST) LINK = LINK_LIST[ITEM] substr(DAA[ITEM],index(DAA[ITEM],"/")); else LINK = DAA[ITEM]; } else { if (NUM_LINKS != 0 && ITEM in LINK_LIST) LINK = LINK_LIST[ITEM]; else LINK = ITEM; } NEW_ITEM = "" ITEM ""; # cannot use sub(), due to RE side effects #sub(ITEM,NEW_ITEM,$0); LOC = index($0,ITEM); $0 = substr($0,1,LOC-1) NEW_ITEM substr($0,LOC+length(ITEM)); } else { LINK = ITEM; NEW_ITEM = "" ITEM ""; LOC = index($0,ITEM); $0 = substr($0,1,LOC-1) NEW_ITEM substr($0,LOC+length(ITEM)); } # # undo the default space substitution used by "check" # gsub(""," "); print; } END { # # print out the HTML footer info # #print ""; print "
"; print "