#!/bin/sh
# @(#) /u/des/src/grepz/grepz 1.10 98/06/17 00:41:35
#
# grepz - search for patterns, handling compressed files and recursion
# D.Singer, 10/93
#
# Copyright (c) 1998 by Daniel E. Singer.  All rights reserved.
# Permission is granted to reproduce and distribute this program
# with the following conditions:
#   1) This copyright notice and the author identification below
#      must be left intact in the program and in any copies.
#   2) Any modifications to the program must be clearly identified
#      in the source file.
#
# Written by:
#   Daniel E. Singer
#   UNIX Systems Administrator
#   Department of Computer Science
#   Duke University, Durham, NC
#   Phone: 919/660-6500
#   Email: des@cs.duke.edu
#
# Availability:
#   http://www.cs.duke.edu/~des/scripts/
#   ftp://ftp.cs.duke.edu/pub/des/scripts/
#
# Modifications:
#
#   1/5/93, D.Singer
#     - added options passing;
#     - fixed SED problem with '/';
#   12/23/94, D.Singer
#     - added recognition of gzip'd files;
#   11/27/96, D.Singer
#     - changed PATH for C.S.; remove paths from command variables;
#     - added stdin capability;
#     - added directory recursion;
#     - added --e;
#     - added --v;
#   12/13/96, D.Singer
#     - added ORIG_PATH;
#   6/4/98, D.Singer
#     - added --g, --s, --r;
#     - replaced option processing;
#   6/9/98, D.Singer
#     - added --f;
#   6/11/98, D.Singer
#     - added --x;
#   post-1.10 review (not by the original author)
#     - restored one-statement-per-line layout of this section;
#     - the UNAME default is now assigned before UNAME is first used,
#       so the platform detection below actually sees the OS name;
#     - GZCAT defaults to 'gzip -dc' on Linux;

# Remember the caller's PATH; it is consulted later to locate this
# program for recursive invocations.
ORIG_PATH="$PATH"
#PATH='/usr/ucb:/usr/bin:/bin:/usr/gnu/bin:/usr/local/bin'
#export PATH

PROG=`basename "$0"`

# Help text, printed on any option syntax error.
USAGE="
Usage:  $PROG [--ersv] [--g grep] [\"grep-opts\"] pattern [file...]
        $PROG --f[esvx] [--g grep] [\"grep-opts\"] pattern [dir [find-pat]]

        This is a front end to the 'grep' command that will 'uncompress'
        or 'gunzip' files on the fly, as indicated by their suffixes.
        If file is a directory, then each file in the directory will be
        searched, recursively.

        --e        use 'egrep' instead of 'grep';
        --f        use the 'find' command along with a quoted pattern
                   for file name matching;
        --g        use the grep program of your choice;
        --r        don't recurse (ignored with --f);
        --s        suppress efforts to add filename to the beginnings
                   of lines;
        --v        verbose;
        --x        supply an entire quoted 'find' expression, instead of
                   just a single file name pattern, implies '--f';
        grep-opts  options to pass to 'grep';
        pattern    expression to search for;
        file       files to search, otherwise stdin;
        dir        directory to search, default is \".\";
        find-pat   pattern for finding files, default is all;

        '$PROG' options start with double-dash, so that options for
        'grep' can be specified the usual way, and we don't have to
        worry about conflicts.

        Some environment variable that can be set to specify
        alternative system commands:

                AWK  FIND  GREP  GZCAT  LS  SED  UNAME  ZCAT
"

#
# platform specific settings
#

# The UNAME default must be in place before SYS is computed.  When it
# was assigned after the 'case' below, SYS was always empty unless the
# caller had exported UNAME, and the platform table never matched.
: ${UNAME:="uname -sr"}

_AWK="nawk"
_GZCAT="gzcat"

SYS="`$UNAME`"          # OS type, e.g. "Linux 5.15.0"
case "$SYS" in
"FreeBSD "*)
    _AWK="awk"
    ;;
"HP-UX "*)
    _AWK="awk"
    ;;
"Linux "*)
    _AWK="awk"
    # 'gzcat' is usually not installed on Linux; 'gzip -dc' is equivalent.
    _GZCAT="gzip -dc"
    ;;
esac

# Each command can be overridden from the environment; otherwise the
# defaults chosen above (or the plain command names) are used.
: ${AWK:="$_AWK"}
: ${GREP:="grep"}
: ${FIND:="find"}
: ${LS:="ls"}
: ${SED:="sed"}
: ${ZCAT:="zcat"}
: ${GZCAT:="$_GZCAT"}

GROUP=          # group of regular text files to process all at once;
PROG_PATH=      # path for recursive calls;
DID_RECURSE=0   # for spacing after recurse
FIND_PAT=       # pattern for 'find';
FIND_DIR='.'    # directory for 'find';
OPTS=           # options to pass to 'grep';
ALL_OPTS=       # options to pass recursively;
VERBOSE=0       # verbose mode;
RECURSE=1       # recurse on directories;
SUPPRESS=0      # suppress adding filenames;
DO_FIND=0       # use 'find' to get files;
XPRESSION=0     # get a 'find' expression, instead of just a pattern;

#
# process command line options
#

SYNTAX="$PROG: option syntax error."
#
# syntax_error - print the syntax error message and the usage text on
# stderr, then terminate the script with status 1.
#
syntax_error() {
    echo "$SYNTAX" >&2
    echo "$USAGE" >&2
    exit 1
}

#
# arg_syntax_check - abort with a syntax error when an option that needs
# an argument has none left.
#   $1  number of command line arguments remaining
#
arg_syntax_check() {
    [ "$1" -lt 1 ] && syntax_error
}

#
# Option loop.  Options for this program start with a double dash;
# single-dash options are collected in OPTS and handed to grep.
# ALL_OPTS accumulates everything that must be repeated on recursive
# invocations (the 'find' related options are deliberately left out).
#
while [ "$#" -gt 0 ]; do
    OPT="$1"
    case "$OPT" in
    --)
        shift
        break
        ;;
    --e)
        GREP="egrep"
        ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    --f)
        DO_FIND=1
        #ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    --g)
        shift
        arg_syntax_check "$#"
        GREP="$1"
        ALL_OPTS="$ALL_OPTS $OPT $1"
        ;;
    --r)
        RECURSE=0
        ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    --s)
        SUPPRESS=1
        ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    --v)
        VERBOSE=1
        ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    --x)
        XPRESSION=1
        DO_FIND=1
        #ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    # compound option
    --[a-zA-Z0-9][a-zA-Z0-9]*)
        # Break up a compound option ("--ev" becomes "--e --v") and
        # push the pieces back onto the argument list.
        #
        # Modified from 1.10: the option text reaches awk through the
        # environment instead of being pasted into the awk program.
        # The glob above accepts any characters after the first two,
        # so pasting let a crafted option inject awk code.  awk's
        # stdin now comes from /dev/null rather than a closed
        # descriptor.
        NEW_OPTS=`GREPZ_OPT="$OPT" $AWK 'BEGIN {
            OPT_STR = ENVIRON["GREPZ_OPT"];
            LEN = length(OPT_STR);
            NEW_OPTS = "";
            STATUS = 0;
            for (POS = 3; POS + 0 <= LEN; ++POS) {
                OPT = substr(OPT_STR, POS, 1);
                if (OPT !~ /[a-zA-Z0-9_]/)
                    STATUS = 1;
                NEW_OPTS = NEW_OPTS " --" OPT;
            }
            print NEW_OPTS;
            exit STATUS;
        }' </dev/null` || syntax_error
        shift
        # NEW_OPTS is split on purpose; ${1:+"$@"} avoids passing an
        # empty argument when nothing else is left.
        set -- $NEW_OPTS ${1:+"$@"}
        continue
        ;;
    --*)
        syntax_error
        ;;
    -*)
        OPTS="$OPTS $OPT"
        ALL_OPTS="$ALL_OPTS $OPT"
        ;;
    # end of options, just command arguments left
    *)
        break
        ;;
    esac
    shift
done

# a search pattern is mandatory
if [ "$#" -eq 0 ]; then
    syntax_error
fi

PATTERN="$1"
shift

if [ "$DO_FIND" = 1 ]; then
    #
    # use 'find' method: optional directory, then optional pattern or
    # expression; anything beyond that is an error
    #
    if [ "$#" != 0 ]; then
        FIND_DIR="$1"
        shift
        if [ "$#" != 0 ]; then
            FIND_PAT="$1"
            shift
            if [ "$#" != 0 ]; then
                syntax_error
            fi
        fi
    fi
else
    #
    # use arguments and/or recursion
    #
    if [ "$#" = 0 ]; then
        # input is from stdin, which will be processed as
        # uncompressed text;
        $GREP $OPTS "$PATTERN"
        exit
    elif [ "$#" = 1 ] && [ -d "$1" ] && [ "$RECURSE" = 0 ]; then
        #
        # if only argument is a directory and not recursing, expand it
        #
        # Modified from 1.10: the directory name is prepended by awk
        # (value passed via the environment); the former sed script was
        # not a complete 's' command.  Errors from ls go to /dev/null
        # instead of a closed descriptor.
        #
        set -f                  # no globbing while the list is split
        _IFS="$IFS"
        IFS='
'
        set -- X `$LS -A "$1" 2>/dev/null |
            GREPZ_DIR="$1" $AWK '{ print ENVIRON["GREPZ_DIR"] "/" $0 }'`
        set +f
        IFS="$_IFS"
        shift                   # drop the X placeholder
    fi
fi

#
# determine the path to PROG so clients won't say "command not found".
#
# get_path - set PROG_PATH to the directory holding this program.
#   $1  the name this program was invoked by ($0)
# An absolute name is used as is, a relative name with a slash is
# resolved against the current directory, and a bare name is looked up
# in ORIG_PATH.  Falls back to the current directory.
#
get_path() {
    case "$1" in
    '/'*)
        PROG_PATH=`dirname "$1"`
        ;;
    *'/'*)
        PROG_PATH=`dirname "$1"`
        # Modified from 1.10: '&&' so that a failed 'cd' does not
        # report an unrelated directory.
        PROG_PATH=`cd "$PROG_PATH" && pwd`
        ;;
    *)
        set -f
        _IFS="$IFS"
        IFS=':'
        set -- X $ORIG_PATH
        shift
        IFS="$_IFS"
        set +f
        for PC
        do
            if [ -f "$PC/$PROG" ] && [ -x "$PC/$PROG" ]; then
                PROG_PATH="$PC"
                break
            fi
        done
        ;;
    esac
    [ -z "$PROG_PATH" ] && PROG_PATH="`pwd`"
}

#
# prefix_lines - copy stdin to stdout with a string put in front of
# every line.
#   $1  the string to prepend
# Added after 1.10: replaces the sed scripts previously used for this,
# which were not complete 's' commands.  The string is handed to awk
# through the environment so that no character in it is special.
#
prefix_lines() {
    GREPZ_PREFIX="$1" ${AWK:-awk} '{ print ENVIRON["GREPZ_PREFIX"] $0 }'
}

#
# run on a directory
#   $1  the directory
# Lists the directory (one level) and re-invokes this program on its
# entries with the same options and pattern.  Sets DID_RECURSE.
#
do_recurse() {
    _DIR="$1"
    set -f
    _IFS="$IFS"
    IFS='
'
    set -- X `$LS -A "$_DIR" 2>/dev/null | prefix_lines "$_DIR/"`
    set +f
    IFS="$_IFS"
    shift

    if [ "$#" = 0 ]; then
        [ "$VERBOSE" = 1 ] &&
            echo "$PROG: directory \"$_DIR\" is empty." >&2
    else
        [ -z "$PROG_PATH" ] && get_path "$0"
        if [ "$VERBOSE" = 1 ]; then
            echo ""
            echo ">> \"$_DIR\""
        fi
        # Modified from 1.10: '--' keeps a pattern that begins with a
        # dash from being parsed as an option by the child.
        "$PROG_PATH/$PROG" $ALL_OPTS -- "$PATTERN" "$@"
    fi

    DID_RECURSE=1
}

#
# search a group of plain files all at once;
# the filenames are separated by newlines;
#
# Note: this runs the risk of exceeding the max arguments limit, and
# may need to be supplemented with `xargs' or another strategy;
#
do_group() {
    [ -z "$GROUP" ] && return

    set -f
    _IFS="$IFS"
    IFS='
'
    set -- $GROUP
    IFS="$_IFS"
    set +f

    case "$#" in
    1)
        # '/dev/null' guarantees getting the filename at the start of
        # the line even though only one real file is given
        if [ "$SUPPRESS" = 1 ]; then
            $GREP $OPTS "$PATTERN" "$1"
        else
            $GREP $OPTS "$PATTERN" "$1" /dev/null
        fi
        ;;
    *)
        $GREP $OPTS "$PATTERN" "$@"
        ;;
    esac

    GROUP=
}

#
# search_packed - search one compressed file.
#   $1  decompress-to-stdout command (ZCAT or GZCAT), split on blanks
#   $2  the file
# Unless SUPPRESS is set, the file name and a colon are put at the
# front of every output line.
#
search_packed() {
    if [ "$SUPPRESS" = 1 ]; then
        $1 "$2" | $GREP $OPTS "$PATTERN"
    else
        $1 "$2" | $GREP $OPTS "$PATTERN" | prefix_lines "$2:"
    fi
}

#
# search_plain - search one uncompressed file by itself.
#   $1  the file
#
search_plain() {
    if [ "$SUPPRESS" = 1 ]; then
        $GREP $OPTS "$PATTERN" "$1"
    else
        $GREP $OPTS "$PATTERN" "$1" /dev/null
    fi
}

#
# run_find - list candidate files, one per line, with 'find'.
# Modified from 1.10: the directory and a plain name pattern are passed
# to find as arguments and are no longer re-parsed by 'eval'.  Only a
# --x expression still goes through 'eval', because it is meant to be
# parsed as shell words.
#
run_find() {
    if [ -z "$FIND_PAT" ]; then
        $FIND "$FIND_DIR" -print
    elif [ "$XPRESSION" = 0 ]; then
        $FIND "$FIND_DIR" -name "$FIND_PAT" -print
    else
        eval "$FIND" '"$FIND_DIR"' "$FIND_PAT"
    fi
}

#
# if a file is compressed (*.Z), use ZCAT to uncompress into GREP,
# and then stick the file name on the front of lines;
# for gzip'd files (*.gz), use GZCAT;
# for noncompressed files, save up a group of names (until the next
# *.Z or *.gz), then grep them as a group;
# '/dev/null' guarantees getting the filename at the start of the line;
#

if [ "$DO_FIND" = 1 ]; then

    # Modified from 1.10: 'read -r' with an empty IFS keeps backslashes
    # and blanks in file names intact.
    run_find | while IFS= read -r FILE; do

        [ -f "$FILE" ] || continue

        [ "$VERBOSE" = 1 ] && echo ">>>> \"$FILE\""

        #
        # Note: the `do_group()' stuff has problems in this loop,
        # so it's been removed;
        #
        case "$FILE" in
        *.Z)
            search_packed "$ZCAT" "$FILE"
            ;;
        *.gz)
            search_packed "$GZCAT" "$FILE"
            ;;
        *)
            search_plain "$FILE"
            ;;
        esac

    done

else

    for FILE
    do
        # real directories only; symbolic links to directories are
        # neither followed nor searched
        if [ -d "$FILE" ] && [ ! -h "$FILE" ]; then
            if [ "$RECURSE" = 1 ]; then
                # Modified from 1.10: flush pending plain files first so
                # output keeps the order of the arguments.
                [ -n "$GROUP" ] && do_group
                do_recurse "$FILE"
            fi
            continue
        fi

        [ -f "$FILE" ] || continue

        if [ "$VERBOSE" = 1 ] && [ "$DID_RECURSE" = 1 ]; then
            echo ""
            DID_RECURSE=0
        fi

        [ "$VERBOSE" = 1 ] && echo ">>>> \"$FILE\""

        case "$FILE" in
        *.Z)
            [ -n "$GROUP" ] && do_group
            search_packed "$ZCAT" "$FILE"
            ;;
        *.gz)
            [ -n "$GROUP" ] && do_group
            search_packed "$GZCAT" "$FILE"
            ;;
        *)
            if [ "$SUPPRESS" = 1 ]; then
                search_plain "$FILE"
            elif [ -z "$GROUP" ]; then
                GROUP="$FILE"
            else
                # names in GROUP are newline separated
                GROUP="$GROUP
$FILE"
            fi
            ;;
        esac
    done

fi

#
# catch stragglers
#
[ -n "$GROUP" ] && do_group

# The exit status is that of the last command run above; as in 1.10 it
# is not a reliable indication of whether anything matched.
exit