#!/bin/sh
# rsort: sort multi-line records from stdin to stdout
#
# Description:
#    sort the input records;
#    an input record is one or more lines of text;
#    a record begins with a line that does not begin with white space,
#    ie, it is flush with the left margin;
#    other lines in this record begin with white space
#    (spaces and/or tabs), or are blank lines;
#    #if filtering out blanks, a blank line will function as a
#    #record separator;
#
# How it works:
#    1. an awk pass joins the lines of each record into one physical line,
#       using a field separator (default: Ctrl-A) between the lines;
#    2. sort(1) orders those one-line records (skipped with -ns);
#    3. a second awk pass splits each record back into its lines
#       (skipped when the separator is empty, eg, with -c).
#
# Options:
#    See 'usage' below.
#
# Exit status:
#    1 on an option syntax error, otherwise the status of the pipeline.
#
# Portability:
#    backticks and [ ] are used on purpose so the script keeps working
#    with pre-POSIX Bourne shells (see the SunOS/nawk handling below).
#
# D.Singer, 12/20/94
#
# Modifications:
#
# 12/01/95, D.Singer
#    added -c flag;
#    added -ns flag;
#    added -r flag;
# 2/2005, D.Singer
#    a little cleanup...

PATH='/bin:/usr/bin:/usr/ucb'
export PATH

prog=`basename "$0"`

# system/OS dependencies
AWK='awk'
SYS=`uname -sr`
case "$SYS" in
'SunOS '*)
    AWK='nawk'
    ;;
esac

# Default separator placed between the lines of a joined record: Ctrl-A.
# It is generated rather than written literally, so that the control
# character cannot be lost when the file is edited or transferred.
DEFAULT_FIELD_SEP=`$AWK 'BEGIN { printf "%c", 1 }'`
SORT_FLAGS_MSG="-bf -t^A"

usage="
Usage: $prog [-b] [-c] [-f fs] [-ns] [-r \"replace\"] [-s \"sort-flags\"]

Sorts multi-line text records.  A record begins with a line that does
not begin with white-space, and continues with lines that begin with
white-space and blank lines.

    -b   filter out blank lines;
    -c   combine record into single line;
    -f   alternate field separator for record collection;
         each line in a record is a field;
    -ns  no sorting;
    -r   replace beginning spaces and tabs with \"replace\";
    -s   specify alternative sort flags,
         default are \"$SORT_FLAGS_MSG\";
"

# Only referenced by the disabled -d (debug) option below.
CMD1='echo "$prog: $cmd" >&2'
CMD2='eval "$cmd"'
#CMD='echo "$prog: $cmd" >&2; eval "$cmd"'
CMD="$CMD1; $CMD2"
debug=

syntax="$prog: option syntax error."

# usage_error: report an option syntax error on stderr and exit 1.
usage_error() {
    echo "$syntax" >&2
    echo "$usage" >&2
    exit 1
}

#
# collect_records: stdin -> stdout
# combine multi-line records into single lines so that they can be sorted;
# continuation lines start with SPACE or TAB;
# output is a single line for each multi-line records, where the input
# lines become fields separated by a special field separator;
#
# Reads: FIELD_SEP, FILTER_BLANKS, REPLACE, REPLACE_STR.
# The values are handed to awk with -v instead of being pasted into the
# program text, so a double quote in them cannot break the awk program;
# awk still interprets escape sequences such as \t in the values.
#
collect_records() {
    $AWK \
        -v SEP="$FIELD_SEP" \
        -v FILTER_BLANKS="$FILTER_BLANKS" \
        -v REPLACE="$REPLACE" \
        -v REPLACE_STR="$REPLACE_STR" '
    BEGIN {
        IN_REC = 0;        # in the current input record
        CURR_REC = "";     # the current input record
    }

    {
        # filter out blank lines, including those
        # containing just space characters
        if (FILTER_BLANKS == 1 && $0 ~ /^[ \t\f\n\r]*$/)
            next;

        # start of a new record?
        # ie, does not begin with white space, and is not a blank line;
        if ($0 !~ /^[ \t]/ && $0 !~ /^$/) {
            # flush the previous record, then begin the new one;
            if (IN_REC == 1)
                print CURR_REC;
            CURR_REC = $0;
        }
        else {
            # continuation of the current record;
            # note: "&" in REPLACE_STR stands for the matched white space;
            if (REPLACE == 1)
                sub(/^[ \t][ \t]*/, REPLACE_STR);
            CURR_REC = CURR_REC SEP $0;
        }
        IN_REC = 1;
    }

    END {
        if (IN_REC == 1)
            print CURR_REC;
    }'
}

#
# sort_records: stdin -> stdout
# sort the one-line records, or pass them through untouched with -ns;
#
# Reads: NO_SORT, SORT_FLAGS.
#
sort_records() {
    if [ "$NO_SORT" ]; then
        cat
    else
        # SORT_FLAGS is deliberately unquoted: it holds several flags.
        sort $SORT_FLAGS
    fi
}

#
# split_records: stdin -> stdout
# output the records, separating the fields back out into lines;
# with an empty separator there is nothing to split on, so the
# combined records are passed through as they are;
#
# Reads: FIELD_SEP.
#
split_records() {
    case "$FIELD_SEP" in
    '')
        cat
        ;;
    *)
        $AWK -v FS="$FIELD_SEP" '
        {
            # if blank line, just print it;
            if ($0 ~ /^[ \t\f\n\r]*$/) {
                print $0;
                next;
            }

            # print out each field, ie, each line of the multi-line record;
            for (FNUM = 1; FNUM <= NF; ++FNUM)
                print $FNUM;
        }'
        ;;
    esac
}

#
# main: process the command line options, then run the pipeline
#       collect_records | sort_records | split_records
# Arguments: the script command line ("$@").
# Returns:   the exit status of the pipeline; exits 1 on a usage error.
#
main() {
    COMBINE=
    FIELD=
    FIELD_SEP="$DEFAULT_FIELD_SEP"
    FIELD_SEP_ARG=
    FILTER_BLANKS=0
    NO_SORT=
    REPLACE=0
    REPLACE_STR=
    SORT_FLAGS="-bf -t$FIELD_SEP"

    #
    # process command line options
    #
    while [ $# != 0 ]; do
        case "$1" in
#       -d)
#           debug=1
#           CMD='echo "$prog: $cmd" >&2'
#           echo "$prog: debug mode." >&2
#           ;;
        -b)
            FILTER_BLANKS=1
            ;;
        -c)
            COMBINE=1
            FIELD_SEP=
            ;;
        -ns)
            NO_SORT=1
            ;;
        -f)
            # an option that takes a value must be followed by one
            [ $# -ge 2 ] || usage_error
            shift
            FIELD=1
            FIELD_SEP_ARG="$1"
            ;;
        -r)
            [ $# -ge 2 ] || usage_error
            shift
            REPLACE=1
            REPLACE_STR="$1"
            ;;
        -s)
            [ $# -ge 2 ] || usage_error
            shift
            SORT_FLAGS="$1"
            ;;
        *)
            # unknown options and non-option arguments are both errors
            usage_error
            ;;
        esac
        shift
    done

    # done this way so order of -f and -c don't matter
    if [ "$FIELD" ]; then
        FIELD_SEP="$FIELD_SEP_ARG"
    fi

    collect_records |
    sort_records |
    split_records
}

# The exit status of the script is the exit status of main.
main "$@"