#!/bin/sh
#
# @(#)duf	1.23	2008/11/17 17:42:40
#
# duf: disk usage formatter
#
# Description:
#	(see USAGE message below)
#
# Usage:
#	(see USAGE message below)
#
# Copyright (c) 2007 by Daniel E. Singer.  All rights reserved.
# Permission is granted to reproduce and distribute this program
# with the following conditions:
#   1) This copyright notice and the author identification below
#      must be left intact in the program and in any copies.
#   2) Any modifications to the program must be clearly identified
#      in the source file.
#
# Written by Daniel E. Singer, Duke Univ. Dept of Computer Science, 8/26/1994
#
# 4/2007: updated sort key syntax, per JED suggestion;
# 8/2006: added scaling options suggested and prototyped by Rick Flower;
# 10/2000: fixed DIR_PATH per Ray Butterworth suggestion;
#
# MODIFICATIONS (made during code review, not by the original author):
#   - restored the line structure of the script and of the help text;
#   - FIELD_SEP is now generated (control-A) instead of being an empty string;
#   - FILE_SORT is defined again, so that "-s f" really sorts by filename;
#   - shell values are handed to AWK via -v / ENVIRON instead of being
#     pasted into the AWK source (paths containing quotes or backslashes
#     used to break the script);
#   - 'sort' is called directly, without 'eval';
#   - compound options are split with 'sed' after validation in the shell;
#   - the temporary file is created with 'mktemp' when available, and
#     /tmp is used when the preferred temporary directory does not exist;
#   - empty input with "-f" is reported instead of failing in 'shift';
#   - the "du -k" probe uses "-s" so that it does not walk all of TEST_DIR;
#   - print_level() keeps its working variables local, so recursion no
#     longer clobbers them;
#   - 'awk' is used when the preferred AWK program is not installed;
#

## uncomment and modify this if needed
#PATH='/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin'
#export PATH

# DU_PROG can be set from the environment
DFLT_DU_PROG='/usr/bin/du'
: ${DU_PROG:=$DFLT_DU_PROG}

PROG=`basename "$0"`

USAGE="
Usage:  $PROG [-alt] [-d num] [ -s f|s ] [-b k|m|g|t] [-p digit] [dir]
        $PROG [-alt] [-d num] [ -s f|s ] [-b k|m|g|t] [-p digit] -f {file|-}
        $PROG -h   (help)
"

SUMMARY="
'$PROG' (Disk Usage Formatter) produces a hierarchical listing of disk
space usage, starting at the current or given directory.  Each figure is
in kilobytes (or \"blocks\", if input is from a file or standard input),
and includes all files and subdirectories for a given directory.  See
\"man du\" for more information.

    dir           directory at which to begin the search; cannot be used
                  with the '-f' option;
    -a            list all files, not just directories;
    -b blocking   display blocking as KB ('k', the default), MB ('m'),
                  GB ('g'), or TB ('t');
    -d depth      maximum directory level depth to report; ie, how many
                  levels of subdirectories; default is all levels;
    -f file       take input from 'file' instead of the 'du' command;
                  use '-' to read from standard input; cannot be used
                  with the 'dir' option;
    -h            help, print this message and exit;
    -l            local, print [in brackets] the blocks of files that are
                  local to the directory, ie, that aren't part of
                  subdirectories; this is not very useful with '-a';
    -p precision  a digit indicating decimal precision; default is '0';
    -s sort       sort by filename ('f'); or by size ('s'), the default;
    -t            add some tabular visual helpers;
"

SYNTAX="$PROG: option syntax error."

DIR='.'			# starting directory;
ALL=0			# -a option;
ALL_FLAG=		# flag for du for all files, if needed;
KB_FLAG=		# flag for du for kilobytes, if needed;
LOCAL=0			# -l option;

# weird character (control-A) to use as field separator between the
# pipeline stages; hopefully this won't appear in any dir or file names;
# generated with 'tr' so that no raw control character lives in this file;
FIELD_SEP=`echo x | tr x '\001'`

# sort keys; field 1 is the level, field 2 the block count, field 3 the path;
# the field separator itself is passed to 'sort' separately, see process();
FILE_SORT='-k 1,1n -k 3,3 -k 2,2nr'	# filename sort flags;
SIZE_SORT='-k 1,1n -k 2,2nr -k 3,3'	# size sort flags;
SORT='s'			# sort type, size or filename;
SORT_FLAGS="$SIZE_SORT"		# sort flags;

BLOCKING='k'		# blocking scale to use for output
PRECISION='0'		# decimal precision
TABULAR_FLAG=0		# -t option;
DU_FILE=		# input file from command line;
DEPTH='9999'		# very deep;
TEST_DIR="/tmp"		# dir to use to test 'du' option;

# where to put temporary files; fall back to /tmp when the preferred
# directory is missing or not writable (/usr/tmp no longer exists on
# many systems);
TMP_DIR="${TMPDIR:-/usr/tmp}"
if [ ! -d "$TMP_DIR" ] || [ ! -w "$TMP_DIR" ]; then
	TMP_DIR='/tmp'
fi
INPUT_FILE=		# file to hold input read from stdin; created on demand;

DFLT_PAGER="more"		# default pager;
: ${PAGER:=$DFLT_PAGER}		# output pager, check environment;
STATUS=			# exit status, if we get far enough;
TTY=0			# how to handle output;
[ -t 1 ] && TTY=1
AWK=nawk		# awk, nawk, gawk, ...

# turn off rwx for group and other for any temp files;
umask 077

#
# OS specifics:
# add special OS dependencies here;
#
SYS=`uname -sr`		# OS type
case "$SYS" in
"Linux "*|"HP-UX "*)
	AWK=awk
esac

# if the chosen AWK cannot be run at all, fall back to plain 'awk';
( $AWK 'BEGIN { exit 0 }' ) </dev/null >/dev/null 2>&1 || AWK=awk

#
# syntax_error: print the syntax error and usage messages on stderr and
# terminate the script with status 1;
#
syntax_error() {
	echo "$SYNTAX" >&2
	echo "$USAGE" >&2
	exit 1
}

#
# cleanup: remove the temporary input file, if one was created;
#
cleanup() {
	case "$INPUT_FILE" in
	?*)	rm -f "$INPUT_FILE"
	esac
}

#
# start_dir_from FILE: set DIR to the pathname found on the last line of
# the 'du' style data in FILE (the last line of 'du' output is the
# starting directory); the first word of that line is the block count;
#
start_dir_from() {
	DIR=`sed -n '$p' "$1"`
	# split into words without globbing: X, block count, pathname words;
	set -f
	set -- X $DIR
	set +f
	if [ "$#" -lt 3 ]; then
		echo "$PROG: no usable 'du' data in input." >&2
		exit 1
	fi
	shift 2
	DIR="$*"
}

#
# process command line options
#
while [ "$#" -gt 0 ]; do
	OPT="$1"
	case "$OPT" in
	-a)
		ALL=1
		ALL_FLAG='-a'
		;;
	-b)
		[ "$#" -lt 2 ] && syntax_error
		case "$2" in
		[kK])	BLOCKING='k' ;;
		[mM])	BLOCKING='m' ;;
		[gG])	BLOCKING='g' ;;
		[tT])	BLOCKING='t' ;;
		*)	syntax_error
		esac
		shift
		;;
	-d)
		[ "$#" -lt 2 ] && syntax_error
		DEPTH="$2"
		case "$DEPTH" in
		[0-9]|[0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9][0-9][0-9])
			;;
		*)
			syntax_error
		esac
		shift
		;;
	-f)
		[ "$#" -lt 2 ] && syntax_error
		DU_FILE="$2"
		case "$DU_FILE" in
		'')	syntax_error
		esac
		shift
		;;
	-h|-help)
		echo "$USAGE$SUMMARY"
		exit 0
		;;
	-l)
		LOCAL=1
		;;
	-p)
		[ "$#" -lt 2 ] && syntax_error
		PRECISION="$2"
		case "$PRECISION" in
		[0-9])	;;
		*)	syntax_error
		esac
		shift
		;;
	-s)
		[ "$#" -lt 2 ] && syntax_error
		case "$2" in
		[fF])	SORT='f' ;;
		[sS])	SORT='s' ;;
		*)	syntax_error
		esac
		shift
		;;
	-t)
		TABULAR_FLAG=1
		;;
	--)
		shift
		break
		;;
	# unknown option
	-?)
		syntax_error
		;;
	# compound option
	-??*)
		# only letters and digits may follow the leading dash;
		case "$OPT" in
		-*[!a-zA-Z0-9]*)
			syntax_error
		esac
		# break up a compound option: "-abc" becomes " -a -b -c";
		# the "x" prefix keeps 'echo' from treating $OPT as its own flag;
		NEW_OPTS=`echo "x$OPT" | sed -e 's/^x-//' -e 's/./ -&/g'`
		shift
		if [ "$#" -gt 0 ]; then
			set -- $NEW_OPTS "$@"
		else
			set -- $NEW_OPTS
		fi
		continue
		;;
	# end of options, just command arguments left
	*)
		break
	esac
	shift
done

case "$SORT" in
f)	SORT_FLAGS="$FILE_SORT" ;;
s)	SORT_FLAGS="$SIZE_SORT"
esac

# remove tmp file on exit
trap cleanup 0 1 2 3 15

case "$#" in
0)
	case "$DU_FILE" in
	'')
		# input from "du ."
		DIR=`pwd`
		;;
	'-')
		# input from stdin; get the starting dir from the last line;
		# prefer an unpredictable name from mktemp, but fall back to
		# the traditional name where mktemp is not available;
		INPUT_FILE=`mktemp "$TMP_DIR/$PROG.input.XXXXXX" 2>/dev/null` ||
			INPUT_FILE="$TMP_DIR/$PROG.input.$$"
		if cat > "$INPUT_FILE"; then
			:
		else
			echo "$PROG: cannot write \"$INPUT_FILE\"." >&2
			exit 1
		fi
		start_dir_from "$INPUT_FILE"
		DU_FILE="$INPUT_FILE"
		;;
	*)
		# input from a file; get the starting dir from the last line;
		if [ ! -f "$DU_FILE" ] || [ ! -r "$DU_FILE" ]; then
			echo "$PROG: cannot read \"$DU_FILE\"." >&2
			exit 1
		fi
		start_dir_from "$DU_FILE"
	esac
	;;
1)
	#
	# can't have "-f file" and "dir";
	#
	case "$DU_FILE" in
	?*)	syntax_error
	esac
	DIR="$1"
	if [ ! -d "$DIR" ]; then
		echo "$PROG: no such directory \"$DIR\"." >&2
		exit 1
	fi
	if [ ! -r "$DIR" ]; then
		echo "$PROG: cannot read directory \"$DIR\"." >&2
		exit 1
	fi
	if [ ! -x "$DIR" ]; then
		echo "$PROG: cannot cd to directory \"$DIR\"." >&2
		exit 1
	fi
	# get the real path; CDPATH is emptied so that 'cd' can neither
	# pick a different directory nor print anything;
	DIR=`( CDPATH=; cd "$DIR" && pwd )`
	;;
*)
	syntax_error
esac

#
# process output from 'du', doing the sorting, formatting, filtering, etc.
#
# A) run thru AWK to produce lines of "level blocks path [local_blocks]";
#    also, filter out unwanted levels (depth);
# B) sort by level, then other fields based on options;
# C) run thru AWK again, to provide indentation and hierarchical ordering;
#
# Reads 'du' style lines on stdin, writes the formatted listing on stdout.
# Uses the globals DIR, DEPTH, LOCAL, FIELD_SEP, SORT_FLAGS, TABULAR_FLAG,
# DU_FILE, BLOCKING, PRECISION and AWK.
#
# NOTE: both AWK programs are inside single quotes, so they must not
# contain a single quote character, not even in comments.
#
process() {
	# tell the second AWK stage whether the data came from a file/stdin
	case "$DU_FILE" in
	'')	DUF_FLAG=0 ;;
	*)	DUF_FLAG=1
	esac

	# the starting directory goes through the environment (ENVIRON),
	# because -v would interpret backslash escapes in the pathname;
	DUF_DIR="$DIR" $AWK \
		-v "DEPTH=$DEPTH" \
		-v "LOCAL=$LOCAL" \
		-v "OFS=$FIELD_SEP" '
	BEGIN {
		DIR = ENVIRON["DUF_DIR"];	# dir where search starts
		# DEPTH: number of levels to report (set with -v)
		# LOCAL: keep track of "local" blocks (set with -v)

		# part to remove from pathnames; the trailing slash should
		# be counted, so 1 is added if it is not there;
		DIR_LEN = length(DIR) + (DIR !~ "/$");
	}

	{
		#
		# input lines consist of 1) a block count, and 2) a directory
		# pathname, separated by a tab;
		#
		BLOCKS = $1;

		#
		# get the rest of the line: a dirname might contain space
		# and/or tab;
		# fix recommended by Ray Butterworth for some versions of du
		# that use a space separator instead of tab;
		#
		DIR_PATH = substr($0,match($0,/[ \t]/) + 1);

		#
		# chop off the starting path; ie, DIR;
		#
		DIR_PATH = substr(DIR_PATH,DIR_LEN + 1);

		#
		# sum blocks that are in subdirectories of a directory;
		#
		if (LOCAL) {
			if (DIR_PATH != "") {
				#
				# get the parent directory name, these blocks
				# will sum into its count of NLBs;
				#
				match(DIR_PATH,".*/");
				dirname = substr(DIR_PATH,1,RLENGTH-1);
				# Non Local Blocks
				NLB[dirname] += BLOCKS;
			}
		}

		#
		# get the depth; if more than we want to see, skip it;
		#
		LEVEL = level(DIR_PATH) + 1;
		if (LEVEL+0 > DEPTH+0)
			next;

		if (LOCAL) {
			# Local Blocks
			LB = BLOCKS - NLB[DIR_PATH];
			print LEVEL, BLOCKS, DIR_PATH, LB;
		}
		else
			print LEVEL, BLOCKS, DIR_PATH;
	}

	#
	# level: returns number of "/"s in the given dir string;
	#
	function level(dir) {
		gsub("[^/]","",dir);
		return(length(dir));
	}' |
	sort -t "$FIELD_SEP" $SORT_FLAGS |
	DUF_DIR="$DIR" $AWK \
		-v "TABULAR_FLAG=$TABULAR_FLAG" \
		-v "DUF_FLAG=$DUF_FLAG" \
		-v "FS=$FIELD_SEP" \
		-v "LOCAL=$LOCAL" \
		-v "BLOCKING=$BLOCKING" \
		-v "PRECISION=$PRECISION" '
	BEGIN {
		DIR = ENVIRON["DUF_DIR"];	# dir where search starts
		# TABULAR_FLAG: use tabular indications
		# DUF_FLAG:     input came from file?
		# LOCAL:        keep track of "local" blocks
		# BLOCKING:     blocking scale: k, m, g, or t
		# PRECISION:    precision: a digit

		if (BLOCKING == "k") {
			BLOCKING_LABEL = DUF_FLAG ? "blocks" : "kilobytes";
			FACTOR = 1;
		}
		else if (BLOCKING == "m") {
			BLOCKING_LABEL = DUF_FLAG ? "blocks / 1024" : "megabytes";
			FACTOR = 1024;
		}
		else if (BLOCKING == "g") {
			BLOCKING_LABEL = DUF_FLAG ? "blocks / 1024^2" : "gigabytes";
			FACTOR = 1024^2;
		}
		else {
			BLOCKING_LABEL = DUF_FLAG ? "blocks / 1024^3" : "terabytes";
			FACTOR = 1024^3;
		}

		# stuff for indentation; one PAD_STR per level of depth;
		# both variants must have the same width;
		if (TABULAR_FLAG == 1)
			PAD_STR = "  . ";
		else
			PAD_STR = "    ";
		PAD_STR_LEN = length(PAD_STR);
		PAD_LEN = 1;
		PAD = "";

		#
		# start the listing by printing the starting point
		#
		printf("%s:\n\n",DIR);
	}

	{
		#
		# input line consists of 1) level count, 2) block count,
		# 3) a directory pathname, and, with the LOCAL option,
		# 4) local block count;
		#
		LEVEL = $1;
		BLOCKS = $2;
		DIR_PATH = $3;
		if (LOCAL == 1)
			LB = $4;

		#
		# this is the special case of the search starting point;
		#
		if (DIR_PATH == "") {
			TOTAL_BLOCKS = BLOCKS;
			if (LOCAL)
				TOTAL_LB = (LB == BLOCKS) ? "" : (" [" scale(LB) "]");
			next;
		}

		#
		# get the parent directory path;
		#
		match(DIR_PATH,".*/");
		DIR_PARENT = substr(DIR_PATH,1,RLENGTH-1);

		#
		# keep track of the number of subdirectories for the
		# parent directory;
		# this will be used later to find subdirectories for any
		# given directory;
		# for level 1, the parent directory is "";
		#
		I = ++INDEXES[DIR_PARENT];

		#
		# store the current line info, indexing via the parent
		# directory path and an additional numeric subscript;
		# this DIR_PARENT subarray should already be in the right
		# order;
		#
		D_ARRAY[DIR_PARENT,I] = $0;
	}

	END {
		#
		# print the total, then recursively print out the rest of
		# the data;
		#
		printf("%6s TOTAL%s (%s)\n\n",scale(TOTAL_BLOCKS),TOTAL_LB,BLOCKING_LABEL);
		if ("" in INDEXES)
			print_level("");
	}

	#
	# scale: scale a number of blocks according to BLOCKING and PRECISION;
	#
	function scale(blocks) {
		return sprintf("%.*f", PRECISION, (blocks/FACTOR));
	}

	#
	# print_level: print out the current level of data, and recursively
	# any sub-levels;
	#   dir_parent  directory whose children are printed; it is the key
	#               into INDEXES and D_ARRAY;
	# everything after dir_parent in the parameter list is a local
	# variable (AWK has no other way to declare locals), so that the
	# recursive calls cannot overwrite the values of the caller;
	#
	function print_level(dir_parent,   j,level,blocks,dirpath,lb,dirname) {
		for (j=1; j+0 <= INDEXES[dir_parent]; ++j) {
			#
			# get the data out;
			#
			$0 = D_ARRAY[dir_parent,j];
			level = $1;
			blocks = $2;
			dirpath = $3;
			if (LOCAL)
				lb = $4;
			delete D_ARRAY[dir_parent,j];	# not needed anymore

			#
			# get the last component of the path name for printing
			#
			match(dirpath,".*/");
			dirname = substr(dirpath,RLENGTH+1);

			#
			# extend the PAD string, if necessary;
			# it grows as needed;
			#
			for ( ; PAD_LEN+0 < level; ++PAD_LEN)
				PAD = PAD PAD_STR;

			#
			# construct the "local blocks" string;
			#
			if (LOCAL)
				lb = (lb == blocks) ? "" : (" [" scale(lb) "]");

			#
			# print out a formatted line of data;
			#
			printf("%s%6s %s%s\n",
				substr(PAD,1,PAD_STR_LEN*(level-1)),
				scale(blocks),dirname,lb);

			#
			# if this directory has subdirectories, print them out
			# now; the current directory pathname is used as the
			# subscript;
			#
			if (dirpath in INDEXES) {
				print_level(dirpath);
				delete INDEXES[dirpath];	# not needed anymore
			}
		}
	}'
}

#
# emit_input: write the raw 'du' data to stdout, either by running 'du'
# or by copying the input file; the exit status of that command is written
# to file descriptor 4, where the main pipeline below picks it up;
#
emit_input() {
	case "$DU_FILE" in
	'')
		# KB_FLAG and ALL_FLAG are left unquoted on purpose: each is
		# either empty or a single option word;
		$DU_PROG $KB_FLAG $ALL_FLAG "$DIR"
		;;
	*)
		cat "$DU_FILE"
	esac
	echo $? 1>&4
}

#
# paginate: pipe to PAGER if output is a terminal, otherwise just copy;
# PAGER is left unquoted so that it may carry options;
#
paginate() {
	if [ "$TTY" = '1' ]; then
		$PAGER
	else
		cat
	fi
}

#
# semi-reliable test to tell if the "-k" flag should be used with 'du';
# this assumes that TEST_DIR dir is readable; if not, then this won't work;
# no sense in doing this test if input is from a file or stdin;
# "-s" keeps 'du' from listing every directory below TEST_DIR;
#
case "$DU_FILE" in
'')
	DU_TEST=`$DU_PROG -k -s "$TEST_DIR" 2>/dev/null`
	case "$DU_TEST" in
	[0-9]*)	KB_FLAG='-k'
	esac
esac

#
# run 'du', process the output, and print;
#
# all this rigamarole with STATUS and file descriptors 3 and 4 has to do
# with wanting to return the exit status of 'du' as the exit status for
# the script; if all this were not done, the exit status of the last
# command of the pipeline would be returned instead;
#   fd 3: the real standard output, where the listing goes;
#   fd 4: the command substitution, which collects the status of 'du';
#
exec 3>&1
STATUS=`
	{
		emit_input | process | paginate 1>&3
	} 4>&1
`

exit $STATUS