#! /usr/bin/env python
"""csed problem posed by Owen Astrachan, Duke Univ.

Solution in Python by Bob Noonan, College of William and Mary

Algorithm:
    a. break words on whitespace
    b. use hashtable to maintain count of each word
    c. for each word in hashtable, put it in a 2D array, where the
       first dimension is the frequency of the word
    d. sort each sublist starting from the high end, and then print
       each sorted sublist
"""

import os
import string
import sys

# Highest word count found by the most recent successful readFile() call.
# The name shadows the builtin max(); it is kept because it is part of the
# module's existing public state, so the builtin is never used in this file.
max = 0


def printf(format, *args):
    """printf from Python Cookbook, 2nd ed, p 183.

    Writes ``format % args`` to sys.stdout without adding a newline.
    """
    sys.stdout.write(format % args)


def readFile(fn):
    """Count the whitespace-separated words of file *fn*.

    Words are lower-cased before counting.  Each key of the returned
    dict is the lower-cased word followed by one trailing space (the
    historical key format; printlist() strips it again on output).

    Side effect: the module-level ``max`` is set to the highest count in
    the returned dict (0 for a file without words).

    Returns:
        dict mapping "word " -> number of occurrences.
    """
    global max
    counts = {}
    highest = 0
    # Plain "r" replaces the old "rU" mode, which newer Python 3 releases
    # reject.  The result is unchanged because split() treats any stray
    # "\r" as whitespace.  The with-block guarantees the file is closed.
    with open(fn, "r") as inn:
        for line in inn:
            for w in line.split():
                w = w.lower() + " "
                counts[w] = counts.get(w, 0) + 1
                if counts[w] > highest:
                    highest = counts[w]
    # Publish only after a complete read, so a failed read leaves the
    # previous value untouched and a second call does not inherit a
    # stale maximum from the first.
    max = highest
    return counts


def printlist(ct, words):
    """Print each entry of *words* as ``<ct><TAB><word>``, one per line.

    *ct* is the frequency shared by all entries; trailing whitespace on
    each word is stripped before printing.
    """
    for w in words:
        printf("%d\t%s\n", ct, w.rstrip())


def sortlist(list, words):
    """Bucket the words of *list* by frequency, then print the buckets.

    *list* maps each word to its count.  *words* is a list of lists
    indexed by count; every word is appended to ``words[count]``, so
    *words* must have at least ``highest count + 1`` rows (IndexError
    otherwise).  Rows are then sorted in place and printed from the
    highest index down to 1; row 0 is never printed.
    """
    for w, val in list.items():
        words[val].append(w)
    # Walk the table that was actually passed in rather than relying on
    # the module-level ``max``.
    for i in range(len(words) - 1, 0, -1):
        words[i].sort()
        printlist(i, words[i])


def main():
    """Print the word frequencies of the file named in sys.argv[1].

    Exits with status 2 and a usage message when no file is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s FILE\n" % sys.argv[0])
        sys.exit(2)
    counts = readFile(sys.argv[1])
    # One (initially empty) row per possible frequency 0..max.
    words = [[] for _ in range(max + 1)]
    sortlist(counts, words)


# Run only when executed as a script, so importing the module has no
# side effects.
if __name__ == "__main__":
    main()