import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
import javax.swing.JFileChooser;

/**
 * Counts how often each token delivered by a {@link Scanner} occurs
 * (after lower-casing) and prints the most frequent ones.
 */
public class WordCounter {

    /** Upper bound on the number of entries written by {@link #print()}. */
    private static final int MAX_PRINTED = 50;

    /**
     * The class Word stores a string and the number
     * of occurrences of that string. Comparing word
     * objects is based on the number of occurrences
     * of the word so that words that occur frequently
     * will be less than words that occur less frequently.
     * @author ola
     */
    private static class Word implements Comparable<Word> {
        private final String word;
        private int count;

        /**
         * Initialize a word so that the associated string
         * occurs once.
         * @param s the string this entry counts
         */
        public Word(String s) {
            word = s;
            count = 1;
        }

        /**
         * Increment the number of occurrences of a word.
         */
        public void plusplus() {
            count++;
        }

        /**
         * Return both string/count in a printable string.
         * @return the count, a tab, then the string
         */
        @Override
        public String toString() {
            return count + "\t" + word;
        }

        /**
         * Equality based on strings only (the count is ignored).
         * @param o object to compare with; may be null or of another type
         * @return true only if o is a Word holding an equal string
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof Word)) {
                return false;
            }
            return word.equals(((Word) o).word);
        }

        /**
         * Hash on the string only, so that it agrees with {@link #equals(Object)}.
         */
        @Override
        public int hashCode() {
            return word.hashCode();
        }

        /**
         * Based on occurrences, and for equal occurrences on string,
         * where frequently occurring words are less than words
         * that occur less frequently. Note that this ordering looks at the
         * count while {@link #equals(Object)} does not.
         * @param other the word to compare against
         * @return negative, zero or positive per the Comparable contract
         */
        public int compareTo(Word other) {
            if (count != other.count) {
                // Explicit comparison instead of subtraction: higher count sorts first.
                return count > other.count ? -1 : 1;
            }
            return word.compareTo(other.word);
        }

        /**
         * Get the count associated with a word.
         * @return the count of how many times the string occurs.
         */
        public int getCount() {
            return count;
        }
    }

    /** Distinct (lower-cased) strings seen so far, each mapped to its counter. */
    private final Map<String, Word> myWords;

    /**
     * Create a counter that has not seen any words yet.
     */
    public WordCounter() {
        myWords = new HashMap<String, Word>();
    }

    /**
     * Read all words in a scanner and store them
     * for subsequent processing. Each token is lower-cased; a string read
     * from the scanner is stored once with an associated count, and
     * repeated calls keep adding to the same counts.
     * @param s is the scanner/source of words
     */
    public void readAndStore(Scanner s) {
        while (s.hasNext()) {
            String key = s.next().toLowerCase();
            Word existing = myWords.get(key);
            if (existing == null) {
                myWords.put(key, new Word(key));
            } else {
                existing.plusplus();
            }
        }
    }

    /**
     * Read words and return count of unique/distinct words.
     * @param s is source of strings
     * @return count of distinct words stored in this counter after reading s
     */
    public int countWords(Scanner s) {
        readAndStore(s);
        return myWords.size();
    }

    /**
     * Print a list of the (at most) 50 most frequently occurring words to
     * standard output. Each line holds the word's count divided by the
     * highest count, a tab, the count, a tab, and the word. Nothing is
     * printed when no words have been stored.
     */
    public void print() {
        if (myWords.isEmpty()) {
            return;
        }
        List<Word> sorted = new ArrayList<Word>(myWords.values());
        Collections.sort(sorted);

        // After sorting, the first entry has the highest count.
        double max = sorted.get(0).getCount();
        int limit = Math.min(MAX_PRINTED, sorted.size());
        for (int k = 0; k < limit; k++) {
            Word w = sorted.get(k);
            double ratio = w.getCount() / max;
            System.out.println(ratio + "\t" + w);
        }
    }

    /**
     * Let the user pick a file, then report the number of distinct words in
     * it followed by the most frequent ones. Does nothing if the dialog is
     * dismissed without a selection.
     * @param args ignored
     */
    public static void main(String[] args) {
        // Created here rather than in a static initializer so that loading
        // the class does not instantiate a Swing component.
        JFileChooser chooser = new JFileChooser(".");
        int retval = chooser.showOpenDialog(null);
        if (retval != JFileChooser.APPROVE_OPTION) {
            return;
        }

        File f = chooser.getSelectedFile();
        Scanner s;
        try {
            s = new Scanner(f);
        } catch (FileNotFoundException e) {
            System.err.println("cannot open file: " + f);
            System.exit(1);
            return;
        }

        try {
            WordCounter wc = new WordCounter();
            int count = wc.countWords(s);
            System.out.println("number of words: " + count);
            wc.print();
        } finally {
            s.close();
        }
        System.exit(0);
    }
}