import java.io.*;
import java.util.*;

/**
 * Reads and writes the compressed Boggle dictionary format.
 *
 * <p>Words are stored with front compression: each word is written as one
 * count byte (the number of leading characters it shares with the word
 * written before it) followed by its remaining letters. A linefeed byte (10)
 * is written after the last word. When reading, every byte that is not a
 * lowercase letter is interpreted as a count, so the format can only
 * represent non-empty words made of the lowercase letters a-z.
 *
 * @author Owen Astrachan
 * @version 0.9
 */
public class BogReader {

    /** File read by the no-argument {@link #readWords()}. */
    private static final String DEFAULT_FILENAME = "bogdict";

    /** A progress line is printed every time this many words are handled. */
    private static final int PROGRESS_INTERVAL = 1000;

    /**
     * Largest shared-prefix count that can be written: a count of 'a' (97)
     * or more could be mistaken for a letter by the reader.
     */
    private static final int MAX_PREFIX = 'a' - 1;

    /**
     * Construct a reader for compressed dictionary files.
     */
    public BogReader() {
        // nothing to create
    }

    /**
     * Read the words stored in the default compressed file, "bogdict".
     *
     * @return the words in file order, or null if the file could not be read
     */
    public String[] readWords() {
        return readWords(DEFAULT_FILENAME);
    }

    /**
     * Read the words stored in a compressed file. The number of bytes read
     * is reported on standard output; failures are reported on standard
     * error.
     *
     * @param filename the name of the compressed file to read
     * @return the words in file order, or null if the file could not be read
     */
    public String[] readWords(String filename) {
        File file = new File(filename);
        try (FileInputStream in = new FileInputStream(file)) {
            byte[] buffer = new byte[(int) file.length()];

            // A single read() may return fewer bytes than requested, so
            // keep reading until the buffer is full or the stream ends.
            int bytesRead = 0;
            while (bytesRead < buffer.length) {
                int n = in.read(buffer, bytesRead, buffer.length - bytesRead);
                if (n < 0) {
                    break;
                }
                bytesRead += n;
            }
            System.out.println("read = " + bytesRead);
            return processBuffer(buffer, bytesRead);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("error reading dictionary " + e);
            return null;
        }
    }

    /**
     * Uncompress words from a buffer and return them as an array.
     *
     * @param buffer holds the compressed characters to be uncompressed
     * @param size is the number of leading bytes of buffer to process
     * @return the uncompressed words, in the order they appear in buffer
     */
    String[] processBuffer(byte buffer[], int size) {
        List<String> words = new ArrayList<String>();
        StringBuilder current = new StringBuilder();

        for (int index = 0; index < size; index++) {
            char ch = (char) buffer[index];
            if (Character.isLowerCase(ch)) {
                current.append(ch);
                continue;
            }

            // Any other byte ends the word collected so far.
            if (current.length() > 0) {
                words.add(current.toString());
                if (words.size() % PROGRESS_INTERVAL == 0) {
                    System.out.println("processed " + words.size() + " words ");
                }
            }

            // The byte is also a number (e.g., control-D is 4): how many
            // leading characters the next word shares with this one. A
            // number larger than the current word (such as the trailing
            // linefeed after a short word) leaves nothing valid to share,
            // so the word is cleared.
            int shared = ch;
            current.setLength(shared <= current.length() ? shared : 0);
        }
        return words.toArray(new String[0]);
    }

    /**
     * Find the length of the common prefix of a and b.
     *
     * @param a the first string
     * @param b the second string
     * @return the number of leading characters that a and b have in common
     */
    private int prefixLength(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int k = 0;
        while (k < limit && a.charAt(k) == b.charAt(k)) {
            k++;
        }
        return k;
    }

    /**
     * Write words to a compressed file (e.g., for subsequent reading).
     * Only non-empty words made of lowercase letters a-z can be read back.
     * If the file cannot be opened or written, a message is printed and the
     * method returns normally.
     *
     * @param list is strings to be written in compressed format
     * @param filename the name of the file storing compressed words
     * @throws ClassCastException if list contains an element that is not a String
     */
    public void writeWords(List<?> list, String filename) {
        try (DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(filename)))) {
            String last = "";
            int k = 0;
            for (Object item : list) {
                String s = (String) item;
                // Cap the count so it can never be read back as a letter.
                int prefix = Math.min(prefixLength(last, s), MAX_PREFIX);
                last = s;
                out.writeByte(prefix);
                // writeBytes emits one byte per character, so the output
                // does not depend on the platform's default charset.
                out.writeBytes(s.substring(prefix));
                if (k % PROGRESS_INTERVAL == 0) {
                    System.out.println("wrote " + k + " words ");
                }
                k++;
            }
            out.writeByte(10); // write a linefeed
        } catch (IOException e) {
            System.out.println("error opening " + filename);
        }
    }

    /**
     * Read the default dictionary, print every word, and write the words
     * back out in compressed form to the file "out".
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        BogReader r = new BogReader();
        String[] list = r.readWords();
        if (list == null) {
            // readWords has already reported why the dictionary is unavailable
            System.exit(1);
        }
        for (int k = 0; k < list.length; k++) {
            System.out.println(list[k]);
        }
        r.writeWords(Arrays.asList(list), "out");
    }
}