/** * CS U213 Assignment for week 11. * * If present, the first command-line argument names a file whose * words are to be counted. If absent, the file name is assumed * to be "test.txt". * * Usage: * java Week11 * java Week11 hamlet.txt * * @author William D Clinger * @version 1 * * 25 March 2004 * * */ import java.util.Iterator; //import java.io.InputStream; import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.BufferedReader; import java.io.StringReader; import java.io.Reader; import java.io.StreamTokenizer; import java.io.FileNotFoundException; import java.io.IOException; //////////////////////////////////////////////////////////////// // // Week11 // //////////////////////////////////////////////////////////////// public class Week11 { public static void main (String[] args) { new StudentTests().run(); System.out.println (""); System.out.println ("--------"); System.out.println (""); StringBuffer sbuf = new StringBuffer (DEFAULT_STRING); String fn = DEFAULT_FILENAME; if (args.length > 0) fn = args[0]; WordCounter wc1 = new WordCounter(); WordCounter wc2 = new WordCounter(); wc1.countWords (new StringIterator (sbuf)); if (wc1.words() != DEFAULT_WORD_COUNT) System.err.println ("***** Incorrect word count: " + wc1.words()); wc1.printWords (WORDS_TO_PRINT); System.out.println ("--------"); wc2.countWords (new StringIterator (fn)); wc2.printWords (WORDS_TO_PRINT); System.out.println (wc2.words() + " distinct words"); } private static final String DEFAULT_FILENAME = "test.txt"; private static final String DEFAULT_STRING = "How much wood would a wood chuck chuck if a wood chuck " + "could chuck wood?"; // number of distinct words in the string above private static final int DEFAULT_WORD_COUNT = 8; // how many words to print private static final int WORDS_TO_PRINT = 30; } //////////////////////////////////////////////////////////////// // // StringIterator // //////////////////////////////////////////////////////////////// /** * StringIterator is a concrete class for iterating over all * the words in a StringBuffer or text file. * At present a word is defined to be a maximal contiguous * sequence of English letters. * * @author William D Clinger * @version 2 * * original: 2 December 2003 * modified: 25 March 2004 * */ class StringIterator implements Iterator { private StreamTokenizer tok; /** * @param filename the name of a text file whose words are generated */ public StringIterator (String filename) { try { FileInputStream fin = new FileInputStream (filename); InputStreamReader isr = new InputStreamReader (fin); BufferedReader br = new BufferedReader (isr); this.tok = wordTokenizer (br); } catch (FileNotFoundException e) { this.tok = wordTokenizer (new StringReader ("")); System.err.println (filename + " not found."); } } /** * @param sb the StringBuffer whose words are generated */ public StringIterator (StringBuffer sb) { this.tok = wordTokenizer (new StringReader (sb.toString())); } /** * @return whether another word can be obtained by calling next() */ public boolean hasNext () { int tt = nextToken(); while ((tt != StreamTokenizer.TT_EOF) && (tt != StreamTokenizer.TT_WORD)) { tt = nextToken(); } // Pretend we haven't seen this token yet. tok.pushBack(); return tt == StreamTokenizer.TT_WORD; } /** * @return the next word */ public Object next () { int tt = nextToken(); while ((tt != StreamTokenizer.TT_EOF) && (tt != StreamTokenizer.TT_WORD)) { tt = nextToken(); } if (tt == StreamTokenizer.TT_WORD) { return new Word (tok.sval); } throw new RuntimeException (eofError); } // Not implemented, not needed. public void remove () { } // Behaves like tok.nextToken(), but catches any IOException // and treats it as though it were the end of input. private int nextToken () { int tt = 0; try { tt = tok.nextToken(); } catch (IOException e) { tt = StreamTokenizer.TT_EOF; } return tt; } private static String eofError = "Tried to read past end of input."; // Given a Reader, returns a StreamTokenizer for that Reader // that parses words. private static StreamTokenizer wordTokenizer (Reader in) { StreamTokenizer tok = new StreamTokenizer (in); tok.resetSyntax (); tok.lowerCaseMode (true); tok.wordChars ('a', 'z'); tok.wordChars ('A', 'Z'); tok.eolIsSignificant (false); return tok; } }