/*
*/
import java.io.*;
import java.util.*;

/**
 * Searches all of the words in the WordNet dictionary (available at
 * http://www.cogsci.princeton.edu/~wn) and writes out every word that is
 * made up solely of letters that can be represented by upside-down digits.
 * The conversion is:
 * <pre>
 *   i = 1
 *   z = 2
 *   e = 3
 *   h = 4 (optional, not true for all fonts)
 *   s = 5
 *   q = 6
 *   l = 7
 *   b = 9
 *   o = 0
 * </pre>
 * Usage: {@code java NumberWords [outputFile [dictionaryDir]]}
 */
public class NumberWords {

    /**
     * Formatted entries for words that contain an 'h'. They are collected
     * here by {@link #searchFile} and written as a separate section at the
     * end of the output, because 'h' only looks like 4 in some fonts.
     */
    static Vector hwords = new Vector();

    /**
     * Scans every index file in the dictionary directory and writes the
     * matching words to the output file.
     *
     * @param args optional overrides: {@code args[0]} is the output file
     *             name, {@code args[1]} is the dictionary directory
     */
    public static void main (String[] args) {
        // set up our default values, which can be overridden by
        // command line parameters
        String dirName = "C:\\Documents and Settings\\a217041\\Desktop\\WordNet-1.7.1\\dict";
        String outputFileName = "numberwords.txt";

        if (args.length > 0) {
            outputFileName = args[0];
        }
        if (args.length > 1) {
            dirName = args[1];
        }

        // set up a simple filter, because the WordNet dictionary files we're
        // interested in all end with idx
        final String suffix = "idx";
        FilenameFilter suffixFilter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                // Lower-case with a fixed locale so the match does not depend
                // on the platform default (e.g. Turkish maps 'I' to a dotless i).
                return name.toLowerCase(Locale.ENGLISH).endsWith(suffix);
            }
        };

        // File.list returns null when the path is not a readable directory.
        // Check that before touching the output file so a bad directory
        // argument does not truncate an existing result.
        String[] theFiles = new File(dirName).list(suffixFilter);
        if (theFiles == null) {
            System.err.println("Cannot read dictionary directory: " + dirName);
            return;
        }
        // File.list makes no ordering guarantee; sort for reproducible output.
        Arrays.sort(theFiles);

        PrintWriter outputFile = null;
        boolean succeeded = false;
        try {
            outputFile = new PrintWriter(new FileWriter(outputFileName));

            for (int i = 0; i < theFiles.length; i++) {
                // Let File pick the platform's separator instead of assuming '\'.
                searchFile(new File(dirName, theFiles[i]).getPath(), outputFile);
            }

            outputFile.println();
            outputFile.println("/* Words that contain h(4) */");
            for (int i = 0; i < hwords.size(); i++) {
                outputFile.println(hwords.get(i));
            }

            // PrintWriter never throws on write failures; it only records them.
            succeeded = !outputFile.checkError();
            if (!succeeded) {
                System.err.println("Error writing to " + outputFileName);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (outputFile != null) {
                outputFile.close();
            }
        }

        if (succeeded) {
            System.out.println("\nFinished! Output sent to " + outputFileName);
        }
    }

    /**
     * Reads the words in one dictionary index file and reports those that
     * consist only of convertible letters and are longer than one character.
     * <p>
     * The format is that comment lines start with 2 spaces, and "word" lines
     * all start with a word, followed by a space, followed by other
     * information about the word. Each line is read up to the first space,
     * testing for valid characters along the way.
     * <p>
     * Words without an 'h' are printed to {@code out} as
     * {@code word (digits)}; words with an 'h' are appended to
     * {@link #hwords} instead. I/O errors are reported on standard error
     * and end the scan of this file.
     *
     * @param fileName path of the index file to read
     * @param out      destination for the matching words
     */
    public static void searchFile (String fileName, PrintWriter out) {
        final String numChars = "isehzqlbo";
        BufferedReader in = null;
        StringBuffer sb = new StringBuffer(64);

        System.out.println("Searching " + fileName + "...");

        try {
            in = new BufferedReader(new FileReader(fileName));
            String line;

            while ((line = in.readLine()) != null) {
                char[] cbuf = line.toLowerCase(Locale.ENGLISH).toCharArray();
                sb.setLength(0);

                // check each character of the first word in the line
                // against our list of valid characters, stopping when
                // we get to an invalid character or the end of the word
                // (the word ends with the first space)
                for (int i = 0; i < cbuf.length; i++) {
                    char c = cbuf[i];
                    if (c == ' ') {
                        break;
                    } else if (numChars.indexOf(c) < 0) {
                        sb.setLength(0);
                        break;
                    } else {
                        sb.append(c);
                    }
                }

                // if we got done and our StringBuffer is longer than
                // zero characters, then we had a valid word (for our
                // purposes, we want words longer than 1 character)
                if (sb.length() > 1) {
                    String word = sb.toString();
                    String outString = word + " (" + convertChars(word) + ")";
                    if (word.indexOf('h') >= 0) {
                        hwords.add(outString);
                    } else {
                        out.println(outString);
                    }
                }
            }
        } catch (IOException e) {
            System.err.println("Error in searchFile: " + e);
        } finally {
            // in stays null when the file could not be opened
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing a reader fails
                }
            }
        }
    }

    /**
     * Converts the letters of a word to their corresponding digits, in
     * reverse order (the number is meant to be read upside down).
     * Upper-case letters are treated like lower-case ones; characters
     * without a digit equivalent are dropped.
     *
     * @param s the word to convert
     * @return the digit string, e.g. {@code "07734"} for {@code "hello"}
     */
    public static String convertChars (String s) {
        StringBuffer sb = new StringBuffer(s.length());
        char[] cbuf = s.toLowerCase(Locale.ENGLISH).toCharArray();

        // walk backwards: turning the display upside down reverses the order
        for (int i = cbuf.length - 1; i >= 0; i--) {
            switch (cbuf[i]) {
                case 'i': sb.append('1'); break;
                case 'z': sb.append('2'); break;
                case 'e': sb.append('3'); break;
                case 'h': sb.append('4'); break;
                case 's': sb.append('5'); break;
                case 'q': sb.append('6'); break;
                case 'l': sb.append('7'); break;
                case 'b': sb.append('9'); break;
                case 'o': sb.append('0'); break;
                default:
                    // no digit equivalent: skip the character
                    break;
            }
        }

        return sb.toString();
    }
}