/*
*/
import java.io.*;
import java.util.*;
/*
* This class will search through all of the words in the
* WordNet dictionary (available at http://www.cogsci.princeton.edu/~wn)
* and write out all of the words that consist only of
* letters that can be represented by upside-down
* numbers. The conversion is:
* i = 1
* z = 2
* e = 3
* h = 4 (optional, not true for all fonts)
* s = 5
* q = 6
* l = 7
* b = 9
* o = 0
*/
/**
 * Command-line tool that scans the WordNet index files in a directory and
 * writes out every word (longer than one character) made up solely of letters
 * that resemble upside-down digits, together with the digit string that
 * spells the word when turned upside down.
 *
 * Usage: java NumberWords [outputFile [dictionaryDirectory]]
 */
public class NumberWords
{
    /** Letters that have an upside-down digit equivalent. */
    private static final String NUM_CHARS = "isehzqlbo";

    /**
     * Digit for each letter in NUM_CHARS, position by position
     * (i=1, s=5, e=3, h=4, z=2, q=6, l=7, b=9, o=0). Keeping the two
     * strings side by side guarantees the filter and the conversion
     * always agree on the letter set.
     */
    private static final String NUM_DIGITS = "153426790";

    /**
     * Matching words that contain an 'h'. They are collected here instead of
     * being written immediately, and main() prints them in a separate section
     * at the end of the output file.
     */
    static Vector hwords = new Vector();

    /**
     * Entry point.
     *
     * @param args optional arguments: args[0] is the output file name,
     *             args[1] is the directory holding the WordNet .idx files
     */
    public static void main (String[] args)
    {
        // set up our default values, which can be overridden by
        // command line parameters
        String dirName = "C:\\Documents and Settings\\a217041\\Desktop\\WordNet-1.7.1\\dict";
        String outputFileName = "numberwords.txt";
        if (args.length > 0)
            outputFileName = args[0];
        if (args.length > 1)
            dirName = args[1];

        // set up a simple filter, because the WordNet dictionary files we're
        // interested in all end with idx
        final String suffix = "idx";
        FilenameFilter suffixFilter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                // Locale.ENGLISH keeps the comparison independent of the
                // default locale (e.g. Turkish lowercases "I" to a dotless i).
                return name.toLowerCase(Locale.ENGLISH)
                           .endsWith(suffix.toLowerCase(Locale.ENGLISH));
            }
        };

        // File.list() returns null when the path is not a readable directory.
        // Check this before opening the output file so that a bad directory
        // argument does not truncate an existing output file.
        File dictDir = new File(dirName);
        String[] theFiles = dictDir.list(suffixFilter);
        if (theFiles == null) {
            System.err.println("Cannot read dictionary directory: " + dirName);
            return;
        }

        PrintWriter outputFile = null;
        boolean succeeded = false;
        try {
            outputFile = new PrintWriter( new FileWriter(outputFileName) );

            // look at each index file; File(parent, child) supplies the
            // platform's own path separator
            for (int i = 0; i < theFiles.length; i++)
                searchFile(new File(dictDir, theFiles[i]).getPath(), outputFile);

            outputFile.println();
            outputFile.println("/* Words that contain h(4) */");
            for (int i = 0; i < hwords.size(); i++)
                outputFile.println(hwords.get(i));

            // PrintWriter never throws on write failure; it only records it.
            succeeded = !outputFile.checkError();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (outputFile != null)
                outputFile.close();
        }

        if (succeeded)
            System.out.println("\nFinished! Output sent to " + outputFileName);
        else
            System.err.println("\nFailed to write " + outputFileName);
    }

    /**
     * Reads one index file and reports every qualifying word in it.
     *
     * The expected format is that comment lines start with spaces and "word"
     * lines start with a word, followed by a space, followed by other
     * information about the word. Only the text before the first space is
     * examined. Qualifying words without an 'h' are printed to out as
     * "word (digits)"; those with an 'h' are appended to hwords instead.
     *
     * Failures are reported on System.err and never propagated, so one bad
     * file does not abort the scan of the remaining files.
     *
     * @param fileName path of the index file to read
     * @param out      destination for qualifying words that contain no 'h'
     */
    public static void searchFile (String fileName, PrintWriter out)
    {
        BufferedReader in = null;
        System.out.println("Searching " + fileName + "...");
        try {
            in = new BufferedReader( new FileReader(fileName) );
            String line;
            while ((line = in.readLine()) != null) {
                String word = extractWord(line);
                // for our purposes, we want words longer than 1 character
                if (word.length() > 1) {
                    String outString = word + " (" + convertChars(word) + ")";
                    if (word.indexOf('h') >= 0)
                        hwords.add(outString);
                    else
                        out.println(outString);
                }
            }
        } catch (Exception e) {
            // deliberately broad: this method is best-effort per file
            System.err.println("Error in searchFile: " + e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // the file was only read; nothing useful can be done here
                }
            }
        }
    }

    /**
     * Returns the lower-cased first word of a line (the text before the first
     * space, or the whole line if it has no space) when every character of
     * that word is in NUM_CHARS; otherwise returns the empty string.
     */
    private static String extractWord (String line)
    {
        String lower = line.toLowerCase(Locale.ENGLISH);
        int end = lower.indexOf(' ');
        if (end < 0)
            end = lower.length();
        for (int i = 0; i < end; i++) {
            if (NUM_CHARS.indexOf(lower.charAt(i)) < 0)
                return "";
        }
        return lower.substring(0, end);
    }

    /**
     * Converts a word to the digit string that shows the word when read
     * upside down. The characters are processed from last to first, so the
     * digits come out in reverse order of the letters (for example "hello"
     * becomes "07734"). Characters with no digit equivalent are skipped.
     *
     * @param s the word to convert; case is ignored
     * @return the corresponding digit string
     */
    public static String convertChars (String s)
    {
        String lower = s.toLowerCase(Locale.ENGLISH);
        StringBuffer sb = new StringBuffer(lower.length());
        for (int i = lower.length() - 1; i >= 0; i--) {
            int pos = NUM_CHARS.indexOf(lower.charAt(i));
            if (pos >= 0)
                sb.append(NUM_DIGITS.charAt(pos));
        }
        return sb.toString();
    }
}