/*
*/
import java.io.*;
/*
* This class will search through all of the words in the
* WordNet dictionary (available at http://www.cogsci.princeton.edu/~wn)
* and write all of the words that only contain the hex
* characters "abcdef" to an output file. Optionally,
* you can set the global usePseudoChars variable to true
* and receive an output of words containing "abcdeflisoz",
* where the number 1 can be substituted for "l" and "i",
* 5 can be substituted for "s", 2 can be substituted for
* "z", and 0 can be substituted for "o".
*
* The point here was to make a list of words that could
* be formed using only the letters and numbers used in
* hexadecimal notation.
*/
public class HexWords
{
    // if usePseudoChars is true, then we'll also accept
    // the letters l, i, s, z, and o in our words (which
    // can be substituted for the digits 1, 1, 5, 2, and 0)
    static boolean usePseudoChars = false;

    // letters that are themselves hexadecimal digits
    private static final String HEX_CHARS = "abcdef";

    // letters that only look like digits: 1=l,i; 5=s; 2=z; 0=o
    private static final String PSEUDO_CHARS = "lisoz";

    // All case conversion goes through one fixed locale. With the JVM default
    // locale, a Turkish environment maps "I" to a dotless "i" (and "i" to a
    // dotted capital), which would break both the file-name filter and the
    // character checks below.
    private static final java.util.Locale CASE_LOCALE = java.util.Locale.ENGLISH;

    /**
     * Scans every WordNet index file in a directory and writes the hex words
     * found to an output file.
     *
     * @param args optional overrides: args[0] is the output file name
     *             (default "hexwords.txt"), args[1] is the dictionary
     *             directory (default is the hard-coded path below)
     */
    public static void main (String[] args)
    {
        // set up our default values, which can be overridden by
        // command line parameters
        String dirName = "C:\\Documents and Settings\\a217041\\Desktop\\WordNet-1.7.1\\dict";
        String outputFileName = "hexwords.txt";
        if (args.length > 0)
            outputFileName = args[0];
        if (args.length > 1)
            dirName = args[1];

        // set up a simple filter, because the WordNet dictionary files we're
        // interested in all end with idx
        final String suffix = "idx";
        FilenameFilter suffixFilter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                // only accept file names that end with the specified suffix
                return name.toLowerCase(CASE_LOCALE).endsWith(suffix);
            }
        };

        // File.list() returns null (it does not throw) when the path is not
        // a readable directory, so check before looping over the result
        File dir = new File(dirName);
        String[] theFiles = dir.list(suffixFilter);
        if (theFiles == null) {
            System.err.println("Cannot read dictionary directory: " + dirName);
            return;
        }

        PrintWriter outputFile = null;
        boolean succeeded = false;
        try {
            outputFile = new PrintWriter( new FileWriter(outputFileName) );
            // look at each index file; File(dir, name) supplies the
            // platform's own path separator
            for (int i = 0; i < theFiles.length; i++)
                searchFile(new File(dir, theFiles[i]).getPath(), outputFile);
            // PrintWriter never throws on write failure; it only records it
            succeeded = !outputFile.checkError();
            if (!succeeded)
                System.err.println("Error writing to " + outputFileName);
        } catch (IOException e) {
            System.err.println("Could not open " + outputFileName + ": " + e);
        } finally {
            if (outputFile != null)
                outputFile.close();
        }

        if (succeeded)
            System.out.println("\nFinished! Output sent to " + outputFileName);
    }

    /**
     * Reads one index file and prints every hex word in it to {@code out},
     * one per line. When usePseudoChars is set, each word is followed by its
     * digit-substituted form in parentheses.
     *
     * The format is that comment lines start with 2 spaces, and "word" lines
     * all start with a word, followed by a space, followed by other
     * information about the word. Only words longer than 1 character are
     * written. Read errors are reported on System.err and end the scan of
     * this file; they are not propagated to the caller.
     *
     * @param fileName path of the index file to read
     * @param out      writer that receives the matching words
     */
    public static void searchFile (String fileName, PrintWriter out)
    {
        // read the flag once so that the accepted alphabet and the output
        // format cannot disagree within a single file
        boolean pseudo = usePseudoChars;
        String validChars = pseudo ? HEX_CHARS + PSEUDO_CHARS : HEX_CHARS;

        System.out.println("Searching " + fileName + "...");
        BufferedReader in = null;
        try {
            in = new BufferedReader( new FileReader(fileName) );
            String line;
            while ((line = in.readLine()) != null) {
                String word = firstHexWord(line, validChars);
                // for our purposes, we want words longer than 1 character
                if (word != null && word.length() > 1) {
                    if (pseudo)
                        out.println(word + " (" + convertPseudoChars(word) + ")");
                    else
                        out.println(word);
                }
            }
        } catch (IOException e) {
            System.err.println("Error in searchFile: " + e);
        } finally {
            // in is still null when the file could not be opened
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing useful can be done about a failed close
                }
            }
        }
    }

    // Returns the lower-cased first word of the line (everything before the
    // first space, or the whole line if there is no space) when all of its
    // characters appear in validChars; returns null otherwise.
    private static String firstHexWord (String line, String validChars)
    {
        int end = line.indexOf(' ');
        String word = (end < 0) ? line : line.substring(0, end);
        word = word.toLowerCase(CASE_LOCALE);
        for (int i = 0; i < word.length(); i++) {
            if (validChars.indexOf(word.charAt(i)) < 0)
                return null;
        }
        return word;
    }

    /**
     * Converts all of the "pseudo" characters in a String to their
     * corresponding digits (L and I to 1, S to 5, Z to 2, O to 0). The
     * remaining characters are returned in upper case.
     *
     * @param s the word to convert; must not be null
     * @return the upper-cased word with pseudo characters replaced by digits
     */
    public static String convertPseudoChars (String s)
    {
        char[] cbuf = s.toUpperCase(CASE_LOCALE).toCharArray();
        for (int i = 0; i < cbuf.length; i++) {
            switch (cbuf[i]) {
                case 'L': case 'I':
                    cbuf[i] = '1';
                    break;
                case 'S':
                    cbuf[i] = '5';
                    break;
                case 'Z':
                    cbuf[i] = '2';
                    break;
                case 'O':
                    cbuf[i] = '0';
                    break;
                default:
                    // every other character is kept as is
                    break;
            }
        }
        return new String(cbuf);
    }
}