/*
 */
import java.io.*;

/*
 * This class will search through all of the words in the
 * WordNet dictionary (available at http://www.cogsci.princeton.edu/~wn)
 * and write all of the words that only contain the hex 
 * characters "abcdef" to an output file. Optionally,
 * you can set the global usePseudoChars variable to true
 * and receive an output of words containing "abcdeflisoz",
 * where the number 1 can be substituted for "l" and "i",
 * 5 can be substituted for "s", 2 can be substituted for
 * "z", and 0 can be substituted for "o".
 *
 * The point here was to make a list of words that could
 * be formed using only the letters and numbers used in
 * hexadecimal notation.
 */
public class HexWords
{
	// if usePseudoChars is true, then we'll also accept
	// the letters l, i, s, z, and o in our words (which
	// stand in for the digits 1, 1, 5, 2, and 0, respectively)
	static boolean usePseudoChars = false;
	
	/**
	 * Scans every file whose name ends with "idx" in the dictionary
	 * directory and writes the matching words to the output file.
	 *
	 * @param args optional overrides: args[0] is the output file name,
	 *             args[1] is the dictionary directory
	 */
	public static void main (String[] args)
	{
		// set up our default values, which can be overridden by
		// command line parameters
		String dirName = "C:\\Documents and Settings\\a217041\\Desktop\\WordNet-1.7.1\\dict";
		String outputFileName = "hexwords.txt";
		
		if (args.length > 0)
			outputFileName = args[0];
		if (args.length > 1)
			dirName = args[1];
		
		// set up a simple filter, because the WordNet dictionary files we're 
		// interested in all end with idx
		final String suffix = "idx";
		FilenameFilter suffixFilter = new FilenameFilter() {
			public boolean accept(File dir, String name) {
				// Lowercase with a fixed locale: under a Turkish default
				// locale "IDX" would lowercase to a dotless i and never match.
				return name.toLowerCase(java.util.Locale.ENGLISH).endsWith(suffix);
			}
		};
		
		// Validate the directory BEFORE opening the output file, so a bad
		// directory argument does not truncate an existing output file.
		File dictDir = new File(dirName);
		String[] theFiles = dictDir.list(suffixFilter);
		if (theFiles == null) {
			// File.list() returns null when the path is not a directory
			// or cannot be read.
			System.err.println("Cannot list dictionary directory: " + dirName);
			return;
		}
		// File.list() makes no ordering guarantee; sort so that the output
		// is reproducible from run to run.
		java.util.Arrays.sort(theFiles);
		
		PrintWriter outputFile = null;
		boolean succeeded = false;
		try {
			outputFile = new PrintWriter( new FileWriter(outputFileName) );
			
			// look at each of the idx files in the directory
			for (int i = 0; i < theFiles.length; i++)
				searchFile(new File(dictDir, theFiles[i]).getPath(), outputFile);
			
			// PrintWriter never throws on write failures; checkError()
			// flushes and reports whether any write went wrong.
			succeeded = !outputFile.checkError();
			if (!succeeded)
				System.err.println("Error writing to " + outputFileName);
		} catch (IOException e) {
			System.err.println("Cannot open output file " + outputFileName + ": " + e);
		} finally {
			if (outputFile != null)
				outputFile.close();
		}
		
		if (succeeded)
			System.out.println("\nFinished! Output sent to " + outputFileName);
	}
	
	
	/**
	 * Reads one dictionary index file and prints every qualifying word.
	 *
	 * Each line is examined up to its first space (lines that start with
	 * a space, such as the comment lines, therefore yield no word). A word
	 * qualifies when it is longer than one character and consists solely
	 * of the letters "abcdef", plus "lisoz" when usePseudoChars is true.
	 * In pseudo-character mode the converted form is appended in
	 * parentheses. Errors are reported on System.err and do not propagate,
	 * so one unreadable file does not abort a multi-file scan.
	 *
	 * @param fileName path of the file to read
	 * @param out      writer that receives one matching word per line
	 */
	public static void searchFile (String fileName, PrintWriter out)
	{
		BufferedReader in = null;
		String line;
		
		String pseudoChars = "lisoz";	// 1=l,i; 5=s; 2=z; 0=o
		String hexChars = "abcdef" + ((usePseudoChars) ? pseudoChars : "");
		
		System.out.println("Searching " + fileName + "...");
		
		try {
			in = new BufferedReader( new FileReader(fileName) );
			while ((line = in.readLine()) != null) {
				String word = firstHexWord(line, hexChars);
				
				// for our purposes, we want words longer than 1 character
				if (word.length() > 1)
					out.println(word + 
							((usePseudoChars) ? " (" + convertPseudoChars(word) + ")" : ""));
			}

		} catch (Exception e) {
			// deliberately broad: this is a best-effort scan of many files
			System.err.println("Error in searchFile: " + e);
		} finally {
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// nothing useful can be done if closing a reader fails
				}
			}
		}
	}
	
	
	/**
	 * Returns the lowercased first word of a line (everything before the
	 * first space, or the whole line if there is no space) when every one
	 * of its characters appears in validChars; otherwise returns "".
	 */
	private static String firstHexWord (String line, String validChars)
	{
		int end = line.indexOf(' ');
		String word = (end < 0) ? line : line.substring(0, end);
		
		// Fixed locale so that 'I' always lowercases to a plain 'i'.
		word = word.toLowerCase(java.util.Locale.ENGLISH);
		
		for (int i = 0; i < word.length(); i++) {
			if (validChars.indexOf(word.charAt(i)) < 0)
				return "";
		}
		return word;
	}
	
	
	/**
	 * Upper-cases a word and replaces the "pseudo" characters with their
	 * look-alike digits: L and I become 1, S becomes 5, Z becomes 2 and
	 * O becomes 0. All other characters are kept (upper-cased).
	 *
	 * @param s the word to convert
	 * @return the converted, upper-case form of s
	 */
	public static String convertPseudoChars (String s)
	{
		StringBuffer sb = new StringBuffer(s.length());
		// Fixed locale: under a Turkish default locale 'i' would upper-case
		// to a dotted capital I and slip past the 'I' case below.
		char[] cbuf = s.toUpperCase(java.util.Locale.ENGLISH).toCharArray();
		
		for (int i = 0; i < cbuf.length; i++) {
			switch (cbuf[i]) {
				case 'L': case 'I': 
					sb.append('1');
					break;
				case 'S':
					sb.append('5');
					break;
				case 'Z':
					sb.append('2');
					break;
				case 'O':
					sb.append('0');
					break;
				default:
					sb.append(cbuf[i]);
			}
		}
		
		return sb.toString();
	}
	
}