/*
 */
import java.io.*;
import java.util.*;

/**
 * This is the SimpleMimeReader class, which is a basic way to
 * process a multi-part MIME message/transmission (per
 * RFC 1521).
 *
 * <p>There are plenty of other classes out there that will do the
 * same thing, and probably much better, but most of the ones that
 * I found are geared towards Servlets, and only take an HttpServletRequest
 * or ServletInputStream as a parameter in the constructor (I wanted
 * to use any InputStream). Or they aren't compatible with the
 * old 1.1.8 JDK that I end up having to work with a lot.
 *
 * <p>This implementation does very little storing of data, which
 * should make it more efficient, but it also means you can't go
 * backwards to find a previous part of the message, and you only
 * have one chance to get the data in a message part (after it's
 * been read, any repeated attempts to read the same data will
 * simply return without doing anything). It's almost like a SAX
 * processor for MIME data in that way. I am, however, storing the
 * header data for each message part as I get to that part, so you
 * can access the header and read its elements either before or
 * after you've accessed the part data (until you go to the next
 * part, anyway). The headers are usually small, and I didn't
 * think that this would hurt the performance too much.
 *
 * <p>If you really want to be able to go backwards and forwards
 * through a MIME message, it should be easy enough to run through
 * a message once using this reader and capture all of the message
 * parts in a Vector or something.
 *
 * <p>This implementation doesn't deal with nested multi-part messages,
 * so if one multi-part message is embedded within another, the
 * whole embedded message will be returned as a single chunk of
 * data within one of the message parts. You could always parse that
 * recursively yourself, using additional SimpleMimeReader instances.
 * Any Message/Partial parts will have to be assembled by you as well.
 *
 * <p>This implementation also doesn't do a lot of error checking to
 * see if this is a valid multi-part message or not. Essentially
 * it reads the first "header" on the InputStream (the first block
 * of data that ends with a blank line) and tries to find a MIME
 * boundary in the Content-Type field. If it finds one, it breaks
 * the message up along that boundary; if it doesn't, it assumes
 * that this is a single-part message, and your first call to
 * nextPart and getPartData will just return the entire content
 * of the InputStream following the header. An InputStream that is
 * empty (or contains nothing but blank lines) yields a reader with
 * an empty header, no boundary and no parts.
 *
 * <p>To make it easy to test this class out, I included a main method
 * so you can run this as a stand-alone class at the command line.
 * The meat of the main method is as follows:
 *
 * <pre>
 * SimpleMimeReader smr = new SimpleMimeReader(new FileInputStream(args[0]));
 *
 * System.out.println("BOUNDARY: " + smr.getBoundaryText());
 * System.out.println("PREAMBLE: " + smr.getPreamble());
 * System.out.println("CONTENT-TYPE: " + getHeaderValue(smr.getMessageHeader(), "content-type"));
 * System.out.println("MESSAGE HEADER:\n" + smr.getMessageHeader());
 *
 * ByteArrayOutputStream baos = new ByteArrayOutputStream();
 * int count = 0;
 * while (smr.nextPart()) {
 *     count++;
 *     System.out.println("----------\nPART " + count + "\n----------");
 *     System.out.println("TYPE: " + smr.getPartType());
 *     System.out.println("ENCODING: " + smr.getPartEncoding());
 *     System.out.println("CONTENT ID: " + smr.getPartID());
 *     System.out.println("HEADER:\n" + smr.getPartHeader());
 *     long size = smr.getPartData(baos);
 *     System.out.println("BODY LENGTH: " + size);
 *
 *     baos.writeTo(new FileOutputStream(args[0] + ".file" + count));
 *     baos.reset();
 * }
 *
 * System.out.println("----------");
 * System.out.println("EPILOGUE: " + smr.getEpilogue());
 * </pre>
 *
 * <p>None of the methods in this class throw exceptions. Instead, any
 * anticipated exceptions that occur are dealt with silently and
 * discarded. That's because this is supposed to be a "simple" reader,
 * and I didn't think you should have to get bogged down with too
 * much exception handling. If that really bothers you, it should be
 * trivial to add exceptions to the code yourself.
 *
 * <p>This code was originally written by me (Julian Robichaux).
 * I've tested this class with the 1.1.8 and the 1.3.1 JDK. Updates to
 * this program will be posted to my website, at
 * http://www.nsftools.com
 *
 * @author Julian Robichaux ( http://www.nsftools.com )
 * @version 1.0
 */
public class SimpleMimeReader {
    private InputStream in = null;
    private String boundary = "";
    private String lastBoundary = "";
    private String lastHeader = "";
    private String docHeader = "";
    private byte[] preamble = { };
    private byte[] epilogue = { };
    private boolean justGotPart = false;
    // set once readByteLine has seen the end of the stream (or a read error),
    // so callers that loop on "blank" results can tell blank from exhausted
    private boolean reachedEnd = false;
    private ByteArrayOutputStream readBuffer = new ByteArrayOutputStream(1024);


    /**
     * A simple main method, in case you want to test the basic
     * functionality of this class by running it stand-alone.
     * Each message part is written to a file named after the input
     * file with ".fileN" appended.
     *
     * @param args  command line arguments; args[0] is the name of
     *              the file containing the MIME message
     */
    public static void main (String args[]) {
        if (args.length == 0) {
            System.out.println("USAGE: java SimpleMimeReader MimeFileName");
            return;
        }

        long startTime = System.currentTimeMillis();
        InputStream fileIn = null;

        try {
            fileIn = new FileInputStream(args[0]);
            SimpleMimeReader smr = new SimpleMimeReader(fileIn);

            System.out.println("BOUNDARY: " + smr.getBoundaryText());
            System.out.println("PREAMBLE: " + smr.getPreamble());
            System.out.println("CONTENT-TYPE: " + getHeaderValue(smr.getMessageHeader(), "content-type"));
            System.out.println("MESSAGE HEADER:\n" + smr.getMessageHeader());

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            int count = 0;
            while (smr.nextPart()) {
                count++;
                System.out.println("----------\nPART " + count + "\n----------");
                System.out.println("TYPE: " + smr.getPartType());
                System.out.println("ENCODING: " + smr.getPartEncoding());
                System.out.println("CONTENT ID: " + smr.getPartID());
                System.out.println("HEADER:\n" + smr.getPartHeader());
                long size = smr.getPartData(baos);
                System.out.println("BODY LENGTH: " + size);

                // close each part file as soon as it is written, so we
                // don't leak one file handle per message part
                FileOutputStream partOut = new FileOutputStream(args[0] + ".file" + count);
                try {
                    baos.writeTo(partOut);
                } finally {
                    partOut.close();
                }
                baos.reset();
            }

            System.out.println("----------");
            System.out.println("EPILOGUE: " + smr.getEpilogue());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (fileIn != null) {
                try {
                    fileIn.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if close fails
                }
            }
        }

        long endTime = System.currentTimeMillis();
        System.out.println(String.valueOf(endTime - startTime) + " ms");
    }


    /**
     * The sole constructor for this class, which takes any kind
     * of InputStream as a parameter. The message header (and the
     * preamble, if a boundary is found) is read immediately.
     *
     * @param  inStream  an InputStream that contains a Multi-part
     *                   MIME message
     */
    public SimpleMimeReader (InputStream inStream) {
        // make sure we're buffering the input stream, for efficiency
        this.in = new BufferedInputStream(inStream, 4096);
        getMimeBoundary();
    }


    /**
     * Advances to the next part of the message, if there is a
     * next part. When you create an instance of a SimpleMimeReader,
     * you need to call nextPart() before you start getting data.
     *
     * @return  true if there is a next part, false if there isn't
     *          (which generally means you're at the end of the
     *          message)
     */
    public boolean nextPart () {
        // if the last boundary we got was the boundary plus a "--",
        // then the message is officially over (the RFC allows for
        // epilogues after this last boundary, but they're supposed
        // to be ignored)
        if (lastBoundary.equals(boundary + "--")) {
            // temporarily clear the boundary so that getPartData reads
            // everything that is left on the stream as the epilogue
            String tempBoundary = boundary;
            boundary = "";
            justGotPart = false;
            epilogue = getPartDataAsBytes();
            boundary = tempBoundary;
            lastBoundary = "";
            lastHeader = "";
            return false;
        }

        // we need to advance to the next boundary, unless we've
        // already got the previous part's data (which means we're
        // already there)
        if (!justGotPart) {
            getPartData(null);
        }

        // special consideration if we never found a boundary
        // (set the lastHeader to the docHeader on the first
        // call to this function)
        if ((boundary.length() == 0) && (lastHeader.length() == 0)) {
            lastHeader = docHeader;
        } else {
            lastHeader = getHeader();
        }

        // reset our justGotPart flag
        justGotPart = false;

        // if our lastHeader variable has any data at all, we
        // should be at the next section; otherwise, we're at
        // the end of the input stream and should return false
        return (lastHeader.length() > 0);
    }


    /**
     * Get the boundary that we're breaking the message up on
     *
     * @return  a String containing the message boundary (including
     *          the leading "--"), or an empty String if the boundary
     *          isn't available
     */
    public String getBoundaryText () {
        return boundary;
    }


    /**
     * Get the overall header of the message
     *
     * @return  a String containing the message header,
     *          or an empty String if the header isn't available
     */
    public String getMessageHeader () {
        return docHeader;
    }


    /**
     * Get the header of the current message part that we're
     * looking at
     *
     * @return  a String containing the current part's header,
     *          or an empty String if the header isn't available
     */
    public String getPartHeader () {
        return lastHeader;
    }


    /**
     * Get the preamble (anything after the message header and before
     * the first boundary) of the current message that we're looking at
     * as a String
     *
     * @return  a String containing the preamble, or an empty String
     *          if there is no preamble
     */
    public String getPreamble () {
        return new String(preamble);
    }


    /**
     * Get the preamble (anything after the message header and before
     * the first boundary) of the current message that we're looking at
     * as a byte array. The array returned is the one held by this
     * reader, not a copy.
     *
     * @return  a byte array containing the preamble, or an empty byte array
     *          if there is no preamble
     */
    public byte[] getPreambleBytes () {
        return preamble;
    }


    /**
     * Get the epilogue (anything after the ending boundary)
     * of the current message that we're looking at as a String
     * (available only after all the parts have been read)
     *
     * @return  a String containing the epilogue, or an empty String
     *          if there is no epilogue or if you haven't read through
     *          all the parts of the message yet
     */
    public String getEpilogue () {
        return new String(epilogue);
    }


    /**
     * Get the epilogue (anything after the ending boundary)
     * of the current message that we're looking at as a byte array
     * (available only after all the parts have been read). The array
     * returned is the one held by this reader, not a copy.
     *
     * @return  a byte array containing the epilogue, or an empty byte array
     *          if there is no epilogue or if you haven't read through
     *          all the parts of the message yet
     */
    public byte[] getEpilogueBytes () {
        return epilogue;
    }


    /**
     * Gets the data contained in the current message part as
     * a byte array (this will return an empty byte array if you've already
     * got the data from this message part)
     *
     * @return  a byte array containing the data in this message part,
     *          or an empty byte array if you've already read this data
     */
    public byte[] getPartDataAsBytes () {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        getPartData(baos);
        return baos.toByteArray();
    }


    /**
     * Gets the data contained in the current message part as
     * a String (this will return an empty String if you've already
     * got the data from this message part)
     *
     * @return  a String containing the data in this message part,
     *          or an empty String if you've already read this data
     */
    public String getPartDataAsString () {
        return new String(getPartDataAsBytes());
    }


    /**
     * Writes the data contained in the current message part to
     * the OutputStream of your choice (this will return zero and
     * write nothing if you've already got the data from this
     * message part)
     *
     * @param  outStream  the OutputStream to write data to; may be
     *                    null, in which case the part is skipped
     *                    over and zero is returned
     * @return  a long value indicating the number of bytes
     *          written to your output stream
     */
    public long getPartData (OutputStream outStream) {
        long count = 0;
        String line;

        // if we've already got the data for this part, don't
        // even try to read any further (because we should be
        // at the next boundary, or at the end of the stream)
        if (justGotPart) {
            return 0;
        }

        // make sure we're buffering our output, for efficiency
        BufferedOutputStream out = null;
        if (outStream != null) {
            out = new BufferedOutputStream(outStream, 1024);
        }

        // start getting data -- this is going to seem a little cumbersome because
        // technically the CRLF (\r\n) that is supposed to appear just before the
        // boundary actually belongs to the boundary, not to the body data (if the
        // body is binary, an extra CRLF at the end could screw it up), so we're
        // always writing the previous line until we find the boundary
        byte[] blineLast = new byte[0];
        byte[] bline = readByteLine(in);
        while (bline.length > 0) {
            line = new String(bline);
            if ((boundary.length() > 0) && (line.startsWith(boundary))) {
                // once we've found the next boundary, make sure we write the
                // data in the last line, minus the CRLF that's supposed to be
                // at the end (just to be nice, we'll even try to act properly
                // if the line terminates with a \n instead of a \r\n)
                if (blineLast.length > 1) {
                    int len = (blineLast[blineLast.length - 2] == '\r')
                            ? blineLast.length - 2
                            : blineLast.length - 1;
                    count += writeOut(out, blineLast, len);
                }
                lastBoundary = line.trim();
                break;
            } else {
                count += writeOut(out, blineLast, blineLast.length);
                blineLast = bline;
            }

            // read the next line
            bline = readByteLine(in);
        }

        // if we hit the end of the file, make sure we write the blineLast
        // data before we finish up
        if ((bline.length == 0) && (blineLast.length > 0)) {
            count += writeOut(out, blineLast, blineLast.length);
        }

        // flush the buffered stream, to make sure the original
        // output stream gets everything
        if (out != null) {
            try {
                out.flush();
            } catch (Exception ignored) {
                // deliberately discarded: this class never reports exceptions
            }
        }

        justGotPart = true;
        return count;
    }


    /*
     * A private method that tries to write a byte array to an OutputStream,
     * and returns the number of bytes that were written (0 if there was an error).
     * It's just a way of checking the stream and catching the exceptions in
     * one place, so we don't have to keep duplicating this logic in different
     * places in our code.
     */
    private int writeOut (OutputStream out, byte[] data, int len) {
        // don't even try if the OutputStream is null
        if (out == null) {
            return 0;
        }

        try {
            out.write(data, 0, len);
            return len;
        } catch (Exception e) {
            return 0;
        }
    }


    /**
     * A convenience method to get the Content-Type for the current
     * message part
     *
     * @return  a String containing the Content-Type entry of the header,
     *          if it's available; null if it's not
     */
    public String getPartType () {
        return getHeaderValue(lastHeader, "Content-Type");
    }


    /**
     * A convenience method to get the Content-Transfer-Encoding for the current
     * message part
     *
     * @return  a String containing the Content-Transfer-Encoding entry of the header,
     *          if it's available; null if it's not
     */
    public String getPartEncoding () {
        return getHeaderValue(lastHeader, "Content-Transfer-Encoding");
    }


    /**
     * A convenience method to get the Content-ID for the current
     * message part
     *
     * @return  a String containing the Content-ID entry of the header,
     *          if it's available; null if it's not
     */
    public String getPartID () {
        return getHeaderValue(lastHeader, "Content-ID");
    }


    /**
     * Gets the specified value from a specified header, or null if
     * the entry does not exist. Entry names are matched without
     * regard to case, and a value that is folded across several
     * lines is joined with single spaces. If the entry appears more
     * than once, the last occurrence wins.
     *
     * @param  header  the header to look at (null is treated as
     *                 a header with no entries)
     * @param  entry   the name of the entry you're looking for,
     *                 with or without the trailing colon
     * @return  a String containing the value you're looking for,
     *          or null if the entry cannot be found
     */
    public static String getHeaderValue (String header, String entry) {
        if ((header == null) || (entry == null)) {
            return null;
        }

        String key = entry.endsWith(":") ? entry : entry + ":";
        String value = null;
        boolean gotit = false;

        StringTokenizer st = new StringTokenizer(header, "\r\n");
        while (st.hasMoreTokens()) {
            String line = st.nextToken();
            // regionMatches(true, ...) compares character by character and
            // ignores the default locale, unlike toLowerCase().startsWith()
            if (line.regionMatches(true, 0, key, 0, key.length())) {
                value = line.substring(key.length()).trim();
                gotit = true;
            } else if ((gotit) && (line.length() > 0)) {
                // headers can actually span multiple lines, as long as
                // the next line starts with whitespace
                if (Character.isWhitespace(line.charAt(0))) {
                    value += " " + line.trim();
                } else {
                    gotit = false;
                }
            }
        }

        return value;
    }


    /*
     * A private method to get the next header block on the InputStream.
     * For our purposes, a header is a block of text that ends with a
     * blank line. Returns an empty String if the next line is blank or
     * if the stream is exhausted.
     */
    private String getHeader () {
        StringBuffer header = new StringBuffer("");
        byte[] bline = readByteLine(in);
        while (bline.length > 0) {
            String line = new String(bline);
            if (line.trim().length() == 0) {
                break;
            }
            header.append(line);
            bline = readByteLine(in);
        }

        return header.toString();
    }


    /*
     * A private method to attempt to read the MIME boundary from the
     * Content-Type entry in the first header it finds. This should be
     * called once, when the class is first instantiated.
     */
    private void getMimeBoundary () {
        // this shouldn't happen, but in case the Stream starts with
        // one or more blank lines, we'll just skip those to get to
        // our header (and we give up once the stream runs dry, so an
        // empty stream can't keep us here forever)
        while ((docHeader.trim().length() == 0) && !reachedEnd) {
            docHeader += getHeader();
        }

        // get the Content-Type entry in the header, and read the
        // boundary (if any)
        String value = getHeaderValue(docHeader, "content-type");
        boundary = (value == null) ? "" : extractBoundary(value);

        // if we didn't find a boundary, we'll treat this as a
        // single-part message (which means we set justGotPart
        // to true so we don't go looking for anything when we
        // call nextPart() the first time)
        if (boundary.length() == 0) {
            justGotPart = true;
        } else {
            boundary = "--" + boundary;
            preamble = getPartDataAsBytes();
        }
    }


    /*
     * A private method that pulls the boundary parameter out of a
     * Content-Type value. Returns an empty String if there is no
     * usable boundary parameter.
     */
    private static String extractBoundary (String contentType) {
        int namePos = contentType.toLowerCase().indexOf("boundary");
        if (namePos < 0) {
            return "";
        }

        // the value starts after the "=" that follows the parameter name
        int eqPos = contentType.indexOf('=', namePos);
        if (eqPos < 0) {
            return "";
        }

        int endPos = contentType.indexOf(';', eqPos);
        if (endPos < 0) {
            endPos = contentType.length();
        }

        // trim BEFORE looking for quotes, so that whitespace between the
        // closing quote and the next ";" doesn't hide the quote
        String result = contentType.substring(eqPos + 1, endPos).trim();

        // you're allowed to enclose your boundary in quotes too,
        // so we need to account for that possibility
        if (result.startsWith("\"")) {
            result = result.substring(1);
        }
        if (result.endsWith("\"")) {
            result = result.substring(0, result.length() - 1);
        }

        return result.trim();
    }


    /*
     * A way to read a single "line" of bytes from an InputStream.
     * The byte array that is returned will include the line
     * terminator (\n), unless we reached the end of the stream.
     * Once the end of the stream (or a read error) has been seen,
     * the reachedEnd flag is set.
     */
    private byte[] readByteLine (InputStream in) {
        // we made readBuffer global, so we don't have to keep recreating it
        readBuffer.reset();
        int c;

        try {
            // read the bytes one-by-one until we hit a line terminator
            // or the end of the file (we're only checking for \n here,
            // although if we really wanted to be picky we'd probably
            // check for \r and \0 as well)
            while ((c = in.read()) != -1) {
                readBuffer.write(c);
                if (c == '\n') {
                    return readBuffer.toByteArray();
                }
            }
            reachedEnd = true;
        } catch (Exception e) {
            // we're not reporting any exceptions here, but a stream that
            // fails to read is treated as finished
            reachedEnd = true;
        }

        // and return what we have
        return readBuffer.toByteArray();
    }
}