Example 6-19: Processing large streams of repeating subdocuments

import org.xml.sax.*;
import org.w3c.dom.*;
import java.io.*;
import java.net.*;
import oracle.xml.parser.v2.*;

/**
 * Splits a large XML document into a series of small DOM subdocuments
 * while it is being parsed with SAX, and hands each completed subdocument
 * to an XMLDocumentHandler.
 *
 * A subdocument starts at an opening "split element" and ends at the
 * matching end tag. Only one subdocument is held at a time. Everything
 * outside a split element is ignored.
 */
public class XMLDocumentSplitter extends HandlerBase {
  // Subdocument currently being built, or null when between subdocuments.
  private Document curDoc;
  // Innermost element of the subdocument that is still open.
  private Node     curNode;
  // URL of the document being split; passed along to the handler.
  private URL      fileURL;
  private XMLDocumentHandler handler;
  private String splitOnElement = null;
  boolean seenDocElementYet = false;
  // Number of elements currently open inside the subdocument being built.
  // Lets endElement() tell the end tag that closes the subdocument apart
  // from the end tag of a nested element that happens to share its name.
  private int depth = 0;

  /**
   * Remember the XMLDocumentHandler we're being asked to use.
   *
   * @param handler receives each subdocument once it is complete
   */
  public XMLDocumentSplitter(XMLDocumentHandler handler) {
   this.handler = handler;
  }

  /**
   * Split a large XML document into N subdocuments, each one identified
   * by an opening 'splitElement' element. Invoke the XMLDocumentHandler
   * to process each subdocument encountered.
   *
   * @param fileURL      location of the XML document to read
   * @param splitElement name of the element that starts a subdocument, or
   *                     null to use the document element (which yields a
   *                     single subdocument holding the whole document)
   * @throws SAXException if the parser reports an error
   * @throws IOException  if the URL's stream cannot be opened or read
   */
  public void split(URL fileURL, String splitElement)
   throws SAXParseException,SAXException,IOException {
   this.fileURL        = fileURL;
   this.splitOnElement = splitElement;
   // Start from a clean slate so the same instance can be used for more
   // than one split() call, even if an earlier parse was aborted.
   this.seenDocElementYet = false;
   this.curDoc  = null;
   this.curNode = null;
   this.depth   = 0;
   // Create a new SAXParser
   Parser parser = new SAXParser();
   // Register the current instance of this class as the Document Handler
   parser.setDocumentHandler(this);
   // Create a SAX InputSource on the URL's InputStream and parse it,
   // making sure the stream is released whether or not parsing succeeds.
   InputStream in = fileURL.openStream();
   try {
     parser.parse(new InputSource(in));
   }
   finally {
     try {
       in.close();
     }
     catch (IOException ignored) {
       // A failure while closing a stream we only read from must not
       // hide the outcome of the parse itself.
     }
   }
  }

  /**
   * Handle the <Element> "start-element" parsing event.
   *
   * Starts a new subdocument when the split element is seen, and adds a
   * DOM element (with its attributes) to the subdocument being built.
   * Elements seen while no subdocument is being built are ignored.
   */
  public void startElement(String name,AttributeList atl) throws SAXException {
   // If we were given a null element name to split on, behave "normally"
   // by using the document element as the splitElement.
   if (splitOnElement == null && !seenDocElementYet) {
     splitOnElement = name;
     seenDocElementYet = true;
   }
   if (curDoc == null) {
     // Not building a subdocument: anything other than the split element
     // is of no interest.
     if (!name.equals(splitOnElement)) {
       return;
     }
     // The split element opens a new subdocument.
     curDoc  = new XMLDocument();
     curNode = curDoc;
     depth   = 0;
   }
   // Construct a DOM Element node for the current element being parsed
   Element element = new XMLElement(name);
   // Add DOM Attribute nodes to the element for each attribute parsed
   if (atl != null) {
     for (int i=0; i<atl.getLength(); i++) {
       element.setAttribute(atl.getName(i),atl.getValue(i));
     }
   }
   // Append the new DOM Element as a child of the current node in the
   // subdocument being constructed, and make it the new current node
   curNode.appendChild(element);
   curNode = element;
   depth++;
  }

  /**
   * Handle the </Element> "end-element" parsing event.
   *
   * When the end tag closes the element that opened the subdocument, the
   * subdocument is passed to the handler and then discarded. Any other
   * end tag just moves the current node back up to its parent.
   */
  public void endElement(String name) throws SAXException {
    // If we're NOT building a subdocument, we don't care. Just return.
    if (curDoc == null) return;
    depth--;
    if (depth > 0) {
     // A nested element was closed (possibly one with the same name as
     // the split element): its parent becomes the current node again.
     curNode = curNode.getParentNode();
     return;
    }
    // The element that opened the subdocument was closed, so the
    // subdocument is complete and ready to be handled.
    try {
      // Call the XMLDocumentHandler.handleDocument() method for current subdoc
      handler.handleDocument(curDoc,fileURL);
    }
    catch (Exception e) {
      // Best effort: a failure on one subdocument is reported and the
      // remaining subdocuments are still processed.
      System.err.println(e.getMessage());
    }
    finally {
      // Get ready for the next subdoc by clearing our 'current' state
      curDoc  = null;
      curNode = null;
      depth   = 0;
    }
  }

  /**
   * Handle the "just got some text" parsing event.
   *
   * Text inside a subdocument becomes a DOM Text node under the element
   * that is currently open. Text outside any subdocument (for example
   * whitespace between two split elements) is ignored.
   */
  public void characters(char[] cbuf, int start, int len) {
    if (curDoc == null || curNode == null) return;
    // Append to the open element (curNode), not to the most recently
    // started one, so text following a child's end tag lands in the parent.
    curNode.appendChild(new XMLText(new String(cbuf,start,len)));
  }
}