import org.xml.sax.*; import org.w3c.dom.*; import java.io.*; import java.net.*; import oracle.xml.parser.v2.*; public class XMLDocumentSplitter extends HandlerBase { private Document curDoc; private Node curNode; private Element curElement; private URL fileURL; private XMLDocumentHandler handler; private String splitOnElement = null; boolean seenDocElementYet = false; // Remember the XMLDocumentHandler we're being asked to use. public XMLDocumentSplitter(XMLDocumentHandler handler) { this.handler = handler; } // Split a large XML document into N subdocuments, each one identified // by the an opening 'splitElement' element. Invoke an XMLDocumentHandler // to process each subdocument encountered. public void split(URL fileURL, String splitElement) throws SAXParseException,SAXException,IOException { this.fileURL = fileURL; this.splitOnElement = splitElement; // Create a new SAXParser Parser parser = new SAXParser(); // Register the current instance of this class as the Document Handler parser.setDocumentHandler(this); // Create a SAX InputSource on the URL's InputStream InputSource is = new InputSource(fileURL.openStream()); // Start parsing the stream of XML parser.parse(is); } // Handle the <Element> "start-element" parsing event public void startElement(String name,AttributeList atl) throws SAXException { // If we were given a null element name to split on, behave "normally" // by using the document element as the splitElement. if (splitOnElement == null && !seenDocElementYet) { splitOnElement = name; seenDocElementYet = true; } // Construct a DOM Element node for the current element being parsed curElement = new XMLElement(name); // Add DOM Attribute nodes to the element for each attribute parsed for (int i=0; i<atl.getLength(); i++) { curElement.setAttribute(atl.getName(i),atl.getValue(i)); } // If we're NOT currently building a subdocument and the element name // is the split element, then create a new XMLDocument for new subdocument if (curDoc == null) { if (name.equals(splitOnElement)) { curDoc = new XMLDocument(); curNode = curDoc; } else { // If we're NOT building a subdocument but this element // is not the splitterElement, then do nothing. return; } } // Append the current DOM Element as a child of the current node in the // subdocument being constructured, and set it to be the new current node curNode.appendChild(curElement); curNode = curElement; } // Handle the </Element> "end-element" parsing event public void endElement(String name) throws SAXException { // If we're NOT building a subdocument, we don't care. Just return. if (curDoc == null) return; // If this is the endElement event for the subdocument splitElement // then we're done with the subdocument and are ready to call the // handler to handle it. if (name.equals(splitOnElement)) { if (curDoc != null) { try { // Call the XMLDocumentHandler.handle() method for current subdoc handler.handleDocument(curDoc,fileURL); } catch (Exception e) { System.err.println(e.getMessage()); } } // Get ready for the next subdoc by nulling out our 'current' variables curDoc = null; curNode = null; curElement = null; } else { // If this is the endElement for any other element, make // its parent the new current node curNode = curNode.getParentNode(); } } // Handle the "just got some text" parsing event public void characters(char[] cbuf, int start, int len) { // If we get text characters, create a new DOM Text node and // append it as a child of the current node. curElement.appendChild(new XMLText(new String(cbuf,start,len))); } } |