View Javadoc

1   /*
2    * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
3    * 
4    * This software is open source. 
5    * See the bottom of this file for the licence.
6    * 
7    * $Id: STAXEventReader.java,v 1.4 2004/07/14 19:32:23 maartenc Exp $
8    */
9   
10  package org.dom4j.io;
11  
12  import java.io.InputStream;
13  import java.io.Reader;
14  import java.util.Iterator;
15  
16  import javax.xml.namespace.QName;
17  import javax.xml.stream.XMLEventReader;
18  import javax.xml.stream.XMLInputFactory;
19  import javax.xml.stream.XMLStreamConstants;
20  import javax.xml.stream.XMLStreamException;
21  import javax.xml.stream.events.Attribute;
22  import javax.xml.stream.events.Characters;
23  import javax.xml.stream.events.Comment;
24  import javax.xml.stream.events.EndElement;
25  import javax.xml.stream.events.EntityReference;
26  import javax.xml.stream.events.Namespace;
27  import javax.xml.stream.events.ProcessingInstruction;
28  import javax.xml.stream.events.StartDocument;
29  import javax.xml.stream.events.StartElement;
30  import javax.xml.stream.events.XMLEvent;
31  
32  import org.dom4j.CharacterData;
33  import org.dom4j.Document;
34  import org.dom4j.DocumentFactory;
35  import org.dom4j.Element;
36  import org.dom4j.Entity;
37  import org.dom4j.Node;
38  
39  /***
40   * Reads a DOM4J {@link Document}, as well as other {@link Node}s, from a StAX
41   * {@link XMLEventReader}.
42   *
43   * @author Christian Niles
44   */
45  public class STAXEventReader {
46      
47      /*** Reference to the DocumentFactory used to build DOM4J nodes. */
48      private DocumentFactory factory;
49      
50      /*** A StAX input factory, used to construct streams from IO streams. */
51      private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
52      
53      /***
54       * Constructs a default <code>STAXEventReader</code> instance with a default
55       * {@link DocumentFactory}.
56       */
57      public STAXEventReader() {
58          this.factory = DocumentFactory.getInstance();
59      }
60      
61      /***
62       * Constructs a <code>STAXEventReader</code> instance that uses the
63       * specified {@link DocumentFactory} to construct DOM4J {@link Node}s.
64       *
65       * @param factory The DocumentFactory to use when constructing DOM4J nodes,
66       * 		or <code>null</code> if a default should be used.
67       */
68      public STAXEventReader(DocumentFactory factory) {
69          if (factory != null) {
70              this.factory = factory;
71          } else {
72              this.factory = DocumentFactory.getInstance();
73          }
74          
75      }
76      
77      /***
78       * Sets the DocumentFactory to be used when constructing DOM4J nodes.
79       *
80       * @param factory The DocumentFactory to use when constructing DOM4J nodes,
81       * 		or <code>null</code> if a default should be used.
82       */
83      public void setDocumentFactory(DocumentFactory factory) {
84          if (factory != null) {
85              this.factory = factory;
86          } else {
87              this.factory = DocumentFactory.getInstance();
88          }
89      }
90      
91      /***
92       * Constructs a StAX event stream from the provided I/O stream and reads a
93       * DOM4J document from it.
94       *
95       * @param is The I/O stream from which the Document will be read.
96       * @return The Document that was read from the stream.
97       * @throws XMLStreamException If an error occurs reading content from the
98       * 		stream.
99       */
100     public Document readDocument(InputStream is) throws XMLStreamException {
101         return readDocument(is, null);
102     }
103     
104     /***
105      * Constructs a StAX event stream from the provided I/O character stream and
106      * reads a DOM4J document from it.
107      *
108      * @param reader The character stream from which the Document will be read.
109      * @return The Document that was read from the stream.
110      * @throws XMLStreamException If an error occurs reading content from the
111      * 		stream.
112      */
113     public Document readDocument(Reader reader) throws XMLStreamException {
114         return readDocument(reader, null);
115     }
116     
117     /***
118      * Constructs a StAX event stream from the provided I/O stream and reads a
119      * DOM4J document from it.
120      *
121      * @param is The I/O stream from which the Document will be read.
122      * @param systemId A system id used to resolve entities.
123      * @return The Document that was read from the stream.
124      * @throws XMLStreamException If an error occurs reading content from the
125      * 		stream.
126      */
127     public Document readDocument(InputStream is, String systemId) throws XMLStreamException {
128         XMLEventReader eventReader = inputFactory.createXMLEventReader(systemId, is);
129         
130         try {
131             return readDocument(eventReader);
132         } finally {
133             eventReader.close();
134         }
135     }
136     
137     /***
138      * Constructs a StAX event stream from the provided I/O character stream and
139      * reads a DOM4J document from it.
140      *
141      * @param reader The character stream from which the Document will be read.
142      * @param systemId A system id used to resolve entities.
143      * @return The Document that was read from the stream.
144      * @throws XMLStreamException If an error occurs reading content from the
145      * 		stream.
146      */
147     public Document readDocument(Reader reader, String systemId) throws XMLStreamException {
148         XMLEventReader eventReader = inputFactory.createXMLEventReader(systemId, reader);
149         
150         try {
151             return readDocument(eventReader);
152         } finally {
153             eventReader.close();
154         }
155     }
156     
157     /***
158      * Reads a {@link Node} from the event stream. If the next event is a
159      * {@link StartElement}, all events until the closing {@link EndElement}
160      * will be read, and the resulting nodes will be added to the returned
161      * {@link Element}.
162      * <br /><br />
163      * <strong>Pre-Conditions</strong>: The stream must be positioned before an
164      * event other than an <code>EndElement</code>, <code>EndDocument</code>, or
165      * any DTD-related events, which are not currently supported.
166      *
167      * @param reader The reader from which events will be read.
168      * @return A DOM4J {@link Node} constructed from the read events.
169      * @throws XMLStreamException If an error occurs reading from the stream,
170      * 		or the stream was positioned before an unsupported event.
171      */
172     public Node readNode(XMLEventReader reader) throws XMLStreamException {
173         XMLEvent event = reader.peek();
174         if (event.isStartElement()) {
175             return readElement(reader);
176         } else if (event.isCharacters()) {
177             return readCharacters(reader);
178         } else if (event.isStartDocument()) {
179             return readDocument(reader);
180         } else if (event.isProcessingInstruction()) {
181             return readProcessingInstruction(reader);
182         } else if (event.isEntityReference()) {
183             return readEntityReference(reader);
184         } else if (event.isAttribute()) {
185             return readAttribute(reader);
186         } else if (event.isNamespace()) {
187             return readNamespace(reader);
188         } else {
189             throw new XMLStreamException("Unsupported event: " + event);
190         }
191     }
192     
193     /***
194      * Reads a DOM4J {@link Document} from the provided stream. The stream
195      * should be positioned at the start of a document, or before a
196      * {@link StartElement} event.
197      *
198      * @param reader The event stream from which to read the {@link Document}.
199      * @return The {@link Document} that was read from the stream.
200      * @throws XMLStreamException If an error occurs reading events from the
201      * 		stream.
202      */
203     public Document readDocument(XMLEventReader reader) throws XMLStreamException {
204         Document doc = null;
205 
206         while (reader.hasNext()) {
207             XMLEvent nextEvent = reader.peek();
208             int type = nextEvent.getEventType();
209             switch (type) {
210                 case XMLStreamConstants.START_DOCUMENT :
211                     StartDocument startDoc = (StartDocument)reader.nextEvent();
212                     if (doc == null) {
213                         // create document
214                         if (startDoc.encodingSet()) {
215                             String encodingScheme = 
216                                 startDoc.getCharacterEncodingScheme();
217                             doc = factory.createDocument(encodingScheme);
218                         } else {
219                             doc = factory.createDocument();
220                         }
221                     } else {
222                         // duplicate or misplaced xml declaration
223                         throw new XMLStreamException(
224                                 "Unexpected StartDocument event",
225                                 startDoc.getLocation());
226                     }
227                     break;
228 
229                 case XMLStreamConstants.END_DOCUMENT :
230                 case XMLStreamConstants.SPACE :
231                 case XMLStreamConstants.CHARACTERS :
232                     // skip end document and space outside the root element
233                     reader.nextEvent();
234                     break;
235                     
236                 default :
237                     if (doc == null) {
238                         // create document
239                         doc = factory.createDocument();
240                     }
241                     Node n = readNode(reader);
242                     doc.add(n);
243             }
244         }
245         
246         return doc;
247     }
248     
249     /***
250      * Reads a DOM4J Element from the provided event stream. The stream must
251      * be positioned before an {@link StartElement} event. In addition to the
252      * initial start event, all events up to and including the closing
253      * {@link EndElement} will be read, and included with the returned element.
254      *
255      * @param reader The event stream from which to read the Element.
256      * @return The Element that was read from the stream.
257      * @throws XMLStreamException If an error occured reading events from the
258      * 		stream, or the stream was not positioned before a
259      * 		{@link StartElement} event.
260      */
261     public Element readElement(XMLEventReader reader) throws XMLStreamException {
262         XMLEvent event = reader.peek();
263         if (event.isStartElement()) {
264             // advance the reader and get the StartElement event
265             StartElement startTag = reader.nextEvent().asStartElement();
266             Element elem = createElement(startTag);
267             
268             // read element content
269             while (true) {
270                 if (!reader.hasNext()) {
271                     throw new XMLStreamException(
272                             "Unexpected end of stream while reading element content");
273                 }
274                 
275                 XMLEvent nextEvent = reader.peek();
276                 if (nextEvent.isEndElement()) {
277                     EndElement endElem = reader.nextEvent().asEndElement();
278                     if (!endElem.getName().equals(startTag.getName())) {
279                         throw new XMLStreamException(
280                                 "Expected "
281                                 + startTag.getName()
282                                 + " end-tag, but found"
283                                 + endElem.getName());
284                     }
285                     
286                     break;
287                 }
288                 
289                 Node child = readNode(reader);
290                 elem.add(child);
291             }
292             
293             return elem;
294         } else {
295             throw new XMLStreamException("Expected Element event, found: " + event);
296         }
297     }
298     
299     /***
300      * Constructs a DOM4J Attribute from the provided event stream. The stream
301      * must be positioned before an {@link Attribute} event.
302      *
303      * @param reader The event stream from which to read the Attribute.
304      * @return The Attribute that was read from the stream.
305      * @throws XMLStreamException If an error occured reading events from the
306      * 		stream, or the stream was not positioned before an
307      * 		{@link Attribute} event.
308      */
309     public org.dom4j.Attribute readAttribute(XMLEventReader reader) throws XMLStreamException {
310         XMLEvent event = reader.peek();
311         if (event.isAttribute()) {
312             Attribute attr = (Attribute) reader.nextEvent();
313             return createAttribute(null, attr);
314         } else {
315             throw new XMLStreamException("Expected Attribute event, found: " + event);
316         }
317     }
318     
319     /***
320      * Constructs a DOM4J Namespace from the provided event stream. The stream
321      * must be positioned before a {@link Namespace} event.
322      *
323      * @param reader The event stream from which to read the Namespace.
324      * @return The Namespace that was read from the stream.
325      * @throws XMLStreamException If an error occured reading events from the
326      * 		stream, or the stream was not positioned before a
327      * 		{@link Namespace} event.
328      */
329     public org.dom4j.Namespace readNamespace(XMLEventReader reader) throws XMLStreamException {
330         XMLEvent event = reader.peek();
331         if (event.isNamespace()) {
332             Namespace ns = (Namespace) reader.nextEvent();
333             return createNamespace(ns);
334         } else {
335             throw new XMLStreamException("Expected Namespace event, found: " + event);
336         }
337     }
338     
339     /***
340      * Constructs a DOM4J Text or CDATA section from the provided event stream.
341      * The stream must be positioned before a {@link Characters} event.
342      *
343      * @param reader The event stream from which to read the Text or CDATA.
344      * @return The Text or CDATA that was read from the stream.
345      * @throws XMLStreamException If an error occured reading events from the
346      * 		stream, or the stream was not positioned before a
347      * 		{@link Characters} event.
348      */
349     public CharacterData readCharacters(XMLEventReader reader) throws XMLStreamException {
350         XMLEvent event = reader.peek();
351 
352         if (event.isCharacters()) {
353             Characters characters = reader.nextEvent().asCharacters();
354             return createCharacterData(characters);
355         } else {
356             throw new XMLStreamException("Expected Characters event, found: " + event);
357         }
358     }
359     
360     /***
361      * Constructs a DOM4J Comment from the provided event stream. The stream
362      * must be positioned before a {@link Comment} event.
363      *
364      * @param reader The event stream from which to read the Comment.
365      * @return The Comment that was read from the stream.
366      * @throws XMLStreamException If an error occured reading events from the
367      * 		stream, or the stream was not positioned before a
368      * 		{@link Comment} event.
369      */
370     public org.dom4j.Comment readComment(XMLEventReader reader) throws XMLStreamException {
371         XMLEvent event = reader.peek();
372 
373         if (event instanceof Comment) {
374             return createComment((Comment) reader.nextEvent());
375         } else {
376             throw new XMLStreamException("Expected Comment event, found: " + event);
377         }
378     }
379     
380     /***
381      * Constructs a DOM4J Entity from the provided event stream. The stream
382      * must be positioned before an {@link EntityReference} event.
383      *
384      * @param reader The event stream from which to read the
385      * 		{@link EntityReference}.
386      * @return The {@link org.dom4j.Entity} that was read from the stream.
387      * @throws XMLStreamException If an error occured reading events from the
388      * 		stream, or the stream was not positioned before an
389      * 		{@link EntityReference} event.
390      */
391     public Entity readEntityReference(XMLEventReader reader) throws XMLStreamException {
392         XMLEvent event = reader.peek();
393 
394         if (event.isEntityReference()) {
395             EntityReference entityRef = (EntityReference) reader.nextEvent();
396             return createEntity(entityRef);
397         } else {
398             throw new XMLStreamException("Expected EntityReference event, found: " + event);
399         }
400     }
401     
402     /***
403      * Constructs a DOM4J ProcessingInstruction from the provided event stream.
404      * The stream must be positioned before a {@link ProcessingInstruction}
405      * event.
406      *
407      * @param reader The event stream from which to read the
408      * 		ProcessingInstruction.
409      * @return The ProcessingInstruction that was read from the stream.
410      * @throws XMLStreamException If an error occured reading events from the
411      * 		stream, or the stream was not positioned before a
412      * 		{@link ProcessingInstruction} event.
413      */
414     public org.dom4j.ProcessingInstruction readProcessingInstruction(XMLEventReader reader)
415             throws XMLStreamException {
416         XMLEvent event = reader.peek();
417 
418         if (event.isProcessingInstruction()) {
419             ProcessingInstruction pi =
420             (ProcessingInstruction) reader.nextEvent();
421             return createProcessingInstruction(pi);
422         } else {
423             throw new XMLStreamException("Expected ProcessingInstruction event, found: " + event);
424         }
425     }
426     
427     /***
428      * Constructs a new DOM4J Element from the provided StartElement event. All
429      * attributes and namespaces will be added to the returned element.
430      *
431      * @param startEvent The StartElement event from which to construct the new
432      * 		DOM4J Element.
433      * @return The Element constructed from the provided StartElement event.
434      */
435     public Element createElement(StartElement startEvent) {
436         QName qname = startEvent.getName();
437         org.dom4j.QName elemName = createQName(qname);
438         
439         Element elem = factory.createElement(elemName);
440         
441         // create attributes
442         for (Iterator i = startEvent.getAttributes(); i.hasNext();) {
443             Attribute attr = (Attribute) i.next();
444             elem.addAttribute(createQName(attr.getName()), attr.getValue());
445         }
446         
447         // create namespaces
448         for (Iterator i = startEvent.getNamespaces(); i.hasNext();) {
449             Namespace ns = (Namespace) i.next();
450             elem.addNamespace(ns.getPrefix(), ns.getNamespaceURI());
451         }
452         
453         return elem;
454     }
455     
456     /***
457      * Constructs a new DOM4J Attribute from the provided StAX Attribute event.
458      *
459      * @param attr The Attribute event from which to construct the new DOM4J
460      * 		Attribute.
461      * @return The Attribute constructed from the provided Attribute event.
462      */
463     public org.dom4j.Attribute createAttribute(Element elem, Attribute attr) {
464         return factory.createAttribute(elem,
465                                        createQName(attr.getName()),
466                                        attr.getValue());
467     }
468     
469     /***
470      * Constructs a new DOM4J Namespace from the provided StAX Namespace event.
471      *
472      * @param ns The Namespace event from which to construct the new DOM4J
473      * 		Namespace.
474      * @return The Namespace constructed from the provided Namespace event.
475      */
476     public org.dom4j.Namespace createNamespace(Namespace ns) {
477         return factory.createNamespace(ns.getPrefix(), ns.getNamespaceURI());
478     }
479     
480     /***
481      * Constructs a new DOM4J Text or CDATA object from the provided Characters
482      * event.
483      *
484      * @param characters The Characters event from which to construct the new
485      * 		DOM4J Text or CDATA object.
486      * @return The Text or CDATA object constructed from the provided Characters
487      * 		event.
488      */
489     public CharacterData createCharacterData(Characters characters) {
490         String data = characters.getData();
491 
492         if (characters.isCData()) {
493             return factory.createCDATA(data);
494         } else {
495             return factory.createText(data);
496         }
497     }
498     
499     /***
500      * Constructs a new DOM4J Comment from the provided StAX Comment event.
501      *
502      * @param comment The Comment event from which to construct the new DOM4J
503      * 		Comment.
504      * @return The Comment constructed from the provided Comment event.
505      */
506     public org.dom4j.Comment createComment(Comment comment) {
507         return factory.createComment(comment.getText());
508     }
509     
510     /***
511      * Constructs a new DOM4J Entity from the provided StAX EntityReference
512      * event.
513      *
514      * @param entityRef The EntityReference event from which to construct the
515      * 		new DOM4J Entity.
516      * @return The Entity constructed from the provided EntityReference event.
517      */
518     public org.dom4j.Entity createEntity(EntityReference entityRef) {
519         return factory.createEntity(entityRef.getName(),
520                                     entityRef.getDeclaration().getReplacementText()
521         );
522     }
523     
524     /***
525      * Constructs a new DOM4J ProcessingInstruction from the provided StAX
526      * ProcessingInstruction event.
527      *
528      * @param pi The ProcessingInstruction event from which to construct the
529      * 		new DOM4J ProcessingInstruction.
530      * @return The ProcessingInstruction constructed from the provided
531      * 		ProcessingInstruction event.
532      */
533     public org.dom4j.ProcessingInstruction createProcessingInstruction(ProcessingInstruction pi) {
534         return factory.createProcessingInstruction(pi.getTarget(), pi.getData());
535     }
536     
537     /***
538      * Constructs a new DOM4J QName from the provided JAXP QName.
539      *
540      * @param qname The JAXP QName from which to create a DOM4J QName.
541      * @return The newly constructed DOM4J QName.
542      */
543     public org.dom4j.QName createQName(QName qname) {
544         return factory.createQName(qname.getLocalPart(),
545                                    qname.getPrefix(),
546                                    qname.getNamespaceURI());
547     }
548     
549 }
550 
551 
552 
553 
554 /*
555  * Redistribution and use of this software and associated documentation
556  * ("Software"), with or without modification, are permitted provided
557  * that the following conditions are met:
558  *
559  * 1. Redistributions of source code must retain copyright
560  *    statements and notices.  Redistributions must also contain a
561  *    copy of this document.
562  *
563  * 2. Redistributions in binary form must reproduce the
564  *    above copyright notice, this list of conditions and the
565  *    following disclaimer in the documentation and/or other
566  *    materials provided with the distribution.
567  *
568  * 3. The name "DOM4J" must not be used to endorse or promote
569  *    products derived from this Software without prior written
570  *    permission of MetaStuff, Ltd.  For written permission,
571  *    please contact dom4j-info@metastuff.com.
572  *
573  * 4. Products derived from this Software may not be called "DOM4J"
574  *    nor may "DOM4J" appear in their names without prior written
575  *    permission of MetaStuff, Ltd. DOM4J is a registered
576  *    trademark of MetaStuff, Ltd.
577  *
578  * 5. Due credit should be given to the DOM4J Project - 
579  *    http://www.dom4j.org
580  *
581  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
582  * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
583  * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
584  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
585  * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
586  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
587  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
588  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
589  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
590  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
591  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
592  * OF THE POSSIBILITY OF SUCH DAMAGE.
593  *
594  * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
595  *
596  * $Id: STAXEventReader.java,v 1.4 2004/07/14 19:32:23 maartenc Exp $
597  */
598