1
2
3
4
5
6
7
8
9
10 package org.dom4j.io;
11
12 import java.io.InputStream;
13 import java.io.Reader;
14 import java.util.Iterator;
15
16 import javax.xml.namespace.QName;
17 import javax.xml.stream.XMLEventReader;
18 import javax.xml.stream.XMLInputFactory;
19 import javax.xml.stream.XMLStreamConstants;
20 import javax.xml.stream.XMLStreamException;
21 import javax.xml.stream.events.Attribute;
22 import javax.xml.stream.events.Characters;
23 import javax.xml.stream.events.Comment;
24 import javax.xml.stream.events.EndElement;
25 import javax.xml.stream.events.EntityReference;
26 import javax.xml.stream.events.Namespace;
27 import javax.xml.stream.events.ProcessingInstruction;
28 import javax.xml.stream.events.StartDocument;
29 import javax.xml.stream.events.StartElement;
30 import javax.xml.stream.events.XMLEvent;
31
32 import org.dom4j.CharacterData;
33 import org.dom4j.Document;
34 import org.dom4j.DocumentFactory;
35 import org.dom4j.Element;
36 import org.dom4j.Entity;
37 import org.dom4j.Node;
38
39 /***
40 * Reads a DOM4J {@link Document}, as well as other {@link Node}s, from a StAX
41 * {@link XMLEventReader}.
42 *
43 * @author Christian Niles
44 */
45 public class STAXEventReader {
46
47 /*** Reference to the DocumentFactory used to build DOM4J nodes. */
48 private DocumentFactory factory;
49
50 /*** A StAX input factory, used to construct streams from IO streams. */
51 private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
52
53 /***
54 * Constructs a default <code>STAXEventReader</code> instance with a default
55 * {@link DocumentFactory}.
56 */
57 public STAXEventReader() {
58 this.factory = DocumentFactory.getInstance();
59 }
60
61 /***
62 * Constructs a <code>STAXEventReader</code> instance that uses the
63 * specified {@link DocumentFactory} to construct DOM4J {@link Node}s.
64 *
65 * @param factory The DocumentFactory to use when constructing DOM4J nodes,
66 * or <code>null</code> if a default should be used.
67 */
68 public STAXEventReader(DocumentFactory factory) {
69 if (factory != null) {
70 this.factory = factory;
71 } else {
72 this.factory = DocumentFactory.getInstance();
73 }
74
75 }
76
77 /***
78 * Sets the DocumentFactory to be used when constructing DOM4J nodes.
79 *
80 * @param factory The DocumentFactory to use when constructing DOM4J nodes,
81 * or <code>null</code> if a default should be used.
82 */
83 public void setDocumentFactory(DocumentFactory factory) {
84 if (factory != null) {
85 this.factory = factory;
86 } else {
87 this.factory = DocumentFactory.getInstance();
88 }
89 }
90
91 /***
92 * Constructs a StAX event stream from the provided I/O stream and reads a
93 * DOM4J document from it.
94 *
95 * @param is The I/O stream from which the Document will be read.
96 * @return The Document that was read from the stream.
97 * @throws XMLStreamException If an error occurs reading content from the
98 * stream.
99 */
100 public Document readDocument(InputStream is) throws XMLStreamException {
101 return readDocument(is, null);
102 }
103
104 /***
105 * Constructs a StAX event stream from the provided I/O character stream and
106 * reads a DOM4J document from it.
107 *
108 * @param reader The character stream from which the Document will be read.
109 * @return The Document that was read from the stream.
110 * @throws XMLStreamException If an error occurs reading content from the
111 * stream.
112 */
113 public Document readDocument(Reader reader) throws XMLStreamException {
114 return readDocument(reader, null);
115 }
116
117 /***
118 * Constructs a StAX event stream from the provided I/O stream and reads a
119 * DOM4J document from it.
120 *
121 * @param is The I/O stream from which the Document will be read.
122 * @param systemId A system id used to resolve entities.
123 * @return The Document that was read from the stream.
124 * @throws XMLStreamException If an error occurs reading content from the
125 * stream.
126 */
127 public Document readDocument(InputStream is, String systemId) throws XMLStreamException {
128 XMLEventReader eventReader = inputFactory.createXMLEventReader(systemId, is);
129
130 try {
131 return readDocument(eventReader);
132 } finally {
133 eventReader.close();
134 }
135 }
136
137 /***
138 * Constructs a StAX event stream from the provided I/O character stream and
139 * reads a DOM4J document from it.
140 *
141 * @param reader The character stream from which the Document will be read.
142 * @param systemId A system id used to resolve entities.
143 * @return The Document that was read from the stream.
144 * @throws XMLStreamException If an error occurs reading content from the
145 * stream.
146 */
147 public Document readDocument(Reader reader, String systemId) throws XMLStreamException {
148 XMLEventReader eventReader = inputFactory.createXMLEventReader(systemId, reader);
149
150 try {
151 return readDocument(eventReader);
152 } finally {
153 eventReader.close();
154 }
155 }
156
157 /***
158 * Reads a {@link Node} from the event stream. If the next event is a
159 * {@link StartElement}, all events until the closing {@link EndElement}
160 * will be read, and the resulting nodes will be added to the returned
161 * {@link Element}.
162 * <br /><br />
163 * <strong>Pre-Conditions</strong>: The stream must be positioned before an
164 * event other than an <code>EndElement</code>, <code>EndDocument</code>, or
165 * any DTD-related events, which are not currently supported.
166 *
167 * @param reader The reader from which events will be read.
168 * @return A DOM4J {@link Node} constructed from the read events.
169 * @throws XMLStreamException If an error occurs reading from the stream,
170 * or the stream was positioned before an unsupported event.
171 */
172 public Node readNode(XMLEventReader reader) throws XMLStreamException {
173 XMLEvent event = reader.peek();
174 if (event.isStartElement()) {
175 return readElement(reader);
176 } else if (event.isCharacters()) {
177 return readCharacters(reader);
178 } else if (event.isStartDocument()) {
179 return readDocument(reader);
180 } else if (event.isProcessingInstruction()) {
181 return readProcessingInstruction(reader);
182 } else if (event.isEntityReference()) {
183 return readEntityReference(reader);
184 } else if (event.isAttribute()) {
185 return readAttribute(reader);
186 } else if (event.isNamespace()) {
187 return readNamespace(reader);
188 } else {
189 throw new XMLStreamException("Unsupported event: " + event);
190 }
191 }
192
193 /***
194 * Reads a DOM4J {@link Document} from the provided stream. The stream
195 * should be positioned at the start of a document, or before a
196 * {@link StartElement} event.
197 *
198 * @param reader The event stream from which to read the {@link Document}.
199 * @return The {@link Document} that was read from the stream.
200 * @throws XMLStreamException If an error occurs reading events from the
201 * stream.
202 */
203 public Document readDocument(XMLEventReader reader) throws XMLStreamException {
204 Document doc = null;
205
206 while (reader.hasNext()) {
207 XMLEvent nextEvent = reader.peek();
208 int type = nextEvent.getEventType();
209 switch (type) {
210 case XMLStreamConstants.START_DOCUMENT :
211 StartDocument startDoc = (StartDocument)reader.nextEvent();
212 if (doc == null) {
213
214 if (startDoc.encodingSet()) {
215 String encodingScheme =
216 startDoc.getCharacterEncodingScheme();
217 doc = factory.createDocument(encodingScheme);
218 } else {
219 doc = factory.createDocument();
220 }
221 } else {
222
223 throw new XMLStreamException(
224 "Unexpected StartDocument event",
225 startDoc.getLocation());
226 }
227 break;
228
229 case XMLStreamConstants.END_DOCUMENT :
230 case XMLStreamConstants.SPACE :
231 case XMLStreamConstants.CHARACTERS :
232
233 reader.nextEvent();
234 break;
235
236 default :
237 if (doc == null) {
238
239 doc = factory.createDocument();
240 }
241 Node n = readNode(reader);
242 doc.add(n);
243 }
244 }
245
246 return doc;
247 }
248
249 /***
250 * Reads a DOM4J Element from the provided event stream. The stream must
251 * be positioned before an {@link StartElement} event. In addition to the
252 * initial start event, all events up to and including the closing
253 * {@link EndElement} will be read, and included with the returned element.
254 *
255 * @param reader The event stream from which to read the Element.
256 * @return The Element that was read from the stream.
257 * @throws XMLStreamException If an error occured reading events from the
258 * stream, or the stream was not positioned before a
259 * {@link StartElement} event.
260 */
261 public Element readElement(XMLEventReader reader) throws XMLStreamException {
262 XMLEvent event = reader.peek();
263 if (event.isStartElement()) {
264
265 StartElement startTag = reader.nextEvent().asStartElement();
266 Element elem = createElement(startTag);
267
268
269 while (true) {
270 if (!reader.hasNext()) {
271 throw new XMLStreamException(
272 "Unexpected end of stream while reading element content");
273 }
274
275 XMLEvent nextEvent = reader.peek();
276 if (nextEvent.isEndElement()) {
277 EndElement endElem = reader.nextEvent().asEndElement();
278 if (!endElem.getName().equals(startTag.getName())) {
279 throw new XMLStreamException(
280 "Expected "
281 + startTag.getName()
282 + " end-tag, but found"
283 + endElem.getName());
284 }
285
286 break;
287 }
288
289 Node child = readNode(reader);
290 elem.add(child);
291 }
292
293 return elem;
294 } else {
295 throw new XMLStreamException("Expected Element event, found: " + event);
296 }
297 }
298
299 /***
300 * Constructs a DOM4J Attribute from the provided event stream. The stream
301 * must be positioned before an {@link Attribute} event.
302 *
303 * @param reader The event stream from which to read the Attribute.
304 * @return The Attribute that was read from the stream.
305 * @throws XMLStreamException If an error occured reading events from the
306 * stream, or the stream was not positioned before an
307 * {@link Attribute} event.
308 */
309 public org.dom4j.Attribute readAttribute(XMLEventReader reader) throws XMLStreamException {
310 XMLEvent event = reader.peek();
311 if (event.isAttribute()) {
312 Attribute attr = (Attribute) reader.nextEvent();
313 return createAttribute(null, attr);
314 } else {
315 throw new XMLStreamException("Expected Attribute event, found: " + event);
316 }
317 }
318
319 /***
320 * Constructs a DOM4J Namespace from the provided event stream. The stream
321 * must be positioned before a {@link Namespace} event.
322 *
323 * @param reader The event stream from which to read the Namespace.
324 * @return The Namespace that was read from the stream.
325 * @throws XMLStreamException If an error occured reading events from the
326 * stream, or the stream was not positioned before a
327 * {@link Namespace} event.
328 */
329 public org.dom4j.Namespace readNamespace(XMLEventReader reader) throws XMLStreamException {
330 XMLEvent event = reader.peek();
331 if (event.isNamespace()) {
332 Namespace ns = (Namespace) reader.nextEvent();
333 return createNamespace(ns);
334 } else {
335 throw new XMLStreamException("Expected Namespace event, found: " + event);
336 }
337 }
338
339 /***
340 * Constructs a DOM4J Text or CDATA section from the provided event stream.
341 * The stream must be positioned before a {@link Characters} event.
342 *
343 * @param reader The event stream from which to read the Text or CDATA.
344 * @return The Text or CDATA that was read from the stream.
345 * @throws XMLStreamException If an error occured reading events from the
346 * stream, or the stream was not positioned before a
347 * {@link Characters} event.
348 */
349 public CharacterData readCharacters(XMLEventReader reader) throws XMLStreamException {
350 XMLEvent event = reader.peek();
351
352 if (event.isCharacters()) {
353 Characters characters = reader.nextEvent().asCharacters();
354 return createCharacterData(characters);
355 } else {
356 throw new XMLStreamException("Expected Characters event, found: " + event);
357 }
358 }
359
360 /***
361 * Constructs a DOM4J Comment from the provided event stream. The stream
362 * must be positioned before a {@link Comment} event.
363 *
364 * @param reader The event stream from which to read the Comment.
365 * @return The Comment that was read from the stream.
366 * @throws XMLStreamException If an error occured reading events from the
367 * stream, or the stream was not positioned before a
368 * {@link Comment} event.
369 */
370 public org.dom4j.Comment readComment(XMLEventReader reader) throws XMLStreamException {
371 XMLEvent event = reader.peek();
372
373 if (event instanceof Comment) {
374 return createComment((Comment) reader.nextEvent());
375 } else {
376 throw new XMLStreamException("Expected Comment event, found: " + event);
377 }
378 }
379
380 /***
381 * Constructs a DOM4J Entity from the provided event stream. The stream
382 * must be positioned before an {@link EntityReference} event.
383 *
384 * @param reader The event stream from which to read the
385 * {@link EntityReference}.
386 * @return The {@link org.dom4j.Entity} that was read from the stream.
387 * @throws XMLStreamException If an error occured reading events from the
388 * stream, or the stream was not positioned before an
389 * {@link EntityReference} event.
390 */
391 public Entity readEntityReference(XMLEventReader reader) throws XMLStreamException {
392 XMLEvent event = reader.peek();
393
394 if (event.isEntityReference()) {
395 EntityReference entityRef = (EntityReference) reader.nextEvent();
396 return createEntity(entityRef);
397 } else {
398 throw new XMLStreamException("Expected EntityReference event, found: " + event);
399 }
400 }
401
402 /***
403 * Constructs a DOM4J ProcessingInstruction from the provided event stream.
404 * The stream must be positioned before a {@link ProcessingInstruction}
405 * event.
406 *
407 * @param reader The event stream from which to read the
408 * ProcessingInstruction.
409 * @return The ProcessingInstruction that was read from the stream.
410 * @throws XMLStreamException If an error occured reading events from the
411 * stream, or the stream was not positioned before a
412 * {@link ProcessingInstruction} event.
413 */
414 public org.dom4j.ProcessingInstruction readProcessingInstruction(XMLEventReader reader)
415 throws XMLStreamException {
416 XMLEvent event = reader.peek();
417
418 if (event.isProcessingInstruction()) {
419 ProcessingInstruction pi =
420 (ProcessingInstruction) reader.nextEvent();
421 return createProcessingInstruction(pi);
422 } else {
423 throw new XMLStreamException("Expected ProcessingInstruction event, found: " + event);
424 }
425 }
426
427 /***
428 * Constructs a new DOM4J Element from the provided StartElement event. All
429 * attributes and namespaces will be added to the returned element.
430 *
431 * @param startEvent The StartElement event from which to construct the new
432 * DOM4J Element.
433 * @return The Element constructed from the provided StartElement event.
434 */
435 public Element createElement(StartElement startEvent) {
436 QName qname = startEvent.getName();
437 org.dom4j.QName elemName = createQName(qname);
438
439 Element elem = factory.createElement(elemName);
440
441
442 for (Iterator i = startEvent.getAttributes(); i.hasNext();) {
443 Attribute attr = (Attribute) i.next();
444 elem.addAttribute(createQName(attr.getName()), attr.getValue());
445 }
446
447
448 for (Iterator i = startEvent.getNamespaces(); i.hasNext();) {
449 Namespace ns = (Namespace) i.next();
450 elem.addNamespace(ns.getPrefix(), ns.getNamespaceURI());
451 }
452
453 return elem;
454 }
455
456 /***
457 * Constructs a new DOM4J Attribute from the provided StAX Attribute event.
458 *
459 * @param attr The Attribute event from which to construct the new DOM4J
460 * Attribute.
461 * @return The Attribute constructed from the provided Attribute event.
462 */
463 public org.dom4j.Attribute createAttribute(Element elem, Attribute attr) {
464 return factory.createAttribute(elem,
465 createQName(attr.getName()),
466 attr.getValue());
467 }
468
469 /***
470 * Constructs a new DOM4J Namespace from the provided StAX Namespace event.
471 *
472 * @param ns The Namespace event from which to construct the new DOM4J
473 * Namespace.
474 * @return The Namespace constructed from the provided Namespace event.
475 */
476 public org.dom4j.Namespace createNamespace(Namespace ns) {
477 return factory.createNamespace(ns.getPrefix(), ns.getNamespaceURI());
478 }
479
480 /***
481 * Constructs a new DOM4J Text or CDATA object from the provided Characters
482 * event.
483 *
484 * @param characters The Characters event from which to construct the new
485 * DOM4J Text or CDATA object.
486 * @return The Text or CDATA object constructed from the provided Characters
487 * event.
488 */
489 public CharacterData createCharacterData(Characters characters) {
490 String data = characters.getData();
491
492 if (characters.isCData()) {
493 return factory.createCDATA(data);
494 } else {
495 return factory.createText(data);
496 }
497 }
498
499 /***
500 * Constructs a new DOM4J Comment from the provided StAX Comment event.
501 *
502 * @param comment The Comment event from which to construct the new DOM4J
503 * Comment.
504 * @return The Comment constructed from the provided Comment event.
505 */
506 public org.dom4j.Comment createComment(Comment comment) {
507 return factory.createComment(comment.getText());
508 }
509
510 /***
511 * Constructs a new DOM4J Entity from the provided StAX EntityReference
512 * event.
513 *
514 * @param entityRef The EntityReference event from which to construct the
515 * new DOM4J Entity.
516 * @return The Entity constructed from the provided EntityReference event.
517 */
518 public org.dom4j.Entity createEntity(EntityReference entityRef) {
519 return factory.createEntity(entityRef.getName(),
520 entityRef.getDeclaration().getReplacementText()
521 );
522 }
523
524 /***
525 * Constructs a new DOM4J ProcessingInstruction from the provided StAX
526 * ProcessingInstruction event.
527 *
528 * @param pi The ProcessingInstruction event from which to construct the
529 * new DOM4J ProcessingInstruction.
530 * @return The ProcessingInstruction constructed from the provided
531 * ProcessingInstruction event.
532 */
533 public org.dom4j.ProcessingInstruction createProcessingInstruction(ProcessingInstruction pi) {
534 return factory.createProcessingInstruction(pi.getTarget(), pi.getData());
535 }
536
537 /***
538 * Constructs a new DOM4J QName from the provided JAXP QName.
539 *
540 * @param qname The JAXP QName from which to create a DOM4J QName.
541 * @return The newly constructed DOM4J QName.
542 */
543 public org.dom4j.QName createQName(QName qname) {
544 return factory.createQName(qname.getLocalPart(),
545 qname.getPrefix(),
546 qname.getNamespaceURI());
547 }
548
549 }
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598