1
2
3
4
5
6
7
8
9
10 package org.dom4j.io;
11
12 import java.io.BufferedReader;
13 import java.io.File;
14 import java.io.FileReader;
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.io.InputStreamReader;
18 import java.io.Reader;
19 import java.net.URL;
20
21 import org.dom4j.Document;
22 import org.dom4j.DocumentException;
23 import org.dom4j.DocumentFactory;
24 import org.dom4j.Element;
25 import org.dom4j.ElementHandler;
26 import org.dom4j.xpp.ProxyXmlStartTag;
27 import org.gjt.xpp.XmlEndTag;
28 import org.gjt.xpp.XmlPullParser;
29 import org.gjt.xpp.XmlPullParserException;
30 import org.gjt.xpp.XmlPullParserFactory;
31
32 /*** <p><code>XPPReader</code> is a Reader of DOM4J documents that
33 * uses the fast
34 * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x</a>.
35 * It does not currently support comments, CDATA or ProcessingInstructions or
36 * validation but it is very fast for use in SOAP style environments.</p>
37 *
38 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
39 * @version $Revision: 1.5 $
40 */
41 public class XPPReader {
42
43 /*** <code>DocumentFactory</code> used to create new document objects */
44 private DocumentFactory factory;
45
46 /*** <code>XmlPullParser</code> used to parse XML */
47 private XmlPullParser xppParser;
48
49 /*** <code>XmlPullParser</code> used to parse XML */
50 private XmlPullParserFactory xppFactory;
51
52 /*** DispatchHandler to call when each <code>Element</code> is encountered */
53 private DispatchHandler dispatchHandler;
54
55
56
57 public XPPReader() {
58 }
59
60 public XPPReader(DocumentFactory factory) {
61 this.factory = factory;
62 }
63
64
65
66
67 /*** <p>Reads a Document from the given <code>File</code></p>
68 *
69 * @param file is the <code>File</code> to read from.
70 * @return the newly created Document instance
71 * @throws DocumentException if an error occurs during parsing.
72 * @throws MalformedURLException if a URL could not be made for the given File
73 */
74 public Document read(File file) throws DocumentException, IOException, XmlPullParserException {
75 String systemID = file.getAbsolutePath();
76 return read( new BufferedReader( new FileReader( file ) ), systemID );
77 }
78
79 /*** <p>Reads a Document from the given <code>URL</code></p>
80 *
81 * @param url <code>URL</code> to read from.
82 * @return the newly created Document instance
83 * @throws DocumentException if an error occurs during parsing.
84 */
85 public Document read(URL url) throws DocumentException, IOException, XmlPullParserException {
86 String systemID = url.toExternalForm();
87 return read( createReader( url.openStream() ), systemID);
88 }
89
90 /*** <p>Reads a Document from the given URL or filename.</p>
91 *
92 * <p>
93 * If the systemID contains a <code>':'</code> character then it is
94 * assumed to be a URL otherwise its assumed to be a file name.
95 * If you want finer grained control over this mechansim then please
96 * explicitly pass in either a {@link URL} or a {@link File} instance
97 * instead of a {@link String} to denote the source of the document.
98 * </p>
99 *
100 * @param systemID is a URL for a document or a file name.
101 * @return the newly created Document instance
102 * @throws DocumentException if an error occurs during parsing.
103 * @throws MalformedURLException if a URL could not be made for the given File
104 */
105 public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException {
106 if ( systemID.indexOf( ':' ) >= 0 ) {
107
108 return read(new URL(systemID));
109 }
110 else {
111
112 return read( new File(systemID) );
113 }
114 }
115
116 /*** <p>Reads a Document from the given stream</p>
117 *
118 * @param in <code>InputStream</code> to read from.
119 * @return the newly created Document instance
120 * @throws DocumentException if an error occurs during parsing.
121 */
122 public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException {
123 return read( createReader( in ) );
124 }
125
126 /*** <p>Reads a Document from the given <code>Reader</code></p>
127 *
128 * @param reader is the reader for the input
129 * @return the newly created Document instance
130 * @throws DocumentException if an error occurs during parsing.
131 */
132 public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException {
133 getXPPParser().setInput(reader);
134 return parseDocument();
135 }
136
137 /*** <p>Reads a Document from the given array of characters</p>
138 *
139 * @param text is the text to parse
140 * @return the newly created Document instance
141 * @throws DocumentException if an error occurs during parsing.
142 */
143 public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException {
144 getXPPParser().setInput(text);
145 return parseDocument();
146 }
147
148 /*** <p>Reads a Document from the given stream</p>
149 *
150 * @param in <code>InputStream</code> to read from.
151 * @param systemID is the URI for the input
152 * @return the newly created Document instance
153 * @throws DocumentException if an error occurs during parsing.
154 */
155 public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException {
156 return read( createReader( in ), systemID );
157 }
158
159 /*** <p>Reads a Document from the given <code>Reader</code></p>
160 *
161 * @param reader is the reader for the input
162 * @param systemID is the URI for the input
163 * @return the newly created Document instance
164 * @throws DocumentException if an error occurs during parsing.
165 */
166 public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException {
167 Document document = read( reader );
168 document.setName( systemID );
169 return document;
170 }
171
172
173
174
175
176 public XmlPullParser getXPPParser() throws XmlPullParserException {
177 if ( xppParser == null ) {
178 xppParser = getXPPFactory().newPullParser();
179 }
180 return xppParser;
181 }
182
183 public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
184 if ( xppFactory == null ) {
185 xppFactory = XmlPullParserFactory.newInstance();
186 }
187 return xppFactory;
188 }
189
190 public void setXPPFactory(XmlPullParserFactory xppFactory) {
191 this.xppFactory = xppFactory;
192 }
193
194 /*** @return the <code>DocumentFactory</code> used to create document objects
195 */
196 public DocumentFactory getDocumentFactory() {
197 if (factory == null) {
198 factory = DocumentFactory.getInstance();
199 }
200 return factory;
201 }
202
203 /*** <p>This sets the <code>DocumentFactory</code> used to create new documents.
204 * This method allows the building of custom DOM4J tree objects to be implemented
205 * easily using a custom derivation of {@link DocumentFactory}</p>
206 *
207 * @param factory <code>DocumentFactory</code> used to create DOM4J objects
208 */
209 public void setDocumentFactory(DocumentFactory factory) {
210 this.factory = factory;
211 }
212
213
214 /*** Adds the <code>ElementHandler</code> to be called when the
215 * specified path is encounted.
216 *
217 * @param path is the path to be handled
218 * @param handler is the <code>ElementHandler</code> to be called
219 * by the event based processor.
220 */
221 public void addHandler(String path, ElementHandler handler) {
222 getDispatchHandler().addHandler(path, handler);
223 }
224
225 /*** Removes the <code>ElementHandler</code> from the event based
226 * processor, for the specified path.
227 *
228 * @param path is the path to remove the <code>ElementHandler</code> for.
229 */
230 public void removeHandler(String path) {
231 getDispatchHandler().removeHandler(path);
232 }
233
234 /*** When multiple <code>ElementHandler</code> instances have been
235 * registered, this will set a default <code>ElementHandler</code>
236 * to be called for any path which does <b>NOT</b> have a handler
237 * registered.
238 * @param handler is the <code>ElementHandler</code> to be called
239 * by the event based processor.
240 */
241 public void setDefaultHandler(ElementHandler handler) {
242 getDispatchHandler().setDefaultHandler(handler);
243 }
244
245
246
247 protected Document parseDocument() throws DocumentException, IOException, XmlPullParserException {
248 Document document = getDocumentFactory().createDocument();
249 Element parent = null;
250 XmlPullParser xppParser = getXPPParser();
251 xppParser.setNamespaceAware(true);
252 ProxyXmlStartTag startTag = new ProxyXmlStartTag();
253 XmlEndTag endTag = xppFactory.newEndTag();
254 while (true) {
255 int type = xppParser.next();
256 switch (type) {
257 case XmlPullParser.END_DOCUMENT: {
258 return document;
259 }
260 case XmlPullParser.START_TAG: {
261 xppParser.readStartTag( startTag );
262 Element newElement = startTag.getElement();
263 if ( parent != null ) {
264 parent.add( newElement );
265 }
266 else {
267 document.add( newElement );
268 }
269 parent = newElement;
270 break;
271 }
272 case XmlPullParser.END_TAG: {
273 xppParser.readEndTag( endTag );
274 if (parent != null) {
275 parent = parent.getParent();
276 }
277 break;
278 }
279 case XmlPullParser.CONTENT: {
280 String text = xppParser.readContent();
281 if ( parent != null ) {
282 parent.addText( text );
283 }
284 else {
285 throw new DocumentException( "Cannot have text content outside of the root document" );
286 }
287 break;
288 }
289 default: {
290 throw new DocumentException( "Error: unknown PullParser type: " + type );
291 }
292 }
293 }
294 }
295
296 protected DispatchHandler getDispatchHandler() {
297 if (dispatchHandler == null) {
298 dispatchHandler = new DispatchHandler();
299 }
300 return dispatchHandler;
301 }
302
303 protected void setDispatchHandler(DispatchHandler dispatchHandler) {
304 this.dispatchHandler = dispatchHandler;
305 }
306
307 /*** Factory method to create a Reader from the given InputStream.
308 */
309 protected Reader createReader(InputStream in) throws IOException {
310 return new BufferedReader( new InputStreamReader( in ) );
311 }
312 }
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360