View Javadoc

1   /*
2    * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
3    * 
4    * This software is open source. 
5    * See the bottom of this file for the licence.
6    * 
7    * $Id: XPPReader.java,v 1.5 2004/06/25 08:03:38 maartenc Exp $
8    */
9   
10  package org.dom4j.io;
11  
12  import java.io.BufferedReader;
13  import java.io.File;
14  import java.io.FileReader;
15  import java.io.IOException;
16  import java.io.InputStream;
17  import java.io.InputStreamReader;
18  import java.io.Reader;
19  import java.net.URL;
20  
21  import org.dom4j.Document;
22  import org.dom4j.DocumentException;
23  import org.dom4j.DocumentFactory;
24  import org.dom4j.Element;
25  import org.dom4j.ElementHandler;
26  import org.dom4j.xpp.ProxyXmlStartTag;
27  import org.gjt.xpp.XmlEndTag;
28  import org.gjt.xpp.XmlPullParser;
29  import org.gjt.xpp.XmlPullParserException;
30  import org.gjt.xpp.XmlPullParserFactory;
31  
32  /*** <p><code>XPPReader</code> is a Reader of DOM4J documents that 
33    * uses the fast 
34    * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x</a>.
35    * It does not currently support comments, CDATA or ProcessingInstructions or
36    * validation but it is very fast for use in SOAP style environments.</p>
37    *
38    * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
39    * @version $Revision: 1.5 $
40    */
41  public class XPPReader {
42  
43      /*** <code>DocumentFactory</code> used to create new document objects */
44      private DocumentFactory factory;
45      
46      /*** <code>XmlPullParser</code> used to parse XML */
47      private XmlPullParser xppParser;
48      
49      /*** <code>XmlPullParser</code> used to parse XML */
50      private XmlPullParserFactory xppFactory;
51      
52      /*** DispatchHandler to call when each <code>Element</code> is encountered */
53      private DispatchHandler dispatchHandler;
54   
55          
56      
57      public XPPReader() {
58      }
59  
60      public XPPReader(DocumentFactory factory) {
61          this.factory = factory;
62      }
63  
64      
65      
66          
67      /*** <p>Reads a Document from the given <code>File</code></p>
68        *
69        * @param file is the <code>File</code> to read from.
70        * @return the newly created Document instance
71        * @throws DocumentException if an error occurs during parsing.
72        * @throws MalformedURLException if a URL could not be made for the given File
73        */
74      public Document read(File file) throws DocumentException, IOException, XmlPullParserException {
75          String systemID = file.getAbsolutePath();
76          return read( new BufferedReader( new FileReader( file ) ), systemID );
77      }
78      
79      /*** <p>Reads a Document from the given <code>URL</code></p>
80        *
81        * @param url <code>URL</code> to read from.
82        * @return the newly created Document instance
83        * @throws DocumentException if an error occurs during parsing.
84        */
85      public Document read(URL url) throws DocumentException, IOException, XmlPullParserException {
86          String systemID = url.toExternalForm();
87          return read( createReader( url.openStream() ), systemID);
88      }
89      
90      /*** <p>Reads a Document from the given URL or filename.</p>
91        *
92        * <p>
93        * If the systemID contains a <code>':'</code> character then it is
94        * assumed to be a URL otherwise its assumed to be a file name.
95        * If you want finer grained control over this mechansim then please
96        * explicitly pass in either a {@link URL} or a {@link File} instance
97        * instead of a {@link String} to denote the source of the document.
98        * </p>
99        *
100       * @param systemID is a URL for a document or a file name.
101       * @return the newly created Document instance
102       * @throws DocumentException if an error occurs during parsing.
103       * @throws MalformedURLException if a URL could not be made for the given File
104       */
105     public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException {
106         if ( systemID.indexOf( ':' ) >= 0 ) {
107             // lets assume its a URL
108             return read(new URL(systemID));
109         }
110         else {
111             // lets assume that we are given a file name
112             return read( new File(systemID) );
113         }
114     }
115 
116     /*** <p>Reads a Document from the given stream</p>
117       *
118       * @param in <code>InputStream</code> to read from.
119       * @return the newly created Document instance
120       * @throws DocumentException if an error occurs during parsing.
121       */
122     public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException {
123         return read( createReader( in ) );
124     }
125 
126     /*** <p>Reads a Document from the given <code>Reader</code></p>
127       *
128       * @param reader is the reader for the input
129       * @return the newly created Document instance
130       * @throws DocumentException if an error occurs during parsing.
131       */
132     public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException {
133         getXPPParser().setInput(reader);
134         return parseDocument();
135     }
136 
137     /*** <p>Reads a Document from the given array of characters</p>
138       *
139       * @param text is the text to parse
140       * @return the newly created Document instance
141       * @throws DocumentException if an error occurs during parsing.
142       */
143     public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException {
144         getXPPParser().setInput(text);
145         return parseDocument();
146     }
147 
148     /*** <p>Reads a Document from the given stream</p>
149       *
150       * @param in <code>InputStream</code> to read from.
151       * @param systemID is the URI for the input
152       * @return the newly created Document instance
153       * @throws DocumentException if an error occurs during parsing.
154       */
155     public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException {
156         return read( createReader( in ), systemID );
157     }
158 
159     /*** <p>Reads a Document from the given <code>Reader</code></p>
160       *
161       * @param reader is the reader for the input
162       * @param systemID is the URI for the input
163       * @return the newly created Document instance
164       * @throws DocumentException if an error occurs during parsing.
165       */
166     public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException {
167         Document document = read( reader );
168         document.setName( systemID );
169         return document;
170     }
171 
172     
173     // Properties
174     //-------------------------------------------------------------------------                
175 
176     public XmlPullParser getXPPParser() throws XmlPullParserException {
177         if ( xppParser == null ) {
178             xppParser = getXPPFactory().newPullParser();
179         }
180         return xppParser;
181     }
182     
183     public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
184         if ( xppFactory == null ) {
185             xppFactory = XmlPullParserFactory.newInstance();
186         }
187         return xppFactory;
188     }
189 
190     public void setXPPFactory(XmlPullParserFactory xppFactory) {
191         this.xppFactory = xppFactory;
192     }
193     
194     /*** @return the <code>DocumentFactory</code> used to create document objects
195       */
196     public DocumentFactory getDocumentFactory() {
197         if (factory == null) {
198             factory = DocumentFactory.getInstance();
199         }
200         return factory;
201     }
202 
203     /*** <p>This sets the <code>DocumentFactory</code> used to create new documents.
204       * This method allows the building of custom DOM4J tree objects to be implemented
205       * easily using a custom derivation of {@link DocumentFactory}</p>
206       *
207       * @param factory <code>DocumentFactory</code> used to create DOM4J objects
208       */
209     public void setDocumentFactory(DocumentFactory factory) {
210         this.factory = factory;
211     }
212 
213     
214     /*** Adds the <code>ElementHandler</code> to be called when the 
215       * specified path is encounted.
216       *
217       * @param path is the path to be handled
218       * @param handler is the <code>ElementHandler</code> to be called
219       * by the event based processor.
220       */
221     public void addHandler(String path, ElementHandler handler) {
222         getDispatchHandler().addHandler(path, handler);   
223     }
224     
225     /*** Removes the <code>ElementHandler</code> from the event based
226       * processor, for the specified path.
227       *
228       * @param path is the path to remove the <code>ElementHandler</code> for.
229       */
230     public void removeHandler(String path) {
231         getDispatchHandler().removeHandler(path);   
232     }
233     
234     /*** When multiple <code>ElementHandler</code> instances have been 
235       * registered, this will set a default <code>ElementHandler</code>
236       * to be called for any path which does <b>NOT</b> have a handler
237       * registered.
238       * @param handler is the <code>ElementHandler</code> to be called
239       * by the event based processor.
240       */
241     public void setDefaultHandler(ElementHandler handler) {
242         getDispatchHandler().setDefaultHandler(handler);   
243     }
244     
245     // Implementation methods    
246     //-------------------------------------------------------------------------                    
247     protected Document parseDocument() throws DocumentException, IOException, XmlPullParserException {
248         Document document = getDocumentFactory().createDocument();
249         Element parent = null;
250         XmlPullParser xppParser = getXPPParser();
251         xppParser.setNamespaceAware(true);
252         ProxyXmlStartTag startTag = new ProxyXmlStartTag();
253         XmlEndTag endTag = xppFactory.newEndTag();
254         while (true) {
255             int type = xppParser.next();
256             switch (type) {
257                 case XmlPullParser.END_DOCUMENT: {
258                     return document;
259                 }
260                 case XmlPullParser.START_TAG: {
261                     xppParser.readStartTag( startTag );
262                     Element newElement = startTag.getElement();
263                     if ( parent != null ) {
264                         parent.add( newElement );
265                     }
266                     else {
267                         document.add( newElement );
268                     }
269                     parent = newElement;
270                     break;
271                 }
272                 case XmlPullParser.END_TAG: {
273                     xppParser.readEndTag( endTag );
274                     if (parent != null) {
275                         parent = parent.getParent();
276                     }
277                     break;
278                 }
279                 case XmlPullParser.CONTENT: {
280                     String text = xppParser.readContent();
281                     if ( parent != null ) {
282                         parent.addText( text );
283                     }
284                     else {
285                         throw new DocumentException( "Cannot have text content outside of the root document" );
286                     }
287                     break;
288                 }
289                 default: {
290                     throw new DocumentException( "Error: unknown PullParser type: " + type );
291                 }
292             }
293         }
294     }
295 
296     protected DispatchHandler getDispatchHandler() {
297         if (dispatchHandler == null) {
298             dispatchHandler = new DispatchHandler();
299         }
300         return dispatchHandler;   
301     }
302     
303     protected void setDispatchHandler(DispatchHandler dispatchHandler) {
304         this.dispatchHandler = dispatchHandler;
305     }
306     
307     /*** Factory method to create a Reader from the given InputStream.
308      */
309     protected Reader createReader(InputStream in) throws IOException {
310         return new BufferedReader( new InputStreamReader( in ) );
311     }    
312 }
313 
314 
315 
316 
317 /*
318  * Redistribution and use of this software and associated documentation
319  * ("Software"), with or without modification, are permitted provided
320  * that the following conditions are met:
321  *
322  * 1. Redistributions of source code must retain copyright
323  *    statements and notices.  Redistributions must also contain a
324  *    copy of this document.
325  *
326  * 2. Redistributions in binary form must reproduce the
327  *    above copyright notice, this list of conditions and the
328  *    following disclaimer in the documentation and/or other
329  *    materials provided with the distribution.
330  *
331  * 3. The name "DOM4J" must not be used to endorse or promote
332  *    products derived from this Software without prior written
333  *    permission of MetaStuff, Ltd.  For written permission,
334  *    please contact dom4j-info@metastuff.com.
335  *
336  * 4. Products derived from this Software may not be called "DOM4J"
337  *    nor may "DOM4J" appear in their names without prior written
338  *    permission of MetaStuff, Ltd. DOM4J is a registered
339  *    trademark of MetaStuff, Ltd.
340  *
341  * 5. Due credit should be given to the DOM4J Project - 
342  *    http://www.dom4j.org
343  *
344  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
345  * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
346  * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
347  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
348  * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
349  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
350  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
351  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
352  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
353  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
354  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
355  * OF THE POSSIBILITY OF SUCH DAMAGE.
356  *
357  * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
358  *
359  * $Id: XPPReader.java,v 1.5 2004/06/25 08:03:38 maartenc Exp $
360  */