View Javadoc

1   /*
2    * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    *
7    * $Id: DOMReader.java,v 1.15 2004/06/25 08:03:36 maartenc Exp $
8    */
9   
10  package org.dom4j.io;
11  
12  import java.util.ArrayList;
13  import java.util.List;
14  
15  import org.dom4j.Branch;
16  import org.dom4j.Document;
17  import org.dom4j.DocumentFactory;
18  import org.dom4j.Element;
19  import org.dom4j.Namespace;
20  import org.dom4j.QName;
21  import org.dom4j.tree.NamespaceStack;
22  
23  /*** <p><code>DOMReader</code> navigates a W3C DOM tree and creates
24    * a DOM4J tree from it.</p>
25    *
26    * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
27    * @version $Revision: 1.15 $
28    */
29  public class DOMReader {
30  
31      /*** <code>DocumentFactory</code> used to create new document objects */
32      private DocumentFactory factory;
33  
34      /*** stack of <code>Namespace</code> and <code>QName</code> objects */
35      private NamespaceStack namespaceStack;
36  
37  
38      public DOMReader() {
39          this.factory = DocumentFactory.getInstance();
40          this.namespaceStack = new NamespaceStack(factory);
41      }
42  
43      public DOMReader(DocumentFactory factory) {
44          this.factory = factory;
45          this.namespaceStack = new NamespaceStack(factory);
46      }
47  
48      /*** @return the <code>DocumentFactory</code> used to create document objects
49        */
50      public DocumentFactory getDocumentFactory() {
51          return factory;
52      }
53  
54      /*** <p>This sets the <code>DocumentFactory</code> used to create new documents.
55        * This method allows the building of custom DOM4J tree objects to be implemented
56        * easily using a custom derivation of {@link DocumentFactory}</p>
57        *
58        * @param factory <code>DocumentFactory</code> used to create DOM4J objects
59        */
60      public void setDocumentFactory(DocumentFactory factory) {
61          this.factory = factory;
62          this.namespaceStack.setDocumentFactory(factory);
63      }
64  
65      public Document read(org.w3c.dom.Document domDocument) {
66          if ( domDocument instanceof Document ) {
67              return (Document) domDocument;
68          }
69          Document document = createDocument();
70  
71          clearNamespaceStack();
72  
73          org.w3c.dom.NodeList nodeList = domDocument.getChildNodes();
74          for ( int i = 0, size = nodeList.getLength(); i < size; i++ ) {
75              readTree( nodeList.item(i), document );
76          }
77          return document;
78      }
79  
80  
81      // Implementation methods
82      protected void readTree(org.w3c.dom.Node node, Branch current) {
83          Element element = null;
84          Document document = null;
85          if ( current instanceof Element ) {
86              element = (Element) current;
87          }
88          else {
89              document = (Document) current;
90          }
91          switch (node.getNodeType()) {
92              case org.w3c.dom.Node.ELEMENT_NODE:
93                  readElement(node, current);
94                  break;
95  
96              case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE:
97                  if ( current instanceof Element ) {
98                      ((Element) current).addProcessingInstruction(
99                          node.getNodeName(), node.getNodeValue()
100                     );
101                 }
102                 else {
103                     ((Document) current).addProcessingInstruction(
104                         node.getNodeName(), node.getNodeValue()
105                     );
106                 }
107                 break;
108 
109             case org.w3c.dom.Node.COMMENT_NODE:
110                 if ( current instanceof Element ) {
111                     ((Element) current).addComment( node.getNodeValue() );
112                 }
113                 else {
114                     ((Document) current).addComment( node.getNodeValue() );
115                 }
116                 break;
117 
118             case org.w3c.dom.Node.DOCUMENT_TYPE_NODE:
119                 org.w3c.dom.DocumentType domDocType
120                     = (org.w3c.dom.DocumentType) node;
121 
122                 document.addDocType(
123                     domDocType.getName(),
124                     domDocType.getPublicId(),
125                     domDocType.getSystemId()
126                 );
127                 break;
128 
129             case org.w3c.dom.Node.TEXT_NODE:
130                 element.addText( node.getNodeValue() );
131                 break;
132 
133             case org.w3c.dom.Node.CDATA_SECTION_NODE:
134                 element.addCDATA( node.getNodeValue() );
135                 break;
136 
137 
138             case org.w3c.dom.Node.ENTITY_REFERENCE_NODE: {
139                 // is there a better way to get the value of an entity?
140                     org.w3c.dom.Node firstChild = node.getFirstChild();
141                     if ( firstChild != null ) {
142                         element.addEntity(
143                             node.getNodeName(),
144                             firstChild.getNodeValue()
145                         );
146                     }
147                     else {
148                         element.addEntity( node.getNodeName(), "" );
149                     }
150                 }
151                 break;
152 
153             case org.w3c.dom.Node.ENTITY_NODE:
154                 element.addEntity(
155                     node.getNodeName(),
156                     node.getNodeValue()
157                 );
158                 break;
159 
160             default:
161                 System.out.println( "WARNING: Unknown DOM node type: " + node.getNodeType() );
162         }
163     }
164 
165     protected void readElement(org.w3c.dom.Node node, Branch current) {
166         int previouslyDeclaredNamespaces = namespaceStack.size();
167 
168         String namespaceUri = node.getNamespaceURI();
169         String elementPrefix = node.getPrefix();
170         if (elementPrefix == null) {
171             elementPrefix = "";
172         }
173         
174         org.w3c.dom.NamedNodeMap attributeList = node.getAttributes();
175         if (( attributeList != null ) && ( namespaceUri == null )) {
176             // test if we have an "xmlns" attribute
177             org.w3c.dom.Node attribute = attributeList.getNamedItem( "xmlns" );
178             if ( attribute != null ) {
179                 namespaceUri = attribute.getNodeValue();
180                 elementPrefix = "";
181             }
182         }
183 
184         QName qName = namespaceStack.getQName( namespaceUri, node.getLocalName(), node.getNodeName() );
185         Element element = current.addElement(qName);
186         
187         if ( attributeList != null ) {
188             int size = attributeList.getLength();
189             List attributes = new ArrayList(size);
190             for ( int i = 0; i < size; i++ ) {
191                 org.w3c.dom.Node attribute = attributeList.item(i);
192 
193                 // Define all namespaces first then process attributes later
194                 String name = attribute.getNodeName();
195                 if (name.startsWith("xmlns")) {
196                     String prefix = getPrefix(name);
197                     String uri = attribute.getNodeValue();
198 
199 //                    if (!uri.equals(namespaceUri) || !prefix.equals(elementPrefix)) {
200                         Namespace namespace = namespaceStack.addNamespace( prefix, uri );
201                         element.add( namespace );
202 //                    }
203                 }
204                 else {
205                     attributes.add( attribute );
206                 }
207             }
208 
209             // now add the attributes, the namespaces should be available
210             size = attributes.size();
211             for ( int i = 0; i < size; i++ ) {
212                 org.w3c.dom.Node attribute = (org.w3c.dom.Node) attributes.get(i);
213                 QName attributeQName = namespaceStack.getQName(
214                     attribute.getNamespaceURI(),
215                     attribute.getLocalName(),
216                     attribute.getNodeName()
217                 );
218                 element.addAttribute( attributeQName, attribute.getNodeValue() );
219             }
220         }
221 
222         // Recurse on child nodes
223         org.w3c.dom.NodeList children = node.getChildNodes();
224         for ( int i = 0, size = children.getLength(); i < size; i++ ) {
225             org.w3c.dom.Node child = children.item(i);
226             readTree( child, element );
227         }
228 
229         // pop namespaces from the stack
230         while (namespaceStack.size() > previouslyDeclaredNamespaces) {
231             namespaceStack.pop();
232         }
233     }
234 
235     protected Namespace getNamespace(String prefix, String uri) {
236         return getDocumentFactory().createNamespace(prefix, uri);
237     }
238 
239     protected Document createDocument() {
240         return getDocumentFactory().createDocument();
241     }
242 
243     protected void clearNamespaceStack() {
244         namespaceStack.clear();
245         if ( ! namespaceStack.contains( Namespace.XML_NAMESPACE ) ) {
246             namespaceStack.push( Namespace.XML_NAMESPACE );
247         }
248     }
249     
250     private String getPrefix(String xmlnsDecl) {
251         int index = xmlnsDecl.indexOf(':', 5);
252         if (index != -1) {
253             return xmlnsDecl.substring(index + 1);
254         } else {
255             return "";
256         }
257     }
258 }
259 
260 
261 
262 
263 /*
264  * Redistribution and use of this software and associated documentation
265  * ("Software"), with or without modification, are permitted provided
266  * that the following conditions are met:
267  *
268  * 1. Redistributions of source code must retain copyright
269  *    statements and notices.  Redistributions must also contain a
270  *    copy of this document.
271  *
272  * 2. Redistributions in binary form must reproduce the
273  *    above copyright notice, this list of conditions and the
274  *    following disclaimer in the documentation and/or other
275  *    materials provided with the distribution.
276  *
277  * 3. The name "DOM4J" must not be used to endorse or promote
278  *    products derived from this Software without prior written
279  *    permission of MetaStuff, Ltd.  For written permission,
280  *    please contact dom4j-info@metastuff.com.
281  *
282  * 4. Products derived from this Software may not be called "DOM4J"
283  *    nor may "DOM4J" appear in their names without prior written
284  *    permission of MetaStuff, Ltd. DOM4J is a registered
285  *    trademark of MetaStuff, Ltd.
286  *
287  * 5. Due credit should be given to the DOM4J Project - 
288  *    http://www.dom4j.org
289  *
290  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
291  * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
292  * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
293  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
294  * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
295  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
296  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
297  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
298  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
299  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
300  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
301  * OF THE POSSIBILITY OF SUCH DAMAGE.
302  *
303  * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
304  *
305  * $Id: DOMReader.java,v 1.15 2004/06/25 08:03:36 maartenc Exp $
306  */