1
2
3
4
5
6
7
8
9
10 package org.dom4j.io;
11
12 import java.util.ArrayList;
13 import java.util.List;
14
15 import org.dom4j.Branch;
16 import org.dom4j.Document;
17 import org.dom4j.DocumentFactory;
18 import org.dom4j.Element;
19 import org.dom4j.Namespace;
20 import org.dom4j.QName;
21 import org.dom4j.tree.NamespaceStack;
22
23 /*** <p><code>DOMReader</code> navigates a W3C DOM tree and creates
24 * a DOM4J tree from it.</p>
25 *
26 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
27 * @version $Revision: 1.15 $
28 */
29 public class DOMReader {
30
31 /*** <code>DocumentFactory</code> used to create new document objects */
32 private DocumentFactory factory;
33
34 /*** stack of <code>Namespace</code> and <code>QName</code> objects */
35 private NamespaceStack namespaceStack;
36
37
38 public DOMReader() {
39 this.factory = DocumentFactory.getInstance();
40 this.namespaceStack = new NamespaceStack(factory);
41 }
42
43 public DOMReader(DocumentFactory factory) {
44 this.factory = factory;
45 this.namespaceStack = new NamespaceStack(factory);
46 }
47
48 /*** @return the <code>DocumentFactory</code> used to create document objects
49 */
50 public DocumentFactory getDocumentFactory() {
51 return factory;
52 }
53
54 /*** <p>This sets the <code>DocumentFactory</code> used to create new documents.
55 * This method allows the building of custom DOM4J tree objects to be implemented
56 * easily using a custom derivation of {@link DocumentFactory}</p>
57 *
58 * @param factory <code>DocumentFactory</code> used to create DOM4J objects
59 */
60 public void setDocumentFactory(DocumentFactory factory) {
61 this.factory = factory;
62 this.namespaceStack.setDocumentFactory(factory);
63 }
64
65 public Document read(org.w3c.dom.Document domDocument) {
66 if ( domDocument instanceof Document ) {
67 return (Document) domDocument;
68 }
69 Document document = createDocument();
70
71 clearNamespaceStack();
72
73 org.w3c.dom.NodeList nodeList = domDocument.getChildNodes();
74 for ( int i = 0, size = nodeList.getLength(); i < size; i++ ) {
75 readTree( nodeList.item(i), document );
76 }
77 return document;
78 }
79
80
81
82 protected void readTree(org.w3c.dom.Node node, Branch current) {
83 Element element = null;
84 Document document = null;
85 if ( current instanceof Element ) {
86 element = (Element) current;
87 }
88 else {
89 document = (Document) current;
90 }
91 switch (node.getNodeType()) {
92 case org.w3c.dom.Node.ELEMENT_NODE:
93 readElement(node, current);
94 break;
95
96 case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE:
97 if ( current instanceof Element ) {
98 ((Element) current).addProcessingInstruction(
99 node.getNodeName(), node.getNodeValue()
100 );
101 }
102 else {
103 ((Document) current).addProcessingInstruction(
104 node.getNodeName(), node.getNodeValue()
105 );
106 }
107 break;
108
109 case org.w3c.dom.Node.COMMENT_NODE:
110 if ( current instanceof Element ) {
111 ((Element) current).addComment( node.getNodeValue() );
112 }
113 else {
114 ((Document) current).addComment( node.getNodeValue() );
115 }
116 break;
117
118 case org.w3c.dom.Node.DOCUMENT_TYPE_NODE:
119 org.w3c.dom.DocumentType domDocType
120 = (org.w3c.dom.DocumentType) node;
121
122 document.addDocType(
123 domDocType.getName(),
124 domDocType.getPublicId(),
125 domDocType.getSystemId()
126 );
127 break;
128
129 case org.w3c.dom.Node.TEXT_NODE:
130 element.addText( node.getNodeValue() );
131 break;
132
133 case org.w3c.dom.Node.CDATA_SECTION_NODE:
134 element.addCDATA( node.getNodeValue() );
135 break;
136
137
138 case org.w3c.dom.Node.ENTITY_REFERENCE_NODE: {
139
140 org.w3c.dom.Node firstChild = node.getFirstChild();
141 if ( firstChild != null ) {
142 element.addEntity(
143 node.getNodeName(),
144 firstChild.getNodeValue()
145 );
146 }
147 else {
148 element.addEntity( node.getNodeName(), "" );
149 }
150 }
151 break;
152
153 case org.w3c.dom.Node.ENTITY_NODE:
154 element.addEntity(
155 node.getNodeName(),
156 node.getNodeValue()
157 );
158 break;
159
160 default:
161 System.out.println( "WARNING: Unknown DOM node type: " + node.getNodeType() );
162 }
163 }
164
165 protected void readElement(org.w3c.dom.Node node, Branch current) {
166 int previouslyDeclaredNamespaces = namespaceStack.size();
167
168 String namespaceUri = node.getNamespaceURI();
169 String elementPrefix = node.getPrefix();
170 if (elementPrefix == null) {
171 elementPrefix = "";
172 }
173
174 org.w3c.dom.NamedNodeMap attributeList = node.getAttributes();
175 if (( attributeList != null ) && ( namespaceUri == null )) {
176
177 org.w3c.dom.Node attribute = attributeList.getNamedItem( "xmlns" );
178 if ( attribute != null ) {
179 namespaceUri = attribute.getNodeValue();
180 elementPrefix = "";
181 }
182 }
183
184 QName qName = namespaceStack.getQName( namespaceUri, node.getLocalName(), node.getNodeName() );
185 Element element = current.addElement(qName);
186
187 if ( attributeList != null ) {
188 int size = attributeList.getLength();
189 List attributes = new ArrayList(size);
190 for ( int i = 0; i < size; i++ ) {
191 org.w3c.dom.Node attribute = attributeList.item(i);
192
193
194 String name = attribute.getNodeName();
195 if (name.startsWith("xmlns")) {
196 String prefix = getPrefix(name);
197 String uri = attribute.getNodeValue();
198
199
200 Namespace namespace = namespaceStack.addNamespace( prefix, uri );
201 element.add( namespace );
202
203 }
204 else {
205 attributes.add( attribute );
206 }
207 }
208
209
210 size = attributes.size();
211 for ( int i = 0; i < size; i++ ) {
212 org.w3c.dom.Node attribute = (org.w3c.dom.Node) attributes.get(i);
213 QName attributeQName = namespaceStack.getQName(
214 attribute.getNamespaceURI(),
215 attribute.getLocalName(),
216 attribute.getNodeName()
217 );
218 element.addAttribute( attributeQName, attribute.getNodeValue() );
219 }
220 }
221
222
223 org.w3c.dom.NodeList children = node.getChildNodes();
224 for ( int i = 0, size = children.getLength(); i < size; i++ ) {
225 org.w3c.dom.Node child = children.item(i);
226 readTree( child, element );
227 }
228
229
230 while (namespaceStack.size() > previouslyDeclaredNamespaces) {
231 namespaceStack.pop();
232 }
233 }
234
235 protected Namespace getNamespace(String prefix, String uri) {
236 return getDocumentFactory().createNamespace(prefix, uri);
237 }
238
239 protected Document createDocument() {
240 return getDocumentFactory().createDocument();
241 }
242
243 protected void clearNamespaceStack() {
244 namespaceStack.clear();
245 if ( ! namespaceStack.contains( Namespace.XML_NAMESPACE ) ) {
246 namespaceStack.push( Namespace.XML_NAMESPACE );
247 }
248 }
249
250 private String getPrefix(String xmlnsDecl) {
251 int index = xmlnsDecl.indexOf(':', 5);
252 if (index != -1) {
253 return xmlnsDecl.substring(index + 1);
254 } else {
255 return "";
256 }
257 }
258 }
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306