1
2
3
4
5
6
7
8
9
10 package org.dom4j.io;
11
12 import java.io.BufferedWriter;
13 import java.io.IOException;
14 import java.io.OutputStream;
15 import java.io.OutputStreamWriter;
16 import java.io.UnsupportedEncodingException;
17 import java.io.Writer;
18 import java.util.HashMap;
19 import java.util.Iterator;
20 import java.util.List;
21 import java.util.Map;
22 import java.util.StringTokenizer;
23
24 import org.dom4j.Attribute;
25 import org.dom4j.CDATA;
26 import org.dom4j.Comment;
27 import org.dom4j.Document;
28 import org.dom4j.DocumentType;
29 import org.dom4j.Element;
30 import org.dom4j.Entity;
31 import org.dom4j.Namespace;
32 import org.dom4j.Node;
33 import org.dom4j.ProcessingInstruction;
34 import org.dom4j.Text;
35 import org.dom4j.tree.NamespaceStack;
36 import org.xml.sax.Attributes;
37 import org.xml.sax.InputSource;
38 import org.xml.sax.Locator;
39 import org.xml.sax.SAXException;
40 import org.xml.sax.SAXNotRecognizedException;
41 import org.xml.sax.SAXNotSupportedException;
42 import org.xml.sax.XMLReader;
43 import org.xml.sax.ext.LexicalHandler;
44 import org.xml.sax.helpers.XMLFilterImpl;
45
46 /***<p><code>XMLWriter</code> takes a DOM4J tree and formats it to a
47 * stream as XML.
48 * It can also consume SAX events, so it can be used by SAX clients: this object
49 * implements the {@link org.xml.sax.ContentHandler} and {@link LexicalHandler}
50 * interfaces as well. This formatter performs typical document
51 * formatting. The XML declaration and processing instructions are
52 * always on their own lines. An {@link OutputFormat} object can be
53 * used to define how whitespace is handled when printing and allows various
54 * configuration options, such as to allow suppression of the XML declaration,
55 * the encoding declaration or whether empty documents are collapsed.</p>
56 *
57 * <p> There are <code>write(...)</code> methods to print any of the
58 * standard DOM4J classes, including <code>Document</code> and
59 * <code>Element</code>, to either a <code>Writer</code> or an
60 * <code>OutputStream</code>. Warning: using your own
61 * <code>Writer</code> may cause the writer's preferred character
62 * encoding to be ignored. If you use encodings other than UTF8, we
63 * recommend using the method that takes an OutputStream instead.
64 * </p>
65 *
66 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
67 * @author Joseph Bowbeer
68 * @version $Revision: 1.76 $
69 */
70 public class XMLWriter extends XMLFilterImpl implements LexicalHandler {
71
72 private static final String PAD_TEXT = " ";
73
74 protected static final String[] LEXICAL_HANDLER_NAMES = {
75 "http://xml.org/sax/properties/lexical-handler",
76 "http://xml.org/sax/handlers/LexicalHandler"
77 };
78
79 protected static final OutputFormat DEFAULT_FORMAT = new OutputFormat();
80
81 /*** Should entityRefs by resolved when writing ? */
82 private boolean resolveEntityRefs = true;
83
84 /*** Stores the last type of node written so algorithms can refer to the
85 * previous node type */
86 protected int lastOutputNodeType;
87
88 /*** Stores the xml:space attribute value of preserve for whitespace flag */
89 protected boolean preserve=false;
90
91 /*** The Writer used to output to */
92 protected Writer writer;
93
94 /*** The Stack of namespaceStack written so far */
95 private NamespaceStack namespaceStack = new NamespaceStack();
96
97 /*** The format used by this writer */
98 private OutputFormat format;
99
100 /*** whether we should escape text */
101 private boolean escapeText = true;
102 /*** The initial number of indentations (so you can print a whole
103 document indented, if you like) **/
104 private int indentLevel = 0;
105
106 /*** buffer used when escaping strings */
107 private StringBuffer buffer = new StringBuffer();
108
109 /*** whether we have added characters before from the same chunk of characters */
110 private boolean charactersAdded = false;
111 private char lastChar;
112
113 /*** Whether a flush should occur after writing a document */
114 private boolean autoFlush;
115
116 /*** Lexical handler we should delegate to */
117 private LexicalHandler lexicalHandler;
118
119 /*** Whether comments should appear inside DTD declarations - defaults to false */
120 private boolean showCommentsInDTDs;
121
122 /*** Is the writer curerntly inside a DTD definition? */
123 private boolean inDTD;
124
125 /*** The namespaces used for the current element when consuming SAX events */
126 private Map namespacesMap;
127
128 /***
129 * what is the maximum allowed character code
130 * such as 127 in US-ASCII (7 bit) or 255 in ISO-* (8 bit)
131 * or -1 to not escape any characters (other than the special XML characters like < > &)
132 */
133 private int maximumAllowedCharacter;
134
135
/**
 * Creates a writer that outputs to the given {@link Writer} using
 * {@link #DEFAULT_FORMAT}.
 *
 * @param writer destination of the generated output
 */
public XMLWriter(Writer writer) {
    this(writer, DEFAULT_FORMAT);
}
139
/**
 * Creates a writer that outputs to the given {@link Writer} with the given
 * format. Automatic flushing is left at its default (off).
 *
 * @param writer destination of the generated output
 * @param format format to apply while writing
 */
public XMLWriter(Writer writer, OutputFormat format) {
    this.format = format;
    this.writer = writer;
    this.namespaceStack.push(Namespace.NO_NAMESPACE);
}
145
146 public XMLWriter() {
147 this.format = DEFAULT_FORMAT;
148 this.writer = new BufferedWriter( new OutputStreamWriter( System.out ) );
149 this.autoFlush = true;
150 namespaceStack.push(Namespace.NO_NAMESPACE);
151 }
152
153 public XMLWriter(OutputStream out) throws UnsupportedEncodingException {
154 this.format = DEFAULT_FORMAT;
155 this.writer = createWriter(out, format.getEncoding());
156 this.autoFlush = true;
157 namespaceStack.push(Namespace.NO_NAMESPACE);
158 }
159
160 public XMLWriter(OutputStream out, OutputFormat format) throws UnsupportedEncodingException {
161 this.format = format;
162 this.writer = createWriter(out, format.getEncoding());
163 this.autoFlush = true;
164 namespaceStack.push(Namespace.NO_NAMESPACE);
165 }
166
167 public XMLWriter(OutputFormat format) throws UnsupportedEncodingException {
168 this.format = format;
169 this.writer = createWriter( System.out, format.getEncoding() );
170 this.autoFlush = true;
171 namespaceStack.push(Namespace.NO_NAMESPACE);
172 }
173
174 public void setWriter(Writer writer) {
175 this.writer = writer;
176 this.autoFlush = false;
177 }
178
179 public void setOutputStream(OutputStream out) throws UnsupportedEncodingException {
180 this.writer = createWriter(out, format.getEncoding());
181 this.autoFlush = true;
182 }
183
184 /***
185 * @return true if text thats output should be escaped.
186 * This is enabled by default. It could be disabled if
187 * the output format is textual, like in XSLT where we can have
188 * xml, html or text output.
189 */
190 public boolean isEscapeText() {
191 return escapeText;
192 }
193
194 /***
195 * Sets whether text output should be escaped or not.
196 * This is enabled by default. It could be disabled if
197 * the output format is textual, like in XSLT where we can have
198 * xml, html or text output.
199 */
200 public void setEscapeText(boolean escapeText) {
201 this.escapeText = escapeText;
202 }
203
204
205 /*** Set the initial indentation level. This can be used to output
206 * a document (or, more likely, an element) starting at a given
207 * indent level, so it's not always flush against the left margin.
208 * Default: 0
209 *
210 * @param indentLevel the number of indents to start with
211 */
212 public void setIndentLevel(int indentLevel) {
213 this.indentLevel = indentLevel;
214 }
215
216 /***
217 * Returns the maximum allowed character code that should be allowed
218 * unescaped which defaults to 127 in US-ASCII (7 bit) or
219 * 255 in ISO-* (8 bit).
220 */
221 public int getMaximumAllowedCharacter() {
222 if (maximumAllowedCharacter == 0) {
223 maximumAllowedCharacter = defaultMaximumAllowedCharacter();
224 }
225 return maximumAllowedCharacter;
226 }
227
228 /***
229 * Sets the maximum allowed character code that should be allowed
230 * unescaped
231 * such as 127 in US-ASCII (7 bit) or 255 in ISO-* (8 bit)
232 * or -1 to not escape any characters (other than the special XML characters like < > &)
233 *
234 * If this is not explicitly set then it is defaulted from the encoding.
235 *
236 * @param maximumAllowedCharacter The maximumAllowedCharacter to set
237 */
238 public void setMaximumAllowedCharacter(int maximumAllowedCharacter) {
239 this.maximumAllowedCharacter = maximumAllowedCharacter;
240 }
241
242 /*** Flushes the underlying Writer */
243 public void flush() throws IOException {
244 writer.flush();
245 }
246
247 /*** Closes the underlying Writer */
248 public void close() throws IOException {
249 writer.close();
250 }
251
252 /*** Writes the new line text to the underlying Writer */
253 public void println() throws IOException {
254 writer.write( format.getLineSeparator() );
255 }
256
257 /*** Writes the given {@link Attribute}.
258 *
259 * @param attribute <code>Attribute</code> to output.
260 */
261 public void write(Attribute attribute) throws IOException {
262 writeAttribute(attribute);
263
264 if ( autoFlush ) {
265 flush();
266 }
267 }
268
269
270 /*** <p>This will print the <code>Document</code> to the current Writer.</p>
271 *
272 * <p> Warning: using your own Writer may cause the writer's
273 * preferred character encoding to be ignored. If you use
274 * encodings other than UTF8, we recommend using the method that
275 * takes an OutputStream instead. </p>
276 *
277 * <p>Note: as with all Writers, you may need to flush() yours
278 * after this method returns.</p>
279 *
280 * @param doc <code>Document</code> to format.
281 * @throws <code>IOException</code> - if there's any problem writing.
282 **/
283 public void write(Document doc) throws IOException {
284 writeDeclaration();
285
286 if (doc.getDocType() != null) {
287 indent();
288 writeDocType(doc.getDocType());
289 }
290
291 for ( int i = 0, size = doc.nodeCount(); i < size; i++ ) {
292 Node node = doc.node(i);
293 writeNode( node );
294 }
295 writePrintln();
296
297 if ( autoFlush ) {
298 flush();
299 }
300 }
301
302 /*** <p>Writes the <code>{@link Element}</code>, including
303 * its <code>{@link Attribute}</code>s, and its value, and all
304 * its content (child nodes) to the current Writer.</p>
305 *
306 * @param element <code>Element</code> to output.
307 */
308 public void write(Element element) throws IOException {
309 writeElement(element);
310
311 if ( autoFlush ) {
312 flush();
313 }
314 }
315
316
317 /*** Writes the given {@link CDATA}.
318 *
319 * @param cdata <code>CDATA</code> to output.
320 */
321 public void write(CDATA cdata) throws IOException {
322 writeCDATA( cdata.getText() );
323
324 if ( autoFlush ) {
325 flush();
326 }
327 }
328
329 /*** Writes the given {@link Comment}.
330 *
331 * @param comment <code>Comment</code> to output.
332 */
333 public void write(Comment comment) throws IOException {
334 writeComment( comment.getText() );
335
336 if ( autoFlush ) {
337 flush();
338 }
339 }
340
341 /*** Writes the given {@link DocumentType}.
342 *
343 * @param docType <code>DocumentType</code> to output.
344 */
345 public void write(DocumentType docType) throws IOException {
346 writeDocType(docType);
347
348 if ( autoFlush ) {
349 flush();
350 }
351 }
352
353
354 /*** Writes the given {@link Entity}.
355 *
356 * @param entity <code>Entity</code> to output.
357 */
358 public void write(Entity entity) throws IOException {
359 writeEntity( entity );
360
361 if ( autoFlush ) {
362 flush();
363 }
364 }
365
366
367 /*** Writes the given {@link Namespace}.
368 *
369 * @param namespace <code>Namespace</code> to output.
370 */
371 public void write(Namespace namespace) throws IOException {
372 writeNamespace(namespace);
373
374 if ( autoFlush ) {
375 flush();
376 }
377 }
378
379 /*** Writes the given {@link ProcessingInstruction}.
380 *
381 * @param processingInstruction <code>ProcessingInstruction</code> to output.
382 */
383 public void write(ProcessingInstruction processingInstruction) throws IOException {
384 writeProcessingInstruction(processingInstruction);
385
386 if ( autoFlush ) {
387 flush();
388 }
389 }
390
391 /*** <p>Print out a {@link String}, Perfoms
392 * the necessary entity escaping and whitespace stripping.</p>
393 *
394 * @param text is the text to output
395 */
396 public void write(String text) throws IOException {
397 writeString(text);
398
399 if ( autoFlush ) {
400 flush();
401 }
402 }
403
404 /*** Writes the given {@link Text}.
405 *
406 * @param text <code>Text</code> to output.
407 */
408 public void write(Text text) throws IOException {
409 writeString(text.getText());
410
411 if ( autoFlush ) {
412 flush();
413 }
414 }
415
416 /*** Writes the given {@link Node}.
417 *
418 * @param node <code>Node</code> to output.
419 */
420 public void write(Node node) throws IOException {
421 writeNode(node);
422
423 if ( autoFlush ) {
424 flush();
425 }
426 }
427
428 /*** Writes the given object which should be a String, a Node or a List
429 * of Nodes.
430 *
431 * @param object is the object to output.
432 */
433 public void write(Object object) throws IOException {
434 if (object instanceof Node) {
435 write((Node) object);
436 }
437 else if (object instanceof String) {
438 write((String) object);
439 }
440 else if (object instanceof List) {
441 List list = (List) object;
442 for ( int i = 0, size = list.size(); i < size; i++ ) {
443 write( list.get(i) );
444 }
445 }
446 else if (object != null) {
447 throw new IOException( "Invalid object: " + object );
448 }
449 }
450
451
452 /*** <p>Writes the opening tag of an {@link Element},
453 * including its {@link Attribute}s
454 * but without its content.</p>
455 *
456 * @param element <code>Element</code> to output.
457 */
458 public void writeOpen(Element element) throws IOException {
459 writer.write("<");
460 writer.write( element.getQualifiedName() );
461 writeAttributes(element);
462 writer.write(">");
463 }
464
465 /*** <p>Writes the closing tag of an {@link Element}</p>
466 *
467 * @param element <code>Element</code> to output.
468 */
469 public void writeClose(Element element) throws IOException {
470 writeClose( element.getQualifiedName() );
471 }
472
473
474
475
476 public void parse(InputSource source) throws IOException, SAXException {
477 installLexicalHandler();
478 super.parse(source);
479 }
480
481
482 public void setProperty(String name, Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
483 for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
484 if (LEXICAL_HANDLER_NAMES[i].equals(name)) {
485 setLexicalHandler((LexicalHandler) value);
486 return;
487 }
488 }
489 super.setProperty(name, value);
490 }
491
492 public Object getProperty(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
493 for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
494 if (LEXICAL_HANDLER_NAMES[i].equals(name)) {
495 return getLexicalHandler();
496 }
497 }
498 return super.getProperty(name);
499 }
500
501 public void setLexicalHandler (LexicalHandler handler) {
502 if (handler == null) {
503 throw new NullPointerException("Null lexical handler");
504 }
505 else {
506 this.lexicalHandler = handler;
507 }
508 }
509
510 public LexicalHandler getLexicalHandler(){
511 return lexicalHandler;
512 }
513
514
515
516
517 public void setDocumentLocator(Locator locator) {
518 super.setDocumentLocator(locator);
519 }
520
521 public void startDocument() throws SAXException {
522 try {
523 writeDeclaration();
524 super.startDocument();
525 }
526 catch (IOException e) {
527 handleException(e);
528 }
529 }
530
531 public void endDocument() throws SAXException {
532 super.endDocument();
533
534 if ( autoFlush ) {
535 try {
536 flush();
537 } catch ( IOException e) {}
538 }
539 }
540
541 public void startPrefixMapping(String prefix, String uri) throws SAXException {
542 if ( namespacesMap == null ) {
543 namespacesMap = new HashMap();
544 }
545 namespacesMap.put(prefix, uri);
546 super.startPrefixMapping(prefix, uri);
547 }
548
549 public void endPrefixMapping(String prefix) throws SAXException {
550 super.endPrefixMapping(prefix);
551 }
552
553 public void startElement(String namespaceURI, String localName, String qName, Attributes attributes) throws SAXException {
554 try {
555 charactersAdded = false;
556
557 writePrintln();
558 indent();
559 writer.write("<");
560 writer.write(qName);
561 writeNamespaces();
562 writeAttributes( attributes );
563 writer.write(">");
564 ++indentLevel;
565 lastOutputNodeType = Node.ELEMENT_NODE;
566
567 super.startElement( namespaceURI, localName, qName, attributes );
568 }
569 catch (IOException e) {
570 handleException(e);
571 }
572 }
573
574 public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
575 try {
576 charactersAdded = false;
577 --indentLevel;
578 if ( lastOutputNodeType == Node.ELEMENT_NODE ) {
579 writePrintln();
580 indent();
581 }
582
583
584
585 boolean hadContent = true;
586 if ( hadContent ) {
587 writeClose(qName);
588 }
589 else {
590 writeEmptyElementClose(qName);
591 }
592 lastOutputNodeType = Node.ELEMENT_NODE;
593
594 super.endElement( namespaceURI, localName, qName );
595 }
596 catch (IOException e) {
597 handleException(e);
598 }
599 }
600
601 public void characters(char[] ch, int start, int length) throws SAXException {
602 if (ch == null || ch.length == 0 || length <= 0) {
603 return;
604 }
605
606 try {
607
608
609
610
611
612
613 String string = new String(ch, start, length);
614
615 if (escapeText) {
616 string = escapeElementEntities(string);
617 }
618
619 if (format.isTrimText()) {
620 if ((lastOutputNodeType == Node.TEXT_NODE) && !charactersAdded) {
621 writer.write(" ");
622 } else if (charactersAdded && Character.isWhitespace(lastChar)) {
623 writer.write(lastChar);
624 }
625
626 String delim = "";
627 StringTokenizer tokens = new StringTokenizer(string);
628 while (tokens.hasMoreTokens()) {
629 writer.write(delim);
630 writer.write(tokens.nextToken());
631 delim = " ";
632 }
633 } else {
634 writer.write(string);
635 }
636
637 charactersAdded = true;
638 lastChar = ch[start + length - 1];
639 lastOutputNodeType = Node.TEXT_NODE;
640
641 super.characters(ch, start, length);
642 }
643 catch (IOException e) {
644 handleException(e);
645 }
646 }
647
648 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
649 super.ignorableWhitespace(ch, start, length);
650 }
651
652 public void processingInstruction(String target, String data) throws SAXException {
653 try {
654 indent();
655 writer.write("<?");
656 writer.write(target);
657 writer.write(" ");
658 writer.write(data);
659 writer.write("?>");
660 writePrintln();
661 lastOutputNodeType = Node.PROCESSING_INSTRUCTION_NODE;
662
663 super.processingInstruction(target, data);
664 }
665 catch (IOException e) {
666 handleException(e);
667 }
668 }
669
670
671
672
673
674 public void notationDecl(String name, String publicID, String systemID) throws SAXException {
675 super.notationDecl(name, publicID, systemID);
676 }
677
678 public void unparsedEntityDecl(String name, String publicID, String systemID, String notationName) throws SAXException {
679 super.unparsedEntityDecl(name, publicID, systemID, notationName);
680 }
681
682
683
684
685 public void startDTD(String name, String publicID, String systemID) throws SAXException {
686 inDTD = true;
687 try {
688 writeDocType(name, publicID, systemID);
689 }
690 catch (IOException e) {
691 handleException(e);
692 }
693
694 if (lexicalHandler != null) {
695 lexicalHandler.startDTD(name, publicID, systemID);
696 }
697 }
698
699 public void endDTD() throws SAXException {
700 inDTD = false;
701 if (lexicalHandler != null) {
702 lexicalHandler.endDTD();
703 }
704 }
705
706 public void startCDATA() throws SAXException {
707 try {
708 writer.write( "<![CDATA[" );
709 }
710 catch (IOException e) {
711 handleException(e);
712 }
713
714 if (lexicalHandler != null) {
715 lexicalHandler.startCDATA();
716 }
717 }
718
719 public void endCDATA() throws SAXException {
720 try {
721 writer.write( "]]>" );
722 }
723 catch (IOException e) {
724 handleException(e);
725 }
726
727 if (lexicalHandler != null) {
728 lexicalHandler.endCDATA();
729 }
730 }
731
732 public void startEntity(String name) throws SAXException {
733 try {
734 writeEntityRef(name);
735 }
736 catch (IOException e) {
737 handleException(e);
738 }
739
740 if (lexicalHandler != null) {
741 lexicalHandler.startEntity(name);
742 }
743 }
744
745 public void endEntity(String name) throws SAXException {
746 if (lexicalHandler != null) {
747 lexicalHandler.endEntity(name);
748 }
749 }
750
751 public void comment(char[] ch, int start, int length) throws SAXException {
752 if ( showCommentsInDTDs || ! inDTD ) {
753 try {
754 charactersAdded = false;
755 writeComment( new String(ch, start, length) );
756 }
757 catch (IOException e) {
758 handleException(e);
759 }
760 }
761
762 if (lexicalHandler != null) {
763 lexicalHandler.comment(ch, start, length);
764 }
765 }
766
767
768
769
770
771 protected void writeElement(Element element) throws IOException {
772 int size = element.nodeCount();
773 String qualifiedName = element.getQualifiedName();
774
775 writePrintln();
776 indent();
777
778 writer.write("<");
779 writer.write(qualifiedName);
780
781 int previouslyDeclaredNamespaces = namespaceStack.size();
782 Namespace ns = element.getNamespace();
783 if (isNamespaceDeclaration( ns ) ) {
784 namespaceStack.push(ns);
785 writeNamespace(ns);
786 }
787
788
789 boolean textOnly = true;
790 for ( int i = 0; i < size; i++ ) {
791 Node node = element.node(i);
792 if ( node instanceof Namespace ) {
793 Namespace additional = (Namespace) node;
794 if (isNamespaceDeclaration( additional ) ) {
795 namespaceStack.push(additional);
796 writeNamespace(additional);
797 }
798 }
799 else if ( node instanceof Element) {
800 textOnly = false;
801 }
802 else if ( node instanceof Comment) {
803 textOnly = false;
804 }
805 }
806
807 writeAttributes(element);
808
809 lastOutputNodeType = Node.ELEMENT_NODE;
810
811 if ( size <= 0 ) {
812 writeEmptyElementClose(qualifiedName);
813 }
814 else {
815 writer.write(">");
816 if ( textOnly ) {
817
818
819 writeElementContent(element);
820 }
821 else {
822
823 ++indentLevel;
824
825 writeElementContent(element);
826
827 --indentLevel;
828
829 writePrintln();
830 indent();
831 }
832 writer.write("</");
833 writer.write(qualifiedName);
834 writer.write(">");
835 }
836
837
838 while (namespaceStack.size() > previouslyDeclaredNamespaces) {
839 namespaceStack.pop();
840 }
841
842 lastOutputNodeType = Node.ELEMENT_NODE;
843 }
844
845 /***
846 * Determines if element is a special case of XML elements
847 * where it contains an xml:space attribute of "preserve".
848 * If it does, then retain whitespace.
849 */
850 protected final boolean isElementSpacePreserved(Element element) {
851 final Attribute attr = (Attribute)element.attribute("space");
852 boolean preserveFound=preserve;
853 if (attr!=null) {
854 if ("xml".equals(attr.getNamespacePrefix()) &&
855 "preserve".equals(attr.getText())) {
856 preserveFound = true;
857 }
858 else {
859 preserveFound = false;
860 }
861 }
862 return preserveFound;
863 }
864 /*** Outputs the content of the given element. If whitespace trimming is
865 * enabled then all adjacent text nodes are appended together before
866 * the whitespace trimming occurs to avoid problems with multiple
867 * text nodes being created due to text content that spans parser buffers
868 * in a SAX parser.
869 */
870 protected void writeElementContent(Element element) throws IOException {
871 boolean trim = format.isTrimText();
872 boolean oldPreserve=preserve;
873 if (trim) {
874 preserve=isElementSpacePreserved(element);
875 trim = !preserve;
876 }
877 if (trim) {
878
879
880 Text lastTextNode = null;
881 StringBuffer buffer = null;
882 boolean textOnly = true;
883 for ( int i = 0, size = element.nodeCount(); i < size; i++ ) {
884 Node node = element.node(i);
885 if ( node instanceof Text ) {
886 if ( lastTextNode == null ) {
887 lastTextNode = (Text) node;
888 }
889 else {
890 if (buffer == null) {
891 buffer = new StringBuffer( lastTextNode.getText() );
892 }
893 buffer.append( ((Text) node).getText() );
894 }
895 }
896 else {
897 if (!textOnly && format.isPadText()) {
898 writer.write(PAD_TEXT);
899 }
900
901 textOnly = false;
902
903 if ( lastTextNode != null ) {
904 if ( buffer != null ) {
905 writeString( buffer.toString() );
906 buffer = null;
907 }
908 else {
909 writeString( lastTextNode.getText() );
910 }
911 lastTextNode = null;
912
913 if (format.isPadText()) {
914 writer.write(PAD_TEXT);
915 }
916 }
917 writeNode(node);
918 }
919 }
920 if ( lastTextNode != null ) {
921 if (!textOnly && format.isPadText()) {
922 writer.write(PAD_TEXT);
923 }
924 if ( buffer != null ) {
925 writeString( buffer.toString() );
926 buffer = null;
927 }
928 else {
929 writeString( lastTextNode.getText() );
930 }
931 lastTextNode = null;
932 }
933 }
934 else {
935 Node lastTextNode = null;
936 for ( int i = 0, size = element.nodeCount(); i < size; i++ ) {
937 Node node = element.node(i);
938 if (node instanceof Text) {
939 writeNode(node);
940 lastTextNode = node;
941 } else {
942 if ((lastTextNode != null) && format.isPadText()) {
943 writer.write(PAD_TEXT);
944 }
945 writeNode(node);
946 if ((lastTextNode != null) && format.isPadText()) {
947 writer.write(PAD_TEXT);
948 }
949 lastTextNode = null;
950 }
951 }
952 }
953 preserve=oldPreserve;
954 }
955 protected void writeCDATA(String text) throws IOException {
956 writer.write( "<![CDATA[" );
957 writer.write( text );
958 writer.write( "]]>" );
959
960 lastOutputNodeType = Node.CDATA_SECTION_NODE;
961 }
962
963 protected void writeDocType(DocumentType docType) throws IOException {
964 if (docType != null) {
965 docType.write( writer );
966
967 writePrintln();
968 }
969 }
970
971
972 protected void writeNamespace(Namespace namespace) throws IOException {
973 if ( namespace != null ) {
974 writeNamespace(namespace.getPrefix(), namespace.getURI());
975 }
976 }
977
978 /***
979 * Writes the SAX namepsaces
980 */
981 protected void writeNamespaces() throws IOException {
982 if ( namespacesMap != null ) {
983 for ( Iterator iter = namespacesMap.entrySet().iterator(); iter.hasNext(); ) {
984 Map.Entry entry = (Map.Entry) iter.next();
985 String prefix = (String) entry.getKey();
986 String uri = (String) entry.getValue();
987 writeNamespace(prefix, uri);
988 }
989 namespacesMap = null;
990 }
991 }
992
993 /***
994 * Writes the SAX namepsaces
995 */
996 protected void writeNamespace(String prefix, String uri) throws IOException {
997 if ( prefix != null && prefix.length() > 0 ) {
998 writer.write(" xmlns:");
999 writer.write(prefix);
1000 writer.write("=\"");
1001 }
1002 else {
1003 writer.write(" xmlns=\"");
1004 }
1005 writer.write(uri);
1006 writer.write("\"");
1007 }
1008
1009 protected void writeProcessingInstruction(ProcessingInstruction processingInstruction) throws IOException {
1010
1011 writer.write( "<?" );
1012 writer.write( processingInstruction.getName() );
1013 writer.write( " " );
1014 writer.write( processingInstruction.getText() );
1015 writer.write( "?>" );
1016 writePrintln();
1017
1018 lastOutputNodeType = Node.PROCESSING_INSTRUCTION_NODE;
1019 }
1020
1021 protected void writeString(String text) throws IOException {
1022 if ( text != null && text.length() > 0 ) {
1023 if ( escapeText ) {
1024 text = escapeElementEntities(text);
1025 }
1026
1027
1028
1029
1030
1031
1032
1033 if (format.isTrimText()) {
1034 boolean first = true;
1035 StringTokenizer tokenizer = new StringTokenizer(text);
1036 while (tokenizer.hasMoreTokens()) {
1037 String token = tokenizer.nextToken();
1038 if ( first ) {
1039 first = false;
1040 if ( lastOutputNodeType == Node.TEXT_NODE ) {
1041 writer.write(" ");
1042 }
1043 }
1044 else {
1045 writer.write(" ");
1046 }
1047 writer.write(token);
1048 lastOutputNodeType = Node.TEXT_NODE;
1049 }
1050 }
1051 else {
1052 lastOutputNodeType = Node.TEXT_NODE;
1053 writer.write(text);
1054 }
1055 }
1056 }
1057
1058 /***
1059 * This method is used to write out Nodes that contain text
1060 * and still allow for xml:space to be handled properly.
1061 *
1062 */
1063 protected void writeNodeText(Node node) throws IOException {
1064 String text = node.getText();
1065 if (text != null && text.length() > 0) {
1066 if (escapeText) {
1067 text = escapeElementEntities(text);
1068 }
1069
1070 lastOutputNodeType = Node.TEXT_NODE;
1071 writer.write(text);
1072 }
1073 }
1074
1075 protected void writeNode(Node node) throws IOException {
1076 int nodeType = node.getNodeType();
1077 switch (nodeType) {
1078 case Node.ELEMENT_NODE:
1079 writeElement((Element) node);
1080 break;
1081 case Node.ATTRIBUTE_NODE:
1082 writeAttribute((Attribute) node);
1083 break;
1084 case Node.TEXT_NODE:
1085 writeNodeText(node);
1086
1087 break;
1088 case Node.CDATA_SECTION_NODE:
1089 writeCDATA(node.getText());
1090 break;
1091 case Node.ENTITY_REFERENCE_NODE:
1092 writeEntity((Entity) node);
1093 break;
1094 case Node.PROCESSING_INSTRUCTION_NODE:
1095 writeProcessingInstruction((ProcessingInstruction) node);
1096 break;
1097 case Node.COMMENT_NODE:
1098 writeComment(node.getText());
1099 break;
1100 case Node.DOCUMENT_NODE:
1101 write((Document) node);
1102 break;
1103 case Node.DOCUMENT_TYPE_NODE:
1104 writeDocType((DocumentType) node);
1105 break;
1106 case Node.NAMESPACE_NODE:
1107
1108
1109 break;
1110 default:
1111 throw new IOException( "Invalid node type: " + node );
1112 }
1113 }
1114
1115
1116
1117
1118 protected void installLexicalHandler() {
1119 XMLReader parent = getParent();
1120 if (parent == null) {
1121 throw new NullPointerException("No parent for filter");
1122 }
1123
1124 for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
1125 try {
1126 parent.setProperty(LEXICAL_HANDLER_NAMES[i], this);
1127 break;
1128 }
1129 catch (SAXNotRecognizedException ex) {
1130
1131 }
1132 catch (SAXNotSupportedException ex) {
1133
1134 }
1135 }
1136 }
1137
1138 protected void writeDocType(String name, String publicID, String systemID) throws IOException {
1139 boolean hasPublic = false;
1140
1141 writer.write("<!DOCTYPE ");
1142 writer.write(name);
1143 if ((publicID != null) && (!publicID.equals(""))) {
1144 writer.write(" PUBLIC \"");
1145 writer.write(publicID);
1146 writer.write("\"");
1147 hasPublic = true;
1148 }
1149 if ((systemID != null) && (!systemID.equals(""))) {
1150 if (!hasPublic) {
1151 writer.write(" SYSTEM");
1152 }
1153 writer.write(" \"");
1154 writer.write(systemID);
1155 writer.write("\"");
1156 }
1157 writer.write(">");
1158 writePrintln();
1159 }
1160
1161 protected void writeEntity(Entity entity) throws IOException {
1162 if (!resolveEntityRefs()) {
1163 writeEntityRef( entity.getName() );
1164 } else {
1165 writer.write(entity.getText());
1166 }
1167 }
1168
1169 protected void writeEntityRef(String name) throws IOException {
1170 writer.write( "&" );
1171 writer.write( name );
1172 writer.write( ";" );
1173
1174 lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;
1175 }
1176
1177 protected void writeComment(String text) throws IOException {
1178 if (format.isNewlines()) {
1179 println();
1180 indent();
1181 }
1182 writer.write( "<!--" );
1183 writer.write( text );
1184 writer.write( "-->" );
1185
1186 lastOutputNodeType = Node.COMMENT_NODE;
1187 }
1188
1189 /*** Writes the attributes of the given element
1190 *
1191 */
1192 protected void writeAttributes( Element element ) throws IOException {
1193
1194
1195
1196
1197
1198 for ( int i = 0, size = element.attributeCount(); i < size; i++ ) {
1199 Attribute attribute = element.attribute(i);
1200 Namespace ns = attribute.getNamespace();
1201 if (ns != null && ns != Namespace.NO_NAMESPACE && ns != Namespace.XML_NAMESPACE) {
1202 String prefix = ns.getPrefix();
1203 String uri = namespaceStack.getURI(prefix);
1204 if (!ns.getURI().equals(uri)) {
1205 writeNamespace(ns);
1206 namespaceStack.push(ns);
1207 }
1208 }
1209
1210
1211
1212
1213 String attName = attribute.getName();
1214 if (attName.startsWith("xmlns:")) {
1215 String prefix = attName.substring(6);
1216 if (namespaceStack.getNamespaceForPrefix(prefix) == null) {
1217 String uri = attribute.getValue();
1218 namespaceStack.push(prefix, uri);
1219 writeNamespace(prefix, uri);
1220 }
1221 } else if (attName.equals("xmlns")) {
1222 if (namespaceStack.getDefaultNamespace() == null) {
1223 String uri = attribute.getValue();
1224 namespaceStack.push(null, uri);
1225 writeNamespace(null, uri);
1226 }
1227 } else {
1228 char quote = format.getAttributeQuoteCharacter();
1229 writer.write(" ");
1230 writer.write(attribute.getQualifiedName());
1231 writer.write("=");
1232 writer.write(quote);
1233 writeEscapeAttributeEntities(attribute.getValue());
1234 writer.write(quote);
1235 }
1236 }
1237 }
1238
1239 protected void writeAttribute(Attribute attribute) throws IOException {
1240 writer.write(" ");
1241 writer.write(attribute.getQualifiedName());
1242 writer.write("=");
1243
1244 char quote = format.getAttributeQuoteCharacter();
1245 writer.write(quote);
1246
1247 writeEscapeAttributeEntities(attribute.getValue());
1248
1249 writer.write(quote);
1250 lastOutputNodeType = Node.ATTRIBUTE_NODE;
1251 }
1252
1253 protected void writeAttributes(Attributes attributes) throws IOException {
1254 for (int i = 0, size = attributes.getLength(); i < size; i++) {
1255 writeAttribute( attributes, i );
1256 }
1257 }
1258
1259 protected void writeAttribute(Attributes attributes, int index) throws IOException {
1260 char quote = format.getAttributeQuoteCharacter();
1261 writer.write(" ");
1262 writer.write(attributes.getQName(index));
1263 writer.write("=");
1264 writer.write(quote);
1265 writeEscapeAttributeEntities(attributes.getValue(index));
1266 writer.write(quote);
1267 }
1268
1269
1270
1271 protected void indent() throws IOException {
1272 String indent = format.getIndent();
1273 if ( indent != null && indent.length() > 0 ) {
1274 for ( int i = 0; i < indentLevel; i++ ) {
1275 writer.write(indent);
1276 }
1277 }
1278 }
1279
1280 /***
1281 * <p>
1282 * This will print a new line only if the newlines flag was set to true
1283 * </p>
1284 */
1285 protected void writePrintln() throws IOException {
1286 if (format.isNewlines()) {
1287 writer.write( format.getLineSeparator() );
1288 }
1289 }
1290
1291 /***
1292 * Get an OutputStreamWriter, use preferred encoding.
1293 */
1294 protected Writer createWriter(OutputStream outStream, String encoding) throws UnsupportedEncodingException {
1295 return new BufferedWriter(
1296 new OutputStreamWriter( outStream, encoding )
1297 );
1298 }
1299
1300 /***
1301 * <p>
1302 * This will write the declaration to the given Writer.
1303 * Assumes XML version 1.0 since we don't directly know.
1304 * </p>
1305 */
1306 protected void writeDeclaration() throws IOException {
1307 String encoding = format.getEncoding();
1308
1309
1310 if (! format.isSuppressDeclaration()) {
1311
1312 if (encoding.equals("UTF8")) {
1313 writer.write("<?xml version=\"1.0\"");
1314 if (!format.isOmitEncoding()) {
1315 writer.write(" encoding=\"UTF-8\"");
1316 }
1317 writer.write("?>");
1318 } else {
1319 writer.write("<?xml version=\"1.0\"");
1320 if (! format.isOmitEncoding()) {
1321 writer.write(" encoding=\"" + encoding + "\"");
1322 }
1323 writer.write("?>");
1324 }
1325 if (format.isNewLineAfterDeclaration()) {
1326 println();
1327 }
1328 }
1329 }
1330
1331 protected void writeClose(String qualifiedName) throws IOException {
1332 writer.write("</");
1333 writer.write(qualifiedName);
1334 writer.write(">");
1335 }
1336
1337 protected void writeEmptyElementClose(String qualifiedName) throws IOException {
1338
1339 if (! format.isExpandEmptyElements()) {
1340 writer.write("/>");
1341 } else {
1342 writer.write("></");
1343 writer.write(qualifiedName);
1344 writer.write(">");
1345 }
1346 }
1347
1348 protected boolean isExpandEmptyElements() {
1349 return format.isExpandEmptyElements();
1350 }
1351
1352
1353 /*** This will take the pre-defined entities in XML 1.0 and
1354 * convert their character representation to the appropriate
1355 * entity reference, suitable for XML attributes.
1356 */
1357 protected String escapeElementEntities(String text) {
1358 char[] block = null;
1359 int i, last = 0, size = text.length();
1360 for ( i = 0; i < size; i++ ) {
1361 String entity = null;
1362 char c = text.charAt(i);
1363 switch( c ) {
1364 case '<' :
1365 entity = "<";
1366 break;
1367 case '>' :
1368 entity = ">";
1369 break;
1370 case '&' :
1371 entity = "&";
1372 break;
1373 case '\t': case '\n': case '\r':
1374
1375 if (preserve) {
1376 entity=String.valueOf(c);
1377 }
1378 break;
1379 default:
1380 if (c < 32 || shouldEncodeChar(c)) {
1381 entity = "&#" + (int) c + ";";
1382 }
1383 break;
1384 }
1385 if (entity != null) {
1386 if ( block == null ) {
1387 block = text.toCharArray();
1388 }
1389 buffer.append(block, last, i - last);
1390 buffer.append(entity);
1391 last = i + 1;
1392 }
1393 }
1394 if ( last == 0 ) {
1395 return text;
1396 }
1397 if ( last < size ) {
1398 if ( block == null ) {
1399 block = text.toCharArray();
1400 }
1401 buffer.append(block, last, i - last);
1402 }
1403 String answer = buffer.toString();
1404 buffer.setLength(0);
1405 return answer;
1406 }
1407
1408
1409 protected void writeEscapeAttributeEntities(String text) throws IOException {
1410 if ( text != null ) {
1411 String escapedText = escapeAttributeEntities( text );
1412 writer.write( escapedText );
1413 }
1414 }
1415 /*** This will take the pre-defined entities in XML 1.0 and
1416 * convert their character representation to the appropriate
1417 * entity reference, suitable for XML attributes.
1418 */
1419 protected String escapeAttributeEntities(String text) {
1420 char quote = format.getAttributeQuoteCharacter();
1421
1422 char[] block = null;
1423 int i, last = 0, size = text.length();
1424 for ( i = 0; i < size; i++ ) {
1425 String entity = null;
1426 char c = text.charAt(i);
1427 switch( c ) {
1428 case '<' :
1429 entity = "<";
1430 break;
1431 case '>' :
1432 entity = ">";
1433 break;
1434 case '\'' :
1435 if (quote == '\'') {
1436 entity = "'";
1437 }
1438 break;
1439 case '\"' :
1440 if (quote == '\"') {
1441 entity = """;
1442 }
1443 break;
1444 case '&' :
1445 entity = "&";
1446 break;
1447 case '\t': case '\n': case '\r':
1448
1449 break;
1450 default:
1451 if (c < 32 || shouldEncodeChar(c)) {
1452 entity = "&#" + (int) c + ";";
1453 }
1454 break;
1455 }
1456 if (entity != null) {
1457 if ( block == null ) {
1458 block = text.toCharArray();
1459 }
1460 buffer.append(block, last, i - last);
1461 buffer.append(entity);
1462 last = i + 1;
1463 }
1464 }
1465 if ( last == 0 ) {
1466 return text;
1467 }
1468 if ( last < size ) {
1469 if ( block == null ) {
1470 block = text.toCharArray();
1471 }
1472 buffer.append(block, last, i - last);
1473 }
1474 String answer = buffer.toString();
1475 buffer.setLength(0);
1476 return answer;
1477 }
1478
1479 /***
1480 * Should the given character be escaped. This depends on the
1481 * encoding of the document.
1482 *
1483 * @return boolean
1484 */
1485 protected boolean shouldEncodeChar(char c) {
1486 int max = getMaximumAllowedCharacter();
1487 return max > 0 && c > max;
1488 }
1489
1490 /***
1491 * Returns the maximum allowed character code that should be allowed
1492 * unescaped which defaults to 127 in US-ASCII (7 bit) or
1493 * 255 in ISO-* (8 bit).
1494 */
1495 protected int defaultMaximumAllowedCharacter() {
1496 String encoding = format.getEncoding();
1497 if (encoding != null) {
1498 if (encoding.equals("US-ASCII")) {
1499 return 127;
1500 }
1501 }
1502
1503 return -1;
1504 }
1505
1506 protected boolean isNamespaceDeclaration( Namespace ns ) {
1507 if (ns != null && ns != Namespace.XML_NAMESPACE) {
1508 String uri = ns.getURI();
1509 if ( uri != null ) {
1510 if ( ! namespaceStack.contains( ns ) ) {
1511 return true;
1512
1513 }
1514 }
1515 }
1516 return false;
1517
1518 }
1519
1520 protected void handleException(IOException e) throws SAXException {
1521 throw new SAXException(e);
1522 }
1523
1524
1525 /*** Lets subclasses get at the current format object, so they can call setTrimText, setNewLines, etc.
1526 * Put in to support the HTMLWriter, in the way
1527 * that it pushes the current newline/trim state onto a stack and overrides
1528 * the state within preformatted tags.
1529 */
1530 protected OutputFormat getOutputFormat() {
1531 return format;
1532 }
1533
1534 public boolean resolveEntityRefs() {
1535 return resolveEntityRefs;
1536 }
1537
1538 public void setResolveEntityRefs(boolean resolve) {
1539 this.resolveEntityRefs = resolve;
1540 }
1541 }
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589