View Javadoc

1   /*
2    * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    *
7    * $Id: OutputFormat.java,v 1.13 2004/06/25 08:03:37 maartenc Exp $
8    */
9   
10  package org.dom4j.io;
11  
/**
 * <p><code>OutputFormat</code> represents the format configuration
 * used by {@link XMLWriter} and its base classes to format the XML output.</p>
 *
 * <p>An instance is a plain mutable holder of formatting options; it performs
 * no output itself. "No indentation" is always represented by a
 * <code>null</code> indent string, whichever constructor or setter was used.</p>
 *
 * @author <a href="mailto:james.strachan@metastuff.com">James Strachan</a>
 * @version $Revision: 1.13 $
 */
public class OutputFormat implements Cloneable {

    /** Standard value to indent by, if we are indenting. */
    protected static final String STANDARD_INDENT = "  ";

    /** Whether or not to suppress the XML declaration - default is <code>false</code>. */
    private boolean suppressDeclaration = false;

    /** Whether or not to print a new line after the XML declaration - default is <code>true</code>. */
    private boolean newLineAfterDeclaration = true;

    /** The encoding format - default is <code>UTF-8</code>. */
    private String encoding = "UTF-8";

    /** Whether or not to omit the encoding from the XML declaration - default is <code>false</code>. */
    private boolean omitEncoding = false;

    /** The indent string; <code>null</code> (the default) means no indentation is added. */
    private String indent = null;

    /** Whether or not to expand empty elements to &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code>. */
    private boolean expandEmptyElements = false;

    /** Whether new lines are added - default is <code>false</code> (new lines only as in the original document). */
    private boolean newlines = false;

    /** New line separator - default is <code>\n</code>. */
    private String lineSeparator = "\n";

    /** Whether whitespace in text nodes should be trimmed - default is <code>false</code> (preserved). */
    private boolean trimText = false;

    /** Whether to pad string-element boundaries with whitespace - default is <code>false</code>. */
    private boolean padText = false;

    /** Whether or not to use the XHTML standard - default is <code>false</code>. */
    private boolean doXHTML = false;

    /**
     * Controls when to output a line separator every so many tags in case of
     * no new lines and total text trimming. Zero means don't bother.
     */
    private int newLineAfterNTags = 0;

    /** Quote character to use when writing attributes - default is the double quote. */
    private char attributeQuoteChar = '\"';


    /**
     * Creates an <code>OutputFormat</code> with
     * no additional whitespace (indent or new lines) added.
     * The whitespace from the element text content is fully preserved.
     */
    public OutputFormat() {
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added but
     * no new lines added. All whitespace from element text will be included.
     *
     * @param indent is the indent string to be used for indentation
     *     (usually a number of spaces); <code>null</code> or the empty
     *     string means no indentation.
     */
    public OutputFormat(String indent) {
        this.indent = normalizeIndent(indent);
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added
     * with optional newlines between the Elements.
     * All whitespace from element text will be included.
     *
     * @param indent is the indent string to be used for indentation
     *     (usually a number of spaces); <code>null</code> or the empty
     *     string means no indentation.
     * @param newlines whether new lines are added to lay out the output
     */
    public OutputFormat(String indent, boolean newlines) {
        this.indent = normalizeIndent(indent);
        this.newlines = newlines;
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added
     * with optional newlines between the Elements
     * and the given encoding format.
     *
     * @param indent is the indent string to be used for indentation
     *     (usually a number of spaces); <code>null</code> or the empty
     *     string means no indentation.
     * @param newlines whether new lines are added to lay out the output
     * @param encoding is the text encoding to use for writing the XML
     */
    public OutputFormat(String indent, boolean newlines, String encoding) {
        this.indent = normalizeIndent(indent);
        this.newlines = newlines;
        this.encoding = encoding;
    }

    /**
     * @return the new-line separator string
     */
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * <p>This will set the new-line separator. The default is
     * <code>\n</code>. Note that if the "newlines" property is
     * false, this value is irrelevant.  To make it output the system
     * default line ending string, call
     * <code>setLineSeparator(System.getProperty("line.separator"))</code>
     * </p>
     *
     * @see #setNewlines(boolean)
     * @param separator <code>String</code> line separator to use.
     */
    public void setLineSeparator(String separator) {
        lineSeparator = separator;
    }

    /**
     * @return true if new lines should be printed
     */
    public boolean isNewlines() {
        return newlines;
    }

    /**
     * @see #setLineSeparator(String)
     * @param newlines <code>true</code> indicates new lines should be
     *                 printed, else new lines are ignored (compacted).
     */
    public void setNewlines(boolean newlines) {
        this.newlines = newlines;
    }

    /**
     * @return the encoding format
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * @param encoding encoding format
     */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * @return true if the encoding should be omitted from the XML declaration
     */
    public boolean isOmitEncoding() {
        return omitEncoding;
    }

    /**
     * <p>This will set whether the XML declaration
     * (<code>&lt;?xml version="1.0" encoding="UTF-8"?&gt;</code>)
     * includes the encoding of the document.
     * It is common to suppress this in protocols such as WML and SOAP.</p>
     *
     * @param omitEncoding <code>boolean</code> indicating whether or not
     *        the document encoding should be left out of the XML declaration.
     */
    public void setOmitEncoding(boolean omitEncoding) {
        this.omitEncoding = omitEncoding;
    }

    /**
     * <p>This will set whether the XML declaration
     * (<code>&lt;?xml version="1.0" encoding="UTF-8"?&gt;</code>)
     * is included or not.
     * It is common to suppress this in protocols such as WML and SOAP.</p>
     *
     * @param suppressDeclaration <code>boolean</code> indicating whether or not
     *     the XML declaration should be suppressed.
     */
    public void setSuppressDeclaration(boolean suppressDeclaration) {
        this.suppressDeclaration = suppressDeclaration;
    }

    /**
     * @return true if the output of the XML declaration
     * (<code>&lt;?xml version="1.0"?&gt;</code>) should be suppressed else false.
     */
    public boolean isSuppressDeclaration() {
        return suppressDeclaration;
    }

    /**
     * <p>This will set whether a new line is printed after the XML
     * declaration (assuming it is not suppressed).</p>
     *
     * @param newLineAfterDeclaration <code>boolean</code> indicating
     *     whether or not to print a new line following the XML declaration.
     *     The default is true.
     */
    public void setNewLineAfterDeclaration(boolean newLineAfterDeclaration) {
        this.newLineAfterDeclaration = newLineAfterDeclaration;
    }

    /**
     * @return true if a new line should be printed following the XML declaration
     */
    public boolean isNewLineAfterDeclaration() {
        return newLineAfterDeclaration;
    }

    /**
     * @return true if empty elements should be expanded to
     * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>
     */
    public boolean isExpandEmptyElements() {
        return expandEmptyElements;
    }

    /**
     * <p>This will set whether empty elements are expanded from
     * <code>&lt;tagName/&gt;</code> to
     * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.</p>
     *
     * @param expandEmptyElements <code>boolean</code> indicating whether or not
     *     empty elements should be expanded.
     */
    public void setExpandEmptyElements(boolean expandEmptyElements) {
        this.expandEmptyElements = expandEmptyElements;
    }

    /**
     * @return true if whitespace in text should be trimmed
     */
    public boolean isTrimText() {
        return trimText;
    }

    /**
     * <p>This will set whether the text is output verbatim (false)
     * or with whitespace stripped as per <code>{@link
     * org.dom4j.Element#getTextTrim()}</code>.</p>
     *
     * <p>Default: false </p>
     *
     * @param trimText <code>boolean</code> true=&gt;trim the whitespace,
     *     false=&gt;use text verbatim
     */
    public void setTrimText(boolean trimText) {
        this.trimText = trimText;
    }

    /**
     * @return true if string-element boundaries should be padded with whitespace
     */
    public boolean isPadText() {
        return padText;
    }

    /**
     * <p>Ensure that text immediately preceded by or followed by an
     * element will be "padded" with a single space.  This is used to
     * allow making browser-friendly HTML, avoiding trimText's
     * transformation of, e.g.,
     * <code>The quick &lt;b&gt;brown&lt;/b&gt; fox</code> into
     * <code>The quick&lt;b&gt;brown&lt;/b&gt;fox</code> (the latter
     * will run the three separate words together into a single word).
     *
     * This setting is not too useful if you haven't also called
     * {@link #setTrimText}.</p>
     *
     * <p>Default: false </p>
     *
     * @param padText <code>boolean</code> if true, pad string-element boundaries
     */
    public void setPadText(boolean padText) {
        this.padText = padText;
    }

    /**
     * @return the indent string, or <code>null</code> if no indentation
     * is to be added
     */
    public String getIndent() {
        return indent;
    }

    /**
     * <p>This will set the indent <code>String</code> to use; this
     * is usually a <code>String</code> of empty spaces. If you pass
     * null, or the empty string (""), then no indentation will
     * happen and {@link #getIndent()} will return <code>null</code>.</p>
     * Default: none (null)
     *
     * @param indent <code>String</code> to use for indentation.
     */
    public void setIndent(String indent) {
        this.indent = normalizeIndent(indent);
    }

    /**
     * Set the indent on or off.  If setting on, will use the value of
     * STANDARD_INDENT, which is usually two spaces.
     *
     * @param doIndent if true, set indenting on; if false, set indenting off
     */
    public void setIndent(boolean doIndent) {
        this.indent = doIndent ? STANDARD_INDENT : null;
    }

    /**
     * <p>This will set the indent <code>String</code>'s size; an indentSize
     * of 4 would result in the indentation being equivalent to the
     * <code>String</code> "&nbsp;&nbsp;&nbsp;&nbsp;" (four space characters).
     * A size of zero or less switches indentation off, exactly as
     * <code>setIndent("")</code> does.</p>
     *
     * @param indentSize <code>int</code> number of spaces in indentation.
     */
    public void setIndentSize(int indentSize) {
        if (indentSize <= 0) {
            // Same representation of "no indent" as setIndent(String) uses.
            this.indent = null;
            return;
        }
        StringBuffer indentBuffer = new StringBuffer(indentSize);
        for (int i = 0; i < indentSize; i++) {
            indentBuffer.append(' ');
        }
        this.indent = indentBuffer.toString();
    }

    /**
     * <p>Whether or not to use the XHTML standard: like HTML but passes an
     * XML parser with real, closed tags.  Also, XHTML CDATA sections will be
     * output with the CDATA delimiters:
     * ( &quot;<b>&lt;![CDATA[</b>&quot; and &quot;<b>]]&gt;</b>&quot; )
     * otherwise, the class HTMLWriter will output the CDATA text, but not
     * the delimiters.</p>
     *
     * <p>Default is <code>false</code></p>
     *
     * @return true if the XHTML standard should be used
     */
    public boolean isXHTML() {
        return doXHTML;
    }

    /**
     * <p>This will set whether or not to use the XHTML standard: like HTML
     * but passes an XML parser with real, closed tags.  Also, XHTML CDATA
     * sections will be output with the CDATA delimiters:
     * ( &quot;<b>&lt;![CDATA[</b>&quot; and &quot;<b>]]&gt;</b>&quot; )
     * otherwise, the class HTMLWriter will output the CDATA text, but not
     * the delimiters.</p>
     *
     * <p>Default: false </p>
     *
     * @param xhtml <code>boolean</code> true=&gt;conform to XHTML,
     *     false=&gt;conform to HTML, can have unclosed tags, etc.
     */
    public void setXHTML(boolean xhtml) {
        doXHTML = xhtml;
    }

    /**
     * @return the number of tags after which a line separator is output,
     * or zero if nothing special is done
     */
    public int getNewLineAfterNTags() {
        return newLineAfterNTags;
    }

    /**
     * Controls output of a line separator every tagCount tags when
     * isNewlines is false. If tagCount equals zero, it means don't do
     * anything special.  If greater than zero, then a line separator will be
     * output after tagCount tags have been output.  Used when you would like
     * to squeeze the html as much as possible, but some browsers don't like
     * really long lines. A tag count of 10 would produce a line separator in
     * the output after 10 close tags (including single tags).
     *
     * @param tagCount number of tags between line separators, zero for none
     */
    public void setNewLineAfterNTags(int tagCount) {
        newLineAfterNTags = tagCount;
    }

    /**
     * @return the character used to quote attribute values
     */
    public char getAttributeQuoteCharacter() {
        return attributeQuoteChar;
    }

    /**
     * Sets the character used to quote attribute values. The specified
     * character must be a valid XML attribute quote character, otherwise an
     * <code>IllegalArgumentException</code> will be thrown.
     *
     * @param quoteChar The character to use when quoting attribute values.
     * @throws IllegalArgumentException If the specified character is not a
     *         valid XML attribute quote character.
     */
    public void setAttributeQuoteCharacter(char quoteChar) {
        if (quoteChar != '\'' && quoteChar != '"') {
            throw new IllegalArgumentException(
                    "Invalid attribute quote character (" + quoteChar + ")");
        }
        attributeQuoteChar = quoteChar;
    }

    /**
     * Parses command line arguments of the form
     * <code>-omitEncoding -indentSize 3 -newlines -trimText</code>
     *
     * <p>Parsing stops at the first argument that is not understood. An
     * option that requires a value (<code>-indent</code>,
     * <code>-indentSize</code>, <code>-encoding</code>,
     * <code>-lineSeparator</code>) but is the last argument, and a
     * <code>null</code> argument, are both treated as not understood.</p>
     *
     * @param args is the array of command line arguments
     * @param i is the index in args to start parsing options
     * @return the index of first parameter that we didn't understand, or
     *     <code>args.length</code> if every argument was consumed
     * @throws NumberFormatException if the value following
     *     <code>-indentSize</code> is not a parsable integer
     */
    public int parseOptions(String[] args, int i) {
        for (int size = args.length; i < size; i++) {
            String arg = args[i];
            if (arg == null) {
                return i;
            }
            // A value-taking option with nothing after it cannot be applied;
            // report it rather than running off the end of the array.
            if (requiresValue(arg) && i + 1 >= size) {
                return i;
            }

            if (arg.equals("-suppressDeclaration")) {
                setSuppressDeclaration(true);
            } else if (arg.equals("-omitEncoding")) {
                setOmitEncoding(true);
            } else if (arg.equals("-indent")) {
                setIndent(args[++i]);
            } else if (arg.equals("-indentSize")) {
                setIndentSize(Integer.parseInt(args[++i]));
            } else if (arg.startsWith("-expandEmpty")) {
                setExpandEmptyElements(true);
            } else if (arg.equals("-encoding")) {
                setEncoding(args[++i]);
            } else if (arg.equals("-newlines")) {
                setNewlines(true);
            } else if (arg.equals("-lineSeparator")) {
                setLineSeparator(args[++i]);
            } else if (arg.equals("-trimText")) {
                setTrimText(true);
            } else if (arg.equals("-padText")) {
                setPadText(true);
            } else if (arg.startsWith("-xhtml")) {
                setXHTML(true);
            } else {
                return i;
            }
        }
        return i;
    }


    /**
     * A static helper method to create the default pretty printing format.
     * This format consists of an indent of 2 spaces, newlines after each
     * element and all other whitespace trimmed, and XHTML is false.
     *
     * @return a new pretty-printing <code>OutputFormat</code>
     */
    public static OutputFormat createPrettyPrint() {
        OutputFormat format = new OutputFormat();
        format.setIndentSize(2);
        format.setNewlines(true);
        format.setTrimText(true);
        return format;
    }

    /**
     * A static helper method to create the default compact format.
     * This format does not have any indentation or newlines after an element
     * and all other whitespace trimmed.
     *
     * @return a new compact <code>OutputFormat</code>
     */
    public static OutputFormat createCompactFormat() {
        OutputFormat format = new OutputFormat();
        format.setIndent(false);
        format.setNewlines(false);
        format.setTrimText(true);
        return format;
    }

    /** Maps the empty string to <code>null</code> so "no indent" has one representation. */
    private static String normalizeIndent(String indent) {
        if (indent != null && indent.length() == 0) {
            return null;
        }
        return indent;
    }

    /** Tells whether the given command line option consumes the following argument as its value. */
    private static boolean requiresValue(String option) {
        return option.equals("-indent")
                || option.equals("-indentSize")
                || option.equals("-encoding")
                || option.equals("-lineSeparator");
    }

}
434 
435 
436 
437 
438 /*
439  * Redistribution and use of this software and associated documentation
440  * ("Software"), with or without modification, are permitted provided
441  * that the following conditions are met:
442  *
443  * 1. Redistributions of source code must retain copyright
444  *    statements and notices.  Redistributions must also contain a
445  *    copy of this document.
446  *
447  * 2. Redistributions in binary form must reproduce the
448  *    above copyright notice, this list of conditions and the
449  *    following disclaimer in the documentation and/or other
450  *    materials provided with the distribution.
451  *
452  * 3. The name "DOM4J" must not be used to endorse or promote
453  *    products derived from this Software without prior written
454  *    permission of MetaStuff, Ltd.  For written permission,
455  *    please contact dom4j-info@metastuff.com.
456  *
457  * 4. Products derived from this Software may not be called "DOM4J"
458  *    nor may "DOM4J" appear in their names without prior written
459  *    permission of MetaStuff, Ltd. DOM4J is a registered
460  *    trademark of MetaStuff, Ltd.
461  *
462  * 5. Due credit should be given to the DOM4J Project - 
463  *    http://www.dom4j.org
464  *
465  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
466  * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
467  * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
468  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
469  * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
470  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
471  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
472  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
473  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
474  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
475  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
476  * OF THE POSSIBILITY OF SUCH DAMAGE.
477  *
478  * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
479  *
480  * $Id: OutputFormat.java,v 1.13 2004/06/25 08:03:37 maartenc Exp $
481  */