View Javadoc

1   /*
2    * XmlParser.java
3    * Copyright (C) 1999,2000,2001 The Free Software Foundation
4    * 
5    * This file is part of GNU JAXP, a library.
6    *
7    * GNU JAXP is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   * 
12   * GNU JAXP is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   * 
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, write to the Free Software
19   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20   *
21   * As a special exception, if you link this library with other files to
22   * produce an executable, this library does not by itself cause the
23   * resulting executable to be covered by the GNU General Public License.
24   * This exception does not however invalidate any other reasons why the
25   * executable file might be covered by the GNU General Public License. 
26   */
27  
28  //
29  // Copyright (c) 1997, 1998 by Microstar Software Ltd.
30  // From Microstar's README (the entire original license):
31  //
32  //	Separate statements also said it's in the public domain.
33  //	All modifications are distributed under the license
34  //	above (GPL with library exception).
35  //
36  // AElfred is free for both commercial and non-commercial use and
37  // redistribution, provided that Microstar's copyright and disclaimer are
38  // retained intact.  You are free to modify AElfred for your own use and
39  // to redistribute AElfred with your modifications, provided that the
40  // modifications are clearly documented.
41  //
42  // This program is distributed in the hope that it will be useful, but
43  // WITHOUT ANY WARRANTY; without even the implied warranty of
44  // merchantability or fitness for a particular purpose.  Please use it AT
45  // YOUR OWN RISK.
46  //
47  
48  
49  package org.dom4j.io.aelfred2;
50  
import java.io.BufferedInputStream;
import java.io.CharConversionException;
import java.io.EOFException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;

// maintaining 1.1 compatibility for now ...
// Iterator and Hashmap ought to be faster
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Stack;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
70  
71  
72  /***
73   * Parse XML documents and return parse events through call-backs.
74   * Use the <code>SAXDriver</code> class as your entry point, as all
75   * internal parser interfaces are subject to change.
76   *
77   * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
78   *	(version 1.2a with bugfixes)
79   * @author Updated by David Brownell &lt;dbrownell@users.sourceforge.net&gt;
80   * @see SAXDriver
81   */
82  final class XmlParser
83  {
84      // avoid slow per-character readCh()
85      private final static boolean USE_CHEATS = true;
86  
87  
88      //////////////////////////////////////////////////////////////////////
89      // Constructors.
90      ////////////////////////////////////////////////////////////////////////
91  
92  
93      /***
94       * Construct a new parser with no associated handler.
95       * @see #setHandler
96       * @see #parse
97       */
98      // package private
99      XmlParser ()
100     {
101     }
102 
103 
104     /***
105      * Set the handler that will receive parsing events.
106      * @param handler The handler to receive callback events.
107      * @see #parse
108      */
109     // package private
110     void setHandler (SAXDriver handler)
111     {
112 	this.handler = handler;
113     }
114 
115 
116     /***
117      * Parse an XML document from the character stream, byte stream, or URI
118      * that you provide (in that order of preference).  Any URI that you
119      * supply will become the base URI for resolving relative URI, and may
120      * be used to acquire a reader or byte stream.
121      *
122      * <p> Only one thread at a time may use this parser; since it is
123      * private to this package, post-parse cleanup is done by the caller,
124      * which MUST NOT REUSE the parser (just null it).
125      *
126      * @param systemId Absolute URI of the document; should never be null,
127      *	but may be so iff a reader <em>or</em> a stream is provided.
128      * @param publicId The public identifier of the document, or null.
129      * @param reader A character stream; must be null if stream isn't.
130      * @param stream A byte input stream; must be null if reader isn't.
131      * @param encoding The suggested encoding, or null if unknown.
132      * @exception java.lang.Exception Basically SAXException or IOException
133      */
134     // package private 
135     void doParse (
136 	String		systemId,
137 	String		publicId,
138 	Reader		reader,
139 	InputStream	stream,
140 	String		encoding
141     ) throws Exception
142     {
143 	if (handler == null)
144 	    throw new IllegalStateException ("no callback handler");
145 
146 	initializeVariables ();
147 
148 	// predeclare the built-in entities here (replacement texts)
149 	// we don't need to intern(), since we're guaranteed literals
150 	// are always (globally) interned.
151 	setInternalEntity ("amp", "&#38;");
152 	setInternalEntity ("lt", "&#60;");
153 	setInternalEntity ("gt", "&#62;");
154 	setInternalEntity ("apos", "&#39;");
155 	setInternalEntity ("quot", "&#34;");
156 
157 	try {
158 	    // pushURL first to ensure locator is correct in startDocument
159 	    // ... it might report an IO or encoding exception.
160 	    handler.startDocument ();
161 	    pushURL (false, "[document]",
162 			// default baseURI: null
163 		    new String [] { publicId, systemId, null},
164 		    reader, stream, encoding, false);
165 
166 	    parseDocument ();
167 	} catch (EOFException e){
168 	    //empty input
169 	    error("empty document, with no root element.");
170 	}finally {
171 	    if (reader != null)
172 		try { reader.close ();
173 		} catch (IOException e) { /* ignore */ }
174 	    if (stream != null)
175 		try { stream.close ();
176 		} catch (IOException e) { /* ignore */ }
177 	    if (is != null)
178 		try { is.close ();
179 		} catch (IOException e) { /* ignore */ }
180 	    if (reader != null)
181 		try {
182 		    reader.close ();
183 		} catch (IOException e) { /* ignore */
184 		}
185 	    scratch = null;
186 	}
187     }
188 
189 
190     ////////////////////////////////////////////////////////////////////////
191     // Constants.
192     ////////////////////////////////////////////////////////////////////////
193 
194     //
195     // Constants for element content type.
196     //
197 
198     /***
199      * Constant: an element has not been declared.
200      * @see #getElementContentType
201      */
202     public final static int CONTENT_UNDECLARED = 0;
203 
204     /***
205      * Constant: the element has a content model of ANY.
206      * @see #getElementContentType
207      */
208     public final static int CONTENT_ANY = 1;
209 
210     /***
211      * Constant: the element has declared content of EMPTY.
212      * @see #getElementContentType
213      */
214     public final static int CONTENT_EMPTY = 2;
215 
216     /***
217      * Constant: the element has mixed content.
218      * @see #getElementContentType
219      */
220     public final static int CONTENT_MIXED = 3;
221 
222     /***
223      * Constant: the element has element content.
224      * @see #getElementContentType
225      */
226     public final static int CONTENT_ELEMENTS = 4;
227 
228 
229     //
230     // Constants for the entity type.
231     //
232 
233     /***
234      * Constant: the entity has not been declared.
235      * @see #getEntityType
236      */
237     public final static int ENTITY_UNDECLARED = 0;
238 
239     /***
240      * Constant: the entity is internal.
241      * @see #getEntityType
242      */
243     public final static int ENTITY_INTERNAL = 1;
244 
245     /***
246      * Constant: the entity is external, non-parsable data.
247      * @see #getEntityType
248      */
249     public final static int ENTITY_NDATA = 2;
250 
251     /***
252      * Constant: the entity is external XML data.
253      * @see #getEntityType
254      */
255     public final static int ENTITY_TEXT = 3;
256 
257 
258     //
259     // Attribute type constants are interned literal strings.
260     //
261 
262     //
263     // Constants for supported encodings.  "external" is just a flag.
264     //
265     private final static int ENCODING_EXTERNAL = 0;
266     private final static int ENCODING_UTF_8 = 1;
267     private final static int ENCODING_ISO_8859_1 = 2;
268     private final static int ENCODING_UCS_2_12 = 3;
269     private final static int ENCODING_UCS_2_21 = 4;
270     private final static int ENCODING_UCS_4_1234 = 5;
271     private final static int ENCODING_UCS_4_4321 = 6;
272     private final static int ENCODING_UCS_4_2143 = 7;
273     private final static int ENCODING_UCS_4_3412 = 8;
274     private final static int ENCODING_ASCII = 9;
275 
276 
277     //
278     // Constants for attribute default value.
279     //
280 
281     /***
282      * Constant: the attribute is not declared.
283      * @see #getAttributeDefaultValueType
284      */
285     public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
286 
287     /***
288      * Constant: the attribute has a literal default value specified.
289      * @see #getAttributeDefaultValueType
290      * @see #getAttributeDefaultValue
291      */
292     public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
293 
294     /***
295      * Constant: the attribute was declared #IMPLIED.
296      * @see #getAttributeDefaultValueType
297      */
298     public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
299 
300     /***
301      * Constant: the attribute was declared #REQUIRED.
302      * @see #getAttributeDefaultValueType
303      */
304     public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
305 
306     /***
307      * Constant: the attribute was declared #FIXED.
308      * @see #getAttributeDefaultValueType
309      * @see #getAttributeDefaultValue
310      */
311     public final static int ATTRIBUTE_DEFAULT_FIXED = 34;
312 
313 
314     //
315     // Constants for input.
316     //
317     private final static int INPUT_NONE = 0;
318     private final static int INPUT_INTERNAL = 1;
319     private final static int INPUT_STREAM = 3;
320     private final static int INPUT_READER = 5;
321 
322 
323     //
324     // Flags for reading literals.
325     //
326 	// expand general entity refs (attribute values in dtd and content)
327     private final static int LIT_ENTITY_REF = 2;
328 	// normalize this value (space chars) (attributes, public ids)
329     private final static int LIT_NORMALIZE = 4;
330 	// literal is an attribute value 
331     private final static int LIT_ATTRIBUTE = 8;
332 	// don't expand parameter entities
333     private final static int LIT_DISABLE_PE = 16;
334 	// don't expand [or parse] character refs
335     private final static int LIT_DISABLE_CREF = 32;
336 	// don't parse general entity refs
337     private final static int LIT_DISABLE_EREF = 64;
338 	// literal is a public ID value 
339     private final static int LIT_PUBID = 256;
340 
341 
342     //
343     // Flags affecting PE handling in DTDs (if expandPE is true).
344     // PEs expand with space padding, except inside literals.
345     //
346     private final static int CONTEXT_NORMAL = 0;
347     private final static int CONTEXT_LITERAL = 1;
348 
349 
350     //////////////////////////////////////////////////////////////////////
351     // Error reporting.
352     //////////////////////////////////////////////////////////////////////
353 
354 
355     /***
356      * Report an error.
357      * @param message The error message.
358      * @param textFound The text that caused the error (or null).
359      * @see SAXDriver#error
360      * @see #line
361      */
362     private void error (String message, String textFound, String textExpected)
363     throws SAXException
364     {
365 	if (textFound != null) {
366 	    message = message + " (found \"" + textFound + "\")";
367 	}
368 	if (textExpected != null) {
369 	    message = message + " (expected \"" + textExpected + "\")";
370 	}
371 	handler.fatal (message);
372 
373 	// "can't happen"
374 	throw new SAXException (message);
375     }
376 
377 
378     /***
379      * Report a serious error.
380      * @param message The error message.
381      * @param textFound The text that caused the error (or null).
382      */
383     private void error (String message, char textFound, String textExpected)
384     throws SAXException
385     {
386 	error (message, new Character (textFound).toString (), textExpected);
387     }
388 
389     /*** Report typical case fatal errors. */
390     private void error (String message)
391     throws SAXException
392     {
393 	handler.fatal (message);
394     }
395 
396 
397     //////////////////////////////////////////////////////////////////////
398     // Major syntactic productions.
399     //////////////////////////////////////////////////////////////////////
400 
401 
402     /***
403      * Parse an XML document.
404      * <pre>
405      * [1] document ::= prolog element Misc*
406      * </pre>
407      * <p>This is the top-level parsing function for a single XML
408      * document.  As a minimum, a well-formed document must have
409      * a document element, and a valid document must have a prolog
410      * (one with doctype) as well.
411      */
412     private void parseDocument ()
413     throws Exception
414     {
415         try {                                       // added by MHK
416     	    boolean sawDTD = parseProlog ();
417     	    require ('<');
418     	    parseElement (!sawDTD);
419         } catch (EOFException ee) {                 // added by MHK
420             error("premature end of file", "[EOF]", null);
421         }
422         
423     	try {
424     	    parseMisc ();   //skip all white, PIs, and comments
425     	    char c = readCh ();    //if this doesn't throw an exception...
426     	    error ("unexpected characters after document end", c, null);
427     	} catch (EOFException e) {
428     	    return;
429     	}
430     }
431 
432     static final char	startDelimComment [] = { '<', '!', '-', '-' };
433     static final char	endDelimComment [] = { '-', '-' };
434 
435     /***
436      * Skip a comment.
437      * <pre>
438      * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
439      * </pre>
440      * <p> (The <code>&lt;!--</code> has already been read.)
441      */
442     private void parseComment ()
443     throws Exception
444     {
445 	char c;
446 	boolean saved = expandPE;
447 
448 	expandPE = false;
449 	parseUntil (endDelimComment);
450 	require ('>');
451 	expandPE = saved;
452 	handler.comment (dataBuffer, 0, dataBufferPos);
453 	dataBufferPos = 0;
454     }
455 
456     static final char	startDelimPI [] = { '<', '?' };
457     static final char	endDelimPI [] = { '?', '>' };
458 
459     /***
460      * Parse a processing instruction and do a call-back.
461      * <pre>
462      * [16] PI ::= '&lt;?' PITarget
463      *		(S (Char* - (Char* '?&gt;' Char*)))?
464      *		'?&gt;'
465      * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') )
466      * </pre>
467      * <p> (The <code>&lt;?</code> has already been read.)
468      */
469     private void parsePI ()
470     throws SAXException, IOException
471     {
472 	String name;
473 	boolean saved = expandPE;
474 
475 	expandPE = false;
476 	name = readNmtoken (true);
477 	//NE08
478 	if (name.indexOf(':') >= 0)
479            error ("Illegal character(':') in processing instruction name ", name, null);
480 	if ("xml".equalsIgnoreCase (name))
481 	    error ("Illegal processing instruction target", name, null);
482 	if (!tryRead (endDelimPI)) {
483 	    requireWhitespace ();
484 	    parseUntil (endDelimPI);
485 	}
486 	expandPE = saved;
487 	handler.processingInstruction (name, dataBufferToString ());
488     }
489 
490 
491     static final char	endDelimCDATA [] = { ']', ']', '>' };
492 
493 	private boolean isDirtyCurrentElement;
494 
495     /***
496      * Parse a CDATA section.
497      * <pre>
498      * [18] CDSect ::= CDStart CData CDEnd
499      * [19] CDStart ::= '&lt;![CDATA['
500      * [20] CData ::= (Char* - (Char* ']]&gt;' Char*))
501      * [21] CDEnd ::= ']]&gt;'
502      * </pre>
503      * <p> (The '&lt;![CDATA[' has already been read.)
504      */
505     private void parseCDSect ()
506     throws Exception
507     {
508 	parseUntil (endDelimCDATA);
509 	dataBufferFlush ();
510     }
511 
512 
513     /***
514      * Parse the prolog of an XML document.
515      * <pre>
516      * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
517      * </pre>
518      * <p>We do not look for the XML declaration here, because it was
519      * handled by pushURL ().
520      * @see pushURL
521      * @return true if a DTD was read.
522      */
523     private boolean parseProlog ()
524     throws Exception
525     {
526 	parseMisc ();
527 
528 	if (tryRead ("<!DOCTYPE")) {
529 	    parseDoctypedecl ();
530 	    parseMisc ();
531 	    return true;
532 	}
533 	return false;
534     }
535 
536     private void checkLegalVersion (String version)
537     throws SAXException
538     {
539 	int len = version.length ();
540 	for (int i = 0; i < len; i++) {
541 	    char c = version.charAt (i);
542 	    if ('0' <= c && c <= '9')
543 		continue;
544 	    if (c == '_' || c == '.' || c == ':' || c == '-')
545 		continue;
546 	    if ('a' <= c && c <= 'z')
547 		continue;
548 	    if ('A' <= c && c <= 'Z')
549 		continue;
550 	    error ("illegal character in version", version, "1.0");
551 	}
552     }
553 
554 
555     /***
556      * Parse the XML declaration.
557      * <pre>
558      * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
559      * [24] VersionInfo ::= S 'version' Eq
560      *		("'" VersionNum "'" | '"' VersionNum '"' )
561      * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*
562      * [32] SDDecl ::= S 'standalone' Eq
563      *		( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' )
564      * [80] EncodingDecl ::= S 'encoding' Eq
565      *		( "'" EncName "'" | "'" EncName "'" )
566      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
567      * </pre>
568      * <p> (The <code>&lt;?xml</code> and whitespace have already been read.)
569      * @return the encoding in the declaration, uppercased; or null
570      * @see #parseTextDecl
571      * @see #setupDecoding
572      */
573     private String parseXMLDecl (boolean ignoreEncoding)
574     throws SAXException, IOException
575     {
576 	String	version;
577 	String	encodingName = null;
578 	String	standalone = null;
579 	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
580 
581 	// Read the version.
582 	require ("version");
583 	parseEq ();
584 	checkLegalVersion (version = readLiteral (flags));
585 	if (!version.equals ("1.0")){
586 	    if(version.equals ("1.1")){
587 	    	handler.warn ("expected XML version 1.0, not: " + version);
588 	    	xmlVersion = XML_11;
589 	    }else {
590 	    	error("illegal XML version", version, "1.0 or 1.1");
591 	    }
592 	}
593 	else
594 	    xmlVersion = XML_10;
595 	// Try reading an encoding declaration.
596 	boolean white = tryWhitespace ();
597 
598 	if (tryRead ("encoding")) {
599 	    if (!white)
600 		error ("whitespace required before 'encoding='");
601 	    parseEq ();
602 	    encodingName = readLiteral (flags);
603 	    if (!ignoreEncoding)
604 		setupDecoding (encodingName);
605 	}
606 
607 	// Try reading a standalone declaration
608 	if (encodingName != null)
609 	    white = tryWhitespace ();
610 	if (tryRead ("standalone")) {
611 	    if (!white)
612 		error ("whitespace required before 'standalone='");
613 	    parseEq ();
614 	    standalone = readLiteral (flags);
615 	    if ("yes".equals (standalone))
616 		docIsStandalone = true;
617 	    else if (!"no".equals (standalone))
618 		error ("standalone flag must be 'yes' or 'no'");
619 	}
620 
621 	skipWhitespace ();
622 	require ("?>");
623 
624 	return encodingName;
625     }
626 
627 
628     /***
629      * Parse a text declaration.
630      * <pre>
631      * [79] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'
632      * [80] EncodingDecl ::= S 'encoding' Eq
633      *		( '"' EncName '"' | "'" EncName "'" )
634      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
635      * </pre>
636      * <p> (The <code>&lt;?xml</code>' and whitespace have already been read.)
637      * @return the encoding in the declaration, uppercased; or null
638      * @see #parseXMLDecl
639      * @see #setupDecoding
640      */
641     private String parseTextDecl (boolean ignoreEncoding)
642     throws SAXException, IOException
643     {
644 	String	encodingName = null;
645 	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
646 
647 	// Read an optional version.
648 	if (tryRead ("version")) {
649 	    String version;
650 	    parseEq ();
651 	    checkLegalVersion (version = readLiteral (flags));
652 	    
653 	    if (version.equals ("1.1")){
654 	    	if (xmlVersion == XML_10){
655 	    	   error ("external subset has later version number.", "1.0", version);    
656 	    	}
657 		handler.warn ("expected XML version 1.0, not: " + version);
658 		xmlVersion = XML_11;
659              }else if(!version.equals ("1.0")) {
660 		 error("illegal XML version", version, "1.0 or 1.1");
661 	     }
662 	    requireWhitespace ();
663 	}
664 
665 
666 	// Read the encoding.
667 	require ("encoding");
668 	parseEq ();
669 	encodingName = readLiteral (flags);
670 	if (!ignoreEncoding)
671 	    setupDecoding (encodingName);
672 
673 	skipWhitespace ();
674 	require ("?>");
675 
676 	return encodingName;
677     }
678 
679 
680     /***
681      * Sets up internal state so that we can decode an entity using the
682      * specified encoding.  This is used when we start to read an entity
683      * and we have been given knowledge of its encoding before we start to
684      * read any data (e.g. from a SAX input source or from a MIME type).
685      *
686      * <p> It is also used after autodetection, at which point only very
687      * limited adjustments to the encoding may be used (switching between
688      * related builtin decoders).
689      *
690      * @param encodingName The name of the encoding specified by the user.
691      * @exception IOException if the encoding isn't supported either
692      *	internally to this parser, or by the hosting JVM.
693      * @see #parseXMLDecl
694      * @see #parseTextDecl
695      */
696     private void setupDecoding (String encodingName)
697     throws SAXException, IOException
698     {
699 	encodingName = encodingName.toUpperCase ();
700 
701 	// ENCODING_EXTERNAL indicates an encoding that wasn't
702 	// autodetected ... we can use builtin decoders, or
703 	// ones from the JVM (InputStreamReader).
704 
705 	// Otherwise we can only tweak what was autodetected, and
706 	// only for single byte (ASCII derived) builtin encodings.
707 
708 	// ASCII-derived encodings
709 	if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) {
710 	    if (encodingName.equals ("ISO-8859-1")
711 		    || encodingName.equals ("8859_1")
712 		    || encodingName.equals ("ISO8859_1")
713 	      ) {
714 		encoding = ENCODING_ISO_8859_1;
715 		return;
716 	    } else if (encodingName.equals ("US-ASCII")
717 			|| encodingName.equals ("ASCII")) {
718 		encoding = ENCODING_ASCII;
719 		return;
720 	    } else if (encodingName.equals ("UTF-8")
721 			|| encodingName.equals ("UTF8")) {
722 		encoding = ENCODING_UTF_8;
723 		return;
724 	    } else if (encoding != ENCODING_EXTERNAL) {
725 		// used to start with a new reader ...
726 		throw new UnsupportedEncodingException (encodingName);
727 	    }
728 	    // else fallthrough ...
729 	    // it's ASCII-ish and something other than a builtin
730 	}
731 
732 	// Unicode and such
733 	if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) {
734 	    if (!(encodingName.equals ("ISO-10646-UCS-2")
735 		    || encodingName.equals ("UTF-16")
736 		    || encodingName.equals ("UTF-16BE")
737 		    || encodingName.equals ("UTF-16LE")))
738 		error ("unsupported Unicode encoding",
739 		       encodingName,
740 		       "UTF-16");
741 	    return;
742 	}
743 
744 	// four byte encodings
745 	if (encoding == ENCODING_UCS_4_1234
746 		|| encoding == ENCODING_UCS_4_4321
747 		|| encoding == ENCODING_UCS_4_2143
748 		|| encoding == ENCODING_UCS_4_3412) {
749 	    // Strictly:  "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists
750 	    if (!encodingName.equals ("ISO-10646-UCS-4"))
751 		error ("unsupported 32-bit encoding",
752 		       encodingName,
753 		       "ISO-10646-UCS-4");
754 	    return;
755 	}
756 
757 	// assert encoding == ENCODING_EXTERNAL
758 	// if (encoding != ENCODING_EXTERNAL)
759 	//     throw new RuntimeException ("encoding = " + encoding);
760 
761 	if (encodingName.equals ("UTF-16BE")) {
762 	    encoding = ENCODING_UCS_2_12;
763 	    return;
764 	}
765 	if (encodingName.equals ("UTF-16LE")) {
766 	    encoding = ENCODING_UCS_2_21;
767 	    return;
768 	}
769 
770 	// We couldn't use the builtin decoders at all.  But we can try to
771 	// create a reader, since we haven't messed up buffering.  Tweak
772 	// the encoding name if necessary.
773 
774 	if (encodingName.equals ("UTF-16")
775 		|| encodingName.equals ("ISO-10646-UCS-2"))
776 	    encodingName = "Unicode";
777 	// Ignoring all the EBCDIC aliases here
778 
779 	reader = new InputStreamReader (is, encodingName);
780 	sourceType = INPUT_READER;
781     }
782 
783 
784     /***
785      * Parse miscellaneous markup outside the document element and DOCTYPE
786      * declaration.
787      * <pre>
788      * [27] Misc ::= Comment | PI | S
789      * </pre>
790      */
791     private void parseMisc ()
792     throws Exception
793     {
794 	while (true) {
795 	    skipWhitespace ();
796 	    if (tryRead (startDelimPI)) {
797 		parsePI ();
798 	    } else if (tryRead (startDelimComment)) {
799 		parseComment ();
800 	    } else {
801 		return;
802 	    }
803 	}
804     }
805 
806 
807     /***
808      * Parse a document type declaration.
809      * <pre>
810      * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
811      *		('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'
812      * </pre>
813      * <p> (The <code>&lt;!DOCTYPE</code> has already been read.)
814      */
815     private void parseDoctypedecl ()
816     throws Exception
817     {
818 	String rootName, ids[];
819 
820 	// Read the document type name.
821 	requireWhitespace ();
822 	rootName = readNmtoken (true);
823 
824 	// Read the External subset's IDs
825 	skipWhitespace ();
826 	ids = readExternalIds (false, true);
827 
828 	// report (a) declaration of name, (b) lexical info (ids)
829 	handler.doctypeDecl (rootName, ids [0], ids [1]);
830 
831 	// Internal subset is parsed first, if present
832 	skipWhitespace ();
833 	if (tryRead ('[')) {
834 
835 	    // loop until the subset ends
836 	    while (true) {
837 		doReport = expandPE = true;
838 		skipWhitespace ();
839 		doReport = expandPE = false;
840 		if (tryRead (']')) {
841 		    break; 		// end of subset
842 		} else {
843 		    // WFC, PEs in internal subset (only between decls)
844 		    peIsError = expandPE = true;
845 		    parseMarkupdecl ();
846 		    peIsError = expandPE = false;
847 		}
848 	    }
849 	}
850 	skipWhitespace ();
851 	require ('>');
852 
853 	// Read the external subset, if any
854 	InputSource	subset;
855 
856 	if (ids [1] == null)
857 	    subset = handler.getExternalSubset (rootName,
858 	    		handler.getSystemId ());
859 	else
860 	    subset = null;
861 	if (ids [1] != null || subset != null) {
862 	    pushString (null, ">");
863 
864 	    // NOTE:  [dtd] is so we say what SAX2 expects,
865 	    // though it's misleading (subset, not entire dtd)
866 	    if (ids [1] != null)
867 		pushURL (true, "[dtd]", ids, null, null, null, true);
868 	    else {
869 		handler.warn ("modifying document by adding external subset");
870 		pushURL (true, "[dtd]",
871 		    new String [] { subset.getPublicId (),
872 			    subset.getSystemId (), null },
873 		    subset.getCharacterStream (),
874 		    subset.getByteStream (),
875 		    subset.getEncoding (),
876 		    false);
877 	    }
878 
879 	    // Loop until we end up back at '>'
880 	    while (true) {
881 		doReport = expandPE = true;
882 		skipWhitespace ();
883 		doReport = expandPE = false;
884 		if (tryRead ('>')) {
885 		    break;
886 		} else {
887 		    expandPE = true;
888 		    parseMarkupdecl ();
889 		    expandPE = false;
890 		}
891 	    }
892 
893 	    // the ">" string isn't popped yet
894 	    if (inputStack.size () != 1)
895 		error ("external subset has unmatched '>'");
896 	}
897 
898 	// done dtd
899 	handler.endDoctype ();
900 	expandPE = false;
901 	doReport = true;
902     }
903 
904 
905     /***
906      * Parse a markup declaration in the internal or external DTD subset.
907      * <pre>
908      * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl
909      *		| NotationDecl | PI | Comment
910      * [30] extSubsetDecl ::= (markupdecl | conditionalSect
911      *		| PEReference | S) *
912      * </pre>
913      * <p> Reading toplevel PE references is handled as a lexical issue
914      * by the caller, as is whitespace.
915      */
916     private void parseMarkupdecl ()
917     throws Exception
918     {
919 	char	saved [] = null;
920 	boolean	savedPE = expandPE;
921 
922 	// prevent "<%foo;" and ensures saved entity is right
923 	require ('<');
924 	unread ('<');
925 	expandPE = false;
926 
927 	if (tryRead ("<!ELEMENT")) {
928 	    saved = readBuffer;
929 	    expandPE = savedPE;
930 	    parseElementDecl ();
931 	} else if (tryRead ("<!ATTLIST")) {
932 	    saved = readBuffer;
933 	    expandPE = savedPE;
934 	    parseAttlistDecl ();
935 	} else if (tryRead ("<!ENTITY")) {
936 	    saved = readBuffer;
937 	    expandPE = savedPE;
938 	    parseEntityDecl ();
939 	} else if (tryRead ("<!NOTATION")) {
940 	    saved = readBuffer;
941 	    expandPE = savedPE;
942 	    parseNotationDecl ();
943 	} else if (tryRead (startDelimPI)) {
944 	    saved = readBuffer;
945 	    expandPE = savedPE;
946 	    parsePI ();
947 	} else if (tryRead (startDelimComment)) {
948 	    saved = readBuffer;
949 	    expandPE = savedPE;
950 	    parseComment ();
951 	} else if (tryRead ("<![")) {
952 	    saved = readBuffer;
953 	    expandPE = savedPE;
954 	    if (inputStack.size () > 0)
955 		parseConditionalSect (saved);
956 	    else
957 		error ("conditional sections illegal in internal subset");
958 	} else {
959 	    error ("expected markup declaration");
960 	}
961 
962 	// VC: Proper Decl/PE Nesting
963 	if (readBuffer != saved)
964 	    handler.verror ("Illegal Declaration/PE nesting");
965     }
966 
967 
968     /***
969      * Parse an element, with its tags.
970      * <pre>
971      * [39] element ::= EmptyElementTag | STag content ETag
972      * [40] STag ::= '&lt;' Name (S Attribute)* S? '&gt;'
973      * [44] EmptyElementTag ::= '&lt;' Name (S Attribute)* S? '/&gt;'
974      * </pre>
975      * <p> (The '&lt;' has already been read.)
976      * <p>NOTE: this method actually chains onto parseContent (), if necessary,
977      * and parseContent () will take care of calling parseETag ().
978      */
979     private void parseElement (boolean maybeGetSubset)
980     throws Exception
981     {
982 	String	gi;
983 	char	c;
984 	int	oldElementContent = currentElementContent;
985 	String	oldElement = currentElement;
986 	Object	element [];
987 
988 	// This is the (global) counter for the
989 	// array of specified attributes.
990 	tagAttributePos = 0;
991 
992 	// Read the element type name.
993 	gi = readNmtoken (true);
994 
995 	// If we saw no DTD, and this is the document root element,
996 	// let the application modify the input stream by providing one.
997 	if (maybeGetSubset) {
998 	    InputSource	subset = handler.getExternalSubset (gi,
999 	    		handler.getSystemId ());
1000 	    if (subset != null) {
1001 		String	publicId = subset.getPublicId ();
1002 		String	systemId = subset.getSystemId ();
1003 
1004 		handler.warn ("modifying document by adding DTD");
1005 		handler.doctypeDecl (gi, publicId, systemId);
1006 		pushString (null, ">");
1007 
1008 		// NOTE:  [dtd] is so we say what SAX2 expects,
1009 		// though it's misleading (subset, not entire dtd)
1010 		pushURL (true, "[dtd]",
1011 		    new String [] { publicId, systemId, null },
1012 		    subset.getCharacterStream (),
1013 		    subset.getByteStream (),
1014 		    subset.getEncoding (),
1015 		    false);
1016 
1017 		// Loop until we end up back at '>'
1018 		while (true) {
1019 		    doReport = expandPE = true;
1020 		    skipWhitespace ();
1021 		    doReport = expandPE = false;
1022 		    if (tryRead ('>')) {
1023 			break;
1024 		    } else {
1025 			expandPE = true;
1026 			parseMarkupdecl ();
1027 			expandPE = false;
1028 		    }
1029 		}
1030 
1031 		// the ">" string isn't popped yet
1032 		if (inputStack.size () != 1)
1033 		    error ("external subset has unmatched '>'");
1034 
1035 		handler.endDoctype ();
1036 	    }
1037 	}
1038 
1039 	// Determine the current content type.
1040 	currentElement = gi;
1041 	element = (Object []) elementInfo.get (gi);
1042 	currentElementContent = getContentType (element, CONTENT_ANY);
1043 
1044 	// Read the attributes, if any.
1045 	// After this loop, "c" is the closing delimiter.
1046 	boolean white = tryWhitespace ();
1047 	c = readCh ();
1048 	while (c != '/' && c != '>') {
1049 	    unread (c);
1050 	    if (!white)
1051 		error ("need whitespace between attributes");
1052 	    parseAttribute (gi);
1053 	    white = tryWhitespace ();
1054 	    c = readCh ();
1055 	}
1056 
1057 	// Supply any defaulted attributes.
1058 	Enumeration atts = declaredAttributes (element);
1059 	if (atts != null) {
1060 	    String aname;
1061 loop:
1062 	    while (atts.hasMoreElements ()) {
1063 		aname = (String) atts.nextElement ();
1064 		// See if it was specified.
1065 		for (int i = 0; i < tagAttributePos; i++) {
1066 		    if (tagAttributes [i] == aname) {
1067 			continue loop;
1068 		    }
1069 		}
1070 		// ... or has a default
1071 		String value = getAttributeDefaultValue (gi, aname);
1072 
1073 		if (value == null)
1074 		    continue;
1075 		handler.attribute (aname, value, false);
1076 	    }
1077 	}
1078 
1079 	// Figure out if this is a start tag
1080 	// or an empty element, and dispatch an
1081 	// event accordingly.
1082 	switch (c) {
1083 	case '>':
1084 	    handler.startElement (gi);
1085 	    parseContent ();
1086 	    break;
1087 	case '/':
1088 	    require ('>');
1089 	    handler.startElement (gi);
1090 	    handler.endElement (gi);
1091 	    break;
1092 	}
1093 
1094 	// Restore the previous state.
1095 	currentElement = oldElement;
1096 	currentElementContent = oldElementContent;
1097     }
1098 
1099 
1100     /***
1101      * Parse an attribute assignment.
1102      * <pre>
1103      * [41] Attribute ::= Name Eq AttValue
1104      * </pre>
1105      * @param name The name of the attribute's element.
1106      * @see SAXDriver#attribute
1107      */
1108     private void parseAttribute (String name)
1109     throws Exception
1110     {
1111 	String aname;
1112 	String type;
1113 	String value;
1114 	int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF;
1115 
1116 	// Read the attribute name.
1117 	aname = readNmtoken (true);
1118 	type = getAttributeType (name, aname);
1119 
1120 	// Parse '='
1121 	parseEq ();
1122 
1123 	// Read the value, normalizing whitespace
1124 	// unless it is CDATA.
1125   if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
1126     if (type == "CDATA" || type == null) {
1127 	    value = readLiteral (flags);
1128     } else {
1129 	    value = readLiteral (flags | LIT_NORMALIZE);
1130     }
1131   } else {
1132     if (type.equals("CDATA") || type == null) {
1133 	    value = readLiteral (flags);
1134     } else {
1135 	    value = readLiteral (flags | LIT_NORMALIZE);
1136     }
1137   }
1138 
1139 	// WFC: no duplicate attributes
1140 	for (int i = 0; i < tagAttributePos; i++)
1141 	    if (aname.equals (tagAttributes [i]))
1142 		error ("duplicate attribute", aname, null);
1143 
1144 	// Inform the handler about the
1145 	// attribute.
1146 	handler.attribute (aname, value, true);
1147 	dataBufferPos = 0;
1148 
1149 	// Note that the attribute has been
1150 	// specified.
1151 	if (tagAttributePos == tagAttributes.length) {
1152 	    String newAttrib[] = new String [tagAttributes.length * 2];
1153 	    System.arraycopy (tagAttributes, 0, newAttrib, 0, tagAttributePos);
1154 	    tagAttributes = newAttrib;
1155 	}
1156 	tagAttributes [tagAttributePos++] = aname;
1157     }
1158 
1159 
1160     /***
1161      * Parse an equals sign surrounded by optional whitespace.
1162      * <pre>
1163      * [25] Eq ::= S? '=' S?
1164      * </pre>
1165      */
1166     private void parseEq ()
1167     throws SAXException, IOException
1168     {
1169 	skipWhitespace ();
1170 	require ('=');
1171 	skipWhitespace ();
1172     }
1173 
1174 
1175     /***
1176      * Parse an end tag.
1177      * <pre>
1178      * [42] ETag ::= '</' Name S? '>'
1179      * </pre>
1180      * <p>NOTE: parseContent () chains to here, we already read the
1181      * "&lt;/".
1182      */
1183     private void parseETag ()
1184     throws Exception
1185     {
1186 	require (currentElement);
1187 	skipWhitespace ();
1188 	require ('>');
1189 	handler.endElement (currentElement);
1190 	// not re-reporting any SAXException re bogus end tags,
1191 	// even though that diagnostic might be clearer ...
1192     }
1193 
1194 
1195     /***
1196      * Parse the content of an element.
1197      * <pre>
1198      * [43] content ::= (element | CharData | Reference
1199      *		| CDSect | PI | Comment)*
1200      * [67] Reference ::= EntityRef | CharRef
1201      * </pre>
1202      * <p> NOTE: consumes ETag.
1203      */
1204     private void parseContent ()
1205     throws Exception
1206     {
1207 	char c;
1208 
1209 	while (true) {
1210 	    // consume characters (or ignorable whitspace) until delimiter
1211 	    parseCharData ();
1212 
1213 	    // Handle delimiters
1214 	    c = readCh ();
1215 	    switch (c) {
1216 
1217 	    case '&': 			// Found "&"
1218 		c = readCh ();
1219 		if (c == '#') {
1220 		    parseCharRef ();
1221 		} else {
1222 		    unread (c);
1223 		    parseEntityRef (true);
1224 		}
1225 		isDirtyCurrentElement = true;
1226 		break;
1227 
1228 	      case '<': 			// Found "<"
1229 		dataBufferFlush ();
1230 		c = readCh ();
1231 		switch (c) {
1232 		  case '!': 			// Found "<!"
1233 		    c = readCh ();
1234 		    switch (c) {
1235 		      case '-': 		// Found "<!-"
1236 			require ('-');
1237 			isDirtyCurrentElement = false;
1238 			parseComment ();
1239 			break;
1240 		      case '[': 		// Found "<!["
1241 		      	isDirtyCurrentElement = false;
1242 			require ("CDATA[");
1243 			handler.startCDATA ();
1244 			inCDATA = true;
1245 			parseCDSect ();
1246 			inCDATA = false;
1247 			handler.endCDATA ();
1248 			break;
1249 		      default:
1250 			error ("expected comment or CDATA section", c, null);
1251 	                break;
1252 		    }
1253 		    break;
1254 
1255 		  case '?': 		// Found "<?"
1256 		    isDirtyCurrentElement = false;
1257 		    parsePI ();
1258 		    break;
1259 
1260 		  case '/': 		// Found "</"
1261 		    isDirtyCurrentElement = false;
1262 		    parseETag ();
1263 		    return;
1264 
1265 		  default: 		// Found "<" followed by something else
1266 		    isDirtyCurrentElement = false;
1267 		    unread (c);
1268 		    parseElement (false);
1269 		    break;
1270 		}
1271 	    }
1272 	}
1273 	
1274     }
1275 
1276 
1277     /***
1278      * Parse an element type declaration.
1279      * <pre>
1280      * [45] elementdecl ::= '&lt;!ELEMENT' S Name S contentspec S? '&gt;'
1281      * </pre>
1282      * <p> NOTE: the '&lt;!ELEMENT' has already been read.
1283      */
1284     private void parseElementDecl ()
1285     throws Exception
1286     {
1287 	String name;
1288 
1289 	requireWhitespace ();
1290 	// Read the element type name.
1291 	name = readNmtoken (true);
1292 
1293 	requireWhitespace ();
1294 	// Read the content model.
1295 	parseContentspec (name);
1296 
1297 	skipWhitespace ();
1298 	require ('>');
1299     }
1300 
1301 
1302     /***
1303      * Content specification.
1304      * <pre>
1305      * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
1306      * </pre>
1307      */
1308     private void parseContentspec (String name)
1309     throws Exception
1310     {
1311 // FIXME: move elementDecl() into setElement(), pass EMTPY/ANY ...
1312 	if (tryRead ("EMPTY")) {
1313 	    setElement (name, CONTENT_EMPTY, null, null);
1314 	    if (!skippedPE)
1315 		handler.getDeclHandler ().elementDecl (name, "EMPTY");
1316 	    return;
1317 	} else if (tryRead ("ANY")) {
1318 	    setElement (name, CONTENT_ANY, null, null);
1319 	    if (!skippedPE)
1320 		handler.getDeclHandler ().elementDecl (name, "ANY");
1321 	    return;
1322 	} else {
1323 	    String	model;
1324 	    char	saved []; 
1325 
1326 	    require ('(');
1327 	    saved = readBuffer;
1328 	    dataBufferAppend ('(');
1329 	    skipWhitespace ();
1330 	    if (tryRead ("#PCDATA")) {
1331 		dataBufferAppend ("#PCDATA");
1332 		parseMixed (saved);
1333 		model = dataBufferToString ();
1334 		setElement (name, CONTENT_MIXED, model, null);
1335 	    } else {
1336 		parseElements (saved);
1337 		model = dataBufferToString ();
1338 		setElement (name, CONTENT_ELEMENTS, model, null);
1339 	    }
1340 	    if (!skippedPE)
1341 		handler.getDeclHandler ().elementDecl (name, model);
1342 	}
1343     }
1344 
1345     /***
1346      * Parse an element-content model.
1347      * <pre>
1348      * [47] elements ::= (choice | seq) ('?' | '*' | '+')?
1349      * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')'
1350      * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
1351      * </pre>
1352      *
1353      * <p> NOTE: the opening '(' and S have already been read.
1354      *
1355      * @param saved Buffer for entity that should have the terminal ')'
1356      */
1357     private void parseElements (char saved [])
1358     throws Exception
1359     {
1360 	char c;
1361 	char sep;
1362 
1363 	// Parse the first content particle
1364 	skipWhitespace ();
1365 	parseCp ();
1366 
1367 	// Check for end or for a separator.
1368 	skipWhitespace ();
1369 	c = readCh ();
1370 	switch (c) {
1371 	case ')':
1372 	    // VC: Proper Group/PE Nesting
1373 	    if (readBuffer != saved)
1374 		handler.verror ("Illegal Group/PE nesting");
1375 
1376 	    dataBufferAppend (')');
1377 	    c = readCh ();
1378 	    switch (c) {
1379 	    case '*':
1380 	    case '+':
1381 	    case '?':
1382 		dataBufferAppend (c);
1383 		break;
1384 	    default:
1385 		unread (c);
1386 	    }
1387 	    return;
1388 	case ',': 			// Register the separator.
1389 	case '|':
1390 	    sep = c;
1391 	    dataBufferAppend (c);
1392 	    break;
1393 	default:
1394 	    error ("bad separator in content model", c, null);
1395 	    return;
1396 	}
1397 
1398 	// Parse the rest of the content model.
1399 	while (true) {
1400 	    skipWhitespace ();
1401 	    parseCp ();
1402 	    skipWhitespace ();
1403 	    c = readCh ();
1404 	    if (c == ')') {
1405 		// VC: Proper Group/PE Nesting
1406 		if (readBuffer != saved)
1407 		    handler.verror ("Illegal Group/PE nesting");
1408 
1409 		dataBufferAppend (')');
1410 		break;
1411 	    } else if (c != sep) {
1412 		error ("bad separator in content model", c, null);
1413 		return;
1414 	    } else {
1415 		dataBufferAppend (c);
1416 	    }
1417 	}
1418 
1419 	// Check for the occurrence indicator.
1420 	c = readCh ();
1421 	switch (c) {
1422 	case '?':
1423 	case '*':
1424 	case '+':
1425 	    dataBufferAppend (c);
1426 	    return;
1427 	default:
1428 	    unread (c);
1429 	    return;
1430 	}
1431     }
1432 
1433 
1434     /***
1435      * Parse a content particle.
1436      * <pre>
1437      * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1438      * </pre>
1439      */
1440     private void parseCp ()
1441     throws Exception
1442     {
1443 	if (tryRead ('(')) {
1444 	    dataBufferAppend ('(');
1445 	    parseElements (readBuffer);
1446 	} else {
1447 	    dataBufferAppend (readNmtoken (true));
1448 	    char c = readCh ();
1449 	    switch (c) {
1450 	    case '?':
1451 	    case '*':
1452 	    case '+':
1453 		dataBufferAppend (c);
1454 		break;
1455 	    default:
1456 		unread (c);
1457 		break;
1458 	    }
1459 	}
1460     }
1461 
1462 
1463     /***
1464      * Parse mixed content.
1465      * <pre>
1466      * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*'
1467      *	      | '(' S? ('#PCDATA') S? ')'
1468      * </pre>
1469      *
1470      * @param saved Buffer for entity that should have the terminal ')'
1471      */
1472     private void parseMixed (char saved [])
1473     throws Exception
1474     {
1475 	// Check for PCDATA alone.
1476 	skipWhitespace ();
1477 	if (tryRead (')')) {
1478 	    // VC: Proper Group/PE Nesting
1479 	    if (readBuffer != saved)
1480 		handler.verror ("Illegal Group/PE nesting");
1481 
1482 	    dataBufferAppend (")*");
1483 	    tryRead ('*');
1484 	    return;
1485 	}
1486 
1487 	// Parse mixed content.
1488 	skipWhitespace ();
1489 	while (!tryRead (")")) {
1490 	    require ('|');
1491 	    dataBufferAppend ('|');
1492 	    skipWhitespace ();
1493 	    dataBufferAppend (readNmtoken (true));
1494 	    skipWhitespace ();
1495 	}
1496 
1497 	// VC: Proper Group/PE Nesting
1498 	if (readBuffer != saved)
1499 	    handler.verror ("Illegal Group/PE nesting");
1500 
1501 	require ('*');
1502 	dataBufferAppend (")*");
1503     }
1504 
1505 
1506     /***
1507      * Parse an attribute list declaration.
1508      * <pre>
1509      * [52] AttlistDecl ::= '&lt;!ATTLIST' S Name AttDef* S? '&gt;'
1510      * </pre>
1511      * <p>NOTE: the '&lt;!ATTLIST' has already been read.
1512      */
1513     private void parseAttlistDecl ()
1514     throws Exception
1515     {
1516 	String elementName;
1517 
1518 	requireWhitespace ();
1519 	elementName = readNmtoken (true);
1520 	boolean white = tryWhitespace ();
1521 	while (!tryRead ('>')) {
1522 	    if (!white)
1523 		error ("whitespace required before attribute definition");
1524 	    parseAttDef (elementName);
1525 	    white = tryWhitespace ();
1526 	}
1527     }
1528 
1529 
1530     /***
1531      * Parse a single attribute definition.
1532      * <pre>
1533      * [53] AttDef ::= S Name S AttType S DefaultDecl
1534      * </pre>
1535      */
1536     private void parseAttDef (String elementName)
1537     throws Exception
1538     {
1539 	String name;
1540 	String type;
1541 	String enumer = null;
1542 
1543 	// Read the attribute name.
1544 	name = readNmtoken (true);
1545 
1546 	// Read the attribute type.
1547 	requireWhitespace ();
1548 	type = readAttType ();
1549 
1550 	// Get the string of enumerated values if necessary.
1551   if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
1552     if ("ENUMERATION" == type || "NOTATION" == type)
1553 	    enumer = dataBufferToString ();
1554   } else {
1555     if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
1556 	    enumer = dataBufferToString ();
1557   }
1558 
1559 	// Read the default value.
1560 	requireWhitespace ();
1561 	parseDefault (elementName, name, type, enumer);
1562     }
1563 
1564 
1565   /***
1566    * Parse the attribute type.
1567    * <pre>
1568    * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1569    * [55] StringType ::= 'CDATA'
1570    * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
1571    *		| 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1572    * [57] EnumeratedType ::= NotationType | Enumeration
1573    * </pre>
1574    */
1575   private String readAttType ()
1576     throws Exception
1577   {
1578     if (tryRead ('(')) {
1579 	    parseEnumeration (false);
1580 	    return "ENUMERATION";
1581     } else {
1582 	    String typeString = readNmtoken (true);
1583       if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
1584         if ("NOTATION" == typeString) {
1585           parseNotationType ();
1586           return typeString;
1587         } else if ("CDATA" == typeString
1588                    || "ID" == typeString
1589                    || "IDREF" == typeString
1590                    || "IDREFS" == typeString
1591                    || "ENTITY" == typeString
1592                    || "ENTITIES" == typeString
1593                    || "NMTOKEN" == typeString
1594                    || "NMTOKENS" == typeString)
1595           return typeString;
1596       } else {
1597         if ("NOTATION".equals(typeString)) {
1598           parseNotationType ();
1599           return typeString;
1600         } else if ("CDATA".equals(typeString)
1601                    || "ID".equals(typeString)
1602                    || "IDREF".equals(typeString)
1603                    || "IDREFS".equals(typeString)
1604                    || "ENTITY".equals(typeString)
1605                    || "ENTITIES".equals(typeString)
1606                    || "NMTOKEN".equals(typeString)
1607                    || "NMTOKENS".equals(typeString))
1608           return typeString;
1609       }
1610 	    error ("illegal attribute type", typeString, null);
1611 	    return null;
1612     }
1613   }
1614   
1615 
1616     /***
1617      * Parse an enumeration.
1618      * <pre>
1619      * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1620      * </pre>
1621      * <p>NOTE: the '(' has already been read.
1622      */
1623     private void parseEnumeration (boolean isNames)
1624     throws Exception
1625     {
1626 	dataBufferAppend ('(');
1627 
1628 	// Read the first token.
1629 	skipWhitespace ();
1630 	dataBufferAppend (readNmtoken (isNames));
1631 	// Read the remaining tokens.
1632 	skipWhitespace ();
1633 	while (!tryRead (')')) {
1634 	    require ('|');
1635 	    dataBufferAppend ('|');
1636 	    skipWhitespace ();
1637 	    dataBufferAppend (readNmtoken (isNames));
1638 	    skipWhitespace ();
1639 	}
1640 	dataBufferAppend (')');
1641     }
1642 
1643 
1644     /***
1645      * Parse a notation type for an attribute.
1646      * <pre>
1647      * [58] NotationType ::= 'NOTATION' S '(' S? Name
1648      *		(S? '|' S? Name)* S? ')'
1649      * </pre>
1650      * <p>NOTE: the 'NOTATION' has already been read
1651      */
1652     private void parseNotationType ()
1653     throws Exception
1654     {
1655 	requireWhitespace ();
1656 	require ('(');
1657 
1658 	parseEnumeration (true);
1659     }
1660 
1661 
1662     /***
1663      * Parse the default value for an attribute.
1664      * <pre>
1665      * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1666      *		| (('#FIXED' S)? AttValue)
1667      * </pre>
1668      */
1669     private void parseDefault (
1670 	String elementName,
1671 	String name,
1672 	String type,
1673 	String enumer
1674     ) throws Exception
1675     {
1676 	int	valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
1677 	String	value = null;
1678 	int	flags = LIT_ATTRIBUTE;
1679 	boolean	saved = expandPE;
1680 	String	defaultType = null;
1681 
1682 	// LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace
1683 	// chars to spaces (doesn't matter when that's done if it doesn't
1684 	// interfere with char refs expanding to whitespace).
1685 
1686 	if (!skippedPE) {
1687     flags |= LIT_ENTITY_REF;
1688     if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
1689 	    if ("CDATA" != type)
1690         flags |= LIT_NORMALIZE;
1691     } else {
1692 	    if (!"CDATA".equals(type))
1693         flags |= LIT_NORMALIZE;
1694     }
1695 	}
1696 
1697 	expandPE = false;
1698 	if (tryRead ('#')) {
1699 	    if (tryRead ("FIXED")) {
1700 		defaultType = "#FIXED";
1701 		valueType = ATTRIBUTE_DEFAULT_FIXED;
1702 		requireWhitespace ();
1703 		value = readLiteral (flags);
1704 	    } else if (tryRead ("REQUIRED")) {
1705 		defaultType = "#REQUIRED";
1706 		valueType = ATTRIBUTE_DEFAULT_REQUIRED;
1707 	    } else if (tryRead ("IMPLIED")) {
1708 		defaultType = "#IMPLIED";
1709 		valueType = ATTRIBUTE_DEFAULT_IMPLIED;
1710 	    } else {
1711 		error ("illegal keyword for attribute default value");
1712 	    }
1713 	} else
1714 	    value = readLiteral (flags);
1715 	expandPE = saved;
1716 	setAttribute (elementName, name, type, enumer, value, valueType);
1717   if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
1718     if ("ENUMERATION" == type)
1719 	    type = enumer;
1720     else if ("NOTATION" == type)
1721 	    type = "NOTATION " + enumer;
1722   } else {
1723     if ("ENUMERATION".equals(type))
1724 	    type = enumer;
1725     else if ("NOTATION".equals(type))
1726 	    type = "NOTATION " + enumer;
1727   }
1728 	if (!skippedPE) handler.getDeclHandler ()
1729 	    .attributeDecl (elementName, name, type, defaultType, value);
1730     }
1731 
1732 
1733     /***
1734      * Parse a conditional section.
1735      * <pre>
1736      * [61] conditionalSect ::= includeSect | ignoreSect
1737      * [62] includeSect ::= '&lt;![' S? 'INCLUDE' S? '['
1738      *		extSubsetDecl ']]&gt;'
1739      * [63] ignoreSect ::= '&lt;![' S? 'IGNORE' S? '['
1740      *		ignoreSectContents* ']]&gt;'
1741      * [64] ignoreSectContents ::= Ignore
1742      *		('&lt;![' ignoreSectContents* ']]&gt;' Ignore )*
1743      * [65] Ignore ::= Char* - (Char* ( '&lt;![' | ']]&gt;') Char* )
1744      * </pre>
1745      * <p> NOTE: the '&lt;![' has already been read.
1746      */
1747     private void parseConditionalSect (char saved [])
1748     throws Exception
1749     {
1750 	skipWhitespace ();
1751 	if (tryRead ("INCLUDE")) {
1752 	    skipWhitespace ();
1753 	    require ('[');
1754 	    // VC: Proper Conditional Section/PE Nesting
1755 	    if (readBuffer != saved)
1756 		handler.verror ("Illegal Conditional Section/PE nesting");
1757 	    skipWhitespace ();
1758 	    while (!tryRead ("]]>")) {
1759 		parseMarkupdecl ();
1760 		skipWhitespace ();
1761 	    }
1762 	} else if (tryRead ("IGNORE")) {
1763 	    skipWhitespace ();
1764 	    require ('[');
1765 	    // VC: Proper Conditional Section/PE Nesting
1766 	    if (readBuffer != saved)
1767 		handler.verror ("Illegal Conditional Section/PE nesting");
1768 	    int nesting = 1;
1769 	    char c;
1770 	    expandPE = false;
1771 	    for (int nest = 1; nest > 0;) {
1772 		c = readCh ();
1773 		switch (c) {
1774 		case '<':
1775 		    if (tryRead ("![")) {
1776 			nest++;
1777 		    }
1778 		case ']':
1779 		    if (tryRead ("]>")) {
1780 			nest--;
1781 		    }
1782 		}
1783 	    }
1784 	    expandPE = true;
1785 	} else {
1786 	    error ("conditional section must begin with INCLUDE or IGNORE");
1787 	}
1788     }
1789 
1790   private void parseCharRef ()
1791     throws SAXException, IOException
1792   {
1793     parseCharRef (true /* do flushDataBuffer by default */);
1794   }
1795 
1796   /***
1797    * Try to read and validate a character reference without appending anything to the data buffer.
1798    * <pre>
1799    * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1800    * </pre>
1801    * <p>NOTE: the '&#' has already been read.
1802    */
1803   private void tryReadCharRef ()
1804   throws SAXException, IOException
1805   {
1806   	int value = 0;
1807 	char c;
1808 
1809 	if (tryRead ('x')) {
1810 loop1:
1811 	    while (true) {
1812 		c = readCh ();
1813 		int n;
1814 		switch (c) {
1815 		case '0': case '1': case '2': case '3': case '4':
1816 		case '5': case '6': case '7': case '8': case '9':
1817 		    n = c - '0';
1818 		    break;
1819 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1820 		    n = (c - 'a') + 10;
1821 		    break;
1822 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1823 		    n = (c - 'A') + 10;
1824 		    break;
1825 		case ';':
1826 		    break loop1;
1827 		default:
1828 		    error ("illegal character in character reference", c, null);
1829 		    break loop1;
1830 		}
1831 		value *= 16;
1832 		value += n;
1833 	    }
1834 	} else {
1835 loop2:
1836 	    while (true) {
1837 		c = readCh ();
1838 		switch (c) {
1839 		case '0': case '1': case '2': case '3': case '4':
1840 		case '5': case '6': case '7': case '8': case '9':
1841 		    value *= 10;
1842 		    value += c - '0';
1843 		    break;
1844 		case ';':
1845 		    break loop2;
1846 		default:
1847 		    error ("illegal character in character reference", c, null);
1848 		    break loop2;
1849 		}
1850 	    }
1851 	}
1852 
1853 	// check for character refs being legal XML
1854 	if ((value < 0x0020
1855 		&& ! (value == '\n' || value == '\t' || value == '\r'))
1856 		|| (value >= 0xD800 && value <= 0xDFFF)
1857 		|| value == 0xFFFE || value == 0xFFFF
1858 		|| value > 0x0010ffff)
1859 	    error ("illegal XML character reference U+"
1860 		    + Integer.toHexString (value));
1861 
1862 	// Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
1863 	//  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
1864 	if (value > 0x0010ffff) {
1865 	    // too big for surrogate
1866 	    error ("character reference " + value + " is too large for UTF-16",
1867 		   new Integer (value).toString (), null);
1868 	}
1869 
1870   }
1871   
1872     /***
1873      * Read and interpret a character reference.
1874      * <pre>
1875      * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1876      * </pre>
1877      * <p>NOTE: the '&#' has already been read.
1878      */
1879     private void parseCharRef (boolean doFlush)
1880     throws SAXException, IOException
1881     {
1882 	int value = 0;
1883 	char c;
1884 
1885 	if (tryRead ('x')) {
1886 loop1:
1887 	    while (true) {
1888 		c = readCh ();
1889 		int n;
1890 		switch (c) {
1891 		case '0': case '1': case '2': case '3': case '4':
1892 		case '5': case '6': case '7': case '8': case '9':
1893 		    n = c - '0';
1894 		    break;
1895 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1896 		    n = (c - 'a') + 10;
1897 		    break;
1898 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1899 		    n = (c - 'A') + 10;
1900 		    break;
1901 		case ';':
1902 		    break loop1;
1903 		default:
1904 		    error ("illegal character in character reference", c, null);
1905 		    break loop1;
1906 		}
1907 		value *= 16;
1908 		value += n;
1909 	    }
1910 	} else {
1911 loop2:
1912 	    while (true) {
1913 		c = readCh ();
1914 		switch (c) {
1915 		case '0': case '1': case '2': case '3': case '4':
1916 		case '5': case '6': case '7': case '8': case '9':
1917 		    value *= 10;
1918 		    value += c - '0';
1919 		    break;
1920 		case ';':
1921 		    break loop2;
1922 		default:
1923 		    error ("illegal character in character reference", c, null);
1924 		    break loop2;
1925 		}
1926 	    }
1927 	}
1928 
1929 	// check for character refs being legal XML
1930 	if ((value < 0x0020
1931 		&& ! (value == '\n' || value == '\t' || value == '\r'))
1932 		|| (value >= 0xD800 && value <= 0xDFFF)
1933 		|| value == 0xFFFE || value == 0xFFFF
1934 		|| value > 0x0010ffff)
1935 	    error ("illegal XML character reference U+"
1936 		    + Integer.toHexString (value));
1937 
1938 	// Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
1939 	//  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
1940 	if (value <= 0x0000ffff) {
1941 	    // no surrogates needed
1942 	    dataBufferAppend ((char) value);
1943 	} else if (value <= 0x0010ffff) {
1944 	    value -= 0x10000;
1945 	    // > 16 bits, surrogate needed
1946 	    dataBufferAppend ((char) (0xd800 | (value >> 10)));
1947 	    dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff)));
1948 	} else {
1949 	    // too big for surrogate
1950 	    error ("character reference " + value + " is too large for UTF-16",
1951 		   new Integer (value).toString (), null);
1952 	}
1953   if (doFlush) dataBufferFlush ();
1954     }
1955 
1956 
1957     /***
1958      * Parse and expand an entity reference.
1959      * <pre>
1960      * [68] EntityRef ::= '&' Name ';'
1961      * </pre>
1962      * <p>NOTE: the '&amp;' has already been read.
1963      * @param externalAllowed External entities are allowed here.
1964      */
1965     private void parseEntityRef (boolean externalAllowed)
1966     throws SAXException, IOException
1967     {
1968 	String name;
1969 
1970 	name = readNmtoken (true);
1971 	require (';');
1972 	switch (getEntityType (name)) {
1973 	case ENTITY_UNDECLARED:
1974 	    // NOTE:  XML REC describes amazingly convoluted handling for
1975 	    // this case.  Nothing as meaningful as being a WFness error
1976 	    // unless the processor might _legitimately_ not have seen a
1977 	    // declaration ... which is what this implements.
1978 	    String	message;
1979 	    
1980 	    message = "reference to undeclared general entity " + name;
1981 	    if (skippedPE && !docIsStandalone) {
1982 		handler.verror (message);
1983 		// we don't know this entity, and it might be external...
1984 		if (externalAllowed)
1985 		    handler.skippedEntity (name);
1986 	    } else
1987 		error (message);
1988 	    break;
1989 	case ENTITY_INTERNAL:
1990             pushString (name, getEntityValue (name));
1991 	    
1992 	    //workaround for possible input pop before marking
1993             //the buffer reading position	
1994             char t = readCh ();
1995             unread (t);
1996             int bufferPosMark = readBufferPos;
1997            
1998             int end = readBufferPos + getEntityValue (name).length();
1999             for(int k = readBufferPos; k < end; k++){
2000 	            t = readCh ();
2001 	            if (t == '&'){
2002 	            	t = readCh ();   
2003 	            	if (t  == '#'){ 
2004 	            	   //try to match a character ref
2005 	                   tryReadCharRef ();
2006 	                   
2007 	                   //everything has been read
2008 	                   if (readBufferPos >= end)
2009 	                      break;
2010 	                   k = readBufferPos;
2011 	                   continue;
2012 	                }
2013 	                else if (Character.isLetter(t)){
2014 	            	   //looks like an entity ref
2015 	            	   unread (t);
2016 	            	   readNmtoken (true);
2017 	        	   require (';');
2018 	        	
2019 	        	   //everything has been read
2020 	        	   if (readBufferPos >= end)
2021 		              break;
2022 		           k = readBufferPos;
2023 	                   continue;
2024 	                }
2025 	                error(" malformed entity reference");
2026 	            }
2027 	           
2028             }
2029             readBufferPos = bufferPosMark;
2030 	    break;
2031 	case ENTITY_TEXT:
2032 	    if (externalAllowed) {
2033 		pushURL (false, name, getEntityIds (name),
2034 			null, null, null, true);
2035 	    } else {
2036 		error ("reference to external entity in attribute value.",
2037 			name, null);
2038 	    }
2039 	    break;
2040 	case ENTITY_NDATA:
2041 	    if (externalAllowed) {
2042 		error ("unparsed entity reference in content", name, null);
2043 	    } else {
2044 		error ("reference to external entity in attribute value.",
2045 			name, null);
2046 	    }
2047 	    break;
2048 	default:
2049 	    throw new RuntimeException ();
2050 	}
2051     }
2052 
2053 
2054     /***
2055      * Parse and expand a parameter entity reference.
2056      * <pre>
2057      * [69] PEReference ::= '%' Name ';'
2058      * </pre>
2059      * <p>NOTE: the '%' has already been read.
          *
          * <p>The name is read with readNmtoken, prefixed with "%" and
          * classified with getEntityType.  An undeclared entity is reported
          * through handler.verror; an internal entity has its value pushed
          * with pushString; an external (ENTITY_TEXT) entity is pushed with
          * pushURL.  Outside of a literal the replacement is surrounded by
          * one space on each side.
          *
          * <p>NOTE(review): the switch has no default case, so any other
          * entity type is silently ignored -- confirm that is intended.
2060      */
2061     private void parsePEReference ()
2062     throws SAXException, IOException
2063     {
2064 	String name;
2065 
     	// parameter entities are looked up under a '%'-prefixed key
2066 	name = "%" + readNmtoken (true);
2067 	require (';');
2068 	switch (getEntityType (name)) {
2069 	case ENTITY_UNDECLARED:
2070 	    // VC: Entity Declared
2071 	    handler.verror ("reference to undeclared parameter entity " + name);
2072 
2073 	    // we should disable handling of all subsequent declarations
2074 	    // unless this is a standalone document (info discarded)
2075 	    break;
2076 	case ENTITY_INTERNAL:
     	    // inside a literal the value is used as is; elsewhere it is
     	    // padded with a leading and a trailing space
2077 	    if (inLiteral)
2078 		pushString (name, getEntityValue (name));
2079 	    else
2080 		pushString (name, ' ' + getEntityValue (name) + ' ');
2081 	    break;
2082 	case ENTITY_TEXT:
     	    // same padding as above, done as separate pushes around the
     	    // external entity (presumably the pushed inputs are consumed
     	    // in reverse order of pushing -- TODO confirm)
2083 	    if (!inLiteral)
2084 		pushString (null, " ");
2085 	    pushURL (true, name, getEntityIds (name), null, null, null, true);
2086 	    if (!inLiteral)
2087 		pushString (null, " ");
2088 	    break;
2089 	}
2090     }
2091 
2092     /***
2093      * Parse an entity declaration.
2094      * <pre>
2095      * [70] EntityDecl ::= GEDecl | PEDecl
2096      * [71] GEDecl ::= '&lt;!ENTITY' S Name S EntityDef S? '&gt;'
2097      * [72] PEDecl ::= '&lt;!ENTITY' S '%' S Name S PEDef S? '&gt;'
2098      * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2099      * [74] PEDef ::= EntityValue | ExternalID
2100      * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2101      *		   | 'PUBLIC' S PubidLiteral S SystemLiteral
2102      * [76] NDataDecl ::= S 'NDATA' S Name
2103      * </pre>
2104      * <p>NOTE: the '&lt;!ENTITY' has already been read.
          *
          * <p>A quoted value declares an internal entity (setInternalEntity);
          * otherwise external identifiers are read and the entity is
          * registered with setExternalEntity as ENTITY_NDATA (unparsed, when
          * an NDATA clause follows) or ENTITY_TEXT, and the matching handler
          * callback is made.  Parameter entities are stored under a
          * '%'-prefixed name.  A ':' in the entity name is reported as an
          * error.
2105      */
2106     private void parseEntityDecl ()
2107     throws Exception
2108     {
2109 	boolean peFlag = false;
     	// never changed below, so the entity value is read with no LIT_* flags
2110 	int flags = 0;
2111 
2112 	// Check for a parameter entity.
     	// expandPE is switched off while looking for the '%' marker
     	// (presumably so it is not taken as a PE reference -- TODO confirm)
2113 	expandPE = false;
2114 	requireWhitespace ();
2115 	if (tryRead ('%')) {
2116 	    peFlag = true;
2117 	    requireWhitespace ();
2118 	}
2119 	expandPE = true;
2120 
2121 	// Read the entity name, and prepend
2122 	// '%' if necessary.
2123 	String name = readNmtoken (true);
2124         //NE08
2125 	if (name.indexOf(':') >= 0)
2126            error ("Illegal character(':') in entity name ", name, null);
2127 	if (peFlag) {
2128 	    name = "%" + name;
2129 	}
2130 
2131 	// Read the entity value.
2132 	requireWhitespace ();
     	// peek at the next character to tell a literal from an external ID
2133 	char c = readCh ();
2134 	unread (c);
2135 	if (c == '"' || c == '\'') {
2136 	    // Internal entity ... replacement text has expanded refs
2137 	    // to characters and PEs, but not to general entities
     	    // NOTE(review): unlike the external branches below, this one
     	    // does not check skippedPE -- confirm that is intended
2138 	    String value = readLiteral (flags);
2139 	    setInternalEntity (name, value);
2140 	} else {
2141 	    // Read the external IDs
     	    // (readExternalIds documents the order: public, system, base URI)
2142 	    String ids [] = readExternalIds (false, false);
2143 
2144 	    // Check for NDATA declaration.
     	    // NDATA is only tried for general entities (!peFlag)
2145 	    boolean white = tryWhitespace ();
2146 	    if (!peFlag && tryRead ("NDATA")) {
2147 		if (!white)
2148 		    error ("whitespace required before NDATA");
2149 		requireWhitespace ();
2150 		String notationName = readNmtoken (true);
2151 		if (!skippedPE) {
2152 		    setExternalEntity (name, ENTITY_NDATA, ids, notationName);
2153 		    handler.unparsedEntityDecl (name, ids, notationName);
2154 		}
2155 	    } else if (!skippedPE) {
2156 		setExternalEntity (name, ENTITY_TEXT, ids, null);
     		// report public id plus either the absolutized or the raw
     		// system id, depending on handler.resolveURIs ()
2157 		handler.getDeclHandler ()
2158 		    .externalEntityDecl (name, ids [0],
2159 			    handler.resolveURIs ()
2160 	    				// FIXME: ASSUMES not skipped
2161 					// "false" forces error on bad URI
2162 				? handler.absolutize (ids [2], ids [1], false)
2163 				: ids [1]);
2164 	    }
2165 	}
2166 
2167 	// Finish the declaration.
2168 	skipWhitespace ();
2169 	require ('>');
2170     }
2171 
2172 
2173     /***
2174      * Parse a notation declaration.
2175      * <pre>
2176      * [82] NotationDecl ::= '&lt;!NOTATION' S Name S
2177      *		(ExternalID | PublicID) S? '&gt;'
2178      * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2179      * </pre>
2180      * <P>NOTE: the '&lt;!NOTATION' has already been read.
          *
          * <p>Reads the notation name (a ':' in it is reported as an error),
          * then the external identifiers, and registers both through
          * setNotation before consuming the closing '&gt;'.
2181      */
2182     private void parseNotationDecl ()
2183     throws Exception
2184     {
2185 	String nname, ids[];
2186 
2187 
2188 	requireWhitespace ();
2189 	nname = readNmtoken (true);
2190         //NE08
2191 	if (nname.indexOf(':') >= 0)
2192            error ("Illegal character(':') in notation name ", nname, null);
2193 	requireWhitespace ();
2194 
2195 	// Read the external identifiers.
     	// first argument "true" selects notation mode, in which
     	// readExternalIds treats the system literal after PUBLIC as optional
2196 	ids = readExternalIds (true, false);
2197 
2198 	// Register the notation.
2199 	setNotation (nname, ids);
2200 
2201 	skipWhitespace ();
2202 	require ('>');
2203     }
2204 
2205 
2206     /***
2207      * Parse character data.
2208      * <pre>
2209      * [14] CharData ::= [^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)
2210      * </pre>
          *
          * <p>Scans readBuffer in place and reports each scanned run straight
          * from that buffer, as handler.ignorableWhitespace when the run is
          * all whitespace in element-only content that is not yet "dirty",
          * otherwise as handler.charData.  Scanning stops at '&amp;' or
          * '&lt;' (left unread) or at a "]]&gt;" sequence, which is an error.
          * Line and column counters are updated along the way.
2211      */
2212     private void parseCharData ()
2213     throws Exception
2214     {
2215 	char	c;
     	// 0 = buffer exhausted, keep going; 1 = clean end at '&' or '<';
     	// 2 = "]]>" seen (error)
2216 	int	state = 0;
2217 	boolean pureWhite = false;
2218 
2219 	// assert (dataBufferPos == 0);
2220 
2221 	// are we expecting pure whitespace?  it might be dirty...
2222 	if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement)
2223 	    pureWhite = true;
2224 
2225 	// always report right out of readBuffer
2226 	// to minimize (pointless) buffer copies
2227 	while (true) {
     	    // line/column deltas for the part of the buffer scanned in
     	    // this iteration
2228 	    int lineAugment = 0;
2229 	    int columnAugment = 0;
2230 	    int i;
2231 
2232 loop:
2233 	    for (i = readBufferPos; i < readBufferLength; i++) {
2234 		switch (c = readBuffer [i]) {
2235 		case '\n':
2236 		    lineAugment++;
2237 		    columnAugment = 0;
2238 		    // pureWhite unmodified
2239 		    break;
2240 		case '\r':	// should not happen!!
2241 		case '\t':
2242 		case ' ':
2243 		    // pureWhite unmodified
2244 		    columnAugment++;
2245 		    break;
2246 		case '&':
2247 		case '<':
     		    // NOTE(review): the column is advanced here although the
     		    // delimiter itself is left in the buffer (i still points
     		    // at it) -- confirm this matches the reader's accounting
2248 		    columnAugment++;
2249 		    // pureWhite unmodified
2250 		    // CLEAN end of text sequence
2251 		    state = 1;
2252 		    break loop;
2253 		case ']':
2254 		    // that's not a whitespace char, and
2255 		    // can not terminate pure whitespace either
2256 		    pureWhite = false;
     		    // "]]>" is only detected when all three characters are
     		    // already in the current buffer
2257 		    if ((i + 2) < readBufferLength) {
2258 			if (readBuffer [i + 1] == ']'
2259 				&& readBuffer [i + 2] == '>') {
2260 			    // ERROR end of text sequence
2261 			    state = 2;
2262 			    break loop;
2263 			}
2264 		    } else {
2265 			// FIXME missing two end-of-buffer cases
     			// (i.e. "]]>" split across the end of this buffer)
2266 		    }
2267 		    columnAugment++;
2268 		    break;
2269 		default:
     			// rejected: below U+0020 or above U+FFFD in any version;
     			// U+007F..U+009F except U+0085 only when xmlVersion is
     			// XML_11.  Only the error() call is governed by this if.
2270 			if ((c < 0x0020 || c > 0xFFFD)
2271 			   || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085) 
2272 			       && xmlVersion == XML_11)) 
2273 				error ("illegal XML character U+"
2274 					+ Integer.toHexString (c));
2275 		    // that's not a whitespace char
2276 		    pureWhite = false;
2277 		    columnAugment++;
2278 		}
2279 	    }
2280 
2281 	    // report text thus far
     	    // (first fold the scanned deltas into the line/column counters)
2282 	    if (lineAugment > 0) {
2283 		line += lineAugment;
2284 		column = columnAugment;
2285 	    } else {
2286 		column += columnAugment;
2287 	    }
2288 
2289 	    // report characters/whitespace
2290 	    int		length = i - readBufferPos;
2291 
2292 	    if (length != 0) {
2293 		if (pureWhite)
2294 		    handler.ignorableWhitespace (readBuffer,
2295 		    		readBufferPos, length);
2296 		else
2297 		    handler.charData (readBuffer, readBufferPos, length);
     		// consume what was reported; a terminating delimiter stays unread
2298 		readBufferPos = i;
2299 	    }
2300 	    
     	    // a terminator was found (clean or error): stop scanning
2301 	    if (state != 0)
2302 		break;
2303 
2304 	    // fill next buffer from this entity, or
2305 	    // pop stack and continue with previous entity
2306 	    unread (readCh ());
2307 	}
     	// remember that this element has seen non-whitespace text
2308         if (!pureWhite)
2309            isDirtyCurrentElement = true;
2310 	// finish, maybe with error
2311 	if (state != 1)	// state 1 is the clean finish; otherwise "]]>" was seen
2312 	    error ("character data may not contain ']]>'");
2313     }
2314 
2315 
2316     //////////////////////////////////////////////////////////////////////
2317     // High-level reading and scanning methods.
2318     //////////////////////////////////////////////////////////////////////
2319 
2320     /***
2321      * Require whitespace characters.
          *
          * <p>Reads one character; if it is whitespace (per isWhitespace) the
          * rest of the run is skipped with skipWhitespace, otherwise
          * "whitespace required" is reported through error, passing the
          * offending character along.
2322      */
2323     private void requireWhitespace ()
2324     throws SAXException, IOException
2325     {
2326 	char c = readCh ();
2327 	if (isWhitespace (c)) {
2328 	    skipWhitespace ();
2329 	} else {
     	    // note: the non-whitespace character is not pushed back here
2330 	    error ("whitespace required", c, null);
2331 	}
2332     }
2333 
2334 
2335     /***
2336      * Skip whitespace characters.
2337      * <pre>
2338      * [3] S ::= (#x20 | #x9 | #xd | #xa)+
2339      * </pre>
          *
          * <p>With USE_CHEATS set, the current read buffer is scanned
          * directly; on the first non-whitespace character the read position
          * and the line/column counters are committed and the method returns.
          * If the buffer runs out, or a '%' is met while expandPE is set,
          * nothing is committed and the character-by-character loop below
          * redoes the work through readCh.
2340      */
2341     private void skipWhitespace ()
2342     throws SAXException, IOException
2343     {
2344 	// Start with a little cheat.  Most of
2345 	// the time, the white space will fall
2346 	// within the current read buffer; if
2347 	// not, then fall through.
2348 	if (USE_CHEATS) {
2349 	    int lineAugment = 0;
2350 	    int columnAugment = 0;
2351 
2352 loop:
2353 	    for (int i = readBufferPos; i < readBufferLength; i++) {
2354 		switch (readBuffer [i]) {
2355 		case ' ':
2356 		case '\t':
2357 		case '\r':
2358 		    columnAugment++;
2359 		    break;
2360 		case '\n':
2361 		    lineAugment++;
2362 		    columnAugment = 0;
2363 		    break;
2364 		case '%':
     		    // leave a possible PE reference to the slow path
     		    // (presumably readCh handles the expansion)
2365 		    if (expandPE)
2366 			break loop;
2367 		    // else fall through...
2368 		default:
     		    // first non-whitespace character: commit position and
     		    // line/column bookkeeping, then stop
2369 		    readBufferPos = i;
2370 		    if (lineAugment > 0) {
2371 			line += lineAugment;
2372 			column = columnAugment;
2373 		    } else {
2374 			column += columnAugment;
2375 		    }
2376 		    return;
2377 		}
2378 	    }
2379 	}
2380 
2381 	// OK, do it the slow way.
     	// readBufferPos was not moved above, so this starts over from the
     	// same position
2382 	char c = readCh ();
2383 	while (isWhitespace (c)) {
2384 	    c = readCh ();
2385 	}
2386 	unread (c);
2387     }
2388 
2389 
2390     /***
2391      * Read a name or (when parsing an enumeration) name token.
2392      * <pre>
2393      * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2394      * [7] Nmtoken ::= (NameChar)+
2395      * </pre>
          *
          * <p>With USE_CHEATS set, the token is first looked for entirely
          * inside the current read buffer and interned straight from it.  If
          * no terminating character is found there (or a '%' is met while
          * expandPE is set), the read position is left untouched and the
          * token is re-read character by character into nameBuffer.
          *
          * <p>Character classes are approximated with
          * Character.isUnicodeIdentifierStart/Part plus a list of explicit
          * exceptions; this is not an exact implementation of the XML name
          * productions.
          *
          * @param isName true to apply the name-start rule to the first
          *	character (Name), false to accept any name character (Nmtoken)
          * @return the token, as returned by intern
2396      */
2397     private String readNmtoken (boolean isName)
2398     throws SAXException, IOException
2399     {
2400 	char c;
2401 
2402 	if (USE_CHEATS) {
2403 loop:
2404 	    for (int i = readBufferPos; i < readBufferLength; i++) {
2405 		c = readBuffer [i];
2406 		switch (c) {
2407 		  case '%':
2408 		    if (expandPE)
2409 			break loop;
2410 		    // else fall through...
2411 
2412 		    // What may legitimately come AFTER a name/nmtoken?
2413 		  case '<': case '>': case '&':
2414 		  case ',': case '|': case '*': case '+': case '?':
2415 		  case ')':
2416 		  case '=':
2417 		  case '\'': case '"':
2418 		  case '[':
2419 		  case ' ': case '\t': case '\r': case '\n':
2420 		  case ';':
2421 		  case '/':
     		    // terminator found: the token is readBuffer [start, i)
2422 		    int start = readBufferPos;
2423 		    if (i == start)
2424 			error ("name expected", readBuffer [i], null);
2425 		    readBufferPos = i;
2426 		    return intern (readBuffer, start, i - start);
2427 
2428 		  default:
2429 // FIXME ... per IBM's OASIS test submission, these:
2430 //   ?		U+06dd 
2431 //   Combining	U+309B
2432 		    //these switches are kind of ugly but at least we won't
2433 		    //have to go over the whole list for each char
     		    // First character of a Name only: reject specific code
     		    // points, dispatching on the high byte and then on the
     		    // high nibble of the low byte (c2).
2434 		    if (isName && i == readBufferPos){
2435 			    char c2 = (char) (c & 0x00f0);
2436 	                    switch (c & 0xff00){
2437 	                    	//starting with 01
2438 	                    	case 0x0100:
2439 	                       	    switch (c2){
2440 	                    	        case 0x0030:
2441 	                    	            if (c == 0x0132 || c == 0x0133 || c == 0x013f)
2442 	                    	            	error ("Not a name start character, U+"
2443 	              				       + Integer.toHexString (c));
2444 	                    	        break;
2445 	                    	        case 0x0040:
2446 	                	            if (c == 0x0140 || c == 0x0149)
2447 	                	            	error ("Not a name start character, U+"
2448 	          				       + Integer.toHexString (c));
2449 	                	        break;
2450 	                    	        case 0x00c0:
2451 	            	                    if (c == 0x01c4 || c == 0x01cc)
2452 	            	            	        error ("Not a name start character, U+"
2453 	      				               + Integer.toHexString (c));
2454 	            	                break;
2455 	                    	        case 0x00f0:
2456 	        	                    if (c == 0x01f1 || c == 0x01f3)
2457 	        	            	        error ("Not a name start character, U+"
2458 	  				               + Integer.toHexString (c));
2459 	        	                break;
     	                    	        // NOTE(review): in this case c is in
     	                    	        // 0x01b0..0x01bf, so the comparisons
     	                    	        // with 0x01f1/0x01f3 can never be true;
     	                    	        // looks like a copy of the 0x00f0 case
2460 	                    	        case 0x00b0:
2461 	    	                            if (c == 0x01f1 || c == 0x01f3)
2462 	    	            	                error ("Not a name start character, U+"
2463 					               + Integer.toHexString (c));
2464 	    	                        break;
2465 	        	                default:
2466 	        	                    if (c == 0x017f)
2467 	                	            	error ("Not a name start character, U+"
2468 	          				        + Integer.toHexString (c));	
2469 	                    	    }
2470 				    
2471 	                    	break;
2472 	                    	//starting with 11
2473 	                    	case 0x1100:
2474 	                            switch (c2){
2475 	                                case 0x0000:
2476 	                                    if (c == 0x1104 || c == 0x1108 ||
2477 	                                    	c == 0x110a || c == 0x110d)
2478 	                                      	error ("Not a name start character, U+"
2479 	                      		             + Integer.toHexString (c));
2480 	                                break;
2481 	                                case 0x0030:
2482 	                                    if (c == 0x113b || c == 0x113f)
2483 	                                      	error ("Not a name start character, U+"
2484 	                          	               + Integer.toHexString (c));
2485 	                                break;
2486 	                                case 0x0040:
2487 	                                    if (c == 0x1141 || c == 0x114d
2488 	                                        || c == 0x114f )
2489 	                                      	error ("Not a name start character, U+"
2490 	                          	               + Integer.toHexString (c));
2491 	                                break;
2492 	                                case 0x0050:
2493 	                                     if (c == 0x1151 || c == 0x1156)
2494 	                                         error ("Not a name start character, U+"
2495 	                          		        + Integer.toHexString (c));
2496 	                                break;
2497 	                                case 0x0060:
2498 		                             if (c == 0x1162 || c == 0x1164
2499 		                             	 || c == 0x1166 || c == 0x116b
2500 						 || c == 0x116f)
2501 		                                 error ("Not a name start character, U+"
2502 		                          		 + Integer.toHexString (c));
2503 		                                break;
     	                                // NOTE(review): c is in 0x11b0..0x11bf
     	                                // here, so "c == 0x116f" below can
     	                                // never be true
2504 	                                case 0x00b0:
2505 	                                     if (c == 0x11b6 || c == 0x11b9
2506 	                                         || c == 0x11bb || c == 0x116f)
2507 	                                         error ("Not a name start character, U+"
2508 	                          		        + Integer.toHexString (c));
2509 	                                break;
2510 	                                default:
2511 	                                    if (c == 0x1174 || c == 0x119f
2512 	                                    	|| c == 0x11ac || c == 0x11c3
2513 						|| c == 0x11f1)
2514 	                                        error ("Not a name start character, U+"
2515 	                                                + Integer.toHexString (c));
2516 	                            }
2517 	                        break;
2518 	                        default:
2519 	                           if (c == 0x0e46 || c == 0x1011 
2520 	                               || c == 0x212f || c == 0x0587
2521 				       || c == 0x0230 )
2522 	                	       error ("Not a name start character, U+"
2523 	          		              + Integer.toHexString (c));
2524 	                    }
2525 		    }
2526 		    // punt on exact tests from Appendix A; approximate
2527 		    // them using the Unicode ID start/part rules
2528 		    if (i == readBufferPos && isName) {
2529 			if (!Character.isUnicodeIdentifierStart (c)
2530 				&& c != ':' && c != '_')
2531 			    error ("Not a name start character, U+"
2532 				  + Integer.toHexString (c));
2533 		    } else if (!Character.isUnicodeIdentifierPart (c)
2534 			    && c != '-' && c != ':' && c != '_' && c != '.'
2535 			    && !isExtender (c))
2536 			error ("Not a name character, U+"
2537 				+ Integer.toHexString (c));
2538 		}
2539 	    }
2540 	}
2541 
     	// Slow path: accumulate the token in nameBuffer via readCh.
2542 	nameBufferPos = 0;
2543 
2544 	// Read the first character.
2545 loop:
2546 	while (true) {
2547 	    c = readCh ();
2548 	    switch (c) {
     	    // NOTE(review): unlike the fast path, '%' always terminates
     	    // the token here, whatever expandPE says
2549 	    case '%':
2550 	    case '<': case '>': case '&':
2551 	    case ',': case '|': case '*': case '+': case '?':
2552 	    case ')':
2553 	    case '=':
2554 	    case '\'': case '"':
2555 	    case '[':
2556 	    case ' ': case '\t': case '\n': case '\r':
2557 	    case ';':
2558 	    case '/':
     		// terminator: push it back and return what was collected
2559 		unread (c);
2560 		if (nameBufferPos == 0) {
2561 		    error ("name expected");
2562 		}
2563 		// punt on exact tests from Appendix A, but approximate them
     		// (the name-start rule is only checked here, once the
     		// whole token has been collected)
2564 		if (isName
2565 			&& !Character.isUnicodeIdentifierStart (
2566 				nameBuffer [0])
2567 			&& ":_".indexOf (nameBuffer [0]) == -1)
2568 		    error ("Not a name start character, U+"
2569 			      + Integer.toHexString (nameBuffer [0]));
2570 		String s = intern (nameBuffer, 0, nameBufferPos);
2571 		nameBufferPos = 0;
2572 		return s;
2573 	    default:
2574 		// punt on exact tests from Appendix A, but approximate them
2575 
     		// the first character of a Name is exempt from the
     		// name-character test; it is checked at termination instead
2576 		if ((nameBufferPos != 0 || !isName)
2577 			&& !Character.isUnicodeIdentifierPart (c)
2578 			&& ":-_.".indexOf (c) == -1
2579 			&& !isExtender (c))
2580 		    error ("Not a name character, U+"
2581 			    + Integer.toHexString (c));
     		// make room in nameBuffer when it is full
2582 		if (nameBufferPos >= nameBuffer.length)
2583 		    nameBuffer =
2584 			(char[]) extendArray (nameBuffer,
2585 				    nameBuffer.length, nameBufferPos);
2586 		nameBuffer [nameBufferPos++] = c;
2587 	    }
2588 	}
2589     }
2590 
     /***
      * Test whether a character is one of the code points or ranges
      * listed below, which this parser treats as XML "Extender"
      * characters (production [88]) when validating name characters.
      * @param c The character to test.
      * @return true if the character is in the list.
      */
2591     private static boolean isExtender (char c)
2592     {
2593 	// [88] Extender ::= ...
2594 	return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
2595 	       || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005
2596 	       || (c >= 0x3031 && c <= 0x3035)
2597 	       || (c >= 0x309d && c <= 0x309e)
2598 	       || (c >= 0x30fc && c <= 0x30fe);
2599     }
2600 
2601 
2602     /***
2603      * Read a literal.  With matching single or double quotes as
2604      * delimiters (and not embedded!) this is used to parse:
2605      * <pre>
2606      *	[9] EntityValue ::= ... ([^%&amp;] | PEReference | Reference)* ...
2607      *	[10] AttValue ::= ... ([^<&] | Reference)* ...
2608      *	[11] SystemLiteral ::= ... (URLchar - "'")* ...
2609      *	[12] PubidLiteral ::= ... (PubidChar - "'")* ...
2610      * </pre>
2611      * as well as the quoted strings in XML and text declarations
2612      * (for version, encoding, and standalone) which have their
2613      * own constraints.
          *
          * <p>The literal is accumulated in the data buffer and returned via
          * dataBufferToString.  While reading, inLiteral is set, doReport is
          * cleared, and expandPE is cleared when LIT_DISABLE_PE is given;
          * expandPE and doReport are put back to their previous values and
          * inLiteral is cleared after the closing delimiter.
          * NOTE(review): that restore is not in a finally block, so it is
          * skipped if anything other than EOFException propagates.
          *
          * @param flags bit mask of the LIT_* constants tested below
          *	(LIT_DISABLE_PE, LIT_ATTRIBUTE, LIT_PUBID, LIT_DISABLE_CREF,
          *	LIT_ENTITY_REF, LIT_DISABLE_EREF, LIT_NORMALIZE)
          * @return the literal's text; null after the error call when the
          *	first character is not a quote (only reachable if error
          *	returns)
2614      */
2615     private String readLiteral (int flags)
2616     throws SAXException, IOException
2617     {
2618 	char	delim, c;
2619 	int	startLine = line;
2620 	boolean	saved = expandPE;
2621 	boolean	savedReport = doReport;
2622 
2623 	// Find the first delimiter.
2624 	delim = readCh ();
2625 	if (delim != '"' && delim != '\'') {
2626 	    error ("expected '\"' or \"'\"", delim, null);
2627 	    return null;
2628 	}
2629 	inLiteral = true;
2630 	if ((flags & LIT_DISABLE_PE) != 0)
2631 	    expandPE = false;
2632 	doReport = false;
2633 
2634 	// Each level of input source has its own buffer; remember
2635 	// ours, so we won't read the ending delimiter from any
2636 	// other input source, regardless of entity processing.
2637 	char ourBuf [] = readBuffer;
2638 
2639 	// Read the literal.
2640 	try {
2641 	    c = readCh ();
     	    // NOTE(review): ampRead is assigned below but never read in
     	    // this method
2642 	    boolean ampRead = false;
2643 loop:
     	    // stop only on the delimiter when it comes from our own buffer
2644 	    while (! (c == delim && readBuffer == ourBuf)) {
2645 		switch (c) {
2646 		    // attributes and public ids are normalized
2647 		    // in almost the same ways
2648 		case '\n':
2649 		case '\r':
2650 		    if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
2651 			c = ' ';
2652 		    break;
2653 		case '\t':
2654 		    if ((flags & LIT_ATTRIBUTE) != 0)
2655 			c = ' ';
2656 		    break;
2657 		case '&':
2658 		    c = readCh ();
2659 		    // Char refs are expanded immediately, except for
2660 		    // all the cases where it's deferred.
2661 		    if (c == '#') {
2662 			if ((flags & LIT_DISABLE_CREF) != 0) {
     			    // keep "&#" as literal text: '&' is appended here,
     			    // '#' by the append after the switch
2663 			    dataBufferAppend ('&');
2664 			    break;
2665 			}
2666                         parseCharRef (false /* Do not do flushDataBuffer */);
2667 
2668 			// exotic WFness risk: this is an entity literal,
2669 			// dataBuffer [dataBufferPos - 1] == '&', and
2670 			// following chars are a _partial_ entity/char ref
2671                    
2672 		    // It looks like an entity ref ...
2673 		    } else {
2674 			unread (c);
2675 			// Expand it?
2676 			if ((flags & LIT_ENTITY_REF) > 0) {
2677 			    parseEntityRef (false);
     			    // copies the whole read buffer into a String just
     			    // for this comparison
2678 			    if (String.valueOf (readBuffer).equals("&#38;"))
2679 			    	ampRead = true;
2680                         //Is it just data?
2681 			} else if ((flags & LIT_DISABLE_EREF) != 0) {
     			    // the character pushed back above is picked up
     			    // again by the readCh after this if/else chain
2682 			    dataBufferAppend ('&');
2683 
2684 			// OK, it will be an entity ref -- expanded later.
2685 			} else {
2686 			    String name = readNmtoken (true);
2687 			    require (';');
2688 			    dataBufferAppend ('&');
2689 			    dataBufferAppend (name);
2690 			    dataBufferAppend (';');
2691 			}
2692 		    }
     		    // reference handled: fetch the next character and skip
     		    // the generic append below
2693 		    c = readCh ();
2694 		    continue loop;
2695 
2696 		case '<':
2697 		    // and why?  Perhaps so "&foo;" expands the same
2698 		    // inside and outside an attribute?
2699 		    if ((flags & LIT_ATTRIBUTE) != 0)
2700 			error ("attribute values may not contain '<'");
2701 		    break;
2702 
2703 		// We don't worry about case '%' and PE refs, readCh does.
2704 
2705 		default:
2706 		    break;
2707 		}
     		// ordinary (possibly normalized) character
2708 		dataBufferAppend (c);
2709 		c = readCh ();
2710 	    }
2711 	} catch (EOFException e) {
2712 	    error ("end of input while looking for delimiter (started on line "
2713 		   + startLine + ')', null, new Character (delim).toString ());
2714 	}
2715 	inLiteral = false;
2716 	expandPE = saved;
2717 	doReport = savedReport;
2718 
2719 	// Normalise whitespace if necessary.
2720 	if ((flags & LIT_NORMALIZE) > 0) {
2721 	    dataBufferNormalize ();
2722 	}
2723 
2724 	// Return the value.
2725 	return dataBufferToString ();
2726     }
2727 
2728 
2729     /***
2730      * Try reading external identifiers.
2731      * A system identifier is not required for notations.
          *
          * <p>After "PUBLIC" the public literal is read (normalized), then a
          * system literal, which is optional in notation mode.  After
          * "SYSTEM" only a system literal is read.  Every character of the
          * public id is checked against an allowed set.  When a system id was
          * read, a '#' in it is reported through handler.verror and the base
          * URI is taken from handler.getSystemId, with a warning if that is
          * null.
          *
2732      * @param inNotation Are we parsing a notation decl?
2733      * @param isSubset Parsing external subset decl (may be omitted)?
2734      * @return A three-member String array containing the identifiers,
2735      *	or nulls. Order: public, system, baseURI.
2736      */
2737     private String[] readExternalIds (boolean inNotation, boolean isSubset)
2738     throws Exception
2739     {
2740 	char	c;
2741 	String	ids[] = new String [3];
     	// identifiers are read verbatim: no char refs, PE refs or entity refs
2742 	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2743 
2744 	if (tryRead ("PUBLIC")) {
2745 	    requireWhitespace ();
2746 	    ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags);
2747 	    if (inNotation) {
     		// the system literal is optional here: peek for a quote
2748 		skipWhitespace ();
2749 		c = readCh ();
2750 		unread (c);
2751 		if (c == '"' || c == '\'') {
2752 		    ids [1] = readLiteral (flags);
2753 		}
2754 	    } else {
2755 		requireWhitespace ();
2756 		ids [1] = readLiteral (flags);
2757 	    }
2758 
     	    // validate the public id: ASCII letters plus the characters in
     	    // the string below
2759 	    for (int i = 0; i < ids [0].length (); i++) {
2760 		c = ids [0].charAt (i);
2761 		if (c >= 'a' && c <= 'z')
2762 		    continue;
2763 		if (c >= 'A' && c <= 'Z')
2764 		    continue;
2765 		if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1)
2766 		    continue;
2767 		error ("illegal PUBLIC id character U+"
2768 			+ Integer.toHexString (c));
2769 	    }
2770 	} else if (tryRead ("SYSTEM")) {
2771 	    requireWhitespace ();
2772 	    ids [1] = readLiteral (flags);
     	// neither keyword: only acceptable when isSubset is set
2773 	} else if (!isSubset) 
2774 		error ("missing SYSTEM or PUBLIC keyword");
2775 
2776 	if (ids [1] != null) {
2777 	    if (ids [1].indexOf ('#') != -1)
2778 		handler.verror ("SYSTEM id has a URI fragment: " + ids [1]);
     	    // base URI against which the system id can later be resolved
2779 	    ids [2] = handler.getSystemId ();
2780 	    if (ids [2] == null)
2781 		handler.warn ("No base URI; hope URI is absolute: "
2782 			+ ids [1]);
2783 	}
2784 
2785 	return ids;
2786     }
2787 
2788 
2789     /***
2790      * Test if a character is whitespace.
2791      * <pre>
2792      * [3] S ::= (#x20 | #x9 | #xd | #xa)+
2793      * </pre>
          * Only space, tab, line feed and carriage return count; every other
          * character, including the remaining control characters, yields
          * false.
2794      * @param c The character to test.
2795      * @return true if the character is whitespace.
2796      */
2797     private final boolean isWhitespace (char c)
2798     {
     	// quick rejection of everything above the space character
2799 	if (c > 0x20)
2800 	    return false;
2801 	if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d)
2802 	    return true;
2803 	return false;	// illegal ...
2804     }
2805 
2806 
2807     //////////////////////////////////////////////////////////////////////
2808     // Utility routines.
2809     //////////////////////////////////////////////////////////////////////
2810 
2811 
2812     /***
2813      * Add a character to the data buffer.
          * The buffer is replaced by the result of extendArray when it is
          * full, and dataBufferPos is advanced by one.
          * @param c The character to append.
2814      */
2815     private void dataBufferAppend (char c)
2816     {
2817 	// Expand buffer if necessary.
2818 	if (dataBufferPos >= dataBuffer.length)
2819 	    dataBuffer =
2820 		(char[]) extendArray (dataBuffer,
2821 			dataBuffer.length, dataBufferPos);
2822 	dataBuffer [dataBufferPos++] = c;
2823     }
2824 
2825 
2826     /***
2827      * Add a string to the data buffer.
          * Delegates to the char-array overload with a copy of the string's
          * characters.
          * @param s The string to append; must not be null.
2828      */
2829     private void dataBufferAppend (String s)
2830     {
2831 	dataBufferAppend (s.toCharArray (), 0, s.length ());
2832     }
2833 
2834 
2835     /***
2836      * Append (part of) a character array to the data buffer.
          * @param ch The source array.
          * @param start Index of the first character to copy.
          * @param length Number of characters to copy.
2837      */
2838     private void dataBufferAppend (char ch[], int start, int length)
2839     {
     	// extendArray is called unconditionally with the size needed after
     	// the copy (presumably it hands back the same array when that
     	// already fits -- TODO confirm)
2840 	dataBuffer = (char[])
2841 		extendArray (dataBuffer, dataBuffer.length,
2842 				    dataBufferPos + length);
2843 
2844 	System.arraycopy (ch, start, dataBuffer, dataBufferPos, length);
2845 	dataBufferPos += length;
2846     }
2847 
2848 
2849     /***
2850      * Normalise space characters in the data buffer.
          * Leading and trailing spaces are dropped and every inner run of
          * spaces is collapsed to a single space, in place.  Only the space
          * character (' ') is considered; tabs and line ends are left alone
          * here.  dataBufferPos is set to the new length.
2851      */
2852     private void dataBufferNormalize ()
2853     {
     	// i = write index, j = read index, end = one past the last
     	// character to keep
2854 	int i = 0;
2855 	int j = 0;
2856 	int end = dataBufferPos;
2857 
2858 	// Skip spaces at the start.
2859 	while (j < end && dataBuffer [j] == ' ') {
2860 	    j++;
2861 	}
2862 
2863 	// Skip whitespace at the end.
2864 	while (end > j && dataBuffer [end - 1] == ' ') {
2865 	    end --;
2866 	}
2867 
2868 	// Start copying to the left.
2869 	while (j < end) {
2870 
2871 	    char c = dataBuffer [j++];
2872 
2873 	    // Normalise all other spaces to
2874 	    // a single space.
2875 	    if (c == ' ') {
     		// Trailing spaces were trimmed above, so a run of spaces
     		// is always followed by a non-space before "end".  The
     		// loop below also consumes that character (j++ runs on
     		// the failing test), which is why dataBuffer [j - 1] is
     		// copied after the single space.
2876 		while (j < end && dataBuffer [j++] == ' ')
2877 		    continue;
2878 		dataBuffer [i++] = ' ';
2879 		dataBuffer [i++] = dataBuffer [j - 1];
2880 	    } else {
2881 		dataBuffer [i++] = c;
2882 	    }
2883 	}
2884 
2885 	// The new length is <= the old one.
2886 	dataBufferPos = i;
2887     }
2888 
2889 
2890     /***
2891      * Convert the data buffer to a string.
          * The buffer is emptied (dataBufferPos reset to 0) as a side effect.
          * @return a new String holding the first dataBufferPos characters
          *	of the data buffer.
2892      */
2893     private String dataBufferToString ()
2894     {
2895 	String s = new String (dataBuffer, 0, dataBufferPos);
2896 	dataBufferPos = 0;
2897 	return s;
2898     }
2899 
2900 
2901     /***
2902      * Flush the contents of the data buffer to the handler, as
2903      * appropriate, and reset the buffer for new input.
          *
          * <p>In element-only content outside CDATA, an all-whitespace buffer
          * is reported with handler.ignorableWhitespace; in every other case
          * a non-empty buffer is reported with handler.charData.  Nothing is
          * reported when the buffer is empty.
2904      */
2905     private void dataBufferFlush ()
2906     throws SAXException
2907     {
2908 	if (currentElementContent == CONTENT_ELEMENTS
2909 		&& dataBufferPos > 0
2910 		&& !inCDATA
2911 		) {
2912 	    // We can't just trust the buffer to be whitespace, there
2913 	    // are (error) cases when it isn't
     	    // On the first non-whitespace character the whole buffer is
     	    // reported as character data; resetting dataBufferPos to 0 is
     	    // also what ends this loop (there is no explicit break).
2914 	    for (int i = 0; i < dataBufferPos; i++) {
2915 		if (!isWhitespace (dataBuffer [i])) {
2916 		    handler.charData (dataBuffer, 0, dataBufferPos);
2917 		    dataBufferPos = 0;
2918 		}
2919 	    }
     	    // still non-empty: everything was whitespace
2920 	    if (dataBufferPos > 0) {
2921 		handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos);
2922 		dataBufferPos = 0;
2923 	    }
2924 	} else if (dataBufferPos > 0) {
2925 	    handler.charData (dataBuffer, 0, dataBufferPos);
2926 	    dataBufferPos = 0;
2927 	}
2928     }
2929 
2930 
2931     /***
2932      * Require a string to appear, or throw an exception.
2933      * <p><em>Precondition:</em> Entity expansion is not required.
2934      * <p><em>Precondition:</em> data buffer has no characters that
2935      * will get sent to the application.
          *
          * <p>The second precondition exists because the start of the data
          * buffer is overwritten with the characters of <code>delim</code>
          * whenever they fit.
          *
          * @param delim The string that must come next in the input.
2936      */
2937     private void require (String delim)
2938     throws SAXException, IOException
2939     {
2940 	int	length = delim.length ();
2941 	char	ch [];
2942 		
     	// borrow dataBuffer as scratch space to avoid an allocation
2943 	if (length < dataBuffer.length) {
2944 	    ch = dataBuffer;
2945 	    delim.getChars (0, length, ch, 0);
2946 	} else
2947 	    ch = delim.toCharArray ();
2948 
     	// fast path: the whole string is available in the current read
     	// buffer, so compare in place and advance the read position
2949 	if (USE_CHEATS
2950 		&& length <= (readBufferLength - readBufferPos)) {
2951 	    int offset = readBufferPos;
2952 
2953 	    for (int i = 0; i < length; i++, offset++)
2954 		if (ch [i] != readBuffer [offset])
2955 		    error ("required string", null, delim);
2956 	    readBufferPos = offset;
2957 	    
2958 	} else {
     	    // slow path: one character at a time through readCh
2959 	    for (int i = 0; i < length; i++)
2960 		require (ch [i]);
2961 	}
2962     }
2963 
2964 
2965     /***
2966      * Require a character to appear, or throw an exception.
          * Reads one character with readCh and reports "required character"
          * through error when it differs from <code>delim</code>.
          * @param delim The character that must come next in the input.
2967      */
2968     private void require (char delim)
2969     throws SAXException, IOException
2970     {
2971 	char c = readCh ();
2972 
2973 	if (c != delim) {
     	    // pass along the character found and the one expected
2974 	    error ("required character", c, new Character (delim).toString ());
2975 	}
2976     }
2977 
2978 
2979     /***
2980      * Create an interned string from a character array.
2981      * &AElig;lfred uses this method to create an interned version
2982      * of all names and name tokens, so that it can test equality
2983      * with <code>==</code> instead of <code>String.equals ()</code>.
2984      *
2985      * <p>This is much more efficient than constructing a non-interned
2986      * string first, and then interning it.
2987      *
          * <p>Lookups go through symbolTable, an array of buckets indexed by
          * a hash of the characters.  Each bucket is a flat array of pairs:
          * a char[] copy of the name at an even index, the String itself at
          * the following odd index.
          *
2988      * @param ch an array of characters for building the string.
2989      * @param start the starting position in the array.
2990      * @param length the number of characters to place in the string.
2991      * @return an interned string.
2992      * @see #intern (String)
2993      * @see java.lang.String#intern
2994      */
2995     public String intern (char ch[], int start, int length)
2996     {
2997 	int	index = 0;
2998 	int	hash = 0;
2999 	Object	bucket [];
3000 
3001 	// Generate a hash code.  This is a widely used string hash,
3002 	// often attributed to Brian Kernighan.
3003 	for (int i = start; i < start + length; i++)
3004 	    hash = 31 * hash + ch [i];
     	// clear the sign bit so the bucket index is never negative
3005 	hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH;
3006 
3007 	// Get the bucket -- consists of {array,String} pairs
3008 	if ((bucket = symbolTable [hash]) == null) {
3009 	    // first string in this bucket
3010 	    bucket = new Object [8];
3011 
3012 	// Search for a matching tuple, and
3013 	// return the string if we find one.
3014 	} else {
3015 	    while (index < bucket.length) {
3016 		char chFound [] = (char []) bucket [index];
3017 
3018 		// Stop when we hit an empty entry.
3019 		if (chFound == null)
3020 		    break;
3021 
3022 		// If they're the same length, check for a match.
     		// NOTE(review): when length is 0 the loop body below never
     		// runs, so an empty name never matches an existing entry
     		// and a new pair is added on every call
3023 		if (chFound.length == length) {
3024 		    for (int i = 0; i < chFound.length; i++) {
3025 			// continue search on failure
3026 			if (ch [start + i] != chFound [i]) {
3027 			    break;
3028 			} else if (i == length - 1) {
3029 			    // That's it, we have a match!
3030 			    return (String) bucket [index + 1];
3031 			}
3032 		    }
3033 		}
     		// advance to the next {array,String} pair
3034 		index += 2;
3035 	    }
3036 	    // Not found -- we'll have to add it.
3037 
3038 	    // Do we have to grow the bucket?
3039 	    bucket = (Object []) extendArray (bucket, bucket.length, index);
3040 	}
     	// store the (possibly new or reallocated) bucket back in the table
3041 	symbolTable [hash] = bucket;
3042 
3043 	// OK, add it to the end of the bucket -- "local" interning.
3044 	// Intern "globally" to let applications share interning benefits.
3045 	// That is, "!=" and "==" work on our strings, not just equals().
3046 	String s = new String (ch, start, length).intern ();
3047 	bucket [index] = s.toCharArray ();
3048 	bucket [index + 1] = s;
3049 	return s;
3050     }
3051 
3052     /***
3053      * Ensure the capacity of an array, allocating a new one if
3054      * necessary.  Usually extends only for name hash collisions. 
3055      */
3056     private Object extendArray (Object array, int currentSize, int requiredSize)
3057     {
3058 	if (requiredSize < currentSize) {
3059 	    return array;
3060 	} else {
3061 	    Object newArray = null;
3062 	    int newSize = currentSize * 2;
3063 
3064 	    if (newSize <= requiredSize)
3065 		newSize = requiredSize + 1;
3066 
3067 	    if (array instanceof char[])
3068 		newArray = new char [newSize];
3069 	    else if (array instanceof Object[])
3070 		newArray = new Object [newSize];
3071 	    else
3072 		throw new RuntimeException ();
3073 
3074 	    System.arraycopy (array, 0, newArray, 0, currentSize);
3075 	    return newArray;
3076 	}
3077     }
3078 
3079 
3080     //////////////////////////////////////////////////////////////////////
3081     // XML query routines.
3082     //////////////////////////////////////////////////////////////////////
3083 
3084 
3085     boolean isStandalone () { return docIsStandalone; }
3086 
3087 
3088     //
3089     // Elements
3090     //
3091 
3092     private int getContentType (Object element [], int defaultType)
3093     {
3094 	int retval;
3095 
3096 	if (element == null)
3097 	    return defaultType;
3098 	retval = ((Integer) element [0]).intValue ();
3099 	if (retval == CONTENT_UNDECLARED)
3100 	    retval = defaultType;
3101 	return retval;
3102     }
3103 
3104 
3105     /***
3106      * Look up the content type of an element.
3107      * @param name The element type name.
3108      * @return An integer constant representing the content type.
3109      * @see #CONTENT_UNDECLARED
3110      * @see #CONTENT_ANY
3111      * @see #CONTENT_EMPTY
3112      * @see #CONTENT_MIXED
3113      * @see #CONTENT_ELEMENTS
3114      */
3115     public int getElementContentType (String name)
3116     {
3117 	Object element [] = (Object []) elementInfo.get (name);
3118 	return getContentType (element, CONTENT_UNDECLARED);
3119     }
3120 
3121 
3122     /***
3123      * Register an element.
3124      * Array format:
3125      *  [0] element type name
3126      *  [1] content model (mixed, elements only)
3127      *  [2] attribute hash table
3128      */
3129     private void setElement (
3130 	String		name,
3131 	int		contentType,
3132 	String		contentModel,
3133 	Hashtable	attributes
3134     ) throws SAXException 
3135     {
3136 	if (skippedPE)
3137 	    return;
3138 
3139 	Object element [] = (Object []) elementInfo.get (name);
3140 
3141 	// first <!ELEMENT ...> or <!ATTLIST ...> for this type?
3142 	if (element == null) {
3143 	    element = new Object [3];
3144 	    element [0] = new Integer (contentType);
3145 	    element [1] = contentModel;
3146 	    element [2] = attributes;
3147 	    elementInfo.put (name, element);
3148 	    return;
3149 	}
3150 
3151 	// <!ELEMENT ...> declaration?
3152 	if (contentType != CONTENT_UNDECLARED) {
3153 	    // ... following an associated <!ATTLIST ...>
3154 	    if (((Integer) element [0]).intValue () == CONTENT_UNDECLARED) {
3155 		element [0] = new Integer (contentType);
3156 		element [1] = contentModel;
3157 	    } else
3158 		// VC: Unique Element Type Declaration
3159 		handler.verror ("multiple declarations for element type: "
3160 			+ name);
3161 	}
3162 
3163 	// first <!ATTLIST ...>, before <!ELEMENT ...> ?
3164 	else if (attributes != null)
3165 	    element [2] = attributes;
3166     }
3167 
3168 
3169     /***
3170      * Look up the attribute hash table for an element.
3171      * The hash table is the second item in the element array.
3172      */
3173     private Hashtable getElementAttributes (String name)
3174     {
3175 	Object element[] = (Object[]) elementInfo.get (name);
3176 	if (element == null)
3177 	    return null;
3178 	else
3179 	    return (Hashtable) element [2];
3180     }
3181 
3182 
3183 
3184     //
3185     // Attributes
3186     //
3187 
3188     /***
3189      * Get the declared attributes for an element type.
3190      * @param elname The name of the element type.
3191      * @return An Enumeration of all the attributes declared for
3192      *	 a specific element type.  The results will be valid only
3193      *	 after the DTD (if any) has been parsed.
3194      * @see #getAttributeType
3195      * @see #getAttributeEnumeration
3196      * @see #getAttributeDefaultValueType
3197      * @see #getAttributeDefaultValue
3198      * @see #getAttributeExpandedValue
3199      */
3200     private Enumeration declaredAttributes (Object element [])
3201     {
3202 	Hashtable attlist;
3203 
3204 	if (element == null)
3205 	    return null;
3206 	if ((attlist = (Hashtable) element [2]) == null)
3207 	    return null;
3208 	return attlist.keys ();
3209     }
3210 
3211     /***
3212      * Get the declared attributes for an element type.
3213      * @param elname The name of the element type.
3214      * @return An Enumeration of all the attributes declared for
3215      *	 a specific element type.  The results will be valid only
3216      *	 after the DTD (if any) has been parsed.
3217      * @see #getAttributeType
3218      * @see #getAttributeEnumeration
3219      * @see #getAttributeDefaultValueType
3220      * @see #getAttributeDefaultValue
3221      * @see #getAttributeExpandedValue
3222      */
3223     public Enumeration declaredAttributes (String elname)
3224     {
3225 	return declaredAttributes ((Object []) elementInfo.get (elname));
3226     }
3227 
3228 
3229     /***
3230      * Retrieve the declared type of an attribute.
3231      * @param name The name of the associated element.
3232      * @param aname The name of the attribute.
3233      * @return An interend string denoting the type, or null
3234      *	indicating an undeclared attribute.
3235      */
3236     public String getAttributeType (String name, String aname)
3237     {
3238 	Object attribute[] = getAttribute (name, aname);
3239 	if (attribute == null) {
3240 	    return null;
3241 	} else {
3242 	    return (String) attribute [0];
3243 	}
3244     }
3245 
3246 
3247     /***
3248      * Retrieve the allowed values for an enumerated attribute type.
3249      * @param name The name of the associated element.
3250      * @param aname The name of the attribute.
3251      * @return A string containing the token list.
3252      */
3253     public String getAttributeEnumeration (String name, String aname)
3254     {
3255 	Object attribute[] = getAttribute (name, aname);
3256 	if (attribute == null) {
3257 	    return null;
3258 	} else {
3259 	    // assert:  attribute [0] is "ENUMERATION" or "NOTATION"
3260 	    return (String) attribute [3];
3261 	}
3262     }
3263 
3264 
3265     /***
3266      * Retrieve the default value of a declared attribute.
3267      * @param name The name of the associated element.
3268      * @param aname The name of the attribute.
3269      * @return The default value, or null if the attribute was
3270      *	 #IMPLIED or simply undeclared and unspecified.
3271      * @see #getAttributeExpandedValue
3272      */
3273     public String getAttributeDefaultValue (String name, String aname)
3274     {
3275 	Object attribute[] = getAttribute (name, aname);
3276 	if (attribute == null) {
3277 	    return null;
3278 	} else {
3279 	    return (String) attribute [1];
3280 	}
3281     }
3282 
3283     /*
3284 
3285 // FIXME:  Leaving this in, until W3C finally resolves the confusion
3286 // between parts of the XML 2nd REC about when entity declarations
3287 // are guaranteed to be known.  Current code matches what section 5.1
3288 // (conformance) describes, but some readings of the self-contradicting
3289 // text in 4.1 (the "Entity Declared" WFC and VC) seem to expect that
3290 // attribute expansion/normalization must be deferred in some cases
3291 // (just TRY to identify them!).
3292 
3293      * Retrieve the expanded value of a declared attribute.
3294      * <p>General entities (and char refs) will be expanded (once).
3295      * @param name The name of the associated element.
3296      * @param aname The name of the attribute.
3297      * @return The expanded default value, or null if the attribute was
3298      *	 #IMPLIED or simply undeclared
3299      * @see #getAttributeDefaultValue
3300     public String getAttributeExpandedValue (String name, String aname)
3301     throws Exception
3302     {
3303 	Object attribute[] = getAttribute (name, aname);
3304 
3305 	if (attribute == null) {
3306 	    return null;
3307 	} else if (attribute [4] == null && attribute [1] != null) {
3308 	    // we MUST use the same buf for both quotes else the literal
3309 	    // can't be properly terminated
3310 	    char buf [] = new char [1];
3311 	    int	flags = LIT_ENTITY_REF | LIT_ATTRIBUTE;
3312 	    String type = getAttributeType (name, aname);
3313 
3314 	    if (type != "CDATA" && type != null)
3315 		flags |= LIT_NORMALIZE;
3316 	    buf [0] = '"';
3317 	    pushCharArray (null, buf, 0, 1);
3318 	    pushString (null, (String) attribute [1]);
3319 	    pushCharArray (null, buf, 0, 1);
3320 	    attribute [4] = readLiteral (flags);
3321 	}
3322 	return (String) attribute [4];
3323     }
3324      */
3325 
3326     /***
3327      * Retrieve the default value mode of a declared attribute.
3328      * @see #ATTRIBUTE_DEFAULT_SPECIFIED
3329      * @see #ATTRIBUTE_DEFAULT_IMPLIED
3330      * @see #ATTRIBUTE_DEFAULT_REQUIRED
3331      * @see #ATTRIBUTE_DEFAULT_FIXED
3332      */
3333     public int getAttributeDefaultValueType (String name, String aname)
3334     {
3335 	Object attribute[] = getAttribute (name, aname);
3336 	if (attribute == null) {
3337 	    return ATTRIBUTE_DEFAULT_UNDECLARED;
3338 	} else {
3339 	    return ((Integer) attribute [2]).intValue ();
3340 	}
3341     }
3342 
3343 
3344     /***
3345      * Register an attribute declaration for later retrieval.
3346      * Format:
3347      * - String type
3348      * - String default value
3349      * - int value type
3350      * - enumeration
3351      * - processed default value
3352      */
3353     private void setAttribute (String elName, String name, String type,
3354 			String enumeration,
3355 			String value, int valueType)
3356     throws Exception
3357     {
3358 	Hashtable attlist;
3359 
3360 	if (skippedPE)
3361 	    return;
3362 
3363 	// Create a new hashtable if necessary.
3364 	attlist = getElementAttributes (elName);
3365 	if (attlist == null)
3366 	    attlist = new Hashtable ();
3367 
3368 	// ignore multiple attribute declarations!
3369 	if (attlist.get (name) != null) {
3370 	    // warn ...
3371 	    return;
3372 	} else {
3373 	    Object attribute [] = new Object [5];
3374 	    attribute [0] = type;
3375 	    attribute [1] = value;
3376 	    attribute [2] = new Integer (valueType);
3377 	    attribute [3] = enumeration;
3378 	    attribute [4] = null;
3379 	    attlist.put (name, attribute);
3380 
3381 	    // save; but don't overwrite any existing <!ELEMENT ...>
3382 	    setElement (elName, CONTENT_UNDECLARED, null, attlist);
3383 	}
3384     }
3385 
3386 
3387     /***
3388      * Retrieve the array representing an attribute declaration.
3389      */
3390     private Object[] getAttribute (String elName, String name)
3391     {
3392 	Hashtable attlist;
3393 
3394 	attlist = getElementAttributes (elName);
3395 	if (attlist == null)
3396 	    return null;
3397 	return (Object[]) attlist.get (name);
3398     }
3399 
3400 
3401     //
3402     // Entities
3403     //
3404 
3405     /***
3406      * Find the type of an entity.
3407      * @returns An integer constant representing the entity type.
3408      * @see #ENTITY_UNDECLARED
3409      * @see #ENTITY_INTERNAL
3410      * @see #ENTITY_NDATA
3411      * @see #ENTITY_TEXT
3412      */
3413     public int getEntityType (String ename)
3414     {
3415 	Object entity[] = (Object[]) entityInfo.get (ename);
3416 	if (entity == null) {
3417 	    return ENTITY_UNDECLARED;
3418 	} else {
3419 	    return ((Integer) entity [0]).intValue ();
3420 	}
3421     }
3422 
3423 
3424     /***
3425      * Return an external entity's identifier array.
3426      * @param ename The name of the external entity.
3427      * @return Three element array containing (in order) the entity's
3428      *	public identifier, system identifier, and base URI.  Null if
3429      *	 the entity was not declared as an external entity.
3430      * @see #getEntityType
3431      */
3432     public String [] getEntityIds (String ename)
3433     {
3434 	Object entity[] = (Object[]) entityInfo.get (ename);
3435 	if (entity == null) {
3436 	    return null;
3437 	} else {
3438 	    return (String []) entity [1];
3439 	}
3440     }
3441 
3442 
3443     /***
3444      * Return an internal entity's replacement text.
3445      * @param ename The name of the internal entity.
3446      * @return The entity's replacement text, or null if
3447      *	 the entity was not declared as an internal entity.
3448      * @see #getEntityType
3449      */
3450     public String getEntityValue (String ename)
3451     {
3452 	Object entity[] = (Object[]) entityInfo.get (ename);
3453 	if (entity == null) {
3454 	    return null;
3455 	} else {
3456 	    return (String) entity [3];
3457 	}
3458     }
3459 
3460 
3461     /***
3462      * Register an entity declaration for later retrieval.
3463      */
3464     private void setInternalEntity (String eName, String value)
3465     throws SAXException
3466     {
3467 	if (skippedPE)
3468 	    return;
3469 
3470 	if (entityInfo.get (eName) == null) {
3471 	    Object entity[] = new Object [5];
3472 	    entity [0] = new Integer (ENTITY_INTERNAL);
3473 // FIXME: shrink!!  [2] useless
3474 	    entity [3] = value;
3475 	    entityInfo.put (eName, entity);
3476 	}
3477   if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
3478     if ("lt" == eName || "gt" == eName || "quot" == eName
3479         || "apos" == eName || "amp" == eName)
3480 	    return;
3481   } else {
3482     if ("lt".equals(eName) || "gt".equals(eName) || "quot".equals(eName)
3483         || "apos".equals(eName) || "amp".equals(eName))
3484 	    return;
3485   }
3486 	handler.getDeclHandler ()
3487 	    .internalEntityDecl (eName, value);
3488     }
3489 
3490 
3491     /***
3492      * Register an external entity declaration for later retrieval.
3493      */
3494     private void setExternalEntity (String eName, int eClass,
3495 		     String ids [], String nName)
3496     {
3497 	if (entityInfo.get (eName) == null) {
3498 	    Object entity[] = new Object [5];
3499 	    entity [0] = new Integer (eClass);
3500 	    entity [1] = ids;
3501 // FIXME: shrink!!  [2] no longer used, [4] irrelevant given [0]
3502 	    entity [4] = nName;
3503 	    entityInfo.put (eName, entity);
3504 	}
3505     }
3506 
3507 
3508     //
3509     // Notations.
3510     //
3511 
3512     /***
3513      * Report a notation declaration, checking for duplicates.
3514      */
3515     private void setNotation (String nname, String ids [])
3516     throws SAXException
3517     {
3518 	if (skippedPE)
3519 	    return;
3520 
3521 	handler.notationDecl (nname, ids);
3522 	if (notationInfo.get (nname) == null)
3523 	    notationInfo.put (nname, nname);
3524 	else
3525 	    // VC: Unique Notation Name
3526 	    handler.verror ("Duplicate notation name decl: " + nname);
3527     }
3528 
3529 
3530     //
3531     // Location.
3532     //
3533 
3534 
3535     /***
3536      * Return the current line number.
3537      */
3538     public int getLineNumber ()
3539     {
3540 	return line;
3541     }
3542 
3543 
3544     /***
3545      * Return the current column number.
3546      */
3547     public int getColumnNumber ()
3548     {
3549 	return column;
3550     }
3551 
3552 
3553     //////////////////////////////////////////////////////////////////////
3554     // High-level I/O.
3555     //////////////////////////////////////////////////////////////////////
3556 
3557 
3558     /***
3559      * Read a single character from the readBuffer.
3560      * <p>The readDataChunk () method maintains the buffer.
3561      * <p>If we hit the end of an entity, try to pop the stack and
3562      * keep going.
3563      * <p> (This approach doesn't really enforce XML's rules about
3564      * entity boundaries, but this is not currently a validating
3565      * parser).
3566      * <p>This routine also attempts to keep track of the current
3567      * position in external entities, but it's not entirely accurate.
3568      * @return The next available input character.
3569      * @see #unread (char)
3570      * @see #readDataChunk
3571      * @see #readBuffer
3572      * @see #line
3573      * @return The next character from the current input source.
3574      */
3575     private char readCh ()
3576     throws SAXException, IOException
3577     {
3578 	// As long as there's nothing in the
3579 	// read buffer, try reading more data
3580 	// (for an external entity) or popping
3581 	// the entity stack (for either).
3582 	while (readBufferPos >= readBufferLength) {
3583 	    switch (sourceType) {
3584 	    case INPUT_READER:
3585 	    case INPUT_STREAM:
3586 		readDataChunk ();
3587 		while (readBufferLength < 1) {
3588 		    popInput ();
3589 		    if (readBufferLength < 1) {
3590 			readDataChunk ();
3591 		    }
3592 		}
3593 		break;
3594 
3595 	    default:
3596 
3597 		popInput ();
3598 		break;
3599 	    }
3600 	}
3601 
3602 	char c = readBuffer [readBufferPos++];
3603        
3604 	if (c == '\n') {
3605 	    line++;
3606 	    column = 0;
3607 	} else {
3608 	    if (c == '<') {
3609 		/* the most common return to parseContent () ... NOP */
3610 	    } else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
3611 	    		|| ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085) 
3612 	    		   && xmlVersion == XML_11)) 
3613 		error ("illegal XML character U+"
3614 			+ Integer.toHexString (c));
3615 
3616 	    // If we're in the DTD and in a context where PEs get expanded,
3617 	    // do so ... 1/14/2000 errata identify those contexts.  There
3618 	    // are also spots in the internal subset where PE refs are fatal
3619 	    // errors, hence yet another flag.
3620 	    else if (c == '%' && expandPE) {
3621 		if (peIsError)
3622 		    error ("PE reference within decl in internal subset.");
3623 		parsePEReference ();
3624 		return readCh ();
3625 	    }
3626 	    column++;
3627 	}
3628 
3629 	return c;
3630     }
3631 
3632 
3633     /***
3634      * Push a single character back onto the current input stream.
3635      * <p>This method usually pushes the character back onto
3636      * the readBuffer.
3637      * <p>I don't think that this would ever be called with 
3638      * readBufferPos = 0, because the methods always reads a character
3639      * before unreading it, but just in case, I've added a boundary
3640      * condition.
3641      * @param c The character to push back.
3642      * @see #readCh
3643      * @see #unread (char[])
3644      * @see #readBuffer
3645      */
3646     private void unread (char c)
3647     throws SAXException
3648     {
3649 	// Normal condition.
3650 	if (c == '\n') {
3651 	    line--;
3652 	    column = -1;
3653 	}
3654 	if (readBufferPos > 0) {
3655 	    readBuffer [--readBufferPos] = c;
3656 	} else {
3657 	    pushString (null, new Character (c).toString ());
3658 	}
3659     }
3660 
3661 
3662     /***
3663      * Push a char array back onto the current input stream.
3664      * <p>NOTE: you must <em>never</em> push back characters that you
3665      * haven't actually read: use pushString () instead.
3666      * @see #readCh
3667      * @see #unread (char)
3668      * @see #readBuffer
3669      * @see #pushString
3670      */
3671     private void unread (char ch[], int length)
3672     throws SAXException
3673     {
3674 	for (int i = 0; i < length; i++) {
3675 	    if (ch [i] == '\n') {
3676 		line--;
3677 		column = -1;
3678 	    }
3679 	}
3680 	if (length < readBufferPos) {
3681 	    readBufferPos -= length;
3682 	} else {
3683 	    pushCharArray (null, ch, 0, length);
3684 	}
3685     }
3686 
3687 
3688     /***
3689      * Push, or skip, a new external input source.
3690      * The source will be some kind of parsed entity, such as a PE
3691      * (including the external DTD subset) or content for the body.
3692      *
3693      * @param url The java.net.URL object for the entity.
3694      * @see SAXDriver#resolveEntity
3695      * @see #pushString
3696      * @see #sourceType
3697      * @see #pushInput
3698      * @see #detectEncoding
3699      * @see #sourceType
3700      * @see #readBuffer
3701      */
3702     private void pushURL (
3703         boolean		isPE,
3704 	String		ename,
3705 	String		ids [],		// public, system, baseURI
3706 	Reader		reader,
3707 	InputStream	stream,
3708 	String		encoding,
3709 	boolean		doResolve
3710     ) throws SAXException, IOException
3711     {
3712 	boolean		ignoreEncoding;
3713 	String		systemId;
3714 	InputSource	source;
3715 
3716 	if (!isPE)
3717 	    dataBufferFlush ();
3718 
3719 	scratch.setPublicId (ids [0]);
3720 	scratch.setSystemId (ids [1]);
3721 
3722 	// See if we should skip or substitute the entity.
3723 	// If we're not skipping, resolving reports startEntity()
3724 	// and updates the (handler's) stack of URIs.
3725 	if (doResolve) {
3726 	    // assert (stream == null && reader == null && encoding == null)
3727 	    source = handler.resolveEntity (isPE, ename, scratch, ids [2]);
3728 	    if (source == null) {
3729 		handler.warn ("skipping entity: " + ename);
3730 		handler.skippedEntity (ename);
3731 		if (isPE)
3732 		    skippedPE = true;
3733 		return;
3734 	    }
3735 
3736 	    // we might be using alternate IDs/encoding
3737 	    systemId = source.getSystemId ();
3738 	    // The following warning and setting systemId was deleted bcause
3739 	    // the application has the option of not setting systemId
3740 	    // provided that it has set the characte/byte stream.
3741 	    /*
3742 	    if (systemId == null) {
3743 		handler.warn ("missing system ID, using " + ids [1]);
3744 		systemId = ids [1];
3745 	    }
3746 	    */
3747 	} else {
3748 	    // "[document]", or "[dtd]" via getExternalSubset()
3749 	    scratch.setCharacterStream (reader);
3750 	    scratch.setByteStream (stream);
3751 	    scratch.setEncoding (encoding);
3752 	    source = scratch;
3753 	    systemId = ids [1];
3754       if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) {
3755         handler.startExternalEntity (ename, systemId,
3756                                      "[document]" == ename);
3757       } else {
3758         handler.startExternalEntity (ename, systemId,
3759                                      "[document]".equals(ename));
3760       }
3761 	}
3762 
3763 	// we may have been given I/O streams directly
3764 	if (source.getCharacterStream () != null) {
3765 	    if (source.getByteStream () != null)
3766 		error ("InputSource has two streams!");
3767 	    reader = source.getCharacterStream ();
3768 	} else if (source.getByteStream () != null) {
3769 	    encoding = source.getEncoding ();
3770 	    if (encoding == null)
3771 		stream = source.getByteStream ();
3772 	    else try {
3773 		reader = new InputStreamReader (
3774 		    source.getByteStream (),
3775 		    encoding);
3776 	    } catch (IOException e) {
3777 		stream = source.getByteStream ();
3778 	    }
3779 	} else if (systemId == null)
3780 	    error ("InputSource has no URI!");
3781 	scratch.setCharacterStream (null);
3782 	scratch.setByteStream (null);
3783 	scratch.setEncoding (null);
3784 
3785 	// Push the existing status.
3786 	pushInput (ename);
3787 
3788 	// Create a new read buffer.
3789 	// (Note the four-character margin)
3790 	readBuffer = new char [READ_BUFFER_MAX + 4];
3791 	readBufferPos = 0;
3792 	readBufferLength = 0;
3793 	readBufferOverflow = -1;
3794 	is = null;
3795 	line = 1;
3796 	column = 0;
3797 	currentByteCount = 0;
3798 
3799 	// If there's an explicit character stream, just
3800 	// ignore encoding declarations.
3801 	if (reader != null) {
3802 	    sourceType = INPUT_READER;
3803 	    this.reader = reader;
3804 	    tryEncodingDecl (true);
3805 	    return;
3806 	}
3807 	
3808 	// Else we handle the conversion, and need to ensure
3809 	// it's done right.
3810 	sourceType = INPUT_STREAM;
3811 	if (stream != null) {
3812 	    is = stream;
3813 	} else {
3814 	    // We have to open our own stream to the URL.
3815 	    URL url = new URL (systemId);
3816 
3817 	    externalEntity = url.openConnection ();
3818 	    externalEntity.connect ();
3819 	    is = externalEntity.getInputStream ();
3820 	}
3821 
3822 	// If we get to here, there must be
3823 	// an InputStream available.
3824 	if (!is.markSupported ()) {
3825 	    is = new BufferedInputStream (is);
3826 	}
3827 
3828 	// Get any external encoding label.
3829 	if (encoding == null && externalEntity != null) {
3830 	    // External labels can be untrustworthy; filesystems in
3831 	    // particular often have the wrong default for content
3832 	    // that wasn't locally originated.  Those we autodetect.
3833 	    if (!"file".equals (externalEntity.getURL ().getProtocol ())) {
3834 		int temp;
3835 
3836 		// application/xml;charset=something;otherAttr=...
3837 		// ... with many variants on 'something'
3838 		encoding = externalEntity.getContentType ();
3839 
3840 		// MHK code (fix for Saxon 5.5.1/007):
3841 		// protect against encoding==null
3842 		if (encoding==null) {
3843 		    temp = -1;
3844 		} else {
3845 		    temp = encoding.indexOf ("charset");
3846 		}
3847 
3848 		// RFC 2376 sez MIME text defaults to ASCII, but since the
3849 		// JDK will create a MIME type out of thin air, we always
3850 		// autodetect when there's no explicit charset attribute.
3851 		if (temp < 0)
3852 		    encoding = null;	// autodetect
3853 		else {
3854 		    // only this one attribute
3855 		    if ((temp = encoding.indexOf (';')) > 0)
3856 			encoding = encoding.substring (0, temp);
3857 
3858 		    if ((temp = encoding.indexOf ('=', temp + 7)) > 0) {
3859 			encoding = encoding.substring (temp + 1);
3860 
3861 			// attributes can have comment fields (RFC 822)
3862 			if ((temp = encoding.indexOf ('(')) > 0)
3863 			    encoding = encoding.substring (0, temp);
3864 			// ... and values may be quoted
3865 			if ((temp = encoding.indexOf ('"')) > 0)
3866 			    encoding = encoding.substring (temp + 1,
3867 				    encoding.indexOf ('"', temp + 2));
3868 			encoding.trim ();
3869 		    } else {
3870 			handler.warn ("ignoring illegal MIME attribute: "
3871 				+ encoding);
3872 			encoding = null;
3873 		    }
3874 		}
3875 	    }
3876 	}
3877 
3878 	// if we got an external encoding label, use it ...
3879 	if (encoding != null) {
3880 	    this.encoding = ENCODING_EXTERNAL;
3881 	    setupDecoding (encoding);
3882 	    ignoreEncoding = true;
3883 	
3884 	// ... else autodetect from first bytes.
3885 	} else {
3886 	    detectEncoding ();
3887 	    ignoreEncoding = false;
3888 	}
3889 
3890 	// Read any XML or text declaration.
3891 	// If we autodetected, it may tell us the "real" encoding.
3892 	try {
3893 	    tryEncodingDecl (ignoreEncoding);
3894 	} catch (UnsupportedEncodingException x) {
3895 	    encoding = x.getMessage ();
3896 
3897 	    // if we don't handle the declared encoding,
3898 	    // try letting a JVM InputStreamReader do it
3899 	    try {
3900 		if (sourceType != INPUT_STREAM)
3901 		    throw x;
3902 
3903 		is.reset ();
3904 		readBufferPos = 0;
3905 		readBufferLength = 0;
3906 		readBufferOverflow = -1;
3907 		line = 1;
3908 		currentByteCount = column = 0;
3909 
3910 		sourceType = INPUT_READER;
3911 		this.reader = new InputStreamReader (is, encoding);
3912 		is = null;
3913 
3914 		tryEncodingDecl (true);
3915 
3916 	    } catch (IOException e) {
3917 		error ("unsupported text encoding",
3918 		       encoding,
3919 		       null);
3920 	    }
3921 	}
3922     }
3923 
3924 
3925     /***
3926      * Check for an encoding declaration.  This is the second part of the
3927      * XML encoding autodetection algorithm, relying on detectEncoding to
3928      * get to the point that this part can read any encoding declaration
3929      * in the document (using only US-ASCII characters).
3930      *
3931      * <p> Because this part starts to fill parser buffers with this data,
3932      * it's tricky to setup a reader so that Java's built-in decoders can be
3933      * used for the character encodings that aren't built in to this parser
3934      * (such as EUC-JP, KOI8-R, Big5, etc).
3935      *
3936      * @return any encoding in the declaration, uppercased; or null
3937      * @see detectEncoding
3938      */
3939     private String tryEncodingDecl (boolean ignoreEncoding)
3940     throws SAXException, IOException
3941     {
3942 	// Read the XML/text declaration.
3943 	if (tryRead ("<?xml")) {
3944 	    if (tryWhitespace ()) {
3945 		if (inputStack.size () > 0) {
3946 		    return parseTextDecl (ignoreEncoding);
3947 		} else {
3948 		    return parseXMLDecl (ignoreEncoding);
3949 		}
3950 	    } else {
3951 		// <?xml-stylesheet ...?> or similar
3952 		unread ('l');
3953 		unread ('m');
3954 		unread ('x');
3955 		unread ('?');
3956 		unread ('<');
3957 	    }
3958 	}
3959 	return null;
3960     }
3961 
3962 
3963     /***
3964      * Attempt to detect the encoding of an entity.
3965      * <p>The trick here (as suggested in the XML standard) is that
3966      * any entity not in UTF-8, or in UCS-2 with a byte-order mark, 
3967      * <b>must</b> begin with an XML declaration or an encoding
3968      * declaration; we simply have to look for "&lt;?xml" in various
3969      * encodings.
3970      * <p>This method has no way to distinguish among 8-bit encodings.
3971      * Instead, it sets up for UTF-8, then (possibly) revises its assumption
3972      * later in setupDecoding ().  Any ASCII-derived 8-bit encoding
3973      * should work, but most will be rejected later by setupDecoding ().
3974      * @see #tryEncoding (byte[], byte, byte, byte, byte)
3975      * @see #tryEncoding (byte[], byte, byte)
3976      * @see #setupDecoding
3977      */
3978     private void detectEncoding ()
3979     throws SAXException, IOException
3980     {
3981 	byte signature[] = new byte [4];
3982 
3983 	// Read the first four bytes for
3984 	// autodetection.
3985 	is.mark (4);
3986 	is.read (signature);
3987 	is.reset ();
3988 
3989 	//
3990 	// FIRST:  four byte encodings (who uses these?)
3991 	//
3992 	if (tryEncoding (signature, (byte) 0x00, (byte) 0x00,
3993 			  (byte) 0x00, (byte) 0x3c)) {
3994 	    // UCS-4 must begin with "<?xml"
3995 	    // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234)
3996 	    // "UTF-32BE"
3997 	    encoding = ENCODING_UCS_4_1234;
3998 
3999 	} else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00,
4000 				 (byte) 0x00, (byte) 0x00)) {
4001 	    // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321)
4002 	    // "UTF-32LE"
4003 	    encoding = ENCODING_UCS_4_4321;
4004 
4005 	} else if (tryEncoding (signature, (byte) 0x00, (byte) 0x00,
4006 				 (byte) 0x3c, (byte) 0x00)) {
4007 	    // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143)
4008 	    encoding = ENCODING_UCS_4_2143;
4009 
4010 	} else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c,
4011 				 (byte) 0x00, (byte) 0x00)) {
4012 	    // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3421)
4013 	    encoding = ENCODING_UCS_4_3412;
4014 
4015 	    // 00 00 fe ff UCS_4_1234 (with BOM)
4016 	    // ff fe 00 00 UCS_4_4321 (with BOM)
4017 	}
4018 
4019 	//
4020 	// SECOND:  two byte encodings
4021 	// note ... with 1/14/2000 errata the XML spec identifies some
4022 	// more "broken UTF-16" autodetection cases, with no XML decl,
4023 	// which we don't handle here (that's legal too).
4024 	//
4025 	else if (tryEncoding (signature, (byte) 0xfe, (byte) 0xff)) {
4026 	    // UCS-2 with a byte-order marker. (UTF-16)
4027 	    // 0xfe 0xff: UCS-2, big-endian (12)
4028 	    encoding = ENCODING_UCS_2_12;
4029 	    is.read (); is.read ();
4030 
4031 	} else if (tryEncoding (signature, (byte) 0xff, (byte) 0xfe)) {
4032 	    // UCS-2 with a byte-order marker. (UTF-16)
4033 	    // 0xff 0xfe: UCS-2, little-endian (21)
4034 	    encoding = ENCODING_UCS_2_21;
4035 	    is.read (); is.read ();
4036 
4037 	} else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c,
4038 				 (byte) 0x00, (byte) 0x3f)) {
4039 	    // UTF-16BE (otherwise, malformed UTF-16)
4040 	    // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark
4041 	    encoding = ENCODING_UCS_2_12;
4042 	    error ("no byte-order mark for UCS-2 entity");
4043 
4044 	} else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00,
4045 				 (byte) 0x3f, (byte) 0x00)) {
4046 	    // UTF-16LE (otherwise, malformed UTF-16)
4047 	    // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark
4048 	    encoding = ENCODING_UCS_2_21;
4049 	    error ("no byte-order mark for UCS-2 entity");
4050 	}
4051 
4052 	//
4053 	// THIRD:  ASCII-derived encodings, fixed and variable lengths
4054 	//
4055 	else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x3f,
4056 			       (byte) 0x78, (byte) 0x6d)) {
4057 	    // ASCII derived
4058 	    // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING)
4059 	    encoding = ENCODING_UTF_8;
4060 	    prefetchASCIIEncodingDecl ();
4061 
4062 	} else if (signature [0] == (byte) 0xef
4063 		&& signature [1] == (byte) 0xbb
4064 		&& signature [2] == (byte) 0xbf) {
4065 	    // 0xef 0xbb 0xbf: UTF-8 BOM (not part of document text)
4066 	    // this un-needed notion slipped into XML 2nd ed through a
4067 	    // "non-normative" erratum; now required by MSFT and UDDI,
4068 	    // and E22 made it normative.
4069 	    encoding = ENCODING_UTF_8;
4070 	    is.read (); is.read (); is.read ();
4071 
4072 	} else {
4073 	    // 4c 6f a7 94 ... we don't understand EBCDIC flavors
4074 	    // ... but we COULD at least kick in some fixed code page
4075 
4076 	    // (default) UTF-8 without encoding/XML declaration
4077 	    encoding = ENCODING_UTF_8;
4078 	}
4079     }
4080 
4081 
4082     /***
4083      * Check for a four-byte signature.
4084      * <p>Utility routine for detectEncoding ().
4085      * <p>Always looks for some part of "<?XML" in a specific encoding.
4086      * @param sig The first four bytes read.
4087      * @param b1 The first byte of the signature
4088      * @param b2 The second byte of the signature
4089      * @param b3 The third byte of the signature
4090      * @param b4 The fourth byte of the signature
4091      * @see #detectEncoding
4092      */
4093     private static boolean tryEncoding (
4094 	byte sig[], byte b1, byte b2, byte b3, byte b4)
4095     {
4096 	return (sig [0] == b1 && sig [1] == b2
4097 		&& sig [2] == b3 && sig [3] == b4);
4098     }
4099 
4100 
4101     /***
4102      * Check for a two-byte signature.
4103      * <p>Looks for a UCS-2 byte-order mark.
4104      * <p>Utility routine for detectEncoding ().
4105      * @param sig The first four bytes read.
4106      * @param b1 The first byte of the signature
4107      * @param b2 The second byte of the signature
4108      * @see #detectEncoding
4109      */
4110     private static boolean tryEncoding (byte sig[], byte b1, byte b2)
4111     {
4112 	return ((sig [0] == b1) && (sig [1] == b2));
4113     }
4114 
4115 
4116     /***
4117      * This method pushes a string back onto input.
4118      * <p>It is useful either as the expansion of an internal entity, 
4119      * or for backtracking during the parse.
4120      * <p>Call pushCharArray () to do the actual work.
4121      * @param s The string to push back onto input.
4122      * @see #pushCharArray
4123      */
4124     private void pushString (String ename, String s)
4125     throws SAXException
4126     {
4127 	char ch[] = s.toCharArray ();
4128 	pushCharArray (ename, ch, 0, ch.length);
4129     }
4130 
4131 
4132     /***
4133      * Push a new internal input source.
4134      * <p>This method is useful for expanding an internal entity,
4135      * or for unreading a string of characters.  It creates a new
4136      * readBuffer containing the characters in the array, instead
4137      * of characters converted from an input byte stream.
4138      * @param ch The char array to push.
4139      * @see #pushString
4140      * @see #pushURL
4141      * @see #readBuffer
4142      * @see #sourceType
4143      * @see #pushInput
4144      */
4145     private void pushCharArray (String ename, char ch[], int start, int length)
4146     throws SAXException
4147     {
4148 	// Push the existing status
4149 	pushInput (ename);
4150 	if (ename != null && doReport) {
4151 	    dataBufferFlush ();
4152 	    handler.startInternalEntity (ename);
4153 	}
4154 	sourceType = INPUT_INTERNAL;
4155 	readBuffer = ch;
4156 	readBufferPos = start;
4157 	readBufferLength = length;
4158 	readBufferOverflow = -1;
4159     }
4160 
4161 
4162     /***
4163      * Save the current input source onto the stack.
4164      * <p>This method saves all of the global variables associated with
4165      * the current input source, so that they can be restored when a new
4166      * input source has finished.  It also tests for entity recursion.
4167      * <p>The method saves the following global variables onto a stack
4168      * using a fixed-length array:
4169      * <ol>
4170      * <li>sourceType
4171      * <li>externalEntity
4172      * <li>readBuffer
4173      * <li>readBufferPos
4174      * <li>readBufferLength
4175      * <li>line
4176      * <li>encoding
4177      * </ol>
4178      * @param ename The name of the entity (if any) causing the new input.
4179      * @see #popInput
4180      * @see #sourceType
4181      * @see #externalEntity
4182      * @see #readBuffer
4183      * @see #readBufferPos
4184      * @see #readBufferLength
4185      * @see #line
4186      * @see #encoding
4187      */
4188     private void pushInput (String ename)
4189     throws SAXException
4190     {
4191 	// Check for entity recursion.
4192 	if (ename != null) {
4193 	    Enumeration entities = entityStack.elements ();
4194 	    while (entities.hasMoreElements ()) {
4195 		String e = (String) entities.nextElement ();
4196 		if (e != null && e == ename) {
4197 		    error ("recursive reference to entity", ename, null);
4198 		}
4199 	    }
4200 	}
4201 	entityStack.push (ename);
4202 
4203 	// Don't bother if there is no current input.
4204 	if (sourceType == INPUT_NONE) {
4205 	    return;
4206 	}
4207 
4208 	// Set up a snapshot of the current
4209 	// input source.
4210 	Object input[] = new Object [12];
4211 
4212 	input [0] = new Integer (sourceType);
4213 	input [1] = externalEntity;
4214 	input [2] = readBuffer;
4215 	input [3] = new Integer (readBufferPos);
4216 	input [4] = new Integer (readBufferLength);
4217 	input [5] = new Integer (line);
4218 	input [6] = new Integer (encoding);
4219 	input [7] = new Integer (readBufferOverflow);
4220 	input [8] = is;
4221 	input [9] = new Integer (currentByteCount);
4222 	input [10] = new Integer (column);
4223 	input [11] = reader;
4224 
4225 	// Push it onto the stack.
4226 	inputStack.push (input);
4227     }
4228 
4229 
4230     /***
4231      * Restore a previous input source.
4232      * <p>This method restores all of the global variables associated with
4233      * the current input source.
4234      * @exception java.io.EOFException
4235      *    If there are no more entries on the input stack.
4236      * @see #pushInput
4237      * @see #sourceType
4238      * @see #externalEntity
4239      * @see #readBuffer
4240      * @see #readBufferPos
4241      * @see #readBufferLength
4242      * @see #line
4243      * @see #encoding
4244      */
4245     private void popInput ()
4246     throws SAXException, IOException
4247     {
4248 	String ename = (String) entityStack.pop ();
4249 
4250 	if (ename != null && doReport)
4251 	    dataBufferFlush ();
4252 	switch (sourceType) {
4253 	case INPUT_STREAM:
4254 	    handler.endExternalEntity (ename);
4255 	    is.close ();
4256 	    break;
4257 	case INPUT_READER:
4258 	    handler.endExternalEntity (ename);
4259 	    reader.close ();
4260 	    break;
4261 	case INPUT_INTERNAL:
4262 	    if (ename != null && doReport)
4263 		handler.endInternalEntity (ename);
4264 	    break;
4265 	}
4266 
4267 	// Throw an EOFException if there
4268 	// is nothing else to pop.
4269 	if (inputStack.isEmpty ()) {
4270 	    throw new EOFException ("no more input");
4271 	}
4272 
4273 	Object input [] = (Object[]) inputStack.pop ();
4274 
4275 	sourceType = ((Integer) input [0]).intValue ();
4276 	externalEntity = (URLConnection) input [1];
4277 	readBuffer = (char[]) input [2];
4278 	readBufferPos = ((Integer) input [3]).intValue ();
4279 	readBufferLength = ((Integer) input [4]).intValue ();
4280 	line = ((Integer) input [5]).intValue ();
4281 	encoding = ((Integer) input [6]).intValue ();
4282 	readBufferOverflow = ((Integer) input [7]).intValue ();
4283 	is = (InputStream) input [8];
4284 	currentByteCount = ((Integer) input [9]).intValue ();
4285 	column = ((Integer) input [10]).intValue ();
4286 	reader = (Reader) input [11];
4287     }
4288 
4289 
4290     /***
4291      * Return true if we can read the expected character.
4292      * <p>Note that the character will be removed from the input stream
4293      * on success, but will be put back on failure.  Do not attempt to
4294      * read the character again if the method succeeds.
4295      * @param delim The character that should appear next.  For a
4296      *	      insensitive match, you must supply this in upper-case.
4297      * @return true if the character was successfully read, or false if
4298      *	 it was not.
4299      * @see #tryRead (String)
4300      */
4301     private boolean tryRead (char delim)
4302     throws SAXException, IOException
4303     {
4304 	char c;
4305 
4306 	// Read the character
4307 	c = readCh ();
4308 
4309 	// Test for a match, and push the character
4310 	// back if the match fails.
4311 	if (c == delim) {
4312 	    return true;
4313 	} else {
4314 	    unread (c);
4315 	    return false;
4316 	}
4317     }
4318 
4319 
4320     /***
4321      * Return true if we can read the expected string.
4322      * <p>This is simply a convenience method.
4323      * <p>Note that the string will be removed from the input stream
4324      * on success, but will be put back on failure.  Do not attempt to
4325      * read the string again if the method succeeds.
4326      * <p>This method will push back a character rather than an
4327      * array whenever possible (probably the majority of cases).
4328      * @param delim The string that should appear next.
4329      * @return true if the string was successfully read, or false if
4330      *	 it was not.
4331      * @see #tryRead (char)
4332      */
4333     private boolean tryRead (String delim)
4334     throws SAXException, IOException
4335     {
4336 	return tryRead (delim.toCharArray ());
4337     }
4338 
4339     private boolean tryRead (char ch [])
4340     throws SAXException, IOException
4341     {
4342 	char c;
4343 
4344 	// Compare the input, character-
4345 	// by character.
4346 
4347 	for (int i = 0; i < ch.length; i++) {
4348 	    c = readCh ();
4349 	    if (c != ch [i]) {
4350 		unread (c);
4351 		if (i != 0) {
4352 		    unread (ch, i);
4353 		}
4354 		return false;
4355 	    }
4356 	}
4357 	return true;
4358     }
4359 
4360 
4361 
4362     /***
4363      * Return true if we can read some whitespace.
4364      * <p>This is simply a convenience method.
4365      * <p>This method will push back a character rather than an
4366      * array whenever possible (probably the majority of cases).
4367      * @return true if whitespace was found.
4368      */
4369     private boolean tryWhitespace ()
4370     throws SAXException, IOException
4371     {
4372 	char c;
4373 	c = readCh ();
4374 	if (isWhitespace (c)) {
4375 	    skipWhitespace ();
4376 	    return true;
4377 	} else {
4378 	    unread (c);
4379 	    return false;
4380 	}
4381     }
4382 
4383 
4384     /***
4385      * Read all data until we find the specified string.
4386      * This is useful for scanning CDATA sections and PIs.
4387      * <p>This is inefficient right now, since it calls tryRead ()
4388      * for every character.
4389      * @param delim The string delimiter
4390      * @see #tryRead (String, boolean)
4391      * @see #readCh
4392      */
4393     private void parseUntil (String delim)
4394     throws SAXException, IOException
4395     {
4396 	parseUntil (delim.toCharArray ());
4397     }
4398 
4399     private void parseUntil (char delim [])
4400     throws SAXException, IOException
4401     {
4402 	char c;
4403 	int startLine = line;
4404 
4405 	try {
4406 	    while (!tryRead (delim)) {
4407 		c = readCh ();
4408 		dataBufferAppend (c);
4409 	    }
4410 	} catch (EOFException e) {
4411 	    error ("end of input while looking for delimiter "
4412 		+ "(started on line " + startLine
4413 		+ ')', null, new String (delim));
4414 	}
4415     }
4416 
4417 
4418     //////////////////////////////////////////////////////////////////////
4419     // Low-level I/O.
4420     //////////////////////////////////////////////////////////////////////
4421 
4422 
4423     /***
4424      * Prefetch US-ASCII XML/text decl from input stream into read buffer.
4425      * Doesn't buffer more than absolutely needed, so that when an encoding
4426      * decl says we need to create an InputStreamReader, we can discard our
4427      * buffer and reset().  Caller knows the first chars of the decl exist
4428      * in the input stream.
4429      */
4430     private void prefetchASCIIEncodingDecl ()
4431     throws SAXException, IOException
4432     {
4433 	int ch;
4434 	readBufferPos = readBufferLength = 0;
4435 
4436 	is.mark (readBuffer.length);
4437 	while (true) {
4438 	    ch = is.read ();
4439 	    readBuffer [readBufferLength++] = (char) ch;
4440 	    switch (ch) {
4441 	      case (int) '>':
4442 		return;
4443 	      case -1:
4444 		error ("file ends before end of XML or encoding declaration.",
4445 		       null, "?>");
4446 	    }
4447 	    if (readBuffer.length == readBufferLength)
4448 		error ("unfinished XML or encoding declaration");
4449 	}
4450     }
4451 
4452     /***
4453      * Read a chunk of data from an external input source.
4454      * <p>This is simply a front-end that fills the rawReadBuffer
4455      * with bytes, then calls the appropriate encoding handler.
4456      * @see #encoding
4457      * @see #rawReadBuffer
4458      * @see #readBuffer
4459      * @see #filterCR
4460      * @see #copyUtf8ReadBuffer
4461      * @see #copyIso8859_1ReadBuffer
4462      * @see #copyUcs_2ReadBuffer
4463      * @see #copyUcs_4ReadBuffer
4464      */
4465     private void readDataChunk ()
4466     throws SAXException, IOException
4467     {
4468 	int count;
4469 
4470 	// See if we have any overflow (filterCR sets for CR at end)
4471 	if (readBufferOverflow > -1) {
4472 	    readBuffer [0] = (char) readBufferOverflow;
4473 	    readBufferOverflow = -1;
4474 	    readBufferPos = 1;
4475 	    sawCR = true;
4476 	} else {
4477 	    readBufferPos = 0;
4478 	    sawCR = false;
4479 	}
4480 
4481 	// input from a character stream.
4482 	if (sourceType == INPUT_READER) {
4483 	    count = reader.read (readBuffer,
4484 			    readBufferPos, READ_BUFFER_MAX - readBufferPos);
4485 	    if (count < 0)
4486 		readBufferLength = readBufferPos;
4487 	    else
4488 		readBufferLength = readBufferPos + count;
4489 	    if (readBufferLength > 0)
4490 		filterCR (count >= 0);
4491 	    sawCR = false;
4492 	    return;
4493 	}
4494 
4495 	// Read as many bytes as possible into the raw buffer.
4496 	count = is.read (rawReadBuffer, 0, READ_BUFFER_MAX);
4497 
4498 	// Dispatch to an encoding-specific reader method to populate
4499 	// the readBuffer.  In most parser speed profiles, these routines
4500 	// show up at the top of the CPU usage chart.
4501 	if (count > 0) {
4502 	    switch (encoding) {
4503 	      // one byte builtins
4504 	      case ENCODING_ASCII:
4505 		copyIso8859_1ReadBuffer (count, (char) 0x0080);
4506 		break;
4507 	      case ENCODING_UTF_8:
4508 		copyUtf8ReadBuffer (count);
4509 		break;
4510 	      case ENCODING_ISO_8859_1:
4511 		copyIso8859_1ReadBuffer (count, (char) 0);
4512 		break;
4513 
4514 	      // two byte builtins
4515 	      case ENCODING_UCS_2_12:
4516 		copyUcs2ReadBuffer (count, 8, 0);
4517 		break;
4518 	      case ENCODING_UCS_2_21:
4519 		copyUcs2ReadBuffer (count, 0, 8);
4520 		break;
4521 
4522 	      // four byte builtins
4523 	      case ENCODING_UCS_4_1234:
4524 		copyUcs4ReadBuffer (count, 24, 16, 8, 0);
4525 		break;
4526 	      case ENCODING_UCS_4_4321:
4527 		copyUcs4ReadBuffer (count, 0, 8, 16, 24);
4528 		break;
4529 	      case ENCODING_UCS_4_2143:
4530 		copyUcs4ReadBuffer (count, 16, 24, 0, 8);
4531 		break;
4532 	      case ENCODING_UCS_4_3412:
4533 		copyUcs4ReadBuffer (count, 8, 0, 24, 16);
4534 		break;
4535 	    }
4536 	} else
4537 	    readBufferLength = readBufferPos;
4538 
4539 	readBufferPos = 0;
4540 
4541 	// Filter out all carriage returns if we've seen any
4542 	// (including any saved from a previous read)
4543 	if (sawCR) {
4544 	    filterCR (count >= 0);
4545 	    sawCR = false;
4546 
4547 	    // must actively report EOF, lest some CRs get lost.
4548 	    if (readBufferLength == 0 && count >= 0)
4549 		readDataChunk ();
4550 	}
4551 
4552 	if (count > 0)
4553 	    currentByteCount += count;
4554     }
4555 
4556 
4557     /***
4558      * Filter carriage returns in the read buffer.
4559      * CRLF becomes LF; CR becomes LF.
4560      * @param moreData true iff more data might come from the same source
4561      * @see #readDataChunk
4562      * @see #readBuffer
4563      * @see #readBufferOverflow
4564      */
4565     private void filterCR (boolean moreData)
4566     {
4567 	int i, j;
4568 
4569 	readBufferOverflow = -1;
4570 
4571 loop:
4572 	for (i = j = readBufferPos; j < readBufferLength; i++, j++) {
4573 	    switch (readBuffer [j]) {
4574 	    case '\r':
4575 		if (j == readBufferLength - 1) {
4576 		    if (moreData) {
4577 			readBufferOverflow = '\r';
4578 			readBufferLength--;
4579 		    } else 	// CR at end of buffer
4580 			readBuffer [i++] = '\n';
4581 		    break loop;
4582 		} else if (readBuffer [j + 1] == '\n') {
4583 		    j++;
4584 		}
4585 		readBuffer [i] = '\n';
4586 		break;
4587 
4588 	    case '\n':
4589 	    default:
4590 		readBuffer [i] = readBuffer [j];
4591 		break;
4592 	    }
4593 	}
4594 	readBufferLength = i;
4595     }
4596 
4597     /***
4598      * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
4599      * <p>When readDataChunk () calls this method, the raw bytes are in 
4600      * rawReadBuffer, and the final characters will appear in 
4601      * readBuffer.
4602      * <p>Note that as of Unicode 3.1, good practice became a requirement,
4603      * so that each Unicode character has exactly one UTF-8 representation.
4604      * @param count The number of bytes to convert.
4605      * @see #readDataChunk
4606      * @see #rawReadBuffer
4607      * @see #readBuffer
4608      * @see #getNextUtf8Byte
4609      */
4610     private void copyUtf8ReadBuffer (int count)
4611     throws SAXException, IOException
4612     {
4613 	int	i = 0;
4614 	int	j = readBufferPos;
4615 	int	b1;
4616 	char	c = 0;
4617 
4618 	/*
4619 	// check once, so the runtime won't (if it's smart enough)
4620 	if (count < 0 || count > rawReadBuffer.length)
4621 	    throw new ArrayIndexOutOfBoundsException (Integer.toString (count));
4622 	*/
4623 
4624 	while (i < count) {
4625 	    b1 = rawReadBuffer [i++];
4626 
4627 	    // Determine whether we are dealing
4628 	    // with a one-, two-, three-, or four-
4629 	    // byte sequence.
4630 	    if (b1 < 0) {
4631 		if ((b1 & 0xe0) == 0xc0) {
4632 		    // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
4633 		    c = (char) (((b1 & 0x1f) << 6)
4634 				| getNextUtf8Byte (i++, count));
4635 		    if (c < 0x0080)
4636 			encodingError ("Illegal two byte UTF-8 sequence",
4637 				c, 0);
4638 		    //Sec 2.11
4639 		    // [1] the two-character sequence #xD #xA
4640 		    // [2] the two-character sequence #xD #x85
4641 		    if ((c == 0x0085 || c == 0x000a) && sawCR)
4642 		       	continue;
4643 		    
4644 		    // Sec 2.11
4645 		    // [3] the single character #x85
4646 		    
4647 		    if(c == 0x0085  && xmlVersion == XML_11)
4648 		    	readBuffer[j++] = '\r';
4649 		} else if ((b1 & 0xf0) == 0xe0) {
4650 		    // 3-byte sequence:
4651 		    // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
4652 		    // most CJKV characters
4653 		    c = (char) (((b1 & 0x0f) << 12) |
4654 				   (getNextUtf8Byte (i++, count) << 6) |
4655 				   getNextUtf8Byte (i++, count));
4656                     //sec 2.11
4657 		    //[4] the single character #x2028
4658 		    if(c == 0x2028 && xmlVersion == XML_11){
4659 		       	readBuffer[j++] = '\r';
4660 		       	sawCR = true;
4661 		       	continue;
4662 		    }
4663 		    if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff))
4664 			encodingError ("Illegal three byte UTF-8 sequence",
4665 				c, 0);
4666 		} else if ((b1 & 0xf8) == 0xf0) {
4667 		    // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx
4668 		    //     = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
4669 		    // (uuuuu = wwww + 1)
4670 		    // "Surrogate Pairs" ... from the "Astral Planes"
4671 		    // Unicode 3.1 assigned the first characters there
4672 		    int iso646 = b1 & 07;
4673 		    iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count);
4674 		    iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count);
4675 		    iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count);
4676 
4677 		    if (iso646 <= 0xffff) {
4678 			encodingError ("Illegal four byte UTF-8 sequence",
4679 				iso646, 0);
4680 		    } else {
4681 			if (iso646 > 0x0010ffff)
4682 			    encodingError (
4683 				"UTF-8 value out of range for Unicode",
4684 				iso646, 0);
4685 			iso646 -= 0x010000;
4686 			readBuffer [j++] = (char) (0xd800 | (iso646 >> 10));
4687 			readBuffer [j++] = (char) (0xdc00 | (iso646 & 0x03ff));
4688 			continue;
4689 		    }
4690 		} else {
4691 		    // The five and six byte encodings aren't supported;
4692 		    // they exceed the Unicode (and XML) range.
4693 		    encodingError (
4694 			    "unsupported five or six byte UTF-8 sequence",
4695 			    0xff & b1, i);
4696 		    // NOTREACHED
4697 		    c = 0;
4698 		}
4699 	    } else {
4700 		// 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
4701 		// (US-ASCII character, "common" case, one branch to here)
4702 		c = (char) b1;
4703 	    }
4704 	    readBuffer [j++] = c;
4705 	    if (c == '\r')
4706 		sawCR = true;
4707 	}
4708 	// How many characters have we read?
4709 	readBufferLength = j;
4710     }
4711 
4712 
4713     /***
4714      * Return the next byte value in a UTF-8 sequence.
4715      * If it is not possible to get a byte from the current
4716      * entity, throw an exception.
4717      * @param pos The current position in the rawReadBuffer.
4718      * @param count The number of bytes in the rawReadBuffer
4719      * @return The significant six bits of a non-initial byte in
4720      *	 a UTF-8 sequence.
4721      * @exception EOFException If the sequence is incomplete.
4722      */
4723     private int getNextUtf8Byte (int pos, int count)
4724     throws SAXException, IOException
4725     {
4726 	int val;
4727 
4728 	// Take a character from the buffer
4729 	// or from the actual input stream.
4730 	if (pos < count) {
4731 	    val = rawReadBuffer [pos];
4732 	} else {
4733 	    val = is.read ();
4734 	    if (val == -1) {
4735 		encodingError ("unfinished multi-byte UTF-8 sequence at EOF",
4736 			-1, pos);
4737 	    }
4738 	}
4739 
4740 	// Check for the correct bits at the start.
4741 	if ((val & 0xc0) != 0x80) {
4742 	    encodingError ("bad continuation of multi-byte UTF-8 sequence",
4743 		    val, pos + 1);
4744 	}
4745 
4746 	// Return the significant bits.
4747 	return (val & 0x3f);
4748     }
4749 
4750 
4751     /***
4752      * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into
4753      * UTF-16 characters.
4754      *
4755      * <p>When readDataChunk () calls this method, the raw bytes are in 
4756      * rawReadBuffer, and the final characters will appear in 
4757      * readBuffer.
4758      *
4759      * @param count The number of bytes to convert.
4760      * @param mask For ASCII conversion, 0x7f; else, 0xff.
4761      * @see #readDataChunk
4762      * @see #rawReadBuffer
4763      * @see #readBuffer
4764      */
4765     private void copyIso8859_1ReadBuffer (int count, char mask)
4766     throws IOException
4767     {
4768 	int i, j;
4769 	for (i = 0, j = readBufferPos; i < count; i++, j++) {
4770 	    char c = (char) (rawReadBuffer [i] & 0xff);
4771 	    if ((c & mask) != 0)
4772 		throw new CharConversionException ("non-ASCII character U+"
4773 						    + Integer.toHexString (c));
4774 	    if (c == 0x0085 && xmlVersion == XML_11)
4775 	       c = '\r';	
4776 	    readBuffer [j] = c;
4777 	    if (c == '\r') {
4778 		sawCR = true;
4779 	    }
4780 	}
4781 	readBufferLength = j;
4782     }
4783 
4784 
4785     /***
4786      * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters
4787      * (as used in Java string manipulation).
4788      *
4789      * <p>When readDataChunk () calls this method, the raw bytes are in 
4790      * rawReadBuffer, and the final characters will appear in 
4791      * readBuffer.
4792      * @param count The number of bytes to convert.
4793      * @param shift1 The number of bits to shift byte 1.
4794      * @param shift2 The number of bits to shift byte 2
4795      * @see #readDataChunk
4796      * @see #rawReadBuffer
4797      * @see #readBuffer
4798      */
4799     private void copyUcs2ReadBuffer (int count, int shift1, int shift2)
4800     throws SAXException
4801     {
4802 	int j = readBufferPos;
4803 
4804 	if (count > 0 && (count % 2) != 0) {
4805 	    encodingError ("odd number of bytes in UCS-2 encoding", -1, count);
4806 	}
4807 	// The loops are faster with less internal brancing; hence two
4808 	if (shift1 == 0) {	// "UTF-16-LE"
4809 	    for (int i = 0; i < count; i += 2) {
4810 		char c = (char) (rawReadBuffer [i + 1] << 8);
4811 		c |= 0xff & rawReadBuffer [i];
4812 		readBuffer [j++] = c;
4813 		if (c == '\r')
4814 		    sawCR = true;
4815 	    }
4816 	} else {	// "UTF-16-BE"
4817 	    for (int i = 0; i < count; i += 2) {
4818 		char c = (char) (rawReadBuffer [i] << 8);
4819 		c |= 0xff & rawReadBuffer [i + 1];
4820 		readBuffer [j++] = c;
4821 		if (c == '\r')
4822 		    sawCR = true;
4823 	    }
4824 	}
4825 	readBufferLength = j;
4826     }
4827 
4828 
4829     /***
4830      * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
4831      *
4832      * <p>When readDataChunk () calls this method, the raw bytes are in 
4833      * rawReadBuffer, and the final characters will appear in 
4834      * readBuffer.
4835      * <p>Java has Unicode chars, and this routine uses surrogate pairs
4836      * for ISO-10646 values between 0x00010000 and 0x000fffff.  An
4837      * exception is thrown if the ISO-10646 character has no Unicode
4838      * representation.
4839      *
4840      * @param count The number of bytes to convert.
4841      * @param shift1 The number of bits to shift byte 1.
4842      * @param shift2 The number of bits to shift byte 2
4843      * @param shift3 The number of bits to shift byte 2
4844      * @param shift4 The number of bits to shift byte 2
4845      * @see #readDataChunk
4846      * @see #rawReadBuffer
4847      * @see #readBuffer
4848      */
4849     private void copyUcs4ReadBuffer (int count, int shift1, int shift2,
4850 			      int shift3, int shift4)
4851     throws SAXException
4852     {
4853 	int j = readBufferPos;
4854 
4855 	if (count > 0 && (count % 4) != 0) {
4856 	    encodingError (
4857 		    "number of bytes in UCS-4 encoding not divisible by 4",
4858 		    -1, count);
4859 	}
4860 	for (int i = 0; i < count; i += 4) {
4861 	    int value = (((rawReadBuffer [i] & 0xff) << shift1) |
4862 		      ((rawReadBuffer [i + 1] & 0xff) << shift2) |
4863 		      ((rawReadBuffer [i + 2] & 0xff) << shift3) |
4864 		      ((rawReadBuffer [i + 3] & 0xff) << shift4));
4865 	    if (value < 0x0000ffff) {
4866 		readBuffer [j++] = (char) value;
4867 		if (value == (int) '\r') {
4868 		    sawCR = true;
4869 		}
4870 	    } else if (value < 0x0010ffff) {
4871 		value -= 0x010000;
4872 		readBuffer [j++] = (char) (0xd8 | ((value >> 10) & 0x03ff));
4873 		readBuffer [j++] = (char) (0xdc | (value & 0x03ff));
4874 	    } else {
4875 		encodingError ("UCS-4 value out of range for Unicode",
4876 			       value, i);
4877 	    }
4878 	}
4879 	readBufferLength = j;
4880     }
4881 
4882 
4883     /***
4884      * Report a character encoding error.
4885      */
4886     private void encodingError (String message, int value, int offset)
4887     throws SAXException
4888     {
4889 	if (value != -1)
4890 	    message = message + " (character code: 0x" +
4891 		      Integer.toHexString (value) + ')';
4892 	error (message);
4893     }
4894 
4895 
4896     //////////////////////////////////////////////////////////////////////
4897     // Local Variables.
4898     //////////////////////////////////////////////////////////////////////
4899 
4900     /***
4901      * Re-initialize the variables for each parse.
4902      */
4903     private void initializeVariables ()
4904     {
4905 	// First line
4906 	line = 1;
4907 	column = 0;
4908 
4909 	// Set up the buffers for data and names
4910 	dataBufferPos = 0;
4911 	dataBuffer = new char [DATA_BUFFER_INITIAL];
4912 	nameBufferPos = 0;
4913 	nameBuffer = new char [NAME_BUFFER_INITIAL];
4914 
4915 	// Set up the DTD hash tables
4916 	elementInfo = new Hashtable ();
4917 	entityInfo = new Hashtable ();
4918 	notationInfo = new Hashtable ();
4919 	skippedPE = false;
4920 
4921 	// Set up the variables for the current
4922 	// element context.
4923 	currentElement = null;
4924 	currentElementContent = CONTENT_UNDECLARED;
4925 
4926 	// Set up the input variables
4927 	sourceType = INPUT_NONE;
4928 	inputStack = new Stack ();
4929 	entityStack = new Stack ();
4930 	externalEntity = null;
4931 	tagAttributePos = 0;
4932 	tagAttributes = new String [100];
4933 	rawReadBuffer = new byte [READ_BUFFER_MAX];
4934 	readBufferOverflow = -1;
4935 
4936 	scratch = new InputSource ();
4937 
4938 	inLiteral = false;
4939 	expandPE = false;
4940 	peIsError = false;
4941 
4942 	doReport = false;
4943 
4944 	inCDATA = false;
4945 
4946 	symbolTable = new Object [SYMBOL_TABLE_LENGTH][];
4947     }
4948 
4949 
4950     //
4951     // The current XML handler interface.
4952     //
4953     private SAXDriver	handler;
4954 
4955     //
4956     // I/O information.
4957     //
4958     private Reader	reader; 	// current reader
4959     private InputStream	is; 		// current input stream
4960     private int		line; 		// current line number
4961     private int		column; 	// current column number
4962     private int		sourceType; 	// type of input source
4963     private Stack	inputStack; 	// stack of input soruces
4964     private URLConnection externalEntity; // current external entity
4965     private int		encoding; 	// current character encoding
4966     private int		currentByteCount; // bytes read from current source
4967     private InputSource	scratch;	// temporary
4968 
4969     //
4970     // Buffers for decoded but unparsed character input.
4971     //
4972     private char	readBuffer [];
4973     private int		readBufferPos;
4974     private int		readBufferLength;
4975     private int		readBufferOverflow;  // overflow from last data chunk.
4976 
4977 
4978     //
4979     // Buffer for undecoded raw byte input.
4980     //
4981     private final static int READ_BUFFER_MAX = 16384;
4982     private byte	rawReadBuffer [];
4983 
4984 
4985     //
4986     // Buffer for attribute values, char refs, DTD stuff.
4987     //
4988     private static int DATA_BUFFER_INITIAL = 4096;
4989     private char	dataBuffer [];
4990     private int		dataBufferPos;
4991 
4992     //
4993     // Buffer for parsed names.
4994     //
4995     private static int NAME_BUFFER_INITIAL = 1024;
4996     private char	nameBuffer [];
4997     private int		nameBufferPos;
4998 
4999     //
5000     // Save any standalone flag
5001     //
5002     private boolean	docIsStandalone;
5003 
5004     //
5005     // Hashtables for DTD information on elements, entities, and notations.
5006     // Populated until we start ignoring decls (because of skipping a PE)
5007     //
5008     private Hashtable	elementInfo;
5009     private Hashtable	entityInfo;
5010     private Hashtable	notationInfo;
5011     private boolean	skippedPE;
5012 
5013 
5014     //
5015     // Element type currently in force.
5016     //
5017     private String	currentElement;
5018     private int		currentElementContent;
5019 
5020     //
5021     // Stack of entity names, to detect recursion.
5022     //
5023     private Stack	entityStack;
5024 
5025     //
5026     // PE expansion is enabled in most chunks of the DTD, not all.
5027     // When it's enabled, literals are treated differently.
5028     //
5029     private boolean	inLiteral;
5030     private boolean	expandPE;
5031     private boolean	peIsError;
5032 
5033     //
5034     // can't report entity expansion inside two constructs:
5035     // - attribute expansions (internal entities only)
5036     // - markup declarations (parameter entities only)
5037     //
5038     private boolean	doReport;
5039 
5040     //
5041     // Symbol table, for caching interned names.
5042     //
5043     // These show up wherever XML names or nmtokens are used:  naming elements,
5044     // attributes, PIs, notations, entities, and enumerated attribute values.
5045     //
5046     // NOTE:  This hashtable doesn't grow.  The default size is intended to be
5047     // rather large for most documents.  Example:  one snapshot of the DocBook
5048     // XML 4.1 DTD used only about 350 such names.  As a rule, only pathological
5049     // documents (ones that don't reuse names) should ever see much collision.
5050     //
5051     // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing.
5052     // "2039" keeps the hash table size at about two memory pages on typical
5053     // 32 bit hardware.
5054     //
5055     private final static int SYMBOL_TABLE_LENGTH = 2039;
5056 
5057     private Object	symbolTable [][];
5058 
5059     //
5060     // Hash table of attributes found in current start tag.
5061     //
5062     private String	tagAttributes [];
5063     private int		tagAttributePos;
5064 
5065     //
5066     // Utility flag: have we noticed a CR while reading the last
5067     // data chunk?  If so, we will have to go back and normalise
5068     // CR or CR/LF line ends.
5069     //
5070     private boolean	sawCR;
5071 
5072     //
5073     // Utility flag: are we in CDATA?  If so, whitespace isn't ignorable.
5074     // 
5075     private boolean	inCDATA;
5076     
5077     //
5078     // Xml version.
5079     //  
5080     private static final int XML_10 = 0; 
5081     private static final int XML_11 = 1; 
5082     private int 	xmlVersion = XML_10;
5083 }