View Javadoc

1   // EntityResolver2.java - Extended SAX entity resolver.
2   // http://www.saxproject.org
3   // No warranty; no copyright -- use this as you will.
4   // $Id: EntityResolver2.java,v 1.4 2004/03/19 20:17:54 maartenc Exp $
5   
6   package org.xml.sax.ext;
7   
8   import java.io.IOException;
9   
10  import org.xml.sax.EntityResolver;
11  import org.xml.sax.InputSource;
12  import org.xml.sax.XMLReader;
13  import org.xml.sax.SAXException;
14  
15  
/**
 * Extended interface for mapping external entity references to input
 * sources, or providing a missing external subset.  The
 * {@link XMLReader#setEntityResolver XMLReader.setEntityResolver()} method
 * is used to provide implementations of this interface to parsers.
 * When a parser uses the methods in this interface, the
 * {@link EntityResolver2#resolveEntity EntityResolver2.resolveEntity()}
 * method (in this interface) is used <em>instead of</em> the older (SAX 1.0)
 * {@link EntityResolver#resolveEntity EntityResolver.resolveEntity()} method.
 *
 * <blockquote>
 * <em>This module, both source code and documentation, is in the
 * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
 * </blockquote>
 *
 * <p>If a SAX application requires the customized handling which this
 * interface defines for external entities, it must ensure that it uses
 * an XMLReader with the
 * <em>http://xml.org/sax/features/use-entity-resolver2</em> feature flag
 * set to <em>true</em> (which is its default value when the feature is
 * recognized).  If that flag is unrecognized, or its value is false,
 * or the resolver does not implement this interface, then only the
 * {@link EntityResolver} method will be used.
 * </p>
 *
 * <p>That supports three categories of application that modify entity
 * resolution.  <em>Old Style</em> applications won't know about this
 * interface; they will provide an EntityResolver.
 * <em>Transitional Mode</em> applications provide an EntityResolver2 and
 * automatically get the benefit of its methods in any systems (parsers or
 * other tools) supporting it, due to polymorphism.
 * Both <em>Old Style</em> and <em>Transitional Mode</em> applications will
 * work with any SAX2 parser.
 * <em>New Style</em> applications will fail to run except on SAX2 parsers
 * that support this particular feature.
 * They will insist that the feature flag have a value of "true", and the
 * EntityResolver2 implementation they provide might throw an exception
 * if the original SAX 1.0 style entity resolution method is invoked.
 * </p>
 *
 * @see org.xml.sax.XMLReader#setEntityResolver
 *
 * @since SAX 2.0 (extensions 1.1 alpha)
 * @author David Brownell
 * @version TBD
 */
public interface EntityResolver2 extends EntityResolver
{
    /**
     * Allows applications to provide an external subset for documents
     * that don't explicitly define one.  Documents with DOCTYPE declarations
     * that omit an external subset can thus augment the declarations
     * available for validation, entity processing, and attribute processing
     * (normalization, defaulting, and reporting types including ID).
     * This augmentation is reported
     * through the {@link LexicalHandler#startDTD startDTD()} method as if
     * the document text had originally included the external subset;
     * this callback is made before any internal subset data or errors
     * are reported.
     *
     * <p>This method can also be used with documents that have no DOCTYPE
     * declaration.  When the root element is encountered,
     * but no DOCTYPE declaration has been seen, this method is
     * invoked.  If it returns a value for the external subset, that root
     * element is declared to be the root element, giving the effect of
     * splicing a DOCTYPE declaration at the end of the prolog of a document
     * that could not otherwise be valid.  The sequence of parser callbacks
     * in that case logically resembles this:</p>
     *
     * <pre>
     * ... comments and PIs from the prolog (as usual)
     * startDTD ("rootName", source.getPublicId (), source.getSystemId ());
     * startEntity ("[dtd]");
     * ... declarations, comments, and PIs from the external subset
     * endEntity ("[dtd]");
     * endDTD ();
     * ... then the rest of the document (as usual)
     * startElement (..., "rootName", ...);
     * </pre>
     *
     * <p>Note that the InputSource gets no further resolution.
     * Implementations of this method may wish to invoke
     * {@link #resolveEntity resolveEntity()} to gain benefits such as use
     * of local caches of DTD entities.  Also, this method will never be
     * used by a (non-validating) processor that is not including external
     * parameter entities.</p>
     *
     * <p>Uses for this method include facilitating data validation when
     * interoperating with XML processors that would always require
     * undesirable network accesses for external entities, or which for
     * other reasons adopt a "no DTDs" policy.
     * Non-validation motives include forcing documents to include DTDs so
     * that attributes are handled consistently.
     * For example, an XPath processor needs to know which attributes have
     * type "ID" before it can process a widely used type of reference.</p>
     *
     * <p><strong>Warning:</strong> Returning an external subset modifies
     * the input document.  By providing definitions for general entities,
     * it can make a malformed document appear to be well formed.
     * </p>
     *
     * @param name Identifies the document root element.  This name comes
     *     from a DOCTYPE declaration (where available) or from the actual
     *     root element.
     * @param baseURI The document's base URI, serving as an additional
     *     hint for selecting the external subset.  This is always an absolute
     *     URI, unless it is null because the XMLReader was given an
     *     InputSource without one.
     *
     * @return An InputSource object describing the new external subset
     *     to be used by the parser, or null to indicate that no external
     *     subset is provided.
     *
     * @throws SAXException Any SAX exception, possibly wrapping
     *     another exception.
     * @throws IOException Probably indicating a failure to create
     *     a new InputStream or Reader, or an illegal URL.
     */
    InputSource getExternalSubset (String name, String baseURI)
        throws SAXException, IOException;

    /**
     * Allows applications to map references to external entities into input
     * sources, or tell the parser it should use conventional URI resolution.
     * This method is only called for external entities which have been
     * properly declared.
     * This method provides more flexibility than the {@link EntityResolver}
     * interface, supporting implementations of more complex catalogue
     * schemes such as the one defined by the
     * <a href="http://www.oasis-open.org/committees/entity/spec-2001-08-06.html">OASIS
     * XML Catalogs</a> specification.
     *
     * <p>Parsers configured to use this resolver method will call it
     * to determine the input source to use for any external entity
     * being included because of a reference in the XML text.
     * That excludes the document entity, and any external entity returned
     * by {@link #getExternalSubset getExternalSubset()}.
     * When a (non-validating) processor is configured not to include
     * a class of entities (parameter or general) through use of feature
     * flags, this method is not invoked for such entities.</p>
     *
     * <p>Note that the entity naming scheme used here is the same one
     * used in the {@link LexicalHandler}, or in the
     * {@link org.xml.sax.ContentHandler#skippedEntity ContentHandler.skippedEntity()}
     * method.</p>
     *
     * @param name Identifies the external entity being resolved.
     *     Either "[dtd]" for the external subset, or a name starting
     *     with "%" to indicate a parameter entity, or else the name of
     *     a general entity.  This is never null when invoked by a SAX2
     *     parser.
     * @param publicId The public identifier of the external entity being
     *     referenced (normalized as required by the XML specification), or
     *     null if none was supplied.
     * @param baseURI The URI with respect to which relative systemIDs
     *     are interpreted.  This is always an absolute URI, unless it is
     *     null (likely because the XMLReader was given an InputSource without
     *     one).  This URI is defined by the XML specification to be the one
     *     associated with the "&lt;" starting the relevant declaration.
     * @param systemId The system identifier of the external entity
     *     being referenced; either a relative or absolute URI.
     *     This is never null when invoked by a SAX2 parser; only declared
     *     entities, and any external subset, are resolved by such parsers.
     *
     * @return An InputSource object describing the new input source to
     *     be used by the parser.  Returning null directs the parser to
     *     resolve the system ID against the base URI and open a connection
     *     to the resulting URI.
     *
     * @throws SAXException Any SAX exception, possibly wrapping
     *     another exception.
     * @throws IOException Probably indicating a failure to create
     *     a new InputStream or Reader, or an illegal URL.
     */
    InputSource resolveEntity (
        String name,
        String publicId,
        String baseURI,
        String systemId
    ) throws SAXException, IOException;
}