1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.backsource.utils.xml;
21 import java.io.File;
22 import java.io.FileReader;
23 import java.io.InputStream;
24 import java.io.IOException;
25 import java.net.URL;
26 import java.net.MalformedURLException;
27
28 import javax.xml.transform.Source;
29 import javax.xml.transform.TransformerException;
30 import javax.xml.transform.URIResolver;
31 import javax.xml.transform.sax.SAXSource;
32 import javax.xml.parsers.SAXParserFactory;
33 import javax.xml.parsers.ParserConfigurationException;
34
35 import org.xml.sax.EntityResolver;
36 import org.xml.sax.InputSource;
37 import org.xml.sax.SAXException;
38 import org.xml.sax.XMLReader;
39 import org.apache.log4j.Logger;
40
41 import org.backsource.utils.resource.URI;
42 import org.backsource.utils.resource.URIFactory;
43 import org.backsource.utils.resource.MalformedURIException;
44 /***
45 * A Catalog based resolver which is originally based on {@link org.apache.xml.resolver.tools.CatalogResolver}.
46 *<p>It does currently not extend from the original resolver since the Catalog of that resolver has private access. This is a JAXP type resolver which adds the following featues to the original: </p>
47 <ul>
48 <li>It is possible to programatically set the {@link Catalog} the resolver should use.</li>
49 <li>if catalog resolve of systemId or href fail both are translated to a "name" version - i.e last part of it is used to do one more lookup against the catalog. For example xerces translated systemId:s to the full path of its current working directory; so nitf.dtd will be called for examle file:///my/path/nitf.dtd, where /my/path actually is where the program happens to be running: to be able to write Catalog entries that do not have to know where programs are running the systemId id is also rewritten to the name part an tested again.</li>
50 <li>It contains an implicit Catalog: if there is no entry in its Catalog the
51 resource is translated to a name part, or if relative to that,
52 and looked up in the filesystem relative to the base of the Catalog.</li>
53 <li>It uses {@link org.backsource.utils.resource.URI} and friends to translate an uri to an URL.</li>
54 </ul>
55 <p>Here's one way to use it, see {@link Catalog} on how to create the catalog</p>
56 <pre>
57 CatalogResolver stdResolver = new CatalogResolver(catalog);
58 TransformerFactory tfactory = TransformerFactory.newInstance();
59 Transformer serializer = tfactory.newTransformer();
60 serializer.setURIResolver( stdResolver );
61
62 // Or
63 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
64 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
65 docBuilder.setEntityResolver( stdResolver );
66 </pre>
67 <p>When using with utils in this package one often have to use the {@link Resolver} and {@link ResolverContext} classes to set the resolver in the environment. To get DocumentUtil to use a given resolver on could for example do this:</p>
68 <pre>
69 Resolver res = new Resolver(stdResolver,stdResolver);
70 ResolverContext.set(res);
71
72 // This would then use that resolver
73 Document doc = DocumentUtil.getDocument( myFile );
74 </pre>
75 <p>Through its use of URI to lookup resolved uris it is possible to lookup resources at other places than the filesystem or HTTP; it makes it possible to lookup resources in the classpath or in a servlet resoource context. It is possible to programaticallty set a URIFactory on a CatalogResolver, but when no one is specifyed the normal resolution mechanism is done where {@link org.backsource.utils.resource.URIFactoryContext} is used.</p>
76 <pre>
77 classPathResolver = new CatalogResolver(catalog);
78 URIFactory f = new URIFactory();
79 f.register("class", new ClassContextURLFactory());
80 classPathResolver.setURIFactory(f);
81 </pre>
82 <p>Since the default for URI factory is to use the classpath when al else fails this setup would make it possible to lookuo both these entries int the classpath:</p>
83 <pre>
84 catalog.addCatalogEntry( CatalogEntry.getSystemEntry("log4j.dtd", "class:org/apache/log4j/xml/log4j.dtd") );
85 catalog.addCatalogEntry( CatalogEntry.getSystemEntry("log4j.dtd", "/org/apache/log4j/xml/log4j.dtd") );
86 </pre>
87 <p>The following is one way to lookup the resource from the classpath in the current thread only.</p>
88 <pre>
89 ResolverContext.setThreadLocal( new Resolver(classPathResolver,classPathResolver) );
90 Document doc = DocumentUtil.getDocument( new FileInputStream(log4jFile ) );
91 </pre>
92
93
94 * @see Catalog
95 * @see org.xml.sax.EntityResolver
96 * @see javax.xml.transform.URIResolver
97 * @see org.backsource.utils.resource.URIFactory
98 * @author <a href="mailto:pra@tim.se">Peter Antman</a>
99 * @author Norman Walsh
100 * <a href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</a>
101 * @version $Revision: 1.3 $
102 */
103
104 public class CatalogResolver implements EntityResolver, URIResolver {
105 private static final Logger log = Logger.getLogger(CatalogResolver.class);
106 protected Catalog catalog;
107 protected URIFactory uriFactory;
108
109 public CatalogResolver (Catalog catalog){
110 this.catalog = catalog;
111 }
112
113 public Catalog getCatalog() {
114 return catalog;
115 }
116
117 /***
118 * Set the URIFactory that URI should use when translating uris to reasources.
119 * <p>if no URIFactory is set the normal factory resolution mechanism is used in URI.</p>
120 * @see org.backsource.utils.resource.URIFactory
121 */
122 public void setURIFactory(URIFactory factory) {
123 this.uriFactory = uriFactory;
124 }
125
126 /***
127 * Get the URIFactory that was set.
128 */
129 public URIFactory getURIFactory() {
130 return uriFactory;
131 }
132
133
134 /***
135 * <p>Implements the guts of the <code>resolveEntity</code> method
136 * for the SAX interface.</p>
137 *
138 * <p>Presented with an optional public identifier and a system
139 * identifier, this function attempts to locate a mapping in the
140 * catalogs.</p>
141 *
142 * <p>If such a mapping is found, it is returned. If no mapping is
143 * found, null is returned.</p>
144 *
145 * @param publicId The public identifier for the entity in question.
146 * This may be null.
147 *
148 * @param systemId The system identifier for the entity in question.
149 * XML requires a system identifier on all external entities, so this
150 * value is always specified.
151 *
152 * @return The resolved identifier (a URI reference).
153 */
154 public String getResolvedEntity (String publicId, String systemId) {
155 String resolved = null;
156
157 if (catalog == null) {
158 log.warn("Catalog resolution attempted with null catalog; ignored");
159 return null;
160 }
161
162 if (systemId != null) {
163 try {
164 resolved = catalog.resolveSystem(systemId);
165 } catch (MalformedURLException me) {
166 log.warn("Malformed URL exception trying to resolve " + systemId,me);
167 resolved = null;
168 } catch (IOException ie) {
169 log.warn("I/O exception trying to resolve " + systemId,ie);
170 resolved = null;
171 }
172 }
173
174 if (resolved == null) {
175 if (publicId != null) {
176 try {
177 resolved = catalog.resolvePublic(publicId, systemId);
178 } catch (MalformedURLException me) {
179 log.warn("Malformed URL exception trying to resolve " +publicId,me);
180 } catch (IOException ie) {
181 log.warn("I/O exception trying to resolve "+publicId, ie);
182 }
183 }
184
185 if (resolved != null) {
186 if ( log.isDebugEnabled()) {
187 log.debug("Resolved public " +publicId + " to "+ resolved);
188 }
189
190 }
191 } else {
192 if ( log.isDebugEnabled()) {
193 log.debug("Resolved system "+ systemId +" to " + resolved);
194 }
195 }
196
197 return resolved;
198 }
199
200 /***
201 * <p>Implements the <code>resolveEntity</code> method
202 * for the SAX interface.</p>
203 *
204 * <p>Presented with an optional public identifier and a system
205 * identifier, this function attempts to locate a mapping in the
206 * catalogs.</p>
207 * <p>if that fails the systemId is translated to a name (last part) and looked up again.</p>
208 *
209 * <p>If such a mapping is found, the resolver creates an URI and lets the URIFactory in use translate the URI to and URL. This is opened as an InputSource and is returned. Exceptions are
210 * ignored and null is returned if the mapped value cannot be opened
211 * as an input source.</p>
212 * <p>Last the systemId is used as a name (or if relative the relatove path) and looked up as a file with the resolver base URI as base path.</p>
213 *
214 * <p>If no mapping is found (or an error occurs attempting to open
215 * the mapped value as an input source), null is returned and the system
216 * will use the specified system identifier as if no entityResolver
217 * was specified.</p>
218 *
219 * @param publicId The public identifier for the entity in question.
220 * This may be null.
221 *
222 * @param systemId The system identifier for the entity in question.
223 * XML requires a system identifier on all external entities, so this
224 * value is always specified.
225 *
226 * @return An InputSource for the mapped identifier, or null.
227 */
228 public InputSource resolveEntity (String publicId, String systemId) {
229 InputSource source = null;
230
231 if ( log.isDebugEnabled()) {
232 log.debug("Resolving publicId="+publicId+"/systemId="+systemId);
233 }
234
235 try {
236
237
238 String resolved = getResolvedEntity(publicId, systemId);
239
240
241
242
243 if ( resolved == null) {
244 resolved = getResolvedEntity(publicId, getName(systemId) );
245 }
246
247
248
249
250 if (resolved != null) {
251
252 source = getInputSource(resolved);
253 }
254 } catch (Exception e) {
255 log.warn("Failed to resolve" +systemId,e);
256 }
257
258 if ( source == null) {
259
260 try {
261 source = resolveImplicit(systemId);
262
263 } catch (Exception e) {
264 log.debug("Could not get local systemId for " + systemId +":" +e,e);
265 }
266 }
267
268
269 if ( source != null) {
270 source.setPublicId( publicId );
271 }
272
273 return source;
274 }
275
276 /***
277 * JAXP URIResolver API.
278 *
279 * Resolves href by the following steps.
280 * <ol>
281 <li>Lookup href in catalog, </li>
282 <li>Remake href into a name, ie last part, and lookup in catalog</li>
283 <li>Take last part (or relative name) and lookup against the base url of the Catalog in the filesystem.</li>
284 <li>Use the base and lastly user.dir to locate the name of the resource</li>
285 </ol>
286 <p>The firts two steps and the last are then looked up with the help of URI.</p>
287 */
288 public Source resolve(String href, String base)
289 throws TransformerException {
290 if ( log.isDebugEnabled() ) {
291 log.debug("Resolving href " +href + " with base " + base);
292 }
293
294 String uri = removeFragment(href);
295 InputSource iSource = null;
296
297 String result = null;
298
299 try {
300 result = catalog.resolveURI(uri);
301 } catch (Exception e) {
302 ;
303 }
304
305
306 if ( result == null) {
307 try {
308 result = catalog.resolveURI( getName(uri) );
309 } catch (Exception e) {
310 ;
311 }
312 }
313
314
315 if ( result == null) {
316 try {
317
318 iSource = resolveImplicit(uri);
319 } catch (Exception e) {
320 log.debug("Could not get local uri for " + uri +":" +e,e);
321 }
322 }
323
324
325
326 if ( iSource == null && result == null) {
327
328
329
330 if (result == null) {
331 try {
332 URL url = null;
333
334 if (base==null) {
335 url = new URL(uri);
336 result = url.toString();
337 } else {
338 URL baseURL = new URL(base);
339 url = (href.length()==0 ? baseURL : new URL(baseURL, uri));
340 result = url.toString();
341 }
342 } catch (java.net.MalformedURLException mue) {
343
344 String absBase = makeAbsolute(base);
345 if (!absBase.equals(base)) {
346
347 return resolve(href, absBase);
348 } else {
349 throw new TransformerException("Malformed URL "
350 + href + "(base " + base + ")",
351 mue);
352 }
353 }
354 }
355 }
356
357
358
359 if ( result != null && iSource == null) {
360 if ( log.isDebugEnabled() ) {
361 log.debug("Resolved URI " +href + " to " + result);
362 }
363
364 try {
365 iSource = getInputSource(result);
366 } catch (Exception e) {
367 ;
368 }
369 }
370
371 SAXSource source = new SAXSource();
372
373 if ( iSource == null) {
374 iSource = new InputSource(uri);
375 }
376
377 source.setInputSource(iSource);
378
379
380 setEntityResolver(source);
381 return source;
382 }
383
384 /***
385 * Attempt to construct an absolute URI.
386 * <p>FIXME: why not use the base of the Catalog?
387 */
388 private String makeAbsolute(String uri) {
389 if (uri == null) {
390 uri = "";
391 }
392
393 try {
394 URL url = new URL(uri);
395 return url.toString();
396 } catch (MalformedURLException mue) {
397 String dir = System.getProperty("user.dir");
398 String file = "";
399
400 if (dir.endsWith("/")) {
401 file = "file://" + dir + uri;
402 } else {
403 file = "file://" + dir + "/" + uri;
404 }
405
406 try {
407 URL fileURL = new URL(file);
408 return fileURL.toString();
409 } catch (MalformedURLException mue2) {
410
411 return uri;
412 }
413 }
414 }
415
416 /***
417 * Get the InputSource through uri by using URI and any configured URIFactory.
418 * <p>The lookup is actually done twize if the sceme used is file:. This is
419 * because there is no way if specifying a full path like /hm/dm in either
420 * the base of the Catalog or as a resolved entity. To be able to fully use
421 * the URI stuff, we first strip file: if thats the protocol and tests with
422 * and then uses the full resolved uri.</p>
423 * <p>Observer that in the first case only the URLFactory specifyed as
424 * default for the URIFactory will be used.</p>
425
426 */
427 private InputSource getInputSource(String resolved) throws MalformedURLException, IOException {
428 InputSource source = null;
429 URI uri = getURI(resolved);
430 URL url = null;
431
432
433
434 if ( "file".equals(uri.getScheme()) ) {
435
436 try {
437 URI u = new URI( uri.getPath() );
438 url = u.getURL();
439 } catch (MalformedURLException e) {
440
441 }
442 }
443
444 if ( url == null) {
445 url = uri.getURL();
446 }
447
448
449 if ( url != null) {
450
451
452 if ( log.isDebugEnabled() ) {
453 log.debug("Resolved uri to url: " + url);
454 }
455
456
457 InputStream is = url.openStream();
458 if ( is != null) {
459 source = new InputSource(is);
460 source.setSystemId( url.toExternalForm() );
461 }
462 }
463 return source;
464 }
465
466 /***
467 * Resolve by "using" the implicit catalog.
468 * This is a four step activity:
469 * 1. uri is resolved as is with URI
470 * 2. uri is merged with catalog base and looked up through URI.
471 * 3. The name part is merged with uri and looked up through URI.
472 * 4. getFileInputSource is used.
473 *
474 * Its important to note that the base of Catalog is not an URI but an
475 * URL, wich means that only URL:s that has a factory is valid as the base
476 * of a Catalog. But since URI may use any protocol and URIFactory may
477 * have any default we must both use the full and the stripped URL when
478 * using the base if protocol is file.
479 */
480 private InputSource resolveImplicit(String resolve) throws MalformedURLException{
481
482 InputSource source = null;
483 URL base = catalog.getBase();
484
485
486 source = getFileInputSource(resolve);
487
488 if ( source == null) {
489
490
491
492 String cBase = base.toExternalForm();
493 if ( "file".equals( base.getProtocol()) ) {
494 cBase = base.getFile().toString();
495
496
497
498
499 int idx = cBase.indexOf(":");
500 if (idx >-1) {
501 cBase = cBase.substring(idx+1);
502 }
503 }
504
505 URI merged = new URI(cBase);
506 if ( source == null) {
507 try {
508 merged.appendPath( getName(resolve) );
509 if ( log.isDebugEnabled()) {
510 log.debug("Resolving implicit with base/name: " + merged.toExternalForm());
511 }
512 source = getInputSource( merged.toExternalForm() );
513 } catch (Exception e) {
514 ;
515 }
516 }
517
518
519 if ( source == null) {
520 try {
521
522 URI u = new URI(resolve);
523 merged.appendPath( u.getPath() );
524 if ( log.isDebugEnabled()) {
525 log.debug("Resolving implicit with base/systemId: " + merged.toExternalForm());
526 }
527 source = getInputSource( merged.toExternalForm() );
528 } catch (Exception e) {
529 ;
530 }
531 }
532
533
534
535
536 }
537 if ( source == null) {
538
539 try {
540 if ( log.isDebugEnabled()) {
541 log.debug("Resolving implicit with systemId: " + resolve);
542 }
543 source = getInputSource(resolve);
544 } catch (Exception e) {
545 ;
546 }
547 }
548
549 return source;
550
551
552 }
553
554 /***
555 * try to make a name out of resolve and look it up as a file relative to
556 * the base of the Catalog.
557 * <p>if resolve is relative its complete relative path will be used.
558 * <p>The systemId will be set when the inputStream returns.</p>
559 */
560 private InputSource getFileInputSource(String resolve) throws MalformedURLException {
561
562 File file = null;
563 try {
564 URL tmp = new URL(resolve);
565 file = new File(tmp.getFile());
566 }catch(java.net.MalformedURLException ex) {
567 file = new File(resolve);
568 }
569
570 if (file.isAbsolute()) {
571
572 file = new File(file.getName());
573 }
574
575
576 File baseFile = new File(catalog.getBase().getFile());
577 File check = new File(baseFile, file.getPath());
578 if(check.exists()) {
579 if ( log.isDebugEnabled()) {
580 log.debug(resolve + " resolved to file "+check);
581 }
582 FileReader r = null;
583 try {
584 r = new FileReader(check);
585 }catch(java.io.FileNotFoundException ex) {
586 return null;
587 }
588 InputSource source = new InputSource(r);
589 source.setSystemId( check.toURL().toExternalForm() );
590 return source;
591 }else {
592 return null;
593 }
594
595
596 }
597
598 /***
599 * Make an URI out of the uri string.
600 *<p>The URI will be created with the URIFactory if set, otherwise use the default resolution mechanism.</p>
601 * @see org.backsource.utils.resource.URIFactory
602 */
603 private URI getURI(String uri) throws MalformedURIException{
604 if ( uriFactory != null) {
605 return new URI(uri,uriFactory);
606 } else {
607 return new URI(uri);
608 }
609
610
611 }
612 /***
613 * <p>This is called from the URIResolver to set an EntityResolver
614 * on the SAX parser to be used for new XML documents that are
615 * encountered as a result of the document() function, xsl:import,
616 * or xsl:include. This is done because the XSLT processor calls
617 * out to the SAXParserFactory itself to create a new SAXParser to
618 * parse the new document. The new parser does not automatically
619 * inherit the EntityResolver of the original (although arguably
620 * it should). See below:</p>
621 *
622 * <tt>"If an application wants to set the ErrorHandler or
623 * EntityResolver for an XMLReader used during a transformation,
624 * it should use a URIResolver to return the SAXSource which
625 * provides (with getXMLReader) a reference to the XMLReader"</tt>
626 *
627 * <p>...quoted from page 118 of the Java API for XML
628 * Processing 1.1 specification</p>
629 * <p>Taken from ant XMLCatalog.</p>
630 * @see org.apache.tools.ant.types.XMLCatalog
631 *
632 */
633 private void setEntityResolver(SAXSource source) throws TransformerException {
634
635 XMLReader reader = source.getXMLReader();
636 if (reader == null) {
637 SAXParserFactory spFactory = SAXParserFactory.newInstance();
638 spFactory.setNamespaceAware(true);
639 try {
640 reader = spFactory.newSAXParser().getXMLReader();
641 }
642 catch (ParserConfigurationException ex) {
643 throw new TransformerException(ex);
644 }
645 catch (SAXException ex) {
646 throw new TransformerException(ex);
647 }
648 }
649 reader.setEntityResolver(this);
650 source.setXMLReader(reader);
651 }
652
653 /***
654 * Utility method to remove trailing fragment from a URI.
655 * For example,
656 * <code>http://java.sun.com/index.html#chapter1</code>
657 * would return <code>http://java.sun.com/index.html</code>.
658 * <p>Taken from ant XMLCatalog.</p>
659 *
660 * @param uri The URI to process. It may or may not contain a
661 * fragment.
662 * @return The URI sans fragment.
663 * @see org.apache.tools.ant.types.XMLCatalog
664 */
665 private String removeFragment(String uri) {
666 String result = uri;
667 String fragment = null;
668 int hashPos = uri.indexOf("#");
669 if (hashPos >= 0) {
670 result = uri.substring(0, hashPos);
671 fragment = uri.substring(hashPos+1);
672 }
673 return result;
674 }
675
676 /***
677 * Deduce the name part of the URI
678 */
679 protected String getName(String uriSpec) throws MalformedURLException{
680 URI uri = new URI(uriSpec);
681 File file = new File(uri.getPath());
682 return file.getName();
683 }
684 }