1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  
20  package org.backsource.utils.xml;
21  import java.io.File;
22  import java.io.FileReader;
23  import java.io.InputStream;
24  import java.io.IOException;
25  import java.net.URL;
26  import java.net.MalformedURLException;
27  
28  import javax.xml.transform.Source;
29  import javax.xml.transform.TransformerException;
30  import javax.xml.transform.URIResolver;
31  import javax.xml.transform.sax.SAXSource;
32  import javax.xml.parsers.SAXParserFactory;
33  import javax.xml.parsers.ParserConfigurationException;
34  
35  import org.xml.sax.EntityResolver;
36  import org.xml.sax.InputSource;
37  import org.xml.sax.SAXException;
38  import org.xml.sax.XMLReader;
39  import org.apache.log4j.Logger;
40  
41  import org.backsource.utils.resource.URI;
42  import org.backsource.utils.resource.URIFactory;
43  import org.backsource.utils.resource.MalformedURIException;
44  /***
45   * A Catalog based resolver which is originally based on {@link org.apache.xml.resolver.tools.CatalogResolver}.
46   *<p>It does currently not extend from the original resolver since the Catalog of that resolver has private access. This is a JAXP type resolver which adds the following featues to the original: </p>
47   <ul>
48   <li>It is possible to programatically set the {@link Catalog} the resolver should use.</li>
49   <li>if catalog resolve of systemId or href fail both are translated to a "name" version - i.e last part of it is used to do one more lookup against the catalog. For example xerces translated systemId:s to the full path of its current working directory; so nitf.dtd will be called for examle file:///my/path/nitf.dtd, where /my/path actually is where the program happens to be running: to be able to write Catalog entries that do not have to know where programs are running the systemId id is also rewritten to the name part an tested again.</li>
50   <li>It contains an implicit Catalog: if there is no entry in its Catalog the 
51   resource is translated to a name part, or if relative to that,
52   and looked up in the filesystem relative to the base of the Catalog.</li>
53   <li>It uses {@link org.backsource.utils.resource.URI} and friends to translate an uri to an URL.</li>
54   </ul>
55   <p>Here's one way to use it, see {@link Catalog} on how to create the catalog</p>
56   <pre>
57        CatalogResolver stdResolver = new CatalogResolver(catalog);
58        TransformerFactory tfactory = TransformerFactory.newInstance();
59        Transformer serializer = tfactory.newTransformer();
60        serializer.setURIResolver( stdResolver );
61  
62        // Or
63        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
64        DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
65        docBuilder.setEntityResolver( stdResolver );
66   </pre>
67   <p>When using with utils in this package one often have to use the {@link Resolver} and {@link ResolverContext} classes to set the resolver in the environment. To get DocumentUtil to use a given resolver on could for example do this:</p>
68  <pre>
69        Resolver res = new Resolver(stdResolver,stdResolver);
70        ResolverContext.set(res);
71        
72        // This would then use that resolver
73        Document doc = DocumentUtil.getDocument( myFile );
74  </pre>
75  <p>Through its use of URI to lookup resolved uris it is possible to lookup resources at other places than the filesystem or HTTP; it makes it possible to lookup resources in the classpath or in a servlet resoource context. It is possible to programaticallty set a URIFactory on a CatalogResolver, but when no one is specifyed the normal resolution mechanism is done where {@link org.backsource.utils.resource.URIFactoryContext} is used.</p>
76  <pre>
77        classPathResolver = new CatalogResolver(catalog);
78        URIFactory f = new URIFactory();
79        f.register("class", new ClassContextURLFactory());
80        classPathResolver.setURIFactory(f);
81  </pre>
82  <p>Since the default for URI factory is to use the classpath when al else fails this setup would make it possible to lookuo both these entries int the classpath:</p>
83  <pre>
84        catalog.addCatalogEntry( CatalogEntry.getSystemEntry("log4j.dtd", "class:org/apache/log4j/xml/log4j.dtd") );
85        catalog.addCatalogEntry( CatalogEntry.getSystemEntry("log4j.dtd", "/org/apache/log4j/xml/log4j.dtd") );
86  </pre>
87  <p>The following is one way to lookup the resource from the classpath in the current thread only.</p>
88  <pre>
89        ResolverContext.setThreadLocal( new Resolver(classPathResolver,classPathResolver) );
90        Document doc = DocumentUtil.getDocument( new FileInputStream(log4jFile ) );
91  </pre>
92  
93   
94   * @see Catalog
95   * @see org.xml.sax.EntityResolver
96   * @see javax.xml.transform.URIResolver
97   * @see org.backsource.utils.resource.URIFactory
98   * @author <a href="mailto:pra@tim.se">Peter Antman</a>
99   * @author Norman Walsh
100  * <a href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</a>
101  * @version $Revision: 1.3 $
102  */
103 
104 public class CatalogResolver  implements EntityResolver, URIResolver {
105    private static final Logger log = Logger.getLogger(CatalogResolver.class);
106    protected Catalog catalog;
107    protected URIFactory uriFactory;
108    
109    public CatalogResolver (Catalog catalog){
110       this.catalog = catalog;
111    }
112 
113    public Catalog getCatalog() {
114       return catalog;
115    }
116 
117    /***
118     * Set the URIFactory that URI should use when translating uris to reasources.
119     * <p>if no URIFactory is set the normal factory resolution mechanism is used in URI.</p>
120     * @see org.backsource.utils.resource.URIFactory
121     */
122    public void setURIFactory(URIFactory factory) {
123       this.uriFactory = uriFactory;
124    }
125 
126    /***
127     * Get the URIFactory that was set.
128     */
129    public URIFactory getURIFactory() {
130       return uriFactory;
131    }
132    
133 
134    /***
135    * <p>Implements the guts of the <code>resolveEntity</code> method
136    * for the SAX interface.</p>
137    *
138    * <p>Presented with an optional public identifier and a system
139    * identifier, this function attempts to locate a mapping in the
140    * catalogs.</p>
141    *
142    * <p>If such a mapping is found, it is returned.  If no mapping is
143    * found, null is returned.</p>
144    *
145    * @param publicId  The public identifier for the entity in question.
146    * This may be null.
147    *
148    * @param systemId  The system identifier for the entity in question.
149    * XML requires a system identifier on all external entities, so this
150    * value is always specified.
151    *
152    * @return The resolved identifier (a URI reference).
153    */
154   public String getResolvedEntity (String publicId, String systemId) {
155     String resolved = null;
156     
157     if (catalog == null) {
158        log.warn("Catalog resolution attempted with null catalog; ignored");
159        return null;
160     }
161     
162     if (systemId != null) {
163       try {
164 	resolved = catalog.resolveSystem(systemId);
165       } catch (MalformedURLException me) {
166          log.warn("Malformed URL exception trying to resolve " + systemId,me);
167          resolved = null;
168       } catch (IOException ie) {
169          log.warn("I/O exception trying to resolve " + systemId,ie);
170 	resolved = null;
171       }
172     }
173 
174     if (resolved == null) {
175        if (publicId != null) {
176           try {
177              resolved = catalog.resolvePublic(publicId, systemId);
178           } catch (MalformedURLException me) {
179              log.warn("Malformed URL exception trying to resolve " +publicId,me);
180 	} catch (IOException ie) {
181 	  log.warn("I/O exception trying to resolve "+publicId, ie);
182 	}
183       }
184 
185       if (resolved != null) {
186          if ( log.isDebugEnabled()) {
187             log.debug("Resolved public " +publicId + " to "+ resolved);
188          } 
189 
190       }
191     } else {
192        if ( log.isDebugEnabled()) {
193           log.debug("Resolved system "+ systemId +" to " + resolved);
194        }
195     }
196 
197     return resolved;
198   }
199 
200    /***
201     * <p>Implements the <code>resolveEntity</code> method
202     * for the SAX interface.</p>
203     *
204     * <p>Presented with an optional public identifier and a system
205    * identifier, this function attempts to locate a mapping in the
206    * catalogs.</p>
207    * <p>if that fails the systemId is translated to a name (last part) and looked up again.</p>
208    *
209    * <p>If such a mapping is found, the resolver creates an URI and lets the URIFactory in use translate the URI to and URL. This is opened as an InputSource and is returned. Exceptions are
210    * ignored and null is returned if the mapped value cannot be opened
211    * as an input source.</p>
212    * <p>Last the systemId is used as a name (or if relative the relatove path) and looked up as a file with the resolver base URI as base path.</p>
213    *
214    * <p>If no mapping is found (or an error occurs attempting to open
215    * the mapped value as an input source), null is returned and the system
216    * will use the specified system identifier as if no entityResolver
217    * was specified.</p>
218    *
219    * @param publicId  The public identifier for the entity in question.
220    * This may be null.
221    *
222    * @param systemId  The system identifier for the entity in question.
223    * XML requires a system identifier on all external entities, so this
224    * value is always specified.
225    *
226    * @return An InputSource for the mapped identifier, or null.
227    */
228   public InputSource resolveEntity (String publicId, String systemId) {
229      InputSource source = null;
230      
231      if ( log.isDebugEnabled()) {
232         log.debug("Resolving publicId="+publicId+"/systemId="+systemId);
233      }
234      
235      try {
236         
237         
238         String resolved = getResolvedEntity(publicId, systemId);
239         
240         
241         
242         
243         if ( resolved == null) {
244            resolved = getResolvedEntity(publicId, getName(systemId) );
245         } 
246         
247         
248         
249         
250         if (resolved != null) {
251            
252            source = getInputSource(resolved);
253         } 
254      } catch (Exception e) {
255         log.warn("Failed to resolve" +systemId,e);
256      } 
257      
258      if ( source == null) {
259         
260         try {
261            source = resolveImplicit(systemId);
262            
263         } catch (Exception e) {
264            log.debug("Could not get local systemId for " + systemId +":" +e,e);
265         } 
266      }
267      
268      
269      if ( source != null) {
270         source.setPublicId( publicId );
271      }
272      
273      return source;
274   }
275 
276   /*** 
277    * JAXP URIResolver API.
278    *
279    * Resolves href by the following steps.
280    * <ol>
281    <li>Lookup href in catalog, </li>
282    <li>Remake href into a name, ie last part, and lookup in catalog</li>
283    <li>Take last part (or relative name) and lookup against the base url of the Catalog in the filesystem.</li>
284    <li>Use the base and lastly user.dir to locate the name of the resource</li>
285    </ol>
286    <p>The firts two steps and the last are then looked up with the help of URI.</p>
287    */
288   public Source resolve(String href, String base)
289     throws TransformerException {
290     if ( log.isDebugEnabled() ) {
291        log.debug("Resolving href " +href + " with base " + base);
292     } 
293     
294     String uri = removeFragment(href);
295     InputSource iSource = null;
296 
297     String result = null;
298     
299     try {
300        result = catalog.resolveURI(uri);
301     } catch (Exception e) {
302        ;
303     }
304     
305     
306     if ( result == null) {
307        try {
308           result = catalog.resolveURI( getName(uri) );
309        } catch (Exception e) {
310           ;
311        }
312     } 
313 
314     
315     if ( result == null) {
316        try {
317           
318           iSource = resolveImplicit(uri);
319        } catch (Exception  e) {
320           log.debug("Could not get local uri for " + uri +":" +e,e);
321        } 
322     } 
323     
324     
325     
326     if ( iSource == null && result == null) {
327        
328        
329        
330        if (result == null) {
331           try {
332              URL url = null;
333              
334              if (base==null) {
335                 url = new URL(uri);
336                 result = url.toString();
337              } else {
338                 URL baseURL = new URL(base);
339                 url = (href.length()==0 ? baseURL : new URL(baseURL, uri));
340                 result = url.toString();
341              }
342           } catch (java.net.MalformedURLException mue) {
343              
344              String absBase = makeAbsolute(base);
345              if (!absBase.equals(base)) {
346                 
347                 return resolve(href, absBase);
348              } else {
349                 throw new TransformerException("Malformed URL "
350                                                + href + "(base " + base + ")",
351                                                mue);
352              }
353           }
354        }
355     }
356 
357 
358     
359     if ( result != null && iSource == null) {
360        if ( log.isDebugEnabled() ) {
361           log.debug("Resolved URI " +href + " to " + result);
362        } 
363        
364        try {
365           iSource = getInputSource(result);
366        } catch (Exception e) {
367           ;
368        } 
369     } 
370     
371     SAXSource source = new SAXSource();
372     
373     if ( iSource == null) {
374        iSource =  new InputSource(uri);
375     } 
376     
377     source.setInputSource(iSource);
378     
379     
380     setEntityResolver(source);    
381     return source;
382   }
383 
384   /***
385    * Attempt to construct an absolute URI.
386    * <p>FIXME: why not use the base of the Catalog?
387    */
388   private String makeAbsolute(String uri) {
389     if (uri == null) {
390       uri = "";
391     }
392 
393     try {
394       URL url = new URL(uri);
395       return url.toString();
396     } catch (MalformedURLException mue) {
397       String dir = System.getProperty("user.dir");
398       String file = "";
399 
400       if (dir.endsWith("/")) {
401 	file = "file://" + dir + uri;
402       } else {
403 	file = "file://" + dir + "/" + uri;
404       }
405 
406       try {
407 	URL fileURL = new URL(file);
408 	return fileURL.toString();
409       } catch (MalformedURLException mue2) {
410 	
411 	return uri;
412       }
413     }
414   }
415    
416    /***
417     * Get the InputSource through uri by using URI and any configured URIFactory.
418     * <p>The lookup is actually done twize if the sceme used is file:. This is
419     * because there is no way if specifying a full path like /hm/dm in either
420     * the base of the Catalog or as a resolved entity. To be able to fully use
421     * the URI stuff, we first strip file: if thats the protocol and tests with
422     * and then uses the full resolved uri.</p>
423     * <p>Observer that in the first case only the URLFactory specifyed as
424     * default for the URIFactory will be used.</p>
425     
426     */
427    private InputSource getInputSource(String resolved) throws MalformedURLException, IOException {
428       InputSource source = null;
429       URI uri = getURI(resolved);
430       URL url = null;
431       
432       
433       
434       if ( "file".equals(uri.getScheme()) ) {
435          
436          try {
437             URI u = new URI( uri.getPath() );
438             url = u.getURL();            
439          } catch (MalformedURLException e) {
440             
441          } 
442       } 
443       
444       if ( url == null) {
445          url = uri.getURL();
446       } 
447       
448       
449       if ( url != null) {
450          
451          
452          if ( log.isDebugEnabled() ) {
453             log.debug("Resolved uri to url: " + url);
454          } 
455          
456          
457          InputStream is = url.openStream();
458          if ( is != null) {
459                source = new InputSource(is);
460                source.setSystemId( url.toExternalForm() );
461          }
462       }
463       return source;
464    }
465 
466    /***
467     * Resolve by "using" the implicit catalog.
468     * This is a four step activity:
469     * 1. uri is resolved as is with URI
470     * 2. uri is merged with catalog base and looked up through URI.
471     * 3. The name  part is merged with uri and looked up through URI.
472     * 4. getFileInputSource is used.
473     *
474     * Its important to note that the base of Catalog is not an URI but an
475     * URL, wich means that only URL:s that has a factory is valid as the base
476     * of a Catalog. But since URI may use any protocol and URIFactory may
477     * have any default we must both use the full and the stripped URL when
478     * using the base if protocol is file.
479     */
480    private InputSource resolveImplicit(String resolve) throws MalformedURLException{
481       
482       InputSource source = null;
483       URL base = catalog.getBase();
484 
485       
486       source = getFileInputSource(resolve);
487       
488       if ( source == null) {
489          
490          
491          
492          String cBase = base.toExternalForm();
493          if (  "file".equals( base.getProtocol()) ) {
494             cBase = base.getFile().toString();
495             
496             
497             
498             
499             int idx =  cBase.indexOf(":");
500             if (idx >-1) {
501                cBase = cBase.substring(idx+1);
502             }
503          } 
504          
505          URI merged = new URI(cBase);
506          if ( source == null) {
507             try {
508                merged.appendPath( getName(resolve) );
509                if ( log.isDebugEnabled()) {
510                   log.debug("Resolving implicit with base/name: " + merged.toExternalForm());
511                } 
512                source = getInputSource( merged.toExternalForm() );
513             } catch (Exception e) {
514                ;
515             } 
516          } 
517 
518          
519          if ( source == null) {
520             try {
521                
522                URI u = new URI(resolve);
523                merged.appendPath( u.getPath() );
524                if ( log.isDebugEnabled()) {
525                   log.debug("Resolving implicit with base/systemId: " + merged.toExternalForm());
526                } 
527                source =  getInputSource( merged.toExternalForm() );
528             } catch (Exception e) {
529                ;
530             } 
531          } 
532          
533          
534          
535 
536       } 
537       if ( source == null) {
538          
539          try {
540             if ( log.isDebugEnabled()) {
541                   log.debug("Resolving implicit with systemId: " + resolve);
542                } 
543             source = getInputSource(resolve);
544          } catch (Exception e) {
545             ;
546          } 
547       }
548 
549       return source;
550       
551       
552    }
553 
554    /***
555     * try to make a name out of resolve and look it up as a file relative to
556     * the base of the Catalog.
557     * <p>if resolve is relative its complete relative path will be used.
558     * <p>The systemId will be set when the inputStream returns.</p>
559     */
560    private InputSource getFileInputSource(String resolve) throws MalformedURLException {
561       
562       File file = null;
563       try {
564          URL tmp = new URL(resolve);
565          file = new File(tmp.getFile());
566       }catch(java.net.MalformedURLException ex) {
567          file = new File(resolve);
568       }
569       
570       if (file.isAbsolute()) {
571          
572          file = new File(file.getName());
573       }
574       
575       
576       File baseFile = new File(catalog.getBase().getFile());
577       File check = new File(baseFile, file.getPath());      
578       if(check.exists()) {
579          if ( log.isDebugEnabled()) {
580             log.debug(resolve + " resolved to file "+check);
581          } 
582          FileReader r = null;
583          try {
584             r = new FileReader(check);
585          }catch(java.io.FileNotFoundException ex) {
586             return null;
587          }
588          InputSource source = new InputSource(r);
589          source.setSystemId( check.toURL().toExternalForm() );
590          return source;
591       }else {
592          return null;
593       }
594       
595 
596    }
597    
598    /***
599     * Make an URI out of the uri string.
600     *<p>The URI will be created with the URIFactory if set, otherwise use the default resolution mechanism.</p>
601     * @see org.backsource.utils.resource.URIFactory
602     */
603    private URI getURI(String uri) throws MalformedURIException{
604       if ( uriFactory != null) {
605          return new URI(uri,uriFactory);
606       } else {
607          return new URI(uri);
608       } 
609       
610       
611    }
612    /***
613     * <p>This is called from the URIResolver to set an EntityResolver
614     * on the SAX parser to be used for new XML documents that are
615     * encountered as a result of the document() function, xsl:import,
616     * or xsl:include.  This is done because the XSLT processor calls
617     * out to the SAXParserFactory itself to create a new SAXParser to
618     * parse the new document.  The new parser does not automatically
619     * inherit the EntityResolver of the original (although arguably
620     * it should).  See below:</p>
621     *
622     * <tt>"If an application wants to set the ErrorHandler or
623     * EntityResolver for an XMLReader used during a transformation,
624     * it should use a URIResolver to return the SAXSource which
625     * provides (with getXMLReader) a reference to the XMLReader"</tt>
626     *
627     * <p>...quoted from page 118 of the Java API for XML
628     * Processing 1.1 specification</p>
629     * <p>Taken from ant XMLCatalog.</p>
630     * @see org.apache.tools.ant.types.XMLCatalog
631     *
632     */
633    private void setEntityResolver(SAXSource source) throws TransformerException {
634       
635         XMLReader reader = source.getXMLReader();
636         if (reader == null) {
637             SAXParserFactory spFactory = SAXParserFactory.newInstance();
638             spFactory.setNamespaceAware(true);
639             try {
640                 reader = spFactory.newSAXParser().getXMLReader();
641             }
642             catch (ParserConfigurationException ex) {
643                 throw new TransformerException(ex);
644             }
645             catch (SAXException ex) {
646                 throw new TransformerException(ex);
647             }
648         }
649         reader.setEntityResolver(this);
650         source.setXMLReader(reader);
651     }
652 
653        /***
654      * Utility method to remove trailing fragment from a URI.
655      * For example,
656      * <code>http://java.sun.com/index.html#chapter1</code>
657      * would return <code>http://java.sun.com/index.html</code>.
658      * <p>Taken from ant XMLCatalog.</p>
659      *
660      * @param uri The URI to process.  It may or may not contain a
661      *            fragment.
662      * @return The URI sans fragment.
663      * @see org.apache.tools.ant.types.XMLCatalog
664      */
665     private String removeFragment(String uri) {
666         String result = uri;
667         String fragment = null;
668         int hashPos = uri.indexOf("#");
669         if (hashPos >= 0) {
670             result = uri.substring(0, hashPos);
671             fragment = uri.substring(hashPos+1);
672         }
673         return result;
674     }
675 
676    /***
677     * Deduce the name part of the URI
678     */
679    protected String getName(String uriSpec) throws MalformedURLException{
680       URI uri = new URI(uriSpec);
681       File file = new File(uri.getPath());
682       return file.getName();
683    }
684 }