View Javadoc

1   /*
2    * Copyright (c) 2003 Peter Antman, Teknik i Media  <peter.antman@tim.se>
3    *
4    * $Id: CatalogResolver.java,v 1.3 2004/06/02 08:57:51 omatzura Exp $
5    *
6    * This library is free software; you can redistribute it and/or
7    * modify it under the terms of the GNU Lesser General Public
8    * License as published by the Free Software Foundation; either
9    * version 2 of the License, or (at your option) any later version
10   * 
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   * 
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, write to the Free Software
18   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19   */
20  package org.backsource.utils.xml;
21  import java.io.File;
22  import java.io.FileReader;
23  import java.io.InputStream;
24  import java.io.IOException;
25  import java.net.URL;
26  import java.net.MalformedURLException;
27  
28  import javax.xml.transform.Source;
29  import javax.xml.transform.TransformerException;
30  import javax.xml.transform.URIResolver;
31  import javax.xml.transform.sax.SAXSource;
32  import javax.xml.parsers.SAXParserFactory;
33  import javax.xml.parsers.ParserConfigurationException;
34  
35  import org.xml.sax.EntityResolver;
36  import org.xml.sax.InputSource;
37  import org.xml.sax.SAXException;
38  import org.xml.sax.XMLReader;
39  import org.apache.log4j.Logger;
40  
41  import org.backsource.utils.resource.URI;
42  import org.backsource.utils.resource.URIFactory;
43  import org.backsource.utils.resource.MalformedURIException;
44  /***
45   * A Catalog based resolver which is originally based on {@link org.apache.xml.resolver.tools.CatalogResolver}.
 *<p>It currently does not extend the original resolver, since the Catalog of that resolver has private access. This is a JAXP type resolver which adds the following features to the original: </p>
47   <ul>
48   <li>It is possible to programatically set the {@link Catalog} the resolver should use.</li>
 <li>If catalog resolution of the systemId or href fails, both are translated to a "name" version, i.e. the last part is used to do one more lookup against the catalog. For example, Xerces translates systemIds to a full path under its current working directory, so nitf.dtd will be requested as, for example, file:///my/path/nitf.dtd, where /my/path is simply where the program happens to be running. To be able to write Catalog entries that do not have to know where programs are running, the systemId is also rewritten to its name part and tested again.</li>
50   <li>It contains an implicit Catalog: if there is no entry in its Catalog the 
51   resource is translated to a name part, or if relative to that,
52   and looked up in the filesystem relative to the base of the Catalog.</li>
53   <li>It uses {@link org.backsource.utils.resource.URI} and friends to translate an uri to an URL.</li>
54   </ul>
55   <p>Here's one way to use it, see {@link Catalog} on how to create the catalog</p>
56   <pre>
57        CatalogResolver stdResolver = new CatalogResolver(catalog);
58        TransformerFactory tfactory = TransformerFactory.newInstance();
59        Transformer serializer = tfactory.newTransformer();
60        serializer.setURIResolver( stdResolver );
61  
62        // Or
63        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
64        DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
65        docBuilder.setEntityResolver( stdResolver );
66   </pre>
 <p>When using this with the utils in this package one often has to use the {@link Resolver} and {@link ResolverContext} classes to set the resolver in the environment. To get DocumentUtil to use a given resolver one could for example do this:</p>
68  <pre>
69        Resolver res = new Resolver(stdResolver,stdResolver);
70        ResolverContext.set(res);
71        
72        // This would then use that resolver
73        Document doc = DocumentUtil.getDocument( myFile );
74  </pre>
<p>Through its use of URI to look up resolved uris it is possible to look up resources in places other than the filesystem or HTTP; for example in the classpath or in a servlet resource context. It is possible to programmatically set a URIFactory on a CatalogResolver, but when none is specified the normal resolution mechanism is used, where {@link org.backsource.utils.resource.URIFactoryContext} is consulted.</p>
76  <pre>
77        classPathResolver = new CatalogResolver(catalog);
78        URIFactory f = new URIFactory();
79        f.register("class", new ClassContextURLFactory());
80        classPathResolver.setURIFactory(f);
81  </pre>
<p>Since the default for URI factory is to use the classpath when all else fails, this setup would make it possible to look up both of these entries in the classpath:</p>
83  <pre>
84        catalog.addCatalogEntry( CatalogEntry.getSystemEntry("log4j.dtd", "class:org/apache/log4j/xml/log4j.dtd") );
85        catalog.addCatalogEntry( CatalogEntry.getSystemEntry("log4j.dtd", "/org/apache/log4j/xml/log4j.dtd") );
86  </pre>
87  <p>The following is one way to lookup the resource from the classpath in the current thread only.</p>
88  <pre>
89        ResolverContext.setThreadLocal( new Resolver(classPathResolver,classPathResolver) );
90        Document doc = DocumentUtil.getDocument( new FileInputStream(log4jFile ) );
91  </pre>
92  
93   
94   * @see Catalog
95   * @see org.xml.sax.EntityResolver
96   * @see javax.xml.transform.URIResolver
97   * @see org.backsource.utils.resource.URIFactory
98   * @author <a href="mailto:pra@tim.se">Peter Antman</a>
99   * @author Norman Walsh
100  * <a href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</a>
101  * @version $Revision: 1.3 $
102  */
103 
104 public class CatalogResolver  implements EntityResolver, URIResolver {
   /** log4j logger shared by all instances of this class. */
   private static final Logger log = Logger.getLogger(CatalogResolver.class);
   /** Catalog consulted for system, public and uri lookups; getResolvedEntity tolerates a null value. */
   protected Catalog catalog;
   /** Optional factory passed to URI when one has been set; when null, URI is created without a factory. */
   protected URIFactory uriFactory;
108    
109    public CatalogResolver (Catalog catalog){
110       this.catalog = catalog;
111    }
112 
113    public Catalog getCatalog() {
114       return catalog;
115    }
116 
117    /***
118     * Set the URIFactory that URI should use when translating uris to reasources.
119     * <p>if no URIFactory is set the normal factory resolution mechanism is used in URI.</p>
120     * @see org.backsource.utils.resource.URIFactory
121     */
122    public void setURIFactory(URIFactory factory) {
123       this.uriFactory = uriFactory;
124    }
125 
126    /***
127     * Get the URIFactory that was set.
128     */
129    public URIFactory getURIFactory() {
130       return uriFactory;
131    }
132    
133 
134    /***
135    * <p>Implements the guts of the <code>resolveEntity</code> method
136    * for the SAX interface.</p>
137    *
138    * <p>Presented with an optional public identifier and a system
139    * identifier, this function attempts to locate a mapping in the
140    * catalogs.</p>
141    *
142    * <p>If such a mapping is found, it is returned.  If no mapping is
143    * found, null is returned.</p>
144    *
145    * @param publicId  The public identifier for the entity in question.
146    * This may be null.
147    *
148    * @param systemId  The system identifier for the entity in question.
149    * XML requires a system identifier on all external entities, so this
150    * value is always specified.
151    *
152    * @return The resolved identifier (a URI reference).
153    */
154   public String getResolvedEntity (String publicId, String systemId) {
155     String resolved = null;
156     
157     if (catalog == null) {
158        log.warn("Catalog resolution attempted with null catalog; ignored");
159        return null;
160     }
161     
162     if (systemId != null) {
163       try {
164 	resolved = catalog.resolveSystem(systemId);
165       } catch (MalformedURLException me) {
166          log.warn("Malformed URL exception trying to resolve " + systemId,me);
167          resolved = null;
168       } catch (IOException ie) {
169          log.warn("I/O exception trying to resolve " + systemId,ie);
170 	resolved = null;
171       }
172     }
173 
174     if (resolved == null) {
175        if (publicId != null) {
176           try {
177              resolved = catalog.resolvePublic(publicId, systemId);
178           } catch (MalformedURLException me) {
179              log.warn("Malformed URL exception trying to resolve " +publicId,me);
180 	} catch (IOException ie) {
181 	  log.warn("I/O exception trying to resolve "+publicId, ie);
182 	}
183       }
184 
185       if (resolved != null) {
186          if ( log.isDebugEnabled()) {
187             log.debug("Resolved public " +publicId + " to "+ resolved);
188          } // end of if ()
189 
190       }
191     } else {
192        if ( log.isDebugEnabled()) {
193           log.debug("Resolved system "+ systemId +" to " + resolved);
194        }
195     }
196 
197     return resolved;
198   }
199 
200    /***
201     * <p>Implements the <code>resolveEntity</code> method
202     * for the SAX interface.</p>
203     *
204     * <p>Presented with an optional public identifier and a system
205    * identifier, this function attempts to locate a mapping in the
206    * catalogs.</p>
207    * <p>if that fails the systemId is translated to a name (last part) and looked up again.</p>
208    *
209    * <p>If such a mapping is found, the resolver creates an URI and lets the URIFactory in use translate the URI to and URL. This is opened as an InputSource and is returned. Exceptions are
210    * ignored and null is returned if the mapped value cannot be opened
211    * as an input source.</p>
212    * <p>Last the systemId is used as a name (or if relative the relatove path) and looked up as a file with the resolver base URI as base path.</p>
213    *
214    * <p>If no mapping is found (or an error occurs attempting to open
215    * the mapped value as an input source), null is returned and the system
216    * will use the specified system identifier as if no entityResolver
217    * was specified.</p>
218    *
219    * @param publicId  The public identifier for the entity in question.
220    * This may be null.
221    *
222    * @param systemId  The system identifier for the entity in question.
223    * XML requires a system identifier on all external entities, so this
224    * value is always specified.
225    *
226    * @return An InputSource for the mapped identifier, or null.
227    */
228   public InputSource resolveEntity (String publicId, String systemId) {
229      InputSource source = null;
230      
231      if ( log.isDebugEnabled()) {
232         log.debug("Resolving publicId="+publicId+"/systemId="+systemId);
233      }
234      
235      try {
236         
237         // try the catalog 
238         String resolved = getResolvedEntity(publicId, systemId);
239         
240         // Do it one more time with a relative name
241         // One of the truly iritating things is that systemId often is 
242         // translated into an absolute path!
243         if ( resolved == null) {
244            resolved = getResolvedEntity(publicId, getName(systemId) );
245         } // end of if ()
246         
247         //FIXME: this seems not to work on implicit lookups, i.e when
248         // the systemId to lookup was not in Catalog!
249         
250         if (resolved != null) {
251            // Find the real resource through URI
252            source = getInputSource(resolved);
253         } // end of if ()         
254      } catch (Exception e) {
255         log.warn("Failed to resolve" +systemId,e);
256      } // end of try-catch
257      
258      if ( source == null) {
259         // Locit up in implicit catalog
260         try {
261            source = resolveImplicit(systemId);
262            //source = getFileInputSource(systemId);
263         } catch (Exception e) {
264            log.debug("Could not get local systemId for " + systemId +":" +e,e);
265         } // end of try-catch
266      }
267      
268      // Set publicId if resolved
269      if ( source != null) {
270         source.setPublicId( publicId );
271      }
272      
273      return source;
274   }
275 
276   /*** 
277    * JAXP URIResolver API.
278    *
279    * Resolves href by the following steps.
280    * <ol>
281    <li>Lookup href in catalog, </li>
282    <li>Remake href into a name, ie last part, and lookup in catalog</li>
283    <li>Take last part (or relative name) and lookup against the base url of the Catalog in the filesystem.</li>
284    <li>Use the base and lastly user.dir to locate the name of the resource</li>
285    </ol>
286    <p>The firts two steps and the last are then looked up with the help of URI.</p>
287    */
288   public Source resolve(String href, String base)
289     throws TransformerException {
290     if ( log.isDebugEnabled() ) {
291        log.debug("Resolving href " +href + " with base " + base);
292     } // end of if ()
293     
294     String uri = removeFragment(href);
295     InputSource iSource = null;
296 
297     String result = null;
298     // Get real uri from catalog
299     try {
300        result = catalog.resolveURI(uri);
301     } catch (Exception e) {
302        ;// nop;
303     }
304     
305     // try with name version
306     if ( result == null) {
307        try {
308           result = catalog.resolveURI( getName(uri) );
309        } catch (Exception e) {
310           ;// nop;
311        }
312     } // end of if ()
313 
314     // try locally
315     if ( result == null) {
316        try {
317           //iSource = getFileInputSource(uri);
318           iSource = resolveImplicit(uri);
319        } catch (Exception  e) {
320           log.debug("Could not get local uri for " + uri +":" +e,e);
321        } // end of try-catch
322     } // end of if ()
323     
324     // This last block is messy
325     // if all above failed
326     if ( iSource == null && result == null) {
327        
328        // Of not found, try construct a valid URL first from base and 
329        // if it fails from user.dir
330        if (result == null) {
331           try {
332              URL url = null;
333              
334              if (base==null) {
335                 url = new URL(uri);
336                 result = url.toString();
337              } else {
338                 URL baseURL = new URL(base);
339                 url = (href.length()==0 ? baseURL : new URL(baseURL, uri));
340                 result = url.toString();
341              }
342           } catch (java.net.MalformedURLException mue) {
343              // try to make an absolute URI from the current base
344              String absBase = makeAbsolute(base);
345              if (!absBase.equals(base)) {
346                 // don't bother if the absBase isn't different!
347                 return resolve(href, absBase);
348              } else {
349                 throw new TransformerException("Malformed URL "
350                                                + href + "(base " + base + ")",
351                                                mue);
352              }
353           }
354        }
355     }
356 
357 
358     // MESSY!
359     if ( result != null && iSource == null) {
360        if ( log.isDebugEnabled() ) {
361           log.debug("Resolved URI " +href + " to " + result);
362        } // end of if ()
363        // try to find the resource through URI
364        try {
365           iSource = getInputSource(result);
366        } catch (Exception e) {
367           ;//Noop
368        } // end of try-catch
369     } // end of if ()
370     
371     SAXSource source = new SAXSource();
372     // if we can't get a resolved set the original uri, see XMLCatalog for why.
373     if ( iSource == null) {
374        iSource =  new InputSource(uri);
375     } // end of if ()
376     
377     source.setInputSource(iSource);
378     
379     // Set the resolver so that sub sheets also has an entity resolver!
380     setEntityResolver(source);    
381     return source;
382   }
383 
384   /***
385    * Attempt to construct an absolute URI.
386    * <p>FIXME: why not use the base of the Catalog?
387    */
388   private String makeAbsolute(String uri) {
389     if (uri == null) {
390       uri = "";
391     }
392 
393     try {
394       URL url = new URL(uri);
395       return url.toString();
396     } catch (MalformedURLException mue) {
397       String dir = System.getProperty("user.dir");
398       String file = "";
399 
400       if (dir.endsWith("/")) {
401 	file = "file://" + dir + uri;
402       } else {
403 	file = "file://" + dir + "/" + uri;
404       }
405 
406       try {
407 	URL fileURL = new URL(file);
408 	return fileURL.toString();
409       } catch (MalformedURLException mue2) {
410 	// bail
411 	return uri;
412       }
413     }
414   }
415    
416    /***
417     * Get the InputSource through uri by using URI and any configured URIFactory.
418     * <p>The lookup is actually done twize if the sceme used is file:. This is
419     * because there is no way if specifying a full path like /hm/dm in either
420     * the base of the Catalog or as a resolved entity. To be able to fully use
421     * the URI stuff, we first strip file: if thats the protocol and tests with
422     * and then uses the full resolved uri.</p>
423     * <p>Observer that in the first case only the URLFactory specifyed as
424     * default for the URIFactory will be used.</p>
425     
426     */
427    private InputSource getInputSource(String resolved) throws MalformedURLException, IOException {
428       InputSource source = null;
429       URI uri = getURI(resolved);
430       URL url = null;
431       
432       // Do it in two faces, since a looked up reaource in catalog
433       // that has no protocol allways gets file:
434       if ( "file".equals(uri.getScheme()) ) {
435          // Also try without file
436          try {
437             URI u = new URI( uri.getPath() );
438             url = u.getURL();            
439          } catch (MalformedURLException e) {
440             
441          } // end of try-catch
442       } // end of if ()
443       
444       if ( url == null) {
445          url = uri.getURL();
446       } // end of if ()
447       
448       
449       if ( url != null) {
450          
451          
452          if ( log.isDebugEnabled() ) {
453             log.debug("Resolved uri to url: " + url);
454          } // end of if ()
455          
456          
457          InputStream is = url.openStream();
458          if ( is != null) {
459                source = new InputSource(is);
460                source.setSystemId( url.toExternalForm() );
461          }
462       }
463       return source;
464    }
465 
466    /***
467     * Resolve by "using" the implicit catalog.
468     * This is a four step activity:
469     * 1. uri is resolved as is with URI
470     * 2. uri is merged with catalog base and looked up through URI.
471     * 3. The name  part is merged with uri and looked up through URI.
472     * 4. getFileInputSource is used.
473     *
474     * Its important to note that the base of Catalog is not an URI but an
475     * URL, wich means that only URL:s that has a factory is valid as the base
476     * of a Catalog. But since URI may use any protocol and URIFactory may
477     * have any default we must both use the full and the stripped URL when
478     * using the base if protocol is file.
479     */
480    private InputSource resolveImplicit(String resolve) throws MalformedURLException{
481       // Just try it out
482       InputSource source = null;
483       URL base = catalog.getBase();
484 
485       // try file first
486       source = getFileInputSource(resolve);
487       
488       if ( source == null) {
489          
490          // We strip base of file, it should have been resolved already by
491          // getFileInpitSource if that worked
492          String cBase = base.toExternalForm();
493          if (  "file".equals( base.getProtocol()) ) {
494             cBase = base.getFile().toString();
495             // FIXME: check if there is any good alternative to remove
496             // drive letters
497             // or should that be up to URIFactory??
498             // windows fucks up classpath lookups with drive letter
499             int idx =  cBase.indexOf(":");
500             if (idx >-1) {
501                cBase = cBase.substring(idx+1);
502             }
503          } 
504          // Merge name
505          URI merged = new URI(cBase);
506          if ( source == null) {
507             try {
508                merged.appendPath( getName(resolve) );
509                if ( log.isDebugEnabled()) {
510                   log.debug("Resolving implicit with base/name: " + merged.toExternalForm());
511                } // end of if ()
512                source = getInputSource( merged.toExternalForm() );
513             } catch (Exception e) {
514                ;//NOOP
515             } // end of try-catch
516          } // end of if ()
517 
518          // Merge complete
519          if ( source == null) {
520             try {
521                // remove any schema part from resolve
522                URI u = new URI(resolve);
523                merged.appendPath( u.getPath() );
524                if ( log.isDebugEnabled()) {
525                   log.debug("Resolving implicit with base/systemId: " + merged.toExternalForm());
526                } // end of if ()
527                source =  getInputSource( merged.toExternalForm() );
528             } catch (Exception e) {
529                ;// NOOP
530             } // end of try-catch
531          } // end of if ()
532          
533          
534          
535 
536       } // end of if ()  
537       if ( source == null) {
538          //Lookup the complete uri
539          try {
540             if ( log.isDebugEnabled()) {
541                   log.debug("Resolving implicit with systemId: " + resolve);
542                } // end of if ()
543             source = getInputSource(resolve);
544          } catch (Exception e) {
545             ;//NOOP
546          } // end of try-catch
547       }
548 
549       return source;
550       
551       
552    }
553 
554    /***
555     * try to make a name out of resolve and look it up as a file relative to
556     * the base of the Catalog.
557     * <p>if resolve is relative its complete relative path will be used.
558     * <p>The systemId will be set when the inputStream returns.</p>
559     */
560    private InputSource getFileInputSource(String resolve) throws MalformedURLException {
561       
562       File file = null;
563       try {
564          URL tmp = new URL(resolve);
565          file = new File(tmp.getFile());
566       }catch(java.net.MalformedURLException ex) {
567          file = new File(resolve);
568       }
569       
570       if (file.isAbsolute()) {
571          //Strip al but last part
572          file = new File(file.getName());
573       }
574       
575       // try get it, construct new
576       File baseFile = new File(catalog.getBase().getFile());
577       File check = new File(baseFile, file.getPath());      
578       if(check.exists()) {
579          if ( log.isDebugEnabled()) {
580             log.debug(resolve + " resolved to file "+check);
581          } // end of if ()
582          FileReader r = null;
583          try {
584             r = new FileReader(check);
585          }catch(java.io.FileNotFoundException ex) {
586             return null;
587          }
588          InputSource source = new InputSource(r);
589          source.setSystemId( check.toURL().toExternalForm() );
590          return source;
591       }else {
592          return null;
593       }
594       
595 
596    }
597    
598    /***
599     * Make an URI out of the uri string.
600     *<p>The URI will be created with the URIFactory if set, otherwise use the default resolution mechanism.</p>
601     * @see org.backsource.utils.resource.URIFactory
602     */
603    private URI getURI(String uri) throws MalformedURIException{
604       if ( uriFactory != null) {
605          return new URI(uri,uriFactory);
606       } else {
607          return new URI(uri);
608       } // end of else
609       
610       
611    }
612    /***
613     * <p>This is called from the URIResolver to set an EntityResolver
614     * on the SAX parser to be used for new XML documents that are
615     * encountered as a result of the document() function, xsl:import,
616     * or xsl:include.  This is done because the XSLT processor calls
617     * out to the SAXParserFactory itself to create a new SAXParser to
618     * parse the new document.  The new parser does not automatically
619     * inherit the EntityResolver of the original (although arguably
620     * it should).  See below:</p>
621     *
622     * <tt>"If an application wants to set the ErrorHandler or
623     * EntityResolver for an XMLReader used during a transformation,
624     * it should use a URIResolver to return the SAXSource which
625     * provides (with getXMLReader) a reference to the XMLReader"</tt>
626     *
627     * <p>...quoted from page 118 of the Java API for XML
628     * Processing 1.1 specification</p>
629     * <p>Taken from ant XMLCatalog.</p>
630     * @see org.apache.tools.ant.types.XMLCatalog
631     *
632     */
633    private void setEntityResolver(SAXSource source) throws TransformerException {
634       
635         XMLReader reader = source.getXMLReader();
636         if (reader == null) {
637             SAXParserFactory spFactory = SAXParserFactory.newInstance();
638             spFactory.setNamespaceAware(true);
639             try {
640                 reader = spFactory.newSAXParser().getXMLReader();
641             }
642             catch (ParserConfigurationException ex) {
643                 throw new TransformerException(ex);
644             }
645             catch (SAXException ex) {
646                 throw new TransformerException(ex);
647             }
648         }
649         reader.setEntityResolver(this);
650         source.setXMLReader(reader);
651     }
652 
653        /***
654      * Utility method to remove trailing fragment from a URI.
655      * For example,
656      * <code>http://java.sun.com/index.html#chapter1</code>
657      * would return <code>http://java.sun.com/index.html</code>.
658      * <p>Taken from ant XMLCatalog.</p>
659      *
660      * @param uri The URI to process.  It may or may not contain a
661      *            fragment.
662      * @return The URI sans fragment.
663      * @see org.apache.tools.ant.types.XMLCatalog
664      */
665     private String removeFragment(String uri) {
666         String result = uri;
667         String fragment = null;
668         int hashPos = uri.indexOf("#");
669         if (hashPos >= 0) {
670             result = uri.substring(0, hashPos);
671             fragment = uri.substring(hashPos+1);
672         }
673         return result;
674     }
675 
676    /***
677     * Deduce the name part of the URI
678     */
679    protected String getName(String uriSpec) throws MalformedURLException{
680       URI uri = new URI(uriSpec);
681       File file = new File(uri.getPath());
682       return file.getName();
683    }
684 }// CatalogResolver