View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.odf;
18  
19  import java.io.IOException;
20  import java.io.StringReader;
21  
22  import org.apache.tika.sax.ContentHandlerDecorator;
23  import org.xml.sax.Attributes;
24  import org.xml.sax.ContentHandler;
25  import org.xml.sax.InputSource;
26  import org.xml.sax.SAXException;
27  import org.xml.sax.helpers.AttributesImpl;
28  
29  /**
30   * Content handler decorator that:<ul>
31   * <li>Maps old OpenOffice 1.0 Namespaces to the OpenDocument ones</li>
32   * <li>Returns a fake DTD when parser requests OpenOffice DTD</li>
33   * </ul>
34   */
35  public class NSNormalizerContentHandler extends ContentHandlerDecorator {
36  
37      private static final String OLD_NS =
38          "http://openoffice.org/2000/";
39  
40      private static final String NEW_NS =
41          "urn:oasis:names:tc:opendocument:xmlns:";
42  
43      private static final String DTD_PUBLIC_ID =
44          "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
45  
46      public NSNormalizerContentHandler(ContentHandler handler) {
47          super(handler);
48      }
49  
50      private String mapOldNS(String ns) {
51          if (ns != null && ns.startsWith(OLD_NS)) {
52              return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0";
53          } else {
54              return ns;
55          }
56      }
57  
58      @Override
59      public void startElement(
60              String namespaceURI, String localName, String qName,
61              Attributes atts) throws SAXException {
62          AttributesImpl natts = new AttributesImpl();
63          for (int i = 0; i < atts.getLength(); i++) {
64              natts.addAttribute(
65                      mapOldNS(atts.getURI(i)), atts.getLocalName(i),
66                      atts.getQName(i), atts.getType(i), atts.getValue(i));
67          }
68          super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
69      }
70  
71      @Override
72      public void endElement(String namespaceURI, String localName, String qName)
73              throws SAXException {
74          super.endElement(mapOldNS(namespaceURI), localName, qName);
75      }
76  
77      @Override
78      public void startPrefixMapping(String prefix, String uri)
79              throws SAXException {
80          super.startPrefixMapping(prefix, mapOldNS(uri));
81      }
82  
83      /**
84       * do not load any DTDs (may be requested by parser). Fake the DTD by
85       * returning a empty string as InputSource
86       */
87      @Override
88      public InputSource resolveEntity(String publicId, String systemId)
89              throws IOException, SAXException {
90          if ((systemId != null && systemId.toLowerCase().endsWith(".dtd"))
91                  || DTD_PUBLIC_ID.equals(publicId)) {
92              return new InputSource(new StringReader(""));
93          } else {
94              return super.resolveEntity(publicId, systemId);
95          }
96      }
97  
98  }