View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.sax.xpath;
18  
19  import java.util.LinkedList;
20  
21  import org.apache.tika.sax.ContentHandlerDecorator;
22  import org.xml.sax.Attributes;
23  import org.xml.sax.ContentHandler;
24  import org.xml.sax.SAXException;
25  import org.xml.sax.helpers.AttributesImpl;
26  
27  /**
28   * Content handler decorator that only passes the elements, attributes,
29   * and text nodes that match the given XPath expression.
30   */
31  public class MatchingContentHandler extends ContentHandlerDecorator {
32  
33      private final LinkedList<Matcher> matchers = new LinkedList<Matcher>();
34  
35      private Matcher matcher;
36  
37      public MatchingContentHandler(ContentHandler delegate, Matcher matcher) {
38          super(delegate);
39          this.matcher = matcher;
40      }
41  
42      public void startElement(
43              String uri, String localName, String name, Attributes attributes)
44              throws SAXException {
45          matchers.addFirst(matcher);
46          matcher = matcher.descend(uri, localName);
47  
48          AttributesImpl matches = new AttributesImpl();
49          for (int i = 0; i < attributes.getLength(); i++) {
50              String attributeURI = attributes.getURI(i);
51              String attributeName = attributes.getLocalName(i);
52              if (matcher.matchesAttribute(attributeURI, attributeName)) {
53                  matches.addAttribute(
54                          attributeURI, attributeName, attributes.getQName(i),
55                          attributes.getType(i), attributes.getValue(i));
56              }
57          }
58  
59          if (matcher.matchesElement() || matches.getLength() > 0) {
60              super.startElement(uri, localName, name, matches);
61              if (!matcher.matchesElement()) {
62                  // Force the matcher to match the current element, so the
63                  // endElement method knows to emit the correct event
64                  matcher =
65                      new CompositeMatcher(matcher, ElementMatcher.INSTANCE);
66              }
67          }
68      }
69  
70      public void endElement(String uri, String localName, String name)
71              throws SAXException {
72          if (matcher.matchesElement()) {
73              super.endElement(uri, localName, name);
74          }
75          matcher = matchers.removeFirst();
76      }
77  
78      public void characters(char[] ch, int start, int length)
79              throws SAXException {
80          if (matcher.matchesText()) {
81              super.characters(ch, start, length);
82          }
83      }
84  
85      public void ignorableWhitespace(char[] ch, int start, int length)
86              throws SAXException {
87          if (matcher.matchesText()) {
88              super.ignorableWhitespace(ch, start, length);
89          }
90      }
91  
92      public void processingInstruction(String target, String data) {
93          // TODO: Support for matching processing instructions
94      }
95  
96      public void skippedEntity(String name) throws SAXException {
97          // TODO: Can skipped entities refer to more than text?
98          if (matcher.matchesText()) {
99              super.skippedEntity(name);
100         }
101     }
102 
103 }