View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.sax.xpath;
18  
19  import java.util.HashMap;
20  import java.util.Map;
21  
22  /**
23   * Parser for a very simple XPath subset. Only the following XPath constructs
24   * (with namespaces) are supported:
25   * <ul>
26   *   <li><code>.../text()</code></li>
27   *   <li><code>.../@*</code></li>
28   *   <li><code>.../@name</code></li>
29   *   <li><code>.../*...</code></li>
30   *   <li><code>.../name...</code></li>
31   *   <li><code>...//*...</code></li>
32   *   <li><code>...//name...</code></li>
33   * </ul>
34   */
35  public class XPathParser {
36  
37      private final Map<String, String> prefixes = new HashMap<String, String>();
38  
39      public XPathParser() {
40      }
41  
42      public XPathParser(String prefix, String namespace) {
43          addPrefix(prefix, namespace);
44      }
45  
46      public void addPrefix(String prefix, String namespace) {
47          prefixes.put(prefix, namespace);
48      }
49  
50      /**
51       * Parses the given simple XPath expression to an evaluation state
52       * initialized at the document node. Invalid expressions are not flagged
53       * as errors, they just result in a failing evaluation state.
54       *
55       * @param xpath simple XPath expression
56       * @return XPath evaluation state
57       */
58      public Matcher parse(String xpath) {
59          if (xpath.equals("/text()")) {
60              return TextMatcher.INSTANCE;
61          } else if (xpath.equals("/node()")) {
62              return NodeMatcher.INSTANCE;
63          } else if (xpath.equals("/descendant:node()")) {
64              return new CompositeMatcher(
65                      TextMatcher.INSTANCE,
66                      new ChildMatcher(new SubtreeMatcher(NodeMatcher.INSTANCE)));
67          } else if (xpath.equals("/@*")) {
68              return AttributeMatcher.INSTANCE;
69          } else if (xpath.length() == 0) {
70              return ElementMatcher.INSTANCE;
71          } else if (xpath.startsWith("/@")) {
72              String name = xpath.substring(2);
73              String prefix = null;
74              int colon = name.indexOf(':');
75              if (colon != -1) {
76                  prefix = name.substring(0, colon);
77                  name = name.substring(colon + 1);
78              }
79              if (prefixes.containsKey(prefix)) {
80                  return new NamedAttributeMatcher(prefixes.get(prefix), name);
81              } else {
82                  return Matcher.FAIL;
83              }
84          } else if (xpath.startsWith("/*")) {
85              return new ChildMatcher(parse(xpath.substring(2)));
86          } else if (xpath.startsWith("///")) {
87              return Matcher.FAIL;
88          } else if (xpath.startsWith("//")) {
89              return new SubtreeMatcher(parse(xpath.substring(1)));
90          } else if (xpath.startsWith("/")) {
91              int slash = xpath.indexOf('/', 1);
92              if (slash == -1) {
93                  slash = xpath.length();
94              }
95              String name = xpath.substring(1, slash);
96              String prefix = null;
97              int colon = name.indexOf(':');
98              if (colon != -1) {
99                  prefix = name.substring(0, colon);
100                 name = name.substring(colon + 1);
101             }
102             if (prefixes.containsKey(prefix)) {
103                 return new NamedElementMatcher(
104                         prefixes.get(prefix), name,
105                         parse(xpath.substring(slash)));
106             } else {
107                 return Matcher.FAIL;
108             }
109         } else {
110             return Matcher.FAIL;
111         }
112     }
113 
114 }