View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.html;
18  
19  /**
20   * The default HTML mapping rules in Tika.
21   *
22   * @since Apache Tika 0.6
23   */
24  public class DefaultHtmlMapper implements HtmlMapper {
25  
26      public String mapSafeElement(String name) {
27          // Based on http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd
28  
29          if ("H1".equals(name)) return "h1";
30          if ("H2".equals(name)) return "h2";
31          if ("H3".equals(name)) return "h3";
32          if ("H4".equals(name)) return "h4";
33          if ("H5".equals(name)) return "h5";
34          if ("H6".equals(name)) return "h6";
35  
36          if ("P".equals(name)) return "p";
37          if ("PRE".equals(name)) return "pre";
38          if ("BLOCKQUOTE".equals(name)) return "blockquote";
39  
40          if ("UL".equals(name)) return "ul";
41          if ("OL".equals(name)) return "ol";
42          if ("MENU".equals(name)) return "ul";
43          if ("LI".equals(name)) return "li";
44          if ("DL".equals(name)) return "dl";
45          if ("DT".equals(name)) return "dt";
46          if ("DD".equals(name)) return "dd";
47  
48          if ("TABLE".equals(name)) return "table";
49          if ("THEAD".equals(name)) return "thead";
50          if ("TBODY".equals(name)) return "tbody";
51          if ("TR".equals(name)) return "tr";
52          if ("TH".equals(name)) return "th";
53          if ("TD".equals(name)) return "td";
54  
55          if ("ADDRESS".equals(name)) return "address";
56  
57          return null;
58      }
59  
60      public boolean isDiscardElement(String name) {
61          return "STYLE".equals(name) || "SCRIPT".equals(name);
62      }
63  
64  }