View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.mime;
18  
// JDK imports
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Pattern;
25  
26  /**
27   * Defines a MimeType pattern.
28   */
29  class Patterns {
30  
31      /**
32       * Index of exact name patterns.
33       */
34      private final Map<String, MimeType> names = new HashMap<String, MimeType>();
35  
36      /**
37       * Index of extension patterns of the form "*extension".
38       */
39      private final Map<String, MimeType> extensions =
40          new HashMap<String, MimeType>();
41  
42      private int minExtensionLength = Integer.MAX_VALUE;
43  
44      private int maxExtensionLength = 0;
45  
46      /**
47       * Index of generic glob patterns, sorted by length.
48       */
49      private final SortedMap<String, MimeType> globs =
50          new TreeMap<String, MimeType>(new Comparator<String>() {
51              public int compare(String a, String b) {
52                  int diff = b.length() - a.length();
53                  if (diff == 0) {
54                      diff = a.compareTo(b);
55                  }
56                  return diff;
57              }
58          });
59  
60  
61      public void add(String pattern, MimeType type) throws MimeTypeException {
62          this.add(pattern, false, type);
63      }
64     
65      public void add(String pattern, boolean isJavaRegex, MimeType type)
66              throws MimeTypeException {
67          if (pattern == null || type == null) {
68              throw new IllegalArgumentException(
69                      "Pattern and/or mime type is missing");
70          }
71          
72          if (isJavaRegex) {
73              // in this case, we don't need to build a regex pattern
74              // it's already there for us, so just add the pattern as is
75              addGlob(pattern, type);
76          } else {
77  
78              if (pattern.indexOf('*') == -1 && pattern.indexOf('?') == -1
79                      && pattern.indexOf('[') == -1) {
80                  addName(pattern, type);
81              } else if (pattern.startsWith("*") && pattern.indexOf('*', 1) == -1
82                      && pattern.indexOf('?') == -1 && pattern.indexOf('[') == -1) {
83                  addExtension(pattern.substring(1), type);
84              } else {
85                  addGlob(compile(pattern), type);
86              }
87          }
88      }
89      
90      private void addName(String name, MimeType type) throws MimeTypeException {
91          MimeType previous = names.get(name);
92          if (previous == null || previous.isDescendantOf(type)) {
93              names.put(name, type);
94          } else if (previous == type || type.isDescendantOf(previous)) {
95              // do nothing
96          } else {
97              throw new MimeTypeException("Conflicting name pattern: " + name);
98          }
99      }
100 
101     private void addExtension(String extension, MimeType type)
102             throws MimeTypeException {
103         MimeType previous = extensions.get(extension);
104         if (previous == null || previous.isDescendantOf(type)) {
105             extensions.put(extension, type);
106             int length = extension.length();
107             minExtensionLength = Math.min(minExtensionLength, length);
108             maxExtensionLength = Math.max(maxExtensionLength, length);
109         } else if (previous == type || type.isDescendantOf(previous)) {
110             // do nothing
111         } else {
112             throw new MimeTypeException(
113                     "Conflicting extension pattern: " + extension);
114         }
115     }
116 
117     private void addGlob(String glob, MimeType type)
118             throws MimeTypeException {
119         MimeType previous = globs.get(glob);
120         if (previous == null || previous.isDescendantOf(type)) {
121             globs.put(glob, type);
122         } else if (previous == type || type.isDescendantOf(previous)) {
123             // do nothing
124         } else {
125             throw new MimeTypeException("Conflicting glob pattern: " + glob);
126         }
127     }
128 
129     /**
130      * Find the MimeType corresponding to a resource name.
131      * 
132      * It applies the recommendations detailed in FreeDesktop Shared MIME-info
133      * Database for guessing MimeType from a resource name: It first tries a
134      * case-sensitive match, then try again with the resource name converted to
135      * lower-case if that fails. If several patterns match then the longest
136      * pattern is used. In particular, files with multiple extensions (such as
137      * Data.tar.gz) match the longest sequence of extensions (eg '*.tar.gz' in
138      * preference to '*.gz'). Literal patterns (eg, 'Makefile') are matched
139      * before all others. Patterns beginning with `*.' and containing no other
140      * special characters (`*?[') are matched before other wildcarded patterns
141      * (since this covers the majority of the patterns).
142      */
143     public MimeType matches(String name) {
144         if (name == null) {
145             throw new IllegalArgumentException("Name is missing");
146         }
147 
148         // First, try exact match of the provided resource name
149         if (names.containsKey(name)) {
150             return names.get(name);
151         }
152 
153         // Then try "extension" (*.xxx) matching
154         int maxLength = Math.min(maxExtensionLength, name.length());
155         for (int n = maxLength; n >= minExtensionLength; n--) {
156             String extension = name.substring(name.length() - n);
157             if (extensions.containsKey(extension)) {
158                 return extensions.get(extension);
159             }
160         }
161 
162         // And finally, try complex regexp matching
163         for (Map.Entry<String, MimeType> entry : globs.entrySet()) {
164             if (name.matches(entry.getKey())) {
165                 return entry.getValue();
166             }
167         }
168 
169         return null;
170     }
171 
172     private String compile(String glob) {
173         StringBuilder pattern = new StringBuilder();
174         pattern.append("\\A");
175         for (int i = 0; i < glob.length(); i++) {
176             char ch = glob.charAt(i);
177             if (ch == '?') {
178                 pattern.append('.');
179             } else if (ch == '*') {
180                 pattern.append(".*");
181             } else if ("\\[]^.-$+(){}|".indexOf(ch) != -1) {
182                 pattern.append('\\');
183                 pattern.append(ch);
184             } else {
185                 pattern.append(ch);
186             }
187         }
188         pattern.append("\\z");
189         return pattern.toString();
190     }
191 
192 }