View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.mime;
18  
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
26  
/**
 * Internet media type, consisting of a type, a subtype and an optional set
 * of named parameters (for example <code>text/html; charset=utf-8</code>).
 * <p>
 * The type, the subtype and all parameter names are trimmed and converted
 * to lower case in a locale-independent way; parameter values are kept as
 * given. Instances are not modified after construction.
 */
public final class MediaType {

    /** Shared, immutable "no parameters" map used by the two-argument constructor. */
    private static final Map<String, String> NO_PARAMETERS =
        Collections.<String, String>emptyMap();

    /** Characters that are backslash-escaped inside a quoted parameter value. */
    private static final Pattern SPECIAL =
        Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]");

    /** Characters whose presence forces a parameter value to be quoted. */
    private static final Pattern SPECIAL_OR_WHITESPACE =
        Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]");

    // TIKA-350: handle charset as first element in content-type
    // See http://www.ietf.org/rfc/rfc2045.txt for valid mime-type characters.
    // Note: control characters are excluded with \p{Cntrl}. The "\cX" escape
    // must not be used for this, as it consumes the following character and
    // denotes one single control character only.
    private static final String VALID_MIMETYPE_CHARS =
        "[^\\p{Cntrl}\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]";
    private static final String MIME_TYPE_PATTERN_STRING = "(" + VALID_MIMETYPE_CHARS + "+)"
                    + "\\s*/\\s*" + "(" + VALID_MIMETYPE_CHARS + "+)";
    private static final Pattern CONTENT_TYPE_PATTERN = Pattern.compile(
                    "(?is)\\s*" + MIME_TYPE_PATTERN_STRING + "\\s*($|;.*)");
    private static final Pattern CONTENT_TYPE_CHARSET_FIRST_PATTERN = Pattern.compile(
                    "(?i)\\s*(charset\\s*=\\s*[^\\p{Cntrl};\\s]+)\\s*;\\s*" + MIME_TYPE_PATTERN_STRING);

    public static final MediaType OCTET_STREAM = application("octet-stream");

    public static final MediaType TEXT_PLAIN = text("plain");

    public static final MediaType APPLICATION_XML = application("xml");

    /**
     * Creates an <code>application/...</code> media type.
     *
     * @param type subtype name
     * @return media type <code>application/&lt;type&gt;</code> without parameters
     */
    public static MediaType application(String type) {
        return new MediaType("application", type);
    }

    /**
     * Creates an <code>audio/...</code> media type.
     *
     * @param type subtype name
     * @return media type <code>audio/&lt;type&gt;</code> without parameters
     */
    public static MediaType audio(String type) {
        return new MediaType("audio", type);
    }

    /**
     * Creates an <code>image/...</code> media type.
     *
     * @param type subtype name
     * @return media type <code>image/&lt;type&gt;</code> without parameters
     */
    public static MediaType image(String type) {
        return new MediaType("image", type);
    }

    /**
     * Creates a <code>text/...</code> media type.
     *
     * @param type subtype name
     * @return media type <code>text/&lt;type&gt;</code> without parameters
     */
    public static MediaType text(String type) {
        return new MediaType("text", type);
    }

    /**
     * Creates a <code>video/...</code> media type.
     *
     * @param type subtype name
     * @return media type <code>video/&lt;type&gt;</code> without parameters
     */
    public static MediaType video(String type) {
        return new MediaType("video", type);
    }

    /**
     * Parses the given string to a media type. The string is expected to be of
     * the form "type/subtype(; parameter=...)*" as defined in RFC 2045, though
     * we also handle "charset=xxx; type/subtype" for broken web servers.
     * <p>
     * Parameter names are lower-cased. A parameter is split at its first
     * <code>=</code> only, so values may themselves contain that character.
     * Values enclosed in double quotes are unquoted and their backslash
     * escapes resolved, which makes this method the inverse of
     * {@link #toString()}. Parameters without a name or without a value are
     * ignored.
     *
     * @param string
     *            media type string to be parsed
     * @return parsed media type, or <code>null</code> if parsing fails
     */
    public static MediaType parse(String string) {
        if (string == null) {
            return null;
        }

        String type;
        String subtype;
        String params;

        Matcher m = CONTENT_TYPE_PATTERN.matcher(string);
        if (m.matches()) {
            type = m.group(1);
            subtype = m.group(2);
            params = m.group(3);
        } else {
            m = CONTENT_TYPE_CHARSET_FIRST_PATTERN.matcher(string);
            if (!m.matches()) {
                return null;
            }
            params = m.group(1);
            type = m.group(2);
            subtype = m.group(3);
        }

        // Go through the constructor so that parsed and programmatically
        // created media types are normalized in exactly the same way.
        return new MediaType(type, subtype, parseParameters(params));
    }

    /**
     * Splits a <code>;</code>-separated parameter list into a map.
     * Later occurrences of a parameter name replace earlier ones.
     */
    private static Map<String, String> parseParameters(String params) {
        Map<String, String> parameters = new TreeMap<String, String>();
        int length = params.length();
        int start = 0;
        while (start < length) {
            int end = findParameterEnd(params, start);
            String piece = params.substring(start, end);

            int equals = piece.indexOf('=');
            if (equals != -1) {
                String key = piece.substring(0, equals).trim();
                String value = piece.substring(equals + 1).trim();
                if (key.length() > 0 && value.length() > 0) {
                    parameters.put(
                            key.toLowerCase(Locale.ENGLISH), unquote(value));
                }
            }

            start = end + 1; // continue after the separating ';'
        }
        return parameters;
    }

    /**
     * Returns the index of the first <code>;</code> at or after start that is
     * not inside a double-quoted string, or the string length if none exists.
     */
    private static int findParameterEnd(String params, int start) {
        int length = params.length();
        boolean quoted = false;
        for (int i = start; i < length; i++) {
            char c = params.charAt(i);
            if (quoted && c == '\\') {
                i++; // an escaped character can never terminate the parameter
            } else if (c == '"') {
                quoted = !quoted;
            } else if (c == ';' && !quoted) {
                return i;
            }
        }
        if (quoted) {
            // Unbalanced quote: do not swallow the remaining parameters,
            // fall back to a plain split at the next semicolon instead.
            int semicolon = params.indexOf(';', start);
            if (semicolon != -1) {
                return semicolon;
            }
        }
        return length;
    }

    /**
     * Removes one pair of enclosing double quotes and resolves backslash
     * escapes. Values that are not enclosed in quotes are returned as is.
     */
    private static String unquote(String value) {
        int length = value.length();
        if (length < 2
                || value.charAt(0) != '"'
                || value.charAt(length - 1) != '"') {
            return value;
        }
        StringBuilder builder = new StringBuilder(length - 2);
        for (int i = 1; i < length - 1; i++) {
            char c = value.charAt(i);
            if (c == '\\' && i + 1 < length - 1) {
                c = value.charAt(++i);
            }
            builder.append(c);
        }
        return builder.toString();
    }

    private final String type;

    private final String subtype;

    private final SortedMap<String, String> parameters;

    /**
     * Creates a media type with the given type, subtype and parameters.
     * The type, subtype and parameter names are trimmed and lower-cased
     * independently of the default locale; the given map is copied.
     *
     * @param type       top-level type, e.g. "text"
     * @param subtype    subtype, e.g. "plain"
     * @param parameters parameter names mapped to their values
     * @throws NullPointerException if any argument or any parameter name
     *                              is <code>null</code>
     */
    public MediaType(
            String type, String subtype, Map<String, String> parameters) {
        // Locale.ENGLISH keeps e.g. "IMAGE" -> "image" even when the default
        // locale has special casing rules (Turkish dotless i).
        this.type = type.trim().toLowerCase(Locale.ENGLISH);
        this.subtype = subtype.trim().toLowerCase(Locale.ENGLISH);
        this.parameters = new TreeMap<String, String>();
        for (Map.Entry<String, String> entry : parameters.entrySet()) {
            this.parameters.put(
                    entry.getKey().trim().toLowerCase(Locale.ENGLISH),
                    entry.getValue());
        }
    }

    /**
     * Creates a media type without parameters.
     *
     * @param type    top-level type, e.g. "text"
     * @param subtype subtype, e.g. "plain"
     */
    public MediaType(String type, String subtype) {
        this(type, subtype, NO_PARAMETERS);
    }

    /**
     * Returns a map with the entries of both given maps. If one of the maps
     * is empty the other one is returned directly; otherwise entries of b
     * take precedence over entries of a with the same key.
     */
    private static Map<String, String> union(
            Map<String, String> a, Map<String, String> b) {
        if (a.isEmpty()) {
            return b;
        } else if (b.isEmpty()) {
            return a;
        } else {
            Map<String, String> union = new HashMap<String, String>();
            union.putAll(a);
            union.putAll(b);
            return union;
        }
    }

    /**
     * Creates a media type by adding parameters to an existing media type.
     * Given parameters replace equally named parameters of the base type.
     *
     * @param type       media type to extend
     * @param parameters additional parameters
     */
    public MediaType(MediaType type, Map<String, String> parameters) {
        this(type.type, type.subtype, union(type.parameters, parameters));
    }

    /**
     * Returns this media type without its parameters.
     *
     * @return this instance if it has no parameters, otherwise a new
     *         parameterless media type with the same type and subtype
     */
    public MediaType getBaseType() {
        if (parameters.isEmpty()) {
            return this;
        } else {
            return new MediaType(type, subtype);
        }
    }

    /**
     * @return the lower-case top-level type, e.g. "text"
     */
    public String getType() {
        return type;
    }

    /**
     * @return the lower-case subtype, e.g. "plain"
     */
    public String getSubtype() {
        return subtype;
    }

    /**
     * @return unmodifiable view of the parameters, sorted by (lower-case) name
     */
    public Map<String, String> getParameters() {
        return Collections.unmodifiableMap(parameters);
    }

    /**
     * Checks whether this media type is a specialization of the given one.
     * The checks are applied in this order:
     * <ol>
     *   <li>everything specializes <code>application/octet-stream</code></li>
     *   <li>the top-level types must be equal</li>
     *   <li>this type must contain all parameters of the given type</li>
     *   <li>every remaining (<code>text/*</code>) type specializes
     *       <code>text/plain</code></li>
     *   <li>every remaining <code>application/*+xml</code> type specializes
     *       <code>application/xml</code></li>
     *   <li>otherwise the subtypes must be equal</li>
     * </ol>
     *
     * @param that the potentially more general media type
     * @return <code>true</code> if this type specializes the given type
     */
    public boolean isSpecializationOf(MediaType that) {
        if (OCTET_STREAM.equals(that)) {
            return true;
        } else if (!type.equals(that.type)) {
            return false;
        } else if (!parameters.entrySet().containsAll(that.parameters.entrySet())) {
            return false;
        } else if (TEXT_PLAIN.equals(that.getBaseType())) {
            return true;
        } else if (APPLICATION_XML.equals(that.getBaseType())
                && subtype.endsWith("+xml")) {
            return true;
        } else {
            return subtype.equals(that.subtype);
        }
    }

    /**
     * Returns the string form "type/subtype; name=value; ...", with the
     * parameters in sorted name order. A value that contains a special
     * character or whitespace is enclosed in double quotes and each of its
     * special characters is preceded by a backslash.
     *
     * @return string representation that {@link #parse(String)} can read back
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(type);
        builder.append('/');
        builder.append(subtype);
        for (Map.Entry<String, String> entry : parameters.entrySet()) {
            builder.append("; ");
            builder.append(entry.getKey());
            builder.append("=");
            String value = entry.getValue();
            if (SPECIAL_OR_WHITESPACE.matcher(value).find()) {
                builder.append('"');
                builder.append(SPECIAL.matcher(value).replaceAll("\\\\$0"));
                builder.append('"');
            } else {
                builder.append(value);
            }
        }
        return builder.toString();
    }

    /**
     * Two media types are equal if their types, subtypes and parameters
     * (names and values) are all equal.
     */
    @Override
    public boolean equals(Object object) {
        if (object instanceof MediaType) {
            MediaType that = (MediaType) object;
            return type.equals(that.type)
                && subtype.equals(that.subtype)
                && parameters.equals(that.parameters);
        } else {
            return false;
        }
    }

    @Override
    public int hashCode() {
        int hash = 17;
        hash = hash * 31 + type.hashCode();
        hash = hash * 31 + subtype.hashCode();
        hash = hash * 31 + parameters.hashCode();
        return hash;
    }

}
244 }