1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.tika.mime;
19
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.net.MalformedURLException;
25 import java.net.URL;
26
27 import junit.framework.TestCase;
28
29 import org.apache.tika.config.TikaConfig;
30 import org.apache.tika.metadata.Metadata;
31
32
33
34
35
36
37 public class TestMimeTypes extends TestCase {
38
39 private MimeTypes repo;
40
41 private static URL u;
42
43 static {
44 try {
45 u = new URL("http://mydomain.com/x.pdf?x=y");
46 } catch (MalformedURLException e) {
47 fail(e.getMessage());
48 }
49 }
50
51 private static final File f = new File("/a/b/c/x.pdf");
52
53 public TestMimeTypes() {
54 try {
55 repo = TikaConfig.getDefaultConfig().getMimeRepository();
56 } catch (Exception e) {
57 fail(e.getMessage());
58 }
59
60 }
61
62 public void testCaseSensitivity() {
63 MimeType type = repo.getMimeType("test.PDF");
64 assertNotNull(type);
65 assertEquals(repo.getMimeType("test.pdf"), type);
66 assertEquals(repo.getMimeType("test.PdF"), type);
67 assertEquals(repo.getMimeType("test.pdF"), type);
68 }
69
70 public void testLoadMimeTypes() throws MimeTypeException {
71 assertNotNull(repo.forName("application/octet-stream"));
72 assertNotNull(repo.forName("text/x-tex"));
73 }
74
75
76
77
78 public void testGuessMimeTypes() throws Exception {
79 assertTypeByName("application/pdf", "x.pdf");
80 assertEquals("application/pdf", repo.getMimeType(u).getName());
81 assertEquals("application/pdf", repo.getMimeType(f).getName());
82 assertTypeByName("text/plain", "x.txt");
83 assertTypeByName("text/html", "x.htm");
84 assertTypeByName("text/html", "x.html");
85 assertTypeByName("application/xhtml+xml", "x.xhtml");
86 assertTypeByName("application/xml", "x.xml");
87 assertTypeByName("application/zip", "x.zip");
88 assertTypeByName("application/vnd.oasis.opendocument.text", "x.odt");
89 assertTypeByName("application/octet-stream", "x.unknown");
90
91
92
93 assertTypeByName("application/msword", "x.doc");
94 assertTypeByName("application/msword", "x.dot");
95 assertTypeByName("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "x.docx");
96 assertTypeByName("application/vnd.openxmlformats-officedocument.wordprocessingml.template", "x.dotx");
97 assertTypeByName("application/vnd.ms-word.document.macroenabled.12", "x.docm");
98 assertTypeByName("application/vnd.ms-word.template.macroenabled.12", "x.dotm");
99 assertTypeByName("application/vnd.ms-excel", "x.xls");
100 assertTypeByName("application/vnd.ms-excel", "x.xlt");
101 assertTypeByName("application/vnd.ms-excel", "x.xla");
102 assertTypeByName("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "x.xlsx");
103 assertTypeByName("application/vnd.openxmlformats-officedocument.spreadsheetml.template", "x.xltx");
104 assertTypeByName("application/vnd.ms-excel.sheet.macroenabled.12", "x.xlsm");
105 assertTypeByName("application/vnd.ms-excel.template.macroenabled.12", "x.xltm");
106 assertTypeByName("application/vnd.ms-excel.addin.macroenabled.12", "x.xlam");
107 assertTypeByName("application/vnd.ms-excel.sheet.binary.macroenabled.12", "x.xlsb");
108 assertTypeByName("application/vnd.ms-powerpoint", "x.ppt");
109 assertTypeByName("application/vnd.ms-powerpoint", "x.pot");
110 assertTypeByName("application/vnd.ms-powerpoint", "x.pps");
111 assertTypeByName("application/vnd.ms-powerpoint", "x.ppa");
112 assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.presentation", "x.pptx");
113 assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.template", "x.potx");
114 assertTypeByName("application/vnd.openxmlformats-officedocument.presentationml.slideshow", "x.ppsx");
115 assertTypeByName("application/vnd.ms-powerpoint.addin.macroenabled.12", "x.ppam");
116 assertTypeByName("application/vnd.ms-powerpoint.presentation.macroenabled.12", "x.pptm");
117 assertTypeByName("application/vnd.ms-powerpoint.template.macroenabled.12", "x.potm");
118 assertTypeByName("application/vnd.ms-powerpoint.slideshow.macroenabled.12", "x.ppsm");
119 }
120
121 public void testOoxmlDetection() throws Exception {
122 assertTypeByData("application/x-tika-ooxml", "testWORD.docx");
123 assertTypeByData("application/x-tika-ooxml", "testEXCEL.xlsx");
124 assertTypeByData("application/x-tika-ooxml", "testPPT.pptx");
125 }
126
127 public void testJpegDetection() throws Exception {
128 assertType("image/jpeg", "testJPEG.jpg");
129 assertTypeByData("image/jpeg", "testJPEG.jpg");
130 assertTypeByName("image/jpeg", "x.jpg");
131 assertTypeByName("image/jpeg", "x.JPG");
132 assertTypeByName("image/jpeg", "x.jpeg");
133 assertTypeByName("image/jpeg", "x.JPEG");
134 assertTypeByName("image/jpeg", "x.jpe");
135 assertTypeByName("image/jpeg", "x.jif");
136 assertTypeByName("image/jpeg", "x.jfif");
137 assertTypeByName("image/jpeg", "x.jfi");
138 }
139
140 public void testTiffDetection() throws Exception {
141 assertType("image/tiff", "testTIFF.tif");
142 assertTypeByData("image/tiff", "testTIFF.tif");
143 assertTypeByName("image/tiff", "x.tiff");
144 assertTypeByName("image/tiff", "x.tif");
145 assertTypeByName("image/tiff", "x.TIF");
146 }
147
148 public void testGifDetection() throws Exception {
149 assertType("image/gif", "testGIF.gif");
150 assertTypeByData("image/gif", "testGIF.gif");
151 assertTypeByName("image/gif", "x.gif");
152 assertTypeByName("image/gif", "x.GIF");
153 }
154
155 public void testPngDetection() throws Exception {
156 assertType("image/png", "testPNG.png");
157 assertTypeByData("image/png", "testPNG.png");
158 assertTypeByName("image/png", "x.png");
159 assertTypeByName("image/png", "x.PNG");
160 }
161
162 public void testBmpDetection() throws Exception {
163 assertType("image/bmp", "testBMP.bmp");
164 assertTypeByData("image/bmp", "testBMP.bmp");
165 assertTypeByName("image/bmp", "x.bmp");
166 assertTypeByName("image/bmp", "x.BMP");
167 assertTypeByName("image/bmp", "x.dib");
168 assertTypeByName("image/bmp", "x.DIB");
169 }
170
171 public void testPnmDetection() throws Exception {
172 assertType("image/x-portable-bitmap", "testPBM.pbm");
173 assertType("image/x-portable-graymap", "testPGM.pgm");
174 assertType("image/x-portable-pixmap", "testPPM.ppm");
175 assertTypeByData("image/x-portable-bitmap", "testPBM.pbm");
176 assertTypeByData("image/x-portable-graymap", "testPGM.pgm");
177 assertTypeByData("image/x-portable-pixmap", "testPPM.ppm");
178 assertTypeByName("image/x-portable-anymap", "x.pnm");
179 assertTypeByName("image/x-portable-anymap", "x.PNM");
180 assertTypeByName("image/x-portable-bitmap", "x.pbm");
181 assertTypeByName("image/x-portable-bitmap", "x.PBM");
182 assertTypeByName("image/x-portable-graymap", "x.pgm");
183 assertTypeByName("image/x-portable-graymap", "x.PGM");
184 assertTypeByName("image/x-portable-pixmap", "x.ppm");
185 assertTypeByName("image/x-portable-pixmap", "x.PPM");
186 }
187
188 public void testCgmDetection() throws Exception {
189
190 assertTypeByName("image/cgm", "x.cgm");
191 assertTypeByName("image/cgm", "x.CGM");
192 }
193
194 public void testRdfXmlDetection() throws Exception {
195 assertTypeByName("application/rdf+xml", "x.rdf");
196 assertTypeByName("application/rdf+xml", "x.owl");
197 }
198
199 public void testSvgDetection() throws Exception {
200 assertType("image/svg+xml", "testSVG.svg");
201 assertTypeByData("image/svg+xml", "testSVG.svg");
202 assertTypeByName("image/svg+xml", "x.svg");
203 assertTypeByName("image/svg+xml", "x.SVG");
204
205
206 assertType("application/x-gzip", "testSVG.svgz");
207 assertTypeByData("application/x-gzip", "testSVG.svgz");
208 assertTypeByName("image/svg+xml", "x.svgz");
209 assertTypeByName("image/svg+xml", "x.SVGZ");
210 }
211
212 public void testPdfDetection() throws Exception {
213 assertType("application/pdf", "testPDF.pdf");
214 assertTypeByData("application/pdf", "testPDF.pdf");
215 assertTypeByName("application/pdf", "x.pdf");
216 assertTypeByName("application/pdf", "x.PDF");
217 }
218
219 public void testSwfDetection() throws Exception {
220
221 assertTypeByName("application/x-shockwave-flash", "x.swf");
222 assertTypeByName("application/x-shockwave-flash", "x.SWF");
223 }
224
225 public void testWmfDetection() throws Exception {
226
227 assertTypeByName("application/x-msmetafile", "x.wmf");
228 assertTypeByName("application/x-msmetafile", "x.WMF");
229
230 assertTypeByName("application/x-msmetafile", "x.emf");
231 assertTypeByName("application/x-msmetafile", "x.EMF");
232
233 assertTypeByName("application/x-ms-wmz", "x.wmz");
234 assertTypeByName("application/x-ms-wmz", "x.WMZ");
235
236 assertTypeByName("application/x-gzip", "x.emz");
237 assertTypeByName("application/x-gzip", "x.EMZ");
238 }
239
240 public void testPsDetection() throws Exception {
241
242 assertTypeByName("application/postscript", "x.ps");
243 assertTypeByName("application/postscript", "x.PS");
244 assertTypeByName("application/postscript", "x.eps");
245 assertTypeByName("application/postscript", "x.epsf");
246 assertTypeByName("application/postscript", "x.epsi");
247 }
248
249
250
251
252 public void testJavaRegex() throws Exception{
253 MimeType testType = new MimeType(this.repo, "foo/bar");
254 this.repo.add(testType);
255 assertNotNull(repo.forName("foo/bar"));
256 String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
257 this.repo.addPattern(testType, pattern, true);
258 String testFileName = "rtg_sst_grb_0.5.12345678";
259 assertNotNull(this.repo.getMimeType(testFileName));
260 assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
261
262 MimeType testType2 = new MimeType(this.repo, "foo/bar2");
263 this.repo.add(testType2);
264 assertNotNull(repo.forName("foo/bar2"));
265 this.repo.addPattern(testType2, pattern, false);
266 assertNotNull(this.repo.getMimeType(testFileName));
267 assertNotSame("foo/bar2", this.repo.getMimeType(testFileName).getName());
268 }
269
270 public void testRawDetection() throws Exception {
271 assertTypeByName("image/x-raw-adobe", "x.dng");
272 assertTypeByName("image/x-raw-adobe", "x.DNG");
273 assertTypeByName("image/x-raw-hasselblad", "x.3fr");
274 assertTypeByName("image/x-raw-fuji", "x.raf");
275 assertTypeByName("image/x-raw-canon", "x.crw");
276 assertTypeByName("image/x-raw-canon", "x.cr2");
277 assertTypeByName("image/x-raw-kodak", "x.k25");
278 assertTypeByName("image/x-raw-kodak", "x.kdc");
279 assertTypeByName("image/x-raw-kodak", "x.dcs");
280 assertTypeByName("image/x-raw-kodak", "x.drf");
281 assertTypeByName("image/x-raw-minolta", "x.mrw");
282 assertTypeByName("image/x-raw-nikon", "x.nef");
283 assertTypeByName("image/x-raw-nikon", "x.nrw");
284 assertTypeByName("image/x-raw-olympus", "x.orf");
285 assertTypeByName("image/x-raw-pentax", "x.ptx");
286 assertTypeByName("image/x-raw-pentax", "x.pef");
287 assertTypeByName("image/x-raw-sony", "x.arw");
288 assertTypeByName("image/x-raw-sony", "x.srf");
289 assertTypeByName("image/x-raw-sony", "x.sr2");
290 assertTypeByName("image/x-raw-sigma", "x.x3f");
291 assertTypeByName("image/x-raw-epson", "x.erf");
292 assertTypeByName("image/x-raw-mamiya", "x.mef");
293 assertTypeByName("image/x-raw-leaf", "x.mos");
294 assertTypeByName("image/x-raw-panasonic", "x.raw");
295 assertTypeByName("image/x-raw-panasonic", "x.rw2");
296 assertTypeByName("image/x-raw-phaseone", "x.cap");
297 assertTypeByName("image/x-raw-phaseone", "x.iiq");
298 assertTypeByName("image/x-raw-phaseone", "x.cap");
299 assertTypeByName("image/x-raw-red", "x.r3d");
300 assertTypeByName("image/x-raw-imacon", "x.fff");
301 assertTypeByName("image/x-raw-logitech", "x.pxn");
302 assertTypeByName("image/x-raw-casio", "x.bay");
303 assertTypeByName("image/x-raw-rawzor", "x.rwz");
304 }
305
306
307
308
309
310 public void testMimeDeterminationForTestDocuments() throws Exception {
311 assertType("text/html", "testHTML.html");
312 assertType("application/zip", "test-documents.zip");
313
314
315
316
317
318
319
320
321 assertType("text/html", "testHTML_utf8.html");
322 assertType(
323 "application/vnd.oasis.opendocument.text",
324 "testOpenOffice2.odt");
325 assertType("application/pdf", "testPDF.pdf");
326 assertType("application/rtf", "testRTF.rtf");
327 assertType("text/plain", "testTXT.txt");
328 assertType("application/xml", "testXML.xml");
329 assertType("audio/basic", "testAU.au");
330 assertType("audio/x-aiff", "testAIFF.aif");
331 assertType("audio/x-wav", "testWAV.wav");
332 assertType("audio/midi", "testMID.mid");
333 }
334
335 private void assertType(String expected, String filename) throws Exception {
336 InputStream stream = TestMimeTypes.class.getResourceAsStream(
337 "/test-documents/" + filename);
338 try {
339 Metadata metadata = new Metadata();
340 metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
341 assertEquals(expected, repo.detect(stream, metadata).toString());
342 } finally {
343 stream.close();
344 }
345 }
346
347 private void assertTypeByName(String expected, String filename)
348 throws IOException {
349 Metadata metadata = new Metadata();
350 metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
351 assertEquals(expected, repo.detect(null, metadata).toString());
352 }
353
354 private void assertTypeByData(String expected, String filename)
355 throws IOException {
356 InputStream stream = TestMimeTypes.class.getResourceAsStream(
357 "/test-documents/" + filename);
358 try {
359 Metadata metadata = new Metadata();
360 assertEquals(expected, repo.detect(stream, metadata).toString());
361 } finally {
362 stream.close();
363 }
364 }
365
366 }