1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.detect;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22
23 import junit.framework.TestCase;
24
25 import org.apache.tika.metadata.Metadata;
26 import org.apache.tika.mime.MediaType;
27
28
29
30
31 public class MagicDetectorTest extends TestCase {
32
33 public void testDetectNull() throws Exception {
34 MediaType html = new MediaType("text", "html");
35 Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
36 assertEquals(
37 MediaType.OCTET_STREAM,
38 detector.detect(null, new Metadata()));
39 }
40
41 public void testDetectSimple() throws Exception {
42 MediaType html = new MediaType("text", "html");
43 Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
44
45 assertDetect(detector, html, "<html");
46 assertDetect(detector, html, "<html><head/><body/></html>");
47 assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
48 assertDetect(detector, MediaType.OCTET_STREAM, "<?xml?><html");
49 assertDetect(detector, MediaType.OCTET_STREAM, " <html");
50 assertDetect(detector, MediaType.OCTET_STREAM, "");
51 }
52
53 public void testDetectOffsetRange() throws Exception {
54 MediaType html = new MediaType("text", "html");
55 Detector detector = new MagicDetector(
56 html, "<html".getBytes("ASCII"), null, 0, 64);
57
58 assertDetect(detector, html, "<html");
59 assertDetect(detector, html, "<html><head/><body/></html>");
60 assertDetect(detector, html, "<?xml?><html/>");
61 assertDetect(detector, html, "\n <html");
62 assertDetect(detector, html, "\u0000<html");
63 assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
64 assertDetect(detector, MediaType.OCTET_STREAM, " html");
65 assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
66
67 assertDetect(detector, html,
68 "0........1.........2.........3.........4.........5.........6"
69 + "1234<html");
70 assertDetect(detector, MediaType.OCTET_STREAM,
71 "0........1.........2.........3.........4.........5.........6"
72 + "12345<html");
73
74 assertDetect(detector, MediaType.OCTET_STREAM, "");
75 }
76
77 public void testDetectMask() throws Exception {
78 MediaType html = new MediaType("text", "html");
79 byte up = (byte) 0xdf;
80 Detector detector = new MagicDetector(
81 html,
82 new byte[] { '<', 'H', 'T', 'M', 'L' },
83 new byte[] { (byte) 0xff, up, up, up, up },
84 0, 64);
85
86 assertDetect(detector, html, "<html");
87 assertDetect(detector, html, "<HTML><head/><body/></html>");
88 assertDetect(detector, html, "<?xml?><HtMl/>");
89 assertDetect(detector, html, "\n <html");
90 assertDetect(detector, html, "\u0000<HTML");
91 assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
92 assertDetect(detector, MediaType.OCTET_STREAM, " html");
93
94 assertDetect(detector, html,
95 "0 1 2 3 4 5 6"
96 + "1234<html");
97 assertDetect(detector, MediaType.OCTET_STREAM,
98 "0 1 2 3 4 5 6"
99 + "12345<html");
100
101 assertDetect(detector, MediaType.OCTET_STREAM, "");
102 }
103
104 private void assertDetect(Detector detector, MediaType type, String data) {
105 try {
106 byte[] bytes = data.getBytes("ASCII");
107 InputStream stream = new ByteArrayInputStream(bytes);
108 assertEquals(type, detector.detect(stream, new Metadata()));
109
110
111 for (int i = 0; i < bytes.length; i++) {
112 assertEquals(bytes[i], (byte) stream.read());
113 }
114 assertEquals(-1, stream.read());
115 } catch (IOException e) {
116 fail("Unexpected exception from MagicDetector");
117 }
118 }
119
120 }