1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.detect;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.util.Arrays;
23
24 import junit.framework.TestCase;
25
26 import org.apache.tika.metadata.Metadata;
27 import org.apache.tika.mime.MediaType;
28
29
30
31
32 public class TextDetectorTest extends TestCase {
33
34 private final Detector detector = new TextDetector();
35
36 public void testDetectNull() throws Exception {
37 assertEquals(
38 MediaType.OCTET_STREAM,
39 detector.detect(null, new Metadata()));
40 }
41
42 public void testDetectText() throws Exception {
43 assertText(new byte[0]);
44
45 assertText("Hello, World!".getBytes("UTF-8"));
46 assertText(" \t\r\n".getBytes("UTF-8"));
47 assertText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
48 assertNotText(new byte[] { 0 });
49 assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });
50
51 byte[] data = new byte[512];
52 Arrays.fill(data, (byte) '.');
53 assertText(data);
54 Arrays.fill(data, (byte) 0x1f);
55 assertNotText(data);
56
57 data = new byte[513];
58 Arrays.fill(data, (byte) '.');
59 assertText(data);
60 Arrays.fill(data, (byte) 0x1f);
61 assertNotText(data);
62 }
63
64 private void assertText(byte[] data) {
65 try {
66 InputStream stream = new ByteArrayInputStream(data);
67 assertEquals(
68 MediaType.TEXT_PLAIN,
69 detector.detect(stream, new Metadata()));
70
71
72 for (int i = 0; i < data.length; i++) {
73 assertEquals(data[i], (byte) stream.read());
74 }
75 assertEquals(-1, stream.read());
76 } catch (IOException e) {
77 fail("Unexpected exception from TextDetector");
78 }
79 }
80
81 private void assertNotText(byte[] data) {
82 try {
83 assertEquals(
84 MediaType.OCTET_STREAM,
85 detector.detect(
86 new ByteArrayInputStream(data), new Metadata()));
87 } catch (IOException e) {
88 fail("Unexpected exception from TextDetector");
89 }
90 }
91
92 }