1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.InputStream;
21 import java.io.Reader;
22
23 import org.apache.tika.metadata.Metadata;
24
25 import junit.framework.TestCase;
26
27 public class ParsingReaderTest extends TestCase {
28
29 public void testPlainText() throws Exception {
30 String data = "test content";
31 InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
32 Reader reader = new ParsingReader(stream, "test.txt");
33 assertEquals('t', reader.read());
34 assertEquals('e', reader.read());
35 assertEquals('s', reader.read());
36 assertEquals('t', reader.read());
37 assertEquals(' ', reader.read());
38 assertEquals('c', reader.read());
39 assertEquals('o', reader.read());
40 assertEquals('n', reader.read());
41 assertEquals('t', reader.read());
42 assertEquals('e', reader.read());
43 assertEquals('n', reader.read());
44 assertEquals('t', reader.read());
45 assertEquals('\n', reader.read());
46 assertEquals(-1, reader.read());
47 reader.close();
48 assertEquals(-1, stream.read());
49 }
50
51 public void testXML() throws Exception {
52 String data = "<p>test <span>content</span></p>";
53 InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
54 Reader reader = new ParsingReader(stream, "test.xml");
55 assertEquals('t', (char) reader.read());
56 assertEquals('e', (char) reader.read());
57 assertEquals('s', (char) reader.read());
58 assertEquals('t', (char) reader.read());
59 assertEquals(' ', (char) reader.read());
60 assertEquals('c', (char) reader.read());
61 assertEquals('o', (char) reader.read());
62 assertEquals('n', (char) reader.read());
63 assertEquals('t', (char) reader.read());
64 assertEquals('e', (char) reader.read());
65 assertEquals('n', (char) reader.read());
66 assertEquals('t', (char) reader.read());
67 assertEquals('\n', (char) reader.read());
68 assertEquals(-1, reader.read());
69 reader.close();
70 assertEquals(-1, stream.read());
71 }
72
73
74
75
76
77
78 public void testMetadata() throws Exception {
79 Metadata metadata = new Metadata();
80 InputStream stream = ParsingReaderTest.class.getResourceAsStream(
81 "/test-documents/testEXCEL.xls");
82 Reader reader =
83 new ParsingReader(new AutoDetectParser(), stream, metadata);
84 try {
85
86 assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
87
88 assertEquals('F', (char) reader.read());
89 assertEquals('e', (char) reader.read());
90 assertEquals('u', (char) reader.read());
91 assertEquals('i', (char) reader.read());
92 assertEquals('l', (char) reader.read());
93 assertEquals('1', (char) reader.read());
94 } finally {
95 reader.close();
96 }
97 }
98
99 }