1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.InputStream;
21  import java.io.Reader;
22  
23  import org.apache.tika.metadata.Metadata;
24  
25  import junit.framework.TestCase;
26  
27  public class ParsingReaderTest extends TestCase {
28  
29      public void testPlainText() throws Exception {
30          String data = "test content";
31          InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
32          Reader reader = new ParsingReader(stream, "test.txt");
33          assertEquals('t', reader.read());
34          assertEquals('e', reader.read());
35          assertEquals('s', reader.read());
36          assertEquals('t', reader.read());
37          assertEquals(' ', reader.read());
38          assertEquals('c', reader.read());
39          assertEquals('o', reader.read());
40          assertEquals('n', reader.read());
41          assertEquals('t', reader.read());
42          assertEquals('e', reader.read());
43          assertEquals('n', reader.read());
44          assertEquals('t', reader.read());
45          assertEquals('\n', reader.read());
46          assertEquals(-1, reader.read());
47          reader.close();
48          assertEquals(-1, stream.read());
49      }
50  
51      public void testXML() throws Exception {
52          String data = "<p>test <span>content</span></p>";
53          InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
54          Reader reader = new ParsingReader(stream, "test.xml");
55          assertEquals('t', (char) reader.read());
56          assertEquals('e', (char) reader.read());
57          assertEquals('s', (char) reader.read());
58          assertEquals('t', (char) reader.read());
59          assertEquals(' ', (char) reader.read());
60          assertEquals('c', (char) reader.read());
61          assertEquals('o', (char) reader.read());
62          assertEquals('n', (char) reader.read());
63          assertEquals('t', (char) reader.read());
64          assertEquals('e', (char) reader.read());
65          assertEquals('n', (char) reader.read());
66          assertEquals('t', (char) reader.read());
67          assertEquals('\n', (char) reader.read());
68          assertEquals(-1, reader.read());
69          reader.close();
70          assertEquals(-1, stream.read());
71      }
72  
73      /**
74       * Test case for TIKA-203
75       *
76       * @see <a href="https://issues.apache.org/jira/browse/TIKA-203">TIKA-203</a>
77       */
78      public void testMetadata() throws Exception {
79          Metadata metadata = new Metadata();
80          InputStream stream = ParsingReaderTest.class.getResourceAsStream(
81                  "/test-documents/testEXCEL.xls");
82          Reader reader =
83              new ParsingReader(new AutoDetectParser(), stream, metadata);
84          try {
85              // Metadata should already be available
86              assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
87              // Check that the internal buffering isn't broken
88              assertEquals('F', (char) reader.read());
89              assertEquals('e', (char) reader.read());
90              assertEquals('u', (char) reader.read());
91              assertEquals('i', (char) reader.read());
92              assertEquals('l', (char) reader.read());
93              assertEquals('1', (char) reader.read());
94          } finally {
95              reader.close();
96          }
97      }
98  
99  }