1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.detect;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.util.Arrays;
23  
24  import junit.framework.TestCase;
25  
26  import org.apache.tika.metadata.Metadata;
27  import org.apache.tika.mime.MediaType;
28  
29  /**
30   * Test cases for the {@link TextDetector} class.
31   */
32  public class TextDetectorTest extends TestCase {
33  
34      private final Detector detector = new TextDetector();
35  
36      public void testDetectNull() throws Exception {
37          assertEquals(
38                  MediaType.OCTET_STREAM,
39                  detector.detect(null, new Metadata()));
40      }
41  
42      public void testDetectText() throws Exception {
43          assertText(new byte[0]);
44  
45          assertText("Hello, World!".getBytes("UTF-8"));
46          assertText(" \t\r\n".getBytes("UTF-8"));
47          assertText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
48          assertNotText(new byte[] { 0 });
49          assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });
50  
51          byte[] data = new byte[512];
52          Arrays.fill(data, (byte) '.');
53          assertText(data);
54          Arrays.fill(data, (byte) 0x1f);
55          assertNotText(data);
56  
57          data = new byte[513];
58          Arrays.fill(data, (byte) '.');
59          assertText(data);
60          Arrays.fill(data, (byte) 0x1f);
61          assertNotText(data);
62      }
63  
64      private void assertText(byte[] data) {
65          try {
66              InputStream stream = new ByteArrayInputStream(data);
67              assertEquals(
68                      MediaType.TEXT_PLAIN,
69                      detector.detect(stream, new Metadata()));
70  
71              // Test that the stream has been reset
72              for (int i = 0; i < data.length; i++) {
73                  assertEquals(data[i], (byte) stream.read());
74              }
75              assertEquals(-1, stream.read());
76          } catch (IOException e) {
77              fail("Unexpected exception from TextDetector");
78          }
79      }
80  
81      private void assertNotText(byte[] data) {
82          try {
83              assertEquals(
84                      MediaType.OCTET_STREAM,
85                      detector.detect(
86                              new ByteArrayInputStream(data), new Metadata()));
87          } catch (IOException e) {
88              fail("Unexpected exception from TextDetector");
89          }
90      }
91  
92  }