1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.detect;
18  
19  import java.io.IOException;
20  import java.util.HashMap;
21  import java.util.Map;
22  import java.util.regex.Pattern;
23  
24  import org.apache.tika.metadata.Metadata;
25  import org.apache.tika.mime.MediaType;
26  
27  import junit.framework.TestCase;
28  
29  /**
30   * Test cases for the {@link NameDetector} class.
31   */
32  public class NameDetectorTest extends TestCase {
33  
34      private Detector detector;
35  
36      protected void setUp() {
37          Map<Pattern, MediaType> patterns = new HashMap<Pattern, MediaType>();
38          patterns.put(
39                  Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE),
40                  MediaType.TEXT_PLAIN);
41          patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
42          detector = new NameDetector(patterns);
43      }
44  
45      public void testDetect() {
46          assertDetect(MediaType.TEXT_PLAIN, "text.txt");
47          assertDetect(MediaType.TEXT_PLAIN, "text.txt ");    // trailing space
48          assertDetect(MediaType.TEXT_PLAIN, "text.txt\n");   // trailing newline
49          assertDetect(MediaType.TEXT_PLAIN, "text.txt?a=b"); // URL query
50          assertDetect(MediaType.TEXT_PLAIN, "text.txt#abc"); // URL fragment
51          assertDetect(MediaType.TEXT_PLAIN, "text%2Etxt");   // URL encoded
52          assertDetect(MediaType.TEXT_PLAIN, "text.TXT");     // case insensitive
53          assertDetect(MediaType.OCTET_STREAM, "text.txt.gz");
54  
55          assertDetect(MediaType.TEXT_PLAIN, "README");
56          assertDetect(MediaType.TEXT_PLAIN, " README ");     // space around
57          assertDetect(MediaType.TEXT_PLAIN, "\tREADME\n");   // other whitespace
58          assertDetect(MediaType.TEXT_PLAIN, "/a/README");    // leading path
59          assertDetect(MediaType.TEXT_PLAIN, "\\b\\README");  // windows path
60          assertDetect(MediaType.OCTET_STREAM, "ReadMe");     // case sensitive
61          assertDetect(MediaType.OCTET_STREAM, "README.NOW");
62  
63          // tough one
64          assertDetect(
65                  MediaType.TEXT_PLAIN,
66                  " See http://www.example.com:1234/README.txt?a=b#c \n");
67          assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
68          assertDetect(MediaType.OCTET_STREAM, "See README");   // but not this
69  
70          // test also the zero input cases
71          assertDetect(MediaType.OCTET_STREAM, "");
72          assertDetect(MediaType.OCTET_STREAM, null);
73          try {
74              assertEquals(
75                      MediaType.OCTET_STREAM,
76                      detector.detect(null, new Metadata()));
77          } catch (IOException e) {
78              fail("NameDetector should never throw an IOException");
79          }
80      }
81  
82      private void assertDetect(MediaType type, String name){
83          Metadata metadata = new Metadata();
84          metadata.set(Metadata.RESOURCE_NAME_KEY, name);
85          try {
86              assertEquals(type, detector.detect(null, metadata));
87          } catch (IOException e) {
88              fail("NameDetector should never throw an IOException");
89          }
90      }
91  
92  }