1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.language;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.InputStreamReader;
22  import java.io.Writer;
23  
24  import junit.framework.TestCase;
25  
26  import org.apache.tika.io.IOUtils;
27  
28  /**
29   * JUnit based test of class {@link LanguageIdentifier}.
30   *
31   * @author Sami Siren
32   * @author Jerome Charron - http://frutch.free.fr/
33   */
34  public class LanguageIdentifierTest extends TestCase {
35  
36      private static final String[] languages = new String[] {
37          "da", "de", /* "el", */ "en", "es", "fi", "fr", "it", "nl", "pt", "sv"
38      };
39  
40      public void testLanguageDetection() throws IOException {
41          for (String language : languages) {
42              ProfilingWriter writer = new ProfilingWriter();
43              writeTo(language, writer);
44              LanguageIdentifier identifier =
45                  new LanguageIdentifier(writer.getProfile());
46              assertTrue(identifier.toString(), identifier.isReasonablyCertain());
47              assertEquals(language, identifier.getLanguage());
48          }
49      }
50  
51      public void testMixedLanguages() throws IOException {
52          for (String language : languages) {
53              for (String other : languages) {
54                  if (!language.equals(other)) {
55                      ProfilingWriter writer = new ProfilingWriter();
56                      writeTo(language, writer);
57                      writeTo(other, writer);
58                      LanguageIdentifier identifier =
59                          new LanguageIdentifier(writer.getProfile());
60                      assertFalse(identifier.isReasonablyCertain());
61                  }
62              }
63          }
64      }
65  
66      private void writeTo(String language, Writer writer) throws IOException {
67          InputStream stream =
68              LanguageIdentifierTest.class.getResourceAsStream(language + ".test");
69          try {
70              IOUtils.copy(new InputStreamReader(stream, "UTF-8"), writer);
71          } finally {
72              stream.close();
73          }
74      }
75  
76  }