1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.language;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.io.InputStreamReader;
22 import java.io.Writer;
23
24 import junit.framework.TestCase;
25
26 import org.apache.tika.io.IOUtils;
27
28
29
30
31
32
33
34 public class LanguageIdentifierTest extends TestCase {
35
36 private static final String[] languages = new String[] {
37 "da", "de",
38 };
39
40 public void testLanguageDetection() throws IOException {
41 for (String language : languages) {
42 ProfilingWriter writer = new ProfilingWriter();
43 writeTo(language, writer);
44 LanguageIdentifier identifier =
45 new LanguageIdentifier(writer.getProfile());
46 assertTrue(identifier.toString(), identifier.isReasonablyCertain());
47 assertEquals(language, identifier.getLanguage());
48 }
49 }
50
51 public void testMixedLanguages() throws IOException {
52 for (String language : languages) {
53 for (String other : languages) {
54 if (!language.equals(other)) {
55 ProfilingWriter writer = new ProfilingWriter();
56 writeTo(language, writer);
57 writeTo(other, writer);
58 LanguageIdentifier identifier =
59 new LanguageIdentifier(writer.getProfile());
60 assertFalse(identifier.isReasonablyCertain());
61 }
62 }
63 }
64 }
65
66 private void writeTo(String language, Writer writer) throws IOException {
67 InputStream stream =
68 LanguageIdentifierTest.class.getResourceAsStream(language + ".test");
69 try {
70 IOUtils.copy(new InputStreamReader(stream, "UTF-8"), writer);
71 } finally {
72 stream.close();
73 }
74 }
75
76 }