1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.language;
18
19 import java.io.IOException;
20 import java.io.Writer;
21
22
23
24
25
26
27 public class ProfilingWriter extends Writer {
28
29 private final LanguageProfile profile;
30
31 private char[] buffer = new char[] { 0, 0, '_' };
32
33 private int n = 1;
34
35 public ProfilingWriter(LanguageProfile profile) {
36 this.profile = profile;
37 }
38
39 public ProfilingWriter() {
40 this(new LanguageProfile());
41 }
42
43
44
45
46
47
48
49
50
51 public LanguageProfile getProfile() {
52 return profile;
53 }
54
55
56
57
58
59
60
61 public LanguageIdentifier getLanguage() {
62 return new LanguageIdentifier(profile);
63 }
64
65 @Override
66 public void write(char[] cbuf, int off, int len) {
67 for (int i = 0; i < len; i++) {
68 char c = Character.toLowerCase(cbuf[off + i]);
69 if (Character.isLetter(c)) {
70 addLetter(c);
71 } else {
72 addSeparator();
73 }
74 }
75 }
76
77 private void addLetter(char c) {
78 System.arraycopy(buffer, 1, buffer, 0, buffer.length - 1);
79 buffer[buffer.length - 1] = c;
80 n++;
81 if (n >= buffer.length) {
82 profile.add(new String(buffer));
83 }
84 }
85
86 private void addSeparator() {
87 addLetter('_');
88 n = 1;
89 }
90
91 @Override
92 public void close() throws IOException {
93 addSeparator();
94 }
95
96
97
98
99 @Override
100 public void flush() {
101 }
102
103 }