1 /***
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 */
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.BufferedReader;
7 import java.io.CharArrayReader;
8 import java.util.NoSuchElementException;
9 import java.util.StringTokenizer;
10
11 /***
12 * This class does a best-guess try-anything tokenization.
13 *
14 * @author jheintz
15 *
16 */
17 public class AnyTokenizer implements Tokenizer {
18 public static final String TOKENS = " \t!#$%^&*(){}-=+<>///`~;:";
19
20 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
21 StringBuffer sb = sourceCode.getCodeBuffer();
22 BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
23 try {
24 int lineNumber = 1;
25 String line = reader.readLine();
26 while (line != null) {
27 StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
28 try {
29 String token = tokenizer.nextToken();
30 while (token != null) {
31 if (!token.equals(" ") && !token.equals("\t")) {
32 tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
33 }
34 token = tokenizer.nextToken();
35 }
36 } catch (NoSuchElementException ex) {
37
38 }
39
40 line = reader.readLine();
41 lineNumber++;
42 }
43 } catch (Exception ex) {
44 ex.printStackTrace();
45 } finally {
46 try {
47 reader.close();
48 } catch (Exception ex) {}
49 tokenEntries.add(TokenEntry.getEOF());
50 }
51 }
52 }