View Javadoc

1   /***
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.BufferedReader;
7   import java.io.CharArrayReader;
8   import java.util.NoSuchElementException;
9   import java.util.StringTokenizer;
10  
11  /***
12   * This class does a best-guess try-anything tokenization.
13   * 
14   * @author jheintz
15   *
16   */
17  public class AnyTokenizer implements Tokenizer {
18  	public static final String TOKENS = " \t!#$%^&*(){}-=+<>///`~;:";
19      
20      public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
21          StringBuffer sb = sourceCode.getCodeBuffer();
22      	BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
23          try {
24          	int lineNumber = 1;
25          	String line = reader.readLine();
26          	while (line != null) {
27          		StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
28          		try {
29              		String token = tokenizer.nextToken();
30          			while (token != null) {
31          				if (!token.equals(" ") && !token.equals("\t")) {
32          					tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
33          				}
34      					token = tokenizer.nextToken();
35          			}
36          		} catch (NoSuchElementException ex) {
37          			// done with tokens
38          		}
39          		// advance iteration variables
40          		line = reader.readLine();
41          		lineNumber++;
42          	}
43          } catch (Exception ex) {
44              ex.printStackTrace();
45          } finally {
46          	try {
47          		reader.close();
48          	} catch (Exception ex) {}
49          	tokenEntries.add(TokenEntry.getEOF());
50          }
51      }
52  }