1 |
| |
2 |
| |
3 |
| |
4 |
| |
5 |
| package net.sourceforge.pmd.cpd; |
6 |
| |
7 |
| import java.util.List; |
8 |
| |
9 |
| public class RubyTokenizer implements Tokenizer { |
10 |
| private boolean downcaseString = true; |
11 |
| |
12 |
0
| public void tokenize(SourceCode tokens, Tokens tokenEntries) {
|
13 |
0
| List code = tokens.getCode();
|
14 |
0
| for (int i = 0; i < code.size(); i++) {
|
15 |
0
| String currentLine = (String) code.get(i);
|
16 |
0
| int loc = 0;
|
17 |
0
| while (loc < currentLine.length()) {
|
18 |
0
| StringBuffer token = new StringBuffer();
|
19 |
0
| loc = getTokenFromLine(currentLine,token,loc);
|
20 |
0
| if (token.length() > 0 && !isIgnorableString(token.toString())) {
|
21 |
0
| if (downcaseString) {
|
22 |
0
| token = new StringBuffer(token.toString().toLowerCase());
|
23 |
| } |
24 |
0
| tokenEntries.add(
|
25 |
| new TokenEntry( |
26 |
| token.toString(), |
27 |
| tokens.getFileName(), |
28 |
| i + 1) |
29 |
| ); |
30 |
| } |
31 |
| } |
32 |
| } |
33 |
0
| tokenEntries.add(TokenEntry.getEOF());
|
34 |
| } |
35 |
| |
36 |
0
| private int getTokenFromLine(String line, StringBuffer token, int loc) {
|
37 |
0
| for (int j = loc; j < line.length(); j++) {
|
38 |
0
| char tok = line.charAt(j);
|
39 |
0
| if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
|
40 |
0
| if (isComment(tok)) {
|
41 |
0
| if (token.length() > 0) {
|
42 |
0
| return j;
|
43 |
| } else { |
44 |
0
| return getCommentToken(line, token, loc);
|
45 |
| } |
46 |
0
| } else if (isString(tok)) {
|
47 |
0
| if (token.length() > 0) {
|
48 |
| |
49 |
0
| return j;
|
50 |
| } else { |
51 |
| |
52 |
0
| return parseString(line, token, j, tok);
|
53 |
| } |
54 |
| } else { |
55 |
0
| token.append(tok);
|
56 |
| } |
57 |
| } else { |
58 |
0
| if (token.length() > 0) {
|
59 |
0
| return j;
|
60 |
| } |
61 |
| } |
62 |
0
| loc = j;
|
63 |
| } |
64 |
0
| return loc + 1;
|
65 |
| } |
66 |
| |
67 |
0
| private int parseString(String line, StringBuffer token, int loc, char stringType) {
|
68 |
0
| boolean escaped = false;
|
69 |
0
| boolean done = false;
|
70 |
| |
71 |
| |
72 |
| |
73 |
0
| char tok = ' ';
|
74 |
0
| while ((loc < line.length()) && !done) {
|
75 |
0
| tok = line.charAt(loc);
|
76 |
0
| if (escaped && tok == stringType) {
|
77 |
| |
78 |
0
| escaped = false;
|
79 |
0
| } else if (tok == stringType && (token.length() > 0)) {
|
80 |
| |
81 |
| |
82 |
0
| done = true;
|
83 |
0
| } else if (tok == '\\') {
|
84 |
| |
85 |
0
| escaped = true;
|
86 |
| } else { |
87 |
| |
88 |
0
| escaped = false;
|
89 |
| } |
90 |
| |
91 |
0
| token.append(tok);
|
92 |
0
| loc++;
|
93 |
| } |
94 |
0
| return loc + 1;
|
95 |
| } |
96 |
| |
97 |
0
| private boolean ignoreCharacter(char tok) {
|
98 |
0
| boolean result = false;
|
99 |
0
| switch (tok) {
|
100 |
0
| case '{':
|
101 |
0
| case '}':
|
102 |
0
| case '(':
|
103 |
0
| case ')':
|
104 |
0
| case ';':
|
105 |
0
| case ',':
|
106 |
0
| result = true;
|
107 |
0
| break;
|
108 |
0
| default :
|
109 |
0
| result = false;
|
110 |
| } |
111 |
0
| return result;
|
112 |
| } |
113 |
| |
114 |
0
| private boolean isString(char tok) {
|
115 |
0
| boolean result = false;
|
116 |
0
| switch (tok) {
|
117 |
0
| case '\'':
|
118 |
0
| case '"':
|
119 |
0
| result = true;
|
120 |
0
| break;
|
121 |
0
| default:
|
122 |
0
| result = false;
|
123 |
| } |
124 |
0
| return result;
|
125 |
| } |
126 |
| |
127 |
0
| private boolean isComment(char tok) {
|
128 |
0
| return tok == '#';
|
129 |
| } |
130 |
| |
131 |
0
| private int getCommentToken(String line, StringBuffer token, int loc) {
|
132 |
0
| while (loc < line.length()) {
|
133 |
0
| token.append(line.charAt(loc));
|
134 |
0
| loc++;
|
135 |
| } |
136 |
0
| return loc;
|
137 |
| } |
138 |
| |
139 |
0
| private boolean isIgnorableString(String token) {
|
140 |
0
| return token == "do" || token == "end";
|
141 |
| } |
142 |
| } |