1 /***
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 * @authors: Zev Blut zb@ubit.com
4 */
5 package net.sourceforge.pmd.cpd;
6
7 import java.util.List;
8
9 public class RubyTokenizer implements Tokenizer {
10 private boolean downcaseString = true;
11
12 public void tokenize(SourceCode tokens, Tokens tokenEntries) {
13 List code = tokens.getCode();
14 for (int i = 0; i < code.size(); i++) {
15 String currentLine = (String) code.get(i);
16 int loc = 0;
17 while (loc < currentLine.length()) {
18 StringBuffer token = new StringBuffer();
19 loc = getTokenFromLine(currentLine,token,loc);
20 if (token.length() > 0 && !isIgnorableString(token.toString())) {
21 if (downcaseString) {
22 token = new StringBuffer(token.toString().toLowerCase());
23 }
24 tokenEntries.add(
25 new TokenEntry(
26 token.toString(),
27 tokens.getFileName(),
28 i + 1)
29 );
30 }
31 }
32 }
33 tokenEntries.add(TokenEntry.getEOF());
34 }
35
36 private int getTokenFromLine(String line, StringBuffer token, int loc) {
37 for (int j = loc; j < line.length(); j++) {
38 char tok = line.charAt(j);
39 if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
40 if (isComment(tok)) {
41 if (token.length() > 0) {
42 return j;
43 } else {
44 return getCommentToken(line, token, loc);
45 }
46 } else if (isString(tok)) {
47 if (token.length() > 0) {
48
49 return j;
50 } else {
51
52 return parseString(line, token, j, tok);
53 }
54 } else {
55 token.append(tok);
56 }
57 } else {
58 if (token.length() > 0) {
59 return j;
60 }
61 }
62 loc = j;
63 }
64 return loc + 1;
65 }
66
67 private int parseString(String line, StringBuffer token, int loc, char stringType) {
68 boolean escaped = false;
69 boolean done = false;
70
71
72
73 char tok = ' ';
74 while ((loc < line.length()) && !done) {
75 tok = line.charAt(loc);
76 if (escaped && tok == stringType) {
77
78 escaped = false;
79 } else if (tok == stringType && (token.length() > 0)) {
80
81
82 done = true;
83 } else if (tok == '//') {
84
85 escaped = true;
86 } else {
87
88 escaped = false;
89 }
90
91 token.append(tok);
92 loc++;
93 }
94 return loc + 1;
95 }
96
97 private boolean ignoreCharacter(char tok) {
98 boolean result = false;
99 switch (tok) {
100 case '{':
101 case '}':
102 case '(':
103 case ')':
104 case ';':
105 case ',':
106 result = true;
107 break;
108 default :
109 result = false;
110 }
111 return result;
112 }
113
114 private boolean isString(char tok) {
115 boolean result = false;
116 switch (tok) {
117 case '\'':
118 case '"':
119 result = true;
120 break;
121 default:
122 result = false;
123 }
124 return result;
125 }
126
127 private boolean isComment(char tok) {
128 return tok == '#';
129 }
130
131 private int getCommentToken(String line, StringBuffer token, int loc) {
132 while (loc < line.length()) {
133 token.append(line.charAt(loc));
134 loc++;
135 }
136 return loc;
137 }
138
139 private boolean isIgnorableString(String token) {
140 return token == "do" || token == "end";
141 }
142 }