View Javadoc

1   /***
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    * @authors: Zev Blut zb@ubit.com
4    */
5   package net.sourceforge.pmd.cpd;
6   
7   import java.util.List;
8   
9   public class RubyTokenizer implements Tokenizer {
10      private boolean downcaseString = true;   
11  
12      public void tokenize(SourceCode tokens, Tokens tokenEntries) {
13          List code = tokens.getCode();
14          for (int i = 0; i < code.size(); i++) {
15              String currentLine = (String) code.get(i);
16              int loc = 0;
17              while (loc < currentLine.length()) {
18                  StringBuffer token = new StringBuffer();
19                  loc = getTokenFromLine(currentLine,token,loc);
20                  if (token.length() > 0 && !isIgnorableString(token.toString())) {
21                      if (downcaseString) {
22                          token = new StringBuffer(token.toString().toLowerCase());               
23                      }
24                      tokenEntries.add(
25                          new TokenEntry(
26                              token.toString(),
27                              tokens.getFileName(),
28                              i + 1)
29                          );
30                  }
31              }
32          }
33          tokenEntries.add(TokenEntry.getEOF());
34      }
35      
36      private int getTokenFromLine(String line, StringBuffer token, int loc) {        
37          for (int j = loc; j < line.length(); j++) {
38              char tok = line.charAt(j);
39              if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
40                  if (isComment(tok)) {
41                      if (token.length() > 0) {
42                          return j;
43                      } else {
44                          return getCommentToken(line, token, loc);
45                      }
46                  } else if (isString(tok)) {
47                      if (token.length() > 0) {
48                          //if (loc == lin
49                          return j; // we need to now parse the string as a seperate token.
50                      } else {
51                          // we are at the start of a string
52                          return parseString(line, token, j, tok);
53                      }
54                  } else {
55                      token.append(tok);
56                  }                                
57              } else {
58                  if (token.length() > 0) {
59                      return j;                
60                  }
61              }
62              loc = j;
63          }
64          return loc + 1;
65      }
66      
67      private int parseString(String line, StringBuffer token, int loc, char stringType) {
68          boolean escaped = false;
69          boolean done = false;
70          //System.out.println("Parsing String:" + stringType);
71          //System.out.println("Starting loc:" + loc);
72          // problem of strings that span multiple lines :-(
73          char tok = ' '; // this will be replaced.
74          while ((loc < line.length()) && !done) {
75              tok = line.charAt(loc);
76              if (escaped && tok == stringType) {
77             //     System.out.println("Found an escaped string");
78                  escaped = false;
79              } else if (tok == stringType && (token.length() > 0)) {
80                  // we are done
81               //   System.out.println("Found an end string");
82                  done = true;
83              } else if (tok == '//') {
84                 // System.out.println("Found an escaped char");
85                  escaped = true;
86              } else {
87                 // System.out.println("Adding char:" + tok + ";loc:" + loc);
88                  escaped = false;
89              }
90              //System.out.println("Adding char to String:" + token.toString());
91              token.append(tok);
92              loc++;
93          }        
94          return loc + 1;
95      }
96      
97      private boolean ignoreCharacter(char tok) {
98          boolean result = false;
99          switch (tok) {
100             case '{':
101             case '}':
102             case '(':
103             case ')':
104             case ';':
105             case ',':
106             result = true;
107             break;
108             default :
109             result = false;
110         }
111         return result;
112     }
113     
114     private boolean isString(char tok) {
115         boolean result = false;
116         switch (tok) {
117             case '\'':
118             case '"':
119             result = true;
120             break;
121             default:
122             result = false;
123         }
124         return result;
125     }
126     
127     private boolean isComment(char tok) {
128         return tok == '#';
129     }
130     
131     private int getCommentToken(String line, StringBuffer token, int loc) {
132         while (loc < line.length()) {
133             token.append(line.charAt(loc));
134             loc++;
135         }
136         return loc;
137     }
138     
139     private boolean isIgnorableString(String token) {
140         return token == "do" || token == "end";
141     }
142 }