001    /**
002     * Copyright 2005 Alan Green
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     * http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     *
016     */
017    
018    
019    package org.codehaus.groovy.antlr;
020    
021    import java.io.IOException;
022    import java.io.Reader;
023    
024    import antlr.CharScanner;
025    
026    /**
027     * Translates GLS-defined unicode escapes into characters. Throws an exception
028     * in the event of an invalid unicode escape being detected.
029     *
030     * <p>No attempt has been made to optimise this class for speed or
031     * space.</p>
032     *
033     * @version $Revision: 1.3 $
034     */
035    public class UnicodeEscapingReader extends Reader {
036    
037        private Reader reader;
038        private CharScanner lexer;
039        private boolean hasNextChar = false;
040        private int nextChar;
041        private SourceBuffer sourceBuffer;
042    
043        /**
044         * Constructor.
045         * @param reader The reader that this reader will filter over.
046         */
047        public UnicodeEscapingReader(Reader reader,SourceBuffer sourceBuffer) {
048            this.reader = reader;
049            this.sourceBuffer = sourceBuffer;
050        }
051    
052        /**
053         * Sets the lexer that is using this reader. Must be called before the
054         * lexer is used.
055         */
056        public void setLexer(CharScanner lexer) {
057            this.lexer = lexer;
058        }
059    
060        /**
061         * Reads characters from the underlying reader.
062         * @see java.io.Reader#read(char[],int,int)
063         */
064        public int read(char cbuf[], int off, int len) throws IOException {
065            int c = 0;
066            int count = 0;
067            while (count < len && (c = read())!= -1) {
068                cbuf[off + count] = (char) c;
069                count++;
070            }
071            return (count == 0 && c == -1) ? -1 : count;
072        }
073    
074        /**
075         * Gets the next character from the underlying reader,
076         * translating escapes as required.
077         * @see java.io.Reader#close()
078         */
079        public int read() throws IOException {
080            if (hasNextChar) {
081                hasNextChar = false;
082                write(nextChar);
083                return nextChar;
084            }
085    
086            int c = reader.read();
087            if (c != '\\') {
088                write(c);
089                return c;
090            }
091    
092            // Have one backslash, continue if next char is 'u'
093            c = reader.read();
094            if (c != 'u') {
095                hasNextChar = true;
096                nextChar = c;
097                write('\\');
098                return '\\';
099            }
100    
101            // Swallow multiple 'u's
102            do {
103                c = reader.read();
104            } while (c == 'u');
105    
106            // Get first hex digit
107            checkHexDigit(c);
108            StringBuffer charNum = new StringBuffer();
109            charNum.append((char) c);
110    
111            // Must now be three more hex digits
112            for (int i = 0; i < 3; i++) {
113                c = reader.read();
114                checkHexDigit(c);
115                charNum.append((char) c);
116            }
117            int rv = Integer.parseInt(charNum.toString(), 16);
118            write(rv);
119            return rv;
120        }
121        private void write(int c) {
122            if (sourceBuffer != null) {sourceBuffer.write(c);}
123        }
124        /**
125         * Checks that the given character is indeed a hex digit.
126         */
127        private void checkHexDigit(int c) throws IOException {
128            if (c >= '0' && c <= '9') {
129                return;
130            }
131            if (c >= 'a' && c <= 'f') {
132                return;
133            }
134            if (c >= 'A' && c <= 'F') {
135                return;
136            }
137            // Causes the invalid escape to be skipped
138            hasNextChar = true;
139            nextChar = c;
140            throw new IOException("Did not find four digit hex character code."
141                    + " line: " + lexer.getLine() + " col:" + lexer.getColumn());
142        }
143    
144        /**
145         * Closes this reader by calling close on the underlying reader.
146         * @see java.io.Reader#close()
147         */
148        public void close() throws IOException {
149            reader.close();
150        }
151    }