GNU Classpath (0.20) | |
Frames | No Frames |
/* InputStreamReader.java -- Reader that transforms bytes to chars
   Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version.
*/


package java.io;

import gnu.java.nio.charset.EncodingHelper;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/**
 * This class reads characters from a byte input stream.  The characters
 * read are converted from bytes in the underlying stream by a
 * decoding layer.  The decoding layer transforms bytes to chars according
 * to an encoding standard.  There are many available encodings to choose
 * from.  The desired encoding can either be specified by name, or if no
 * encoding is selected, the system default encoding will be used.  The
 * system default encoding name is determined from the system property
 * <code>file.encoding</code>.  The only encodings that are guaranteed to
 * be available are "8859_1" (the Latin-1 character set) and "UTF8".
 * Unfortunately, Java does not provide a mechanism for listing the
 * encodings that are supported in a given implementation.
 * <p>
 * Here is a list of standard encoding names that may be available:
 * <p>
 * <ul>
 * <li>8859_1 (ISO-8859-1/Latin-1)</li>
 * <li>8859_2 (ISO-8859-2/Latin-2)</li>
 * <li>8859_3 (ISO-8859-3/Latin-3)</li>
 * <li>8859_4 (ISO-8859-4/Latin-4)</li>
 * <li>8859_5 (ISO-8859-5/Cyrillic)</li>
 * <li>8859_6 (ISO-8859-6/Arabic)</li>
 * <li>8859_7 (ISO-8859-7/Greek)</li>
 * <li>8859_8 (ISO-8859-8/Hebrew)</li>
 * <li>8859_9 (ISO-8859-9/Latin-5)</li>
 * <li>ASCII (7-bit ASCII)</li>
 * <li>UTF8 (UCS Transformation Format-8)</li>
 * <li>More later</li>
 * </ul>
 * <p>
 * It is recommended that applications do not use
 * <code>InputStreamReader</code>'s
 * directly.  Rather, for efficiency purposes, an object of this class
 * should be wrapped by a <code>BufferedReader</code>.
 * <p>
 * Due to a deficiency in the Java class library design, there is no standard
 * way for an application to install its own byte-character encoding.
 *
 * @see BufferedReader
 * @see InputStream
 *
 * @author Robert Schuster
 * @author Aaron M. Renn (arenn@urbanophile.com)
 * @author Per Bothner (bothner@cygnus.com)
 * @date April 22, 1998.
 */
public class InputStreamReader extends Reader
{
  /**
   * java.io name reported for the built-in Latin-1 path, which maps each
   * byte directly to a char without going through NIO.
   */
  private static final String LATIN1_NAME = "ISO8859_1";

  /**
   * The input stream.  Set to <code>null</code> once the reader is closed.
   */
  private InputStream in;

  /**
   * The charset decoder, or <code>null</code> when the built-in Latin-1
   * path is used (and after the reader has been closed).
   */
  private CharsetDecoder decoder;

  /**
   * End of stream reached.
   */
  private boolean isDone = false;

  /**
   * Number of bytes fetched from the stream per requested char.  It is
   * taken from the charset's <em>encoder</em> and only used to size the
   * byte array that is filled before decoding.
   */
  private float maxBytesPerChar;

  /**
   * Buffer holding surplus loaded bytes (if any)
   */
  private ByteBuffer byteBuffer;

  /**
   * java.io canonical name of the encoding.
   */
  private String encoding;

  /**
   * We might decode to a 2-char UTF-16 surrogate, which won't fit in the
   * output buffer. In this case we need to save the surrogate char.
   */
  private char savedSurrogate;
  private boolean hasSavedSurrogate = false;

  /**
   * This method initializes a new instance of <code>InputStreamReader</code>
   * to read from the specified stream using the default encoding, i.e. the
   * one named by the system property <code>file.encoding</code>.  If that
   * encoding cannot be set up for any reason, the reader silently falls
   * back to Latin-1.
   *
   * @param in The <code>InputStream</code> to read from
   *
   * @exception NullPointerException If <code>in</code> is <code>null</code>
   */
  public InputStreamReader(InputStream in)
  {
    if (in == null)
      throw new NullPointerException();
    this.in = in;
    try
      {
        encoding = System.getProperty("file.encoding");
        // Don't use NIO if avoidable
        if (EncodingHelper.isISOLatin1(encoding))
          {
            useLatin1();
            return;
          }
        Charset cs = EncodingHelper.getCharset(encoding);
        decoder = cs.newDecoder();
        encoding = EncodingHelper.getOldCanonical(cs.name());
        maxBytesPerChar = estimateMaxBytesPerChar(cs);
        configure(decoder);
      }
    catch (RuntimeException e)
      {
        // The default encoding must never make construction fail.
        useLatin1();
      }
    catch (UnsupportedEncodingException e)
      {
        useLatin1();
      }
  }

  /**
   * This method initializes a new instance of <code>InputStreamReader</code>
   * to read from the specified stream using a caller supplied character
   * encoding scheme.  Note that due to a deficiency in the Java language
   * design, there is no way to determine which encodings are supported.
   * <p>
   * A <code>RuntimeException</code> raised while the charset or its decoder
   * is being set up is not propagated; the reader falls back to Latin-1
   * instead.
   *
   * @param in The <code>InputStream</code> to read from
   * @param encoding_name The name of the encoding scheme to use
   *
   * @exception UnsupportedEncodingException If the encoding scheme
   * requested is not available.
   * @exception NullPointerException If either argument is <code>null</code>
   */
  public InputStreamReader(InputStream in, String encoding_name)
    throws UnsupportedEncodingException
  {
    if (in == null
        || encoding_name == null)
      throw new NullPointerException();

    this.in = in;
    // Don't use NIO if avoidable
    if (EncodingHelper.isISOLatin1(encoding_name))
      {
        useLatin1();
        return;
      }
    try
      {
        Charset cs = EncodingHelper.getCharset(encoding_name);
        maxBytesPerChar = estimateMaxBytesPerChar(cs);

        decoder = cs.newDecoder();
        configure(decoder);

        // The encoding should be the old name, if such exists.
        encoding = EncodingHelper.getOldCanonical(cs.name());
      }
    catch (RuntimeException e)
      {
        useLatin1();
      }
  }

  /**
   * Creates an InputStreamReader that uses a decoder of the given
   * charset to decode the bytes in the InputStream into
   * characters.
   *
   * @param in The <code>InputStream</code> to read from
   * @param charset The charset whose decoder is used
   *
   * @exception NullPointerException If either argument is <code>null</code>
   *
   * @since 1.5
   */
  public InputStreamReader(InputStream in, Charset charset)
  {
    if (in == null || charset == null)
      throw new NullPointerException();

    this.in = in;
    decoder = charset.newDecoder();
    // Without this the field stays 0 and read() would never fetch a byte,
    // making every stream look empty.
    maxBytesPerChar = estimateMaxBytesPerChar(charset);
    configure(decoder);
    encoding = EncodingHelper.getOldCanonical(charset.name());
  }

  /**
   * Creates an InputStreamReader that uses the given charset decoder
   * to decode the bytes in the InputStream into characters.  The decoder
   * is reconfigured to replace malformed and unmappable input and is reset.
   *
   * @param in The <code>InputStream</code> to read from
   * @param decoder The decoder to use
   *
   * @exception NullPointerException If either argument is <code>null</code>
   *
   * @since 1.5
   */
  public InputStreamReader(InputStream in, CharsetDecoder decoder)
  {
    if (in == null || decoder == null)
      throw new NullPointerException();

    this.in = in;
    this.decoder = decoder;

    Charset charset = decoder.charset();
    maxBytesPerChar = estimateMaxBytesPerChar(charset);
    configure(decoder);

    if (charset == null)
      encoding = "US-ASCII";
    else
      encoding = EncodingHelper.getOldCanonical(charset.name());
  }

  /**
   * Switches this reader to the built-in Latin-1 path, which does not use
   * a <code>CharsetDecoder</code>.
   */
  private void useLatin1()
  {
    encoding = LATIN1_NAME;
    maxBytesPerChar = 1f;
    decoder = null;
  }

  /**
   * Makes the decoder substitute malformed and unmappable input instead of
   * reporting it, and puts it into its initial state.
   */
  private static void configure(CharsetDecoder dec)
  {
    dec.onMalformedInput(CodingErrorAction.REPLACE);
    dec.onUnmappableCharacter(CodingErrorAction.REPLACE);
    dec.reset();
  }

  /**
   * Returns the maximum number of bytes the charset's encoder emits per
   * char, or 1 if there is no charset or the charset cannot encode.
   */
  private static float estimateMaxBytesPerChar(Charset cs)
  {
    if (cs == null)
      return 1f;
    try
      {
        return cs.newEncoder().maxBytesPerChar();
      }
    catch (UnsupportedOperationException ignored)
      {
        // Decode-only charset: there is no encoder to ask.
        return 1f;
      }
  }

  /**
   * This method closes this stream, as well as the underlying
   * <code>InputStream</code>.
   *
   * @exception IOException If an error occurs
   */
  public void close() throws IOException
  {
    synchronized (lock)
      {
        // Makes sure all intermediate data is released by the decoder.
        if (decoder != null)
          decoder.reset();
        if (in != null)
          in.close();
        in = null;
        isDone = true;
        decoder = null;
      }
  }

  /**
   * This method returns the name of the encoding that is currently in use
   * by this object.  If the stream has been closed, this method is allowed
   * to return <code>null</code>.
   *
   * @return The current encoding name
   */
  public String getEncoding()
  {
    return in != null ? encoding : null;
  }

  /**
   * This method checks to see if the stream is ready to be read.  It
   * will return <code>true</code> if is, or <code>false</code> if it is not.
   * If the stream is not ready to be read, it could (although is not required
   * to) block on the next read attempt.
   *
   * @return <code>true</code> if the stream is ready to be read,
   * <code>false</code> otherwise
   *
   * @exception IOException If an error occurs
   */
  public boolean ready() throws IOException
  {
    if (in == null)
      throw new IOException("Reader has been closed");

    return in.available() != 0;
  }

  /**
   * This method reads up to <code>length</code> characters from the stream into
   * the specified array starting at index <code>offset</code> into the
   * array.
   *
   * @param buf The character array to receive the data read
   * @param offset The offset into the array to start storing characters
   * @param length The requested number of characters to read.
   *
   * @return The actual number of characters read, or -1 if end of stream.
   * A request for zero characters returns 0.
   *
   * @exception IOException If an error occurs
   * @exception IndexOutOfBoundsException If <code>offset</code> and
   * <code>length</code> do not describe a valid range of <code>buf</code>
   */
  public int read(char[] buf, int offset, int length) throws IOException
  {
    if (in == null)
      throw new IOException("Reader has been closed");
    if (offset < 0 || length < 0 || length > buf.length - offset)
      throw new IndexOutOfBoundsException();
    if (length == 0)
      return 0;
    if (isDone)
      return -1;

    if (decoder != null)
      return readDecoded(buf, offset, length);
    return readLatin1(buf, offset, length);
  }

  /**
   * Decoder path of {@link #read(char[], int, int)}: fetches bytes and
   * decodes them until at least one char has been stored or the stream has
   * ended.
   */
  private int readDecoded(char[] buf, int offset, int length)
    throws IOException
  {
    CharBuffer cb = CharBuffer.wrap(buf, offset, length);
    int startPos = cb.position();
    boolean forceRead = false;

    while (true)
      {
        boolean eof = fillByteBuffer(length, forceRead);

        if (hasSavedSurrogate)
          {
            hasSavedSurrogate = false;
            cb.put(savedSurrogate);
          }

        CoderResult cr = decoder.decode(byteBuffer, cb, isDone);
        // A decode with endOfInput set must be followed by a reset before
        // the decoder can be used for another decode.
        decoder.reset();

        // 1 char remains which is the first half of a surrogate pair.
        if (cr.isOverflow() && cb.hasRemaining())
          {
            CharBuffer overflowBuf = CharBuffer.allocate(2);
            decoder.decode(byteBuffer, overflowBuf, isDone);
            decoder.reset();
            overflowBuf.flip();
            if (overflowBuf.hasRemaining())
              {
                cb.put(overflowBuf.get());
                if (overflowBuf.hasRemaining())
                  {
                    savedSurrogate = overflowBuf.get();
                    hasSavedSurrogate = true;
                    // The saved char still has to be delivered.
                    isDone = false;
                  }
              }
          }

        if (byteBuffer.hasRemaining())
          {
            // Keep the undecoded bytes for the next round.
            byteBuffer.compact();
            byteBuffer.flip();
            isDone = false;
          }
        else
          byteBuffer = null;

        int count = cb.position() - startPos;
        if (count > 0)
          return count;
        if (eof)
          return -1;

        // Nothing was decoded although the stream has not ended, e.g. the
        // bytes fetched so far are only part of a multi-byte sequence.
        // Fetch at least one more byte instead of reporting a false EOF.
        forceRead = true;
      }
  }

  /**
   * Moves the surplus bytes of the previous round into a fresh array, tops
   * it up from the stream and wraps the result in <code>byteBuffer</code>.
   *
   * @param length number of chars requested by the caller
   * @param forceRead if <code>true</code>, room for at least one new byte
   * is made even when the surplus already fills the usual array size
   *
   * @return <code>true</code> if the stream reported end of input
   */
  private boolean fillByteBuffer(int length, boolean forceRead)
    throws IOException
  {
    int pending = (byteBuffer == null) ? 0 : byteBuffer.remaining();
    int capacity = (int) ((double) length * maxBytesPerChar);
    if (forceRead)
      capacity = Math.max(capacity, pending + 1);

    // The surplus may exceed the usual size when the caller asks for fewer
    // chars than in the previous call; never drop those bytes.
    byte[] bytes = new byte[Math.max(capacity, pending)];
    if (pending > 0)
      byteBuffer.get(bytes, 0, pending);

    int filled = pending;
    boolean eof = false;
    if (bytes.length - pending > 0)
      {
        int fresh = in.read(bytes, pending, bytes.length - pending);
        if (fresh == -1)
          {
            isDone = true;
            eof = true;
          }
        else
          filled += fresh;
      }

    byteBuffer = ByteBuffer.wrap(bytes, 0, filled);
    return eof;
  }

  /**
   * Latin-1 path of {@link #read(char[], int, int)}: every byte read is
   * stored as the char with the same unsigned value.
   */
  private int readLatin1(char[] buf, int offset, int length)
    throws IOException
  {
    byte[] bytes = new byte[length];
    int count = in.read(bytes);
    for (int i = 0; i < count; i++)
      buf[offset + i] = (char) (bytes[i] & 0xFF);
    return count;
  }

  /**
   * Reads a char from the input stream and returns it
   * as an int in the range of 0-65535.  This method also will return -1 if
   * the end of the stream has been reached.
   * <p>
   * This method will block until the char can be read.
   *
   * @return The char read or -1 if end of stream
   *
   * @exception IOException If an error occurs
   */
  public int read() throws IOException
  {
    char[] buf = new char[1];
    int count = read(buf, 0, 1);
    return count > 0 ? buf[0] : -1;
  }

  /**
   * Skips the specified number of chars in the stream.  It
   * returns the actual number of chars skipped, which may be less than the
   * requested amount.
   *
   * @param count The requested number of chars to skip
   *
   * @return The actual number of chars skipped.
   *
   * @exception IOException If an error occurs
   */
  public long skip(long count) throws IOException
  {
    if (in == null)
      throw new IOException("Reader has been closed");

    return super.skip(count);
  }
}
GNU Classpath (0.20) |