GNU Classpath (0.19) | ||
Frames | No Frames |
1: /* InputStreamReader.java -- Reader than transforms bytes to chars 2: Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package java.io; 40: 41: import gnu.java.nio.charset.EncodingHelper; 42: 43: import java.nio.ByteBuffer; 44: import java.nio.CharBuffer; 45: import java.nio.charset.Charset; 46: import java.nio.charset.CharsetDecoder; 47: import java.nio.charset.CoderResult; 48: import java.nio.charset.CodingErrorAction; 49: 50: /** 51: * This class reads characters from a byte input stream. The characters 52: * read are converted from bytes in the underlying stream by a 53: * decoding layer. The decoding layer transforms bytes to chars according 54: * to an encoding standard. There are many available encodings to choose 55: * from. The desired encoding can either be specified by name, or if no 56: * encoding is selected, the system default encoding will be used. The 57: * system default encoding name is determined from the system property 58: * <code>file.encoding</code>. The only encodings that are guaranteed to 59: * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 60: * Unforunately, Java does not provide a mechanism for listing the 61: * ecodings that are supported in a given implementation. 62: * <p> 63: * Here is a list of standard encoding names that may be available: 64: * <p> 65: * <ul> 66: * <li>8859_1 (ISO-8859-1/Latin-1)</li> 67: * <li>8859_2 (ISO-8859-2/Latin-2)</li> 68: * <li>8859_3 (ISO-8859-3/Latin-3)</li> 69: * <li>8859_4 (ISO-8859-4/Latin-4)</li> 70: * <li>8859_5 (ISO-8859-5/Latin-5)</li> 71: * <li>8859_6 (ISO-8859-6/Latin-6)</li> 72: * <li>8859_7 (ISO-8859-7/Latin-7)</li> 73: * <li>8859_8 (ISO-8859-8/Latin-8)</li> 74: * <li>8859_9 (ISO-8859-9/Latin-9)</li> 75: * <li>ASCII (7-bit ASCII)</li> 76: * <li>UTF8 (UCS Transformation Format-8)</li> 77: * <li>More later</li> 78: * </ul> 79: * <p> 80: * It is recommended that applications do not use 81: * <code>InputStreamReader</code>'s 82: * directly. Rather, for efficiency purposes, an object of this class 83: * should be wrapped by a <code>BufferedReader</code>. 84: * <p> 85: * Due to a deficiency the Java class library design, there is no standard 86: * way for an application to install its own byte-character encoding. 87: * 88: * @see BufferedReader 89: * @see InputStream 90: * 91: * @author Robert Schuster 92: * @author Aaron M. Renn (arenn@urbanophile.com) 93: * @author Per Bothner (bothner@cygnus.com) 94: * @date April 22, 1998. 95: */ 96: public class InputStreamReader extends Reader 97: { 98: /** 99: * The input stream. 100: */ 101: private InputStream in; 102: 103: /** 104: * The charset decoder. 105: */ 106: private CharsetDecoder decoder; 107: 108: /** 109: * End of stream reached. 110: */ 111: private boolean isDone = false; 112: 113: /** 114: * Need this. 115: */ 116: private float maxBytesPerChar; 117: 118: /** 119: * Buffer holding surplus loaded bytes (if any) 120: */ 121: private ByteBuffer byteBuffer; 122: 123: /** 124: * java.io canonical name of the encoding. 125: */ 126: private String encoding; 127: 128: /** 129: * We might decode to a 2-char UTF-16 surrogate, which won't fit in the 130: * output buffer. In this case we need to save the surrogate char. 131: */ 132: private char savedSurrogate; 133: private boolean hasSavedSurrogate = false; 134: 135: /** 136: * This method initializes a new instance of <code>InputStreamReader</code> 137: * to read from the specified stream using the default encoding. 138: * 139: * @param in The <code>InputStream</code> to read from 140: */ 141: public InputStreamReader(InputStream in) 142: { 143: if (in == null) 144: throw new NullPointerException(); 145: this.in = in; 146: try 147: { 148: encoding = System.getProperty("file.encoding"); 149: // Don't use NIO if avoidable 150: if(EncodingHelper.isISOLatin1(encoding)) 151: { 152: encoding = "ISO8859_1"; 153: maxBytesPerChar = 1f; 154: decoder = null; 155: return; 156: } 157: Charset cs = EncodingHelper.getCharset(encoding); 158: decoder = cs.newDecoder(); 159: encoding = EncodingHelper.getOldCanonical(cs.name()); 160: try { 161: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 162: } catch(UnsupportedOperationException _){ 163: maxBytesPerChar = 1f; 164: } 165: decoder.onMalformedInput(CodingErrorAction.REPLACE); 166: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 167: decoder.reset(); 168: } catch(RuntimeException e) { 169: encoding = "ISO8859_1"; 170: maxBytesPerChar = 1f; 171: decoder = null; 172: } catch(UnsupportedEncodingException e) { 173: encoding = "ISO8859_1"; 174: maxBytesPerChar = 1f; 175: decoder = null; 176: } 177: } 178: 179: /** 180: * This method initializes a new instance of <code>InputStreamReader</code> 181: * to read from the specified stream using a caller supplied character 182: * encoding scheme. Note that due to a deficiency in the Java language 183: * design, there is no way to determine which encodings are supported. 184: * 185: * @param in The <code>InputStream</code> to read from 186: * @param encoding_name The name of the encoding scheme to use 187: * 188: * @exception UnsupportedEncodingException If the encoding scheme 189: * requested is not available. 190: */ 191: public InputStreamReader(InputStream in, String encoding_name) 192: throws UnsupportedEncodingException 193: { 194: if (in == null 195: || encoding_name == null) 196: throw new NullPointerException(); 197: 198: this.in = in; 199: // Don't use NIO if avoidable 200: if(EncodingHelper.isISOLatin1(encoding_name)) 201: { 202: encoding = "ISO8859_1"; 203: maxBytesPerChar = 1f; 204: decoder = null; 205: return; 206: } 207: try { 208: Charset cs = EncodingHelper.getCharset(encoding_name); 209: try { 210: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 211: } catch(UnsupportedOperationException _){ 212: maxBytesPerChar = 1f; 213: } 214: 215: decoder = cs.newDecoder(); 216: decoder.onMalformedInput(CodingErrorAction.REPLACE); 217: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 218: decoder.reset(); 219: 220: // The encoding should be the old name, if such exists. 221: encoding = EncodingHelper.getOldCanonical(cs.name()); 222: } catch(RuntimeException e) { 223: encoding = "ISO8859_1"; 224: maxBytesPerChar = 1f; 225: decoder = null; 226: } 227: } 228: 229: /** 230: * Creates an InputStreamReader that uses a decoder of the given 231: * charset to decode the bytes in the InputStream into 232: * characters. 233: */ 234: public InputStreamReader(InputStream in, Charset charset) { 235: this.in = in; 236: decoder = charset.newDecoder(); 237: 238: decoder.onMalformedInput(CodingErrorAction.REPLACE); 239: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 240: decoder.reset(); 241: encoding = EncodingHelper.getOldCanonical(charset.name()); 242: } 243: 244: /** 245: * Creates an InputStreamReader that uses the given charset decoder 246: * to decode the bytes in the InputStream into characters. 247: */ 248: public InputStreamReader(InputStream in, CharsetDecoder decoder) { 249: this.in = in; 250: this.decoder = decoder; 251: 252: try { 253: maxBytesPerChar = decoder.charset().newEncoder().maxBytesPerChar(); 254: } catch(UnsupportedOperationException _){ 255: maxBytesPerChar = 1f; 256: } 257: 258: decoder.onMalformedInput(CodingErrorAction.REPLACE); 259: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 260: decoder.reset(); 261: encoding = EncodingHelper.getOldCanonical(decoder.charset().name()); 262: } 263: 264: /** 265: * This method closes this stream, as well as the underlying 266: * <code>InputStream</code>. 267: * 268: * @exception IOException If an error occurs 269: */ 270: public void close() throws IOException 271: { 272: synchronized (lock) 273: { 274: // Makes sure all intermediate data is released by the decoder. 275: if (decoder != null) 276: decoder.reset(); 277: if (in != null) 278: in.close(); 279: in = null; 280: isDone = true; 281: decoder = null; 282: } 283: } 284: 285: /** 286: * This method returns the name of the encoding that is currently in use 287: * by this object. If the stream has been closed, this method is allowed 288: * to return <code>null</code>. 289: * 290: * @return The current encoding name 291: */ 292: public String getEncoding() 293: { 294: return in != null ? encoding : null; 295: } 296: 297: /** 298: * This method checks to see if the stream is ready to be read. It 299: * will return <code>true</code> if is, or <code>false</code> if it is not. 300: * If the stream is not ready to be read, it could (although is not required 301: * to) block on the next read attempt. 302: * 303: * @return <code>true</code> if the stream is ready to be read, 304: * <code>false</code> otherwise 305: * 306: * @exception IOException If an error occurs 307: */ 308: public boolean ready() throws IOException 309: { 310: if (in == null) 311: throw new IOException("Reader has been closed"); 312: 313: return in.available() != 0; 314: } 315: 316: /** 317: * This method reads up to <code>length</code> characters from the stream into 318: * the specified array starting at index <code>offset</code> into the 319: * array. 320: * 321: * @param buf The character array to recieve the data read 322: * @param offset The offset into the array to start storing characters 323: * @param length The requested number of characters to read. 324: * 325: * @return The actual number of characters read, or -1 if end of stream. 326: * 327: * @exception IOException If an error occurs 328: */ 329: public int read(char[] buf, int offset, int length) throws IOException 330: { 331: if (in == null) 332: throw new IOException("Reader has been closed"); 333: if (isDone) 334: return -1; 335: if(decoder != null){ 336: int totalBytes = (int)((double)length * maxBytesPerChar); 337: byte[] bytes = new byte[totalBytes]; 338: 339: int remaining = 0; 340: if(byteBuffer != null) 341: { 342: remaining = byteBuffer.remaining(); 343: byteBuffer.get(bytes, 0, remaining); 344: } 345: int read; 346: if(totalBytes - remaining > 0) 347: { 348: read = in.read(bytes, remaining, totalBytes - remaining); 349: if(read == -1){ 350: read = remaining; 351: isDone = true; 352: } else 353: read += remaining; 354: } else 355: read = remaining; 356: byteBuffer = ByteBuffer.wrap(bytes, 0, read); 357: CharBuffer cb = CharBuffer.wrap(buf, offset, length); 358: int startPos = cb.position(); 359: 360: if(hasSavedSurrogate){ 361: hasSavedSurrogate = false; 362: cb.put(savedSurrogate); 363: read++; 364: } 365: 366: CoderResult cr = decoder.decode(byteBuffer, cb, isDone); 367: decoder.reset(); 368: // 1 char remains which is the first half of a surrogate pair. 369: if(cr.isOverflow() && cb.hasRemaining()){ 370: CharBuffer overflowbuf = CharBuffer.allocate(2); 371: cr = decoder.decode(byteBuffer, overflowbuf, isDone); 372: overflowbuf.flip(); 373: if(overflowbuf.hasRemaining()) 374: { 375: cb.put(overflowbuf.get()); 376: savedSurrogate = overflowbuf.get(); 377: hasSavedSurrogate = true; 378: isDone = false; 379: } 380: } 381: 382: if(byteBuffer.hasRemaining()) { 383: byteBuffer.compact(); 384: byteBuffer.flip(); 385: isDone = false; 386: } else 387: byteBuffer = null; 388: 389: read = cb.position() - startPos; 390: return (read <= 0) ? -1 : read; 391: } else { 392: byte[] bytes = new byte[length]; 393: int read = in.read(bytes); 394: for(int i=0;i<read;i++) 395: buf[offset+i] = (char)(bytes[i]&0xFF); 396: return read; 397: } 398: } 399: 400: /** 401: * Reads an char from the input stream and returns it 402: * as an int in the range of 0-65535. This method also will return -1 if 403: * the end of the stream has been reached. 404: * <p> 405: * This method will block until the char can be read. 406: * 407: * @return The char read or -1 if end of stream 408: * 409: * @exception IOException If an error occurs 410: */ 411: public int read() throws IOException 412: { 413: char[] buf = new char[1]; 414: int count = read(buf, 0, 1); 415: return count > 0 ? buf[0] : -1; 416: } 417: 418: /** 419: * Skips the specified number of chars in the stream. It 420: * returns the actual number of chars skipped, which may be less than the 421: * requested amount. 422: * 423: * @param count The requested number of chars to skip 424: * 425: * @return The actual number of chars skipped. 426: * 427: * @exception IOException If an error occurs 428: */ 429: public long skip(long count) throws IOException 430: { 431: if (in == null) 432: throw new IOException("Reader has been closed"); 433: 434: return super.skip(count); 435: } 436: }
GNU Classpath (0.19) |