GNU Classpath (0.92) | |
Frames | No Frames |
1: /* String.java -- immutable character sequences; the object of string literals 2: Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2005 3: Free Software Foundation, Inc. 4: 5: This file is part of GNU Classpath. 6: 7: GNU Classpath is free software; you can redistribute it and/or modify 8: it under the terms of the GNU General Public License as published by 9: the Free Software Foundation; either version 2, or (at your option) 10: any later version. 11: 12: GNU Classpath is distributed in the hope that it will be useful, but 13: WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: General Public License for more details. 16: 17: You should have received a copy of the GNU General Public License 18: along with GNU Classpath; see the file COPYING. If not, write to the 19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20: 02110-1301 USA. 21: 22: Linking this library statically or dynamically with other modules is 23: making a combined work based on this library. Thus, the terms and 24: conditions of the GNU General Public License cover the whole 25: combination. 26: 27: As a special exception, the copyright holders of this library give you 28: permission to link this library with independent modules to produce an 29: executable, regardless of the license terms of these independent 30: modules, and to copy and distribute the resulting executable under 31: terms of your choice, provided that you also meet, for each linked 32: independent module, the terms and conditions of the license of that 33: module. An independent module is a module which is not derived from 34: or based on this library. If you modify this library, you may extend 35: this exception to your version of the library, but you are not 36: obligated to do so. If you do not wish to do so, delete this 37: exception statement from your version. 
*/ 38: 39: 40: package java.lang; 41: 42: import gnu.java.lang.CharData; 43: 44: import java.io.Serializable; 45: import java.io.UnsupportedEncodingException; 46: import java.nio.ByteBuffer; 47: import java.nio.CharBuffer; 48: import java.nio.charset.CharacterCodingException; 49: import java.nio.charset.Charset; 50: import java.nio.charset.CharsetDecoder; 51: import java.nio.charset.CharsetEncoder; 52: import java.nio.charset.CodingErrorAction; 53: import java.nio.charset.IllegalCharsetNameException; 54: import java.nio.charset.UnsupportedCharsetException; 55: import java.text.Collator; 56: import java.util.Comparator; 57: import java.util.Locale; 58: import java.util.regex.Matcher; 59: import java.util.regex.Pattern; 60: import java.util.regex.PatternSyntaxException; 61: 62: /** 63: * Strings represent an immutable set of characters. All String literals 64: * are instances of this class, and two string literals with the same contents 65: * refer to the same String object. 66: * 67: * <p>This class also includes a number of methods for manipulating the 68: * contents of strings (of course, creating a new object if there are any 69: * changes, as String is immutable). Case mapping relies on Unicode 3.0.0 70: * standards, where some character sequences have a different number of 71: * characters in the uppercase version than the lower case. 72: * 73: * <p>Strings are special, in that they are the only object with an overloaded 74: * operator. When you use '+' with at least one String argument, both 75: * arguments have String conversion performed on them, and another String (not 76: * guaranteed to be unique) results. 77: * 78: * <p>String is special-cased when doing data serialization - rather than 79: * listing the fields of this class, a String object is converted to a string 80: * literal in the object stream. 81: * 82: * @author Paul N. 
Fisher 83: * @author Eric Blake (ebb9@email.byu.edu) 84: * @author Per Bothner (bothner@cygnus.com) 85: * @since 1.0 86: * @status updated to 1.4; but could use better data sharing via offset field 87: */ 88: public final class String implements Serializable, Comparable, CharSequence 89: { 90: // WARNING: String is a CORE class in the bootstrap cycle. See the comments 91: // in vm/reference/java/lang/Runtime for implications of this fact. 92: 93: /** 94: * This is probably not necessary because this class is special cased already 95: * but it will avoid showing up as a discrepancy when comparing SUIDs. 96: */ 97: private static final long serialVersionUID = -6849794470754667710L; 98: 99: /** 100: * Stores unicode multi-character uppercase expansion table. 101: * @see #toUpperCase(Locale) 102: * @see CharData#UPPER_EXPAND 103: */ 104: private static final char[] upperExpand 105: = zeroBasedStringValue(CharData.UPPER_EXPAND); 106: 107: /** 108: * Stores unicode multi-character uppercase special casing table. 109: * @see #upperCaseExpansion(char) 110: * @see CharData#UPPER_SPECIAL 111: */ 112: private static final char[] upperSpecial 113: = zeroBasedStringValue(CharData.UPPER_SPECIAL); 114: 115: /** 116: * Characters which make up the String. 117: * Package access is granted for use by StringBuffer. 118: */ 119: final char[] value; 120: 121: /** 122: * Holds the number of characters in value. This number is generally 123: * the same as value.length, but can be smaller because substrings and 124: * StringBuffers can share arrays. Package visible for use by trusted code. 125: */ 126: final int count; 127: 128: /** 129: * Caches the result of hashCode(). If this value is zero, the hashcode 130: * is considered uncached (even if 0 is the correct hash value). 131: */ 132: private int cachedHashCode; 133: 134: /** 135: * Holds the starting position for characters in value[]. 
Since 136: * substring()'s are common, the use of offset allows the operation 137: * to perform in O(1). Package access is granted for use by StringBuffer. 138: */ 139: final int offset; 140: 141: /** 142: * An implementation for {@link #CASE_INSENSITIVE_ORDER}. 143: * This must be {@link Serializable}. The class name is dictated by 144: * compatibility with Sun's JDK. 145: */ 146: private static final class CaseInsensitiveComparator 147: implements Comparator, Serializable 148: { 149: /** 150: * Compatible with JDK 1.2. 151: */ 152: private static final long serialVersionUID = 8575799808933029326L; 153: 154: /** 155: * The default private constructor generates unnecessary overhead. 156: */ 157: CaseInsensitiveComparator() {} 158: 159: /** 160: * Compares to Strings, using 161: * <code>String.compareToIgnoreCase(String)</code>. 162: * 163: * @param o1 the first string 164: * @param o2 the second string 165: * @return < 0, 0, or > 0 depending on the case-insensitive 166: * comparison of the two strings. 167: * @throws NullPointerException if either argument is null 168: * @throws ClassCastException if either argument is not a String 169: * @see #compareToIgnoreCase(String) 170: */ 171: public int compare(Object o1, Object o2) 172: { 173: return ((String) o1).compareToIgnoreCase((String) o2); 174: } 175: } // class CaseInsensitiveComparator 176: 177: /** 178: * A Comparator that uses <code>String.compareToIgnoreCase(String)</code>. 179: * This comparator is {@link Serializable}. Note that it ignores Locale, 180: * for that, you want a Collator. 181: * 182: * @see Collator#compare(String, String) 183: * @since 1.2 184: */ 185: public static final Comparator CASE_INSENSITIVE_ORDER 186: = new CaseInsensitiveComparator(); 187: 188: /** 189: * Creates an empty String (length 0). Unless you really need a new object, 190: * consider using <code>""</code> instead. 
191: */ 192: public String() 193: { 194: value = "".value; 195: offset = 0; 196: count = 0; 197: } 198: 199: /** 200: * Copies the contents of a String to a new String. Since Strings are 201: * immutable, only a shallow copy is performed. 202: * 203: * @param str String to copy 204: * @throws NullPointerException if value is null 205: */ 206: public String(String str) 207: { 208: value = str.value; 209: offset = str.offset; 210: count = str.count; 211: cachedHashCode = str.cachedHashCode; 212: } 213: 214: /** 215: * Creates a new String using the character sequence of the char array. 216: * Subsequent changes to data do not affect the String. 217: * 218: * @param data char array to copy 219: * @throws NullPointerException if data is null 220: */ 221: public String(char[] data) 222: { 223: this(data, 0, data.length, false); 224: } 225: 226: /** 227: * Creates a new String using the character sequence of a subarray of 228: * characters. The string starts at offset, and copies count chars. 229: * Subsequent changes to data do not affect the String. 230: * 231: * @param data char array to copy 232: * @param offset position (base 0) to start copying out of data 233: * @param count the number of characters from data to copy 234: * @throws NullPointerException if data is null 235: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 236: * || offset + count < 0 (overflow) 237: * || offset + count > data.length) 238: * (while unspecified, this is a StringIndexOutOfBoundsException) 239: */ 240: public String(char[] data, int offset, int count) 241: { 242: this(data, offset, count, false); 243: } 244: 245: /** 246: * Creates a new String using an 8-bit array of integer values, starting at 247: * an offset, and copying up to the count. 
Each character c, using 248: * corresponding byte b, is created in the new String as if by performing: 249: * 250: * <pre> 251: * c = (char) (((hibyte & 0xff) << 8) | (b & 0xff)) 252: * </pre> 253: * 254: * @param ascii array of integer values 255: * @param hibyte top byte of each Unicode character 256: * @param offset position (base 0) to start copying out of ascii 257: * @param count the number of characters from ascii to copy 258: * @throws NullPointerException if ascii is null 259: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 260: * || offset + count < 0 (overflow) 261: * || offset + count > ascii.length) 262: * (while unspecified, this is a StringIndexOutOfBoundsException) 263: * @see #String(byte[]) 264: * @see #String(byte[], String) 265: * @see #String(byte[], int, int) 266: * @see #String(byte[], int, int, String) 267: * @deprecated use {@link #String(byte[], int, int, String)} to perform 268: * correct encoding 269: */ 270: public String(byte[] ascii, int hibyte, int offset, int count) 271: { 272: if (offset < 0) 273: throw new StringIndexOutOfBoundsException("offset: " + offset); 274: if (count < 0) 275: throw new StringIndexOutOfBoundsException("count: " + count); 276: // equivalent to: offset + count < 0 || offset + count > ascii.length 277: if (ascii.length - offset < count) 278: throw new StringIndexOutOfBoundsException("offset + count: " 279: + (offset + count)); 280: value = new char[count]; 281: this.offset = 0; 282: this.count = count; 283: hibyte <<= 8; 284: offset += count; 285: while (--count >= 0) 286: value[count] = (char) (hibyte | (ascii[--offset] & 0xff)); 287: } 288: 289: /** 290: * Creates a new String using an 8-bit array of integer values. 
Each 291: * character c, using corresponding byte b, is created in the new String 292: * as if by performing: 293: * 294: * <pre> 295: * c = (char) (((hibyte & 0xff) << 8) | (b & 0xff)) 296: * </pre> 297: * 298: * @param ascii array of integer values 299: * @param hibyte top byte of each Unicode character 300: * @throws NullPointerException if ascii is null 301: * @see #String(byte[]) 302: * @see #String(byte[], String) 303: * @see #String(byte[], int, int) 304: * @see #String(byte[], int, int, String) 305: * @see #String(byte[], int, int, int) 306: * @deprecated use {@link #String(byte[], String)} to perform 307: * correct encoding 308: */ 309: public String(byte[] ascii, int hibyte) 310: { 311: this(ascii, hibyte, 0, ascii.length); 312: } 313: 314: /** 315: * Creates a new String using the portion of the byte array starting at the 316: * offset and ending at offset + count. Uses the specified encoding type 317: * to decode the byte array, so the resulting string may be longer or 318: * shorter than the byte array. For more decoding control, use 319: * {@link java.nio.charset.CharsetDecoder}, and for valid character sets, 320: * see {@link java.nio.charset.Charset}. The behavior is not specified if 321: * the decoder encounters invalid characters; this implementation throws 322: * an Error. 
323: * 324: * @param data byte array to copy 325: * @param offset the offset to start at 326: * @param count the number of bytes in the array to use 327: * @param encoding the name of the encoding to use 328: * @throws NullPointerException if data or encoding is null 329: * @throws IndexOutOfBoundsException if offset or count is incorrect 330: * (while unspecified, this is a StringIndexOutOfBoundsException) 331: * @throws UnsupportedEncodingException if encoding is not found 332: * @throws Error if the decoding fails 333: * @since 1.1 334: */ 335: public String(byte[] data, int offset, int count, String encoding) 336: throws UnsupportedEncodingException 337: { 338: if (offset < 0) 339: throw new StringIndexOutOfBoundsException("offset: " + offset); 340: if (count < 0) 341: throw new StringIndexOutOfBoundsException("count: " + count); 342: // equivalent to: offset + count < 0 || offset + count > data.length 343: if (data.length - offset < count) 344: throw new StringIndexOutOfBoundsException("offset + count: " 345: + (offset + count)); 346: try 347: { 348: CharsetDecoder csd = Charset.forName(encoding).newDecoder(); 349: csd.onMalformedInput(CodingErrorAction.REPLACE); 350: csd.onUnmappableCharacter(CodingErrorAction.REPLACE); 351: CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); 352: if(cbuf.hasArray()) 353: { 354: value = cbuf.array(); 355: this.offset = cbuf.position(); 356: this.count = cbuf.remaining(); 357: } else { 358: // Doubt this will happen. But just in case. 
359: value = new char[cbuf.remaining()]; 360: cbuf.get(value); 361: this.offset = 0; 362: this.count = value.length; 363: } 364: } catch(CharacterCodingException e){ 365: throw new UnsupportedEncodingException("Encoding: "+encoding+ 366: " not found."); 367: } catch(IllegalCharsetNameException e){ 368: throw new UnsupportedEncodingException("Encoding: "+encoding+ 369: " not found."); 370: } catch(UnsupportedCharsetException e){ 371: throw new UnsupportedEncodingException("Encoding: "+encoding+ 372: " not found."); 373: } 374: } 375: 376: /** 377: * Creates a new String using the byte array. Uses the specified encoding 378: * type to decode the byte array, so the resulting string may be longer or 379: * shorter than the byte array. For more decoding control, use 380: * {@link java.nio.charset.CharsetDecoder}, and for valid character sets, 381: * see {@link java.nio.charset.Charset}. The behavior is not specified if 382: * the decoder encounters invalid characters; this implementation throws 383: * an Error. 384: * 385: * @param data byte array to copy 386: * @param encoding the name of the encoding to use 387: * @throws NullPointerException if data or encoding is null 388: * @throws UnsupportedEncodingException if encoding is not found 389: * @throws Error if the decoding fails 390: * @see #String(byte[], int, int, String) 391: * @since 1.1 392: */ 393: public String(byte[] data, String encoding) 394: throws UnsupportedEncodingException 395: { 396: this(data, 0, data.length, encoding); 397: } 398: 399: /** 400: * Creates a new String using the portion of the byte array starting at the 401: * offset and ending at offset + count. Uses the encoding of the platform's 402: * default charset, so the resulting string may be longer or shorter than 403: * the byte array. For more decoding control, use 404: * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified 405: * if the decoder encounters invalid characters; this implementation throws 406: * an Error. 
407: * 408: * @param data byte array to copy 409: * @param offset the offset to start at 410: * @param count the number of bytes in the array to use 411: * @throws NullPointerException if data is null 412: * @throws IndexOutOfBoundsException if offset or count is incorrect 413: * @throws Error if the decoding fails 414: * @see #String(byte[], int, int, String) 415: * @since 1.1 416: */ 417: public String(byte[] data, int offset, int count) 418: { 419: if (offset < 0) 420: throw new StringIndexOutOfBoundsException("offset: " + offset); 421: if (count < 0) 422: throw new StringIndexOutOfBoundsException("count: " + count); 423: // equivalent to: offset + count < 0 || offset + count > data.length 424: if (data.length - offset < count) 425: throw new StringIndexOutOfBoundsException("offset + count: " 426: + (offset + count)); 427: int o, c; 428: char[] v; 429: String encoding; 430: try 431: { 432: encoding = System.getProperty("file.encoding"); 433: CharsetDecoder csd = Charset.forName(encoding).newDecoder(); 434: csd.onMalformedInput(CodingErrorAction.REPLACE); 435: csd.onUnmappableCharacter(CodingErrorAction.REPLACE); 436: CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); 437: if(cbuf.hasArray()) 438: { 439: v = cbuf.array(); 440: o = cbuf.position(); 441: c = cbuf.remaining(); 442: } else { 443: // Doubt this will happen. But just in case. 444: v = new char[cbuf.remaining()]; 445: cbuf.get(v); 446: o = 0; 447: c = v.length; 448: } 449: } catch(Exception ex){ 450: // If anything goes wrong (System property not set, 451: // NIO provider not available, etc) 452: // Default to the 'safe' encoding ISO8859_1 453: v = new char[count]; 454: o = 0; 455: c = count; 456: for (int i=0;i<count;i++) 457: v[i] = (char)data[offset+i]; 458: } 459: this.value = v; 460: this.offset = o; 461: this.count = c; 462: } 463: 464: /** 465: * Creates a new String using the byte array. 
Uses the encoding of the 466: * platform's default charset, so the resulting string may be longer or 467: * shorter than the byte array. For more decoding control, use 468: * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified 469: * if the decoder encounters invalid characters; this implementation throws 470: * an Error. 471: * 472: * @param data byte array to copy 473: * @throws NullPointerException if data is null 474: * @throws Error if the decoding fails 475: * @see #String(byte[], int, int) 476: * @see #String(byte[], int, int, String) 477: * @since 1.1 478: */ 479: public String(byte[] data) 480: { 481: this(data, 0, data.length); 482: } 483: 484: /** 485: * Creates a new String using the character sequence represented by 486: * the StringBuffer. Subsequent changes to buf do not affect the String. 487: * 488: * @param buffer StringBuffer to copy 489: * @throws NullPointerException if buffer is null 490: */ 491: public String(StringBuffer buffer) 492: { 493: synchronized (buffer) 494: { 495: offset = 0; 496: count = buffer.count; 497: // Share unless buffer is 3/4 empty. 498: if ((count << 2) < buffer.value.length) 499: { 500: value = new char[count]; 501: VMSystem.arraycopy(buffer.value, 0, value, 0, count); 502: } 503: else 504: { 505: buffer.shared = true; 506: value = buffer.value; 507: } 508: } 509: } 510: 511: /** 512: * Creates a new String using the character sequence represented by 513: * the StringBuilder. Subsequent changes to buf do not affect the String. 514: * 515: * @param buffer StringBuilder to copy 516: * @throws NullPointerException if buffer is null 517: */ 518: public String(StringBuilder buffer) 519: { 520: this(buffer.value, 0, buffer.count); 521: } 522: 523: /** 524: * Special constructor which can share an array when safe to do so. 
525: * 526: * @param data the characters to copy 527: * @param offset the location to start from 528: * @param count the number of characters to use 529: * @param dont_copy true if the array is trusted, and need not be copied 530: * @throws NullPointerException if chars is null 531: * @throws StringIndexOutOfBoundsException if bounds check fails 532: */ 533: String(char[] data, int offset, int count, boolean dont_copy) 534: { 535: if (offset < 0) 536: throw new StringIndexOutOfBoundsException("offset: " + offset); 537: if (count < 0) 538: throw new StringIndexOutOfBoundsException("count: " + count); 539: // equivalent to: offset + count < 0 || offset + count > data.length 540: if (data.length - offset < count) 541: throw new StringIndexOutOfBoundsException("offset + count: " 542: + (offset + count)); 543: if (dont_copy) 544: { 545: value = data; 546: this.offset = offset; 547: } 548: else 549: { 550: value = new char[count]; 551: VMSystem.arraycopy(data, offset, value, 0, count); 552: this.offset = 0; 553: } 554: this.count = count; 555: } 556: 557: /** 558: * Creates a new String containing the characters represented in the 559: * given subarray of Unicode code points. 560: * @param codePoints the entire array of code points 561: * @param offset the start of the subarray 562: * @param count the length of the subarray 563: * 564: * @throws IllegalArgumentException if an invalid code point is found 565: * in the codePoints array 566: * @throws IndexOutOfBoundsException if offset is negative or offset + count 567: * is greater than the length of the array. 
568: */ 569: public String(int[] codePoints, int offset, int count) 570: { 571: // FIXME: This implementation appears to give correct internal 572: // representation of the String because: 573: // - length() is correct 574: // - getting a char[] from toCharArray() and testing 575: // Character.codePointAt() on all the characters in that array gives 576: // the appropriate results 577: // however printing the String gives incorrect results. This may be 578: // due to printing method errors (such as incorrectly looping through 579: // the String one char at a time rather than one "character" at a time. 580: 581: if (offset < 0) 582: throw new IndexOutOfBoundsException(); 583: int end = offset + count; 584: int pos = 0; 585: // This creates a char array that is long enough for all of the code 586: // points to represent supplementary characters. This is more than likely 587: // a waste of storage, so we use it only temporarily and then copy the 588: // used portion into the value array. 589: char[] temp = new char[2 * codePoints.length]; 590: for (int i = offset; i < end; i++) 591: { 592: pos += Character.toChars(codePoints[i], temp, pos); 593: } 594: this.count = pos; 595: this.value = new char[pos]; 596: System.arraycopy(temp, 0, value, 0, pos); 597: this.offset = 0; 598: } 599: 600: /** 601: * Returns the number of characters contained in this String. 602: * 603: * @return the length of this String 604: */ 605: public int length() 606: { 607: return count; 608: } 609: 610: /** 611: * Returns the character located at the specified index within this String. 
612: * 613: * @param index position of character to return (base 0) 614: * @return character located at position index 615: * @throws IndexOutOfBoundsException if index < 0 || index >= length() 616: * (while unspecified, this is a StringIndexOutOfBoundsException) 617: */ 618: public char charAt(int index) 619: { 620: if (index < 0 || index >= count) 621: throw new StringIndexOutOfBoundsException(index); 622: return value[offset + index]; 623: } 624: 625: /** 626: * Get the code point at the specified index. This is like #charAt(int), 627: * but if the character is the start of a surrogate pair, and the 628: * following character completes the pair, then the corresponding 629: * supplementary code point is returned. 630: * @param index the index of the codepoint to get, starting at 0 631: * @return the codepoint at the specified index 632: * @throws IndexOutOfBoundsException if index is negative or >= length() 633: * @since 1.5 634: */ 635: public synchronized int codePointAt(int index) 636: { 637: // Use the CharSequence overload as we get better range checking 638: // this way. 639: return Character.codePointAt(this, index); 640: } 641: 642: /** 643: * Get the code point before the specified index. This is like 644: * #codePointAt(int), but checks the characters at <code>index-1</code> and 645: * <code>index-2</code> to see if they form a supplementary code point. 646: * @param index the index just past the codepoint to get, starting at 0 647: * @return the codepoint at the specified index 648: * @throws IndexOutOfBoundsException if index is negative or >= length() 649: * (while unspecified, this is a StringIndexOutOfBoundsException) 650: * @since 1.5 651: */ 652: public synchronized int codePointBefore(int index) 653: { 654: // Use the CharSequence overload as we get better range checking 655: // this way. 
656: return Character.codePointBefore(this, index); 657: } 658: 659: /** 660: * Copies characters from this String starting at a specified start index, 661: * ending at a specified stop index, to a character array starting at 662: * a specified destination begin index. 663: * 664: * @param srcBegin index to begin copying characters from this String 665: * @param srcEnd index after the last character to be copied from this String 666: * @param dst character array which this String is copied into 667: * @param dstBegin index to start writing characters into dst 668: * @throws NullPointerException if dst is null 669: * @throws IndexOutOfBoundsException if any indices are out of bounds 670: * (while unspecified, source problems cause a 671: * StringIndexOutOfBoundsException, and dst problems cause an 672: * ArrayIndexOutOfBoundsException) 673: */ 674: public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 675: { 676: if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count) 677: throw new StringIndexOutOfBoundsException(); 678: VMSystem.arraycopy(value, srcBegin + offset, 679: dst, dstBegin, srcEnd - srcBegin); 680: } 681: 682: /** 683: * Copies the low byte of each character from this String starting at a 684: * specified start index, ending at a specified stop index, to a byte array 685: * starting at a specified destination begin index. 
686: * 687: * @param srcBegin index to being copying characters from this String 688: * @param srcEnd index after the last character to be copied from this String 689: * @param dst byte array which each low byte of this String is copied into 690: * @param dstBegin index to start writing characters into dst 691: * @throws NullPointerException if dst is null and copy length is non-zero 692: * @throws IndexOutOfBoundsException if any indices are out of bounds 693: * (while unspecified, source problems cause a 694: * StringIndexOutOfBoundsException, and dst problems cause an 695: * ArrayIndexOutOfBoundsException) 696: * @see #getBytes() 697: * @see #getBytes(String) 698: * @deprecated use {@link #getBytes()}, which uses a char to byte encoder 699: */ 700: public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) 701: { 702: if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count) 703: throw new StringIndexOutOfBoundsException(); 704: int i = srcEnd - srcBegin; 705: srcBegin += offset; 706: while (--i >= 0) 707: dst[dstBegin++] = (byte) value[srcBegin++]; 708: } 709: 710: /** 711: * Converts the Unicode characters in this String to a byte array. Uses the 712: * specified encoding method, so the result may be longer or shorter than 713: * the String. For more encoding control, use 714: * {@link java.nio.charset.CharsetEncoder}, and for valid character sets, 715: * see {@link java.nio.charset.Charset}. Unsupported characters get 716: * replaced by an encoding specific byte. 
717: * 718: * @param enc encoding name 719: * @return the resulting byte array 720: * @throws NullPointerException if enc is null 721: * @throws UnsupportedEncodingException if encoding is not supported 722: * @since 1.1 723: */ 724: public byte[] getBytes(String enc) throws UnsupportedEncodingException 725: { 726: try 727: { 728: CharsetEncoder cse = Charset.forName(enc).newEncoder(); 729: cse.onMalformedInput(CodingErrorAction.REPLACE); 730: cse.onUnmappableCharacter(CodingErrorAction.REPLACE); 731: ByteBuffer bbuf = cse.encode(CharBuffer.wrap(value, offset, count)); 732: if(bbuf.hasArray()) 733: return bbuf.array(); 734: 735: // Doubt this will happen. But just in case. 736: byte[] bytes = new byte[bbuf.remaining()]; 737: bbuf.get(bytes); 738: return bytes; 739: } 740: catch(IllegalCharsetNameException e) 741: { 742: throw new UnsupportedEncodingException("Encoding: " + enc 743: + " not found."); 744: } 745: catch(UnsupportedCharsetException e) 746: { 747: throw new UnsupportedEncodingException("Encoding: " + enc 748: + " not found."); 749: } 750: catch(CharacterCodingException e) 751: { 752: // This shouldn't ever happen. 753: throw (InternalError) new InternalError().initCause(e); 754: } 755: } 756: 757: /** 758: * Converts the Unicode characters in this String to a byte array. Uses the 759: * encoding of the platform's default charset, so the result may be longer 760: * or shorter than the String. For more encoding control, use 761: * {@link java.nio.charset.CharsetEncoder}. Unsupported characters get 762: * replaced by an encoding specific byte. 763: * 764: * @return the resulting byte array, or null on a problem 765: * @since 1.1 766: */ 767: public byte[] getBytes() 768: { 769: try 770: { 771: return getBytes(System.getProperty("file.encoding")); 772: } catch(Exception e) { 773: // XXX - Throw an error here? 774: // For now, default to the 'safe' encoding. 
775: byte[] bytes = new byte[count]; 776: for(int i=0;i<count;i++) 777: bytes[i] = (byte)((value[offset+i] <= 0xFF)? 778: value[offset+i]:'?'); 779: return bytes; 780: } 781: } 782: 783: /** 784: * Predicate which compares anObject to this. This is true only for Strings 785: * with the same character sequence. 786: * 787: * @param anObject the object to compare 788: * @return true if anObject is semantically equal to this 789: * @see #compareTo(String) 790: * @see #equalsIgnoreCase(String) 791: */ 792: public boolean equals(Object anObject) 793: { 794: if (! (anObject instanceof String)) 795: return false; 796: String str2 = (String) anObject; 797: if (count != str2.count) 798: return false; 799: if (value == str2.value && offset == str2.offset) 800: return true; 801: int i = count; 802: int x = offset; 803: int y = str2.offset; 804: while (--i >= 0) 805: if (value[x++] != str2.value[y++]) 806: return false; 807: return true; 808: } 809: 810: /** 811: * Compares the given StringBuffer to this String. This is true if the 812: * StringBuffer has the same content as this String at this moment. 813: * 814: * @param buffer the StringBuffer to compare to 815: * @return true if StringBuffer has the same character sequence 816: * @throws NullPointerException if the given StringBuffer is null 817: * @since 1.4 818: */ 819: public boolean contentEquals(StringBuffer buffer) 820: { 821: synchronized (buffer) 822: { 823: if (count != buffer.count) 824: return false; 825: if (value == buffer.value) 826: return true; // Possible if shared. 827: int i = count; 828: int x = offset + count; 829: while (--i >= 0) 830: if (value[--x] != buffer.value[i]) 831: return false; 832: return true; 833: } 834: } 835: 836: /** 837: * Compares the given CharSequence to this String. This is true if 838: * the CharSequence has the same content as this String at this 839: * moment. 
840: * 841: * @param seq the CharSequence to compare to 842: * @return true if CharSequence has the same character sequence 843: * @throws NullPointerException if the given CharSequence is null 844: * @since 1.5 845: */ 846: public boolean contentEquals(CharSequence seq) 847: { 848: if (seq.length() != count) 849: return false; 850: for (int i = 0; i < count; ++i) 851: if (value[offset + i] != seq.charAt(i)) 852: return false; 853: return true; 854: } 855: 856: /** 857: * Compares a String to this String, ignoring case. This does not handle 858: * multi-character capitalization exceptions; instead the comparison is 859: * made on a character-by-character basis, and is true if:<br><ul> 860: * <li><code>c1 == c2</code></li> 861: * <li><code>Character.toUpperCase(c1) 862: * == Character.toUpperCase(c2)</code></li> 863: * <li><code>Character.toLowerCase(c1) 864: * == Character.toLowerCase(c2)</code></li> 865: * </ul> 866: * 867: * @param anotherString String to compare to this String 868: * @return true if anotherString is equal, ignoring case 869: * @see #equals(Object) 870: * @see Character#toUpperCase(char) 871: * @see Character#toLowerCase(char) 872: */ 873: public boolean equalsIgnoreCase(String anotherString) 874: { 875: if (anotherString == null || count != anotherString.count) 876: return false; 877: int i = count; 878: int x = offset; 879: int y = anotherString.offset; 880: while (--i >= 0) 881: { 882: char c1 = value[x++]; 883: char c2 = anotherString.value[y++]; 884: // Note that checking c1 != c2 is redundant, but avoids method calls. 885: if (c1 != c2 886: && Character.toUpperCase(c1) != Character.toUpperCase(c2) 887: && Character.toLowerCase(c1) != Character.toLowerCase(c2)) 888: return false; 889: } 890: return true; 891: } 892: 893: /** 894: * Compares this String and another String (case sensitive, 895: * lexicographically). The result is less than 0 if this string sorts 896: * before the other, 0 if they are equal, and greater than 0 otherwise. 
897: * After any common starting sequence is skipped, the result is 898: * <code>this.charAt(k) - anotherString.charAt(k)</code> if both strings 899: * have characters remaining, or 900: * <code>this.length() - anotherString.length()</code> if one string is 901: * a subsequence of the other. 902: * 903: * @param anotherString the String to compare against 904: * @return the comparison 905: * @throws NullPointerException if anotherString is null 906: */ 907: public int compareTo(String anotherString) 908: { 909: int i = Math.min(count, anotherString.count); 910: int x = offset; 911: int y = anotherString.offset; 912: while (--i >= 0) 913: { 914: int result = value[x++] - anotherString.value[y++]; 915: if (result != 0) 916: return result; 917: } 918: return count - anotherString.count; 919: } 920: 921: /** 922: * Behaves like <code>compareTo(java.lang.String)</code> unless the Object 923: * is not a <code>String</code>. Then it throws a 924: * <code>ClassCastException</code>. 925: * 926: * @param o the object to compare against 927: * @return the comparison 928: * @throws NullPointerException if o is null 929: * @throws ClassCastException if o is not a <code>String</code> 930: * @since 1.2 931: */ 932: public int compareTo(Object o) 933: { 934: return compareTo((String) o); 935: } 936: 937: /** 938: * Compares this String and another String (case insensitive). This 939: * comparison is <em>similar</em> to equalsIgnoreCase, in that it ignores 940: * locale and multi-characater capitalization, and compares characters 941: * after performing 942: * <code>Character.toLowerCase(Character.toUpperCase(c))</code> on each 943: * character of the string. This is unsatisfactory for locale-based 944: * comparison, in which case you should use {@link java.text.Collator}. 
945: * 946: * @param str the string to compare against 947: * @return the comparison 948: * @see Collator#compare(String, String) 949: * @since 1.2 950: */ 951: public int compareToIgnoreCase(String str) 952: { 953: int i = Math.min(count, str.count); 954: int x = offset; 955: int y = str.offset; 956: while (--i >= 0) 957: { 958: int result = Character.toLowerCase(Character.toUpperCase(value[x++])) 959: - Character.toLowerCase(Character.toUpperCase(str.value[y++])); 960: if (result != 0) 961: return result; 962: } 963: return count - str.count; 964: } 965: 966: /** 967: * Predicate which determines if this String matches another String 968: * starting at a specified offset for each String and continuing 969: * for a specified length. Indices out of bounds are harmless, and give 970: * a false result. 971: * 972: * @param toffset index to start comparison at for this String 973: * @param other String to compare region to this String 974: * @param ooffset index to start comparison at for other 975: * @param len number of characters to compare 976: * @return true if regions match (case sensitive) 977: * @throws NullPointerException if other is null 978: */ 979: public boolean regionMatches(int toffset, String other, int ooffset, int len) 980: { 981: return regionMatches(false, toffset, other, ooffset, len); 982: } 983: 984: /** 985: * Predicate which determines if this String matches another String 986: * starting at a specified offset for each String and continuing 987: * for a specified length, optionally ignoring case. Indices out of bounds 988: * are harmless, and give a false result. Case comparisons are based on 989: * <code>Character.toLowerCase()</code> and 990: * <code>Character.toUpperCase()</code>, not on multi-character 991: * capitalization expansions. 
992: * 993: * @param ignoreCase true if case should be ignored in comparision 994: * @param toffset index to start comparison at for this String 995: * @param other String to compare region to this String 996: * @param ooffset index to start comparison at for other 997: * @param len number of characters to compare 998: * @return true if regions match, false otherwise 999: * @throws NullPointerException if other is null 1000: */ 1001: public boolean regionMatches(boolean ignoreCase, int toffset, 1002: String other, int ooffset, int len) 1003: { 1004: if (toffset < 0 || ooffset < 0 || toffset + len > count 1005: || ooffset + len > other.count) 1006: return false; 1007: toffset += offset; 1008: ooffset += other.offset; 1009: while (--len >= 0) 1010: { 1011: char c1 = value[toffset++]; 1012: char c2 = other.value[ooffset++]; 1013: // Note that checking c1 != c2 is redundant when ignoreCase is true, 1014: // but it avoids method calls. 1015: if (c1 != c2 1016: && (! ignoreCase 1017: || (Character.toLowerCase(c1) != Character.toLowerCase(c2) 1018: && (Character.toUpperCase(c1) 1019: != Character.toUpperCase(c2))))) 1020: return false; 1021: } 1022: return true; 1023: } 1024: 1025: /** 1026: * Predicate which determines if this String contains the given prefix, 1027: * beginning comparison at toffset. The result is false if toffset is 1028: * negative or greater than this.length(), otherwise it is the same as 1029: * <code>this.substring(toffset).startsWith(prefix)</code>. 
1030: * 1031: * @param prefix String to compare 1032: * @param toffset offset for this String where comparison starts 1033: * @return true if this String starts with prefix 1034: * @throws NullPointerException if prefix is null 1035: * @see #regionMatches(boolean, int, String, int, int) 1036: */ 1037: public boolean startsWith(String prefix, int toffset) 1038: { 1039: return regionMatches(false, toffset, prefix, 0, prefix.count); 1040: } 1041: 1042: /** 1043: * Predicate which determines if this String starts with a given prefix. 1044: * If the prefix is an empty String, true is returned. 1045: * 1046: * @param prefix String to compare 1047: * @return true if this String starts with the prefix 1048: * @throws NullPointerException if prefix is null 1049: * @see #startsWith(String, int) 1050: */ 1051: public boolean startsWith(String prefix) 1052: { 1053: return regionMatches(false, 0, prefix, 0, prefix.count); 1054: } 1055: 1056: /** 1057: * Predicate which determines if this String ends with a given suffix. 1058: * If the suffix is an empty String, true is returned. 1059: * 1060: * @param suffix String to compare 1061: * @return true if this String ends with the suffix 1062: * @throws NullPointerException if suffix is null 1063: * @see #regionMatches(boolean, int, String, int, int) 1064: */ 1065: public boolean endsWith(String suffix) 1066: { 1067: return regionMatches(false, count - suffix.count, suffix, 0, suffix.count); 1068: } 1069: 1070: /** 1071: * Computes the hashcode for this String. This is done with int arithmetic, 1072: * where ** represents exponentiation, by this formula:<br> 1073: * <code>s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]</code>. 1074: * 1075: * @return hashcode value of this String 1076: */ 1077: public int hashCode() 1078: { 1079: if (cachedHashCode != 0) 1080: return cachedHashCode; 1081: 1082: // Compute the hash code using a local variable to be reentrant. 
1083: int hashCode = 0; 1084: int limit = count + offset; 1085: for (int i = offset; i < limit; i++) 1086: hashCode = hashCode * 31 + value[i]; 1087: return cachedHashCode = hashCode; 1088: } 1089: 1090: /** 1091: * Finds the first instance of a character in this String. 1092: * 1093: * @param ch character to find 1094: * @return location (base 0) of the character, or -1 if not found 1095: */ 1096: public int indexOf(int ch) 1097: { 1098: return indexOf(ch, 0); 1099: } 1100: 1101: /** 1102: * Finds the first instance of a character in this String, starting at 1103: * a given index. If starting index is less than 0, the search 1104: * starts at the beginning of this String. If the starting index 1105: * is greater than the length of this String, -1 is returned. 1106: * 1107: * @param ch character to find 1108: * @param fromIndex index to start the search 1109: * @return location (base 0) of the character, or -1 if not found 1110: */ 1111: public int indexOf(int ch, int fromIndex) 1112: { 1113: if ((char) ch != ch) 1114: return -1; 1115: if (fromIndex < 0) 1116: fromIndex = 0; 1117: int i = fromIndex + offset; 1118: for ( ; fromIndex < count; fromIndex++) 1119: if (value[i++] == ch) 1120: return fromIndex; 1121: return -1; 1122: } 1123: 1124: /** 1125: * Finds the last instance of a character in this String. 1126: * 1127: * @param ch character to find 1128: * @return location (base 0) of the character, or -1 if not found 1129: */ 1130: public int lastIndexOf(int ch) 1131: { 1132: return lastIndexOf(ch, count - 1); 1133: } 1134: 1135: /** 1136: * Finds the last instance of a character in this String, starting at 1137: * a given index. If starting index is greater than the maximum valid 1138: * index, then the search begins at the end of this String. If the 1139: * starting index is less than zero, -1 is returned. 
1140: * 1141: * @param ch character to find 1142: * @param fromIndex index to start the search 1143: * @return location (base 0) of the character, or -1 if not found 1144: */ 1145: public int lastIndexOf(int ch, int fromIndex) 1146: { 1147: if ((char) ch != ch) 1148: return -1; 1149: if (fromIndex >= count) 1150: fromIndex = count - 1; 1151: int i = fromIndex + offset; 1152: for ( ; fromIndex >= 0; fromIndex--) 1153: if (value[i--] == ch) 1154: return fromIndex; 1155: return -1; 1156: } 1157: 1158: /** 1159: * Finds the first instance of a String in this String. 1160: * 1161: * @param str String to find 1162: * @return location (base 0) of the String, or -1 if not found 1163: * @throws NullPointerException if str is null 1164: */ 1165: public int indexOf(String str) 1166: { 1167: return indexOf(str, 0); 1168: } 1169: 1170: /** 1171: * Finds the first instance of a String in this String, starting at 1172: * a given index. If starting index is less than 0, the search 1173: * starts at the beginning of this String. If the starting index 1174: * is greater than the length of this String, -1 is returned. 1175: * 1176: * @param str String to find 1177: * @param fromIndex index to start the search 1178: * @return location (base 0) of the String, or -1 if not found 1179: * @throws NullPointerException if str is null 1180: */ 1181: public int indexOf(String str, int fromIndex) 1182: { 1183: if (fromIndex < 0) 1184: fromIndex = 0; 1185: int limit = count - str.count; 1186: for ( ; fromIndex <= limit; fromIndex++) 1187: if (regionMatches(fromIndex, str, 0, str.count)) 1188: return fromIndex; 1189: return -1; 1190: } 1191: 1192: /** 1193: * Finds the last instance of a String in this String. 
1194: * 1195: * @param str String to find 1196: * @return location (base 0) of the String, or -1 if not found 1197: * @throws NullPointerException if str is null 1198: */ 1199: public int lastIndexOf(String str) 1200: { 1201: return lastIndexOf(str, count - str.count); 1202: } 1203: 1204: /** 1205: * Finds the last instance of a String in this String, starting at 1206: * a given index. If starting index is greater than the maximum valid 1207: * index, then the search begins at the end of this String. If the 1208: * starting index is less than zero, -1 is returned. 1209: * 1210: * @param str String to find 1211: * @param fromIndex index to start the search 1212: * @return location (base 0) of the String, or -1 if not found 1213: * @throws NullPointerException if str is null 1214: */ 1215: public int lastIndexOf(String str, int fromIndex) 1216: { 1217: fromIndex = Math.min(fromIndex, count - str.count); 1218: for ( ; fromIndex >= 0; fromIndex--) 1219: if (regionMatches(fromIndex, str, 0, str.count)) 1220: return fromIndex; 1221: return -1; 1222: } 1223: 1224: /** 1225: * Creates a substring of this String, starting at a specified index 1226: * and ending at the end of this String. 1227: * 1228: * @param begin index to start substring (base 0) 1229: * @return new String which is a substring of this String 1230: * @throws IndexOutOfBoundsException if begin < 0 || begin > length() 1231: * (while unspecified, this is a StringIndexOutOfBoundsException) 1232: */ 1233: public String substring(int begin) 1234: { 1235: return substring(begin, count); 1236: } 1237: 1238: /** 1239: * Creates a substring of this String, starting at a specified index 1240: * and ending at one character before a specified index. 
1241: * 1242: * @param beginIndex index to start substring (inclusive, base 0) 1243: * @param endIndex index to end at (exclusive) 1244: * @return new String which is a substring of this String 1245: * @throws IndexOutOfBoundsException if begin < 0 || end > length() 1246: * || begin > end (while unspecified, this is a 1247: * StringIndexOutOfBoundsException) 1248: */ 1249: public String substring(int beginIndex, int endIndex) 1250: { 1251: if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) 1252: throw new StringIndexOutOfBoundsException(); 1253: if (beginIndex == 0 && endIndex == count) 1254: return this; 1255: int len = endIndex - beginIndex; 1256: // Package constructor avoids an array copy. 1257: return new String(value, beginIndex + offset, len, 1258: (len << 2) >= value.length); 1259: } 1260: 1261: /** 1262: * Creates a substring of this String, starting at a specified index 1263: * and ending at one character before a specified index. This behaves like 1264: * <code>substring(begin, end)</code>. 1265: * 1266: * @param begin index to start substring (inclusive, base 0) 1267: * @param end index to end at (exclusive) 1268: * @return new String which is a substring of this String 1269: * @throws IndexOutOfBoundsException if begin < 0 || end > length() 1270: * || begin > end 1271: * @since 1.4 1272: */ 1273: public CharSequence subSequence(int begin, int end) 1274: { 1275: return substring(begin, end); 1276: } 1277: 1278: /** 1279: * Concatenates a String to this String. This results in a new string unless 1280: * one of the two originals is "". 
1281: * 1282: * @param str String to append to this String 1283: * @return newly concatenated String 1284: * @throws NullPointerException if str is null 1285: */ 1286: public String concat(String str) 1287: { 1288: if (str.count == 0) 1289: return this; 1290: if (count == 0) 1291: return str; 1292: char[] newStr = new char[count + str.count]; 1293: VMSystem.arraycopy(value, offset, newStr, 0, count); 1294: VMSystem.arraycopy(str.value, str.offset, newStr, count, str.count); 1295: // Package constructor avoids an array copy. 1296: return new String(newStr, 0, newStr.length, true); 1297: } 1298: 1299: /** 1300: * Replaces every instance of a character in this String with a new 1301: * character. If no replacements occur, this is returned. 1302: * 1303: * @param oldChar the old character to replace 1304: * @param newChar the new character 1305: * @return new String with all instances of oldChar replaced with newChar 1306: */ 1307: public String replace(char oldChar, char newChar) 1308: { 1309: if (oldChar == newChar) 1310: return this; 1311: int i = count; 1312: int x = offset - 1; 1313: while (--i >= 0) 1314: if (value[++x] == oldChar) 1315: break; 1316: if (i < 0) 1317: return this; 1318: char[] newStr = (char[]) value.clone(); 1319: newStr[x] = newChar; 1320: while (--i >= 0) 1321: if (value[++x] == oldChar) 1322: newStr[x] = newChar; 1323: // Package constructor avoids an array copy. 1324: return new String(newStr, offset, count, true); 1325: } 1326: 1327: /** 1328: * Test if this String matches a regular expression. This is shorthand for 1329: * <code>{@link Pattern}.matches(regex, this)</code>. 
1330: * 1331: * @param regex the pattern to match 1332: * @return true if the pattern matches 1333: * @throws NullPointerException if regex is null 1334: * @throws PatternSyntaxException if regex is invalid 1335: * @see Pattern#matches(String, CharSequence) 1336: * @since 1.4 1337: */ 1338: public boolean matches(String regex) 1339: { 1340: return Pattern.matches(regex, this); 1341: } 1342: 1343: /** 1344: * Replaces the first substring match of the regular expression with a 1345: * given replacement. This is shorthand for <code>{@link Pattern} 1346: * .compile(regex).matcher(this).replaceFirst(replacement)</code>. 1347: * 1348: * @param regex the pattern to match 1349: * @param replacement the replacement string 1350: * @return the modified string 1351: * @throws NullPointerException if regex or replacement is null 1352: * @throws PatternSyntaxException if regex is invalid 1353: * @see #replaceAll(String, String) 1354: * @see Pattern#compile(String) 1355: * @see Pattern#matcher(CharSequence) 1356: * @see Matcher#replaceFirst(String) 1357: * @since 1.4 1358: */ 1359: public String replaceFirst(String regex, String replacement) 1360: { 1361: return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 1362: } 1363: 1364: /** 1365: * Replaces all matching substrings of the regular expression with a 1366: * given replacement. This is shorthand for <code>{@link Pattern} 1367: * .compile(regex).matcher(this).replaceAll(replacement)</code>. 
1368: * 1369: * @param regex the pattern to match 1370: * @param replacement the replacement string 1371: * @return the modified string 1372: * @throws NullPointerException if regex or replacement is null 1373: * @throws PatternSyntaxException if regex is invalid 1374: * @see #replaceFirst(String, String) 1375: * @see Pattern#compile(String) 1376: * @see Pattern#matcher(CharSequence) 1377: * @see Matcher#replaceAll(String) 1378: * @since 1.4 1379: */ 1380: public String replaceAll(String regex, String replacement) 1381: { 1382: return Pattern.compile(regex).matcher(this).replaceAll(replacement); 1383: } 1384: 1385: /** 1386: * Split this string around the matches of a regular expression. Each 1387: * element of the returned array is the largest block of characters not 1388: * terminated by the regular expression, in the order the matches are found. 1389: * 1390: * <p>The limit affects the length of the array. If it is positive, the 1391: * array will contain at most n elements (n - 1 pattern matches). If 1392: * negative, the array length is unlimited, but there can be trailing empty 1393: * entries. if 0, the array length is unlimited, and trailing empty entries 1394: * are discarded. 1395: * 1396: * <p>For example, splitting "boo:and:foo" yields:<br> 1397: * <table border=0> 1398: * <th><td>Regex</td> <td>Limit</td> <td>Result</td></th> 1399: * <tr><td>":"</td> <td>2</td> <td>{ "boo", "and:foo" }</td></tr> 1400: * <tr><td>":"</td> <td>t</td> <td>{ "boo", "and", "foo" }</td></tr> 1401: * <tr><td>":"</td> <td>-2</td> <td>{ "boo", "and", "foo" }</td></tr> 1402: * <tr><td>"o"</td> <td>5</td> <td>{ "b", "", ":and:f", "", "" }</td></tr> 1403: * <tr><td>"o"</td> <td>-2</td> <td>{ "b", "", ":and:f", "", "" }</td></tr> 1404: * <tr><td>"o"</td> <td>0</td> <td>{ "b", "", ":and:f" }</td></tr> 1405: * </table> 1406: * 1407: * <p>This is shorthand for 1408: * <code>{@link Pattern}.compile(regex).split(this, limit)</code>. 
1409: * 1410: * @param regex the pattern to match 1411: * @param limit the limit threshold 1412: * @return the array of split strings 1413: * @throws NullPointerException if regex or replacement is null 1414: * @throws PatternSyntaxException if regex is invalid 1415: * @see Pattern#compile(String) 1416: * @see Pattern#split(CharSequence, int) 1417: * @since 1.4 1418: */ 1419: public String[] split(String regex, int limit) 1420: { 1421: return Pattern.compile(regex).split(this, limit); 1422: } 1423: 1424: /** 1425: * Split this string around the matches of a regular expression. Each 1426: * element of the returned array is the largest block of characters not 1427: * terminated by the regular expression, in the order the matches are found. 1428: * The array length is unlimited, and trailing empty entries are discarded, 1429: * as though calling <code>split(regex, 0)</code>. 1430: * 1431: * @param regex the pattern to match 1432: * @return the array of split strings 1433: * @throws NullPointerException if regex or replacement is null 1434: * @throws PatternSyntaxException if regex is invalid 1435: * @see #split(String, int) 1436: * @see Pattern#compile(String) 1437: * @see Pattern#split(CharSequence, int) 1438: * @since 1.4 1439: */ 1440: public String[] split(String regex) 1441: { 1442: return Pattern.compile(regex).split(this, 0); 1443: } 1444: 1445: /** 1446: * Lowercases this String according to a particular locale. This uses 1447: * Unicode's special case mappings, as applied to the given Locale, so the 1448: * resulting string may be a different length. 1449: * 1450: * @param loc locale to use 1451: * @return new lowercased String, or this if no characters were lowercased 1452: * @throws NullPointerException if loc is null 1453: * @see #toUpperCase(Locale) 1454: * @since 1.1 1455: */ 1456: public String toLowerCase(Locale loc) 1457: { 1458: // First, see if the current string is already lower case. 
1459: boolean turkish = "tr".equals(loc.getLanguage()); 1460: int i = count; 1461: int x = offset - 1; 1462: while (--i >= 0) 1463: { 1464: char ch = value[++x]; 1465: if ((turkish && ch == '\u0049') 1466: || ch != Character.toLowerCase(ch)) 1467: break; 1468: } 1469: if (i < 0) 1470: return this; 1471: 1472: // Now we perform the conversion. Fortunately, there are no multi-character 1473: // lowercase expansions in Unicode 3.0.0. 1474: char[] newStr = (char[]) value.clone(); 1475: do 1476: { 1477: char ch = value[x]; 1478: // Hardcoded special case. 1479: newStr[x++] = (turkish && ch == '\u0049') ? '\u0131' 1480: : Character.toLowerCase(ch); 1481: } 1482: while (--i >= 0); 1483: // Package constructor avoids an array copy. 1484: return new String(newStr, offset, count, true); 1485: } 1486: 1487: /** 1488: * Lowercases this String. This uses Unicode's special case mappings, as 1489: * applied to the platform's default Locale, so the resulting string may 1490: * be a different length. 1491: * 1492: * @return new lowercased String, or this if no characters were lowercased 1493: * @see #toLowerCase(Locale) 1494: * @see #toUpperCase() 1495: */ 1496: public String toLowerCase() 1497: { 1498: return toLowerCase(Locale.getDefault()); 1499: } 1500: 1501: /** 1502: * Uppercases this String according to a particular locale. This uses 1503: * Unicode's special case mappings, as applied to the given Locale, so the 1504: * resulting string may be a different length. 1505: * 1506: * @param loc locale to use 1507: * @return new uppercased String, or this if no characters were uppercased 1508: * @throws NullPointerException if loc is null 1509: * @see #toLowerCase(Locale) 1510: * @since 1.1 1511: */ 1512: public String toUpperCase(Locale loc) 1513: { 1514: // First, see how many characters we have to grow by, as well as if the 1515: // current string is already upper case. 
1516: boolean turkish = "tr".equals(loc.getLanguage()); 1517: int expand = 0; 1518: boolean unchanged = true; 1519: int i = count; 1520: int x = i + offset; 1521: while (--i >= 0) 1522: { 1523: char ch = value[--x]; 1524: expand += upperCaseExpansion(ch); 1525: unchanged = (unchanged && expand == 0 1526: && ! (turkish && ch == '\u0069') 1527: && ch == Character.toUpperCase(ch)); 1528: } 1529: if (unchanged) 1530: return this; 1531: 1532: // Now we perform the conversion. 1533: i = count; 1534: if (expand == 0) 1535: { 1536: char[] newStr = (char[]) value.clone(); 1537: while (--i >= 0) 1538: { 1539: char ch = value[x]; 1540: // Hardcoded special case. 1541: newStr[x++] = (turkish && ch == '\u0069') ? '\u0130' 1542: : Character.toUpperCase(ch); 1543: } 1544: // Package constructor avoids an array copy. 1545: return new String(newStr, offset, count, true); 1546: } 1547: 1548: // Expansion is necessary. 1549: char[] newStr = new char[count + expand]; 1550: int j = 0; 1551: while (--i >= 0) 1552: { 1553: char ch = value[x++]; 1554: // Hardcoded special case. 1555: if (turkish && ch == '\u0069') 1556: { 1557: newStr[j++] = '\u0130'; 1558: continue; 1559: } 1560: expand = upperCaseExpansion(ch); 1561: if (expand > 0) 1562: { 1563: int index = upperCaseIndex(ch); 1564: while (expand-- >= 0) 1565: newStr[j++] = upperExpand[index++]; 1566: } 1567: else 1568: newStr[j++] = Character.toUpperCase(ch); 1569: } 1570: // Package constructor avoids an array copy. 1571: return new String(newStr, 0, newStr.length, true); 1572: } 1573: 1574: /** 1575: * Uppercases this String. This uses Unicode's special case mappings, as 1576: * applied to the platform's default Locale, so the resulting string may 1577: * be a different length. 
1578: * 1579: * @return new uppercased String, or this if no characters were uppercased 1580: * @see #toUpperCase(Locale) 1581: * @see #toLowerCase() 1582: */ 1583: public String toUpperCase() 1584: { 1585: return toUpperCase(Locale.getDefault()); 1586: } 1587: 1588: /** 1589: * Trims all characters less than or equal to <code>'\u0020'</code> 1590: * (<code>' '</code>) from the beginning and end of this String. This 1591: * includes many, but not all, ASCII control characters, and all 1592: * {@link Character#isWhitespace(char)}. 1593: * 1594: * @return new trimmed String, or this if nothing trimmed 1595: */ 1596: public String trim() 1597: { 1598: int limit = count + offset; 1599: if (count == 0 || (value[offset] > '\u0020' 1600: && value[limit - 1] > '\u0020')) 1601: return this; 1602: int begin = offset; 1603: do 1604: if (begin == limit) 1605: return ""; 1606: while (value[begin++] <= '\u0020'); 1607: int end = limit; 1608: while (value[--end] <= '\u0020'); 1609: return substring(begin - offset - 1, end - offset + 1); 1610: } 1611: 1612: /** 1613: * Returns this, as it is already a String! 1614: * 1615: * @return this 1616: */ 1617: public String toString() 1618: { 1619: return this; 1620: } 1621: 1622: /** 1623: * Copies the contents of this String into a character array. Subsequent 1624: * changes to the array do not affect the String. 1625: * 1626: * @return character array copying the String 1627: */ 1628: public char[] toCharArray() 1629: { 1630: if (count == value.length) 1631: return (char[]) value.clone(); 1632: 1633: char[] copy = new char[count]; 1634: VMSystem.arraycopy(value, offset, copy, 0, count); 1635: return copy; 1636: } 1637: 1638: /** 1639: * Returns a String representation of an Object. This is "null" if the 1640: * object is null, otherwise it is <code>obj.toString()</code> (which 1641: * can be null). 
1642: * 1643: * @param obj the Object 1644: * @return the string conversion of obj 1645: */ 1646: public static String valueOf(Object obj) 1647: { 1648: return obj == null ? "null" : obj.toString(); 1649: } 1650: 1651: /** 1652: * Returns a String representation of a character array. Subsequent 1653: * changes to the array do not affect the String. 1654: * 1655: * @param data the character array 1656: * @return a String containing the same character sequence as data 1657: * @throws NullPointerException if data is null 1658: * @see #valueOf(char[], int, int) 1659: * @see #String(char[]) 1660: */ 1661: public static String valueOf(char[] data) 1662: { 1663: return valueOf (data, 0, data.length); 1664: } 1665: 1666: /** 1667: * Returns a String representing the character sequence of the char array, 1668: * starting at the specified offset, and copying chars up to the specified 1669: * count. Subsequent changes to the array do not affect the String. 1670: * 1671: * @param data character array 1672: * @param offset position (base 0) to start copying out of data 1673: * @param count the number of characters from data to copy 1674: * @return String containing the chars from data[offset..offset+count] 1675: * @throws NullPointerException if data is null 1676: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 1677: * || offset + count < 0 (overflow) 1678: * || offset + count > data.length) 1679: * (while unspecified, this is a StringIndexOutOfBoundsException) 1680: * @see #String(char[], int, int) 1681: */ 1682: public static String valueOf(char[] data, int offset, int count) 1683: { 1684: return new String(data, offset, count, false); 1685: } 1686: 1687: /** 1688: * Returns a String representing the character sequence of the char array, 1689: * starting at the specified offset, and copying chars up to the specified 1690: * count. Subsequent changes to the array do not affect the String. 
1691: * 1692: * @param data character array 1693: * @param offset position (base 0) to start copying out of data 1694: * @param count the number of characters from data to copy 1695: * @return String containing the chars from data[offset..offset+count] 1696: * @throws NullPointerException if data is null 1697: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 1698: * || offset + count < 0 (overflow) 1699: * || offset + count > data.length) 1700: * (while unspecified, this is a StringIndexOutOfBoundsException) 1701: * @see #String(char[], int, int) 1702: */ 1703: public static String copyValueOf(char[] data, int offset, int count) 1704: { 1705: return new String(data, offset, count, false); 1706: } 1707: 1708: /** 1709: * Returns a String representation of a character array. Subsequent 1710: * changes to the array do not affect the String. 1711: * 1712: * @param data the character array 1713: * @return a String containing the same character sequence as data 1714: * @throws NullPointerException if data is null 1715: * @see #copyValueOf(char[], int, int) 1716: * @see #String(char[]) 1717: */ 1718: public static String copyValueOf(char[] data) 1719: { 1720: return copyValueOf (data, 0, data.length); 1721: } 1722: 1723: /** 1724: * Returns a String representing a boolean. 1725: * 1726: * @param b the boolean 1727: * @return "true" if b is true, else "false" 1728: */ 1729: public static String valueOf(boolean b) 1730: { 1731: return b ? "true" : "false"; 1732: } 1733: 1734: /** 1735: * Returns a String representing a character. 1736: * 1737: * @param c the character 1738: * @return String containing the single character c 1739: */ 1740: public static String valueOf(char c) 1741: { 1742: // Package constructor avoids an array copy. 1743: return new String(new char[] { c }, 0, 1, true); 1744: } 1745: 1746: /** 1747: * Returns a String representing an integer. 
1748: * 1749: * @param i the integer 1750: * @return String containing the integer in base 10 1751: * @see Integer#toString(int) 1752: */ 1753: public static String valueOf(int i) 1754: { 1755: // See Integer to understand why we call the two-arg variant. 1756: return Integer.toString(i, 10); 1757: } 1758: 1759: /** 1760: * Returns a String representing a long. 1761: * 1762: * @param l the long 1763: * @return String containing the long in base 10 1764: * @see Long#toString(long) 1765: */ 1766: public static String valueOf(long l) 1767: { 1768: return Long.toString(l); 1769: } 1770: 1771: /** 1772: * Returns a String representing a float. 1773: * 1774: * @param f the float 1775: * @return String containing the float 1776: * @see Float#toString(float) 1777: */ 1778: public static String valueOf(float f) 1779: { 1780: return Float.toString(f); 1781: } 1782: 1783: /** 1784: * Returns a String representing a double. 1785: * 1786: * @param d the double 1787: * @return String containing the double 1788: * @see Double#toString(double) 1789: */ 1790: public static String valueOf(double d) 1791: { 1792: return Double.toString(d); 1793: } 1794: 1795: /** 1796: * If two Strings are considered equal, by the equals() method, 1797: * then intern() will return the same String instance. ie. 1798: * if (s1.equals(s2)) then (s1.intern() == s2.intern()). 1799: * All string literals and string-valued constant expressions 1800: * are already interned. 1801: * 1802: * @return the interned String 1803: */ 1804: public String intern() 1805: { 1806: return VMString.intern(this); 1807: } 1808: 1809: /** 1810: * Return the number of code points between two indices in the 1811: * <code>String</code>. An unpaired surrogate counts as a 1812: * code point for this purpose. Characters outside the indicated 1813: * range are not examined, even if the range ends in the middle of a 1814: * surrogate pair. 
1815: * 1816: * @param start the starting index 1817: * @param end one past the ending index 1818: * @return the number of code points 1819: * @since 1.5 1820: */ 1821: public synchronized int codePointCount(int start, int end) 1822: { 1823: if (start < 0 || end > count || start > end) 1824: throw new StringIndexOutOfBoundsException(); 1825: 1826: start += offset; 1827: end += offset; 1828: int count = 0; 1829: while (start < end) 1830: { 1831: char base = value[start]; 1832: if (base < Character.MIN_HIGH_SURROGATE 1833: || base > Character.MAX_HIGH_SURROGATE 1834: || start == end 1835: || start == count 1836: || value[start + 1] < Character.MIN_LOW_SURROGATE 1837: || value[start + 1] > Character.MAX_LOW_SURROGATE) 1838: { 1839: // Nothing. 1840: } 1841: else 1842: { 1843: // Surrogate pair. 1844: ++start; 1845: } 1846: ++start; 1847: ++count; 1848: } 1849: return count; 1850: } 1851: 1852: /** 1853: * Helper function used to detect which characters have a multi-character 1854: * uppercase expansion. Note that this is only used in locations which 1855: * track one-to-many capitalization (java.lang.Character does not do this). 1856: * As of Unicode 3.0.0, the result is limited in the range 0 to 2, as the 1857: * longest uppercase expansion is three characters (a growth of 2 from the 1858: * lowercase character). 1859: * 1860: * @param ch the char to check 1861: * @return the number of characters to add when converting to uppercase 1862: * @see CharData#DIRECTION 1863: * @see CharData#UPPER_SPECIAL 1864: * @see #toUpperCase(Locale) 1865: */ 1866: private static int upperCaseExpansion(char ch) 1867: { 1868: return Character.direction[0][Character.readCodePoint((int)ch) >> 7] & 3; 1869: } 1870: 1871: /** 1872: * Helper function used to locate the offset in upperExpand given a 1873: * character with a multi-character expansion. 
The binary search is 1874: * optimized under the assumption that this method will only be called on 1875: * characters which exist in upperSpecial. 1876: * 1877: * @param ch the char to check 1878: * @return the index where its expansion begins 1879: * @see CharData#UPPER_SPECIAL 1880: * @see CharData#UPPER_EXPAND 1881: * @see #toUpperCase(Locale) 1882: */ 1883: private static int upperCaseIndex(char ch) 1884: { 1885: // Simple binary search for the correct character. 1886: int low = 0; 1887: int hi = upperSpecial.length - 2; 1888: int mid = ((low + hi) >> 2) << 1; 1889: char c = upperSpecial[mid]; 1890: while (ch != c) 1891: { 1892: if (ch < c) 1893: hi = mid - 2; 1894: else 1895: low = mid + 2; 1896: mid = ((low + hi) >> 2) << 1; 1897: c = upperSpecial[mid]; 1898: } 1899: return upperSpecial[mid + 1]; 1900: } 1901: 1902: /** 1903: * Returns the value array of the given string if it is zero based or a 1904: * copy of it that is zero based (stripping offset and making length equal 1905: * to count). Used for accessing the char[]s of gnu.java.lang.CharData. 1906: * Package private for use in Character. 1907: */ 1908: static char[] zeroBasedStringValue(String s) 1909: { 1910: char[] value; 1911: 1912: if (s.offset == 0 && s.count == s.value.length) 1913: value = s.value; 1914: else 1915: { 1916: int count = s.count; 1917: value = new char[count]; 1918: VMSystem.arraycopy(s.value, s.offset, value, 0, count); 1919: } 1920: 1921: return value; 1922: } 1923: 1924: /** 1925: * Returns true iff this String contains the sequence of Characters 1926: * described in s. 
1927: * @param s the CharSequence 1928: * @return true iff this String contains s 1929: * 1930: * @since 1.5 1931: */ 1932: public boolean contains (CharSequence s) 1933: { 1934: return this.indexOf(s.toString()) != -1; 1935: } 1936: 1937: /** 1938: * Returns a string that is this string with all instances of the sequence 1939: * represented by <code>target</code> replaced by the sequence in 1940: * <code>replacement</code>. 1941: * @param target the sequence to be replaced 1942: * @param replacement the sequence used as the replacement 1943: * @return the string constructed as above 1944: */ 1945: public String replace (CharSequence target, CharSequence replacement) 1946: { 1947: String targetString = target.toString(); 1948: String replaceString = replacement.toString(); 1949: int targetLength = target.length(); 1950: int replaceLength = replacement.length(); 1951: 1952: int startPos = this.indexOf(targetString); 1953: StringBuilder result = new StringBuilder(this); 1954: while (startPos != -1) 1955: { 1956: // Replace the target with the replacement 1957: result.replace(startPos, startPos + targetLength, replaceString); 1958: 1959: // Search for a new occurrence of the target 1960: startPos = result.indexOf(targetString, startPos + replaceLength); 1961: } 1962: return result.toString(); 1963: } 1964: 1965: /** 1966: * Return the index into this String that is offset from the given index by 1967: * <code>codePointOffset</code> code points. 1968: * @param index the index at which to start 1969: * @param codePointOffset the number of code points to offset 1970: * @return the index into this String that is <code>codePointOffset</code> 1971: * code points offset from <code>index</code>. 1972: * 1973: * @throws IndexOutOfBoundsException if index is negative or larger than the 1974: * length of this string. 1975: * @throws IndexOutOfBoundsException if codePointOffset is positive and the 1976: * substring starting with index has fewer than codePointOffset code points. 
* @throws IndexOutOfBoundsException if codePointOffset is negative and the
   * substring ending with index has fewer than (-codePointOffset) code points.
   * @since 1.5
   */
  public int offsetByCodePoints(int index, int codePointOffset)
  {
    if (index < 0 || index > count)
      throw new IndexOutOfBoundsException();

    // The Character helper is handed the backing array together with this
    // string's window into it (offset, count), and the start position is
    // passed as an array position (offset + index). Per the
    // Character.offsetByCodePoints(char[], int, int, int, int) contract the
    // answer comes back in array coordinates as well.
    int arrayIndex = Character.offsetByCodePoints(value, offset, count,
                                                  offset + index,
                                                  codePointOffset);

    // Convert back to an index relative to the start of this String, which
    // matters whenever offset is not zero.
    return arrayIndex - offset;
  }
}
GNU Classpath (0.92) |