GNU Classpath (0.20) | |
Frames | No Frames |
1: /* String.java -- immutable character sequences; the object of string literals 2: Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2005 3: Free Software Foundation, Inc. 4: 5: This file is part of GNU Classpath. 6: 7: GNU Classpath is free software; you can redistribute it and/or modify 8: it under the terms of the GNU General Public License as published by 9: the Free Software Foundation; either version 2, or (at your option) 10: any later version. 11: 12: GNU Classpath is distributed in the hope that it will be useful, but 13: WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: General Public License for more details. 16: 17: You should have received a copy of the GNU General Public License 18: along with GNU Classpath; see the file COPYING. If not, write to the 19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20: 02110-1301 USA. 21: 22: Linking this library statically or dynamically with other modules is 23: making a combined work based on this library. Thus, the terms and 24: conditions of the GNU General Public License cover the whole 25: combination. 26: 27: As a special exception, the copyright holders of this library give you 28: permission to link this library with independent modules to produce an 29: executable, regardless of the license terms of these independent 30: modules, and to copy and distribute the resulting executable under 31: terms of your choice, provided that you also meet, for each linked 32: independent module, the terms and conditions of the license of that 33: module. An independent module is a module which is not derived from 34: or based on this library. If you modify this library, you may extend 35: this exception to your version of the library, but you are not 36: obligated to do so. If you do not wish to do so, delete this 37: exception statement from your version. */ 38: 39: 40: package java.lang; 41: 42: import gnu.java.lang.CharData; 43: 44: import java.io.Serializable; 45: import java.io.UnsupportedEncodingException; 46: import java.nio.ByteBuffer; 47: import java.nio.CharBuffer; 48: import java.nio.charset.CharacterCodingException; 49: import java.nio.charset.Charset; 50: import java.nio.charset.CharsetDecoder; 51: import java.nio.charset.CharsetEncoder; 52: import java.nio.charset.CodingErrorAction; 53: import java.nio.charset.IllegalCharsetNameException; 54: import java.nio.charset.UnsupportedCharsetException; 55: import java.text.Collator; 56: import java.util.Comparator; 57: import java.util.Locale; 58: import java.util.regex.Matcher; 59: import java.util.regex.Pattern; 60: import java.util.regex.PatternSyntaxException; 61: 62: /** 63: * Strings represent an immutable set of characters. All String literals 64: * are instances of this class, and two string literals with the same contents 65: * refer to the same String object. 66: * 67: * <p>This class also includes a number of methods for manipulating the 68: * contents of strings (of course, creating a new object if there are any 69: * changes, as String is immutable). Case mapping relies on Unicode 3.0.0 70: * standards, where some character sequences have a different number of 71: * characters in the uppercase version than the lower case. 72: * 73: * <p>Strings are special, in that they are the only object with an overloaded 74: * operator. When you use '+' with at least one String argument, both 75: * arguments have String conversion performed on them, and another String (not 76: * guaranteed to be unique) results. 77: * 78: * <p>String is special-cased when doing data serialization - rather than 79: * listing the fields of this class, a String object is converted to a string 80: * literal in the object stream. 81: * 82: * @author Paul N. Fisher 83: * @author Eric Blake (ebb9@email.byu.edu) 84: * @author Per Bothner (bothner@cygnus.com) 85: * @since 1.0 86: * @status updated to 1.4; but could use better data sharing via offset field 87: */ 88: public final class String implements Serializable, Comparable, CharSequence 89: { 90: // WARNING: String is a CORE class in the bootstrap cycle. See the comments 91: // in vm/reference/java/lang/Runtime for implications of this fact. 92: 93: /** 94: * This is probably not necessary because this class is special cased already 95: * but it will avoid showing up as a discrepancy when comparing SUIDs. 96: */ 97: private static final long serialVersionUID = -6849794470754667710L; 98: 99: /** 100: * Stores unicode multi-character uppercase expansion table. 101: * @see #toUpperCase(Locale) 102: * @see CharData#UPPER_EXPAND 103: */ 104: private static final char[] upperExpand 105: = zeroBasedStringValue(CharData.UPPER_EXPAND); 106: 107: /** 108: * Stores unicode multi-character uppercase special casing table. 109: * @see #upperCaseExpansion(char) 110: * @see CharData#UPPER_SPECIAL 111: */ 112: private static final char[] upperSpecial 113: = zeroBasedStringValue(CharData.UPPER_SPECIAL); 114: 115: /** 116: * Characters which make up the String. 117: * Package access is granted for use by StringBuffer. 118: */ 119: final char[] value; 120: 121: /** 122: * Holds the number of characters in value. This number is generally 123: * the same as value.length, but can be smaller because substrings and 124: * StringBuffers can share arrays. Package visible for use by trusted code. 125: */ 126: final int count; 127: 128: /** 129: * Caches the result of hashCode(). If this value is zero, the hashcode 130: * is considered uncached (even if 0 is the correct hash value). 131: */ 132: private int cachedHashCode; 133: 134: /** 135: * Holds the starting position for characters in value[]. Since 136: * substring()'s are common, the use of offset allows the operation 137: * to perform in O(1). Package access is granted for use by StringBuffer. 138: */ 139: final int offset; 140: 141: /** 142: * An implementation for {@link #CASE_INSENSITIVE_ORDER}. 143: * This must be {@link Serializable}. The class name is dictated by 144: * compatibility with Sun's JDK. 145: */ 146: private static final class CaseInsensitiveComparator 147: implements Comparator, Serializable 148: { 149: /** 150: * Compatible with JDK 1.2. 151: */ 152: private static final long serialVersionUID = 8575799808933029326L; 153: 154: /** 155: * The default private constructor generates unnecessary overhead. 156: */ 157: CaseInsensitiveComparator() {} 158: 159: /** 160: * Compares to Strings, using 161: * <code>String.compareToIgnoreCase(String)</code>. 162: * 163: * @param o1 the first string 164: * @param o2 the second string 165: * @return < 0, 0, or > 0 depending on the case-insensitive 166: * comparison of the two strings. 167: * @throws NullPointerException if either argument is null 168: * @throws ClassCastException if either argument is not a String 169: * @see #compareToIgnoreCase(String) 170: */ 171: public int compare(Object o1, Object o2) 172: { 173: return ((String) o1).compareToIgnoreCase((String) o2); 174: } 175: } // class CaseInsensitiveComparator 176: 177: /** 178: * A Comparator that uses <code>String.compareToIgnoreCase(String)</code>. 179: * This comparator is {@link Serializable}. Note that it ignores Locale, 180: * for that, you want a Collator. 181: * 182: * @see Collator#compare(String, String) 183: * @since 1.2 184: */ 185: public static final Comparator CASE_INSENSITIVE_ORDER 186: = new CaseInsensitiveComparator(); 187: 188: /** 189: * Creates an empty String (length 0). Unless you really need a new object, 190: * consider using <code>""</code> instead. 191: */ 192: public String() 193: { 194: value = "".value; 195: offset = 0; 196: count = 0; 197: } 198: 199: /** 200: * Copies the contents of a String to a new String. Since Strings are 201: * immutable, only a shallow copy is performed. 202: * 203: * @param str String to copy 204: * @throws NullPointerException if value is null 205: */ 206: public String(String str) 207: { 208: value = str.value; 209: offset = str.offset; 210: count = str.count; 211: cachedHashCode = str.cachedHashCode; 212: } 213: 214: /** 215: * Creates a new String using the character sequence of the char array. 216: * Subsequent changes to data do not affect the String. 217: * 218: * @param data char array to copy 219: * @throws NullPointerException if data is null 220: */ 221: public String(char[] data) 222: { 223: this(data, 0, data.length, false); 224: } 225: 226: /** 227: * Creates a new String using the character sequence of a subarray of 228: * characters. The string starts at offset, and copies count chars. 229: * Subsequent changes to data do not affect the String. 230: * 231: * @param data char array to copy 232: * @param offset position (base 0) to start copying out of data 233: * @param count the number of characters from data to copy 234: * @throws NullPointerException if data is null 235: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 236: * || offset + count < 0 (overflow) 237: * || offset + count > data.length) 238: * (while unspecified, this is a StringIndexOutOfBoundsException) 239: */ 240: public String(char[] data, int offset, int count) 241: { 242: this(data, offset, count, false); 243: } 244: 245: /** 246: * Creates a new String using an 8-bit array of integer values, starting at 247: * an offset, and copying up to the count. Each character c, using 248: * corresponding byte b, is created in the new String as if by performing: 249: * 250: * <pre> 251: * c = (char) (((hibyte & 0xff) << 8) | (b & 0xff)) 252: * </pre> 253: * 254: * @param ascii array of integer values 255: * @param hibyte top byte of each Unicode character 256: * @param offset position (base 0) to start copying out of ascii 257: * @param count the number of characters from ascii to copy 258: * @throws NullPointerException if ascii is null 259: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 260: * || offset + count < 0 (overflow) 261: * || offset + count > ascii.length) 262: * (while unspecified, this is a StringIndexOutOfBoundsException) 263: * @see #String(byte[]) 264: * @see #String(byte[], String) 265: * @see #String(byte[], int, int) 266: * @see #String(byte[], int, int, String) 267: * @deprecated use {@link #String(byte[], int, int, String)} to perform 268: * correct encoding 269: */ 270: public String(byte[] ascii, int hibyte, int offset, int count) 271: { 272: if (offset < 0) 273: throw new StringIndexOutOfBoundsException("offset: " + offset); 274: if (count < 0) 275: throw new StringIndexOutOfBoundsException("count: " + count); 276: // equivalent to: offset + count < 0 || offset + count > ascii.length 277: if (ascii.length - offset < count) 278: throw new StringIndexOutOfBoundsException("offset + count: " 279: + (offset + count)); 280: value = new char[count]; 281: this.offset = 0; 282: this.count = count; 283: hibyte <<= 8; 284: offset += count; 285: while (--count >= 0) 286: value[count] = (char) (hibyte | (ascii[--offset] & 0xff)); 287: } 288: 289: /** 290: * Creates a new String using an 8-bit array of integer values. Each 291: * character c, using corresponding byte b, is created in the new String 292: * as if by performing: 293: * 294: * <pre> 295: * c = (char) (((hibyte & 0xff) << 8) | (b & 0xff)) 296: * </pre> 297: * 298: * @param ascii array of integer values 299: * @param hibyte top byte of each Unicode character 300: * @throws NullPointerException if ascii is null 301: * @see #String(byte[]) 302: * @see #String(byte[], String) 303: * @see #String(byte[], int, int) 304: * @see #String(byte[], int, int, String) 305: * @see #String(byte[], int, int, int) 306: * @deprecated use {@link #String(byte[], String)} to perform 307: * correct encoding 308: */ 309: public String(byte[] ascii, int hibyte) 310: { 311: this(ascii, hibyte, 0, ascii.length); 312: } 313: 314: /** 315: * Creates a new String using the portion of the byte array starting at the 316: * offset and ending at offset + count. Uses the specified encoding type 317: * to decode the byte array, so the resulting string may be longer or 318: * shorter than the byte array. For more decoding control, use 319: * {@link java.nio.charset.CharsetDecoder}, and for valid character sets, 320: * see {@link java.nio.charset.Charset}. The behavior is not specified if 321: * the decoder encounters invalid characters; this implementation throws 322: * an Error. 323: * 324: * @param data byte array to copy 325: * @param offset the offset to start at 326: * @param count the number of bytes in the array to use 327: * @param encoding the name of the encoding to use 328: * @throws NullPointerException if data or encoding is null 329: * @throws IndexOutOfBoundsException if offset or count is incorrect 330: * (while unspecified, this is a StringIndexOutOfBoundsException) 331: * @throws UnsupportedEncodingException if encoding is not found 332: * @throws Error if the decoding fails 333: * @since 1.1 334: */ 335: public String(byte[] data, int offset, int count, String encoding) 336: throws UnsupportedEncodingException 337: { 338: if (offset < 0) 339: throw new StringIndexOutOfBoundsException("offset: " + offset); 340: if (count < 0) 341: throw new StringIndexOutOfBoundsException("count: " + count); 342: // equivalent to: offset + count < 0 || offset + count > data.length 343: if (data.length - offset < count) 344: throw new StringIndexOutOfBoundsException("offset + count: " 345: + (offset + count)); 346: try 347: { 348: CharsetDecoder csd = Charset.forName(encoding).newDecoder(); 349: csd.onMalformedInput(CodingErrorAction.REPLACE); 350: csd.onUnmappableCharacter(CodingErrorAction.REPLACE); 351: CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); 352: if(cbuf.hasArray()) 353: { 354: value = cbuf.array(); 355: this.offset = cbuf.position(); 356: this.count = cbuf.remaining(); 357: } else { 358: // Doubt this will happen. But just in case. 359: value = new char[cbuf.remaining()]; 360: cbuf.get(value); 361: this.offset = 0; 362: this.count = value.length; 363: } 364: } catch(CharacterCodingException e){ 365: throw new UnsupportedEncodingException("Encoding: "+encoding+ 366: " not found."); 367: } catch(IllegalCharsetNameException e){ 368: throw new UnsupportedEncodingException("Encoding: "+encoding+ 369: " not found."); 370: } catch(UnsupportedCharsetException e){ 371: throw new UnsupportedEncodingException("Encoding: "+encoding+ 372: " not found."); 373: } 374: } 375: 376: /** 377: * Creates a new String using the byte array. Uses the specified encoding 378: * type to decode the byte array, so the resulting string may be longer or 379: * shorter than the byte array. For more decoding control, use 380: * {@link java.nio.charset.CharsetDecoder}, and for valid character sets, 381: * see {@link java.nio.charset.Charset}. The behavior is not specified if 382: * the decoder encounters invalid characters; this implementation throws 383: * an Error. 384: * 385: * @param data byte array to copy 386: * @param encoding the name of the encoding to use 387: * @throws NullPointerException if data or encoding is null 388: * @throws UnsupportedEncodingException if encoding is not found 389: * @throws Error if the decoding fails 390: * @see #String(byte[], int, int, String) 391: * @since 1.1 392: */ 393: public String(byte[] data, String encoding) 394: throws UnsupportedEncodingException 395: { 396: this(data, 0, data.length, encoding); 397: } 398: 399: /** 400: * Creates a new String using the portion of the byte array starting at the 401: * offset and ending at offset + count. Uses the encoding of the platform's 402: * default charset, so the resulting string may be longer or shorter than 403: * the byte array. For more decoding control, use 404: * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified 405: * if the decoder encounters invalid characters; this implementation throws 406: * an Error. 407: * 408: * @param data byte array to copy 409: * @param offset the offset to start at 410: * @param count the number of bytes in the array to use 411: * @throws NullPointerException if data is null 412: * @throws IndexOutOfBoundsException if offset or count is incorrect 413: * @throws Error if the decoding fails 414: * @see #String(byte[], int, int, String) 415: * @since 1.1 416: */ 417: public String(byte[] data, int offset, int count) 418: { 419: if (offset < 0) 420: throw new StringIndexOutOfBoundsException("offset: " + offset); 421: if (count < 0) 422: throw new StringIndexOutOfBoundsException("count: " + count); 423: // equivalent to: offset + count < 0 || offset + count > data.length 424: if (data.length - offset < count) 425: throw new StringIndexOutOfBoundsException("offset + count: " 426: + (offset + count)); 427: int o, c; 428: char[] v; 429: String encoding; 430: try 431: { 432: encoding = System.getProperty("file.encoding"); 433: CharsetDecoder csd = Charset.forName(encoding).newDecoder(); 434: csd.onMalformedInput(CodingErrorAction.REPLACE); 435: csd.onUnmappableCharacter(CodingErrorAction.REPLACE); 436: CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); 437: if(cbuf.hasArray()) 438: { 439: v = cbuf.array(); 440: o = cbuf.position(); 441: c = cbuf.remaining(); 442: } else { 443: // Doubt this will happen. But just in case. 444: v = new char[cbuf.remaining()]; 445: cbuf.get(v); 446: o = 0; 447: c = v.length; 448: } 449: } catch(Exception ex){ 450: // If anything goes wrong (System property not set, 451: // NIO provider not available, etc) 452: // Default to the 'safe' encoding ISO8859_1 453: v = new char[count]; 454: o = 0; 455: c = count; 456: for (int i=0;i<count;i++) 457: v[i] = (char)data[offset+i]; 458: } 459: this.value = v; 460: this.offset = o; 461: this.count = c; 462: } 463: 464: /** 465: * Creates a new String using the byte array. Uses the encoding of the 466: * platform's default charset, so the resulting string may be longer or 467: * shorter than the byte array. For more decoding control, use 468: * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified 469: * if the decoder encounters invalid characters; this implementation throws 470: * an Error. 471: * 472: * @param data byte array to copy 473: * @throws NullPointerException if data is null 474: * @throws Error if the decoding fails 475: * @see #String(byte[], int, int) 476: * @see #String(byte[], int, int, String) 477: * @since 1.1 478: */ 479: public String(byte[] data) 480: { 481: this(data, 0, data.length); 482: } 483: 484: /** 485: * Creates a new String using the character sequence represented by 486: * the StringBuffer. Subsequent changes to buf do not affect the String. 487: * 488: * @param buffer StringBuffer to copy 489: * @throws NullPointerException if buffer is null 490: */ 491: public String(StringBuffer buffer) 492: { 493: synchronized (buffer) 494: { 495: offset = 0; 496: count = buffer.count; 497: // Share unless buffer is 3/4 empty. 498: if ((count << 2) < buffer.value.length) 499: { 500: value = new char[count]; 501: VMSystem.arraycopy(buffer.value, 0, value, 0, count); 502: } 503: else 504: { 505: buffer.shared = true; 506: value = buffer.value; 507: } 508: } 509: } 510: 511: /** 512: * Creates a new String using the character sequence represented by 513: * the StringBuilder. Subsequent changes to buf do not affect the String. 514: * 515: * @param buffer StringBuilder to copy 516: * @throws NullPointerException if buffer is null 517: */ 518: public String(StringBuilder buffer) 519: { 520: this(buffer.value, 0, buffer.count); 521: } 522: 523: /** 524: * Special constructor which can share an array when safe to do so. 525: * 526: * @param data the characters to copy 527: * @param offset the location to start from 528: * @param count the number of characters to use 529: * @param dont_copy true if the array is trusted, and need not be copied 530: * @throws NullPointerException if chars is null 531: * @throws StringIndexOutOfBoundsException if bounds check fails 532: */ 533: String(char[] data, int offset, int count, boolean dont_copy) 534: { 535: if (offset < 0) 536: throw new StringIndexOutOfBoundsException("offset: " + offset); 537: if (count < 0) 538: throw new StringIndexOutOfBoundsException("count: " + count); 539: // equivalent to: offset + count < 0 || offset + count > data.length 540: if (data.length - offset < count) 541: throw new StringIndexOutOfBoundsException("offset + count: " 542: + (offset + count)); 543: if (dont_copy) 544: { 545: value = data; 546: this.offset = offset; 547: } 548: else 549: { 550: value = new char[count]; 551: VMSystem.arraycopy(data, offset, value, 0, count); 552: this.offset = 0; 553: } 554: this.count = count; 555: } 556: 557: /** 558: * Returns the number of characters contained in this String. 559: * 560: * @return the length of this String 561: */ 562: public int length() 563: { 564: return count; 565: } 566: 567: /** 568: * Returns the character located at the specified index within this String. 569: * 570: * @param index position of character to return (base 0) 571: * @return character located at position index 572: * @throws IndexOutOfBoundsException if index < 0 || index >= length() 573: * (while unspecified, this is a StringIndexOutOfBoundsException) 574: */ 575: public char charAt(int index) 576: { 577: if (index < 0 || index >= count) 578: throw new StringIndexOutOfBoundsException(index); 579: return value[offset + index]; 580: } 581: 582: /** 583: * Get the code point at the specified index. This is like #charAt(int), 584: * but if the character is the start of a surrogate pair, and the 585: * following character completes the pair, then the corresponding 586: * supplementary code point is returned. 587: * @param index the index of the codepoint to get, starting at 0 588: * @return the codepoint at the specified index 589: * @throws IndexOutOfBoundsException if index is negative or >= length() 590: * @since 1.5 591: */ 592: public synchronized int codePointAt(int index) 593: { 594: // Use the CharSequence overload as we get better range checking 595: // this way. 596: return Character.codePointAt(this, index); 597: } 598: 599: /** 600: * Get the code point before the specified index. This is like 601: * #codePointAt(int), but checks the characters at <code>index-1</code> and 602: * <code>index-2</code> to see if they form a supplementary code point. 603: * @param index the index just past the codepoint to get, starting at 0 604: * @return the codepoint at the specified index 605: * @throws IndexOutOfBoundsException if index is negative or >= length() 606: * (while unspecified, this is a StringIndexOutOfBoundsException) 607: * @since 1.5 608: */ 609: public synchronized int codePointBefore(int index) 610: { 611: // Use the CharSequence overload as we get better range checking 612: // this way. 613: return Character.codePointBefore(this, index); 614: } 615: 616: /** 617: * Copies characters from this String starting at a specified start index, 618: * ending at a specified stop index, to a character array starting at 619: * a specified destination begin index. 620: * 621: * @param srcBegin index to begin copying characters from this String 622: * @param srcEnd index after the last character to be copied from this String 623: * @param dst character array which this String is copied into 624: * @param dstBegin index to start writing characters into dst 625: * @throws NullPointerException if dst is null 626: * @throws IndexOutOfBoundsException if any indices are out of bounds 627: * (while unspecified, source problems cause a 628: * StringIndexOutOfBoundsException, and dst problems cause an 629: * ArrayIndexOutOfBoundsException) 630: */ 631: public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 632: { 633: if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count) 634: throw new StringIndexOutOfBoundsException(); 635: VMSystem.arraycopy(value, srcBegin + offset, 636: dst, dstBegin, srcEnd - srcBegin); 637: } 638: 639: /** 640: * Copies the low byte of each character from this String starting at a 641: * specified start index, ending at a specified stop index, to a byte array 642: * starting at a specified destination begin index. 643: * 644: * @param srcBegin index to being copying characters from this String 645: * @param srcEnd index after the last character to be copied from this String 646: * @param dst byte array which each low byte of this String is copied into 647: * @param dstBegin index to start writing characters into dst 648: * @throws NullPointerException if dst is null and copy length is non-zero 649: * @throws IndexOutOfBoundsException if any indices are out of bounds 650: * (while unspecified, source problems cause a 651: * StringIndexOutOfBoundsException, and dst problems cause an 652: * ArrayIndexOutOfBoundsException) 653: * @see #getBytes() 654: * @see #getBytes(String) 655: * @deprecated use {@link #getBytes()}, which uses a char to byte encoder 656: */ 657: public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) 658: { 659: if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count) 660: throw new StringIndexOutOfBoundsException(); 661: int i = srcEnd - srcBegin; 662: srcBegin += offset; 663: while (--i >= 0) 664: dst[dstBegin++] = (byte) value[srcBegin++]; 665: } 666: 667: /** 668: * Converts the Unicode characters in this String to a byte array. Uses the 669: * specified encoding method, so the result may be longer or shorter than 670: * the String. For more encoding control, use 671: * {@link java.nio.charset.CharsetEncoder}, and for valid character sets, 672: * see {@link java.nio.charset.Charset}. Unsupported characters get 673: * replaced by an encoding specific byte. 674: * 675: * @param enc encoding name 676: * @return the resulting byte array 677: * @throws NullPointerException if enc is null 678: * @throws UnsupportedEncodingException if encoding is not supported 679: * @since 1.1 680: */ 681: public byte[] getBytes(String enc) throws UnsupportedEncodingException 682: { 683: try 684: { 685: CharsetEncoder cse = Charset.forName(enc).newEncoder(); 686: cse.onMalformedInput(CodingErrorAction.REPLACE); 687: cse.onUnmappableCharacter(CodingErrorAction.REPLACE); 688: ByteBuffer bbuf = cse.encode(CharBuffer.wrap(value, offset, count)); 689: if(bbuf.hasArray()) 690: return bbuf.array(); 691: 692: // Doubt this will happen. But just in case. 693: byte[] bytes = new byte[bbuf.remaining()]; 694: bbuf.get(bytes); 695: return bytes; 696: } 697: catch(IllegalCharsetNameException e) 698: { 699: throw new UnsupportedEncodingException("Encoding: " + enc 700: + " not found."); 701: } 702: catch(UnsupportedCharsetException e) 703: { 704: throw new UnsupportedEncodingException("Encoding: " + enc 705: + " not found."); 706: } 707: catch(CharacterCodingException e) 708: { 709: // This shouldn't ever happen. 710: throw (InternalError) new InternalError().initCause(e); 711: } 712: } 713: 714: /** 715: * Converts the Unicode characters in this String to a byte array. Uses the 716: * encoding of the platform's default charset, so the result may be longer 717: * or shorter than the String. For more encoding control, use 718: * {@link java.nio.charset.CharsetEncoder}. Unsupported characters get 719: * replaced by an encoding specific byte. 720: * 721: * @return the resulting byte array, or null on a problem 722: * @since 1.1 723: */ 724: public byte[] getBytes() 725: { 726: try 727: { 728: return getBytes(System.getProperty("file.encoding")); 729: } catch(Exception e) { 730: // XXX - Throw an error here? 731: // For now, default to the 'safe' encoding. 732: byte[] bytes = new byte[count]; 733: for(int i=0;i<count;i++) 734: bytes[i] = (byte)((value[offset+i] <= 0xFF)? 735: value[offset+i]:'?'); 736: return bytes; 737: } 738: } 739: 740: /** 741: * Predicate which compares anObject to this. This is true only for Strings 742: * with the same character sequence. 743: * 744: * @param anObject the object to compare 745: * @return true if anObject is semantically equal to this 746: * @see #compareTo(String) 747: * @see #equalsIgnoreCase(String) 748: */ 749: public boolean equals(Object anObject) 750: { 751: if (! (anObject instanceof String)) 752: return false; 753: String str2 = (String) anObject; 754: if (count != str2.count) 755: return false; 756: if (value == str2.value && offset == str2.offset) 757: return true; 758: int i = count; 759: int x = offset; 760: int y = str2.offset; 761: while (--i >= 0) 762: if (value[x++] != str2.value[y++]) 763: return false; 764: return true; 765: } 766: 767: /** 768: * Compares the given StringBuffer to this String. This is true if the 769: * StringBuffer has the same content as this String at this moment. 770: * 771: * @param buffer the StringBuffer to compare to 772: * @return true if StringBuffer has the same character sequence 773: * @throws NullPointerException if the given StringBuffer is null 774: * @since 1.4 775: */ 776: public boolean contentEquals(StringBuffer buffer) 777: { 778: synchronized (buffer) 779: { 780: if (count != buffer.count) 781: return false; 782: if (value == buffer.value) 783: return true; // Possible if shared. 784: int i = count; 785: int x = offset + count; 786: while (--i >= 0) 787: if (value[--x] != buffer.value[i]) 788: return false; 789: return true; 790: } 791: } 792: 793: /** 794: * Compares the given CharSequence to this String. This is true if 795: * the CharSequence has the same content as this String at this 796: * moment. 797: * 798: * @param seq the CharSequence to compare to 799: * @return true if CharSequence has the same character sequence 800: * @throws NullPointerException if the given CharSequence is null 801: * @since 1.5 802: */ 803: public boolean contentEquals(CharSequence seq) 804: { 805: if (seq.length() != count) 806: return false; 807: for (int i = 0; i < count; ++i) 808: if (value[offset + i] != seq.charAt(i)) 809: return false; 810: return true; 811: } 812: 813: /** 814: * Compares a String to this String, ignoring case. This does not handle 815: * multi-character capitalization exceptions; instead the comparison is 816: * made on a character-by-character basis, and is true if:<br><ul> 817: * <li><code>c1 == c2</code></li> 818: * <li><code>Character.toUpperCase(c1) 819: * == Character.toUpperCase(c2)</code></li> 820: * <li><code>Character.toLowerCase(c1) 821: * == Character.toLowerCase(c2)</code></li> 822: * </ul> 823: * 824: * @param anotherString String to compare to this String 825: * @return true if anotherString is equal, ignoring case 826: * @see #equals(Object) 827: * @see Character#toUpperCase(char) 828: * @see Character#toLowerCase(char) 829: */ 830: public boolean equalsIgnoreCase(String anotherString) 831: { 832: if (anotherString == null || count != anotherString.count) 833: return false; 834: int i = count; 835: int x = offset; 836: int y = anotherString.offset; 837: while (--i >= 0) 838: { 839: char c1 = value[x++]; 840: char c2 = anotherString.value[y++]; 841: // Note that checking c1 != c2 is redundant, but avoids method calls. 842: if (c1 != c2 843: && Character.toUpperCase(c1) != Character.toUpperCase(c2) 844: && Character.toLowerCase(c1) != Character.toLowerCase(c2)) 845: return false; 846: } 847: return true; 848: } 849: 850: /** 851: * Compares this String and another String (case sensitive, 852: * lexicographically). The result is less than 0 if this string sorts 853: * before the other, 0 if they are equal, and greater than 0 otherwise. 854: * After any common starting sequence is skipped, the result is 855: * <code>this.charAt(k) - anotherString.charAt(k)</code> if both strings 856: * have characters remaining, or 857: * <code>this.length() - anotherString.length()</code> if one string is 858: * a subsequence of the other. 859: * 860: * @param anotherString the String to compare against 861: * @return the comparison 862: * @throws NullPointerException if anotherString is null 863: */ 864: public int compareTo(String anotherString) 865: { 866: int i = Math.min(count, anotherString.count); 867: int x = offset; 868: int y = anotherString.offset; 869: while (--i >= 0) 870: { 871: int result = value[x++] - anotherString.value[y++]; 872: if (result != 0) 873: return result; 874: } 875: return count - anotherString.count; 876: } 877: 878: /** 879: * Behaves like <code>compareTo(java.lang.String)</code> unless the Object 880: * is not a <code>String</code>. Then it throws a 881: * <code>ClassCastException</code>. 882: * 883: * @param o the object to compare against 884: * @return the comparison 885: * @throws NullPointerException if o is null 886: * @throws ClassCastException if o is not a <code>String</code> 887: * @since 1.2 888: */ 889: public int compareTo(Object o) 890: { 891: return compareTo((String) o); 892: } 893: 894: /** 895: * Compares this String and another String (case insensitive). This 896: * comparison is <em>similar</em> to equalsIgnoreCase, in that it ignores 897: * locale and multi-characater capitalization, and compares characters 898: * after performing 899: * <code>Character.toLowerCase(Character.toUpperCase(c))</code> on each 900: * character of the string. This is unsatisfactory for locale-based 901: * comparison, in which case you should use {@link java.text.Collator}. 902: * 903: * @param str the string to compare against 904: * @return the comparison 905: * @see Collator#compare(String, String) 906: * @since 1.2 907: */ 908: public int compareToIgnoreCase(String str) 909: { 910: int i = Math.min(count, str.count); 911: int x = offset; 912: int y = str.offset; 913: while (--i >= 0) 914: { 915: int result = Character.toLowerCase(Character.toUpperCase(value[x++])) 916: - Character.toLowerCase(Character.toUpperCase(str.value[y++])); 917: if (result != 0) 918: return result; 919: } 920: return count - str.count; 921: } 922: 923: /** 924: * Predicate which determines if this String matches another String 925: * starting at a specified offset for each String and continuing 926: * for a specified length. Indices out of bounds are harmless, and give 927: * a false result. 928: * 929: * @param toffset index to start comparison at for this String 930: * @param other String to compare region to this String 931: * @param ooffset index to start comparison at for other 932: * @param len number of characters to compare 933: * @return true if regions match (case sensitive) 934: * @throws NullPointerException if other is null 935: */ 936: public boolean regionMatches(int toffset, String other, int ooffset, int len) 937: { 938: return regionMatches(false, toffset, other, ooffset, len); 939: } 940: 941: /** 942: * Predicate which determines if this String matches another String 943: * starting at a specified offset for each String and continuing 944: * for a specified length, optionally ignoring case. Indices out of bounds 945: * are harmless, and give a false result. Case comparisons are based on 946: * <code>Character.toLowerCase()</code> and 947: * <code>Character.toUpperCase()</code>, not on multi-character 948: * capitalization expansions. 949: * 950: * @param ignoreCase true if case should be ignored in comparision 951: * @param toffset index to start comparison at for this String 952: * @param other String to compare region to this String 953: * @param ooffset index to start comparison at for other 954: * @param len number of characters to compare 955: * @return true if regions match, false otherwise 956: * @throws NullPointerException if other is null 957: */ 958: public boolean regionMatches(boolean ignoreCase, int toffset, 959: String other, int ooffset, int len) 960: { 961: if (toffset < 0 || ooffset < 0 || toffset + len > count 962: || ooffset + len > other.count) 963: return false; 964: toffset += offset; 965: ooffset += other.offset; 966: while (--len >= 0) 967: { 968: char c1 = value[toffset++]; 969: char c2 = other.value[ooffset++]; 970: // Note that checking c1 != c2 is redundant when ignoreCase is true, 971: // but it avoids method calls. 972: if (c1 != c2 973: && (! ignoreCase 974: || (Character.toLowerCase(c1) != Character.toLowerCase(c2) 975: && (Character.toUpperCase(c1) 976: != Character.toUpperCase(c2))))) 977: return false; 978: } 979: return true; 980: } 981: 982: /** 983: * Predicate which determines if this String contains the given prefix, 984: * beginning comparison at toffset. The result is false if toffset is 985: * negative or greater than this.length(), otherwise it is the same as 986: * <code>this.substring(toffset).startsWith(prefix)</code>. 987: * 988: * @param prefix String to compare 989: * @param toffset offset for this String where comparison starts 990: * @return true if this String starts with prefix 991: * @throws NullPointerException if prefix is null 992: * @see #regionMatches(boolean, int, String, int, int) 993: */ 994: public boolean startsWith(String prefix, int toffset) 995: { 996: return regionMatches(false, toffset, prefix, 0, prefix.count); 997: } 998: 999: /** 1000: * Predicate which determines if this String starts with a given prefix. 1001: * If the prefix is an empty String, true is returned. 1002: * 1003: * @param prefix String to compare 1004: * @return true if this String starts with the prefix 1005: * @throws NullPointerException if prefix is null 1006: * @see #startsWith(String, int) 1007: */ 1008: public boolean startsWith(String prefix) 1009: { 1010: return regionMatches(false, 0, prefix, 0, prefix.count); 1011: } 1012: 1013: /** 1014: * Predicate which determines if this String ends with a given suffix. 1015: * If the suffix is an empty String, true is returned. 1016: * 1017: * @param suffix String to compare 1018: * @return true if this String ends with the suffix 1019: * @throws NullPointerException if suffix is null 1020: * @see #regionMatches(boolean, int, String, int, int) 1021: */ 1022: public boolean endsWith(String suffix) 1023: { 1024: return regionMatches(false, count - suffix.count, suffix, 0, suffix.count); 1025: } 1026: 1027: /** 1028: * Computes the hashcode for this String. This is done with int arithmetic, 1029: * where ** represents exponentiation, by this formula:<br> 1030: * <code>s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]</code>. 1031: * 1032: * @return hashcode value of this String 1033: */ 1034: public int hashCode() 1035: { 1036: if (cachedHashCode != 0) 1037: return cachedHashCode; 1038: 1039: // Compute the hash code using a local variable to be reentrant. 1040: int hashCode = 0; 1041: int limit = count + offset; 1042: for (int i = offset; i < limit; i++) 1043: hashCode = hashCode * 31 + value[i]; 1044: return cachedHashCode = hashCode; 1045: } 1046: 1047: /** 1048: * Finds the first instance of a character in this String. 1049: * 1050: * @param ch character to find 1051: * @return location (base 0) of the character, or -1 if not found 1052: */ 1053: public int indexOf(int ch) 1054: { 1055: return indexOf(ch, 0); 1056: } 1057: 1058: /** 1059: * Finds the first instance of a character in this String, starting at 1060: * a given index. If starting index is less than 0, the search 1061: * starts at the beginning of this String. If the starting index 1062: * is greater than the length of this String, -1 is returned. 1063: * 1064: * @param ch character to find 1065: * @param fromIndex index to start the search 1066: * @return location (base 0) of the character, or -1 if not found 1067: */ 1068: public int indexOf(int ch, int fromIndex) 1069: { 1070: if ((char) ch != ch) 1071: return -1; 1072: if (fromIndex < 0) 1073: fromIndex = 0; 1074: int i = fromIndex + offset; 1075: for ( ; fromIndex < count; fromIndex++) 1076: if (value[i++] == ch) 1077: return fromIndex; 1078: return -1; 1079: } 1080: 1081: /** 1082: * Finds the last instance of a character in this String. 1083: * 1084: * @param ch character to find 1085: * @return location (base 0) of the character, or -1 if not found 1086: */ 1087: public int lastIndexOf(int ch) 1088: { 1089: return lastIndexOf(ch, count - 1); 1090: } 1091: 1092: /** 1093: * Finds the last instance of a character in this String, starting at 1094: * a given index. If starting index is greater than the maximum valid 1095: * index, then the search begins at the end of this String. If the 1096: * starting index is less than zero, -1 is returned. 1097: * 1098: * @param ch character to find 1099: * @param fromIndex index to start the search 1100: * @return location (base 0) of the character, or -1 if not found 1101: */ 1102: public int lastIndexOf(int ch, int fromIndex) 1103: { 1104: if ((char) ch != ch) 1105: return -1; 1106: if (fromIndex >= count) 1107: fromIndex = count - 1; 1108: int i = fromIndex + offset; 1109: for ( ; fromIndex >= 0; fromIndex--) 1110: if (value[i--] == ch) 1111: return fromIndex; 1112: return -1; 1113: } 1114: 1115: /** 1116: * Finds the first instance of a String in this String. 1117: * 1118: * @param str String to find 1119: * @return location (base 0) of the String, or -1 if not found 1120: * @throws NullPointerException if str is null 1121: */ 1122: public int indexOf(String str) 1123: { 1124: return indexOf(str, 0); 1125: } 1126: 1127: /** 1128: * Finds the first instance of a String in this String, starting at 1129: * a given index. If starting index is less than 0, the search 1130: * starts at the beginning of this String. If the starting index 1131: * is greater than the length of this String, -1 is returned. 1132: * 1133: * @param str String to find 1134: * @param fromIndex index to start the search 1135: * @return location (base 0) of the String, or -1 if not found 1136: * @throws NullPointerException if str is null 1137: */ 1138: public int indexOf(String str, int fromIndex) 1139: { 1140: if (fromIndex < 0) 1141: fromIndex = 0; 1142: int limit = count - str.count; 1143: for ( ; fromIndex <= limit; fromIndex++) 1144: if (regionMatches(fromIndex, str, 0, str.count)) 1145: return fromIndex; 1146: return -1; 1147: } 1148: 1149: /** 1150: * Finds the last instance of a String in this String. 1151: * 1152: * @param str String to find 1153: * @return location (base 0) of the String, or -1 if not found 1154: * @throws NullPointerException if str is null 1155: */ 1156: public int lastIndexOf(String str) 1157: { 1158: return lastIndexOf(str, count - str.count); 1159: } 1160: 1161: /** 1162: * Finds the last instance of a String in this String, starting at 1163: * a given index. If starting index is greater than the maximum valid 1164: * index, then the search begins at the end of this String. If the 1165: * starting index is less than zero, -1 is returned. 1166: * 1167: * @param str String to find 1168: * @param fromIndex index to start the search 1169: * @return location (base 0) of the String, or -1 if not found 1170: * @throws NullPointerException if str is null 1171: */ 1172: public int lastIndexOf(String str, int fromIndex) 1173: { 1174: fromIndex = Math.min(fromIndex, count - str.count); 1175: for ( ; fromIndex >= 0; fromIndex--) 1176: if (regionMatches(fromIndex, str, 0, str.count)) 1177: return fromIndex; 1178: return -1; 1179: } 1180: 1181: /** 1182: * Creates a substring of this String, starting at a specified index 1183: * and ending at the end of this String. 1184: * 1185: * @param begin index to start substring (base 0) 1186: * @return new String which is a substring of this String 1187: * @throws IndexOutOfBoundsException if begin < 0 || begin > length() 1188: * (while unspecified, this is a StringIndexOutOfBoundsException) 1189: */ 1190: public String substring(int begin) 1191: { 1192: return substring(begin, count); 1193: } 1194: 1195: /** 1196: * Creates a substring of this String, starting at a specified index 1197: * and ending at one character before a specified index. 1198: * 1199: * @param beginIndex index to start substring (inclusive, base 0) 1200: * @param endIndex index to end at (exclusive) 1201: * @return new String which is a substring of this String 1202: * @throws IndexOutOfBoundsException if begin < 0 || end > length() 1203: * || begin > end (while unspecified, this is a 1204: * StringIndexOutOfBoundsException) 1205: */ 1206: public String substring(int beginIndex, int endIndex) 1207: { 1208: if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) 1209: throw new StringIndexOutOfBoundsException(); 1210: if (beginIndex == 0 && endIndex == count) 1211: return this; 1212: int len = endIndex - beginIndex; 1213: // Package constructor avoids an array copy. 1214: return new String(value, beginIndex + offset, len, 1215: (len << 2) >= value.length); 1216: } 1217: 1218: /** 1219: * Creates a substring of this String, starting at a specified index 1220: * and ending at one character before a specified index. This behaves like 1221: * <code>substring(begin, end)</code>. 1222: * 1223: * @param begin index to start substring (inclusive, base 0) 1224: * @param end index to end at (exclusive) 1225: * @return new String which is a substring of this String 1226: * @throws IndexOutOfBoundsException if begin < 0 || end > length() 1227: * || begin > end 1228: * @since 1.4 1229: */ 1230: public CharSequence subSequence(int begin, int end) 1231: { 1232: return substring(begin, end); 1233: } 1234: 1235: /** 1236: * Concatenates a String to this String. This results in a new string unless 1237: * one of the two originals is "". 1238: * 1239: * @param str String to append to this String 1240: * @return newly concatenated String 1241: * @throws NullPointerException if str is null 1242: */ 1243: public String concat(String str) 1244: { 1245: if (str.count == 0) 1246: return this; 1247: if (count == 0) 1248: return str; 1249: char[] newStr = new char[count + str.count]; 1250: VMSystem.arraycopy(value, offset, newStr, 0, count); 1251: VMSystem.arraycopy(str.value, str.offset, newStr, count, str.count); 1252: // Package constructor avoids an array copy. 1253: return new String(newStr, 0, newStr.length, true); 1254: } 1255: 1256: /** 1257: * Replaces every instance of a character in this String with a new 1258: * character. If no replacements occur, this is returned. 1259: * 1260: * @param oldChar the old character to replace 1261: * @param newChar the new character 1262: * @return new String with all instances of oldChar replaced with newChar 1263: */ 1264: public String replace(char oldChar, char newChar) 1265: { 1266: if (oldChar == newChar) 1267: return this; 1268: int i = count; 1269: int x = offset - 1; 1270: while (--i >= 0) 1271: if (value[++x] == oldChar) 1272: break; 1273: if (i < 0) 1274: return this; 1275: char[] newStr = (char[]) value.clone(); 1276: newStr[x] = newChar; 1277: while (--i >= 0) 1278: if (value[++x] == oldChar) 1279: newStr[x] = newChar; 1280: // Package constructor avoids an array copy. 1281: return new String(newStr, offset, count, true); 1282: } 1283: 1284: /** 1285: * Test if this String matches a regular expression. This is shorthand for 1286: * <code>{@link Pattern}.matches(regex, this)</code>. 1287: * 1288: * @param regex the pattern to match 1289: * @return true if the pattern matches 1290: * @throws NullPointerException if regex is null 1291: * @throws PatternSyntaxException if regex is invalid 1292: * @see Pattern#matches(String, CharSequence) 1293: * @since 1.4 1294: */ 1295: public boolean matches(String regex) 1296: { 1297: return Pattern.matches(regex, this); 1298: } 1299: 1300: /** 1301: * Replaces the first substring match of the regular expression with a 1302: * given replacement. This is shorthand for <code>{@link Pattern} 1303: * .compile(regex).matcher(this).replaceFirst(replacement)</code>. 1304: * 1305: * @param regex the pattern to match 1306: * @param replacement the replacement string 1307: * @return the modified string 1308: * @throws NullPointerException if regex or replacement is null 1309: * @throws PatternSyntaxException if regex is invalid 1310: * @see #replaceAll(String, String) 1311: * @see Pattern#compile(String) 1312: * @see Pattern#matcher(CharSequence) 1313: * @see Matcher#replaceFirst(String) 1314: * @since 1.4 1315: */ 1316: public String replaceFirst(String regex, String replacement) 1317: { 1318: return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 1319: } 1320: 1321: /** 1322: * Replaces all matching substrings of the regular expression with a 1323: * given replacement. This is shorthand for <code>{@link Pattern} 1324: * .compile(regex).matcher(this).replaceAll(replacement)</code>. 1325: * 1326: * @param regex the pattern to match 1327: * @param replacement the replacement string 1328: * @return the modified string 1329: * @throws NullPointerException if regex or replacement is null 1330: * @throws PatternSyntaxException if regex is invalid 1331: * @see #replaceFirst(String, String) 1332: * @see Pattern#compile(String) 1333: * @see Pattern#matcher(CharSequence) 1334: * @see Matcher#replaceAll(String) 1335: * @since 1.4 1336: */ 1337: public String replaceAll(String regex, String replacement) 1338: { 1339: return Pattern.compile(regex).matcher(this).replaceAll(replacement); 1340: } 1341: 1342: /** 1343: * Split this string around the matches of a regular expression. Each 1344: * element of the returned array is the largest block of characters not 1345: * terminated by the regular expression, in the order the matches are found. 1346: * 1347: * <p>The limit affects the length of the array. If it is positive, the 1348: * array will contain at most n elements (n - 1 pattern matches). If 1349: * negative, the array length is unlimited, but there can be trailing empty 1350: * entries. if 0, the array length is unlimited, and trailing empty entries 1351: * are discarded. 1352: * 1353: * <p>For example, splitting "boo:and:foo" yields:<br> 1354: * <table border=0> 1355: * <th><td>Regex</td> <td>Limit</td> <td>Result</td></th> 1356: * <tr><td>":"</td> <td>2</td> <td>{ "boo", "and:foo" }</td></tr> 1357: * <tr><td>":"</td> <td>t</td> <td>{ "boo", "and", "foo" }</td></tr> 1358: * <tr><td>":"</td> <td>-2</td> <td>{ "boo", "and", "foo" }</td></tr> 1359: * <tr><td>"o"</td> <td>5</td> <td>{ "b", "", ":and:f", "", "" }</td></tr> 1360: * <tr><td>"o"</td> <td>-2</td> <td>{ "b", "", ":and:f", "", "" }</td></tr> 1361: * <tr><td>"o"</td> <td>0</td> <td>{ "b", "", ":and:f" }</td></tr> 1362: * </table> 1363: * 1364: * <p>This is shorthand for 1365: * <code>{@link Pattern}.compile(regex).split(this, limit)</code>. 1366: * 1367: * @param regex the pattern to match 1368: * @param limit the limit threshold 1369: * @return the array of split strings 1370: * @throws NullPointerException if regex or replacement is null 1371: * @throws PatternSyntaxException if regex is invalid 1372: * @see Pattern#compile(String) 1373: * @see Pattern#split(CharSequence, int) 1374: * @since 1.4 1375: */ 1376: public String[] split(String regex, int limit) 1377: { 1378: return Pattern.compile(regex).split(this, limit); 1379: } 1380: 1381: /** 1382: * Split this string around the matches of a regular expression. Each 1383: * element of the returned array is the largest block of characters not 1384: * terminated by the regular expression, in the order the matches are found. 1385: * The array length is unlimited, and trailing empty entries are discarded, 1386: * as though calling <code>split(regex, 0)</code>. 1387: * 1388: * @param regex the pattern to match 1389: * @return the array of split strings 1390: * @throws NullPointerException if regex or replacement is null 1391: * @throws PatternSyntaxException if regex is invalid 1392: * @see #split(String, int) 1393: * @see Pattern#compile(String) 1394: * @see Pattern#split(CharSequence, int) 1395: * @since 1.4 1396: */ 1397: public String[] split(String regex) 1398: { 1399: return Pattern.compile(regex).split(this, 0); 1400: } 1401: 1402: /** 1403: * Lowercases this String according to a particular locale. This uses 1404: * Unicode's special case mappings, as applied to the given Locale, so the 1405: * resulting string may be a different length. 1406: * 1407: * @param loc locale to use 1408: * @return new lowercased String, or this if no characters were lowercased 1409: * @throws NullPointerException if loc is null 1410: * @see #toUpperCase(Locale) 1411: * @since 1.1 1412: */ 1413: public String toLowerCase(Locale loc) 1414: { 1415: // First, see if the current string is already lower case. 1416: boolean turkish = "tr".equals(loc.getLanguage()); 1417: int i = count; 1418: int x = offset - 1; 1419: while (--i >= 0) 1420: { 1421: char ch = value[++x]; 1422: if ((turkish && ch == '\u0049') 1423: || ch != Character.toLowerCase(ch)) 1424: break; 1425: } 1426: if (i < 0) 1427: return this; 1428: 1429: // Now we perform the conversion. Fortunately, there are no multi-character 1430: // lowercase expansions in Unicode 3.0.0. 1431: char[] newStr = (char[]) value.clone(); 1432: do 1433: { 1434: char ch = value[x]; 1435: // Hardcoded special case. 1436: newStr[x++] = (turkish && ch == '\u0049') ? '\u0131' 1437: : Character.toLowerCase(ch); 1438: } 1439: while (--i >= 0); 1440: // Package constructor avoids an array copy. 1441: return new String(newStr, offset, count, true); 1442: } 1443: 1444: /** 1445: * Lowercases this String. This uses Unicode's special case mappings, as 1446: * applied to the platform's default Locale, so the resulting string may 1447: * be a different length. 1448: * 1449: * @return new lowercased String, or this if no characters were lowercased 1450: * @see #toLowerCase(Locale) 1451: * @see #toUpperCase() 1452: */ 1453: public String toLowerCase() 1454: { 1455: return toLowerCase(Locale.getDefault()); 1456: } 1457: 1458: /** 1459: * Uppercases this String according to a particular locale. This uses 1460: * Unicode's special case mappings, as applied to the given Locale, so the 1461: * resulting string may be a different length. 1462: * 1463: * @param loc locale to use 1464: * @return new uppercased String, or this if no characters were uppercased 1465: * @throws NullPointerException if loc is null 1466: * @see #toLowerCase(Locale) 1467: * @since 1.1 1468: */ 1469: public String toUpperCase(Locale loc) 1470: { 1471: // First, see how many characters we have to grow by, as well as if the 1472: // current string is already upper case. 1473: boolean turkish = "tr".equals(loc.getLanguage()); 1474: int expand = 0; 1475: boolean unchanged = true; 1476: int i = count; 1477: int x = i + offset; 1478: while (--i >= 0) 1479: { 1480: char ch = value[--x]; 1481: expand += upperCaseExpansion(ch); 1482: unchanged = (unchanged && expand == 0 1483: && ! (turkish && ch == '\u0069') 1484: && ch == Character.toUpperCase(ch)); 1485: } 1486: if (unchanged) 1487: return this; 1488: 1489: // Now we perform the conversion. 1490: i = count; 1491: if (expand == 0) 1492: { 1493: char[] newStr = (char[]) value.clone(); 1494: while (--i >= 0) 1495: { 1496: char ch = value[x]; 1497: // Hardcoded special case. 1498: newStr[x++] = (turkish && ch == '\u0069') ? '\u0130' 1499: : Character.toUpperCase(ch); 1500: } 1501: // Package constructor avoids an array copy. 1502: return new String(newStr, offset, count, true); 1503: } 1504: 1505: // Expansion is necessary. 1506: char[] newStr = new char[count + expand]; 1507: int j = 0; 1508: while (--i >= 0) 1509: { 1510: char ch = value[x++]; 1511: // Hardcoded special case. 1512: if (turkish && ch == '\u0069') 1513: { 1514: newStr[j++] = '\u0130'; 1515: continue; 1516: } 1517: expand = upperCaseExpansion(ch); 1518: if (expand > 0) 1519: { 1520: int index = upperCaseIndex(ch); 1521: while (expand-- >= 0) 1522: newStr[j++] = upperExpand[index++]; 1523: } 1524: else 1525: newStr[j++] = Character.toUpperCase(ch); 1526: } 1527: // Package constructor avoids an array copy. 1528: return new String(newStr, 0, newStr.length, true); 1529: } 1530: 1531: /** 1532: * Uppercases this String. This uses Unicode's special case mappings, as 1533: * applied to the platform's default Locale, so the resulting string may 1534: * be a different length. 1535: * 1536: * @return new uppercased String, or this if no characters were uppercased 1537: * @see #toUpperCase(Locale) 1538: * @see #toLowerCase() 1539: */ 1540: public String toUpperCase() 1541: { 1542: return toUpperCase(Locale.getDefault()); 1543: } 1544: 1545: /** 1546: * Trims all characters less than or equal to <code>'\u0020'</code> 1547: * (<code>' '</code>) from the beginning and end of this String. This 1548: * includes many, but not all, ASCII control characters, and all 1549: * {@link Character#isWhitespace(char)}. 1550: * 1551: * @return new trimmed String, or this if nothing trimmed 1552: */ 1553: public String trim() 1554: { 1555: int limit = count + offset; 1556: if (count == 0 || (value[offset] > '\u0020' 1557: && value[limit - 1] > '\u0020')) 1558: return this; 1559: int begin = offset; 1560: do 1561: if (begin == limit) 1562: return ""; 1563: while (value[begin++] <= '\u0020'); 1564: int end = limit; 1565: while (value[--end] <= '\u0020'); 1566: return substring(begin - offset - 1, end - offset + 1); 1567: } 1568: 1569: /** 1570: * Returns this, as it is already a String! 1571: * 1572: * @return this 1573: */ 1574: public String toString() 1575: { 1576: return this; 1577: } 1578: 1579: /** 1580: * Copies the contents of this String into a character array. Subsequent 1581: * changes to the array do not affect the String. 1582: * 1583: * @return character array copying the String 1584: */ 1585: public char[] toCharArray() 1586: { 1587: if (count == value.length) 1588: return (char[]) value.clone(); 1589: 1590: char[] copy = new char[count]; 1591: VMSystem.arraycopy(value, offset, copy, 0, count); 1592: return copy; 1593: } 1594: 1595: /** 1596: * Returns a String representation of an Object. This is "null" if the 1597: * object is null, otherwise it is <code>obj.toString()</code> (which 1598: * can be null). 1599: * 1600: * @param obj the Object 1601: * @return the string conversion of obj 1602: */ 1603: public static String valueOf(Object obj) 1604: { 1605: return obj == null ? "null" : obj.toString(); 1606: } 1607: 1608: /** 1609: * Returns a String representation of a character array. Subsequent 1610: * changes to the array do not affect the String. 1611: * 1612: * @param data the character array 1613: * @return a String containing the same character sequence as data 1614: * @throws NullPointerException if data is null 1615: * @see #valueOf(char[], int, int) 1616: * @see #String(char[]) 1617: */ 1618: public static String valueOf(char[] data) 1619: { 1620: return valueOf (data, 0, data.length); 1621: } 1622: 1623: /** 1624: * Returns a String representing the character sequence of the char array, 1625: * starting at the specified offset, and copying chars up to the specified 1626: * count. Subsequent changes to the array do not affect the String. 1627: * 1628: * @param data character array 1629: * @param offset position (base 0) to start copying out of data 1630: * @param count the number of characters from data to copy 1631: * @return String containing the chars from data[offset..offset+count] 1632: * @throws NullPointerException if data is null 1633: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 1634: * || offset + count < 0 (overflow) 1635: * || offset + count > data.length) 1636: * (while unspecified, this is a StringIndexOutOfBoundsException) 1637: * @see #String(char[], int, int) 1638: */ 1639: public static String valueOf(char[] data, int offset, int count) 1640: { 1641: return new String(data, offset, count, false); 1642: } 1643: 1644: /** 1645: * Returns a String representing the character sequence of the char array, 1646: * starting at the specified offset, and copying chars up to the specified 1647: * count. Subsequent changes to the array do not affect the String. 1648: * 1649: * @param data character array 1650: * @param offset position (base 0) to start copying out of data 1651: * @param count the number of characters from data to copy 1652: * @return String containing the chars from data[offset..offset+count] 1653: * @throws NullPointerException if data is null 1654: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 1655: * || offset + count < 0 (overflow) 1656: * || offset + count > data.length) 1657: * (while unspecified, this is a StringIndexOutOfBoundsException) 1658: * @see #String(char[], int, int) 1659: */ 1660: public static String copyValueOf(char[] data, int offset, int count) 1661: { 1662: return new String(data, offset, count, false); 1663: } 1664: 1665: /** 1666: * Returns a String representation of a character array. Subsequent 1667: * changes to the array do not affect the String. 1668: * 1669: * @param data the character array 1670: * @return a String containing the same character sequence as data 1671: * @throws NullPointerException if data is null 1672: * @see #copyValueOf(char[], int, int) 1673: * @see #String(char[]) 1674: */ 1675: public static String copyValueOf(char[] data) 1676: { 1677: return copyValueOf (data, 0, data.length); 1678: } 1679: 1680: /** 1681: * Returns a String representing a boolean. 1682: * 1683: * @param b the boolean 1684: * @return "true" if b is true, else "false" 1685: */ 1686: public static String valueOf(boolean b) 1687: { 1688: return b ? "true" : "false"; 1689: } 1690: 1691: /** 1692: * Returns a String representing a character. 1693: * 1694: * @param c the character 1695: * @return String containing the single character c 1696: */ 1697: public static String valueOf(char c) 1698: { 1699: // Package constructor avoids an array copy. 1700: return new String(new char[] { c }, 0, 1, true); 1701: } 1702: 1703: /** 1704: * Returns a String representing an integer. 1705: * 1706: * @param i the integer 1707: * @return String containing the integer in base 10 1708: * @see Integer#toString(int) 1709: */ 1710: public static String valueOf(int i) 1711: { 1712: // See Integer to understand why we call the two-arg variant. 1713: return Integer.toString(i, 10); 1714: } 1715: 1716: /** 1717: * Returns a String representing a long. 1718: * 1719: * @param l the long 1720: * @return String containing the long in base 10 1721: * @see Long#toString(long) 1722: */ 1723: public static String valueOf(long l) 1724: { 1725: return Long.toString(l); 1726: } 1727: 1728: /** 1729: * Returns a String representing a float. 1730: * 1731: * @param f the float 1732: * @return String containing the float 1733: * @see Float#toString(float) 1734: */ 1735: public static String valueOf(float f) 1736: { 1737: return Float.toString(f); 1738: } 1739: 1740: /** 1741: * Returns a String representing a double. 1742: * 1743: * @param d the double 1744: * @return String containing the double 1745: * @see Double#toString(double) 1746: */ 1747: public static String valueOf(double d) 1748: { 1749: return Double.toString(d); 1750: } 1751: 1752: /** 1753: * If two Strings are considered equal, by the equals() method, 1754: * then intern() will return the same String instance. ie. 1755: * if (s1.equals(s2)) then (s1.intern() == s2.intern()). 1756: * All string literals and string-valued constant expressions 1757: * are already interned. 1758: * 1759: * @return the interned String 1760: */ 1761: public String intern() 1762: { 1763: return VMString.intern(this); 1764: } 1765: 1766: /** 1767: * Return the number of code points between two indices in the 1768: * <code>String</code>. An unpaired surrogate counts as a 1769: * code point for this purpose. Characters outside the indicated 1770: * range are not examined, even if the range ends in the middle of a 1771: * surrogate pair. 1772: * 1773: * @param start the starting index 1774: * @param end one past the ending index 1775: * @return the number of code points 1776: * @since 1.5 1777: */ 1778: public synchronized int codePointCount(int start, int end) 1779: { 1780: if (start < 0 || end >= count || start > end) 1781: throw new StringIndexOutOfBoundsException(); 1782: 1783: start += offset; 1784: end += offset; 1785: int count = 0; 1786: while (start < end) 1787: { 1788: char base = value[start]; 1789: if (base < Character.MIN_HIGH_SURROGATE 1790: || base > Character.MAX_HIGH_SURROGATE 1791: || start == end 1792: || start == count 1793: || value[start + 1] < Character.MIN_LOW_SURROGATE 1794: || value[start + 1] > Character.MAX_LOW_SURROGATE) 1795: { 1796: // Nothing. 1797: } 1798: else 1799: { 1800: // Surrogate pair. 1801: ++start; 1802: } 1803: ++start; 1804: ++count; 1805: } 1806: return count; 1807: } 1808: 1809: /** 1810: * Helper function used to detect which characters have a multi-character 1811: * uppercase expansion. Note that this is only used in locations which 1812: * track one-to-many capitalization (java.lang.Character does not do this). 1813: * As of Unicode 3.0.0, the result is limited in the range 0 to 2, as the 1814: * longest uppercase expansion is three characters (a growth of 2 from the 1815: * lowercase character). 1816: * 1817: * @param ch the char to check 1818: * @return the number of characters to add when converting to uppercase 1819: * @see CharData#DIRECTION 1820: * @see CharData#UPPER_SPECIAL 1821: * @see #toUpperCase(Locale) 1822: */ 1823: private static int upperCaseExpansion(char ch) 1824: { 1825: return Character.direction[Character.readChar(ch) >> 7] & 3; 1826: } 1827: 1828: /** 1829: * Helper function used to locate the offset in upperExpand given a 1830: * character with a multi-character expansion. The binary search is 1831: * optimized under the assumption that this method will only be called on 1832: * characters which exist in upperSpecial. 1833: * 1834: * @param ch the char to check 1835: * @return the index where its expansion begins 1836: * @see CharData#UPPER_SPECIAL 1837: * @see CharData#UPPER_EXPAND 1838: * @see #toUpperCase(Locale) 1839: */ 1840: private static int upperCaseIndex(char ch) 1841: { 1842: // Simple binary search for the correct character. 1843: int low = 0; 1844: int hi = upperSpecial.length - 2; 1845: int mid = ((low + hi) >> 2) << 1; 1846: char c = upperSpecial[mid]; 1847: while (ch != c) 1848: { 1849: if (ch < c) 1850: hi = mid - 2; 1851: else 1852: low = mid + 2; 1853: mid = ((low + hi) >> 2) << 1; 1854: c = upperSpecial[mid]; 1855: } 1856: return upperSpecial[mid + 1]; 1857: } 1858: 1859: /** 1860: * Returns the value array of the given string if it is zero based or a 1861: * copy of it that is zero based (stripping offset and making length equal 1862: * to count). Used for accessing the char[]s of gnu.java.lang.CharData. 1863: * Package private for use in Character. 1864: */ 1865: static char[] zeroBasedStringValue(String s) 1866: { 1867: char[] value; 1868: 1869: if (s.offset == 0 && s.count == s.value.length) 1870: value = s.value; 1871: else 1872: { 1873: int count = s.count; 1874: value = new char[count]; 1875: VMSystem.arraycopy(s.value, s.offset, value, 0, count); 1876: } 1877: 1878: return value; 1879: } 1880: 1881: /** 1882: * Returns true iff this String contains the sequence of Characters 1883: * described in s. 1884: * @param s the CharSequence 1885: * @return true iff this String contains s 1886: * 1887: * @since 1.5 1888: */ 1889: public boolean contains (CharSequence s) 1890: { 1891: return this.indexOf(s.toString()) != -1; 1892: } 1893: 1894: /** 1895: * Returns a string that is this string with all instances of the sequence 1896: * represented by <code>target</code> replaced by the sequence in 1897: * <code>replacement</code>. 1898: * @param target the sequence to be replaced 1899: * @param replacement the sequence used as the replacement 1900: * @return the string constructed as above 1901: */ 1902: public String replace (CharSequence target, CharSequence replacement) 1903: { 1904: String targetString = target.toString(); 1905: String replaceString = replacement.toString(); 1906: int targetLength = target.length(); 1907: int replaceLength = replacement.length(); 1908: 1909: int startPos = this.indexOf(targetString); 1910: StringBuilder result = new StringBuilder(this); 1911: while (startPos != -1) 1912: { 1913: // Replace the target with the replacement 1914: result.replace(startPos, startPos + targetLength, replaceString); 1915: 1916: // Search for a new occurrence of the target 1917: startPos = result.indexOf(targetString, startPos + replaceLength); 1918: } 1919: return result.toString(); 1920: } 1921: }
GNU Classpath (0.20) |