Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: 
  45: /**
  46:  * Wrapper class for the primitive char data type.  In addition, this class
  47:  * allows one to retrieve property information and perform transformations
  48:  * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
  49:  * java.lang.Character is designed to be very dynamic, and as such, it
  50:  * retrieves information on the Unicode character set from a separate
  51:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  52:  *
  53:  * <p>For predicates, boundaries are used to describe
  54:  * the set of characters for which the method will return true.
  55:  * This syntax uses fairly normal regular expression notation.
  56:  * See 5.13 of the Unicode Standard, Version 3.0, for the
  57:  * boundary specification.
  58:  *
  59:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  60:  * for more information on the Unicode Standard.
  61:  *
  62:  * @author Tom Tromey (tromey@cygnus.com)
  63:  * @author Paul N. Fisher
  64:  * @author Jochen Hoenicke
  65:  * @author Eric Blake (ebb9@email.byu.edu)
  66:  * @see CharData
  67:  * @since 1.0
  68:  * @status updated to 1.4
  69:  */
  70: public final class Character implements Serializable, Comparable
  71: {
  72:   /**
  73:    * A named subset of Unicode characters, compared by identity only.
  74:    *
  75:    * @author Paul N. Fisher
  76:    * @author Eric Blake (ebb9@email.byu.edu)
  77:    * @since 1.2
  78:    */
  79:   public static class Subset
  80:   {
  81:     /** The name of the subset; never null, and returned by toString(). */
  82:     private final String name;
  83: 
  84:     /**
  85:      * Construct a new subset of characters with the given name.
  86:      *
  87:      * @param name the name of the subset
  88:      * @throws NullPointerException if name is null
  89:      */
  90:     protected Subset(String name)
  91:     {
  92:       // name.toString() returns name itself, or throws NullPointerException if name is null.
  93:       this.name = name.toString();
  94:     }
  95: 
  96:     /**
  97:      * Compares two Subsets for equality. This is <code>final</code>, and
  98:      * restricts the comparison to the <code>==</code> operator, so it returns
  99:      * true only for the same object.
 100:      *
 101:      * @param o the object to compare
 102:      * @return true if o is this very object; false otherwise, including for null
 103:      */
 104:     public final boolean equals(Object o)
 105:     {
 106:       return o == this;
 107:     }
 108: 
 109:     /**
 110:      * Returns the hash code inherited from Object. This is <code>final</code>,
 111:      * so that it stays consistent with the identity-based equals.
 112:      *
 113:      * @return the hash code for this object
 114:      */
 115:     public final int hashCode()
 116:     {
 117:       return super.hashCode();
 118:     }
 119: 
 120:     /**
 121:      * Returns the name of the subset, as passed to the constructor.
 122:      *
 123:      * @return the name
 124:      */
 125:     public final String toString()
 126:     {
 127:       return name;
 128:     }
 129:   } // class Subset
 130: 
 131:   /**
 132:    * A family of character subsets in the Unicode specification. A character
 133:    * is in at most one of these blocks.
 134:    *
 135:    * This nested class was generated automatically from
 136:    * <code>doc/unicode/Block-3.txt</code>, by some perl scripts.
 137:    * This Unicode definition file can be found on the
 138:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 139:    * JDK 1.4 uses Unicode version 3.0.0.
 140:    *
 141:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 142:    * @since 1.2
 143:    */
 144:   public static final class UnicodeBlock extends Subset
 145:   {
 146:     /** The first character of the block (inclusive); compared against in of(char). */
 147:     private final char start;
 148: 
 149:     /** The last character of the block (inclusive); compared against in of(char). */
 150:     private final char end;
 151: 
 152:     /**
 153:      * Constructor for strictly defined blocks, each covering one range of characters.
 154:      *
 155:      * @param start the first character of the range (inclusive)
 156:      * @param end the last character of the range (inclusive)
 157:      * @param name the block name, returned by toString()
 158:      */
 159:     private UnicodeBlock(char start, char end, String name)
 160:     {
 161:       super(name); // Subset stores the name and throws NullPointerException if it is null
 162:       this.start = start;
 163:       this.end = end;
 164:     }
 165: 
 166:     /**
 167:      * Returns the Unicode character block which a character belongs to.
 168:      * The block is located by a binary search over the table of defined blocks.
 169:      * @param ch the character to look up
 170:      * @return the block it belongs to, or null if it is not in one
 171:      */
 172:     public static UnicodeBlock of(char ch)
 173:     {
 174:       // U+FEFF belongs to SPECIALS but lies outside the range stored for it.
 175:       if (ch == '\uFEFF')
 176:         return SPECIALS;
 177:       // Binary search; relies on sets being listed in ascending range order.
 178:       int low = 0;
 179:       int hi = sets.length - 1;
 180:       while (low <= hi)
 181:         {
 182:           int mid = (low + hi) >> 1;
 183:           UnicodeBlock b = sets[mid];
 184:           if (ch < b.start)
 185:             hi = mid - 1; // ch is below this block: continue in the lower half
 186:           else if (ch > b.end)
 187:             low = mid + 1; // ch is above this block: continue in the upper half
 188:           else
 189:             return b; // b.start <= ch <= b.end
 190:         }
 191:       return null; // ch falls in a gap between blocks, or past the last one
 192:     }
 193: 
 194:     /**
 195:      * Basic Latin.
 196:      * '\u0000' - '\u007F'.
 197:      */
 198:     public static final UnicodeBlock BASIC_LATIN
 199:       = new UnicodeBlock('\u0000', '\u007F',
 200:                          "BASIC_LATIN");
 201: 
 202:     /**
 203:      * Latin-1 Supplement.
 204:      * '\u0080' - '\u00FF'.
 205:      */
 206:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 207:       = new UnicodeBlock('\u0080', '\u00FF',
 208:                          "LATIN_1_SUPPLEMENT");
 209: 
 210:     /**
 211:      * Latin Extended-A.
 212:      * '\u0100' - '\u017F'.
 213:      */
 214:     public static final UnicodeBlock LATIN_EXTENDED_A
 215:       = new UnicodeBlock('\u0100', '\u017F',
 216:                          "LATIN_EXTENDED_A");
 217: 
 218:     /**
 219:      * Latin Extended-B.
 220:      * '\u0180' - '\u024F'.
 221:      */
 222:     public static final UnicodeBlock LATIN_EXTENDED_B
 223:       = new UnicodeBlock('\u0180', '\u024F',
 224:                          "LATIN_EXTENDED_B");
 225: 
 226:     /**
 227:      * IPA Extensions.
 228:      * '\u0250' - '\u02AF'.
 229:      */
 230:     public static final UnicodeBlock IPA_EXTENSIONS
 231:       = new UnicodeBlock('\u0250', '\u02AF',
 232:                          "IPA_EXTENSIONS");
 233: 
 234:     /**
 235:      * Spacing Modifier Letters.
 236:      * '\u02B0' - '\u02FF'.
 237:      */
 238:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 239:       = new UnicodeBlock('\u02B0', '\u02FF',
 240:                          "SPACING_MODIFIER_LETTERS");
 241: 
 242:     /**
 243:      * Combining Diacritical Marks.
 244:      * '\u0300' - '\u036F'.
 245:      */
 246:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 247:       = new UnicodeBlock('\u0300', '\u036F',
 248:                          "COMBINING_DIACRITICAL_MARKS");
 249: 
 250:     /**
 251:      * Greek.
 252:      * '\u0370' - '\u03FF'.
 253:      */
 254:     public static final UnicodeBlock GREEK
 255:       = new UnicodeBlock('\u0370', '\u03FF',
 256:                          "GREEK");
 257: 
 258:     /**
 259:      * Cyrillic.
 260:      * '\u0400' - '\u04FF'.
 261:      */
 262:     public static final UnicodeBlock CYRILLIC
 263:       = new UnicodeBlock('\u0400', '\u04FF',
 264:                          "CYRILLIC");
 265: 
 266:     /**
 267:      * Armenian.
 268:      * '\u0530' - '\u058F'.
 269:      */
 270:     public static final UnicodeBlock ARMENIAN
 271:       = new UnicodeBlock('\u0530', '\u058F',
 272:                          "ARMENIAN");
 273: 
 274:     /**
 275:      * Hebrew.
 276:      * '\u0590' - '\u05FF'.
 277:      */
 278:     public static final UnicodeBlock HEBREW
 279:       = new UnicodeBlock('\u0590', '\u05FF',
 280:                          "HEBREW");
 281: 
 282:     /**
 283:      * Arabic.
 284:      * '\u0600' - '\u06FF'.
 285:      */
 286:     public static final UnicodeBlock ARABIC
 287:       = new UnicodeBlock('\u0600', '\u06FF',
 288:                          "ARABIC");
 289: 
 290:     /**
 291:      * Syriac.
 292:      * '\u0700' - '\u074F'.
 293:      * @since 1.4
 294:      */
 295:     public static final UnicodeBlock SYRIAC
 296:       = new UnicodeBlock('\u0700', '\u074F',
 297:                          "SYRIAC");
 298: 
 299:     /**
 300:      * Thaana.
 301:      * '\u0780' - '\u07BF'.
 302:      * @since 1.4
 303:      */
 304:     public static final UnicodeBlock THAANA
 305:       = new UnicodeBlock('\u0780', '\u07BF',
 306:                          "THAANA");
 307: 
 308:     /**
 309:      * Devanagari.
 310:      * '\u0900' - '\u097F'.
 311:      */
 312:     public static final UnicodeBlock DEVANAGARI
 313:       = new UnicodeBlock('\u0900', '\u097F',
 314:                          "DEVANAGARI");
 315: 
 316:     /**
 317:      * Bengali.
 318:      * '\u0980' - '\u09FF'.
 319:      */
 320:     public static final UnicodeBlock BENGALI
 321:       = new UnicodeBlock('\u0980', '\u09FF',
 322:                          "BENGALI");
 323: 
 324:     /**
 325:      * Gurmukhi.
 326:      * '\u0A00' - '\u0A7F'.
 327:      */
 328:     public static final UnicodeBlock GURMUKHI
 329:       = new UnicodeBlock('\u0A00', '\u0A7F',
 330:                          "GURMUKHI");
 331: 
 332:     /**
 333:      * Gujarati.
 334:      * '\u0A80' - '\u0AFF'.
 335:      */
 336:     public static final UnicodeBlock GUJARATI
 337:       = new UnicodeBlock('\u0A80', '\u0AFF',
 338:                          "GUJARATI");
 339: 
 340:     /**
 341:      * Oriya.
 342:      * '\u0B00' - '\u0B7F'.
 343:      */
 344:     public static final UnicodeBlock ORIYA
 345:       = new UnicodeBlock('\u0B00', '\u0B7F',
 346:                          "ORIYA");
 347: 
 348:     /**
 349:      * Tamil.
 350:      * '\u0B80' - '\u0BFF'.
 351:      */
 352:     public static final UnicodeBlock TAMIL
 353:       = new UnicodeBlock('\u0B80', '\u0BFF',
 354:                          "TAMIL");
 355: 
 356:     /**
 357:      * Telugu.
 358:      * '\u0C00' - '\u0C7F'.
 359:      */
 360:     public static final UnicodeBlock TELUGU
 361:       = new UnicodeBlock('\u0C00', '\u0C7F',
 362:                          "TELUGU");
 363: 
 364:     /**
 365:      * Kannada.
 366:      * '\u0C80' - '\u0CFF'.
 367:      */
 368:     public static final UnicodeBlock KANNADA
 369:       = new UnicodeBlock('\u0C80', '\u0CFF',
 370:                          "KANNADA");
 371: 
 372:     /**
 373:      * Malayalam.
 374:      * '\u0D00' - '\u0D7F'.
 375:      */
 376:     public static final UnicodeBlock MALAYALAM
 377:       = new UnicodeBlock('\u0D00', '\u0D7F',
 378:                          "MALAYALAM");
 379: 
 380:     /**
 381:      * Sinhala.
 382:      * '\u0D80' - '\u0DFF'.
 383:      * @since 1.4
 384:      */
 385:     public static final UnicodeBlock SINHALA
 386:       = new UnicodeBlock('\u0D80', '\u0DFF',
 387:                          "SINHALA");
 388: 
 389:     /**
 390:      * Thai.
 391:      * '\u0E00' - '\u0E7F'.
 392:      */
 393:     public static final UnicodeBlock THAI
 394:       = new UnicodeBlock('\u0E00', '\u0E7F',
 395:                          "THAI");
 396: 
 397:     /**
 398:      * Lao.
 399:      * '\u0E80' - '\u0EFF'.
 400:      */
 401:     public static final UnicodeBlock LAO
 402:       = new UnicodeBlock('\u0E80', '\u0EFF',
 403:                          "LAO");
 404: 
 405:     /**
 406:      * Tibetan.
 407:      * '\u0F00' - '\u0FFF'.
 408:      */
 409:     public static final UnicodeBlock TIBETAN
 410:       = new UnicodeBlock('\u0F00', '\u0FFF',
 411:                          "TIBETAN");
 412: 
 413:     /**
 414:      * Myanmar.
 415:      * '\u1000' - '\u109F'.
 416:      * @since 1.4
 417:      */
 418:     public static final UnicodeBlock MYANMAR
 419:       = new UnicodeBlock('\u1000', '\u109F',
 420:                          "MYANMAR");
 421: 
 422:     /**
 423:      * Georgian.
 424:      * '\u10A0' - '\u10FF'.
 425:      */
 426:     public static final UnicodeBlock GEORGIAN
 427:       = new UnicodeBlock('\u10A0', '\u10FF',
 428:                          "GEORGIAN");
 429: 
 430:     /**
 431:      * Hangul Jamo.
 432:      * '\u1100' - '\u11FF'.
 433:      */
 434:     public static final UnicodeBlock HANGUL_JAMO
 435:       = new UnicodeBlock('\u1100', '\u11FF',
 436:                          "HANGUL_JAMO");
 437: 
 438:     /**
 439:      * Ethiopic.
 440:      * '\u1200' - '\u137F'.
 441:      * @since 1.4
 442:      */
 443:     public static final UnicodeBlock ETHIOPIC
 444:       = new UnicodeBlock('\u1200', '\u137F',
 445:                          "ETHIOPIC");
 446: 
 447:     /**
 448:      * Cherokee.
 449:      * '\u13A0' - '\u13FF'.
 450:      * @since 1.4
 451:      */
 452:     public static final UnicodeBlock CHEROKEE
 453:       = new UnicodeBlock('\u13A0', '\u13FF',
 454:                          "CHEROKEE");
 455: 
 456:     /**
 457:      * Unified Canadian Aboriginal Syllabics.
 458:      * '\u1400' - '\u167F'.
 459:      * @since 1.4
 460:      */
 461:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 462:       = new UnicodeBlock('\u1400', '\u167F',
 463:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
 464: 
 465:     /**
 466:      * Ogham.
 467:      * '\u1680' - '\u169F'.
 468:      * @since 1.4
 469:      */
 470:     public static final UnicodeBlock OGHAM
 471:       = new UnicodeBlock('\u1680', '\u169F',
 472:                          "OGHAM");
 473: 
 474:     /**
 475:      * Runic.
 476:      * '\u16A0' - '\u16FF'.
 477:      * @since 1.4
 478:      */
 479:     public static final UnicodeBlock RUNIC
 480:       = new UnicodeBlock('\u16A0', '\u16FF',
 481:                          "RUNIC");
 482: 
 483:     /**
 484:      * Khmer.
 485:      * '\u1780' - '\u17FF'.
 486:      * @since 1.4
 487:      */
 488:     public static final UnicodeBlock KHMER
 489:       = new UnicodeBlock('\u1780', '\u17FF',
 490:                          "KHMER");
 491: 
 492:     /**
 493:      * Mongolian.
 494:      * '\u1800' - '\u18AF'.
 495:      * @since 1.4
 496:      */
 497:     public static final UnicodeBlock MONGOLIAN
 498:       = new UnicodeBlock('\u1800', '\u18AF',
 499:                          "MONGOLIAN");
 500: 
 501:     /**
 502:      * Latin Extended Additional.
 503:      * '\u1E00' - '\u1EFF'.
 504:      */
 505:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 506:       = new UnicodeBlock('\u1E00', '\u1EFF',
 507:                          "LATIN_EXTENDED_ADDITIONAL");
 508: 
 509:     /**
 510:      * Greek Extended.
 511:      * '\u1F00' - '\u1FFF'.
 512:      */
 513:     public static final UnicodeBlock GREEK_EXTENDED
 514:       = new UnicodeBlock('\u1F00', '\u1FFF',
 515:                          "GREEK_EXTENDED");
 516: 
 517:     /**
 518:      * General Punctuation.
 519:      * '\u2000' - '\u206F'.
 520:      */
 521:     public static final UnicodeBlock GENERAL_PUNCTUATION
 522:       = new UnicodeBlock('\u2000', '\u206F',
 523:                          "GENERAL_PUNCTUATION");
 524: 
 525:     /**
 526:      * Superscripts and Subscripts.
 527:      * '\u2070' - '\u209F'.
 528:      */
 529:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 530:       = new UnicodeBlock('\u2070', '\u209F',
 531:                          "SUPERSCRIPTS_AND_SUBSCRIPTS");
 532: 
 533:     /**
 534:      * Currency Symbols.
 535:      * '\u20A0' - '\u20CF'.
 536:      */
 537:     public static final UnicodeBlock CURRENCY_SYMBOLS
 538:       = new UnicodeBlock('\u20A0', '\u20CF',
 539:                          "CURRENCY_SYMBOLS");
 540: 
 541:     /**
 542:      * Combining Marks for Symbols.
 543:      * '\u20D0' - '\u20FF'.
 544:      */
 545:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 546:       = new UnicodeBlock('\u20D0', '\u20FF',
 547:                          "COMBINING_MARKS_FOR_SYMBOLS");
 548: 
 549:     /**
 550:      * Letterlike Symbols.
 551:      * '\u2100' - '\u214F'.
 552:      */
 553:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 554:       = new UnicodeBlock('\u2100', '\u214F',
 555:                          "LETTERLIKE_SYMBOLS");
 556: 
 557:     /**
 558:      * Number Forms.
 559:      * '\u2150' - '\u218F'.
 560:      */
 561:     public static final UnicodeBlock NUMBER_FORMS
 562:       = new UnicodeBlock('\u2150', '\u218F',
 563:                          "NUMBER_FORMS");
 564: 
 565:     /**
 566:      * Arrows.
 567:      * '\u2190' - '\u21FF'.
 568:      */
 569:     public static final UnicodeBlock ARROWS
 570:       = new UnicodeBlock('\u2190', '\u21FF',
 571:                          "ARROWS");
 572: 
 573:     /**
 574:      * Mathematical Operators.
 575:      * '\u2200' - '\u22FF'.
 576:      */
 577:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 578:       = new UnicodeBlock('\u2200', '\u22FF',
 579:                          "MATHEMATICAL_OPERATORS");
 580: 
 581:     /**
 582:      * Miscellaneous Technical.
 583:      * '\u2300' - '\u23FF'.
 584:      */
 585:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 586:       = new UnicodeBlock('\u2300', '\u23FF',
 587:                          "MISCELLANEOUS_TECHNICAL");
 588: 
 589:     /**
 590:      * Control Pictures.
 591:      * '\u2400' - '\u243F'.
 592:      */
 593:     public static final UnicodeBlock CONTROL_PICTURES
 594:       = new UnicodeBlock('\u2400', '\u243F',
 595:                          "CONTROL_PICTURES");
 596: 
 597:     /**
 598:      * Optical Character Recognition.
 599:      * '\u2440' - '\u245F'.
 600:      */
 601:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 602:       = new UnicodeBlock('\u2440', '\u245F',
 603:                          "OPTICAL_CHARACTER_RECOGNITION");
 604: 
 605:     /**
 606:      * Enclosed Alphanumerics.
 607:      * '\u2460' - '\u24FF'.
 608:      */
 609:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 610:       = new UnicodeBlock('\u2460', '\u24FF',
 611:                          "ENCLOSED_ALPHANUMERICS");
 612: 
 613:     /**
 614:      * Box Drawing.
 615:      * '\u2500' - '\u257F'.
 616:      */
 617:     public static final UnicodeBlock BOX_DRAWING
 618:       = new UnicodeBlock('\u2500', '\u257F',
 619:                          "BOX_DRAWING");
 620: 
 621:     /**
 622:      * Block Elements.
 623:      * '\u2580' - '\u259F'.
 624:      */
 625:     public static final UnicodeBlock BLOCK_ELEMENTS
 626:       = new UnicodeBlock('\u2580', '\u259F',
 627:                          "BLOCK_ELEMENTS");
 628: 
 629:     /**
 630:      * Geometric Shapes.
 631:      * '\u25A0' - '\u25FF'.
 632:      */
 633:     public static final UnicodeBlock GEOMETRIC_SHAPES
 634:       = new UnicodeBlock('\u25A0', '\u25FF',
 635:                          "GEOMETRIC_SHAPES");
 636: 
 637:     /**
 638:      * Miscellaneous Symbols.
 639:      * '\u2600' - '\u26FF'.
 640:      */
 641:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 642:       = new UnicodeBlock('\u2600', '\u26FF',
 643:                          "MISCELLANEOUS_SYMBOLS");
 644: 
 645:     /**
 646:      * Dingbats.
 647:      * '\u2700' - '\u27BF'.
 648:      */
 649:     public static final UnicodeBlock DINGBATS
 650:       = new UnicodeBlock('\u2700', '\u27BF',
 651:                          "DINGBATS");
 652: 
 653:     /**
 654:      * Braille Patterns.
 655:      * '\u2800' - '\u28FF'.
 656:      * @since 1.4
 657:      */
 658:     public static final UnicodeBlock BRAILLE_PATTERNS
 659:       = new UnicodeBlock('\u2800', '\u28FF',
 660:                          "BRAILLE_PATTERNS");
 661: 
 662:     /**
 663:      * CJK Radicals Supplement.
 664:      * '\u2E80' - '\u2EFF'.
 665:      * @since 1.4
 666:      */
 667:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 668:       = new UnicodeBlock('\u2E80', '\u2EFF',
 669:                          "CJK_RADICALS_SUPPLEMENT");
 670: 
 671:     /**
 672:      * Kangxi Radicals.
 673:      * '\u2F00' - '\u2FDF'.
 674:      * @since 1.4
 675:      */
 676:     public static final UnicodeBlock KANGXI_RADICALS
 677:       = new UnicodeBlock('\u2F00', '\u2FDF',
 678:                          "KANGXI_RADICALS");
 679: 
 680:     /**
 681:      * Ideographic Description Characters.
 682:      * '\u2FF0' - '\u2FFF'.
 683:      * @since 1.4
 684:      */
 685:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
 686:       = new UnicodeBlock('\u2FF0', '\u2FFF',
 687:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
 688: 
 689:     /**
 690:      * CJK Symbols and Punctuation.
 691:      * '\u3000' - '\u303F'.
 692:      */
 693:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
 694:       = new UnicodeBlock('\u3000', '\u303F',
 695:                          "CJK_SYMBOLS_AND_PUNCTUATION");
 696: 
 697:     /**
 698:      * Hiragana.
 699:      * '\u3040' - '\u309F'.
 700:      */
 701:     public static final UnicodeBlock HIRAGANA
 702:       = new UnicodeBlock('\u3040', '\u309F',
 703:                          "HIRAGANA");
 704: 
 705:     /**
 706:      * Katakana.
 707:      * '\u30A0' - '\u30FF'.
 708:      */
 709:     public static final UnicodeBlock KATAKANA
 710:       = new UnicodeBlock('\u30A0', '\u30FF',
 711:                          "KATAKANA");
 712: 
 713:     /**
 714:      * Bopomofo.
 715:      * '\u3100' - '\u312F'.
 716:      */
 717:     public static final UnicodeBlock BOPOMOFO
 718:       = new UnicodeBlock('\u3100', '\u312F',
 719:                          "BOPOMOFO");
 720: 
 721:     /**
 722:      * Hangul Compatibility Jamo.
 723:      * '\u3130' - '\u318F'.
 724:      */
 725:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
 726:       = new UnicodeBlock('\u3130', '\u318F',
 727:                          "HANGUL_COMPATIBILITY_JAMO");
 728: 
 729:     /**
 730:      * Kanbun.
 731:      * '\u3190' - '\u319F'.
 732:      */
 733:     public static final UnicodeBlock KANBUN
 734:       = new UnicodeBlock('\u3190', '\u319F',
 735:                          "KANBUN");
 736: 
 737:     /**
 738:      * Bopomofo Extended.
 739:      * '\u31A0' - '\u31BF'.
 740:      * @since 1.4
 741:      */
 742:     public static final UnicodeBlock BOPOMOFO_EXTENDED
 743:       = new UnicodeBlock('\u31A0', '\u31BF',
 744:                          "BOPOMOFO_EXTENDED");
 745: 
 746:     /**
 747:      * Enclosed CJK Letters and Months.
 748:      * '\u3200' - '\u32FF'.
 749:      */
 750:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
 751:       = new UnicodeBlock('\u3200', '\u32FF',
 752:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS");
 753: 
 754:     /**
 755:      * CJK Compatibility.
 756:      * '\u3300' - '\u33FF'.
 757:      */
 758:     public static final UnicodeBlock CJK_COMPATIBILITY
 759:       = new UnicodeBlock('\u3300', '\u33FF',
 760:                          "CJK_COMPATIBILITY");
 761: 
 762:     /**
 763:      * CJK Unified Ideographs Extension A.
 764:      * '\u3400' - '\u4DB5'.
 765:      * @since 1.4
 766:      */
 767:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
 768:       = new UnicodeBlock('\u3400', '\u4DB5',
 769:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
 770: 
 771:     /**
 772:      * CJK Unified Ideographs.
 773:      * '\u4E00' - '\u9FFF'.
 774:      */
 775:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
 776:       = new UnicodeBlock('\u4E00', '\u9FFF',
 777:                          "CJK_UNIFIED_IDEOGRAPHS");
 778: 
 779:     /**
 780:      * Yi Syllables.
 781:      * '\uA000' - '\uA48F'.
 782:      * @since 1.4
 783:      */
 784:     public static final UnicodeBlock YI_SYLLABLES
 785:       = new UnicodeBlock('\uA000', '\uA48F',
 786:                          "YI_SYLLABLES");
 787: 
 788:     /**
 789:      * Yi Radicals.
 790:      * '\uA490' - '\uA4CF'.
 791:      * @since 1.4
 792:      */
 793:     public static final UnicodeBlock YI_RADICALS
 794:       = new UnicodeBlock('\uA490', '\uA4CF',
 795:                          "YI_RADICALS");
 796: 
 797:     /**
 798:      * Hangul Syllables.
 799:      * '\uAC00' - '\uD7A3'.
 800:      */
 801:     public static final UnicodeBlock HANGUL_SYLLABLES
 802:       = new UnicodeBlock('\uAC00', '\uD7A3',
 803:                          "HANGUL_SYLLABLES");
 804: 
 805:     /**
 806:      * Surrogates Area.
 807:      * '\uD800' - '\uDFFF'.
 808:      */
 809:     public static final UnicodeBlock SURROGATES_AREA
 810:       = new UnicodeBlock('\uD800', '\uDFFF',
 811:                          "SURROGATES_AREA");
 812: 
 813:     /**
 814:      * Private Use Area.
 815:      * '\uE000' - '\uF8FF'.
 816:      */
 817:     public static final UnicodeBlock PRIVATE_USE_AREA
 818:       = new UnicodeBlock('\uE000', '\uF8FF',
 819:                          "PRIVATE_USE_AREA");
 820: 
 821:     /**
 822:      * CJK Compatibility Ideographs.
 823:      * '\uF900' - '\uFAFF'.
 824:      */
 825:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
 826:       = new UnicodeBlock('\uF900', '\uFAFF',
 827:                          "CJK_COMPATIBILITY_IDEOGRAPHS");
 828: 
 829:     /**
 830:      * Alphabetic Presentation Forms.
 831:      * '\uFB00' - '\uFB4F'.
 832:      */
 833:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
 834:       = new UnicodeBlock('\uFB00', '\uFB4F',
 835:                          "ALPHABETIC_PRESENTATION_FORMS");
 836: 
 837:     /**
 838:      * Arabic Presentation Forms-A.
 839:      * '\uFB50' - '\uFDFF'.
 840:      */
 841:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
 842:       = new UnicodeBlock('\uFB50', '\uFDFF',
 843:                          "ARABIC_PRESENTATION_FORMS_A");
 844: 
 845:     /**
 846:      * Combining Half Marks.
 847:      * '\uFE20' - '\uFE2F'.
 848:      */
 849:     public static final UnicodeBlock COMBINING_HALF_MARKS
 850:       = new UnicodeBlock('\uFE20', '\uFE2F',
 851:                          "COMBINING_HALF_MARKS");
 852: 
 853:     /**
 854:      * CJK Compatibility Forms.
 855:      * '\uFE30' - '\uFE4F'.
 856:      */
 857:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
 858:       = new UnicodeBlock('\uFE30', '\uFE4F',
 859:                          "CJK_COMPATIBILITY_FORMS");
 860: 
 861:     /**
 862:      * Small Form Variants.
 863:      * '\uFE50' - '\uFE6F'.
 864:      */
 865:     public static final UnicodeBlock SMALL_FORM_VARIANTS
 866:       = new UnicodeBlock('\uFE50', '\uFE6F',
 867:                          "SMALL_FORM_VARIANTS");
 868: 
 869:     /**
 870:      * Arabic Presentation Forms-B.
 871:      * '\uFE70' - '\uFEFE'.
 872:      */
 873:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
 874:       = new UnicodeBlock('\uFE70', '\uFEFE',
 875:                          "ARABIC_PRESENTATION_FORMS_B");
 876: 
 877:     /**
 878:      * Halfwidth and Fullwidth Forms.
 879:      * '\uFF00' - '\uFFEF'.
 880:      */
 881:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
 882:       = new UnicodeBlock('\uFF00', '\uFFEF',
 883:                          "HALFWIDTH_AND_FULLWIDTH_FORMS");
 884: 
 885:     /**
 886:      * Specials.
 887:      * '\uFEFF', '\uFFF0' - '\uFFFD'.
 888:      */
 889:     public static final UnicodeBlock SPECIALS
 890:       = new UnicodeBlock('\uFFF0', '\uFFFD',
 891:                          "SPECIALS");
 892: 
 893:     /**
 894:      * The defined blocks, in ascending range order as the binary search in of(char) requires.
 895:      */
 896:     private static final UnicodeBlock sets[] = {
 897:       BASIC_LATIN,
 898:       LATIN_1_SUPPLEMENT,
 899:       LATIN_EXTENDED_A,
 900:       LATIN_EXTENDED_B,
 901:       IPA_EXTENSIONS,
 902:       SPACING_MODIFIER_LETTERS,
 903:       COMBINING_DIACRITICAL_MARKS,
 904:       GREEK,
 905:       CYRILLIC,
 906:       ARMENIAN,
 907:       HEBREW,
 908:       ARABIC,
 909:       SYRIAC,
 910:       THAANA,
 911:       DEVANAGARI,
 912:       BENGALI,
 913:       GURMUKHI,
 914:       GUJARATI,
 915:       ORIYA,
 916:       TAMIL,
 917:       TELUGU,
 918:       KANNADA,
 919:       MALAYALAM,
 920:       SINHALA,
 921:       THAI,
 922:       LAO,
 923:       TIBETAN,
 924:       MYANMAR,
 925:       GEORGIAN,
 926:       HANGUL_JAMO,
 927:       ETHIOPIC,
 928:       CHEROKEE,
 929:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
 930:       OGHAM,
 931:       RUNIC,
 932:       KHMER,
 933:       MONGOLIAN,
 934:       LATIN_EXTENDED_ADDITIONAL,
 935:       GREEK_EXTENDED,
 936:       GENERAL_PUNCTUATION,
 937:       SUPERSCRIPTS_AND_SUBSCRIPTS,
 938:       CURRENCY_SYMBOLS,
 939:       COMBINING_MARKS_FOR_SYMBOLS,
 940:       LETTERLIKE_SYMBOLS,
 941:       NUMBER_FORMS,
 942:       ARROWS,
 943:       MATHEMATICAL_OPERATORS,
 944:       MISCELLANEOUS_TECHNICAL,
 945:       CONTROL_PICTURES,
 946:       OPTICAL_CHARACTER_RECOGNITION,
 947:       ENCLOSED_ALPHANUMERICS,
 948:       BOX_DRAWING,
 949:       BLOCK_ELEMENTS,
 950:       GEOMETRIC_SHAPES,
 951:       MISCELLANEOUS_SYMBOLS,
 952:       DINGBATS,
 953:       BRAILLE_PATTERNS,
 954:       CJK_RADICALS_SUPPLEMENT,
 955:       KANGXI_RADICALS,
 956:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
 957:       CJK_SYMBOLS_AND_PUNCTUATION,
 958:       HIRAGANA,
 959:       KATAKANA,
 960:       BOPOMOFO,
 961:       HANGUL_COMPATIBILITY_JAMO,
 962:       KANBUN,
 963:       BOPOMOFO_EXTENDED,
 964:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
 965:       CJK_COMPATIBILITY,
 966:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
 967:       CJK_UNIFIED_IDEOGRAPHS,
 968:       YI_SYLLABLES,
 969:       YI_RADICALS,
 970:       HANGUL_SYLLABLES,
 971:       SURROGATES_AREA,
 972:       PRIVATE_USE_AREA,
 973:       CJK_COMPATIBILITY_IDEOGRAPHS,
 974:       ALPHABETIC_PRESENTATION_FORMS,
 975:       ARABIC_PRESENTATION_FORMS_A,
 976:       COMBINING_HALF_MARKS,
 977:       CJK_COMPATIBILITY_FORMS,
 978:       SMALL_FORM_VARIANTS,
 979:       ARABIC_PRESENTATION_FORMS_B,
 980:       HALFWIDTH_AND_FULLWIDTH_FORMS,
 981:       SPECIALS,
 982:     };
 983:   } // class UnicodeBlock
 984: 
 985:   /**
 986:    * The immutable char wrapped by this Character.
 987:    *
 988:    * @serial the value of this Character
 989:    */
 990:   private final char value;
 991: 
 992:   /**
 993:    * Serialization version id; compatible with JDK 1.0+.
 994:    */
 995:   private static final long serialVersionUID = 3786198910865385080L;
 996: 
 997:   /**
 998:    * Smallest value allowed for radix arguments in Java. This value is 2.
 999:    *
1000:    * @see #digit(char, int)
1001:    * @see #forDigit(int, int)
1002:    * @see Integer#toString(int, int)
1003:    * @see Integer#valueOf(String)
1004:    */
1005:   public static final int MIN_RADIX = 2;
1006: 
1007:   /**
1008:    * Largest value allowed for radix arguments in Java. This value is 36.
1009:    *
1010:    * @see #digit(char, int)
1011:    * @see #forDigit(int, int)
1012:    * @see Integer#toString(int, int)
1013:    * @see Integer#valueOf(String)
1014:    */
1015:   public static final int MAX_RADIX = 36;
1016: 
1017:   /**
1018:    * The minimum value the char data type can hold.
1019:    * This value is <code>'\\u0000'</code>.
1020:    */
1021:   public static final char MIN_VALUE = '\u0000';
1022: 
1023:   /**
1024:    * The maximum value the char data type can hold.
1025:    * This value is <code>'\\uFFFF'</code>.
1026:    */
1027:   public static final char MAX_VALUE = '\uFFFF';
1028: 
1029:   /**
1030:    * Class object representing the primitive char data type.
1031:    *
1032:    * @since 1.1
1033:    */
1034:   public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1035: 
1036:   /**
1037:    * The number of bits needed to represent a <code>char</code>.
1038:    * @since 1.5
1039:    */
1040:   public static final int SIZE = 16;
1041: 
1042:   // Cache of Character objects, used by boxing conversions via valueOf().
1043:   // At least 0..127 must be cached; MAX_CACHE controls how much we actually
1044:   // cache, and charCache is allocated with MAX_CACHE + 1 slots.
1045:   private static final int MAX_CACHE = 127;
1046:   private static Character[] charCache = new Character[MAX_CACHE + 1];
1047: 
1048:   /**
1049:    * Lu = Letter, Uppercase (Informative); the type isUpperCase tests for.
1050:    *
1051:    * @since 1.1
1052:    */
1053:   public static final byte UPPERCASE_LETTER = 1;
1054: 
1055:   /**
1056:    * Ll = Letter, Lowercase (Informative); the type isLowerCase tests for.
1057:    *
1058:    * @since 1.1
1059:    */
1060:   public static final byte LOWERCASE_LETTER = 2;
1061: 
1062:   /**
1063:    * Lt = Letter, Titlecase (Informative); the type isTitleCase tests for.
1064:    *
1065:    * @since 1.1
1066:    */
1067:   public static final byte TITLECASE_LETTER = 3;
1068: 
1069:   /**
1070:    * Mn = Mark, Non-Spacing (Normative).
1071:    *
1072:    * @since 1.1
1073:    */
1074:   public static final byte NON_SPACING_MARK = 6;
1075: 
1076:   /**
1077:    * Mc = Mark, Spacing Combining (Normative).
1078:    *
1079:    * @since 1.1
1080:    */
1081:   public static final byte COMBINING_SPACING_MARK = 8;
1082: 
1083:   /**
1084:    * Me = Mark, Enclosing (Normative).
1085:    *
1086:    * @since 1.1
1087:    */
1088:   public static final byte ENCLOSING_MARK = 7;
1089: 
1090:   /**
1091:    * Nd = Number, Decimal Digit (Normative); the type isDigit tests for.
1092:    *
1093:    * @since 1.1
1094:    */
1095:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
1096: 
1097:   /**
1098:    * Nl = Number, Letter (Normative).
1099:    *
1100:    * @since 1.1
1101:    */
1102:   public static final byte LETTER_NUMBER = 10;
1103: 
1104:   /**
1105:    * No = Number, Other (Normative).
1106:    *
1107:    * @since 1.1
1108:    */
1109:   public static final byte OTHER_NUMBER = 11;
1110: 
1111:   /**
1112:    * Zs = Separator, Space (Normative).
1113:    *
1114:    * @since 1.1
1115:    */
1116:   public static final byte SPACE_SEPARATOR = 12;
1117: 
1118:   /**
1119:    * Zl = Separator, Line (Normative).
1120:    *
1121:    * @since 1.1
1122:    */
1123:   public static final byte LINE_SEPARATOR = 13;
1124: 
1125:   /**
1126:    * Zp = Separator, Paragraph (Normative).
1127:    *
1128:    * @since 1.1
1129:    */
1130:   public static final byte PARAGRAPH_SEPARATOR = 14;
1131: 
1132:   /**
1133:    * Cc = Other, Control (Normative).
1134:    *
1135:    * @since 1.1
1136:    */
1137:   public static final byte CONTROL = 15;
1138: 
1139:   /**
1140:    * Cf = Other, Format (Normative).
1141:    *
1142:    * @since 1.1
1143:    */
1144:   public static final byte FORMAT = 16;
1145: 
1146:   /**
1147:    * Cs = Other, Surrogate (Normative). The value 17 is deliberately skipped.
1148:    *
1149:    * @since 1.1
1150:    */
1151:   public static final byte SURROGATE = 19;
1152: 
1153:   /**
1154:    * Co = Other, Private Use (Normative).
1155:    *
1156:    * @since 1.1
1157:    */
1158:   public static final byte PRIVATE_USE = 18;
1159: 
1160:   /**
1161:    * Cn = Other, Not Assigned (Normative); the type isDefined rejects.
1162:    *
1163:    * @since 1.1
1164:    */
1165:   public static final byte UNASSIGNED = 0;
1166: 
1167:   /**
1168:    * Lm = Letter, Modifier (Informative).
1169:    *
1170:    * @since 1.1
1171:    */
1172:   public static final byte MODIFIER_LETTER = 4;
1173: 
1174:   /**
1175:    * Lo = Letter, Other (Informative).
1176:    *
1177:    * @since 1.1
1178:    */
1179:   public static final byte OTHER_LETTER = 5;
1180: 
1181:   /**
1182:    * Pc = Punctuation, Connector (Informative).
1183:    *
1184:    * @since 1.1
1185:    */
1186:   public static final byte CONNECTOR_PUNCTUATION = 23;
1187: 
1188:   /**
1189:    * Pd = Punctuation, Dash (Informative).
1190:    *
1191:    * @since 1.1
1192:    */
1193:   public static final byte DASH_PUNCTUATION = 20;
1194: 
1195:   /**
1196:    * Ps = Punctuation, Open (Informative).
1197:    *
1198:    * @since 1.1
1199:    */
1200:   public static final byte START_PUNCTUATION = 21;
1201: 
1202:   /**
1203:    * Pe = Punctuation, Close (Informative).
1204:    *
1205:    * @since 1.1
1206:    */
1207:   public static final byte END_PUNCTUATION = 22;
1208: 
1209:   /**
1210:    * Pi = Punctuation, Initial Quote (Informative).
1211:    *
1212:    * @since 1.4
1213:    */
1214:   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1215: 
1216:   /**
1217:    * Pf = Punctuation, Final Quote (Informative).
1218:    *
1219:    * @since 1.4
1220:    */
1221:   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1222: 
1223:   /**
1224:    * Po = Punctuation, Other (Informative).
1225:    *
1226:    * @since 1.1
1227:    */
1228:   public static final byte OTHER_PUNCTUATION = 24;
1229: 
1230:   /**
1231:    * Sm = Symbol, Math (Informative).
1232:    *
1233:    * @since 1.1
1234:    */
1235:   public static final byte MATH_SYMBOL = 25;
1236: 
1237:   /**
1238:    * Sc = Symbol, Currency (Informative).
1239:    *
1240:    * @since 1.1
1241:    */
1242:   public static final byte CURRENCY_SYMBOL = 26;
1243: 
1244:   /**
1245:    * Sk = Symbol, Modifier (Informative).
1246:    *
1247:    * @since 1.1
1248:    */
1249:   public static final byte MODIFIER_SYMBOL = 27;
1250: 
1251:   /**
1252:    * So = Symbol, Other (Informative).
1253:    *
1254:    * @since 1.1
1255:    */
1256:   public static final byte OTHER_SYMBOL = 28;
1257: 
1258:   /**
1259:    * Undefined bidirectional character type. Undefined char values have
1260:    * undefined directionality in the Unicode specification.
1261:    *
1262:    * @since 1.4
1263:    */
1264:   public static final byte DIRECTIONALITY_UNDEFINED = -1;
1265: 
1266:   /**
1267:    * Strong bidirectional character type "L".
1268:    *
1269:    * @since 1.4
1270:    */
1271:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1272: 
1273:   /**
1274:    * Strong bidirectional character type "R".
1275:    *
1276:    * @since 1.4
1277:    */
1278:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1279: 
1280:   /**
1281:    * Strong bidirectional character type "AL".
1282:    *
1283:    * @since 1.4
1284:    */
1285:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1286: 
1287:   /**
1288:    * Weak bidirectional character type "EN".
1289:    *
1290:    * @since 1.4
1291:    */
1292:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1293: 
1294:   /**
1295:    * Weak bidirectional character type "ES".
1296:    *
1297:    * @since 1.4
1298:    */
1299:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1300: 
1301:   /**
1302:    * Weak bidirectional character type "ET".
1303:    *
1304:    * @since 1.4
1305:    */
1306:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1307: 
1308:   /**
1309:    * Weak bidirectional character type "AN".
1310:    *
1311:    * @since 1.4
1312:    */
1313:   public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1314: 
1315:   /**
1316:    * Weak bidirectional character type "CS".
1317:    *
1318:    * @since 1.4
1319:    */
1320:   public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1321: 
1322:   /**
1323:    * Weak bidirectional character type "NSM".
1324:    *
1325:    * @since 1.4
1326:    */
1327:   public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1328: 
1329:   /**
1330:    * Weak bidirectional character type "BN".
1331:    *
1332:    * @since 1.4
1333:    */
1334:   public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1335: 
1336:   /**
1337:    * Neutral bidirectional character type "B".
1338:    *
1339:    * @since 1.4
1340:    */
1341:   public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1342: 
1343:   /**
1344:    * Neutral bidirectional character type "S".
1345:    *
1346:    * @since 1.4
1347:    */
1348:   public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1349: 
1350:   /**
1351:    * Neutral bidirectional character type "WS".
1352:    *
1353:    * @since 1.4
1354:    */
1355:   public static final byte DIRECTIONALITY_WHITESPACE = 12;
1356: 
1357:   /**
1358:    * Neutral bidirectional character type "ON".
1359:    *
1360:    * @since 1.4
1361:    */
1362:   public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1363: 
1364:   /**
1365:    * Strong bidirectional character type "LRE".
1366:    *
1367:    * @since 1.4
1368:    */
1369:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1370: 
1371:   /**
1372:    * Strong bidirectional character type "LRO".
1373:    *
1374:    * @since 1.4
1375:    */
1376:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1377: 
1378:   /**
1379:    * Strong bidirectional character type "RLE".
1380:    *
1381:    * @since 1.4
1382:    */
1383:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1384: 
1385:   /**
1386:    * Strong bidirectional character type "RLO".
1387:    *
1388:    * @since 1.4
1389:    */
1390:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1391: 
1392:   /**
1393:    * Weak bidirectional character type "PDF".
1394:    *
1395:    * @since 1.4
1396:    */
1397:   public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1398: 
1399:   /**
1400:    * Stores the Unicode block offset lookup table, obtained through
1401:    * String.zeroBasedStringValue so that the array is not copied.
1402:    * @see #readChar(char)
1403:    * @see CharData#BLOCKS
1404:    */
1405:   private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS);
1406: 
1407:   /**
1408:    * Stores the Unicode attribute lookup table indexed by readChar, obtained
1409:    * through String.zeroBasedStringValue so that the array is not copied.
1410:    * @see CharData#DATA
1411:    */
1412:   private static final char[] data = String.zeroBasedStringValue(CharData.DATA);
1413: 
1414:   /**
1415:    * Stores the Unicode numeric value attribute table, obtained through
1416:    * String.zeroBasedStringValue so that the array is not copied.
1417:    * @see CharData#NUM_VALUE
1418:    */
1419:   private static final char[] numValue
1420:       = String.zeroBasedStringValue(CharData.NUM_VALUE);
1421: 
1422:   /**
1423:    * Stores the Unicode uppercase offset table used by toUpperCase, obtained
1424:    * through String.zeroBasedStringValue so that the array is not copied.
1425:    * @see CharData#UPPER
1426:    */
1427:   private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER);
1428: 
1429:   /**
1430:    * Stores the Unicode lowercase offset table used by toLowerCase, obtained
1431:    * through String.zeroBasedStringValue so that the array is not copied.
1432:    * @see CharData#LOWER
1433:    */
1434:   private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER);
1435: 
1436:   /**
1437:    * Stores the Unicode direction attribute table, obtained through
1438:    * String.zeroBasedStringValue so that the array is not copied.
1439:    * @see CharData#DIRECTION
1440:    */
1441:   // Package visible for use by String.
1442:   static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION);
1443: 
1444:   /**
1445:    * Stores the Unicode titlecase table, obtained through
1446:    * String.zeroBasedStringValue so that the array is not copied.
1447:    * @see CharData#TITLE
1448:    */
1449:   private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);
1450: 
1451:   /**
1452:    * Mask selecting the type, the low 5 bits, of an entry of data.
1453:    * @see CharData#DATA
1454:    */
1455:   private static final int TYPE_MASK = 0x1F;
1456: 
1457:   /**
1458:    * Mask selecting the non-breaking space flag (bit 5) of an entry of
1459:    * data.
1460:    * @see CharData#DATA
1461:    */
1462:   private static final int NO_BREAK_MASK = 0x20;
1463: 
1464:   /**
1465:    * Mask selecting the mirrored directionality flag (bit 6) of an entry
1466:    * of data.
1467:    * @see CharData#DATA
1468:    */
1469:   private static final int MIRROR_MASK = 0x40;
1470: 
1471:   /**
1472:    * Minimum value of a supplementary code point (U+10000).
1473:    *
1474:    * @since 1.5
1475:    */
1476:   public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
1477: 
1478:   /**
1479:    * Minimum value of a Unicode code point.
1480:    *
1481:    * @since 1.5
1482:    */
1483:   public static final int MIN_CODE_POINT = 0; 
1484:  
1485:  
1486:   /**
1487:    * Maximum value of a Unicode code point (U+10FFFF).
1488:    *
1489:    * @since 1.5
1490:    */
1491:   public static final int MAX_CODE_POINT = 0x010ffff;
1492: 
1493: 
1494:   /**
1495:    * Minimum high surrogate code in UTF-16 encoding.
1496:    *
1497:    * @since 1.5
1498:    */
1499:   public static final char MIN_HIGH_SURROGATE = '\ud800';
1500: 
1501:   /**
1502:    * Maximum high surrogate code in UTF-16 encoding.
1503:    *
1504:    * @since 1.5
1505:    */
1506:   public static final char MAX_HIGH_SURROGATE = '\udbff';
1507:  
1508:   /**
1509:    * Minimum low surrogate code in UTF-16 encoding.
1510:    *
1511:    * @since 1.5
1512:    */
1513:   public static final char MIN_LOW_SURROGATE = '\udc00';
1514: 
1515:   /**
1516:    * Maximum low surrogate code in UTF-16 encoding.
1517:    *
1518:    * @since 1.5
1519:    */
1520:   public static final char MAX_LOW_SURROGATE = '\udfff';
1521: 
1522:   /**
1523:    * Minimum surrogate code in UTF-16 encoding; same as MIN_HIGH_SURROGATE.
1524:    *
1525:    * @since 1.5
1526:    */
1527:   public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
1528: 
1529:   /**
1530:    * Maximum surrogate code in UTF-16 encoding; same as MAX_LOW_SURROGATE.
1531:    *
1532:    * @since 1.5
1533:    */
1534:   public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
1535: 
1536:   /**
1537:    * Grabs an attribute offset from the Unicode attribute database. The lower
1538:    * 5 bits are the character type, the next 2 bits are flags, and the top
1539:    * 9 bits are the offset into the attribute tables.
1540:    *
1541:    * @param ch the character to look up
1542:    * @return the character's attribute offset and type
1543:    * @see #TYPE_MASK
1544:    * @see #NO_BREAK_MASK
1545:    * @see #MIRROR_MASK
1546:    * @see CharData#DATA
1547:    * @see CharData#SHIFT
1548:    */
1549:   // Package visible for use in String.
1550:   static char readChar(char ch)
1551:   {
1552:     // Perform 16-bit addition to find the correct entry in data.
1553:     return data[(char) (blocks[ch >> CharData.SHIFT] + ch)];
1554:   }
1555: 
1556:   /**
1557:    * Creates a Character object that wraps the given primitive char.
1558:    *
1559:    * @param value the character to wrap
1560:    */
1561:   public Character(char value)
1562:   {
1563:     this.value = value;
1564:   }
1565: 
1566:   /**
1567:    * Unwraps this object, yielding the primitive char it holds.
1568:    *
1569:    * @return the character wrapped
1570:    */
1571:   public char charValue()
1572:   {
1573:     return this.value;
1574:   }
1575: 
1576:   /**
1577:    * Computes the hash code, which is the wrapped character widened to an
1578:    * int. Range of returned values: 0x0000-0xFFFF.
1579:    *
1580:    * @return the value of the wrapped character
1581:    */
1582:   public int hashCode()
1583:   {
1584:     return (int) value;
1585:   }
1586: 
1587:   /**
1588:    * Tests for equality: true only for a Character wrapping the same value.
1589:    *
1590:    * @param o object to compare
1591:    * @return true if o is a Character with the same value
1592:    */
1593:   public boolean equals(Object o)
1594:   {
1595:     final boolean sameType = o instanceof Character;
1596:     return sameType && ((Character) o).value == value;
1597:   }
1598: 
1599:   /**
1600:    * Renders the wrapped character as a one-character String.
1601:    *
1602:    * @return a String of length 1 holding the wrapped character
1603:    */
1604:   public String toString()
1605:   {
1606:     // Hand the fresh array to the package constructor, avoiding a copy.
1607:     final char[] single = { value };
1608:     return new String(single, 0, 1, true);
1609:   }
1610: 
1611:   /**
1612:    * Builds a one-character String from the specified character.
1613:    *
1614:    * @param ch the character to convert
1615:    * @return a String of length 1 holding ch
1616:    * @since 1.4
1617:    */
1618:   public static String toString(char ch)
1619:   {
1620:     final char[] single = { ch };
1621:     return new String(single, 0, 1, true); // Package ctor avoids a copy.
1622:   }
1623: 
1624:   /**
1625:    * Tests whether a character is a Unicode lowercase letter, such as 'a'.
1626:    * <br>
1627:    * lowercase = [Ll]
1628:    *
1629:    * @param ch character to test
1630:    * @return true if ch is a Unicode lowercase letter, else false
1631:    * @see #isUpperCase(char)
1632:    * @see #isTitleCase(char)
1633:    * @see #toLowerCase(char)
1634:    * @see #getType(char)
1635:    */
1636:   public static boolean isLowerCase(char ch)
1637:   {
1638:     final int category = getType(ch);
1639:     return category == LOWERCASE_LETTER;
1640:   }
1641: 
1642:   /**
1643:    * Tests whether a character is a Unicode uppercase letter, such as 'A'.
1644:    * <br>
1645:    * uppercase = [Lu]
1646:    *
1647:    * @param ch character to test
1648:    * @return true if ch is a Unicode uppercase letter, else false
1649:    * @see #isLowerCase(char)
1650:    * @see #isTitleCase(char)
1651:    * @see #toUpperCase(char)
1652:    * @see #getType(char)
1653:    */
1654:   public static boolean isUpperCase(char ch)
1655:   {
1656:     final int category = getType(ch);
1657:     return category == UPPERCASE_LETTER;
1658:   }
1659: 
1660:   /**
1661:    * Tests for a Unicode titlecase letter, e.g. "Lj" (capital L, small j).
1662:    * <br>
1663:    * titlecase = [Lt]
1664:    *
1665:    * @param ch character to test
1666:    * @return true if ch is a Unicode titlecase letter, else false
1667:    * @see #isLowerCase(char)
1668:    * @see #isUpperCase(char)
1669:    * @see #toTitleCase(char)
1670:    * @see #getType(char)
1671:    */
1672:   public static boolean isTitleCase(char ch)
1673:   {
1674:     final int category = getType(ch);
1675:     return category == TITLECASE_LETTER;
1676:   }
1677: 
1678:   /**
1679:    * Tests whether a character is a Unicode decimal digit, such as '0'.
1680:    * <br>
1681:    * Unicode decimal digit = [Nd]
1682:    *
1683:    * @param ch character to test
1684:    * @return true if ch is a Unicode decimal digit, else false
1685:    * @see #digit(char, int)
1686:    * @see #forDigit(int, int)
1687:    * @see #getType(char)
1688:    */
1689:   public static boolean isDigit(char ch)
1690:   {
1691:     final int category = getType(ch);
1692:     return category == DECIMAL_DIGIT_NUMBER;
1693:   }
1694: 
1695:   /**
1696:    * Tests whether a character is assigned, i.e. covered by the data file.
1697:    * <br>
1698:    * defined = not [Cn]
1699:    *
1700:    * @param ch character to test
1701:    * @return true if ch is a Unicode character, else false
1702:    * @see #isDigit(char)
1703:    * @see #isLetter(char)
1704:    * @see #isLetterOrDigit(char)
1705:    * @see #isLowerCase(char)
1706:    * @see #isTitleCase(char)
1707:    * @see #isUpperCase(char)
1708:    */
1709:   public static boolean isDefined(char ch)
1710:   {
1711:     final int category = getType(ch);
1712:     return category != UNASSIGNED;
1713:   }
1714: 
1715:   /**
1716:    * Tests whether a character is a Unicode letter. Caseless letters exist,
1717:    * so this can be true while isLowerCase and isUpperCase are both false.
1718:    * <br>
1719:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
1720:    *
1721:    * @param ch character to test
1722:    * @return true if ch is a Unicode letter, else false
1723:    * @see #isDigit(char)
1724:    * @see #isJavaIdentifierStart(char)
1725:    * @see #isJavaLetter(char)
1726:    * @see #isJavaLetterOrDigit(char)
1727:    * @see #isLetterOrDigit(char)
1728:    * @see #isLowerCase(char)
1729:    * @see #isTitleCase(char)
1730:    * @see #isUnicodeIdentifierStart(char)
1731:    * @see #isUpperCase(char)
1732:    */
1733:   public static boolean isLetter(char ch)
1734:   {
1735:     final int letterKinds = (1 << UPPERCASE_LETTER)
1736:       | (1 << LOWERCASE_LETTER)
1737:       | (1 << TITLECASE_LETTER)
1738:       | (1 << MODIFIER_LETTER)
1739:       | (1 << OTHER_LETTER);
1740:     return (letterKinds & (1 << getType(ch))) != 0;
1741:   }
1742: 
1743:   /**
1744:    * Tests whether a character is a Unicode letter or a Unicode decimal
1745:    * digit, i.e. whether isLetter or isDigit would accept it.
1746:    * <br>
1747:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
1748:    *
1749:    * @param ch character to test
1750:    * @return true if ch is a Unicode letter or a Unicode digit, else false
1751:    * @see #isDigit(char)
1752:    * @see #isJavaIdentifierPart(char)
1753:    * @see #isJavaLetter(char)
1754:    * @see #isJavaLetterOrDigit(char)
1755:    * @see #isLetter(char)
1756:    * @see #isUnicodeIdentifierPart(char)
1757:    */
1758:   public static boolean isLetterOrDigit(char ch)
1759:   {
1760:     final int accepted = (1 << UPPERCASE_LETTER)
1761:       | (1 << LOWERCASE_LETTER)
1762:       | (1 << TITLECASE_LETTER)
1763:       | (1 << MODIFIER_LETTER)
1764:       | (1 << OTHER_LETTER)
1765:       | (1 << DECIMAL_DIGIT_NUMBER);
1766:     return (accepted & (1 << getType(ch))) != 0;
1767:   }
1768: 
1769:   /**
1770:    * Determines if a character can start a Java identifier. This is the
1771:    * combination of isLetter, any character where getType returns
1772:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1773:    * (like '_'). This method simply forwards to isJavaIdentifierStart.
1774:    *
1775:    * @param ch character to test
1776:    * @return true if ch can start a Java identifier, else false
1777:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
1778:    * @see #isJavaLetterOrDigit(char)
1779:    * @see #isJavaIdentifierStart(char)
1780:    * @see #isJavaIdentifierPart(char)
1781:    * @see #isLetter(char)
1782:    * @see #isLetterOrDigit(char)
1783:    * @see #isUnicodeIdentifierStart(char)
1784:    */
1785:   public static boolean isJavaLetter(char ch)
1786:   {
1787:     return isJavaIdentifierStart(ch);
1788:   }
1789: 
1790:   /**
1791:    * Determines if a character can follow the first letter in a Java
1792:    * identifier: anything isJavaLetter accepts, plus digits, numeric letters
1793:    * (like Roman numerals), combining marks, non-spacing marks, and
1794:    * characters for which isIdentifierIgnorable is true. This method simply
1795:    * forwards to isJavaIdentifierPart.
1796:    *
1797:    * @param ch character to test
1798:    * @return true if ch can follow the first letter in a Java identifier
1799:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
1800:    * @see #isJavaLetter(char)
1801:    * @see #isJavaIdentifierStart(char)
1802:    * @see #isJavaIdentifierPart(char)
1803:    * @see #isLetter(char)
1804:    * @see #isLetterOrDigit(char)
1805:    * @see #isUnicodeIdentifierPart(char)
1806:    * @see #isIdentifierIgnorable(char)
1807:    */
1808:   public static boolean isJavaLetterOrDigit(char ch)
1809:   {
1810:     return isJavaIdentifierPart(ch);
1811:   }
1812: 
1813:   /**
1814:    * Determines if a character can start a Java identifier. Accepted are
1815:    * all letters, characters whose type is LETTER_NUMBER, currency symbols
1816:    * (like '$'), and connecting punctuation (like '_'); decimal digits are
1817:    * not accepted in the leading position.
1818:    * <br>
1819:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
1820:    *
1821:    * @param ch character to test
1822:    * @return true if ch can start a Java identifier, else false
1823:    * @see #isJavaIdentifierPart(char)
1824:    * @see #isLetter(char)
1825:    * @see #isUnicodeIdentifierStart(char)
1826:    * @since 1.1
1827:    */
1828:   public static boolean isJavaIdentifierStart(char ch)
1829:   {
1830:     final int startKinds = (1 << UPPERCASE_LETTER)
1831:       | (1 << LOWERCASE_LETTER)
1832:       | (1 << TITLECASE_LETTER)
1833:       | (1 << MODIFIER_LETTER)
1834:       | (1 << OTHER_LETTER)
1835:       | (1 << LETTER_NUMBER)
1836:       | (1 << CURRENCY_SYMBOL)
1837:       | (1 << CONNECTOR_PUNCTUATION);
1838:     return (startKinds & (1 << getType(ch))) != 0;
1839:   }
1840: 
1841:   /**
1842:    * Determines if a character can appear after the first character of a
1843:    * Java identifier: everything isJavaIdentifierStart accepts, plus digits,
1844:    * combining and non-spacing marks, format characters, and the control
1845:    * characters accepted by isIdentifierIgnorable.
1846:    * <br>
1847:    * Java identifier extender =
1848:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
1849:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1850:    *
1851:    * @param ch character to test
1852:    * @return true if ch can follow the first letter in a Java identifier
1853:    * @see #isIdentifierIgnorable(char)
1854:    * @see #isJavaIdentifierStart(char)
1855:    * @see #isLetterOrDigit(char)
1856:    * @see #isUnicodeIdentifierPart(char)
1857:    * @since 1.1
1858:    */
1859:   public static boolean isJavaIdentifierPart(char ch)
1860:   {
1861:     final int category = getType(ch);
1862:     final int partKinds = (1 << UPPERCASE_LETTER)
1863:       | (1 << LOWERCASE_LETTER)
1864:       | (1 << TITLECASE_LETTER)
1865:       | (1 << MODIFIER_LETTER)
1866:       | (1 << OTHER_LETTER)
1867:       | (1 << NON_SPACING_MARK)
1868:       | (1 << COMBINING_SPACING_MARK)
1869:       | (1 << DECIMAL_DIGIT_NUMBER)
1870:       | (1 << LETTER_NUMBER)
1871:       | (1 << CURRENCY_SYMBOL)
1872:       | (1 << CONNECTOR_PUNCTUATION)
1873:       | (1 << FORMAT);
1874:     if ((partKinds & (1 << category)) != 0)
1875:       return true;
1876:     return category == CONTROL && isIdentifierIgnorable(ch);
1877:   }
1878: 
1879:   /**
1880:    * Determines if a character can start a Unicode identifier. Only
1881:    * letters qualify, where "letter" here also covers the characters
1882:    * whose type is LETTER_NUMBER.
1883:    * <br>
1884:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
1885:    *
1886:    * @param ch character to test
1887:    * @return true if ch can start a Unicode identifier, else false
1888:    * @see #isJavaIdentifierStart(char)
1889:    * @see #isLetter(char)
1890:    * @see #isUnicodeIdentifierPart(char)
1891:    * @since 1.1
1892:    */
1893:   public static boolean isUnicodeIdentifierStart(char ch)
1894:   {
1895:     final int startKinds = (1 << UPPERCASE_LETTER)
1896:       | (1 << LOWERCASE_LETTER)
1897:       | (1 << TITLECASE_LETTER)
1898:       | (1 << MODIFIER_LETTER)
1899:       | (1 << OTHER_LETTER)
1900:       | (1 << LETTER_NUMBER);
1901:     return (startKinds & (1 << getType(ch))) != 0;
1902:   }
1903: 
1904:   /**
1905:    * Determines if a character can appear after the first character of a
1906:    * Unicode identifier: letters, connecting punctuation, digits, numeric
1907:    * letters, combining and non-spacing marks, and ignorable characters.
1908:    * <br>
1909:    * Unicode identifier extender =
1910:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
1911:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1912:    *
1913:    * @param ch character to test
1914:    * @return true if ch can follow the first letter in a Unicode identifier
1915:    * @see #isIdentifierIgnorable(char)
1916:    * @see #isJavaIdentifierPart(char)
1917:    * @see #isLetterOrDigit(char)
1918:    * @see #isUnicodeIdentifierStart(char)
1919:    * @since 1.1
1920:    */
1921:   public static boolean isUnicodeIdentifierPart(char ch)
1922:   {
1923:     final int category = getType(ch);
1924:     final int partKinds = (1 << UPPERCASE_LETTER)
1925:       | (1 << LOWERCASE_LETTER)
1926:       | (1 << TITLECASE_LETTER)
1927:       | (1 << MODIFIER_LETTER)
1928:       | (1 << OTHER_LETTER)
1929:       | (1 << NON_SPACING_MARK)
1930:       | (1 << COMBINING_SPACING_MARK)
1931:       | (1 << DECIMAL_DIGIT_NUMBER)
1932:       | (1 << LETTER_NUMBER)
1933:       | (1 << CONNECTOR_PUNCTUATION)
1934:       | (1 << FORMAT);
1935:     if ((partKinds & (1 << category)) != 0)
1936:       return true;
1937:     return category == CONTROL && isIdentifierIgnorable(ch);
1938:   }
1939: 
1940:   /**
1941:    * Determines if a character is ignorable in a Unicode identifier. The
1942:    * ignorable set is the non-whitespace ISO control characters (U+0000
1943:    * through U+0008, U+000E through U+001B, and U+007F through U+009F)
1944:    * together with every character whose type is FORMAT. The control
1945:    * ranges are tested first, so getType is only consulted for other input.
1946:    * <br>
1947:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
1948:    *    |U+007F-U+009F
1949:    *
1950:    * @param ch character to test
1951:    * @return true if ch is ignorable in a Unicode or Java identifier
1952:    * @see #isJavaIdentifierPart(char)
1953:    * @see #isUnicodeIdentifierPart(char)
1954:    * @since 1.1
1955:    */
1956:   public static boolean isIdentifierIgnorable(char ch)
1957:   {
1958:     final boolean lowControl = ch < '\t' || (ch >= '\u000E' && ch <= '\u001B');
1959:     final boolean highControl = ch >= '\u007F' && ch <= '\u009F';
1960:     return lowControl || highControl || getType(ch) == FORMAT;
1961:   }
1962: 
1963:   /**
1964:    * Maps a Unicode character to its lowercase equivalent. Characters that
1965:    * have no lowercase mapping are returned unchanged. Be aware that
1966:    * isLowerCase(toLowerCase(ch)) does not always return true.
1967:    *
1968:    * @param ch character to convert to lowercase
1969:    * @return the lowercase mapping of ch, or ch itself if there is none
1970:    * @see #isLowerCase(char)
1971:    * @see #isUpperCase(char)
1972:    * @see #toTitleCase(char)
1973:    * @see #toUpperCase(char)
1974:    */
1975:   public static char toLowerCase(char ch)
1976:   {
1977:     // The offset is added modulo 2^16, so its signedness is irrelevant.
1978:     final char offset = lower[readChar(ch) >> 7];
1979:     return (char) (ch + offset);
1980:   }
1981: 
1982:   /**
1983:    * Converts a Unicode character into its uppercase equivalent mapping.
1984:    * If a mapping does not exist, then the character passed is returned.
1985:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
1986:    *
1987:    * @param ch character to convert to uppercase
1988:    * @return uppercase mapping of ch, or ch if uppercase mapping does
1989:    *         not exist
1990:    * @see #isLowerCase(char)
1991:    * @see #isUpperCase(char)
1992:    * @see #toLowerCase(char)
1993:    * @see #toTitleCase(char)
1994:    */
1995:   public static char toUpperCase(char ch)
1996:   {
1997:     // Signedness doesn't matter, as result is cast back to char.
1998:     return (char) (ch + upper[readChar(ch) >> 7]);
1999:   }
2000: 
2001:   /**
2002:    * Converts a Unicode character into its titlecase equivalent mapping.
2003:    * If a mapping does not exist, then the character passed is returned.
2004:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
2005:    *
2006:    * @param ch character to convert to titlecase
2007:    * @return titlecase mapping of ch, or ch if titlecase mapping does
2008:    *         not exist
2009:    * @see #isTitleCase(char)
2010:    * @see #toLowerCase(char)
2011:    * @see #toUpperCase(char)
2012:    */
2013:   public static char toTitleCase(char ch)
2014:   {
2015:     // As title is short, it doesn't hurt to exhaustively iterate over it.
2016:     for (int i = title.length - 2; i >= 0; i -= 2)
2017:       if (title[i] == ch)
2018:         return title[i + 1];
2019:     return toUpperCase(ch);
2020:   }
2021: 
2022:   /**
   * Converts a character into a digit of the specified radix. If the radix
   * is outside the range MIN_RADIX to MAX_RADIX, or if the numeric value of
   * ch is not less than the radix, or if ch is neither a decimal digit nor
   * in the case insensitive set of 'a'-'z', the result is -1.
2027:    * <br>
2028:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
2029:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
2030:    *
2031:    * @param ch character to convert into a digit
2032:    * @param radix radix in which ch is a digit
2033:    * @return digit which ch represents in radix, or -1 not a valid digit
2034:    * @see #MIN_RADIX
2035:    * @see #MAX_RADIX
2036:    * @see #forDigit(int, int)
2037:    * @see #isDigit(char)
2038:    * @see #getNumericValue(char)
2039:    */
2040:   public static int digit(char ch, int radix)
2041:   {
2042:     if (radix < MIN_RADIX || radix > MAX_RADIX)
2043:       return -1;
2044:     char attr = readChar(ch);
2045:     if (((1 << (attr & TYPE_MASK))
2046:          & ((1 << UPPERCASE_LETTER)
2047:             | (1 << LOWERCASE_LETTER)
2048:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
2049:       {
2050:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
2051:         int digit = numValue[attr >> 7];
2052:         return (digit < radix) ? digit : -1;
2053:       }
2054:     return -1;
2055:   }
2056: 
2057:   /**
2058:    * Returns the Unicode numeric value property of a character. For example,
2059:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
2060:    *
2061:    * <p>This method also returns values for the letters A through Z, (not
2062:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
2063:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
2064:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
2065:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
2066:    * <code>'\uFF5A'</code> (full width variants).
2067:    *
2068:    * <p>If the character lacks a numeric value property, -1 is returned.
2069:    * If the character has a numeric value property which is not representable
2070:    * as a nonnegative integer, such as a fraction, -2 is returned.
2071:    *
2072:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
2073:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
2074:    *
2075:    * @param ch character from which the numeric value property will
2076:    *        be retrieved
2077:    * @return the numeric value property of ch, or -1 if it does not exist, or
2078:    *         -2 if it is not representable as a nonnegative integer
2079:    * @see #forDigit(int, int)
2080:    * @see #digit(char, int)
2081:    * @see #isDigit(char)
2082:    * @since 1.1
2083:    */
2084:   public static int getNumericValue(char ch)
2085:   {
2086:     // Treat numValue as signed.
2087:     return (short) numValue[readChar(ch) >> 7];
2088:   }
2089: 
2090:   /**
2091:    * Determines if a character is a ISO-LATIN-1 space. This is only the five
2092:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
2093:    * <code>'\r'</code>, and <code>' '</code>.
2094:    * <br>
2095:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
2096:    *
2097:    * @param ch character to test
2098:    * @return true if ch is a space, else false
2099:    * @deprecated Replaced by {@link #isWhitespace(char)}
2100:    * @see #isSpaceChar(char)
2101:    * @see #isWhitespace(char)
2102:    */
2103:   public static boolean isSpace(char ch)
2104:   {
2105:     // Performing the subtraction up front alleviates need to compare longs.
2106:     return ch-- <= ' ' && ((1 << ch)
2107:                            & ((1 << (' ' - 1))
2108:                               | (1 << ('\t' - 1))
2109:                               | (1 << ('\n' - 1))
2110:                               | (1 << ('\r' - 1))
2111:                               | (1 << ('\f' - 1)))) != 0;
2112:   }
2113: 
2114:   /**
2115:    * Determines if a character is a Unicode space character. This includes
2116:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
2117:    * <br>
2118:    * Unicode space = [Zs]|[Zp]|[Zl]
2119:    *
2120:    * @param ch character to test
2121:    * @return true if ch is a Unicode space, else false
2122:    * @see #isWhitespace(char)
2123:    * @since 1.1
2124:    */
2125:   public static boolean isSpaceChar(char ch)
2126:   {
2127:     return ((1 << getType(ch))
2128:             & ((1 << SPACE_SEPARATOR)
2129:                | (1 << LINE_SEPARATOR)
2130:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
2131:   }
2132: 
2133:   /**
2134:    * Determines if a character is Java whitespace. This includes Unicode
2135:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
2136:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
2137:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
2138:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
2139:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
2140:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
2141:    * and <code>'\u001F'</code>.
2142:    * <br>
2143:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
2144:    *
2145:    * @param ch character to test
2146:    * @return true if ch is Java whitespace, else false
2147:    * @see #isSpaceChar(char)
2148:    * @since 1.1
2149:    */
2150:   public static boolean isWhitespace(char ch)
2151:   {
2152:     int attr = readChar(ch);
2153:     return ((((1 << (attr & TYPE_MASK))
2154:               & ((1 << SPACE_SEPARATOR)
2155:                  | (1 << LINE_SEPARATOR)
2156:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
2157:             && (attr & NO_BREAK_MASK) == 0)
2158:       || (ch <= '\u001F' && ((1 << ch)
2159:                              & ((1 << '\t')
2160:                                 | (1 << '\n')
2161:                                 | (1 << '\u000B')
2162:                                 | (1 << '\u000C')
2163:                                 | (1 << '\r')
2164:                                 | (1 << '\u001C')
2165:                                 | (1 << '\u001D')
2166:                                 | (1 << '\u001E')
2167:                                 | (1 << '\u001F'))) != 0);
2168:   }
2169: 
2170:   /**
2171:    * Determines if a character has the ISO Control property.
2172:    * <br>
2173:    * ISO Control = [Cc]
2174:    *
2175:    * @param ch character to test
2176:    * @return true if ch is an ISO Control character, else false
2177:    * @see #isSpaceChar(char)
2178:    * @see #isWhitespace(char)
2179:    * @since 1.1
2180:    */
2181:   public static boolean isISOControl(char ch)
2182:   {
2183:     return getType(ch) == CONTROL;
2184:   }
2185: 
2186:   /**
2187:    * Returns the Unicode general category property of a character.
2188:    *
2189:    * @param ch character from which the general category property will
2190:    *        be retrieved
2191:    * @return the character category property of ch as an integer
2192:    * @see #UNASSIGNED
2193:    * @see #UPPERCASE_LETTER
2194:    * @see #LOWERCASE_LETTER
2195:    * @see #TITLECASE_LETTER
2196:    * @see #MODIFIER_LETTER
2197:    * @see #OTHER_LETTER
2198:    * @see #NON_SPACING_MARK
2199:    * @see #ENCLOSING_MARK
2200:    * @see #COMBINING_SPACING_MARK
2201:    * @see #DECIMAL_DIGIT_NUMBER
2202:    * @see #LETTER_NUMBER
2203:    * @see #OTHER_NUMBER
2204:    * @see #SPACE_SEPARATOR
2205:    * @see #LINE_SEPARATOR
2206:    * @see #PARAGRAPH_SEPARATOR
2207:    * @see #CONTROL
2208:    * @see #FORMAT
2209:    * @see #PRIVATE_USE
2210:    * @see #SURROGATE
2211:    * @see #DASH_PUNCTUATION
2212:    * @see #START_PUNCTUATION
2213:    * @see #END_PUNCTUATION
2214:    * @see #CONNECTOR_PUNCTUATION
2215:    * @see #OTHER_PUNCTUATION
2216:    * @see #MATH_SYMBOL
2217:    * @see #CURRENCY_SYMBOL
2218:    * @see #MODIFIER_SYMBOL
2219:    * @see #INITIAL_QUOTE_PUNCTUATION
2220:    * @see #FINAL_QUOTE_PUNCTUATION
2221:    * @since 1.1
2222:    */
2223:   public static int getType(char ch)
2224:   {
2225:     return readChar(ch) & TYPE_MASK;
2226:   }
2227: 
2228:   /**
   * Converts a digit into a character which represents that digit
   * in a specified radix. If the radix is outside the range MIN_RADIX to
   * MAX_RADIX, or the digit is negative or not less than the radix, then
   * the null character <code>'\0'</code>
   * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
2233:    * <br>
2234:    * return value boundary = U+0030-U+0039|U+0061-U+007A
2235:    *
2236:    * @param digit digit to be converted into a character
2237:    * @param radix radix of digit
2238:    * @return character representing digit in radix, or '\0'
2239:    * @see #MIN_RADIX
2240:    * @see #MAX_RADIX
2241:    * @see #digit(char, int)
2242:    */
2243:   public static char forDigit(int digit, int radix)
2244:   {
2245:     if (radix < MIN_RADIX || radix > MAX_RADIX
2246:         || digit < 0 || digit >= radix)
2247:       return '\0';
2248:     return Number.digits[digit];
2249:   }
2250: 
2251:   /**
2252:    * Returns the Unicode directionality property of the character. This
2253:    * is used in the visual ordering of text.
2254:    *
2255:    * @param ch the character to look up
2256:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
2257:    * @see #DIRECTIONALITY_UNDEFINED
2258:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
2259:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
2260:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
2261:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
2262:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
2263:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
2264:    * @see #DIRECTIONALITY_ARABIC_NUMBER
2265:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
2266:    * @see #DIRECTIONALITY_NONSPACING_MARK
2267:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
2268:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
2269:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
2270:    * @see #DIRECTIONALITY_WHITESPACE
2271:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
2272:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
2273:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
2274:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
2275:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
2276:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
2277:    * @since 1.4
2278:    */
2279:   public static byte getDirectionality(char ch)
2280:   {
2281:     // The result will correctly be signed.
2282:     return (byte) (direction[readChar(ch) >> 7] >> 2);
2283:   }
2284: 
2285:   /**
2286:    * Determines whether the character is mirrored according to Unicode. For
2287:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
2288:    * left-to-right text, but ')' in right-to-left text.
2289:    *
2290:    * @param ch the character to look up
2291:    * @return true if the character is mirrored
2292:    * @since 1.4
2293:    */
2294:   public static boolean isMirrored(char ch)
2295:   {
2296:     return (readChar(ch) & MIRROR_MASK) != 0;
2297:   }
2298: 
2299:   /**
2300:    * Compares another Character to this Character, numerically.
2301:    *
2302:    * @param anotherCharacter Character to compare with this Character
2303:    * @return a negative integer if this Character is less than
2304:    *         anotherCharacter, zero if this Character is equal, and
2305:    *         a positive integer if this Character is greater
2306:    * @throws NullPointerException if anotherCharacter is null
2307:    * @since 1.2
2308:    */
2309:   public int compareTo(Character anotherCharacter)
2310:   {
2311:     return value - anotherCharacter.value;
2312:   }
2313: 
2314:   /**
2315:    * Compares an object to this Character.  Assuming the object is a
2316:    * Character object, this method performs the same comparison as
2317:    * compareTo(Character).
2318:    *
2319:    * @param o object to compare
2320:    * @return the comparison value
2321:    * @throws ClassCastException if o is not a Character object
2322:    * @throws NullPointerException if o is null
2323:    * @see #compareTo(Character)
2324:    * @since 1.2
2325:    */
2326:   public int compareTo(Object o)
2327:   {
2328:     return compareTo((Character) o);
2329:   }
2330: 
2331:   /**
   * Returns a <code>Character</code> object wrapping the value.
2333:    * In contrast to the <code>Character</code> constructor, this method
2334:    * will cache some values.  It is used by boxing conversion.
2335:    *
2336:    * @param val the value to wrap
2337:    * @return the <code>Character</code>
2338:    * 
2339:    * @since 1.5
2340:    */
2341:   public static Character valueOf(char val)
2342:   {
2343:     if (val > MAX_CACHE)
2344:       return new Character(val);
2345:     synchronized (charCache)
2346:       {
2347:     if (charCache[val - MIN_VALUE] == null)
2348:       charCache[val - MIN_VALUE] = new Character(val);
2349:     return charCache[val - MIN_VALUE];
2350:       }
2351:   }
2352: 
2353:   /**
2354:    * Reverse the bytes in val.
2355:    * @since 1.5
2356:    */
2357:   public static char reverseBytes(char val)
2358:   {
2359:     return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
2360:   }
2361: 
2362:   /**
2363:    * Converts a unicode code point to a UTF-16 representation of that
2364:    * code point.
2365:    * 
2366:    * @param codePoint the unicode code point
2367:    *
2368:    * @return the UTF-16 representation of that code point
2369:    *
2370:    * @throws IllegalArgumentException if the code point is not a valid
2371:    *         unicode code point
2372:    *
2373:    * @since 1.5
2374:    */
2375:   public static char[] toChars(int codePoint)
2376:   {
2377:     char[] result = new char[charCount(codePoint)];
2378:     int ignore = toChars(codePoint, result, 0);
2379:     return result;
2380:   }
2381: 
2382:   /**
2383:    * Converts a unicode code point to its UTF-16 representation.
2384:    *
2385:    * @param codePoint the unicode code point
2386:    * @param dst the target char array
2387:    * @param dstIndex the start index for the target
2388:    *
2389:    * @return number of characters written to <code>dst</code>
2390:    *
2391:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
2392:    *         valid unicode code point
2393:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
2394:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
2395:    *         in <code>dst</code> or if the UTF-16 representation does not
2396:    *         fit into <code>dst</code>
2397:    *
2398:    * @since 1.5
2399:    */
2400:   public static int toChars(int codePoint, char[] dst, int dstIndex)
2401:   {
2402:     if (!isValidCodePoint(codePoint))
2403:       {
2404:         throw new IllegalArgumentException("not a valid code point: "
2405:                                            + codePoint);
2406:       }
2407: 
2408:     int result;
2409:     if (isSupplementaryCodePoint(codePoint))
2410:       {
2411:         // Write second char first to cause IndexOutOfBoundsException
2412:         // immediately.
2413:         final int cp2 = codePoint - 0x10000;
2414:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
2415:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
2416:         result = 2;
2417:       }
2418:     else
2419:       {
2420:         dst[dstIndex] = (char) codePoint;
2421:         result = 1; 
2422:       }
2423:     return result;
2424:   }
2425: 
2426:   /**
2427:    * Return number of 16-bit characters required to represent the given
2428:    * code point.
2429:    *
2430:    * @param codePoint a unicode code point
2431:    *
2432:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
2433:    *
2434:    * @since 1.5
2435:    */
2436:   public static int charCount(int codePoint)
2437:   {
2438:     return 
2439:       (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
2440:       ? 2 
2441:       : 1;
2442:   }
2443: 
2444:   /**
2445:    * Determines whether the specified code point is
2446:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
2447:    * supplementary character range.
2448:    *
2449:    * @param codePoint a Unicode code point
2450:    *
2451:    * @return <code>true</code> if code point is in supplementary range
2452:    *
2453:    * @since 1.5
2454:    */
2455:   public static boolean isSupplementaryCodePoint(int codePoint)
2456:   {
2457:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2458:       && codePoint <= MAX_CODE_POINT;
2459:   }
2460: 
2461:   /**
2462:    * Determines whether the specified code point is
2463:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
2464:    *
2465:    * @param codePoint a Unicode code point
2466:    *
2467:    * @return <code>true</code> if code point is valid
2468:    *
2469:    * @since 1.5
2470:    */
2471:   public static boolean isValidCodePoint(int codePoint)
2472:   {
2473:     return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2474:   }
2475: 
2476:   /**
2477:    * Return true if the given character is a high surrogate.
2478:    * @param ch the character
2479:    * @return true if the character is a high surrogate character
2480:    *
2481:    * @since 1.5
2482:    */
2483:   public static boolean isHighSurrogate(char ch)
2484:   {
2485:     return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2486:   }
2487: 
2488:   /**
2489:    * Return true if the given character is a low surrogate.
2490:    * @param ch the character
2491:    * @return true if the character is a low surrogate character
2492:    *
2493:    * @since 1.5
2494:    */
2495:   public static boolean isLowSurrogate(char ch)
2496:   {
2497:     return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2498:   }
2499: 
2500:   /**
2501:    * Return true if the given characters compose a surrogate pair.
2502:    * This is true if the first character is a high surrogate and the
2503:    * second character is a low surrogate.
2504:    * @param ch1 the first character
   * @param ch2 the second character
2506:    * @return true if the characters compose a surrogate pair
2507:    *
2508:    * @since 1.5
2509:    */
2510:   public static boolean isSurrogatePair(char ch1, char ch2)
2511:   {
2512:     return isHighSurrogate(ch1) && isLowSurrogate(ch2);
2513:   }
2514: 
2515:   /**
2516:    * Given a valid surrogate pair, this returns the corresponding
2517:    * code point.
2518:    * @param high the high character of the pair
2519:    * @param low the low character of the pair
2520:    * @return the corresponding code point
2521:    *
2522:    * @since 1.5
2523:    */
2524:   public static int toCodePoint(char high, char low)
2525:   {
2526:     return ((high - MIN_HIGH_SURROGATE) * 0x400) +
2527:       (low - MIN_LOW_SURROGATE) + 0x10000;
2528:   }
2529: 
2530:   /**
2531:    * Get the code point at the specified index in the CharSequence.
2532:    * This is like CharSequence#charAt(int), but if the character is
2533:    * the start of a surrogate pair, and there is a following
2534:    * character, and this character completes the pair, then the
2535:    * corresponding supplementary code point is returned.  Otherwise,
2536:    * the character at the index is returned.
2537:    *
2538:    * @param sequence the CharSequence
2539:    * @param index the index of the codepoint to get, starting at 0
2540:    * @return the codepoint at the specified index
2541:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
2542:    * @since 1.5
2543:    */
2544:   public static int codePointAt(CharSequence sequence, int index)
2545:   {
2546:     int len = sequence.length();
2547:     if (index < 0 || index >= len)
2548:       throw new IndexOutOfBoundsException();
2549:     char high = sequence.charAt(index);
2550:     if (! isHighSurrogate(high) || ++index >= len)
2551:       return high;
2552:     char low = sequence.charAt(index);
2553:     if (! isLowSurrogate(low))
2554:       return high;
2555:     return toCodePoint(high, low);
2556:   }
2557: 
2558:   /**
   * Get the code point at the specified index in the character array.
2560:    * If the character is the start of a surrogate pair, and there is a
2561:    * following character, and this character completes the pair, then
2562:    * the corresponding supplementary code point is returned.
2563:    * Otherwise, the character at the index is returned.
2564:    *
2565:    * @param chars the character array in which to look
2566:    * @param index the index of the codepoint to get, starting at 0
2567:    * @return the codepoint at the specified index
2568:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
2569:    * @since 1.5
2570:    */
2571:   public static int codePointAt(char[] chars, int index)
2572:   {
2573:     return codePointAt(chars, index, chars.length);
2574:   }
2575: 
2576:   /**
2577:    * Get the code point at the specified index in the CharSequence.
2578:    * If the character is the start of a surrogate pair, and there is a
2579:    * following character within the specified range, and this
2580:    * character completes the pair, then the corresponding
2581:    * supplementary code point is returned.  Otherwise, the character
2582:    * at the index is returned.
2583:    *
2584:    * @param chars the character array in which to look
2585:    * @param index the index of the codepoint to get, starting at 0
2586:    * @param limit the limit past which characters should not be examined
2587:    * @return the codepoint at the specified index
2588:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
2589:    * limit, or if limit is negative or &gt;= the length of the array
2590:    * @since 1.5
2591:    */
2592:   public static int codePointAt(char[] chars, int index, int limit)
2593:   {
2594:     if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
2595:       throw new IndexOutOfBoundsException();
2596:     char high = chars[index];
2597:     if (! isHighSurrogate(high) || ++index >= limit)
2598:       return high;
2599:     char low = chars[index];
2600:     if (! isLowSurrogate(low))
2601:       return high;
2602:     return toCodePoint(high, low);
2603:   }
2604: 
2605:   /**
2606:    * Get the code point before the specified index.  This is like
2607:    * #codePointAt(char[], int), but checks the characters at
2608:    * <code>index-1</code> and <code>index-2</code> to see if they form
2609:    * a supplementary code point.  If they do not, the character at
2610:    * <code>index-1</code> is returned.
2611:    *
2612:    * @param chars the character array
2613:    * @param index the index just past the codepoint to get, starting at 0
2614:    * @return the codepoint at the specified index
2615:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
2616:    * @since 1.5
2617:    */
2618:   public static int codePointBefore(char[] chars, int index)
2619:   {
2620:     return codePointBefore(chars, index, 1);
2621:   }
2622: 
2623:   /**
2624:    * Get the code point before the specified index.  This is like
2625:    * #codePointAt(char[], int), but checks the characters at
2626:    * <code>index-1</code> and <code>index-2</code> to see if they form
2627:    * a supplementary code point.  If they do not, the character at
2628:    * <code>index-1</code> is returned.  The start parameter is used to
2629:    * limit the range of the array which may be examined.
2630:    *
2631:    * @param chars the character array
2632:    * @param index the index just past the codepoint to get, starting at 0
2633:    * @param start the index before which characters should not be examined
2634:    * @return the codepoint at the specified index
2635:    * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
2636:    * the length of the array, or if limit is negative or &gt;= the
2637:    * length of the array
2638:    * @since 1.5
2639:    */
2640:   public static int codePointBefore(char[] chars, int index, int start)
2641:   {
2642:     if (index < start || index > chars.length
2643:     || start < 0 || start >= chars.length)
2644:       throw new IndexOutOfBoundsException();
2645:     --index;
2646:     char low = chars[index];
2647:     if (! isLowSurrogate(low) || --index < start)
2648:       return low;
2649:     char high = chars[index];
2650:     if (! isHighSurrogate(high))
2651:       return low;
2652:     return toCodePoint(high, low);
2653:   }
2654: 
2655:   /**
2656:    * Get the code point before the specified index.  This is like
2657:    * #codePointAt(CharSequence, int), but checks the characters at
2658:    * <code>index-1</code> and <code>index-2</code> to see if they form
2659:    * a supplementary code point.  If they do not, the character at
2660:    * <code>index-1</code> is returned.
2661:    *
2662:    * @param sequence the CharSequence
2663:    * @param index the index just past the codepoint to get, starting at 0
2664:    * @return the codepoint at the specified index
   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
2666:    * @since 1.5
2667:    */
2668:   public static int codePointBefore(CharSequence sequence, int index)
2669:   {
2670:     int len = sequence.length();
2671:     if (index < 1 || index > len)
2672:       throw new IndexOutOfBoundsException();
2673:     --index;
2674:     char low = sequence.charAt(index);
2675:     if (! isLowSurrogate(low) || --index < 0)
2676:       return low;
2677:     char high = sequence.charAt(index);
2678:     if (! isHighSurrogate(high))
2679:       return low;
2680:     return toCodePoint(high, low);
2681:   }
2682: } // class Character