Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: 
  45: /**
  46:  * Wrapper class for the primitive char data type.  In addition, this class
  47:  * allows one to retrieve property information and perform transformations
  48:  * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
  49:  * java.lang.Character is designed to be very dynamic, and as such, it
  50:  * retrieves information on the Unicode character set from a separate
  51:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  52:  *
  53:  * <p>For predicates, boundaries are used to describe
  54:  * the set of characters for which the method will return true.
  55:  * This syntax uses fairly normal regular expression notation.
  56:  * See 5.13 of the Unicode Standard, Version 3.0, for the
  57:  * boundary specification.
  58:  *
  59:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  60:  * for more information on the Unicode Standard.
  61:  *
  62:  * @author Tom Tromey (tromey@cygnus.com)
  63:  * @author Paul N. Fisher
  64:  * @author Jochen Hoenicke
  65:  * @author Eric Blake (ebb9@email.byu.edu)
  66:  * @see CharData
  67:  * @since 1.0
  68:  * @status updated to 1.4
  69:  */
  70: public final class Character implements Serializable, Comparable
  71: {
  72:   /**
  73:    * A named subset of Unicode characters. Instances are compared by
  74:    * identity only: <code>equals</code> and <code>hashCode</code> are final.
  75:    * @author Paul N. Fisher
  76:    * @author Eric Blake (ebb9@email.byu.edu)
  77:    * @since 1.2
  78:    */
  79:   public static class Subset
  80:   {
  81:     /** The name of the subset, as returned by <code>toString()</code>; never null. */
  82:     private final String name;
  83: 
  84:     /**
  85:      * Construct a new subset of characters.
  86:      *
  87:      * @param name the name of the subset
  88:      * @throws NullPointerException if name is null
  89:      */
  90:     protected Subset(String name)
  91:     {
  92:       // name.toString() is name itself; the call exists only to throw NullPointerException right here when name is null.
  93:       this.name = name.toString();
  94:     }
  95: 
  96:     /**
  97:      * Compares two Subsets for equality. This is <code>final</code>, and
  98:      * restricts the comparison to the <code>==</code> operator, so it returns
  99:      * true only for the same object.
 100:      *
 101:      * @param o the object to compare
 102:      * @return true if o is this
 103:      */
 104:     public final boolean equals(Object o)
 105:     {
 106:       return o == this;
 107:     }
 108: 
 109:     /**
 110:      * Returns the hash code inherited from Object. Declared <code>final</code>
 111:      * so that subclasses cannot make it inconsistent with the identity-based
 112:      * <code>equals</code>.
 113:      * @return the hash code for this object
 114:      */
 115:     public final int hashCode()
 116:     {
 117:       return super.hashCode();
 118:     }
 119: 
 120:     /**
 121:      * Returns the name of the subset.
 122:      *
 123:      * @return the name that was passed to the constructor
 124:      */
 125:     public final String toString()
 126:     {
 127:       return name;
 128:     }
 129:   } // class Subset
 130: 
 131:   /**
 132:    * A family of character subsets in the Unicode specification. A character
 133:    * is in at most one of these blocks.
 134:    *
 135:    * This nested class was generated automatically from
 136:    * <code>doc/unicode/Block-3.txt</code>, by some perl scripts.
 137:    * This Unicode definition file can be found on the
 138:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 139:    * JDK 1.4 uses Unicode version 3.0.0.
 140:    *
 141:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 142:    * @since 1.2
 143:    */
 144:   public static final class UnicodeBlock extends Subset
 145:   {
 146:     /** The first character of the block; <code>of</code> treats it as an inclusive lower bound. */
 147:     private final char start;
 148: 
 149:     /** The last character of the block; <code>of</code> treats it as an inclusive upper bound. */
 150:     private final char end;
 151: 
 152:     /**
 153:      * Constructor for strictly defined blocks. It is private, so outside
 154:      * code cannot create additional blocks.
 155:      * @param start the start character of the range
 156:      * @param end the end character of the range
 157:      * @param name the block name
 158:      */
 159:     private UnicodeBlock(char start, char end, String name)
 160:     {
 161:       super(name);
 162:       this.start = start;
 163:       this.end = end;
 164:     }
 165: 
 166:     /**
 167:      * Returns the Unicode character block which a character belongs to.
 168:      * U+FEFF is always reported as SPECIALS; every other character is located by binary search over the block table.
 169:      * @param ch the character to look up
 170:      * @return the set it belongs to, or null if it is not in one
 171:      */
 172:     public static UnicodeBlock of(char ch)
 173:     {
 174:       // Special case: SPECIALS covers two ranges but only stores U+FFF0-U+FFFD, so the search below cannot find U+FEFF.
 175:       if (ch == '\uFEFF')
 176:         return SPECIALS;
 177:       // Binary search; relies on sets listing the blocks in ascending, non-overlapping order.
 178:       int low = 0;
 179:       int hi = sets.length - 1;
 180:       while (low <= hi)
 181:         {
 182:           int mid = (low + hi) >> 1;
 183:           UnicodeBlock b = sets[mid];
 184:           if (ch < b.start) // ch lies below this block: continue in the lower half
 185:             hi = mid - 1;
 186:           else if (ch > b.end) // ch lies above this block: continue in the upper half
 187:             low = mid + 1;
 188:           else
 189:             return b; // start <= ch <= end: both bounds are inclusive
 190:         }
 191:       return null; // ch falls outside every listed range
 192:     }
 193: 
 194:     /**
 195:      * Basic Latin.
 196:      * '\u0000' - '\u007F'.
 197:      */
 198:     public static final UnicodeBlock BASIC_LATIN
 199:       = new UnicodeBlock('\u0000', '\u007F',
 200:                          "BASIC_LATIN");
 201: 
 202:     /**
 203:      * Latin-1 Supplement.
 204:      * '\u0080' - '\u00FF'.
 205:      */
 206:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 207:       = new UnicodeBlock('\u0080', '\u00FF',
 208:                          "LATIN_1_SUPPLEMENT");
 209: 
 210:     /**
 211:      * Latin Extended-A.
 212:      * '\u0100' - '\u017F'.
 213:      */
 214:     public static final UnicodeBlock LATIN_EXTENDED_A
 215:       = new UnicodeBlock('\u0100', '\u017F',
 216:                          "LATIN_EXTENDED_A");
 217: 
 218:     /**
 219:      * Latin Extended-B.
 220:      * '\u0180' - '\u024F'.
 221:      */
 222:     public static final UnicodeBlock LATIN_EXTENDED_B
 223:       = new UnicodeBlock('\u0180', '\u024F',
 224:                          "LATIN_EXTENDED_B");
 225: 
 226:     /**
 227:      * IPA Extensions.
 228:      * '\u0250' - '\u02AF'.
 229:      */
 230:     public static final UnicodeBlock IPA_EXTENSIONS
 231:       = new UnicodeBlock('\u0250', '\u02AF',
 232:                          "IPA_EXTENSIONS");
 233: 
 234:     /**
 235:      * Spacing Modifier Letters.
 236:      * '\u02B0' - '\u02FF'.
 237:      */
 238:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 239:       = new UnicodeBlock('\u02B0', '\u02FF',
 240:                          "SPACING_MODIFIER_LETTERS");
 241: 
 242:     /**
 243:      * Combining Diacritical Marks.
 244:      * '\u0300' - '\u036F'.
 245:      */
 246:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 247:       = new UnicodeBlock('\u0300', '\u036F',
 248:                          "COMBINING_DIACRITICAL_MARKS");
 249: 
 250:     /**
 251:      * Greek.
 252:      * '\u0370' - '\u03FF'.
 253:      */
 254:     public static final UnicodeBlock GREEK
 255:       = new UnicodeBlock('\u0370', '\u03FF',
 256:                          "GREEK");
 257: 
 258:     /**
 259:      * Cyrillic.
 260:      * '\u0400' - '\u04FF'.
 261:      */
 262:     public static final UnicodeBlock CYRILLIC
 263:       = new UnicodeBlock('\u0400', '\u04FF',
 264:                          "CYRILLIC");
 265: 
 266:     /**
 267:      * Armenian.
 268:      * '\u0530' - '\u058F'.
 269:      */
 270:     public static final UnicodeBlock ARMENIAN
 271:       = new UnicodeBlock('\u0530', '\u058F',
 272:                          "ARMENIAN");
 273: 
 274:     /**
 275:      * Hebrew.
 276:      * '\u0590' - '\u05FF'.
 277:      */
 278:     public static final UnicodeBlock HEBREW
 279:       = new UnicodeBlock('\u0590', '\u05FF',
 280:                          "HEBREW");
 281: 
 282:     /**
 283:      * Arabic.
 284:      * '\u0600' - '\u06FF'.
 285:      */
 286:     public static final UnicodeBlock ARABIC
 287:       = new UnicodeBlock('\u0600', '\u06FF',
 288:                          "ARABIC");
 289: 
 290:     /**
 291:      * Syriac.
 292:      * '\u0700' - '\u074F'.
 293:      * @since 1.4
 294:      */
 295:     public static final UnicodeBlock SYRIAC
 296:       = new UnicodeBlock('\u0700', '\u074F',
 297:                          "SYRIAC");
 298: 
 299:     /**
 300:      * Thaana.
 301:      * '\u0780' - '\u07BF'.
 302:      * @since 1.4
 303:      */
 304:     public static final UnicodeBlock THAANA
 305:       = new UnicodeBlock('\u0780', '\u07BF',
 306:                          "THAANA");
 307: 
 308:     /**
 309:      * Devanagari.
 310:      * '\u0900' - '\u097F'.
 311:      */
 312:     public static final UnicodeBlock DEVANAGARI
 313:       = new UnicodeBlock('\u0900', '\u097F',
 314:                          "DEVANAGARI");
 315: 
 316:     /**
 317:      * Bengali.
 318:      * '\u0980' - '\u09FF'.
 319:      */
 320:     public static final UnicodeBlock BENGALI
 321:       = new UnicodeBlock('\u0980', '\u09FF',
 322:                          "BENGALI");
 323: 
 324:     /**
 325:      * Gurmukhi.
 326:      * '\u0A00' - '\u0A7F'.
 327:      */
 328:     public static final UnicodeBlock GURMUKHI
 329:       = new UnicodeBlock('\u0A00', '\u0A7F',
 330:                          "GURMUKHI");
 331: 
 332:     /**
 333:      * Gujarati.
 334:      * '\u0A80' - '\u0AFF'.
 335:      */
 336:     public static final UnicodeBlock GUJARATI
 337:       = new UnicodeBlock('\u0A80', '\u0AFF',
 338:                          "GUJARATI");
 339: 
 340:     /**
 341:      * Oriya.
 342:      * '\u0B00' - '\u0B7F'.
 343:      */
 344:     public static final UnicodeBlock ORIYA
 345:       = new UnicodeBlock('\u0B00', '\u0B7F',
 346:                          "ORIYA");
 347: 
 348:     /**
 349:      * Tamil.
 350:      * '\u0B80' - '\u0BFF'.
 351:      */
 352:     public static final UnicodeBlock TAMIL
 353:       = new UnicodeBlock('\u0B80', '\u0BFF',
 354:                          "TAMIL");
 355: 
 356:     /**
 357:      * Telugu.
 358:      * '\u0C00' - '\u0C7F'.
 359:      */
 360:     public static final UnicodeBlock TELUGU
 361:       = new UnicodeBlock('\u0C00', '\u0C7F',
 362:                          "TELUGU");
 363: 
 364:     /**
 365:      * Kannada.
 366:      * '\u0C80' - '\u0CFF'.
 367:      */
 368:     public static final UnicodeBlock KANNADA
 369:       = new UnicodeBlock('\u0C80', '\u0CFF',
 370:                          "KANNADA");
 371: 
 372:     /**
 373:      * Malayalam.
 374:      * '\u0D00' - '\u0D7F'.
 375:      */
 376:     public static final UnicodeBlock MALAYALAM
 377:       = new UnicodeBlock('\u0D00', '\u0D7F',
 378:                          "MALAYALAM");
 379: 
 380:     /**
 381:      * Sinhala.
 382:      * '\u0D80' - '\u0DFF'.
 383:      * @since 1.4
 384:      */
 385:     public static final UnicodeBlock SINHALA
 386:       = new UnicodeBlock('\u0D80', '\u0DFF',
 387:                          "SINHALA");
 388: 
 389:     /**
 390:      * Thai.
 391:      * '\u0E00' - '\u0E7F'.
 392:      */
 393:     public static final UnicodeBlock THAI
 394:       = new UnicodeBlock('\u0E00', '\u0E7F',
 395:                          "THAI");
 396: 
 397:     /**
 398:      * Lao.
 399:      * '\u0E80' - '\u0EFF'.
 400:      */
 401:     public static final UnicodeBlock LAO
 402:       = new UnicodeBlock('\u0E80', '\u0EFF',
 403:                          "LAO");
 404: 
 405:     /**
 406:      * Tibetan.
 407:      * '\u0F00' - '\u0FFF'.
 408:      */
 409:     public static final UnicodeBlock TIBETAN
 410:       = new UnicodeBlock('\u0F00', '\u0FFF',
 411:                          "TIBETAN");
 412: 
 413:     /**
 414:      * Myanmar.
 415:      * '\u1000' - '\u109F'.
 416:      * @since 1.4
 417:      */
 418:     public static final UnicodeBlock MYANMAR
 419:       = new UnicodeBlock('\u1000', '\u109F',
 420:                          "MYANMAR");
 421: 
 422:     /**
 423:      * Georgian.
 424:      * '\u10A0' - '\u10FF'.
 425:      */
 426:     public static final UnicodeBlock GEORGIAN
 427:       = new UnicodeBlock('\u10A0', '\u10FF',
 428:                          "GEORGIAN");
 429: 
 430:     /**
 431:      * Hangul Jamo.
 432:      * '\u1100' - '\u11FF'.
 433:      */
 434:     public static final UnicodeBlock HANGUL_JAMO
 435:       = new UnicodeBlock('\u1100', '\u11FF',
 436:                          "HANGUL_JAMO");
 437: 
 438:     /**
 439:      * Ethiopic.
 440:      * '\u1200' - '\u137F'.
 441:      * @since 1.4
 442:      */
 443:     public static final UnicodeBlock ETHIOPIC
 444:       = new UnicodeBlock('\u1200', '\u137F',
 445:                          "ETHIOPIC");
 446: 
 447:     /**
 448:      * Cherokee.
 449:      * '\u13A0' - '\u13FF'.
 450:      * @since 1.4
 451:      */
 452:     public static final UnicodeBlock CHEROKEE
 453:       = new UnicodeBlock('\u13A0', '\u13FF',
 454:                          "CHEROKEE");
 455: 
 456:     /**
 457:      * Unified Canadian Aboriginal Syllabics.
 458:      * '\u1400' - '\u167F'.
 459:      * @since 1.4
 460:      */
 461:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 462:       = new UnicodeBlock('\u1400', '\u167F',
 463:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
 464: 
 465:     /**
 466:      * Ogham.
 467:      * '\u1680' - '\u169F'.
 468:      * @since 1.4
 469:      */
 470:     public static final UnicodeBlock OGHAM
 471:       = new UnicodeBlock('\u1680', '\u169F',
 472:                          "OGHAM");
 473: 
 474:     /**
 475:      * Runic.
 476:      * '\u16A0' - '\u16FF'.
 477:      * @since 1.4
 478:      */
 479:     public static final UnicodeBlock RUNIC
 480:       = new UnicodeBlock('\u16A0', '\u16FF',
 481:                          "RUNIC");
 482: 
 483:     /**
 484:      * Khmer.
 485:      * '\u1780' - '\u17FF'.
 486:      * @since 1.4
 487:      */
 488:     public static final UnicodeBlock KHMER
 489:       = new UnicodeBlock('\u1780', '\u17FF',
 490:                          "KHMER");
 491: 
 492:     /**
 493:      * Mongolian.
 494:      * '\u1800' - '\u18AF'.
 495:      * @since 1.4
 496:      */
 497:     public static final UnicodeBlock MONGOLIAN
 498:       = new UnicodeBlock('\u1800', '\u18AF',
 499:                          "MONGOLIAN");
 500: 
 501:     /**
 502:      * Latin Extended Additional.
 503:      * '\u1E00' - '\u1EFF'.
 504:      */
 505:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 506:       = new UnicodeBlock('\u1E00', '\u1EFF',
 507:                          "LATIN_EXTENDED_ADDITIONAL");
 508: 
 509:     /**
 510:      * Greek Extended.
 511:      * '\u1F00' - '\u1FFF'.
 512:      */
 513:     public static final UnicodeBlock GREEK_EXTENDED
 514:       = new UnicodeBlock('\u1F00', '\u1FFF',
 515:                          "GREEK_EXTENDED");
 516: 
 517:     /**
 518:      * General Punctuation.
 519:      * '\u2000' - '\u206F'.
 520:      */
 521:     public static final UnicodeBlock GENERAL_PUNCTUATION
 522:       = new UnicodeBlock('\u2000', '\u206F',
 523:                          "GENERAL_PUNCTUATION");
 524: 
 525:     /**
 526:      * Superscripts and Subscripts.
 527:      * '\u2070' - '\u209F'.
 528:      */
 529:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 530:       = new UnicodeBlock('\u2070', '\u209F',
 531:                          "SUPERSCRIPTS_AND_SUBSCRIPTS");
 532: 
 533:     /**
 534:      * Currency Symbols.
 535:      * '\u20A0' - '\u20CF'.
 536:      */
 537:     public static final UnicodeBlock CURRENCY_SYMBOLS
 538:       = new UnicodeBlock('\u20A0', '\u20CF',
 539:                          "CURRENCY_SYMBOLS");
 540: 
 541:     /**
 542:      * Combining Marks for Symbols.
 543:      * '\u20D0' - '\u20FF'.
 544:      */
 545:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 546:       = new UnicodeBlock('\u20D0', '\u20FF',
 547:                          "COMBINING_MARKS_FOR_SYMBOLS");
 548: 
 549:     /**
 550:      * Letterlike Symbols.
 551:      * '\u2100' - '\u214F'.
 552:      */
 553:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 554:       = new UnicodeBlock('\u2100', '\u214F',
 555:                          "LETTERLIKE_SYMBOLS");
 556: 
 557:     /**
 558:      * Number Forms.
 559:      * '\u2150' - '\u218F'.
 560:      */
 561:     public static final UnicodeBlock NUMBER_FORMS
 562:       = new UnicodeBlock('\u2150', '\u218F',
 563:                          "NUMBER_FORMS");
 564: 
 565:     /**
 566:      * Arrows.
 567:      * '\u2190' - '\u21FF'.
 568:      */
 569:     public static final UnicodeBlock ARROWS
 570:       = new UnicodeBlock('\u2190', '\u21FF',
 571:                          "ARROWS");
 572: 
 573:     /**
 574:      * Mathematical Operators.
 575:      * '\u2200' - '\u22FF'.
 576:      */
 577:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 578:       = new UnicodeBlock('\u2200', '\u22FF',
 579:                          "MATHEMATICAL_OPERATORS");
 580: 
 581:     /**
 582:      * Miscellaneous Technical.
 583:      * '\u2300' - '\u23FF'.
 584:      */
 585:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 586:       = new UnicodeBlock('\u2300', '\u23FF',
 587:                          "MISCELLANEOUS_TECHNICAL");
 588: 
 589:     /**
 590:      * Control Pictures.
 591:      * '\u2400' - '\u243F'.
 592:      */
 593:     public static final UnicodeBlock CONTROL_PICTURES
 594:       = new UnicodeBlock('\u2400', '\u243F',
 595:                          "CONTROL_PICTURES");
 596: 
 597:     /**
 598:      * Optical Character Recognition.
 599:      * '\u2440' - '\u245F'.
 600:      */
 601:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 602:       = new UnicodeBlock('\u2440', '\u245F',
 603:                          "OPTICAL_CHARACTER_RECOGNITION");
 604: 
 605:     /**
 606:      * Enclosed Alphanumerics.
 607:      * '\u2460' - '\u24FF'.
 608:      */
 609:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 610:       = new UnicodeBlock('\u2460', '\u24FF',
 611:                          "ENCLOSED_ALPHANUMERICS");
 612: 
 613:     /**
 614:      * Box Drawing.
 615:      * '\u2500' - '\u257F'.
 616:      */
 617:     public static final UnicodeBlock BOX_DRAWING
 618:       = new UnicodeBlock('\u2500', '\u257F',
 619:                          "BOX_DRAWING");
 620: 
 621:     /**
 622:      * Block Elements.
 623:      * '\u2580' - '\u259F'.
 624:      */
 625:     public static final UnicodeBlock BLOCK_ELEMENTS
 626:       = new UnicodeBlock('\u2580', '\u259F',
 627:                          "BLOCK_ELEMENTS");
 628: 
 629:     /**
 630:      * Geometric Shapes.
 631:      * '\u25A0' - '\u25FF'.
 632:      */
 633:     public static final UnicodeBlock GEOMETRIC_SHAPES
 634:       = new UnicodeBlock('\u25A0', '\u25FF',
 635:                          "GEOMETRIC_SHAPES");
 636: 
 637:     /**
 638:      * Miscellaneous Symbols.
 639:      * '\u2600' - '\u26FF'.
 640:      */
 641:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 642:       = new UnicodeBlock('\u2600', '\u26FF',
 643:                          "MISCELLANEOUS_SYMBOLS");
 644: 
 645:     /**
 646:      * Dingbats.
 647:      * '\u2700' - '\u27BF'.
 648:      */
 649:     public static final UnicodeBlock DINGBATS
 650:       = new UnicodeBlock('\u2700', '\u27BF',
 651:                          "DINGBATS");
 652: 
 653:     /**
 654:      * Braille Patterns.
 655:      * '\u2800' - '\u28FF'.
 656:      * @since 1.4
 657:      */
 658:     public static final UnicodeBlock BRAILLE_PATTERNS
 659:       = new UnicodeBlock('\u2800', '\u28FF',
 660:                          "BRAILLE_PATTERNS");
 661: 
 662:     /**
 663:      * CJK Radicals Supplement.
 664:      * '\u2E80' - '\u2EFF'.
 665:      * @since 1.4
 666:      */
 667:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 668:       = new UnicodeBlock('\u2E80', '\u2EFF',
 669:                          "CJK_RADICALS_SUPPLEMENT");
 670: 
 671:     /**
 672:      * Kangxi Radicals.
 673:      * '\u2F00' - '\u2FDF'.
 674:      * @since 1.4
 675:      */
 676:     public static final UnicodeBlock KANGXI_RADICALS
 677:       = new UnicodeBlock('\u2F00', '\u2FDF',
 678:                          "KANGXI_RADICALS");
 679: 
 680:     /**
 681:      * Ideographic Description Characters.
 682:      * '\u2FF0' - '\u2FFF'.
 683:      * @since 1.4
 684:      */
 685:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
 686:       = new UnicodeBlock('\u2FF0', '\u2FFF',
 687:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
 688: 
 689:     /**
 690:      * CJK Symbols and Punctuation.
 691:      * '\u3000' - '\u303F'.
 692:      */
 693:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
 694:       = new UnicodeBlock('\u3000', '\u303F',
 695:                          "CJK_SYMBOLS_AND_PUNCTUATION");
 696: 
 697:     /**
 698:      * Hiragana.
 699:      * '\u3040' - '\u309F'.
 700:      */
 701:     public static final UnicodeBlock HIRAGANA
 702:       = new UnicodeBlock('\u3040', '\u309F',
 703:                          "HIRAGANA");
 704: 
 705:     /**
 706:      * Katakana.
 707:      * '\u30A0' - '\u30FF'.
 708:      */
 709:     public static final UnicodeBlock KATAKANA
 710:       = new UnicodeBlock('\u30A0', '\u30FF',
 711:                          "KATAKANA");
 712: 
 713:     /**
 714:      * Bopomofo.
 715:      * '\u3100' - '\u312F'.
 716:      */
 717:     public static final UnicodeBlock BOPOMOFO
 718:       = new UnicodeBlock('\u3100', '\u312F',
 719:                          "BOPOMOFO");
 720: 
 721:     /**
 722:      * Hangul Compatibility Jamo.
 723:      * '\u3130' - '\u318F'.
 724:      */
 725:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
 726:       = new UnicodeBlock('\u3130', '\u318F',
 727:                          "HANGUL_COMPATIBILITY_JAMO");
 728: 
 729:     /**
 730:      * Kanbun.
 731:      * '\u3190' - '\u319F'.
 732:      */
 733:     public static final UnicodeBlock KANBUN
 734:       = new UnicodeBlock('\u3190', '\u319F',
 735:                          "KANBUN");
 736: 
 737:     /**
 738:      * Bopomofo Extended.
 739:      * '\u31A0' - '\u31BF'.
 740:      * @since 1.4
 741:      */
 742:     public static final UnicodeBlock BOPOMOFO_EXTENDED
 743:       = new UnicodeBlock('\u31A0', '\u31BF',
 744:                          "BOPOMOFO_EXTENDED");
 745: 
 746:     /**
 747:      * Enclosed CJK Letters and Months.
 748:      * '\u3200' - '\u32FF'.
 749:      */
 750:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
 751:       = new UnicodeBlock('\u3200', '\u32FF',
 752:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS");
 753: 
 754:     /**
 755:      * CJK Compatibility.
 756:      * '\u3300' - '\u33FF'.
 757:      */
 758:     public static final UnicodeBlock CJK_COMPATIBILITY
 759:       = new UnicodeBlock('\u3300', '\u33FF',
 760:                          "CJK_COMPATIBILITY");
 761: 
 762:     /**
 763:      * CJK Unified Ideographs Extension A.
 764:      * '\u3400' - '\u4DB5'.
 765:      * @since 1.4
 766:      */
 767:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
 768:       = new UnicodeBlock('\u3400', '\u4DB5',
 769:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
 770: 
 771:     /**
 772:      * CJK Unified Ideographs.
 773:      * '\u4E00' - '\u9FFF'.
 774:      */
 775:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
 776:       = new UnicodeBlock('\u4E00', '\u9FFF',
 777:                          "CJK_UNIFIED_IDEOGRAPHS");
 778: 
 779:     /**
 780:      * Yi Syllables.
 781:      * '\uA000' - '\uA48F'.
 782:      * @since 1.4
 783:      */
 784:     public static final UnicodeBlock YI_SYLLABLES
 785:       = new UnicodeBlock('\uA000', '\uA48F',
 786:                          "YI_SYLLABLES");
 787: 
 788:     /**
 789:      * Yi Radicals.
 790:      * '\uA490' - '\uA4CF'.
 791:      * @since 1.4
 792:      */
 793:     public static final UnicodeBlock YI_RADICALS
 794:       = new UnicodeBlock('\uA490', '\uA4CF',
 795:                          "YI_RADICALS");
 796: 
 797:     /**
 798:      * Hangul Syllables.
 799:      * '\uAC00' - '\uD7A3'.
 800:      */
 801:     public static final UnicodeBlock HANGUL_SYLLABLES
 802:       = new UnicodeBlock('\uAC00', '\uD7A3',
 803:                          "HANGUL_SYLLABLES");
 804: 
 805:     /**
 806:      * Surrogates Area.
 807:      * '\uD800' - '\uDFFF'.
 808:      */
 809:     public static final UnicodeBlock SURROGATES_AREA
 810:       = new UnicodeBlock('\uD800', '\uDFFF',
 811:                          "SURROGATES_AREA");
 812: 
 813:     /**
 814:      * Private Use Area.
 815:      * '\uE000' - '\uF8FF'.
 816:      */
 817:     public static final UnicodeBlock PRIVATE_USE_AREA
 818:       = new UnicodeBlock('\uE000', '\uF8FF',
 819:                          "PRIVATE_USE_AREA");
 820: 
 821:     /**
 822:      * CJK Compatibility Ideographs.
 823:      * '\uF900' - '\uFAFF'.
 824:      */
 825:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
 826:       = new UnicodeBlock('\uF900', '\uFAFF',
 827:                          "CJK_COMPATIBILITY_IDEOGRAPHS");
 828: 
 829:     /**
 830:      * Alphabetic Presentation Forms.
 831:      * '\uFB00' - '\uFB4F'.
 832:      */
 833:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
 834:       = new UnicodeBlock('\uFB00', '\uFB4F',
 835:                          "ALPHABETIC_PRESENTATION_FORMS");
 836: 
 837:     /**
 838:      * Arabic Presentation Forms-A.
 839:      * '\uFB50' - '\uFDFF'.
 840:      */
 841:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
 842:       = new UnicodeBlock('\uFB50', '\uFDFF',
 843:                          "ARABIC_PRESENTATION_FORMS_A");
 844: 
 845:     /**
 846:      * Combining Half Marks.
 847:      * '\uFE20' - '\uFE2F'.
 848:      */
 849:     public static final UnicodeBlock COMBINING_HALF_MARKS
 850:       = new UnicodeBlock('\uFE20', '\uFE2F',
 851:                          "COMBINING_HALF_MARKS");
 852: 
 853:     /**
 854:      * CJK Compatibility Forms.
 855:      * '\uFE30' - '\uFE4F'.
 856:      */
 857:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
 858:       = new UnicodeBlock('\uFE30', '\uFE4F',
 859:                          "CJK_COMPATIBILITY_FORMS");
 860: 
 861:     /**
 862:      * Small Form Variants.
 863:      * '\uFE50' - '\uFE6F'.
 864:      */
 865:     public static final UnicodeBlock SMALL_FORM_VARIANTS
 866:       = new UnicodeBlock('\uFE50', '\uFE6F',
 867:                          "SMALL_FORM_VARIANTS");
 868: 
 869:     /**
 870:      * Arabic Presentation Forms-B.
 871:      * '\uFE70' - '\uFEFE'.
 872:      */
 873:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
 874:       = new UnicodeBlock('\uFE70', '\uFEFE',
 875:                          "ARABIC_PRESENTATION_FORMS_B");
 876: 
 877:     /**
 878:      * Halfwidth and Fullwidth Forms.
 879:      * '\uFF00' - '\uFFEF'.
 880:      */
 881:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
 882:       = new UnicodeBlock('\uFF00', '\uFFEF',
 883:                          "HALFWIDTH_AND_FULLWIDTH_FORMS");
 884: 
 885:     /**
 886:      * Specials. Only the second range below is stored in this constant; <code>of(char)</code> special-cases the first.
 887:      * '\uFEFF', '\uFFF0' - '\uFFFD'.
 888:      */
 889:     public static final UnicodeBlock SPECIALS
 890:       = new UnicodeBlock('\uFFF0', '\uFFFD',
 891:                          "SPECIALS");
 892: 
 893:     /**
 894:      * The defined blocks, listed in ascending order of their ranges so that <code>of(char)</code> can binary-search them.
 895:      */
 896:     private static final UnicodeBlock sets[] = {
 897:       BASIC_LATIN,
 898:       LATIN_1_SUPPLEMENT,
 899:       LATIN_EXTENDED_A,
 900:       LATIN_EXTENDED_B,
 901:       IPA_EXTENSIONS,
 902:       SPACING_MODIFIER_LETTERS,
 903:       COMBINING_DIACRITICAL_MARKS,
 904:       GREEK,
 905:       CYRILLIC,
 906:       ARMENIAN,
 907:       HEBREW,
 908:       ARABIC,
 909:       SYRIAC,
 910:       THAANA,
 911:       DEVANAGARI,
 912:       BENGALI,
 913:       GURMUKHI,
 914:       GUJARATI,
 915:       ORIYA,
 916:       TAMIL,
 917:       TELUGU,
 918:       KANNADA,
 919:       MALAYALAM,
 920:       SINHALA,
 921:       THAI,
 922:       LAO,
 923:       TIBETAN,
 924:       MYANMAR,
 925:       GEORGIAN,
 926:       HANGUL_JAMO,
 927:       ETHIOPIC,
 928:       CHEROKEE,
 929:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
 930:       OGHAM,
 931:       RUNIC,
 932:       KHMER,
 933:       MONGOLIAN,
 934:       LATIN_EXTENDED_ADDITIONAL,
 935:       GREEK_EXTENDED,
 936:       GENERAL_PUNCTUATION,
 937:       SUPERSCRIPTS_AND_SUBSCRIPTS,
 938:       CURRENCY_SYMBOLS,
 939:       COMBINING_MARKS_FOR_SYMBOLS,
 940:       LETTERLIKE_SYMBOLS,
 941:       NUMBER_FORMS,
 942:       ARROWS,
 943:       MATHEMATICAL_OPERATORS,
 944:       MISCELLANEOUS_TECHNICAL,
 945:       CONTROL_PICTURES,
 946:       OPTICAL_CHARACTER_RECOGNITION,
 947:       ENCLOSED_ALPHANUMERICS,
 948:       BOX_DRAWING,
 949:       BLOCK_ELEMENTS,
 950:       GEOMETRIC_SHAPES,
 951:       MISCELLANEOUS_SYMBOLS,
 952:       DINGBATS,
 953:       BRAILLE_PATTERNS,
 954:       CJK_RADICALS_SUPPLEMENT,
 955:       KANGXI_RADICALS,
 956:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
 957:       CJK_SYMBOLS_AND_PUNCTUATION,
 958:       HIRAGANA,
 959:       KATAKANA,
 960:       BOPOMOFO,
 961:       HANGUL_COMPATIBILITY_JAMO,
 962:       KANBUN,
 963:       BOPOMOFO_EXTENDED,
 964:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
 965:       CJK_COMPATIBILITY,
 966:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
 967:       CJK_UNIFIED_IDEOGRAPHS,
 968:       YI_SYLLABLES,
 969:       YI_RADICALS,
 970:       HANGUL_SYLLABLES,
 971:       SURROGATES_AREA,
 972:       PRIVATE_USE_AREA,
 973:       CJK_COMPATIBILITY_IDEOGRAPHS,
 974:       ALPHABETIC_PRESENTATION_FORMS,
 975:       ARABIC_PRESENTATION_FORMS_A,
 976:       COMBINING_HALF_MARKS,
 977:       CJK_COMPATIBILITY_FORMS,
 978:       SMALL_FORM_VARIANTS,
 979:       ARABIC_PRESENTATION_FORMS_B,
 980:       HALFWIDTH_AND_FULLWIDTH_FORMS,
 981:       SPECIALS,
 982:     };
 983:   } // class UnicodeBlock
 984: 
 985:   /**
 986:    * The char wrapped by this Character. Being final, it never changes.
 987:    *
 988:    * @serial the value of this Character
 989:    */
 990:   private final char value;
 991: 
 992:   /**
 993:    * Serialization version identifier. Compatible with JDK 1.0+.
 994:    */
 995:   private static final long serialVersionUID = 3786198910865385080L;
 996: 
 997:   /**
 998:    * Smallest radix accepted by the radix-taking methods. This value is 2.
 999:    *
1000:    * @see #digit(char, int)
1001:    * @see #forDigit(int, int)
1002:    * @see Integer#toString(int, int)
1003:    * @see Integer#valueOf(String)
1004:    */
1005:   public static final int MIN_RADIX = 2;
1006: 
1007:   /**
1008:    * Largest radix accepted by the radix-taking methods. This value is 36.
1009:    *
1010:    * @see #digit(char, int)
1011:    * @see #forDigit(int, int)
1012:    * @see Integer#toString(int, int)
1013:    * @see Integer#valueOf(String)
1014:    */
1015:   public static final int MAX_RADIX = 36;
1016: 
1017:   /**
1018:    * The smallest value that the primitive char data type can hold.
1019:    * This value is <code>'\\u0000'</code>.
1020:    */
1021:   public static final char MIN_VALUE = '\u0000';
1022: 
1023:   /**
1024:    * The largest value that the primitive char data type can hold.
1025:    * This value is <code>'\\uFFFF'</code>.
1026:    */
1027:   public static final char MAX_VALUE = '\uFFFF';
1028: 
1029:   /**
1030:    * Class object representing the primitive char data type, as handed
1031:    * out by VMClassLoader.getPrimitiveClass('C').
1032:    * @since 1.1
1033:    */
1034:   public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1035: 
1036:   /**
1037:    * Unicode general category Lu: Letter, Uppercase (Informative).
1038:    *
1039:    * @since 1.1
1040:    */
1041:   public static final byte UPPERCASE_LETTER = 1;
1042: 
1043:   /**
1044:    * Unicode general category Ll: Letter, Lowercase (Informative).
1045:    *
1046:    * @since 1.1
1047:    */
1048:   public static final byte LOWERCASE_LETTER = 2;
1049: 
1050:   /**
1051:    * Unicode general category Lt: Letter, Titlecase (Informative).
1052:    *
1053:    * @since 1.1
1054:    */
1055:   public static final byte TITLECASE_LETTER = 3;
1056: 
1057:   /**
1058:    * Unicode general category Mn: Mark, Non-Spacing (Normative).
1059:    *
1060:    * @since 1.1
1061:    */
1062:   public static final byte NON_SPACING_MARK = 6;
1063: 
1064:   /**
1065:    * Unicode general category Mc: Mark, Spacing Combining (Normative).
1066:    *
1067:    * @since 1.1
1068:    */
1069:   public static final byte COMBINING_SPACING_MARK = 8;
1070: 
1071:   /**
1072:    * Unicode general category Me: Mark, Enclosing (Normative).
1073:    *
1074:    * @since 1.1
1075:    */
1076:   public static final byte ENCLOSING_MARK = 7;
1077: 
1078:   /**
1079:    * Unicode general category Nd: Number, Decimal Digit (Normative).
1080:    *
1081:    * @since 1.1
1082:    */
1083:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
1084: 
1085:   /**
1086:    * Unicode general category Nl: Number, Letter (Normative).
1087:    *
1088:    * @since 1.1
1089:    */
1090:   public static final byte LETTER_NUMBER = 10;
1091: 
1092:   /**
1093:    * Unicode general category No: Number, Other (Normative).
1094:    *
1095:    * @since 1.1
1096:    */
1097:   public static final byte OTHER_NUMBER = 11;
1098: 
1099:   /**
1100:    * Unicode general category Zs: Separator, Space (Normative).
1101:    *
1102:    * @since 1.1
1103:    */
1104:   public static final byte SPACE_SEPARATOR = 12;
1105: 
1106:   /**
1107:    * Unicode general category Zl: Separator, Line (Normative).
1108:    *
1109:    * @since 1.1
1110:    */
1111:   public static final byte LINE_SEPARATOR = 13;
1112: 
1113:   /**
1114:    * Unicode general category Zp: Separator, Paragraph (Normative).
1115:    *
1116:    * @since 1.1
1117:    */
1118:   public static final byte PARAGRAPH_SEPARATOR = 14;
1119: 
1120:   /**
1121:    * Unicode general category Cc: Other, Control (Normative).
1122:    *
1123:    * @since 1.1
1124:    */
1125:   public static final byte CONTROL = 15;
1126: 
1127:   /**
1128:    * Unicode general category Cf: Other, Format (Normative).
1129:    *
1130:    * @since 1.1
1131:    */
1132:   public static final byte FORMAT = 16;
1133: 
1134:   /**
1135:    * Unicode general category Cs: Other, Surrogate (Normative).
1136:    *
1137:    * @since 1.1
1138:    */
1139:   public static final byte SURROGATE = 19;
1140: 
1141:   /**
1142:    * Unicode general category Co: Other, Private Use (Normative).
1143:    *
1144:    * @since 1.1
1145:    */
1146:   public static final byte PRIVATE_USE = 18;
1147: 
1148:   /**
1149:    * Unicode general category Cn: Other, Not Assigned (Normative).
1150:    *
1151:    * @since 1.1
1152:    */
1153:   public static final byte UNASSIGNED = 0;
1154: 
1155:   /**
1156:    * Unicode general category Lm: Letter, Modifier (Informative).
1157:    *
1158:    * @since 1.1
1159:    */
1160:   public static final byte MODIFIER_LETTER = 4;
1161: 
1162:   /**
1163:    * Unicode general category Lo: Letter, Other (Informative).
1164:    *
1165:    * @since 1.1
1166:    */
1167:   public static final byte OTHER_LETTER = 5;
1168: 
1169:   /**
1170:    * Unicode general category Pc: Punctuation, Connector (Informative).
1171:    *
1172:    * @since 1.1
1173:    */
1174:   public static final byte CONNECTOR_PUNCTUATION = 23;
1175: 
1176:   /**
1177:    * Unicode general category Pd: Punctuation, Dash (Informative).
1178:    *
1179:    * @since 1.1
1180:    */
1181:   public static final byte DASH_PUNCTUATION = 20;
1182: 
1183:   /**
1184:    * Unicode general category Ps: Punctuation, Open (Informative).
1185:    *
1186:    * @since 1.1
1187:    */
1188:   public static final byte START_PUNCTUATION = 21;
1189: 
1190:   /**
1191:    * Unicode general category Pe: Punctuation, Close (Informative).
1192:    *
1193:    * @since 1.1
1194:    */
1195:   public static final byte END_PUNCTUATION = 22;
1196: 
1197:   /**
1198:    * Unicode general category Pi: Punctuation, Initial Quote (Informative).
1199:    *
1200:    * @since 1.4
1201:    */
1202:   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1203: 
1204:   /**
1205:    * Unicode general category Pf: Punctuation, Final Quote (Informative).
1206:    *
1207:    * @since 1.4
1208:    */
1209:   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1210: 
1211:   /**
1212:    * Unicode general category Po: Punctuation, Other (Informative).
1213:    *
1214:    * @since 1.1
1215:    */
1216:   public static final byte OTHER_PUNCTUATION = 24;
1217: 
1218:   /**
1219:    * Unicode general category Sm: Symbol, Math (Informative).
1220:    *
1221:    * @since 1.1
1222:    */
1223:   public static final byte MATH_SYMBOL = 25;
1224: 
1225:   /**
1226:    * Unicode general category Sc: Symbol, Currency (Informative).
1227:    *
1228:    * @since 1.1
1229:    */
1230:   public static final byte CURRENCY_SYMBOL = 26;
1231: 
1232:   /**
1233:    * Unicode general category Sk: Symbol, Modifier (Informative).
1234:    *
1235:    * @since 1.1
1236:    */
1237:   public static final byte MODIFIER_SYMBOL = 27;
1238: 
1239:   /**
1240:    * Unicode general category So: Symbol, Other (Informative).
1241:    *
1242:    * @since 1.1
1243:    */
1244:   public static final byte OTHER_SYMBOL = 28;
1245: 
1246:   /**
1247:    * Undefined bidirectional character type. Undefined char values have
1248:    * undefined directionality in the Unicode specification.
1249:    *
1250:    * @since 1.4
1251:    */
1252:   public static final byte DIRECTIONALITY_UNDEFINED = -1;
1253: 
1254:   /**
1255:    * Strong bidirectional character type "L" (left-to-right).
1256:    *
1257:    * @since 1.4
1258:    */
1259:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1260: 
1261:   /**
1262:    * Strong bidirectional character type "R" (right-to-left).
1263:    *
1264:    * @since 1.4
1265:    */
1266:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1267: 
1268:   /**
1269:    * Strong bidirectional character type "AL" (right-to-left Arabic).
1270:    *
1271:    * @since 1.4
1272:    */
1273:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1274: 
1275:   /**
1276:    * Weak bidirectional character type "EN" (European number).
1277:    *
1278:    * @since 1.4
1279:    */
1280:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1281: 
1282:   /**
1283:    * Weak bidirectional character type "ES" (European number separator).
1284:    *
1285:    * @since 1.4
1286:    */
1287:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1288: 
1289:   /**
1290:    * Weak bidirectional character type "ET" (European number terminator).
1291:    *
1292:    * @since 1.4
1293:    */
1294:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1295: 
1296:   /**
1297:    * Weak bidirectional character type "AN" (Arabic number).
1298:    *
1299:    * @since 1.4
1300:    */
1301:   public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1302: 
1303:   /**
1304:    * Weak bidirectional character type "CS" (common number separator).
1305:    *
1306:    * @since 1.4
1307:    */
1308:   public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1309: 
1310:   /**
1311:    * Weak bidirectional character type "NSM" (nonspacing mark).
1312:    *
1313:    * @since 1.4
1314:    */
1315:   public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1316: 
1317:   /**
1318:    * Weak bidirectional character type "BN" (boundary neutral).
1319:    *
1320:    * @since 1.4
1321:    */
1322:   public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1323: 
1324:   /**
1325:    * Neutral bidirectional character type "B" (paragraph separator).
1326:    *
1327:    * @since 1.4
1328:    */
1329:   public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1330: 
1331:   /**
1332:    * Neutral bidirectional character type "S" (segment separator).
1333:    *
1334:    * @since 1.4
1335:    */
1336:   public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1337: 
1338:   /**
1339:    * Neutral bidirectional character type "WS" (whitespace).
1340:    *
1341:    * @since 1.4
1342:    */
1343:   public static final byte DIRECTIONALITY_WHITESPACE = 12;
1344: 
1345:   /**
1346:    * Neutral bidirectional character type "ON" (other neutrals).
1347:    *
1348:    * @since 1.4
1349:    */
1350:   public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1351: 
1352:   /**
1353:    * Strong bidirectional character type "LRE" (left-to-right embedding).
1354:    *
1355:    * @since 1.4
1356:    */
1357:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1358: 
1359:   /**
1360:    * Strong bidirectional character type "LRO" (left-to-right override).
1361:    *
1362:    * @since 1.4
1363:    */
1364:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1365: 
1366:   /**
1367:    * Strong bidirectional character type "RLE" (right-to-left embedding).
1368:    *
1369:    * @since 1.4
1370:    */
1371:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1372: 
1373:   /**
1374:    * Strong bidirectional character type "RLO" (right-to-left override).
1375:    *
1376:    * @since 1.4
1377:    */
1378:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1379: 
1380:   /**
1381:    * Weak bidirectional character type "PDF" (pop directional format).
1382:    *
1383:    * @since 1.4
1384:    */
1385:   public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1386: 
1387:   /**
1388:    * Unicode block offset lookup table, read from CharData.BLOCKS through
1389:    * String.zeroBasedStringValue (presumably to avoid a copy; verify).
1390:    * @see #readChar(char)
1391:    * @see CharData#BLOCKS
1392:    */
1393:   private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS);
1394: 
1395:   /**
1396:    * Unicode attribute offset lookup table, read from CharData.DATA through
1397:    * String.zeroBasedStringValue (presumably to avoid a copy; verify).
1398:    * @see CharData#DATA
1399:    */
1400:   private static final char[] data = String.zeroBasedStringValue(CharData.DATA);
1401: 
1402:   /**
1403:    * Unicode numeric value attribute table, read from CharData.NUM_VALUE
1404:    * through String.zeroBasedStringValue (presumably to avoid a copy; verify).
1405:    * @see CharData#NUM_VALUE
1406:    */
1407:   private static final char[] numValue
1408:       = String.zeroBasedStringValue(CharData.NUM_VALUE);
1409: 
1410:   /**
1411:    * Unicode uppercase attribute table, read from CharData.UPPER through
1412:    * String.zeroBasedStringValue (presumably to avoid a copy; verify).
1413:    * @see CharData#UPPER
1414:    */
1415:   private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER);
1416: 
1417:   /**
1418:    * Unicode lowercase attribute table, read from CharData.LOWER through
1419:    * String.zeroBasedStringValue (presumably to avoid a copy; verify).
1420:    * @see CharData#LOWER
1421:    */
1422:   private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER);
1423: 
1424:   /**
1425:    * Unicode direction attribute table, read from CharData.DIRECTION through
1426:    * String.zeroBasedStringValue (presumably to avoid a copy; verify).
1427:    * @see CharData#DIRECTION
1428:    */
1429:   // Package visible for use by String.
1430:   static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION);
1431: 
1432:   /**
1433:    * Unicode titlecase table, read from CharData.TITLE through
1434:    * String.zeroBasedStringValue (presumably to avoid a copy; verify).
1435:    * @see CharData#TITLE
1436:    */
1437:   private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);
1438: 
1439:   /**
1440:    * Mask selecting the low 5 bits of a data entry: the character type.
1441:    * @see CharData#DATA
1442:    */
1443:   private static final int TYPE_MASK = 0x1F;
1444: 
1445:   /**
1446:    * Mask selecting bit 5 of a data entry: the non-breaking space flag
1447:    * stored in the contents of data.
1448:    * @see CharData#DATA
1449:    */
1450:   private static final int NO_BREAK_MASK = 0x20;
1451: 
1452:   /**
1453:    * Mask selecting bit 6 of a data entry: the mirrored directionality
1454:    * flag stored in the contents of data.
1455:    * @see CharData#DATA
1456:    */
1457:   private static final int MIRROR_MASK = 0x40;
1458: 
1459:   /**
1460:    * Smallest supplementary code point: the first one above MAX_VALUE.
1461:    *
1462:    * @since 1.5
1463:    */
1464:   public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
1465: 
1466:   /**
1467:    * Smallest value of a code point.
1468:    *
1469:    * @since 1.5
1470:    */
1471:   public static final int MIN_CODE_POINT = 0; 
1472:  
1473:  
1474:   /**
1475:    * Largest value of a code point.
1476:    *
1477:    * @since 1.5
1478:    */
1479:   public static final int MAX_CODE_POINT = 0x010ffff;
1480: 
1481: 
1482:   /**
1483:    * Minimum high surrogate code in UTF-16 encoding (U+D800).
1484:    *
1485:    * @since 1.5
1486:    */
1487:   public static final char MIN_HIGH_SURROGATE = '\ud800';
1488: 
1489:   /**
1490:    * Maximum high surrogate code in UTF-16 encoding (U+DBFF).
1491:    *
1492:    * @since 1.5
1493:    */
1494:   public static final char MAX_HIGH_SURROGATE = '\udbff';
1495:  
1496:   /**
1497:    * Minimum low surrogate code in UTF-16 encoding (U+DC00).
1498:    *
1499:    * @since 1.5
1500:    */
1501:   public static final char MIN_LOW_SURROGATE = '\udc00';
1502: 
1503:   /**
1504:    * Maximum low surrogate code in UTF-16 encoding (U+DFFF).
1505:    *
1506:    * @since 1.5
1507:    */
1508:   public static final char MAX_LOW_SURROGATE = '\udfff';
1509: 
1510:   /**
1511:    * Grabs an attribute offset from the Unicode attribute database. The lower
1512:    * 5 bits are the character type, the next 2 bits are flags, and the top
1513:    * 9 bits are the offset into the attribute tables.
1514:    *
1515:    * @param ch the character to look up
1516:    * @return the character's attribute offset and type
1517:    * @see #TYPE_MASK
1518:    * @see #NO_BREAK_MASK
1519:    * @see #MIRROR_MASK
1520:    * @see CharData#DATA
1521:    * @see CharData#SHIFT
1522:    */
1523:   // Package visible for use in String.
1524:   static char readChar(char ch)
1525:   {
1526:     // Perform 16-bit addition to find the correct entry in data.
1527:     return data[(char) (blocks[ch >> CharData.SHIFT] + ch)];
1528:   }
1529: 
1530:   /**
1531:    * Creates a Character that wraps the given primitive char.
1532:    *
1533:    * @param value the character to wrap
1534:    */
1535:   public Character(char value)
1536:   {
1537:     this.value = value;
1538:   }
1539: 
1540:   /**
1541:    * Unwraps this object, yielding the primitive char it holds.
1542:    *
1543:    * @return the character wrapped
1544:    */
1545:   public char charValue()
1546:   {
1547:     return this.value;
1548:   }
1549: 
1550:   /**
1551:    * Uses the wrapped character itself, widened to an unsigned int, as the
1552:    * hash code. Range of returned values: 0x0000-0xFFFF.
1553:    *
1554:    * @return the value of the wrapped character
1555:    */
1556:   public int hashCode()
1557:   {
1558:     return (int) value;
1559:   }
1560: 
1561:   /**
1562:    * Tests for equality: only a Character wrapping the same char is equal.
1563:    *
1564:    * @param o object to compare
1565:    * @return true if o is a Character with the same value
1566:    */
1567:   public boolean equals(Object o)
1568:   {
1569:     final boolean sameType = o instanceof Character;
1570:     return sameType && ((Character) o).value == value;
1571:   }
1572: 
1573:   /**
1574:    * Produces the String form of the wrapped character.
1575:    *
1576:    * @return a one-character String holding the wrapped character
1577:    */
1578:   public String toString()
1579:   {
1580:     // The package-level String constructor spares us an array copy.
1581:     final char[] single = { value };
1582:     return new String(single, 0, 1, true);
1583:   }
1584: 
1585:   /**
1586:    * Builds a String of length 1 out of the specified character.
1587:    *
1588:    * @param ch the character to convert
1589:    * @return a String containing the character
1590:    * @since 1.4
1591:    */
1592:   public static String toString(char ch)
1593:   {
1594:     final char[] single = { ch };
1595:     return new String(single, 0, 1, true); // package ctor: no array copy
1596:   }
1597: 
1598:   /**
1599:    * Tests whether ch is a Unicode lowercase letter, e.g. <code>'a'</code>.
1600:    * <br>
1601:    * lowercase = [Ll]
1602:    *
1603:    * @param ch character to test
1604:    * @return true if ch is a Unicode lowercase letter, else false
1605:    * @see #isUpperCase(char)
1606:    * @see #isTitleCase(char)
1607:    * @see #toLowerCase(char)
1608:    * @see #getType(char)
1609:    */
1610:   public static boolean isLowerCase(char ch)
1611:   {
1612:     final int category = getType(ch);
1613:     return category == LOWERCASE_LETTER;
1614:   }
1615: 
1616:   /**
1617:    * Tests whether ch is a Unicode uppercase letter, e.g. <code>'A'</code>.
1618:    * <br>
1619:    * uppercase = [Lu]
1620:    *
1621:    * @param ch character to test
1622:    * @return true if ch is a Unicode uppercase letter, else false
1623:    * @see #isLowerCase(char)
1624:    * @see #isTitleCase(char)
1625:    * @see #toUpperCase(char)
1626:    * @see #getType(char)
1627:    */
1628:   public static boolean isUpperCase(char ch)
1629:   {
1630:     final int category = getType(ch);
1631:     return category == UPPERCASE_LETTER;
1632:   }
1633: 
1634:   /**
1635:    * Tests whether ch is a Unicode titlecase letter. One such letter is
1636:    * "Lj" (Latin capital L with small letter j).
1637:    * <br>
1638:    * titlecase = [Lt]
1639:    *
1640:    * @param ch character to test
1641:    * @return true if ch is a Unicode titlecase letter, else false
1642:    * @see #isLowerCase(char)
1643:    * @see #isUpperCase(char)
1644:    * @see #toTitleCase(char)
1645:    * @see #getType(char)
1646:    */
1647:   public static boolean isTitleCase(char ch)
1648:   {
1649:     return TITLECASE_LETTER == getType(ch);
1650:   }
1651: 
1652:   /**
1653:    * Tests whether ch is a Unicode decimal digit, e.g. <code>'0'</code>.
1654:    * <br>
1655:    * Unicode decimal digit = [Nd]
1656:    *
1657:    * @param ch character to test
1658:    * @return true if ch is a Unicode decimal digit, else false
1659:    * @see #digit(char, int)
1660:    * @see #forDigit(int, int)
1661:    * @see #getType(char)
1662:    */
1663:   public static boolean isDigit(char ch)
1664:   {
1665:     final int category = getType(ch);
1666:     return category == DECIMAL_DIGIT_NUMBER;
1667:   }
1668: 
1669:   /**
1670:    * Tests whether ch is part of the Unicode Standard. The standard keeps
1671:    * evolving; the answer reflects the characters in the data file.
1672:    * <br>
1673:    * defined = not [Cn]
1674:    *
1675:    * @param ch character to test
1676:    * @return true if ch is a Unicode character, else false
1677:    * @see #isDigit(char)
1678:    * @see #isLetter(char)
1679:    * @see #isLetterOrDigit(char)
1680:    * @see #isLowerCase(char)
1681:    * @see #isTitleCase(char)
1682:    * @see #isUpperCase(char)
1683:    */
1684:   public static boolean isDefined(char ch)
1685:   {
1686:     return ! (getType(ch) == UNASSIGNED);
1687:   }
1688: 
1689:   /**
1690:    * Tests whether ch is a Unicode letter. Some letters have no case, so
1691:    * this may hold even though isLowerCase and isUpperCase both fail.
1692:    * <br>
1693:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
1694:    *
1695:    * @param ch character to test
1696:    * @return true if ch is a Unicode letter, else false
1697:    * @see #isDigit(char)
1698:    * @see #isJavaIdentifierStart(char)
1699:    * @see #isJavaLetter(char)
1700:    * @see #isJavaLetterOrDigit(char)
1701:    * @see #isLetterOrDigit(char)
1702:    * @see #isLowerCase(char)
1703:    * @see #isTitleCase(char)
1704:    * @see #isUnicodeIdentifierStart(char)
1705:    * @see #isUpperCase(char)
1706:    */
1707:   public static boolean isLetter(char ch)
1708:   {
1709:     // One bit per accepted general category.
1710:     final int letters = (1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER)
1711:       | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER)
1712:       | (1 << OTHER_LETTER);
1713:     final int categoryBit = 1 << getType(ch);
1714:     return (categoryBit & letters) != 0;
1715:   }
1716: 
1717:   /**
1718:    * Tests whether ch is a Unicode letter or a Unicode digit, that is,
1719:    * whether either isLetter or isDigit would accept it.
1720:    * <br>
1721:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
1722:    *
1723:    * @param ch character to test
1724:    * @return true if ch is a Unicode letter or a Unicode digit, else false
1725:    * @see #isDigit(char)
1726:    * @see #isJavaIdentifierPart(char)
1727:    * @see #isJavaLetter(char)
1728:    * @see #isJavaLetterOrDigit(char)
1729:    * @see #isLetter(char)
1730:    * @see #isUnicodeIdentifierPart(char)
1731:    */
1732:   public static boolean isLetterOrDigit(char ch)
1733:   {
1734:     // One bit per accepted general category.
1735:     final int lettersAndDigits = (1 << UPPERCASE_LETTER)
1736:       | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER)
1737:       | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)
1738:       | (1 << DECIMAL_DIGIT_NUMBER);
1739:     final int categoryBit = 1 << getType(ch);
1740:     return (categoryBit & lettersAndDigits) != 0;
1741:   }
1742: 
1743:   /**
1744:    * Determines if a character can start a Java identifier. This method
1745:    * simply forwards to isJavaIdentifierStart, which accepts letters, any
1746:    * character where getType returns LETTER_NUMBER, currency symbols
1747:    * (like '$'), and connecting punctuation (like '_').
1748:    *
1749:    * @param ch character to test
1750:    * @return true if ch can start a Java identifier, else false
1751:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
1752:    * @see #isJavaLetterOrDigit(char)
1753:    * @see #isJavaIdentifierStart(char)
1754:    * @see #isJavaIdentifierPart(char)
1755:    * @see #isLetter(char)
1756:    * @see #isLetterOrDigit(char)
1757:    * @see #isUnicodeIdentifierStart(char)
1758:    */
1759:   public static boolean isJavaLetter(char ch)
1760:   {
1761:     return isJavaIdentifierStart(ch);
1762:   }
1763: 
1764:   /**
1765:    * Determines if a character can follow the first letter in a Java
1766:    * identifier. This method simply forwards to isJavaIdentifierPart, which
1767:    * accepts everything isJavaLetter does plus digits, combining marks,
1768:    * non-spacing marks, format characters, and those control characters
1769:    * for which isIdentifierIgnorable holds.
1770:    *
1771:    * @param ch character to test
1772:    * @return true if ch can follow the first letter in a Java identifier
1773:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
1774:    * @see #isJavaLetter(char)
1775:    * @see #isJavaIdentifierStart(char)
1776:    * @see #isJavaIdentifierPart(char)
1777:    * @see #isLetter(char)
1778:    * @see #isLetterOrDigit(char)
1779:    * @see #isUnicodeIdentifierPart(char)
1780:    * @see #isIdentifierIgnorable(char)
1781:    */
1782:   public static boolean isJavaLetterOrDigit(char ch)
1783:   {
1784:     return isJavaIdentifierPart(ch);
1785:   }
1786: 
1787:   /**
1788:    * Tests whether ch may begin a Java identifier. Accepted are all
1789:    * characters for which isLetter holds, those whose getType result is
1790:    * LETTER_NUMBER, currency symbols (like '$'), and connecting
1791:    * punctuation (like '_').
1792:    * <br>
1793:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
1794:    *
1795:    * @param ch character to test
1796:    * @return true if ch can start a Java identifier, else false
1797:    * @see #isJavaIdentifierPart(char)
1798:    * @see #isLetter(char)
1799:    * @see #isUnicodeIdentifierStart(char)
1800:    * @since 1.1
1801:    */
1802:   public static boolean isJavaIdentifierStart(char ch)
1803:   {
1804:     // One bit per general category that may begin an identifier:
1805:     // the letters, letter numbers, currency symbols and connectors.
1806:     final int starters = (1 << UPPERCASE_LETTER)
1807:       | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER)
1808:       | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)
1809:       | (1 << LETTER_NUMBER) | (1 << CURRENCY_SYMBOL)
1810:       | (1 << CONNECTOR_PUNCTUATION);
1811:     final int categoryBit = 1 << getType(ch);
1812:     return (categoryBit & starters) != 0;
1813:   }
1814: 
1815:   /**
1816:    * Tests whether ch may appear after the first character of a Java
1817:    * identifier. Accepted are the identifier start characters (letters,
1818:    * LETTER_NUMBER, currency, connecting punctuation), digits, combining
1819:    * marks, non-spacing marks, format characters, and the control
1820:    * characters for which isIdentifierIgnorable holds.
1821:    * <br>
1822:    * Java identifier extender =
1823:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
1824:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1825:    *
1826:    * @param ch character to test
1827:    * @return true if ch can follow the first letter in a Java identifier
1828:    * @see #isIdentifierIgnorable(char)
1829:    * @see #isJavaIdentifierStart(char)
1830:    * @see #isLetterOrDigit(char)
1831:    * @see #isUnicodeIdentifierPart(char)
1832:    * @since 1.1
1833:    */
1834:   public static boolean isJavaIdentifierPart(char ch)
1835:   {
1836:     final int type = getType(ch);
1837:     // One bit per general category that is accepted outright, with
1838:     // no further look at the character itself.
1839:     final int parts = (1 << UPPERCASE_LETTER)
1840:       | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER)
1841:       | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)
1842:       | (1 << NON_SPACING_MARK) | (1 << COMBINING_SPACING_MARK)
1843:       | (1 << DECIMAL_DIGIT_NUMBER) | (1 << LETTER_NUMBER)
1844:       | (1 << CURRENCY_SYMBOL) | (1 << CONNECTOR_PUNCTUATION)
1845:       | (1 << FORMAT);
1846:     if (((1 << type) & parts) != 0)
1847:       return true;
1848:     // Anything else must be a control character that identifiers
1849:     // are allowed to ignore.
1850:     return type == CONTROL && isIdentifierIgnorable(ch);
1851:   }
1852: 
1853:   /**
1854:    * Tests whether ch may begin a Unicode identifier. Only letters
1855:    * qualify, where "letters" is taken to include the characters whose
1856:    * type is LETTER_NUMBER.
1857:    * <br>
1858:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
1859:    *
1860:    * @param ch character to test
1861:    * @return true if ch can start a Unicode identifier, else false
1862:    * @see #isJavaIdentifierStart(char)
1863:    * @see #isLetter(char)
1864:    * @see #isUnicodeIdentifierPart(char)
1865:    * @since 1.1
1866:    */
1867:   public static boolean isUnicodeIdentifierStart(char ch)
1868:   {
1869:     // One bit per general category that may begin an identifier.
1870:     final int starters = (1 << UPPERCASE_LETTER)
1871:       | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER)
1872:       | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)
1873:       | (1 << LETTER_NUMBER);
1874:     final int categoryBit = 1 << getType(ch);
1875:     return (categoryBit & starters) != 0;
1876:   }
1877: 
1878:   /**
1879:    * Tests whether ch may appear after the first character of a Unicode
1880:    * identifier. Accepted are letters, connecting punctuation, digits,
1881:    * numeric letters, combining marks, non-spacing marks, format characters,
1882:    * and the control characters for which isIdentifierIgnorable holds.
1883:    * <br>
1884:    * Unicode identifier extender =
1885:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
1886:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1887:    *
1888:    * @param ch character to test
1889:    * @return true if ch can follow the first letter in a Unicode identifier
1890:    * @see #isIdentifierIgnorable(char)
1891:    * @see #isJavaIdentifierPart(char)
1892:    * @see #isLetterOrDigit(char)
1893:    * @see #isUnicodeIdentifierStart(char)
1894:    * @since 1.1
1895:    */
1896:   public static boolean isUnicodeIdentifierPart(char ch)
1897:   {
1898:     final int type = getType(ch);
1899:     // One bit per general category that is accepted outright, with
1900:     // no further look at the character itself.
1901:     final int parts = (1 << UPPERCASE_LETTER)
1902:       | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER)
1903:       | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)
1904:       | (1 << NON_SPACING_MARK) | (1 << COMBINING_SPACING_MARK)
1905:       | (1 << DECIMAL_DIGIT_NUMBER) | (1 << LETTER_NUMBER)
1906:       | (1 << CONNECTOR_PUNCTUATION) | (1 << FORMAT);
1907:     if (((1 << type) & parts) != 0)
1908:       return true;
1909:     // Anything else must be a control character that identifiers
1910:     // are allowed to ignore.
1911:     return type == CONTROL && isIdentifierIgnorable(ch);
1912:   }
1913: 
1914:   /**
1915:    * Tests whether ch is ignorable in a Unicode identifier. Ignorable are
1916:    * the non-whitespace ISO control characters (U+0000 through U+0008,
1917:    * U+000E through U+001B, and U+007F through U+009F) as well as all
1918:    * FORMAT characters.
1919:    * <br>
1920:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
1921:    *    |U+007F-U+009F
1922:    *
1923:    * @param ch character to test
1924:    * @return true if ch is ignorable in a Unicode or Java identifier
1925:    * @see #isJavaIdentifierPart(char)
1926:    * @see #isUnicodeIdentifierPart(char)
1927:    * @since 1.1
1928:    */
1929:   public static boolean isIdentifierIgnorable(char ch)
1930:   {
1931:     if (ch < '\t' || (ch >= '\u000E' && ch <= '\u001B')
1932:         || (ch >= '\u007F' && ch <= '\u009F'))
1933:       return true;
1934:     return getType(ch) == FORMAT;
1935:   }
1936: 
1937:   /**
1938:    * Maps a Unicode character to its lowercase equivalent. A character
1939:    * without such a mapping is returned unchanged. Be aware that
1940:    * isLowerCase(toLowerCase(ch)) does not always return true.
1941:    *
1942:    * @param ch character to convert to lowercase
1943:    * @return lowercase mapping of ch, or ch if no lowercase mapping exists
1944:    * @see #isLowerCase(char)
1945:    * @see #isUpperCase(char)
1946:    * @see #toTitleCase(char)
1947:    * @see #toUpperCase(char)
1948:    */
1949:   public static char toLowerCase(char ch)
1950:   {
1951:     // Sign is irrelevant here: the sum is truncated back to a char.
1952:     final int offset = readChar(ch) >> 7;
1953:     return (char) (lower[offset] + ch);
1954:   }
1955: 
1956:   /**
1957:    * Maps a Unicode character to its uppercase equivalent. A character
1958:    * without such a mapping is returned unchanged. Be aware that
1959:    * isUpperCase(toUpperCase(ch)) does not always return true.
1960:    *
1961:    * @param ch character to convert to uppercase
1962:    * @return uppercase mapping of ch, or ch if no uppercase mapping exists
1963:    * @see #isLowerCase(char)
1964:    * @see #isUpperCase(char)
1965:    * @see #toLowerCase(char)
1966:    * @see #toTitleCase(char)
1967:    */
1968:   public static char toUpperCase(char ch)
1969:   {
1970:     // Sign is irrelevant here: the sum is truncated back to a char.
1971:     final int offset = readChar(ch) >> 7;
1972:     return (char) (upper[offset] + ch);
1973:   }
1974: 
1975:   /**
1976:    * Converts a Unicode character into its titlecase equivalent mapping.
1977:    * If a mapping does not exist, then the character passed is returned.
1978:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
1979:    *
1980:    * @param ch character to convert to titlecase
1981:    * @return titlecase mapping of ch, or ch if titlecase mapping does
1982:    *         not exist
1983:    * @see #isTitleCase(char)
1984:    * @see #toLowerCase(char)
1985:    * @see #toUpperCase(char)
1986:    */
1987:   public static char toTitleCase(char ch)
1988:   {
1989:     // As title is short, it doesn't hurt to exhaustively iterate over it.
1990:     for (int i = title.length - 2; i >= 0; i -= 2)
1991:       if (title[i] == ch)
1992:         return title[i + 1];
1993:     return toUpperCase(ch);
1994:   }
1995: 
1996:   /**
1997:    * Converts a character into a digit of the specified radix. If the radix
1998:    * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
1999:    * exceeds the radix, or if ch is not a decimal digit or in the case
2000:    * insensitive set of 'a'-'z', the result is -1.
2001:    * <br>
2002:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
2003:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
2004:    *
2005:    * @param ch character to convert into a digit
2006:    * @param radix radix in which ch is a digit
2007:    * @return digit which ch represents in radix, or -1 not a valid digit
2008:    * @see #MIN_RADIX
2009:    * @see #MAX_RADIX
2010:    * @see #forDigit(int, int)
2011:    * @see #isDigit(char)
2012:    * @see #getNumericValue(char)
2013:    */
2014:   public static int digit(char ch, int radix)
2015:   {
2016:     if (radix < MIN_RADIX || radix > MAX_RADIX)
2017:       return -1;
2018:     char attr = readChar(ch);
2019:     if (((1 << (attr & TYPE_MASK))
2020:          & ((1 << UPPERCASE_LETTER)
2021:             | (1 << LOWERCASE_LETTER)
2022:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
2023:       {
2024:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
2025:         int digit = numValue[attr >> 7];
2026:         return (digit < radix) ? digit : -1;
2027:       }
2028:     return -1;
2029:   }
2030: 
2031:   /**
2032:    * Returns the Unicode numeric value property of a character. For example,
2033:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
2034:    *
2035:    * <p>This method also returns values for the letters A through Z, (not
2036:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
2037:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
2038:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
2039:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
2040:    * <code>'\uFF5A'</code> (full width variants).
2041:    *
2042:    * <p>If the character lacks a numeric value property, -1 is returned.
2043:    * If the character has a numeric value property which is not representable
2044:    * as a nonnegative integer, such as a fraction, -2 is returned.
2045:    *
2046:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
2047:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
2048:    *
2049:    * @param ch character from which the numeric value property will
2050:    *        be retrieved
2051:    * @return the numeric value property of ch, or -1 if it does not exist, or
2052:    *         -2 if it is not representable as a nonnegative integer
2053:    * @see #forDigit(int, int)
2054:    * @see #digit(char, int)
2055:    * @see #isDigit(char)
2056:    * @since 1.1
2057:    */
2058:   public static int getNumericValue(char ch)
2059:   {
2060:     // Treat numValue as signed.
2061:     return (short) numValue[readChar(ch) >> 7];
2062:   }
2063: 
2064:   /**
2065:    * Determines if a character is a ISO-LATIN-1 space. This is only the five
2066:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
2067:    * <code>'\r'</code>, and <code>' '</code>.
2068:    * <br>
2069:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
2070:    *
2071:    * @param ch character to test
2072:    * @return true if ch is a space, else false
2073:    * @deprecated Replaced by {@link #isWhitespace(char)}
2074:    * @see #isSpaceChar(char)
2075:    * @see #isWhitespace(char)
2076:    */
2077:   public static boolean isSpace(char ch)
2078:   {
2079:     // Performing the subtraction up front alleviates need to compare longs.
2080:     return ch-- <= ' ' && ((1 << ch)
2081:                            & ((1 << (' ' - 1))
2082:                               | (1 << ('\t' - 1))
2083:                               | (1 << ('\n' - 1))
2084:                               | (1 << ('\r' - 1))
2085:                               | (1 << ('\f' - 1)))) != 0;
2086:   }
2087: 
2088:   /**
2089:    * Determines if a character is a Unicode space character. This includes
2090:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
2091:    * <br>
2092:    * Unicode space = [Zs]|[Zp]|[Zl]
2093:    *
2094:    * @param ch character to test
2095:    * @return true if ch is a Unicode space, else false
2096:    * @see #isWhitespace(char)
2097:    * @since 1.1
2098:    */
2099:   public static boolean isSpaceChar(char ch)
2100:   {
2101:     return ((1 << getType(ch))
2102:             & ((1 << SPACE_SEPARATOR)
2103:                | (1 << LINE_SEPARATOR)
2104:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
2105:   }
2106: 
2107:   /**
2108:    * Determines if a character is Java whitespace. This includes Unicode
2109:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
2110:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
2111:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
2112:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
2113:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
2114:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
2115:    * and <code>'\u001F'</code>.
2116:    * <br>
2117:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
2118:    *
2119:    * @param ch character to test
2120:    * @return true if ch is Java whitespace, else false
2121:    * @see #isSpaceChar(char)
2122:    * @since 1.1
2123:    */
2124:   public static boolean isWhitespace(char ch)
2125:   {
2126:     int attr = readChar(ch);
2127:     return ((((1 << (attr & TYPE_MASK))
2128:               & ((1 << SPACE_SEPARATOR)
2129:                  | (1 << LINE_SEPARATOR)
2130:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
2131:             && (attr & NO_BREAK_MASK) == 0)
2132:       || (ch <= '\u001F' && ((1 << ch)
2133:                              & ((1 << '\t')
2134:                                 | (1 << '\n')
2135:                                 | (1 << '\u000B')
2136:                                 | (1 << '\u000C')
2137:                                 | (1 << '\r')
2138:                                 | (1 << '\u001C')
2139:                                 | (1 << '\u001D')
2140:                                 | (1 << '\u001E')
2141:                                 | (1 << '\u001F'))) != 0);
2142:   }
2143: 
2144:   /**
2145:    * Determines if a character has the ISO Control property.
2146:    * <br>
2147:    * ISO Control = [Cc]
2148:    *
2149:    * @param ch character to test
2150:    * @return true if ch is an ISO Control character, else false
2151:    * @see #isSpaceChar(char)
2152:    * @see #isWhitespace(char)
2153:    * @since 1.1
2154:    */
2155:   public static boolean isISOControl(char ch)
2156:   {
2157:     return getType(ch) == CONTROL;
2158:   }
2159: 
2160:   /**
2161:    * Returns the Unicode general category property of a character.
2162:    *
2163:    * @param ch character from which the general category property will
2164:    *        be retrieved
2165:    * @return the character category property of ch as an integer
2166:    * @see #UNASSIGNED
2167:    * @see #UPPERCASE_LETTER
2168:    * @see #LOWERCASE_LETTER
2169:    * @see #TITLECASE_LETTER
2170:    * @see #MODIFIER_LETTER
2171:    * @see #OTHER_LETTER
2172:    * @see #NON_SPACING_MARK
2173:    * @see #ENCLOSING_MARK
2174:    * @see #COMBINING_SPACING_MARK
2175:    * @see #DECIMAL_DIGIT_NUMBER
2176:    * @see #LETTER_NUMBER
2177:    * @see #OTHER_NUMBER
2178:    * @see #SPACE_SEPARATOR
2179:    * @see #LINE_SEPARATOR
2180:    * @see #PARAGRAPH_SEPARATOR
2181:    * @see #CONTROL
2182:    * @see #FORMAT
2183:    * @see #PRIVATE_USE
2184:    * @see #SURROGATE
2185:    * @see #DASH_PUNCTUATION
2186:    * @see #START_PUNCTUATION
2187:    * @see #END_PUNCTUATION
2188:    * @see #CONNECTOR_PUNCTUATION
2189:    * @see #OTHER_PUNCTUATION
2190:    * @see #MATH_SYMBOL
2191:    * @see #CURRENCY_SYMBOL
2192:    * @see #MODIFIER_SYMBOL
2193:    * @see #INITIAL_QUOTE_PUNCTUATION
2194:    * @see #FINAL_QUOTE_PUNCTUATION
2195:    * @since 1.1
2196:    */
2197:   public static int getType(char ch)
2198:   {
2199:     return readChar(ch) & TYPE_MASK;
2200:   }
2201: 
2202:   /**
2203:    * Converts a digit into a character which represents that digit
2204:    * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX,
2205:    * or the digit exceeds the radix, then the null character <code>'\0'</code>
2206:    * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
2207:    * <br>
2208:    * return value boundary = U+0030-U+0039|U+0061-U+007A
2209:    *
2210:    * @param digit digit to be converted into a character
2211:    * @param radix radix of digit
2212:    * @return character representing digit in radix, or '\0'
2213:    * @see #MIN_RADIX
2214:    * @see #MAX_RADIX
2215:    * @see #digit(char, int)
2216:    */
2217:   public static char forDigit(int digit, int radix)
2218:   {
2219:     if (radix < MIN_RADIX || radix > MAX_RADIX
2220:         || digit < 0 || digit >= radix)
2221:       return '\0';
2222:     return Number.digits[digit];
2223:   }
2224: 
2225:   /**
2226:    * Returns the Unicode directionality property of the character. This
2227:    * is used in the visual ordering of text.
2228:    *
2229:    * @param ch the character to look up
2230:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
2231:    * @see #DIRECTIONALITY_UNDEFINED
2232:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
2233:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
2234:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
2235:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
2236:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
2237:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
2238:    * @see #DIRECTIONALITY_ARABIC_NUMBER
2239:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
2240:    * @see #DIRECTIONALITY_NONSPACING_MARK
2241:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
2242:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
2243:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
2244:    * @see #DIRECTIONALITY_WHITESPACE
2245:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
2246:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
2247:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
2248:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
2249:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
2250:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
2251:    * @since 1.4
2252:    */
2253:   public static byte getDirectionality(char ch)
2254:   {
2255:     // The result will correctly be signed.
2256:     return (byte) (direction[readChar(ch) >> 7] >> 2);
2257:   }
2258: 
2259:   /**
2260:    * Determines whether the character is mirrored according to Unicode. For
2261:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
2262:    * left-to-right text, but ')' in right-to-left text.
2263:    *
2264:    * @param ch the character to look up
2265:    * @return true if the character is mirrored
2266:    * @since 1.4
2267:    */
2268:   public static boolean isMirrored(char ch)
2269:   {
2270:     return (readChar(ch) & MIRROR_MASK) != 0;
2271:   }
2272: 
2273:   /**
2274:    * Compares another Character to this Character, numerically.
2275:    *
2276:    * @param anotherCharacter Character to compare with this Character
2277:    * @return a negative integer if this Character is less than
2278:    *         anotherCharacter, zero if this Character is equal, and
2279:    *         a positive integer if this Character is greater
2280:    * @throws NullPointerException if anotherCharacter is null
2281:    * @since 1.2
2282:    */
2283:   public int compareTo(Character anotherCharacter)
2284:   {
2285:     return value - anotherCharacter.value;
2286:   }
2287: 
2288:   /**
2289:    * Compares an object to this Character.  Assuming the object is a
2290:    * Character object, this method performs the same comparison as
2291:    * compareTo(Character).
2292:    *
2293:    * @param o object to compare
2294:    * @return the comparison value
2295:    * @throws ClassCastException if o is not a Character object
2296:    * @throws NullPointerException if o is null
2297:    * @see #compareTo(Character)
2298:    * @since 1.2
2299:    */
2300:   public int compareTo(Object o)
2301:   {
2302:     return compareTo((Character) o);
2303:   }
2304: 
2305:   /**
2306:    * Converts a unicode code point to a UTF-16 representation of that
2307:    * code point.
2308:    * 
2309:    * @param codePoint the unicode code point
2310:    *
2311:    * @return the UTF-16 representation of that code point
2312:    *
2313:    * @throws IllegalArgumentException if the code point is not a valid
2314:    *         unicode code point
2315:    *
2316:    * @since 1.5
2317:    */
2318:   public static char[] toChars(int codePoint)
2319:   {
2320:     char[] result = new char[charCount(codePoint)];
2321:     int ignore = toChars(codePoint, result, 0);
2322:     return result;
2323:   }
2324: 
2325:   /**
2326:    * Converts a unicode code point to its UTF-16 representation.
2327:    *
2328:    * @param codePoint the unicode code point
2329:    * @param dst the target char array
2330:    * @param dstIndex the start index for the target
2331:    *
2332:    * @return number of characters written to <code>dst</code>
2333:    *
2334:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
2335:    *         valid unicode code point
2336:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
2337:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
2338:    *         in <code>dst</code> or if the UTF-16 representation does not
2339:    *         fit into <code>dst</code>
2340:    *
2341:    * @since 1.5
2342:    */
2343:   public static int toChars(int codePoint, char[] dst, int dstIndex)
2344:   {
2345:     if (!isValidCodePoint(codePoint))
2346:       {
2347:         throw new IllegalArgumentException("not a valid code point: "
2348:                                            + codePoint);
2349:       }
2350: 
2351:     int result;
2352:     if (isSupplementaryCodePoint(codePoint))
2353:       {
2354:         // Write second char first to cause IndexOutOfBoundsException
2355:         // immediately.
2356:         dst[dstIndex + 1] = (char) ((codePoint & 0x3ff)
2357:                                     + (int) MIN_LOW_SURROGATE );
2358:         dst[dstIndex] = (char) ((codePoint >> 10) + (int) MIN_HIGH_SURROGATE);
2359:         result = 2;
2360:     }
2361:     else
2362:       {
2363:         dst[dstIndex] = (char) codePoint;
2364:         result = 1; 
2365:       }
2366:     return result;
2367:   }
2368: 
2369:   /**
2370:    * Return number of 16-bit characters required to represent the given
2371:    * code point.
2372:    *
2373:    * @param codePoint a uncode code point
2374:    *
2375:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
2376:    *
2377:    * @since 1.5
2378:    */
2379:   public static int charCount(int codePoint)
2380:   {
2381:     return 
2382:       (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
2383:       ? 2 
2384:       : 1;
2385:   }
2386: 
2387:   /**
2388:    * Determines whether the specified code point is
2389:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
2390:    * supplementary character range.
2391:    *
2392:    * @param codePoint a Unicode code point
2393:    *
2394:    * @return <code>true</code> if code point is in supplementary range
2395:    *
2396:    * @since 1.5
2397:    */
2398:   public static boolean isSupplementaryCodePoint(int codePoint)
2399:   {
2400:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2401:       && codePoint <= MAX_CODE_POINT;
2402:   }
2403: 
2404:   /**
2405:    * Determines whether the specified code point is
2406:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
2407:    *
2408:    * @param codePoint a Unicode code point
2409:    *
2410:    * @return <code>true</code> if code point is valid
2411:    *
2412:    * @since 1.5
2413:    */
2414:   public static boolean isValidCodePoint(int codePoint)
2415:   {
2416:     return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2417:   }
2418: } // class Character