Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: import java.text.Collator;
  45: import java.util.Locale;
  46: 
  47: /**
  48:  * Wrapper class for the primitive char data type.  In addition, this class
  49:  * allows one to retrieve property information and perform transformations
  50:  * on the defined characters in the Unicode Standard, Version 4.0.0.
  51:  * java.lang.Character is designed to be very dynamic, and as such, it
  52:  * retrieves information on the Unicode character set from a separate
  53:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  54:  *
  55:  * <p>For predicates, boundaries are used to describe
  56:  * the set of characters for which the method will return true.
  57:  * This syntax uses fairly normal regular expression notation.
  58:  * See 5.13 of the Unicode Standard, Version 4.0, for the
  59:  * boundary specification.
  60:  *
  61:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  62:  * for more information on the Unicode Standard.
  63:  *
  64:  * @author Tom Tromey (tromey@cygnus.com)
  65:  * @author Paul N. Fisher
  66:  * @author Jochen Hoenicke
  67:  * @author Eric Blake (ebb9@email.byu.edu)
  68:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  69:  * @see CharData
  70:  * @since 1.0
  71:  * @status partly updated to 1.5; some things still missing
  72:  */
  73: public final class Character implements Serializable, Comparable<Character>
  74: {
  75:   /**
  76:    * Represents a named subset of the Unicode character set.
  77:    *
  78:    * @author Paul N. Fisher
  79:    * @author Eric Blake (ebb9@email.byu.edu)
  80:    * @since 1.2
  81:    */
  82:   public static class Subset
  83:   {
  84:     /** Name given to this subset at construction; never null. */
  85:     private final String name;
  86: 
  87:     /**
  88:      * Creates a subset of characters carrying the given name.
  89:      *
  90:      * @param name the name of the subset
  91:      * @throws NullPointerException if name is null
  92:      */
  93:     protected Subset(String name)
  94:     {
  95:       // Dereferencing name rejects null; for non-null input the
  96:       // result of toString() is the very same String.
  97:       final String checkedName = name.toString();
  98:       this.name = checkedName;
  99:     }
 100: 
 101:     /**
 102:      * Tests for identity rather than structural equality: a Subset is
 103:      * equal only to itself.  Declared <code>final</code> so subclasses
 104:      * cannot weaken this.
 105:      *
 106:      * @param o the object to compare
 107:      * @return true if o is this
 108:      */
 109:     public final boolean equals(Object o)
 110:     {
 111:       return this == o;
 112:     }
 113: 
 114:     /**
 115:      * Pins the identity-based hashCode of Object as <code>final</code>, so
 116:      * that it stays consistent with equals.
 117:      * @return the hash code for this object
 118:      */
 119:     public final int hashCode()
 120:     {
 121:       return super.hashCode();
 122:     }
 123: 
 124:     /**
 125:      * Yields the name this subset was constructed with.
 126:      * @return the name
 127:      */
 128:     public final String toString()
 129:     {
 130:       return this.name;
 131:     }
 132:   } // class Subset
 133: 
 134:   /**
 135:    * A family of character subsets in the Unicode specification. A character
 136:    * is in at most one of these blocks.
 137:    *
 138:    * This inner class was generated automatically from
 139:    * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts.
 140:    * This Unicode definition file can be found on the
 141:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 142:    * JDK 1.5 uses Unicode version 4.0.0.
 143:    *
 144:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 145:    * @since 1.2
 146:    */
 147:   public static final class UnicodeBlock extends Subset
 148:   {
 149:     /** First code point of the block; <code>of(int)</code> treats it as inclusive. */
 150:     private final int start;
 151: 
 152:     /** Last code point of the block; <code>of(int)</code> treats it as inclusive. */
 153:     private final int end;
 154: 
 155:     /** The canonical name of the block according to the Unicode standard. */
 156:     private final String canonicalName;
 157: 
 158:     /** The three spellings of a block name told apart by <code>forName()</code>. */
 159:     private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }
 160: 
 161:     /**
 162:      * Creates a block spanning a fixed, contiguous range of code points.
 163:      *
 164:      * @param start the first code point of the range
 165:      * @param end the last code point of the range
 166:      * @param name the block name, handed on to <code>Subset</code>
 167:      * @param canonicalName the block's name as spelled in the Unicode
 168:      *        standard.
 169:      */
 170:     private UnicodeBlock(int start, int end,
 171:                          String name, String canonicalName)
 172:     {
 173:       super(name);
 174:       this.canonicalName = canonicalName;
 175:       this.end = end;
 176:       this.start = start;
 177:     }
 178: 
 179:     /**
 180:      * Looks up the Unicode block containing a UTF-16 <code>char</code>.
 181:      * <strong>Note</strong>: supplementary characters cannot be expressed
 182:      * as a single <code>char</code>; use <code>of(int)</code> for those.
 183:      *
 184:      * @param ch the character to look up
 185:      * @return the set it belongs to, or null if it is not in one
 186:      */
 187:     public static UnicodeBlock of(char ch)
 188:     {
 189:       final int codePoint = ch;
 190:       return of(codePoint);
 191:     }
 192: 
 193:     /**
 194:      * Returns the Unicode character block which a code point belongs to.
 195:      *
 196:      * @param codePoint the code point to look up
 197:      * @return the block it belongs to, or null if it is not in one.
 198:      * @throws IllegalArgumentException if the specified code point is
 199:      *         negative or greater than <code>MAX_CODE_POINT</code>.
 200:      * @since 1.5
 201:      */
 202:     public static UnicodeBlock of(int codePoint)
 203:     {
 204:       if (codePoint < 0 || codePoint > MAX_CODE_POINT)
 205:         throw new IllegalArgumentException("The supplied integer value is " +
 206:                                            "not a valid codepoint.");
 207:       // Binary search, which requires sets to be ordered by ascending range.
 208:       int low = 0;
 209:       int hi = sets.length - 1;
 210:       while (low <= hi)
 211:         {
 212:           int mid = (low + hi) >>> 1;
 213:           UnicodeBlock b = sets[mid];
 214:           if (codePoint < b.start)
 215:             hi = mid - 1;
 216:           else if (codePoint > b.end)
 217:             low = mid + 1;
 218:           else
 219:             return b;
 220:         }
 221:       return null;
 222:     }
 223: 
 224:     /**
 225:      * <p>
 226:      * Looks up a <code>UnicodeBlock</code> by the name the Unicode standard
 227:      * gives it.  The version of Unicode in use is defined by the
 228:      * <code>Character</code> class, and the names are given in the
 229:      * <code>Blocks-&lt;version&gt;.txt</code> file corresponding to that
 230:      * version.  Three spellings of a name are accepted:
 231:      * </p>
 232:      * <ol>
 233:      * <li>The canonical, human-readable name used by the Unicode standard,
 234:      * with all spaces and hyphens retained.  For example,
 235:      * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 236:      * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 237:      * <li>The name used for the constants specified by this class, which
 238:      * is the canonical name with all spaces and hyphens replaced with
 239:      * underscores e.g. `BASIC_LATIN'</li>
 240:      * </ol>
 241:      * <p>
 242:      * The spelling is inferred from the name itself: a name containing a
 243:      * space is treated as canonical, otherwise one containing an underscore
 244:      * is treated as a constant name, and anything else as the no-spaces
 245:      * form.
 246:      * </p>
 247:      * <p>
 248:      * Names are compared case-insensitively, using a U.S. English
 249:      * <code>Collator</code> at primary strength.  Previous names of blocks
 250:      * are recognised as well as current ones; at present this only means
 251:      * the deprecated `SURROGATES_AREA' (the <code>of()</code> methods
 252:      * only return one of the three new surrogate blocks).
 253:      * </p>
 254:      *
 255:      * @param blockName the name of the block to look up.
 256:      * @return the specified block.
 257:      * @throws NullPointerException if the <code>blockName</code> is
 258:      *         <code>null</code>.
 259:      * @throws IllegalArgumentException if the name does not match any Unicode
 260:      *         block.
 261:      * @since 1.5
 262:      */
 263:     public static final UnicodeBlock forName(String blockName)
 264:     {
 265:       // Infer the spelling from the separators present in the name.
 266:       final NameType type;
 267:       if (blockName.indexOf(' ') >= 0)
 268:         type = NameType.CANONICAL;
 269:       else if (blockName.indexOf('_') >= 0)
 270:         type = NameType.CONSTANT;
 271:       else
 272:         type = NameType.NO_SPACES;
 273: 
 274:       Collator usCollator = Collator.getInstance(Locale.US);
 275:       usCollator.setStrength(Collator.PRIMARY);
 276: 
 277:       // Within each spelling, the deprecated block (which is not in sets)
 278:       // is tried first, then every block in sets in order.
 279:       switch (type)
 280:       {
 281:         case CANONICAL:
 282:           if (usCollator.compare(blockName, "Surrogates Area") == 0)
 283:             return SURROGATES_AREA;
 284:           for (UnicodeBlock candidate : sets)
 285:             if (usCollator.compare(blockName, candidate.canonicalName) == 0)
 286:               return candidate;
 287:           break;
 288:         case NO_SPACES:
 289:           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 290:             return SURROGATES_AREA;
 291:           for (UnicodeBlock candidate : sets)
 292:             {
 293:               String squeezed = candidate.canonicalName.replace(" ", "");
 294:               if (usCollator.compare(blockName, squeezed) == 0)
 295:                 return candidate;
 296:             }
 297:           break;
 298:         case CONSTANT:
 299:           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0)
 300:             return SURROGATES_AREA;
 301:           for (UnicodeBlock candidate : sets)
 302:             if (usCollator.compare(blockName, candidate.toString()) == 0)
 303:               return candidate;
 304:           break;
 305:       }
 306:       // Nothing matched.
 307:       throw new IllegalArgumentException("No Unicode block found for " +
 308:                                          blockName + ".");
 309:     }
 310: 
 311:     /**
 312:      * Basic Latin.
 313:      * 0x0000 - 0x007F.
 314:      */
 315:     public static final UnicodeBlock BASIC_LATIN
 316:       = new UnicodeBlock(0x0000, 0x007F,
 317:                          "BASIC_LATIN", 
 318:                          "Basic Latin");
 319: 
 320:     /**
 321:      * Latin-1 Supplement.
 322:      * 0x0080 - 0x00FF.
 323:      */
 324:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 325:       = new UnicodeBlock(0x0080, 0x00FF,
 326:                          "LATIN_1_SUPPLEMENT", 
 327:                          "Latin-1 Supplement");
 328: 
 329:     /**
 330:      * Latin Extended-A.
 331:      * 0x0100 - 0x017F.
 332:      */
 333:     public static final UnicodeBlock LATIN_EXTENDED_A
 334:       = new UnicodeBlock(0x0100, 0x017F,
 335:                          "LATIN_EXTENDED_A", 
 336:                          "Latin Extended-A");
 337: 
 338:     /**
 339:      * Latin Extended-B.
 340:      * 0x0180 - 0x024F.
 341:      */
 342:     public static final UnicodeBlock LATIN_EXTENDED_B
 343:       = new UnicodeBlock(0x0180, 0x024F,
 344:                          "LATIN_EXTENDED_B", 
 345:                          "Latin Extended-B");
 346: 
 347:     /**
 348:      * IPA Extensions.
 349:      * 0x0250 - 0x02AF.
 350:      */
 351:     public static final UnicodeBlock IPA_EXTENSIONS
 352:       = new UnicodeBlock(0x0250, 0x02AF,
 353:                          "IPA_EXTENSIONS", 
 354:                          "IPA Extensions");
 355: 
 356:     /**
 357:      * Spacing Modifier Letters.
 358:      * 0x02B0 - 0x02FF.
 359:      */
 360:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 361:       = new UnicodeBlock(0x02B0, 0x02FF,
 362:                          "SPACING_MODIFIER_LETTERS", 
 363:                          "Spacing Modifier Letters");
 364: 
 365:     /**
 366:      * Combining Diacritical Marks.
 367:      * 0x0300 - 0x036F.
 368:      */
 369:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 370:       = new UnicodeBlock(0x0300, 0x036F,
 371:                          "COMBINING_DIACRITICAL_MARKS", 
 372:                          "Combining Diacritical Marks");
 373: 
 374:     /**
 375:      * Greek.
 376:      * 0x0370 - 0x03FF.
 377:      */
 378:     public static final UnicodeBlock GREEK
 379:       = new UnicodeBlock(0x0370, 0x03FF,
 380:                          "GREEK", 
 381:                          "Greek");
 382: 
 383:     /**
 384:      * Cyrillic.
 385:      * 0x0400 - 0x04FF.
 386:      */
 387:     public static final UnicodeBlock CYRILLIC
 388:       = new UnicodeBlock(0x0400, 0x04FF,
 389:                          "CYRILLIC", 
 390:                          "Cyrillic");
 391: 
 392:     /**
 393:      * Cyrillic Supplementary.
 394:      * 0x0500 - 0x052F.
 395:      * @since 1.5
 396:      */
 397:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 398:       = new UnicodeBlock(0x0500, 0x052F,
 399:                          "CYRILLIC_SUPPLEMENTARY", 
 400:                          "Cyrillic Supplementary");
 401: 
 402:     /**
 403:      * Armenian.
 404:      * 0x0530 - 0x058F.
 405:      */
 406:     public static final UnicodeBlock ARMENIAN
 407:       = new UnicodeBlock(0x0530, 0x058F,
 408:                          "ARMENIAN", 
 409:                          "Armenian");
 410: 
 411:     /**
 412:      * Hebrew.
 413:      * 0x0590 - 0x05FF.
 414:      */
 415:     public static final UnicodeBlock HEBREW
 416:       = new UnicodeBlock(0x0590, 0x05FF,
 417:                          "HEBREW", 
 418:                          "Hebrew");
 419: 
 420:     /**
 421:      * Arabic.
 422:      * 0x0600 - 0x06FF.
 423:      */
 424:     public static final UnicodeBlock ARABIC
 425:       = new UnicodeBlock(0x0600, 0x06FF,
 426:                          "ARABIC", 
 427:                          "Arabic");
 428: 
 429:     /**
 430:      * Syriac.
 431:      * 0x0700 - 0x074F.
 432:      * @since 1.4
 433:      */
 434:     public static final UnicodeBlock SYRIAC
 435:       = new UnicodeBlock(0x0700, 0x074F,
 436:                          "SYRIAC", 
 437:                          "Syriac");
 438: 
 439:     /**
 440:      * Thaana.
 441:      * 0x0780 - 0x07BF.
 442:      * @since 1.4
 443:      */
 444:     public static final UnicodeBlock THAANA
 445:       = new UnicodeBlock(0x0780, 0x07BF,
 446:                          "THAANA", 
 447:                          "Thaana");
 448: 
 449:     /**
 450:      * Devanagari.
 451:      * 0x0900 - 0x097F.
 452:      */
 453:     public static final UnicodeBlock DEVANAGARI
 454:       = new UnicodeBlock(0x0900, 0x097F,
 455:                          "DEVANAGARI", 
 456:                          "Devanagari");
 457: 
 458:     /**
 459:      * Bengali.
 460:      * 0x0980 - 0x09FF.
 461:      */
 462:     public static final UnicodeBlock BENGALI
 463:       = new UnicodeBlock(0x0980, 0x09FF,
 464:                          "BENGALI", 
 465:                          "Bengali");
 466: 
 467:     /**
 468:      * Gurmukhi.
 469:      * 0x0A00 - 0x0A7F.
 470:      */
 471:     public static final UnicodeBlock GURMUKHI
 472:       = new UnicodeBlock(0x0A00, 0x0A7F,
 473:                          "GURMUKHI", 
 474:                          "Gurmukhi");
 475: 
 476:     /**
 477:      * Gujarati.
 478:      * 0x0A80 - 0x0AFF.
 479:      */
 480:     public static final UnicodeBlock GUJARATI
 481:       = new UnicodeBlock(0x0A80, 0x0AFF,
 482:                          "GUJARATI", 
 483:                          "Gujarati");
 484: 
 485:     /**
 486:      * Oriya.
 487:      * 0x0B00 - 0x0B7F.
 488:      */
 489:     public static final UnicodeBlock ORIYA
 490:       = new UnicodeBlock(0x0B00, 0x0B7F,
 491:                          "ORIYA", 
 492:                          "Oriya");
 493: 
 494:     /**
 495:      * Tamil.
 496:      * 0x0B80 - 0x0BFF.
 497:      */
 498:     public static final UnicodeBlock TAMIL
 499:       = new UnicodeBlock(0x0B80, 0x0BFF,
 500:                          "TAMIL", 
 501:                          "Tamil");
 502: 
 503:     /**
 504:      * Telugu.
 505:      * 0x0C00 - 0x0C7F.
 506:      */
 507:     public static final UnicodeBlock TELUGU
 508:       = new UnicodeBlock(0x0C00, 0x0C7F,
 509:                          "TELUGU", 
 510:                          "Telugu");
 511: 
 512:     /**
 513:      * Kannada.
 514:      * 0x0C80 - 0x0CFF.
 515:      */
 516:     public static final UnicodeBlock KANNADA
 517:       = new UnicodeBlock(0x0C80, 0x0CFF,
 518:                          "KANNADA", 
 519:                          "Kannada");
 520: 
 521:     /**
 522:      * Malayalam.
 523:      * 0x0D00 - 0x0D7F.
 524:      */
 525:     public static final UnicodeBlock MALAYALAM
 526:       = new UnicodeBlock(0x0D00, 0x0D7F,
 527:                          "MALAYALAM", 
 528:                          "Malayalam");
 529: 
 530:     /**
 531:      * Sinhala.
 532:      * 0x0D80 - 0x0DFF.
 533:      * @since 1.4
 534:      */
 535:     public static final UnicodeBlock SINHALA
 536:       = new UnicodeBlock(0x0D80, 0x0DFF,
 537:                          "SINHALA", 
 538:                          "Sinhala");
 539: 
 540:     /**
 541:      * Thai.
 542:      * 0x0E00 - 0x0E7F.
 543:      */
 544:     public static final UnicodeBlock THAI
 545:       = new UnicodeBlock(0x0E00, 0x0E7F,
 546:                          "THAI", 
 547:                          "Thai");
 548: 
 549:     /**
 550:      * Lao.
 551:      * 0x0E80 - 0x0EFF.
 552:      */
 553:     public static final UnicodeBlock LAO
 554:       = new UnicodeBlock(0x0E80, 0x0EFF,
 555:                          "LAO", 
 556:                          "Lao");
 557: 
 558:     /**
 559:      * Tibetan.
 560:      * 0x0F00 - 0x0FFF.
 561:      */
 562:     public static final UnicodeBlock TIBETAN
 563:       = new UnicodeBlock(0x0F00, 0x0FFF,
 564:                          "TIBETAN", 
 565:                          "Tibetan");
 566: 
 567:     /**
 568:      * Myanmar.
 569:      * 0x1000 - 0x109F.
 570:      * @since 1.4
 571:      */
 572:     public static final UnicodeBlock MYANMAR
 573:       = new UnicodeBlock(0x1000, 0x109F,
 574:                          "MYANMAR", 
 575:                          "Myanmar");
 576: 
 577:     /**
 578:      * Georgian.
 579:      * 0x10A0 - 0x10FF.
 580:      */
 581:     public static final UnicodeBlock GEORGIAN
 582:       = new UnicodeBlock(0x10A0, 0x10FF,
 583:                          "GEORGIAN", 
 584:                          "Georgian");
 585: 
 586:     /**
 587:      * Hangul Jamo.
 588:      * 0x1100 - 0x11FF.
 589:      */
 590:     public static final UnicodeBlock HANGUL_JAMO
 591:       = new UnicodeBlock(0x1100, 0x11FF,
 592:                          "HANGUL_JAMO", 
 593:                          "Hangul Jamo");
 594: 
 595:     /**
 596:      * Ethiopic.
 597:      * 0x1200 - 0x137F.
 598:      * @since 1.4
 599:      */
 600:     public static final UnicodeBlock ETHIOPIC
 601:       = new UnicodeBlock(0x1200, 0x137F,
 602:                          "ETHIOPIC", 
 603:                          "Ethiopic");
 604: 
 605:     /**
 606:      * Cherokee.
 607:      * 0x13A0 - 0x13FF.
 608:      * @since 1.4
 609:      */
 610:     public static final UnicodeBlock CHEROKEE
 611:       = new UnicodeBlock(0x13A0, 0x13FF,
 612:                          "CHEROKEE", 
 613:                          "Cherokee");
 614: 
 615:     /**
 616:      * Unified Canadian Aboriginal Syllabics.
 617:      * 0x1400 - 0x167F.
 618:      * @since 1.4
 619:      */
 620:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 621:       = new UnicodeBlock(0x1400, 0x167F,
 622:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 623:                          "Unified Canadian Aboriginal Syllabics");
 624: 
 625:     /**
 626:      * Ogham.
 627:      * 0x1680 - 0x169F.
 628:      * @since 1.4
 629:      */
 630:     public static final UnicodeBlock OGHAM
 631:       = new UnicodeBlock(0x1680, 0x169F,
 632:                          "OGHAM", 
 633:                          "Ogham");
 634: 
 635:     /**
 636:      * Runic.
 637:      * 0x16A0 - 0x16FF.
 638:      * @since 1.4
 639:      */
 640:     public static final UnicodeBlock RUNIC
 641:       = new UnicodeBlock(0x16A0, 0x16FF,
 642:                          "RUNIC", 
 643:                          "Runic");
 644: 
 645:     /**
 646:      * Tagalog.
 647:      * 0x1700 - 0x171F.
 648:      * @since 1.5
 649:      */
 650:     public static final UnicodeBlock TAGALOG
 651:       = new UnicodeBlock(0x1700, 0x171F,
 652:                          "TAGALOG", 
 653:                          "Tagalog");
 654: 
 655:     /**
 656:      * Hanunoo.
 657:      * 0x1720 - 0x173F.
 658:      * @since 1.5
 659:      */
 660:     public static final UnicodeBlock HANUNOO
 661:       = new UnicodeBlock(0x1720, 0x173F,
 662:                          "HANUNOO", 
 663:                          "Hanunoo");
 664: 
 665:     /**
 666:      * Buhid.
 667:      * 0x1740 - 0x175F.
 668:      * @since 1.5
 669:      */
 670:     public static final UnicodeBlock BUHID
 671:       = new UnicodeBlock(0x1740, 0x175F,
 672:                          "BUHID", 
 673:                          "Buhid");
 674: 
 675:     /**
 676:      * Tagbanwa.
 677:      * 0x1760 - 0x177F.
 678:      * @since 1.5
 679:      */
 680:     public static final UnicodeBlock TAGBANWA
 681:       = new UnicodeBlock(0x1760, 0x177F,
 682:                          "TAGBANWA", 
 683:                          "Tagbanwa");
 684: 
 685:     /**
 686:      * Khmer.
 687:      * 0x1780 - 0x17FF.
 688:      * @since 1.4
 689:      */
 690:     public static final UnicodeBlock KHMER
 691:       = new UnicodeBlock(0x1780, 0x17FF,
 692:                          "KHMER", 
 693:                          "Khmer");
 694: 
 695:     /**
 696:      * Mongolian.
 697:      * 0x1800 - 0x18AF.
 698:      * @since 1.4
 699:      */
 700:     public static final UnicodeBlock MONGOLIAN
 701:       = new UnicodeBlock(0x1800, 0x18AF,
 702:                          "MONGOLIAN", 
 703:                          "Mongolian");
 704: 
 705:     /**
 706:      * Limbu.
 707:      * 0x1900 - 0x194F.
 708:      * @since 1.5
 709:      */
 710:     public static final UnicodeBlock LIMBU
 711:       = new UnicodeBlock(0x1900, 0x194F,
 712:                          "LIMBU", 
 713:                          "Limbu");
 714: 
 715:     /**
 716:      * Tai Le.
 717:      * 0x1950 - 0x197F.
 718:      * @since 1.5
 719:      */
 720:     public static final UnicodeBlock TAI_LE
 721:       = new UnicodeBlock(0x1950, 0x197F,
 722:                          "TAI_LE", 
 723:                          "Tai Le");
 724: 
 725:     /**
 726:      * Khmer Symbols.
 727:      * 0x19E0 - 0x19FF.
 728:      * @since 1.5
 729:      */
 730:     public static final UnicodeBlock KHMER_SYMBOLS
 731:       = new UnicodeBlock(0x19E0, 0x19FF,
 732:                          "KHMER_SYMBOLS", 
 733:                          "Khmer Symbols");
 734: 
 735:     /**
 736:      * Phonetic Extensions.
 737:      * 0x1D00 - 0x1D7F.
 738:      * @since 1.5
 739:      */
 740:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 741:       = new UnicodeBlock(0x1D00, 0x1D7F,
 742:                          "PHONETIC_EXTENSIONS", 
 743:                          "Phonetic Extensions");
 744: 
 745:     /**
 746:      * Latin Extended Additional.
 747:      * 0x1E00 - 0x1EFF.
 748:      */
 749:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 750:       = new UnicodeBlock(0x1E00, 0x1EFF,
 751:                          "LATIN_EXTENDED_ADDITIONAL", 
 752:                          "Latin Extended Additional");
 753: 
 754:     /**
 755:      * Greek Extended.
 756:      * 0x1F00 - 0x1FFF.
 757:      */
 758:     public static final UnicodeBlock GREEK_EXTENDED
 759:       = new UnicodeBlock(0x1F00, 0x1FFF,
 760:                          "GREEK_EXTENDED", 
 761:                          "Greek Extended");
 762: 
 763:     /**
 764:      * General Punctuation.
 765:      * 0x2000 - 0x206F.
 766:      */
 767:     public static final UnicodeBlock GENERAL_PUNCTUATION
 768:       = new UnicodeBlock(0x2000, 0x206F,
 769:                          "GENERAL_PUNCTUATION", 
 770:                          "General Punctuation");
 771: 
 772:     /**
 773:      * Superscripts and Subscripts.
 774:      * 0x2070 - 0x209F.
 775:      */
 776:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 777:       = new UnicodeBlock(0x2070, 0x209F,
 778:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 779:                          "Superscripts and Subscripts");
 780: 
 781:     /**
 782:      * Currency Symbols.
 783:      * 0x20A0 - 0x20CF.
 784:      */
 785:     public static final UnicodeBlock CURRENCY_SYMBOLS
 786:       = new UnicodeBlock(0x20A0, 0x20CF,
 787:                          "CURRENCY_SYMBOLS", 
 788:                          "Currency Symbols");
 789: 
 790:     /**
 791:      * Combining Marks for Symbols.
 792:      * 0x20D0 - 0x20FF.
 793:      */
 794:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 795:       = new UnicodeBlock(0x20D0, 0x20FF,
 796:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 797:                          "Combining Marks for Symbols");
 798: 
 799:     /**
 800:      * Letterlike Symbols.
 801:      * 0x2100 - 0x214F.
 802:      */
 803:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 804:       = new UnicodeBlock(0x2100, 0x214F,
 805:                          "LETTERLIKE_SYMBOLS", 
 806:                          "Letterlike Symbols");
 807: 
 808:     /**
 809:      * Number Forms.
 810:      * 0x2150 - 0x218F.
 811:      */
 812:     public static final UnicodeBlock NUMBER_FORMS
 813:       = new UnicodeBlock(0x2150, 0x218F,
 814:                          "NUMBER_FORMS", 
 815:                          "Number Forms");
 816: 
 817:     /**
 818:      * Arrows.
 819:      * 0x2190 - 0x21FF.
 820:      */
 821:     public static final UnicodeBlock ARROWS
 822:       = new UnicodeBlock(0x2190, 0x21FF,
 823:                          "ARROWS", 
 824:                          "Arrows");
 825: 
 826:     /**
 827:      * Mathematical Operators.
 828:      * 0x2200 - 0x22FF.
 829:      */
 830:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 831:       = new UnicodeBlock(0x2200, 0x22FF,
 832:                          "MATHEMATICAL_OPERATORS", 
 833:                          "Mathematical Operators");
 834: 
 835:     /**
 836:      * Miscellaneous Technical.
 837:      * 0x2300 - 0x23FF.
 838:      */
 839:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 840:       = new UnicodeBlock(0x2300, 0x23FF,
 841:                          "MISCELLANEOUS_TECHNICAL", 
 842:                          "Miscellaneous Technical");
 843: 
 844:     /**
 845:      * Control Pictures.
 846:      * 0x2400 - 0x243F.
 847:      */
 848:     public static final UnicodeBlock CONTROL_PICTURES
 849:       = new UnicodeBlock(0x2400, 0x243F,
 850:                          "CONTROL_PICTURES", 
 851:                          "Control Pictures");
 852: 
 853:     /**
 854:      * Optical Character Recognition.
 855:      * 0x2440 - 0x245F.
 856:      */
 857:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 858:       = new UnicodeBlock(0x2440, 0x245F,
 859:                          "OPTICAL_CHARACTER_RECOGNITION", 
 860:                          "Optical Character Recognition");
 861: 
 862:     /**
 863:      * Enclosed Alphanumerics.
 864:      * 0x2460 - 0x24FF.
 865:      */
 866:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 867:       = new UnicodeBlock(0x2460, 0x24FF,
 868:                          "ENCLOSED_ALPHANUMERICS", 
 869:                          "Enclosed Alphanumerics");
 870: 
 871:     /**
 872:      * Box Drawing.
 873:      * 0x2500 - 0x257F.
 874:      */
 875:     public static final UnicodeBlock BOX_DRAWING
 876:       = new UnicodeBlock(0x2500, 0x257F,
 877:                          "BOX_DRAWING", 
 878:                          "Box Drawing");
 879: 
 880:     /**
 881:      * Block Elements.
 882:      * 0x2580 - 0x259F.
 883:      */
 884:     public static final UnicodeBlock BLOCK_ELEMENTS
 885:       = new UnicodeBlock(0x2580, 0x259F,
 886:                          "BLOCK_ELEMENTS", 
 887:                          "Block Elements");
 888: 
 889:     /**
 890:      * Geometric Shapes.
 891:      * 0x25A0 - 0x25FF.
 892:      */
 893:     public static final UnicodeBlock GEOMETRIC_SHAPES
 894:       = new UnicodeBlock(0x25A0, 0x25FF,
 895:                          "GEOMETRIC_SHAPES", 
 896:                          "Geometric Shapes");
 897: 
 898:     /**
 899:      * Miscellaneous Symbols.
 900:      * 0x2600 - 0x26FF.
 901:      */
 902:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 903:       = new UnicodeBlock(0x2600, 0x26FF,
 904:                          "MISCELLANEOUS_SYMBOLS", 
 905:                          "Miscellaneous Symbols");
 906: 
 907:     /**
 908:      * Dingbats.
 909:      * 0x2700 - 0x27BF.
 910:      */
 911:     public static final UnicodeBlock DINGBATS
 912:       = new UnicodeBlock(0x2700, 0x27BF,
 913:                          "DINGBATS", 
 914:                          "Dingbats");
 915: 
 916:     /**
 917:      * Miscellaneous Mathematical Symbols-A.
 918:      * 0x27C0 - 0x27EF.
 919:      * @since 1.5
 920:      */
 921:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 922:       = new UnicodeBlock(0x27C0, 0x27EF,
 923:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 924:                          "Miscellaneous Mathematical Symbols-A");
 925: 
 926:     /**
 927:      * Supplemental Arrows-A.
 928:      * 0x27F0 - 0x27FF.
 929:      * @since 1.5
 930:      */
 931:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 932:       = new UnicodeBlock(0x27F0, 0x27FF,
 933:                          "SUPPLEMENTAL_ARROWS_A", 
 934:                          "Supplemental Arrows-A");
 935: 
 936:     /**
 937:      * Braille Patterns.
 938:      * 0x2800 - 0x28FF.
 939:      * @since 1.4
 940:      */
 941:     public static final UnicodeBlock BRAILLE_PATTERNS
 942:       = new UnicodeBlock(0x2800, 0x28FF,
 943:                          "BRAILLE_PATTERNS", 
 944:                          "Braille Patterns");
 945: 
 946:     /**
 947:      * Supplemental Arrows-B.
 948:      * 0x2900 - 0x297F.
 949:      * @since 1.5
 950:      */
 951:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 952:       = new UnicodeBlock(0x2900, 0x297F,
 953:                          "SUPPLEMENTAL_ARROWS_B", 
 954:                          "Supplemental Arrows-B");
 955: 
 956:     /**
 957:      * Miscellaneous Mathematical Symbols-B.
 958:      * 0x2980 - 0x29FF.
 959:      * @since 1.5
 960:      */
 961:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 962:       = new UnicodeBlock(0x2980, 0x29FF,
 963:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 964:                          "Miscellaneous Mathematical Symbols-B");
 965: 
 966:     /**
 967:      * Supplemental Mathematical Operators.
 968:      * 0x2A00 - 0x2AFF.
 969:      * @since 1.5
 970:      */
 971:     public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 972:       = new UnicodeBlock(0x2A00, 0x2AFF,
 973:                          "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 974:                          "Supplemental Mathematical Operators");
 975: 
 976:     /**
 977:      * Miscellaneous Symbols and Arrows.
 978:      * 0x2B00 - 0x2BFF.
 979:      * @since 1.5
 980:      */
 981:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 982:       = new UnicodeBlock(0x2B00, 0x2BFF,
 983:                          "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 984:                          "Miscellaneous Symbols and Arrows");
 985: 
 986:     /**
 987:      * CJK Radicals Supplement.
 988:      * 0x2E80 - 0x2EFF.
 989:      * @since 1.4
 990:      */
 991:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 992:       = new UnicodeBlock(0x2E80, 0x2EFF,
 993:                          "CJK_RADICALS_SUPPLEMENT", 
 994:                          "CJK Radicals Supplement");
 995: 
 996:     /**
 997:      * Kangxi Radicals.
 998:      * 0x2F00 - 0x2FDF.
 999:      * @since 1.4
1000:      */
1001:     public static final UnicodeBlock KANGXI_RADICALS
1002:       = new UnicodeBlock(0x2F00, 0x2FDF,
1003:                          "KANGXI_RADICALS", 
1004:                          "Kangxi Radicals");
1005: 
1006:     /**
1007:      * Ideographic Description Characters.
1008:      * 0x2FF0 - 0x2FFF.
1009:      * @since 1.4
1010:      */
1011:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1012:       = new UnicodeBlock(0x2FF0, 0x2FFF,
1013:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1014:                          "Ideographic Description Characters");
1015: 
1016:     /**
1017:      * CJK Symbols and Punctuation.
1018:      * 0x3000 - 0x303F.
1019:      */
1020:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1021:       = new UnicodeBlock(0x3000, 0x303F,
1022:                          "CJK_SYMBOLS_AND_PUNCTUATION", 
1023:                          "CJK Symbols and Punctuation");
1024: 
1025:     /**
1026:      * Hiragana.
1027:      * 0x3040 - 0x309F.
1028:      */
1029:     public static final UnicodeBlock HIRAGANA
1030:       = new UnicodeBlock(0x3040, 0x309F,
1031:                          "HIRAGANA", 
1032:                          "Hiragana");
1033: 
1034:     /**
1035:      * Katakana.
1036:      * 0x30A0 - 0x30FF.
1037:      */
1038:     public static final UnicodeBlock KATAKANA
1039:       = new UnicodeBlock(0x30A0, 0x30FF,
1040:                          "KATAKANA", 
1041:                          "Katakana");
1042: 
1043:     /**
1044:      * Bopomofo.
1045:      * 0x3100 - 0x312F.
1046:      */
1047:     public static final UnicodeBlock BOPOMOFO
1048:       = new UnicodeBlock(0x3100, 0x312F,
1049:                          "BOPOMOFO", 
1050:                          "Bopomofo");
1051: 
1052:     /**
1053:      * Hangul Compatibility Jamo.
1054:      * 0x3130 - 0x318F.
1055:      */
1056:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1057:       = new UnicodeBlock(0x3130, 0x318F,
1058:                          "HANGUL_COMPATIBILITY_JAMO", 
1059:                          "Hangul Compatibility Jamo");
1060: 
1061:     /**
1062:      * Kanbun.
1063:      * 0x3190 - 0x319F.
1064:      */
1065:     public static final UnicodeBlock KANBUN
1066:       = new UnicodeBlock(0x3190, 0x319F,
1067:                          "KANBUN", 
1068:                          "Kanbun");
1069: 
1070:     /**
1071:      * Bopomofo Extended.
1072:      * 0x31A0 - 0x31BF.
1073:      * @since 1.4
1074:      */
1075:     public static final UnicodeBlock BOPOMOFO_EXTENDED
1076:       = new UnicodeBlock(0x31A0, 0x31BF,
1077:                          "BOPOMOFO_EXTENDED", 
1078:                          "Bopomofo Extended");
1079: 
1080:     /**
1081:      * Katakana Phonetic Extensions.
1082:      * 0x31F0 - 0x31FF.
1083:      * @since 1.5
1084:      */
1085:     public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1086:       = new UnicodeBlock(0x31F0, 0x31FF,
1087:                          "KATAKANA_PHONETIC_EXTENSIONS", 
1088:                          "Katakana Phonetic Extensions");
1089: 
1090:     /**
1091:      * Enclosed CJK Letters and Months.
1092:      * 0x3200 - 0x32FF.
1093:      */
1094:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1095:       = new UnicodeBlock(0x3200, 0x32FF,
1096:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1097:                          "Enclosed CJK Letters and Months");
1098: 
1099:     /**
1100:      * CJK Compatibility.
1101:      * 0x3300 - 0x33FF.
1102:      */
1103:     public static final UnicodeBlock CJK_COMPATIBILITY
1104:       = new UnicodeBlock(0x3300, 0x33FF,
1105:                          "CJK_COMPATIBILITY", 
1106:                          "CJK Compatibility");
1107: 
1108:     /**
1109:      * CJK Unified Ideographs Extension A.
1110:      * 0x3400 - 0x4DBF.
1111:      * @since 1.4
1112:      */
1113:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1114:       = new UnicodeBlock(0x3400, 0x4DBF,
1115:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1116:                          "CJK Unified Ideographs Extension A");
1117: 
1118:     /**
1119:      * Yijing Hexagram Symbols.
1120:      * 0x4DC0 - 0x4DFF.
1121:      * @since 1.5
1122:      */
1123:     public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1124:       = new UnicodeBlock(0x4DC0, 0x4DFF,
1125:                          "YIJING_HEXAGRAM_SYMBOLS", 
1126:                          "Yijing Hexagram Symbols");
1127: 
1128:     /**
1129:      * CJK Unified Ideographs.
1130:      * 0x4E00 - 0x9FFF.
1131:      */
1132:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1133:       = new UnicodeBlock(0x4E00, 0x9FFF,
1134:                          "CJK_UNIFIED_IDEOGRAPHS", 
1135:                          "CJK Unified Ideographs");
1136: 
1137:     /**
1138:      * Yi Syllables.
1139:      * 0xA000 - 0xA48F.
1140:      * @since 1.4
1141:      */
1142:     public static final UnicodeBlock YI_SYLLABLES
1143:       = new UnicodeBlock(0xA000, 0xA48F,
1144:                          "YI_SYLLABLES", 
1145:                          "Yi Syllables");
1146: 
1147:     /**
1148:      * Yi Radicals.
1149:      * 0xA490 - 0xA4CF.
1150:      * @since 1.4
1151:      */
1152:     public static final UnicodeBlock YI_RADICALS
1153:       = new UnicodeBlock(0xA490, 0xA4CF,
1154:                          "YI_RADICALS", 
1155:                          "Yi Radicals");
1156: 
1157:     /**
1158:      * Hangul Syllables.
1159:      * 0xAC00 - 0xD7AF.
1160:      */
1161:     public static final UnicodeBlock HANGUL_SYLLABLES
1162:       = new UnicodeBlock(0xAC00, 0xD7AF,
1163:                          "HANGUL_SYLLABLES", 
1164:                          "Hangul Syllables");
1165: 
1166:     /**
1167:      * High Surrogates.
1168:      * 0xD800 - 0xDB7F.
1169:      * @since 1.5
1170:      */
1171:     public static final UnicodeBlock HIGH_SURROGATES
1172:       = new UnicodeBlock(0xD800, 0xDB7F,
1173:                          "HIGH_SURROGATES", 
1174:                          "High Surrogates");
1175: 
1176:     /**
1177:      * High Private Use Surrogates.
1178:      * 0xDB80 - 0xDBFF.
1179:      * @since 1.5
1180:      */
1181:     public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1182:       = new UnicodeBlock(0xDB80, 0xDBFF,
1183:                          "HIGH_PRIVATE_USE_SURROGATES", 
1184:                          "High Private Use Surrogates");
1185: 
1186:     /**
1187:      * Low Surrogates.
1188:      * 0xDC00 - 0xDFFF.
1189:      * @since 1.5
1190:      */
1191:     public static final UnicodeBlock LOW_SURROGATES
1192:       = new UnicodeBlock(0xDC00, 0xDFFF,
1193:                          "LOW_SURROGATES", 
1194:                          "Low Surrogates");
1195: 
1196:     /**
1197:      * Private Use Area.
1198:      * 0xE000 - 0xF8FF.
1199:      */
1200:     public static final UnicodeBlock PRIVATE_USE_AREA
1201:       = new UnicodeBlock(0xE000, 0xF8FF,
1202:                          "PRIVATE_USE_AREA", 
1203:                          "Private Use Area");
1204: 
1205:     /**
1206:      * CJK Compatibility Ideographs.
1207:      * 0xF900 - 0xFAFF.
1208:      */
1209:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1210:       = new UnicodeBlock(0xF900, 0xFAFF,
1211:                          "CJK_COMPATIBILITY_IDEOGRAPHS", 
1212:                          "CJK Compatibility Ideographs");
1213: 
1214:     /**
1215:      * Alphabetic Presentation Forms.
1216:      * 0xFB00 - 0xFB4F.
1217:      */
1218:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1219:       = new UnicodeBlock(0xFB00, 0xFB4F,
1220:                          "ALPHABETIC_PRESENTATION_FORMS", 
1221:                          "Alphabetic Presentation Forms");
1222: 
1223:     /**
1224:      * Arabic Presentation Forms-A.
1225:      * 0xFB50 - 0xFDFF.
1226:      */
1227:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1228:       = new UnicodeBlock(0xFB50, 0xFDFF,
1229:                          "ARABIC_PRESENTATION_FORMS_A", 
1230:                          "Arabic Presentation Forms-A");
1231: 
1232:     /**
1233:      * Variation Selectors.
1234:      * 0xFE00 - 0xFE0F.
1235:      * @since 1.5
1236:      */
1237:     public static final UnicodeBlock VARIATION_SELECTORS
1238:       = new UnicodeBlock(0xFE00, 0xFE0F,
1239:                          "VARIATION_SELECTORS", 
1240:                          "Variation Selectors");
1241: 
1242:     /**
1243:      * Combining Half Marks.
1244:      * 0xFE20 - 0xFE2F.
1245:      */
1246:     public static final UnicodeBlock COMBINING_HALF_MARKS
1247:       = new UnicodeBlock(0xFE20, 0xFE2F,
1248:                          "COMBINING_HALF_MARKS", 
1249:                          "Combining Half Marks");
1250: 
1251:     /**
1252:      * CJK Compatibility Forms.
1253:      * 0xFE30 - 0xFE4F.
1254:      */
1255:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1256:       = new UnicodeBlock(0xFE30, 0xFE4F,
1257:                          "CJK_COMPATIBILITY_FORMS", 
1258:                          "CJK Compatibility Forms");
1259: 
1260:     /**
1261:      * Small Form Variants.
1262:      * 0xFE50 - 0xFE6F.
1263:      */
1264:     public static final UnicodeBlock SMALL_FORM_VARIANTS
1265:       = new UnicodeBlock(0xFE50, 0xFE6F,
1266:                          "SMALL_FORM_VARIANTS", 
1267:                          "Small Form Variants");
1268: 
1269:     /**
1270:      * Arabic Presentation Forms-B.
1271:      * 0xFE70 - 0xFEFF.
1272:      */
1273:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1274:       = new UnicodeBlock(0xFE70, 0xFEFF,
1275:                          "ARABIC_PRESENTATION_FORMS_B", 
1276:                          "Arabic Presentation Forms-B");
1277: 
1278:     /**
1279:      * Halfwidth and Fullwidth Forms.
1280:      * 0xFF00 - 0xFFEF.
1281:      */
1282:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1283:       = new UnicodeBlock(0xFF00, 0xFFEF,
1284:                          "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1285:                          "Halfwidth and Fullwidth Forms");
1286: 
1287:     /**
1288:      * Specials.
1289:      * 0xFFF0 - 0xFFFF.
1290:      */
1291:     public static final UnicodeBlock SPECIALS
1292:       = new UnicodeBlock(0xFFF0, 0xFFFF,
1293:                          "SPECIALS", 
1294:                          "Specials");
1295: 
1296:     /**
1297:      * Linear B Syllabary.
1298:      * 0x10000 - 0x1007F.
1299:      * @since 1.5
1300:      */
1301:     public static final UnicodeBlock LINEAR_B_SYLLABARY
1302:       = new UnicodeBlock(0x10000, 0x1007F,
1303:                          "LINEAR_B_SYLLABARY", 
1304:                          "Linear B Syllabary");
1305: 
1306:     /**
1307:      * Linear B Ideograms.
1308:      * 0x10080 - 0x100FF.
1309:      * @since 1.5
1310:      */
1311:     public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1312:       = new UnicodeBlock(0x10080, 0x100FF,
1313:                          "LINEAR_B_IDEOGRAMS", 
1314:                          "Linear B Ideograms");
1315: 
1316:     /**
1317:      * Aegean Numbers.
1318:      * 0x10100 - 0x1013F.
1319:      * @since 1.5
1320:      */
1321:     public static final UnicodeBlock AEGEAN_NUMBERS
1322:       = new UnicodeBlock(0x10100, 0x1013F,
1323:                          "AEGEAN_NUMBERS", 
1324:                          "Aegean Numbers");
1325: 
1326:     /**
1327:      * Old Italic.
1328:      * 0x10300 - 0x1032F.
1329:      * @since 1.5
1330:      */
1331:     public static final UnicodeBlock OLD_ITALIC
1332:       = new UnicodeBlock(0x10300, 0x1032F,
1333:                          "OLD_ITALIC", 
1334:                          "Old Italic");
1335: 
1336:     /**
1337:      * Gothic.
1338:      * 0x10330 - 0x1034F.
1339:      * @since 1.5
1340:      */
1341:     public static final UnicodeBlock GOTHIC
1342:       = new UnicodeBlock(0x10330, 0x1034F,
1343:                          "GOTHIC", 
1344:                          "Gothic");
1345: 
1346:     /**
1347:      * Ugaritic.
1348:      * 0x10380 - 0x1039F.
1349:      * @since 1.5
1350:      */
1351:     public static final UnicodeBlock UGARITIC
1352:       = new UnicodeBlock(0x10380, 0x1039F,
1353:                          "UGARITIC", 
1354:                          "Ugaritic");
1355: 
1356:     /**
1357:      * Deseret.
1358:      * 0x10400 - 0x1044F.
1359:      * @since 1.5
1360:      */
1361:     public static final UnicodeBlock DESERET
1362:       = new UnicodeBlock(0x10400, 0x1044F,
1363:                          "DESERET", 
1364:                          "Deseret");
1365: 
1366:     /**
1367:      * Shavian.
1368:      * 0x10450 - 0x1047F.
1369:      * @since 1.5
1370:      */
1371:     public static final UnicodeBlock SHAVIAN
1372:       = new UnicodeBlock(0x10450, 0x1047F,
1373:                          "SHAVIAN", 
1374:                          "Shavian");
1375: 
1376:     /**
1377:      * Osmanya.
1378:      * 0x10480 - 0x104AF.
1379:      * @since 1.5
1380:      */
1381:     public static final UnicodeBlock OSMANYA
1382:       = new UnicodeBlock(0x10480, 0x104AF,
1383:                          "OSMANYA", 
1384:                          "Osmanya");
1385: 
1386:     /**
1387:      * Cypriot Syllabary.
1388:      * 0x10800 - 0x1083F.
1389:      * @since 1.5
1390:      */
1391:     public static final UnicodeBlock CYPRIOT_SYLLABARY
1392:       = new UnicodeBlock(0x10800, 0x1083F,
1393:                          "CYPRIOT_SYLLABARY", 
1394:                          "Cypriot Syllabary");
1395: 
1396:     /**
1397:      * Byzantine Musical Symbols.
1398:      * 0x1D000 - 0x1D0FF.
1399:      * @since 1.5
1400:      */
1401:     public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1402:       = new UnicodeBlock(0x1D000, 0x1D0FF,
1403:                          "BYZANTINE_MUSICAL_SYMBOLS", 
1404:                          "Byzantine Musical Symbols");
1405: 
1406:     /**
1407:      * Musical Symbols.
1408:      * 0x1D100 - 0x1D1FF.
1409:      * @since 1.5
1410:      */
1411:     public static final UnicodeBlock MUSICAL_SYMBOLS
1412:       = new UnicodeBlock(0x1D100, 0x1D1FF,
1413:                          "MUSICAL_SYMBOLS", 
1414:                          "Musical Symbols");
1415: 
1416:     /**
1417:      * Tai Xuan Jing Symbols.
1418:      * 0x1D300 - 0x1D35F.
1419:      * @since 1.5
1420:      */
1421:     public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1422:       = new UnicodeBlock(0x1D300, 0x1D35F,
1423:                          "TAI_XUAN_JING_SYMBOLS", 
1424:                          "Tai Xuan Jing Symbols");
1425: 
1426:     /**
1427:      * Mathematical Alphanumeric Symbols.
1428:      * 0x1D400 - 0x1D7FF.
1429:      * @since 1.5
1430:      */
1431:     public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1432:       = new UnicodeBlock(0x1D400, 0x1D7FF,
1433:                          "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1434:                          "Mathematical Alphanumeric Symbols");
1435: 
1436:     /**
1437:      * CJK Unified Ideographs Extension B.
1438:      * 0x20000 - 0x2A6DF.
1439:      * @since 1.5
1440:      */
1441:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1442:       = new UnicodeBlock(0x20000, 0x2A6DF,
1443:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1444:                          "CJK Unified Ideographs Extension B");
1445: 
1446:     /**
1447:      * CJK Compatibility Ideographs Supplement.
1448:      * 0x2F800 - 0x2FA1F.
1449:      * @since 1.5
1450:      */
1451:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1452:       = new UnicodeBlock(0x2F800, 0x2FA1F,
1453:                          "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1454:                          "CJK Compatibility Ideographs Supplement");
1455: 
1456:     /**
1457:      * Tags.
1458:      * 0xE0000 - 0xE007F.
1459:      * @since 1.5
1460:      */
1461:     public static final UnicodeBlock TAGS
1462:       = new UnicodeBlock(0xE0000, 0xE007F,
1463:                          "TAGS", 
1464:                          "Tags");
1465: 
1466:     /**
1467:      * Variation Selectors Supplement.
1468:      * 0xE0100 - 0xE01EF.
1469:      * @since 1.5
1470:      */
1471:     public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1472:       = new UnicodeBlock(0xE0100, 0xE01EF,
1473:                          "VARIATION_SELECTORS_SUPPLEMENT", 
1474:                          "Variation Selectors Supplement");
1475: 
1476:     /**
1477:      * Supplementary Private Use Area-A.
1478:      * 0xF0000 - 0xFFFFF.
1479:      * @since 1.5
1480:      */
1481:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1482:       = new UnicodeBlock(0xF0000, 0xFFFFF,
1483:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1484:                          "Supplementary Private Use Area-A");
1485: 
1486:     /**
1487:      * Supplementary Private Use Area-B.
1488:      * 0x100000 - 0x10FFFF.
1489:      * @since 1.5
1490:      */
1491:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1492:       = new UnicodeBlock(0x100000, 0x10FFFF,
1493:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1494:                          "Supplementary Private Use Area-B");
1495: 
1496:     /**
1497:      * Surrogates Area.
1498:      * 0xD800 - 0xDFFF.
1499:      * @deprecated As of 1.5, the three areas, 
1500:      * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1501:      * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1502:      * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1503:      * by the Unicode standard, should be used in preference to
1504:      * this.  These are also returned from calls to <code>of(int)</code>
1505:      * and <code>of(char)</code>.
1506:      */
1507:     @Deprecated
1508:     public static final UnicodeBlock SURROGATES_AREA
1509:       = new UnicodeBlock(0xD800, 0xDFFF,
1510:                          "SURROGATES_AREA",
1511:              "Surrogates Area");
1512: 
1513:     /**
1514:      * The defined subsets; the deprecated SURROGATES_AREA is not listed.
1515:      */
1516:     private static final UnicodeBlock sets[] = {
1517:       BASIC_LATIN,
1518:       LATIN_1_SUPPLEMENT,
1519:       LATIN_EXTENDED_A,
1520:       LATIN_EXTENDED_B,
1521:       IPA_EXTENSIONS,
1522:       SPACING_MODIFIER_LETTERS,
1523:       COMBINING_DIACRITICAL_MARKS,
1524:       GREEK,
1525:       CYRILLIC,
1526:       CYRILLIC_SUPPLEMENTARY,
1527:       ARMENIAN,
1528:       HEBREW,
1529:       ARABIC,
1530:       SYRIAC,
1531:       THAANA,
1532:       DEVANAGARI,
1533:       BENGALI,
1534:       GURMUKHI,
1535:       GUJARATI,
1536:       ORIYA,
1537:       TAMIL,
1538:       TELUGU,
1539:       KANNADA,
1540:       MALAYALAM,
1541:       SINHALA,
1542:       THAI,
1543:       LAO,
1544:       TIBETAN,
1545:       MYANMAR,
1546:       GEORGIAN,
1547:       HANGUL_JAMO,
1548:       ETHIOPIC,
1549:       CHEROKEE,
1550:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1551:       OGHAM,
1552:       RUNIC,
1553:       TAGALOG,
1554:       HANUNOO,
1555:       BUHID,
1556:       TAGBANWA,
1557:       KHMER,
1558:       MONGOLIAN,
1559:       LIMBU,
1560:       TAI_LE,
1561:       KHMER_SYMBOLS,
1562:       PHONETIC_EXTENSIONS,
1563:       LATIN_EXTENDED_ADDITIONAL,
1564:       GREEK_EXTENDED,
1565:       GENERAL_PUNCTUATION,
1566:       SUPERSCRIPTS_AND_SUBSCRIPTS,
1567:       CURRENCY_SYMBOLS,
1568:       COMBINING_MARKS_FOR_SYMBOLS,
1569:       LETTERLIKE_SYMBOLS,
1570:       NUMBER_FORMS,
1571:       ARROWS,
1572:       MATHEMATICAL_OPERATORS,
1573:       MISCELLANEOUS_TECHNICAL,
1574:       CONTROL_PICTURES,
1575:       OPTICAL_CHARACTER_RECOGNITION,
1576:       ENCLOSED_ALPHANUMERICS,
1577:       BOX_DRAWING,
1578:       BLOCK_ELEMENTS,
1579:       GEOMETRIC_SHAPES,
1580:       MISCELLANEOUS_SYMBOLS,
1581:       DINGBATS,
1582:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1583:       SUPPLEMENTAL_ARROWS_A,
1584:       BRAILLE_PATTERNS,
1585:       SUPPLEMENTAL_ARROWS_B,
1586:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1587:       SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1588:       MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1589:       CJK_RADICALS_SUPPLEMENT,
1590:       KANGXI_RADICALS,
1591:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1592:       CJK_SYMBOLS_AND_PUNCTUATION,
1593:       HIRAGANA,
1594:       KATAKANA,
1595:       BOPOMOFO,
1596:       HANGUL_COMPATIBILITY_JAMO,
1597:       KANBUN,
1598:       BOPOMOFO_EXTENDED,
1599:       KATAKANA_PHONETIC_EXTENSIONS,
1600:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
1601:       CJK_COMPATIBILITY,
1602:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1603:       YIJING_HEXAGRAM_SYMBOLS,
1604:       CJK_UNIFIED_IDEOGRAPHS,
1605:       YI_SYLLABLES,
1606:       YI_RADICALS,
1607:       HANGUL_SYLLABLES,
1608:       HIGH_SURROGATES,
1609:       HIGH_PRIVATE_USE_SURROGATES,
1610:       LOW_SURROGATES,
1611:       PRIVATE_USE_AREA,
1612:       CJK_COMPATIBILITY_IDEOGRAPHS,
1613:       ALPHABETIC_PRESENTATION_FORMS,
1614:       ARABIC_PRESENTATION_FORMS_A,
1615:       VARIATION_SELECTORS,
1616:       COMBINING_HALF_MARKS,
1617:       CJK_COMPATIBILITY_FORMS,
1618:       SMALL_FORM_VARIANTS,
1619:       ARABIC_PRESENTATION_FORMS_B,
1620:       HALFWIDTH_AND_FULLWIDTH_FORMS,
1621:       SPECIALS,
1622:       LINEAR_B_SYLLABARY,
1623:       LINEAR_B_IDEOGRAMS,
1624:       AEGEAN_NUMBERS,
1625:       OLD_ITALIC,
1626:       GOTHIC,
1627:       UGARITIC,
1628:       DESERET,
1629:       SHAVIAN,
1630:       OSMANYA,
1631:       CYPRIOT_SYLLABARY,
1632:       BYZANTINE_MUSICAL_SYMBOLS,
1633:       MUSICAL_SYMBOLS,
1634:       TAI_XUAN_JING_SYMBOLS,
1635:       MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1636:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1637:       CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1638:       TAGS,
1639:       VARIATION_SELECTORS_SUPPLEMENT,
1640:       SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1641:       SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1642:     };
1643:   } // class UnicodeBlock
1644: 
1645:   /**
1646:    * A class to encompass all the properties of characters in the
1647:    * private use blocks in the Unicode standard.  This class extends
1648:    * UnassignedCharacters and hides getType(), isDefined() and
1649:    * getDirectionality(), whose results differ for private use.
1650:    * @author Anthony Balkissoon abalkiss at redhat dot com
1651:    *
1652:    */
1653:   private static class PrivateUseCharacters extends UnassignedCharacters
1654:   {
1655:     /**
1656:      * Returns PRIVATE_USE, or UNASSIGNED for the last two code points of a plane.
1657:      */
1658:     static int getType(int cp)
1659:     {
1660:       // The upper 2 code points in any plane are considered unassigned, 
1661:       // even in the private-use planes.
1662:       if ((cp & 0xffff) >= 0xfffe)
1663:         return UnassignedCharacters.getType(cp);
1664:       return PRIVATE_USE;
1665:     }
1666:     
1667:     /**
1668:      * Returns true unless cp is one of the last two code points of its plane.
1669:      */
1670:     static boolean isDefined(int cp)
1671:     {
1672:       // The upper 2 code points in any plane are considered unassigned, 
1673:       // even in the private-use planes.
1674:       if ((cp & 0xffff) >= 0xfffe)
1675:         return UnassignedCharacters.isDefined(cp);
1676:       return true;
1677:     }
1678:     
1679:     /**
1680:      * Returns left-to-right, or undefined for the last two code points of a plane.
1681:      */
1682:     static byte getDirectionality(int cp)
1683:     {
1684:       if ((cp & 0xffff) >= 0xfffe) // same plane-end exception as getType() and isDefined()
1685:         return UnassignedCharacters.getDirectionality(cp);
1686:       return DIRECTIONALITY_LEFT_TO_RIGHT;
1687:     }
1688:   }
1689:   
1690:   /**
1691:    * A class to encompass all the properties of code points that are 
1692:    * currently undefined in the Unicode standard.
1693:    * @author Anthony Balkissoon abalkiss at redhat dot com
1694:    *
1695:    */
1696:   private static class UnassignedCharacters
1697:   {
1698:     /**
1699:      * Returns the numeric value for the unassigned characters.
1700:      * @param cp the character
1701:      * @param radix the radix (not used)
1702:      * @return the numeric value of this character in this radix
1703:      */
1704:     static int digit(int cp, int radix)
1705:     {
1706:       return -1;
1707:     }
1708: 
1709:     /**
1710:      * Returns the Unicode directionality property for unassigned 
1711:      * characters.
1712:      * @param cp the character
1713:      * @return DIRECTIONALITY_UNDEFINED
1714:      */
1715:     static byte getDirectionality(int cp)
1716:     {
1717:       return DIRECTIONALITY_UNDEFINED;
1718:     }
1719: 
1720:     /**
1721:      * Returns -1, the numeric value for unassigned Unicode characters.
1722:      * @param cp the character
1723:      * @return -1
1724:      */
1725:     static int getNumericValue(int cp)
1726:     {
1727:       return -1;
1728:     }
1729: 
1730:     /**
1731:      * Returns UNASSIGNED, the type of unassigned Unicode characters.
1732:      * @param cp the character
1733:      * @return UNASSIGNED
1734:      */
1735:     static int getType(int cp)
1736:     {
1737:       return UNASSIGNED;
1738:     }
1739:     
1740:     /**
1741:      * Returns false to indiciate that the character is not defined in the 
1742:      * Unicode standard.
1743:      * @param cp the character
1744:      * @return false
1745:      */
1746:     static boolean isDefined(int cp)
1747:     {
1748:       return false;
1749:     }
1750: 
1751:     /**
1752:      * Returns false to indicate that the character is not a digit.
1753:      * @param cp the character
1754:      * @return false
1755:      */
1756:     static boolean isDigit(int cp)
1757:     {
1758:       return false;
1759:     }
1760: 
1761:     /**
1762:      * Returns false to indicate that the character cannot be ignored 
1763:      * within an identifier
1764:      * @param cp the character
1765:      * @return false
1766:      */
1767:     static boolean isIdentifierIgnorable(int cp)
1768:     {
1769:       return false;
1770:     }
1771: 
1772:     /**
1773:      * Returns false to indicate that the character cannot be part of a 
1774:      * Java identifier.
1775:      * @param cp the character
1776:      * @return false
1777:      */
1778:     static boolean isJavaIdentifierPart(int cp)
1779:     {
1780:       return false;
1781:     }
1782: 
1783:     /**
1784:      * Returns false to indicate that the character cannot be start a 
1785:      * Java identifier.
1786:      * @param cp the character
1787:      * @return false
1788:      */
1789:     static boolean isJavaIdentiferStart(int cp)
1790:     {
1791:       return false;
1792:     }
1793: 
1794:     /**
1795:      * Returns false to indicate that the character is not a letter.
1796:      * @param cp the character
1797:      * @return false
1798:      */
1799:     static boolean isLetter(int cp)
1800:     {
1801:       return false;
1802:     }
1803: 
1804:     /**
1805:      * Returns false to indicate that the character cannot is neither a letter
1806:      * nor a digit.
1807:      * @param cp the character
1808:      * @return false
1809:      */
1810:     static boolean isLetterOrDigit(int cp)
1811:     {
1812:       return false;
1813:     }
1814: 
1815:     /**
1816:      * Returns false to indicate that the character is not a lowercase letter.
1817:      * @param cp the character
1818:      * @return false
1819:      */
1820:     static boolean isLowerCase(int cp)
1821:     {
1822:       return false;
1823:     }
1824:     
1825:     /**
1826:      * Returns false to indicate that the character cannot is not mirrored.
1827:      * @param cp the character
1828:      * @return false
1829:      */
1830:     static boolean isMirrored(int cp)
1831:     {
1832:       return false;
1833:     }
1834: 
1835:     /**
1836:      * Returns false to indicate that the character is not a space character.
1837:      * @param cp the character
1838:      * @return false
1839:      */
1840:     static boolean isSpaceChar(int cp)
1841:     {
1842:       return false;
1843:     }
1844:     
1845:     /**
1846:      * Returns false to indicate that the character it not a titlecase letter.
1847:      * @param cp the character
1848:      * @return false
1849:      */
1850:     static boolean isTitleCase(int cp)
1851:     {
1852:       return false;
1853:     }
1854:     
    /**
     * Always reports that the given code point cannot be part of a
     * Unicode identifier.
     *
     * @param cp the code point to test; its value is ignored
     * @return <code>false</code>, unconditionally
     */
    static boolean isUnicodeIdentifierPart(int cp)
    {
      return false;
    }
1865: 
    /**
     * Always reports that the given code point cannot start a
     * Unicode identifier.
     *
     * @param cp the code point to test; its value is ignored
     * @return <code>false</code>, unconditionally
     */
    static boolean isUnicodeIdentifierStart(int cp)
    {
      return false;
    }
1876: 
    /**
     * Always reports that the given code point is not an uppercase letter.
     *
     * @param cp the code point to test; its value is ignored
     * @return <code>false</code>, unconditionally
     */
    static boolean isUpperCase(int cp)
    {
      return false;
    }
1886: 
    /**
     * Always reports that the given code point is not a whitespace
     * character.
     *
     * @param cp the code point to test; its value is ignored
     * @return <code>false</code>, unconditionally
     */
    static boolean isWhiteSpace(int cp)
    {
      return false;
    }
1897: 
    /**
     * Returns the code point unchanged, indicating that it has no lowercase
     * conversion.
     *
     * @param cp the code point to convert
     * @return <code>cp</code> itself
     */
    static int toLowerCase(int cp)
    {
      return cp;
    }
1907:     
    /**
     * Returns the code point unchanged, indicating that it has no titlecase
     * conversion.
     *
     * @param cp the code point to convert
     * @return <code>cp</code> itself
     */
    static int toTitleCase(int cp)
    {
      return cp;
    }
1917: 
    /**
     * Returns the code point unchanged, indicating that it has no uppercase
     * conversion.
     *
     * @param cp the code point to convert
     * @return <code>cp</code> itself
     */
    static int toUpperCase(int cp)
    {
      return cp;
    }
1927:   }
1928: 
  /**
   * The immutable value of this Character.  It is assigned once, by the
   * constructor, and is the only instance state of the wrapper.
   *
   * @serial the value of this Character
   */
  private final char value;

  /**
   * Serialization version identifier.  Compatible with JDK 1.0+.
   */
  private static final long serialVersionUID = 3786198910865385080L;
1940: 
  /**
   * Smallest value allowed for radix arguments in Java. This value is
   * <code>2</code>.
   *
   * @see #digit(char, int)
   * @see #forDigit(int, int)
   * @see Integer#toString(int, int)
   * @see Integer#valueOf(String)
   */
  public static final int MIN_RADIX = 2;

  /**
   * Largest value allowed for radix arguments in Java. This value is
   * <code>36</code>.
   *
   * @see #digit(char, int)
   * @see #forDigit(int, int)
   * @see Integer#toString(int, int)
   * @see Integer#valueOf(String)
   */
  public static final int MAX_RADIX = 36;
1960: 
  /**
   * The minimum value the <code>char</code> data type can hold.
   * This value is <code>'\\u0000'</code>.
   */
  public static final char MIN_VALUE = '\u0000';

  /**
   * The maximum value the <code>char</code> data type can hold.
   * This value is <code>'\\uFFFF'</code>.
   */
  public static final char MAX_VALUE = '\uFFFF';
1972: 
  /**
   * The minimum Unicode 4.0 code point.  This value is <code>0</code>.
   * @since 1.5
   */
  public static final int MIN_CODE_POINT = 0;

  /**
   * The maximum Unicode 4.0 code point, which is greater than the range
   * of the char data type.
   * This value is <code>0x10FFFF</code>.
   * @since 1.5
   */
  public static final int MAX_CODE_POINT = 0x10FFFF;

  /**
   * The minimum Unicode high surrogate code unit, or
   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
   * This value is <code>'\\uD800'</code>.
   * @since 1.5
   */
  public static final char MIN_HIGH_SURROGATE = '\uD800';

  /**
   * The maximum Unicode high surrogate code unit, or
   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
   * This value is <code>'\\uDBFF'</code>.
   * @since 1.5
   */
  public static final char MAX_HIGH_SURROGATE = '\uDBFF';

  /**
   * The minimum Unicode low surrogate code unit, or
   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
   * This value is <code>'\\uDC00'</code>.
   * @since 1.5
   */
  public static final char MIN_LOW_SURROGATE = '\uDC00';

  /**
   * The maximum Unicode low surrogate code unit, or
   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
   * This value is <code>'\\uDFFF'</code>.
   * @since 1.5
   */
  public static final char MAX_LOW_SURROGATE = '\uDFFF';  

  /**
   * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
   * This is the same value as {@link #MIN_HIGH_SURROGATE},
   * <code>'\\uD800'</code>.
   * @since 1.5
   */
  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

  /**
   * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
   * This is the same value as {@link #MAX_LOW_SURROGATE},
   * <code>'\\uDFFF'</code>.
   * @since 1.5
   */
  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;

  /**
   * The lowest possible supplementary Unicode code point (the first code
   * point outside the basic multilingual plane (BMP)).
   * This value is <code>0x10000</code>.
   * @since 1.5
   */ 
  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
2039: 
  /**
   * Class object representing the primitive char data type.  It is
   * obtained from the VM via <code>VMClassLoader.getPrimitiveClass('C')</code>.
   *
   * @since 1.1
   */
  // NOTE(review): the explicit cast suggests getPrimitiveClass does not
  // return Class<Character> directly -- confirm against VMClassLoader.
  public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');

  /**
   * The number of bits needed to represent a <code>char</code>.
   * This value is <code>16</code>.
   * @since 1.5
   */
  public static final int SIZE = 16;
2052: 
2053:   // This caches some Character values, and is used by boxing
2054:   // conversions via valueOf().  We must cache at least 0..127;
2055:   // this constant controls how much we actually cache.
2056:   private static final int MAX_CACHE = 127;
2057:   private static Character[] charCache = new Character[MAX_CACHE + 1];
2058:   static
2059:   {
2060:      for (char i=0; i <= MAX_CACHE; i++)
2061:        charCache[i] = new Character(i);
2062:   }
2063: 
  // Unicode general category constants.  The isLowerCase, isUpperCase,
  // isTitleCase and isDigit tests compare the result of getType() against
  // these values.

  /**
   * General category "Lu": Letter, Uppercase (Informative).
   *
   * @since 1.1
   */
  public static final byte UPPERCASE_LETTER = 1;

  /**
   * General category "Ll": Letter, Lowercase (Informative).
   *
   * @since 1.1
   */
  public static final byte LOWERCASE_LETTER = 2;

  /**
   * General category "Lt": Letter, Titlecase (Informative).
   *
   * @since 1.1
   */
  public static final byte TITLECASE_LETTER = 3;

  /**
   * General category "Mn": Mark, Non-Spacing (Normative).
   *
   * @since 1.1
   */
  public static final byte NON_SPACING_MARK = 6;

  /**
   * General category "Mc": Mark, Spacing Combining (Normative).
   *
   * @since 1.1
   */
  public static final byte COMBINING_SPACING_MARK = 8;

  /**
   * General category "Me": Mark, Enclosing (Normative).
   *
   * @since 1.1
   */
  public static final byte ENCLOSING_MARK = 7;

  /**
   * General category "Nd": Number, Decimal Digit (Normative).
   *
   * @since 1.1
   */
  public static final byte DECIMAL_DIGIT_NUMBER = 9;

  /**
   * General category "Nl": Number, Letter (Normative).
   *
   * @since 1.1
   */
  public static final byte LETTER_NUMBER = 10;

  /**
   * General category "No": Number, Other (Normative).
   *
   * @since 1.1
   */
  public static final byte OTHER_NUMBER = 11;

  /**
   * General category "Zs": Separator, Space (Normative).
   *
   * @since 1.1
   */
  public static final byte SPACE_SEPARATOR = 12;

  /**
   * General category "Zl": Separator, Line (Normative).
   *
   * @since 1.1
   */
  public static final byte LINE_SEPARATOR = 13;

  /**
   * General category "Zp": Separator, Paragraph (Normative).
   *
   * @since 1.1
   */
  public static final byte PARAGRAPH_SEPARATOR = 14;

  /**
   * General category "Cc": Other, Control (Normative).
   *
   * @since 1.1
   */
  public static final byte CONTROL = 15;

  /**
   * General category "Cf": Other, Format (Normative).
   *
   * @since 1.1
   */
  public static final byte FORMAT = 16;

  /**
   * General category "Cs": Other, Surrogate (Normative).
   *
   * @since 1.1
   */
  public static final byte SURROGATE = 19;

  /**
   * General category "Co": Other, Private Use (Normative).
   *
   * @since 1.1
   */
  public static final byte PRIVATE_USE = 18;

  /**
   * General category "Cn": Other, Not Assigned (Normative).
   *
   * @since 1.1
   */
  public static final byte UNASSIGNED = 0;

  /**
   * General category "Lm": Letter, Modifier (Informative).
   *
   * @since 1.1
   */
  public static final byte MODIFIER_LETTER = 4;

  /**
   * General category "Lo": Letter, Other (Informative).
   *
   * @since 1.1
   */
  public static final byte OTHER_LETTER = 5;

  /**
   * General category "Pc": Punctuation, Connector (Informative).
   *
   * @since 1.1
   */
  public static final byte CONNECTOR_PUNCTUATION = 23;

  /**
   * General category "Pd": Punctuation, Dash (Informative).
   *
   * @since 1.1
   */
  public static final byte DASH_PUNCTUATION = 20;

  /**
   * General category "Ps": Punctuation, Open (Informative).
   *
   * @since 1.1
   */
  public static final byte START_PUNCTUATION = 21;

  /**
   * General category "Pe": Punctuation, Close (Informative).
   *
   * @since 1.1
   */
  public static final byte END_PUNCTUATION = 22;

  /**
   * General category "Pi": Punctuation, Initial Quote (Informative).
   *
   * @since 1.4
   */
  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

  /**
   * General category "Pf": Punctuation, Final Quote (Informative).
   *
   * @since 1.4
   */
  public static final byte FINAL_QUOTE_PUNCTUATION = 30;

  /**
   * General category "Po": Punctuation, Other (Informative).
   *
   * @since 1.1
   */
  public static final byte OTHER_PUNCTUATION = 24;

  /**
   * General category "Sm": Symbol, Math (Informative).
   *
   * @since 1.1
   */
  public static final byte MATH_SYMBOL = 25;

  /**
   * General category "Sc": Symbol, Currency (Informative).
   *
   * @since 1.1
   */
  public static final byte CURRENCY_SYMBOL = 26;

  /**
   * General category "Sk": Symbol, Modifier (Informative).
   *
   * @since 1.1
   */
  public static final byte MODIFIER_SYMBOL = 27;

  /**
   * General category "So": Symbol, Other (Informative).
   *
   * @since 1.1
   */
  public static final byte OTHER_SYMBOL = 28;
2273: 
  /**
   * Undefined bidirectional character type. Undefined char values have
   * undefined directionality in the Unicode specification.
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_UNDEFINED = -1;

  /**
   * Strong bidirectional character type "L" (left-to-right).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

  /**
   * Strong bidirectional character type "R" (right-to-left).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

  /**
   * Strong bidirectional character type "AL" (right-to-left Arabic).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

  /**
   * Weak bidirectional character type "EN" (European number).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

  /**
   * Weak bidirectional character type "ES" (European number separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

  /**
   * Weak bidirectional character type "ET" (European number terminator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

  /**
   * Weak bidirectional character type "AN" (Arabic number).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

  /**
   * Weak bidirectional character type "CS" (common number separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

  /**
   * Weak bidirectional character type "NSM" (nonspacing mark).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

  /**
   * Weak bidirectional character type "BN" (boundary neutral).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

  /**
   * Neutral bidirectional character type "B" (paragraph separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

  /**
   * Neutral bidirectional character type "S" (segment separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

  /**
   * Neutral bidirectional character type "WS" (whitespace).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_WHITESPACE = 12;

  /**
   * Neutral bidirectional character type "ON" (other neutrals).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

  /**
   * Strong bidirectional character type "LRE" (left-to-right embedding).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

  /**
   * Strong bidirectional character type "LRO" (left-to-right override).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

  /**
   * Strong bidirectional character type "RLE" (right-to-left embedding).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

  /**
   * Strong bidirectional character type "RLO" (right-to-left override).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

  /**
   * Weak bidirectional character type "PDF" (pop directional format).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2414: 
2415:   /**
2416:    * Stores unicode block offset lookup table. Exploit package visibility of
2417:    * String.value to avoid copying the array.
2418:    * @see #readCodePoint(int)
2419:    * @see CharData#BLOCKS
2420:    */
2421:   private static final char[][] blocks = 
2422:     new char[][]{
2423:                  String.zeroBasedStringValue(CharData.BLOCKS[0]),
2424:                  String.zeroBasedStringValue(CharData.BLOCKS[1]),
2425:                  String.zeroBasedStringValue(CharData.BLOCKS[2]),
2426:                  String.zeroBasedStringValue(CharData.BLOCKS[3]),
2427:                  String.zeroBasedStringValue(CharData.BLOCKS[4]),
2428:                  String.zeroBasedStringValue(CharData.BLOCKS[5]),
2429:                  String.zeroBasedStringValue(CharData.BLOCKS[6]),
2430:                  String.zeroBasedStringValue(CharData.BLOCKS[7]),
2431:                  String.zeroBasedStringValue(CharData.BLOCKS[8]),
2432:                  String.zeroBasedStringValue(CharData.BLOCKS[9]),
2433:                  String.zeroBasedStringValue(CharData.BLOCKS[10]),
2434:                  String.zeroBasedStringValue(CharData.BLOCKS[11]),
2435:                  String.zeroBasedStringValue(CharData.BLOCKS[12]),
2436:                  String.zeroBasedStringValue(CharData.BLOCKS[13]),
2437:                  String.zeroBasedStringValue(CharData.BLOCKS[14]),
2438:                  String.zeroBasedStringValue(CharData.BLOCKS[15]),
2439:                  String.zeroBasedStringValue(CharData.BLOCKS[16])};
2440: 
2441:   /**
2442:    * Stores unicode attribute offset lookup table. Exploit package visibility
2443:    * of String.value to avoid copying the array.
2444:    * @see CharData#DATA
2445:    */
2446:   private static final char[][] data = 
2447:     new char[][]{
2448:                  String.zeroBasedStringValue(CharData.DATA[0]),
2449:                  String.zeroBasedStringValue(CharData.DATA[1]),
2450:                  String.zeroBasedStringValue(CharData.DATA[2]),
2451:                  String.zeroBasedStringValue(CharData.DATA[3]),
2452:                  String.zeroBasedStringValue(CharData.DATA[4]),
2453:                  String.zeroBasedStringValue(CharData.DATA[5]),
2454:                  String.zeroBasedStringValue(CharData.DATA[6]),
2455:                  String.zeroBasedStringValue(CharData.DATA[7]),
2456:                  String.zeroBasedStringValue(CharData.DATA[8]),
2457:                  String.zeroBasedStringValue(CharData.DATA[9]),
2458:                  String.zeroBasedStringValue(CharData.DATA[10]),
2459:                  String.zeroBasedStringValue(CharData.DATA[11]),
2460:                  String.zeroBasedStringValue(CharData.DATA[12]),
2461:                  String.zeroBasedStringValue(CharData.DATA[13]),
2462:                  String.zeroBasedStringValue(CharData.DATA[14]),
2463:                  String.zeroBasedStringValue(CharData.DATA[15]),
2464:                  String.zeroBasedStringValue(CharData.DATA[16])};
2465: 
2466:   /**
2467:    * Stores unicode numeric value attribute table. Exploit package visibility
2468:    * of String.value to avoid copying the array.
2469:    * @see CharData#NUM_VALUE
2470:    */
2471:   private static final char[][] numValue = 
2472:     new char[][]{
2473:                  String.zeroBasedStringValue(CharData.NUM_VALUE[0]),
2474:                  String.zeroBasedStringValue(CharData.NUM_VALUE[1]),
2475:                  String.zeroBasedStringValue(CharData.NUM_VALUE[2]),
2476:                  String.zeroBasedStringValue(CharData.NUM_VALUE[3]),
2477:                  String.zeroBasedStringValue(CharData.NUM_VALUE[4]),
2478:                  String.zeroBasedStringValue(CharData.NUM_VALUE[5]),
2479:                  String.zeroBasedStringValue(CharData.NUM_VALUE[6]),
2480:                  String.zeroBasedStringValue(CharData.NUM_VALUE[7]),
2481:                  String.zeroBasedStringValue(CharData.NUM_VALUE[8]),
2482:                  String.zeroBasedStringValue(CharData.NUM_VALUE[9]),
2483:                  String.zeroBasedStringValue(CharData.NUM_VALUE[10]),
2484:                  String.zeroBasedStringValue(CharData.NUM_VALUE[11]),
2485:                  String.zeroBasedStringValue(CharData.NUM_VALUE[12]),
2486:                  String.zeroBasedStringValue(CharData.NUM_VALUE[13]),
2487:                  String.zeroBasedStringValue(CharData.NUM_VALUE[14]),
2488:                  String.zeroBasedStringValue(CharData.NUM_VALUE[15]),
2489:                  String.zeroBasedStringValue(CharData.NUM_VALUE[16])};
2490: 
2491:   /**
2492:    * Stores unicode uppercase attribute table. Exploit package visibility
2493:    * of String.value to avoid copying the array.
2494:    * @see CharData#UPPER
2495:    */  
2496:   private static final char[][] upper = 
2497:     new char[][]{
2498:                  String.zeroBasedStringValue(CharData.UPPER[0]),
2499:                  String.zeroBasedStringValue(CharData.UPPER[1]),
2500:                  String.zeroBasedStringValue(CharData.UPPER[2]),
2501:                  String.zeroBasedStringValue(CharData.UPPER[3]),
2502:                  String.zeroBasedStringValue(CharData.UPPER[4]),
2503:                  String.zeroBasedStringValue(CharData.UPPER[5]),
2504:                  String.zeroBasedStringValue(CharData.UPPER[6]),
2505:                  String.zeroBasedStringValue(CharData.UPPER[7]),
2506:                  String.zeroBasedStringValue(CharData.UPPER[8]),
2507:                  String.zeroBasedStringValue(CharData.UPPER[9]),
2508:                  String.zeroBasedStringValue(CharData.UPPER[10]),
2509:                  String.zeroBasedStringValue(CharData.UPPER[11]),
2510:                  String.zeroBasedStringValue(CharData.UPPER[12]),
2511:                  String.zeroBasedStringValue(CharData.UPPER[13]),
2512:                  String.zeroBasedStringValue(CharData.UPPER[14]),
2513:                  String.zeroBasedStringValue(CharData.UPPER[15]),
2514:                  String.zeroBasedStringValue(CharData.UPPER[16])};
2515: 
2516:   /**
2517:    * Stores unicode lowercase attribute table. Exploit package visibility
2518:    * of String.value to avoid copying the array.
2519:    * @see CharData#LOWER
2520:    */
2521:   private static final char[][] lower = 
2522:     new char[][]{
2523:                  String.zeroBasedStringValue(CharData.LOWER[0]),
2524:                  String.zeroBasedStringValue(CharData.LOWER[1]),
2525:                  String.zeroBasedStringValue(CharData.LOWER[2]),
2526:                  String.zeroBasedStringValue(CharData.LOWER[3]),
2527:                  String.zeroBasedStringValue(CharData.LOWER[4]),
2528:                  String.zeroBasedStringValue(CharData.LOWER[5]),
2529:                  String.zeroBasedStringValue(CharData.LOWER[6]),
2530:                  String.zeroBasedStringValue(CharData.LOWER[7]),
2531:                  String.zeroBasedStringValue(CharData.LOWER[8]),
2532:                  String.zeroBasedStringValue(CharData.LOWER[9]),
2533:                  String.zeroBasedStringValue(CharData.LOWER[10]),
2534:                  String.zeroBasedStringValue(CharData.LOWER[11]),
2535:                  String.zeroBasedStringValue(CharData.LOWER[12]),
2536:                  String.zeroBasedStringValue(CharData.LOWER[13]),
2537:                  String.zeroBasedStringValue(CharData.LOWER[14]),
2538:                  String.zeroBasedStringValue(CharData.LOWER[15]),
2539:                  String.zeroBasedStringValue(CharData.LOWER[16])};
2540: 
2541:   /**
2542:    * Stores unicode direction attribute table. Exploit package visibility
2543:    * of String.value to avoid copying the array.
2544:    * @see CharData#DIRECTION
2545:    */
2546:   // Package visible for use by String.
2547:   static final char[][] direction = 
2548:     new char[][]{
2549:                  String.zeroBasedStringValue(CharData.DIRECTION[0]),
2550:                  String.zeroBasedStringValue(CharData.DIRECTION[1]),
2551:                  String.zeroBasedStringValue(CharData.DIRECTION[2]),
2552:                  String.zeroBasedStringValue(CharData.DIRECTION[3]),
2553:                  String.zeroBasedStringValue(CharData.DIRECTION[4]),
2554:                  String.zeroBasedStringValue(CharData.DIRECTION[5]),
2555:                  String.zeroBasedStringValue(CharData.DIRECTION[6]),
2556:                  String.zeroBasedStringValue(CharData.DIRECTION[7]),
2557:                  String.zeroBasedStringValue(CharData.DIRECTION[8]),
2558:                  String.zeroBasedStringValue(CharData.DIRECTION[9]),
2559:                  String.zeroBasedStringValue(CharData.DIRECTION[10]),
2560:                  String.zeroBasedStringValue(CharData.DIRECTION[11]),
2561:                  String.zeroBasedStringValue(CharData.DIRECTION[12]),
2562:                  String.zeroBasedStringValue(CharData.DIRECTION[13]),
2563:                  String.zeroBasedStringValue(CharData.DIRECTION[14]),
2564:                  String.zeroBasedStringValue(CharData.DIRECTION[15]),
2565:                  String.zeroBasedStringValue(CharData.DIRECTION[16])};
2566: 
  /**
   * Stores unicode titlecase table. Exploit package visibility of
   * String.value to avoid copying the array.  Unlike the per-plane tables,
   * this is a single flat array built from one CharData entry.
   * @see CharData#TITLE
   */
  private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);  
2573: 
  /**
   * Mask for grabbing the type out of the contents of data.  Selects the
   * low five bits (0x1F).
   * @see CharData#DATA
   */
  private static final int TYPE_MASK = 0x1F;

  /**
   * Mask for grabbing the non-breaking space flag out of the contents of
   * data.  Selects the single bit 0x20, directly above the type bits.
   * @see CharData#DATA
   */
  private static final int NO_BREAK_MASK = 0x20;

  /**
   * Mask for grabbing the mirrored directionality flag out of the contents
   * of data.  Selects the single bit 0x40.
   * @see CharData#DATA
   */
  private static final int MIRROR_MASK = 0x40;
2593: 
2594:   /**
2595:    * Grabs an attribute offset from the Unicode attribute database. The lower
2596:    * 5 bits are the character type, the next 2 bits are flags, and the top
2597:    * 9 bits are the offset into the attribute tables.
2598:    *
2599:    * @param codePoint the character to look up
2600:    * @return the character's attribute offset and type
2601:    * @see #TYPE_MASK
2602:    * @see #NO_BREAK_MASK
2603:    * @see #MIRROR_MASK
2604:    * @see CharData#DATA
2605:    * @see CharData#SHIFT
2606:    */
2607:   // Package visible for use in String.
2608:   static char readCodePoint(int codePoint)
2609:   {
2610:     int plane = codePoint >>> 16;
2611:     char offset = (char) (codePoint & 0xffff);
2612:     return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)];
2613:   }
2614: 
  /**
   * Wraps up a character.  The given value is stored in the final
   * <code>value</code> field and cannot change afterwards.
   *
   * @param value the character to wrap
   */
  public Character(char value)
  {
    this.value = value;
  }
2624: 
  /**
   * Returns the character which has been wrapped by this class, i.e. the
   * value that was passed to the constructor.
   *
   * @return the character wrapped
   */
  public char charValue()
  {
    return value;
  }
2634: 
  /**
   * Returns the numerical value (unsigned) of the wrapped character.
   * Range of returned values: 0x0000-0xFFFF.  Because the hash is the
   * wrapped value itself, Characters that are equal have equal hash codes.
   *
   * @return the value of the wrapped character
   */
  public int hashCode()
  {
    return value;
  }
2645: 
2646:   /**
2647:    * Determines if an object is equal to this object. This is only true for
2648:    * another Character object wrapping the same value.
2649:    *
2650:    * @param o object to compare
2651:    * @return true if o is a Character with the same value
2652:    */
2653:   public boolean equals(Object o)
2654:   {
2655:     return o instanceof Character && value == ((Character) o).value;
2656:   }
2657: 
2658:   /**
2659:    * Converts the wrapped character into a String.
2660:    *
2661:    * @return a String containing one character -- the wrapped character
2662:    *         of this instance
2663:    */
2664:   public String toString()
2665:   {
2666:     // Package constructor avoids an array copy.
2667:     return new String(new char[] { value }, 0, 1, true);
2668:   }
2669: 
2670:   /**
2671:    * Returns a String of length 1 representing the specified character.
2672:    *
2673:    * @param ch the character to convert
2674:    * @return a String containing the character
2675:    * @since 1.4
2676:    */
2677:   public static String toString(char ch)
2678:   {
2679:     // Package constructor avoids an array copy.
2680:     return new String(new char[] { ch }, 0, 1, true);
2681:   }
2682: 
2683:   /**
2684:    * Determines if a character is a Unicode lowercase letter. For example,
2685:    * <code>'a'</code> is lowercase.  Returns true if getType() returns
2686:    * LOWERCASE_LETTER.
2687:    * <br>
2688:    * lowercase = [Ll]
2689:    *
2690:    * @param ch character to test
2691:    * @return true if ch is a Unicode lowercase letter, else false
2692:    * @see #isUpperCase(char)
2693:    * @see #isTitleCase(char)
2694:    * @see #toLowerCase(char)
2695:    * @see #getType(char)
2696:    */
2697:   public static boolean isLowerCase(char ch)
2698:   {
2699:     return isLowerCase((int)ch);
2700:   }
2701:   
2702:   /**
2703:    * Determines if a character is a Unicode lowercase letter. For example,
2704:    * <code>'a'</code> is lowercase.  Returns true if getType() returns
2705:    * LOWERCASE_LETTER.
2706:    * <br>
2707:    * lowercase = [Ll]
2708:    *
2709:    * @param codePoint character to test
2710:    * @return true if ch is a Unicode lowercase letter, else false
2711:    * @see #isUpperCase(char)
2712:    * @see #isTitleCase(char)
2713:    * @see #toLowerCase(char)
2714:    * @see #getType(char)
2715:    * 
2716:    * @since 1.5
2717:    */
2718:   public static boolean isLowerCase(int codePoint)
2719:   {
2720:     return getType(codePoint) == LOWERCASE_LETTER;
2721:   }
2722: 
2723:   /**
2724:    * Determines if a character is a Unicode uppercase letter. For example,
2725:    * <code>'A'</code> is uppercase.  Returns true if getType() returns
2726:    * UPPERCASE_LETTER.
2727:    * <br>
2728:    * uppercase = [Lu]
2729:    *
2730:    * @param ch character to test
2731:    * @return true if ch is a Unicode uppercase letter, else false
2732:    * @see #isLowerCase(char)
2733:    * @see #isTitleCase(char)
2734:    * @see #toUpperCase(char)
2735:    * @see #getType(char)
2736:    */
2737:   public static boolean isUpperCase(char ch)
2738:   {
2739:     return isUpperCase((int)ch);
2740:   }
2741:   
2742:   /**
2743:    * Determines if a character is a Unicode uppercase letter. For example,
2744:    * <code>'A'</code> is uppercase.  Returns true if getType() returns
2745:    * UPPERCASE_LETTER.
2746:    * <br>
2747:    * uppercase = [Lu]
2748:    *
2749:    * @param codePoint character to test
2750:    * @return true if ch is a Unicode uppercase letter, else false
2751:    * @see #isLowerCase(char)
2752:    * @see #isTitleCase(char)
2753:    * @see #toUpperCase(char)
2754:    * @see #getType(char)
2755:    * 
2756:    * @since 1.5
2757:    */
2758:   public static boolean isUpperCase(int codePoint)
2759:   {
2760:     return getType(codePoint) == UPPERCASE_LETTER;
2761:   }
2762: 
2763:   /**
2764:    * Determines if a character is a Unicode titlecase letter. For example,
2765:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
2766:    * True if getType() returns TITLECASE_LETTER.
2767:    * <br>
2768:    * titlecase = [Lt]
2769:    *
2770:    * @param ch character to test
2771:    * @return true if ch is a Unicode titlecase letter, else false
2772:    * @see #isLowerCase(char)
2773:    * @see #isUpperCase(char)
2774:    * @see #toTitleCase(char)
2775:    * @see #getType(char)
2776:    */
2777:   public static boolean isTitleCase(char ch)
2778:   {
2779:     return isTitleCase((int)ch);
2780:   }
2781: 
2782:   /**
2783:    * Determines if a character is a Unicode titlecase letter. For example,
2784:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
2785:    * True if getType() returns TITLECASE_LETTER.
2786:    * <br>
2787:    * titlecase = [Lt]
2788:    *
2789:    * @param codePoint character to test
2790:    * @return true if ch is a Unicode titlecase letter, else false
2791:    * @see #isLowerCase(char)
2792:    * @see #isUpperCase(char)
2793:    * @see #toTitleCase(char)
2794:    * @see #getType(char)
2795:    * 
2796:    * @since 1.5
2797:    */
2798:   public static boolean isTitleCase(int codePoint)
2799:   {
2800:     return getType(codePoint) == TITLECASE_LETTER;
2801:   }
2802:   
2803: 
2804:   /**
2805:    * Determines if a character is a Unicode decimal digit. For example,
2806:    * <code>'0'</code> is a digit.  A character is a Unicode digit if
2807:    * getType() returns DECIMAL_DIGIT_NUMBER.
2808:    * <br>
2809:    * Unicode decimal digit = [Nd]
2810:    *
2811:    * @param ch character to test
2812:    * @return true if ch is a Unicode decimal digit, else false
2813:    * @see #digit(char, int)
2814:    * @see #forDigit(int, int)
2815:    * @see #getType(char)
2816:    */
2817:   public static boolean isDigit(char ch)
2818:   {
2819:     return isDigit((int)ch);
2820:   }
2821:   
2822:   /**
2823:    * Determines if a character is a Unicode decimal digit. For example,
2824:    * <code>'0'</code> is a digit. A character is a Unicode digit if
2825:    * getType() returns DECIMAL_DIGIT_NUMBER.
2826:    * <br>
2827:    * Unicode decimal digit = [Nd]
2828:    *
2829:    * @param codePoint character to test
2830:    * @return true if ch is a Unicode decimal digit, else false
2831:    * @see #digit(char, int)
2832:    * @see #forDigit(int, int)
2833:    * @see #getType(char)
2834:    * 
2835:    * @since 1.5
2836:    */
2837: 
2838:   public static boolean isDigit(int codePoint)
2839:   {
2840:     return getType(codePoint) == DECIMAL_DIGIT_NUMBER;
2841:   }
2842: 
2843:   /**
2844:    * Determines if a character is part of the Unicode Standard. This is an
2845:    * evolving standard, but covers every character in the data file.
2846:    * <br>
2847:    * defined = not [Cn]
2848:    *
2849:    * @param ch character to test
2850:    * @return true if ch is a Unicode character, else false
2851:    * @see #isDigit(char)
2852:    * @see #isLetter(char)
2853:    * @see #isLetterOrDigit(char)
2854:    * @see #isLowerCase(char)
2855:    * @see #isTitleCase(char)
2856:    * @see #isUpperCase(char)
2857:    */
2858:   public static boolean isDefined(char ch)
2859:   {
2860:     return isDefined((int)ch);
2861:   }
2862:   
2863:   /**
2864:    * Determines if a character is part of the Unicode Standard. This is an
2865:    * evolving standard, but covers every character in the data file.
2866:    * <br>
2867:    * defined = not [Cn]
2868:    *
2869:    * @param codePoint character to test
2870:    * @return true if ch is a Unicode character, else false
2871:    * @see #isDigit(char)
2872:    * @see #isLetter(char)
2873:    * @see #isLetterOrDigit(char)
2874:    * @see #isLowerCase(char)
2875:    * @see #isTitleCase(char)
2876:    * @see #isUpperCase(char)
2877:    * 
2878:    * @since 1.5
2879:    */
2880:   public static boolean isDefined(int codePoint)
2881:   {
2882:     return getType(codePoint) != UNASSIGNED;
2883:   }
2884: 
2885:   /**
2886:    * Determines if a character is a Unicode letter. Not all letters have case,
2887:    * so this may return true when isLowerCase and isUpperCase return false.
2888:    * A character is a Unicode letter if getType() returns one of 
2889:    * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2890:    * or OTHER_LETTER.
2891:    * <br>
2892:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2893:    *
2894:    * @param ch character to test
2895:    * @return true if ch is a Unicode letter, else false
2896:    * @see #isDigit(char)
2897:    * @see #isJavaIdentifierStart(char)
2898:    * @see #isJavaLetter(char)
2899:    * @see #isJavaLetterOrDigit(char)
2900:    * @see #isLetterOrDigit(char)
2901:    * @see #isLowerCase(char)
2902:    * @see #isTitleCase(char)
2903:    * @see #isUnicodeIdentifierStart(char)
2904:    * @see #isUpperCase(char)
2905:    */
2906:   public static boolean isLetter(char ch)
2907:   {
2908:     return isLetter((int)ch);
2909:   }
2910:   
2911:   /**
2912:    * Determines if a character is a Unicode letter. Not all letters have case,
2913:    * so this may return true when isLowerCase and isUpperCase return false.
2914:    * A character is a Unicode letter if getType() returns one of 
2915:    * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2916:    * or OTHER_LETTER.
2917:    * <br>
2918:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2919:    *
2920:    * @param codePoint character to test
2921:    * @return true if ch is a Unicode letter, else false
2922:    * @see #isDigit(char)
2923:    * @see #isJavaIdentifierStart(char)
2924:    * @see #isJavaLetter(char)
2925:    * @see #isJavaLetterOrDigit(char)
2926:    * @see #isLetterOrDigit(char)
2927:    * @see #isLowerCase(char)
2928:    * @see #isTitleCase(char)
2929:    * @see #isUnicodeIdentifierStart(char)
2930:    * @see #isUpperCase(char)
2931:    * 
2932:    * @since 1.5
2933:    */
2934:   public static boolean isLetter(int codePoint)
2935:   {
2936:     return ((1 << getType(codePoint))
2937:         & ((1 << UPPERCASE_LETTER)
2938:             | (1 << LOWERCASE_LETTER)
2939:             | (1 << TITLECASE_LETTER)
2940:             | (1 << MODIFIER_LETTER)
2941:             | (1 << OTHER_LETTER))) != 0;
2942:   }
2943:   /**
2944:    * Returns the index into the given CharSequence that is offset
2945:    * <code>codePointOffset</code> code points from <code>index</code>.
2946:    * @param seq the CharSequence
2947:    * @param index the start position in the CharSequence
2948:    * @param codePointOffset the number of code points offset from the start
2949:    * position
2950:    * @return the index into the CharSequence that is codePointOffset code 
2951:    * points offset from index
2952:    * 
2953:    * @throws NullPointerException if seq is null
2954:    * @throws IndexOutOfBoundsException if index is negative or greater than the
2955:    * length of the sequence.
2956:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the 
2957:    * subsequence from index to the end of seq has fewer than codePointOffset
2958:    * code points
2959:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2960:    * subsequence from the start of seq to index has fewer than 
2961:    * (-codePointOffset) code points
2962:    * @since 1.5
2963:    */
2964:   public static int offsetByCodePoints(CharSequence seq,
2965:                                        int index,
2966:                                        int codePointOffset)
2967:   {
2968:     int len = seq.length();
2969:     if (index < 0 || index > len)
2970:       throw new IndexOutOfBoundsException();
2971:     
2972:     int numToGo = codePointOffset;
2973:     int offset = index;
2974:     int adjust = 1;
2975:     if (numToGo >= 0)
2976:       {
2977:         for (; numToGo > 0; offset++)
2978:           {
2979:             numToGo--;
2980:             if (Character.isHighSurrogate(seq.charAt(offset))
2981:                 && (offset + 1) < len
2982:                 && Character.isLowSurrogate(seq.charAt(offset + 1)))
2983:               offset++;
2984:           }
2985:         return offset;
2986:       }
2987:     else
2988:       {
2989:         numToGo *= -1;
2990:         for (; numToGo > 0;)
2991:           {
2992:             numToGo--;
2993:             offset--;
2994:             if (Character.isLowSurrogate(seq.charAt(offset))
2995:                 && (offset - 1) >= 0
2996:                 && Character.isHighSurrogate(seq.charAt(offset - 1)))
2997:               offset--;
2998:           }
2999:         return offset;
3000:       }
3001:   }
3002:   
3003:   /**
3004:    * Returns the index into the given char subarray that is offset
3005:    * <code>codePointOffset</code> code points from <code>index</code>.
3006:    * @param a the char array
3007:    * @param start the start index of the subarray
3008:    * @param count the length of the subarray
3009:    * @param index the index to be offset
3010:    * @param codePointOffset the number of code points offset from <code>index
3011:    * </code>
3012:    * @return the index into the char array
3013:    * 
3014:    * @throws NullPointerException if a is null
3015:    * @throws IndexOutOfBoundsException if start or count is negative or if
3016:    * start + count is greater than the length of the array
3017:    * @throws IndexOutOfBoundsException if index is less than start or larger 
3018:    * than start + count
3019:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the
3020:    * subarray from index to start + count - 1 has fewer than codePointOffset
3021:    * code points.
3022:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
3023:    * subarray from start to index - 1 has fewer than (-codePointOffset) code
3024:    * points
3025:    * 
3026:    * @since 1.5
3027:    */
3028:   public static int offsetByCodePoints(char[] a,
3029:                                        int start,
3030:                                        int count,
3031:                                        int index,
3032:                                        int codePointOffset)
3033:   {
3034:     int len = a.length;
3035:     int end = start + count;
3036:     if (start < 0 || count < 0 || end > len || index < start || index > end)
3037:       throw new IndexOutOfBoundsException();
3038:     
3039:     int numToGo = codePointOffset;
3040:     int offset = index;
3041:     int adjust = 1;
3042:     if (numToGo >= 0)
3043:       {
3044:         for (; numToGo > 0; offset++)
3045:           {
3046:             numToGo--;
3047:             if (Character.isHighSurrogate(a[offset])
3048:                 && (offset + 1) < len
3049:                 && Character.isLowSurrogate(a[offset + 1]))
3050:               offset++;
3051:           }
3052:         return offset;
3053:       }
3054:     else
3055:       {
3056:         numToGo *= -1;
3057:         for (; numToGo > 0;)
3058:           {
3059:             numToGo--;
3060:             offset--;
3061:             if (Character.isLowSurrogate(a[offset])
3062:                 && (offset - 1) >= 0
3063:                 && Character.isHighSurrogate(a[offset - 1]))
3064:               offset--;
3065:             if (offset < start)
3066:               throw new IndexOutOfBoundsException();
3067:           }
3068:         return offset;
3069:       }
3070: 
3071:   }
3072:   
3073:   /**
3074:    * Returns the number of Unicode code points in the specified range of the
3075:    * given CharSequence.  The first char in the range is at position
3076:    * beginIndex and the last one is at position endIndex - 1.  Paired 
3077:    * surrogates (supplementary characters are represented by a pair of chars - 
3078:    * one from the high surrogates and one from the low surrogates) 
3079:    * count as just one code point.
3080:    * @param seq the CharSequence to inspect
3081:    * @param beginIndex the beginning of the range
3082:    * @param endIndex the end of the range
3083:    * @return the number of Unicode code points in the given range of the 
3084:    * sequence
3085:    * @throws NullPointerException if seq is null
3086:    * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
3087:    * larger than the length of seq, or if beginIndex is greater than endIndex.
3088:    * @since 1.5
3089:    */
3090:   public static int codePointCount(CharSequence seq, int beginIndex,
3091:                                    int endIndex)
3092:   {
3093:     int len = seq.length();
3094:     if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
3095:       throw new IndexOutOfBoundsException();
3096:         
3097:     int count = 0;
3098:     for (int i = beginIndex; i < endIndex; i++)
3099:       {
3100:         count++;
3101:         // If there is a pairing, count it only once.
3102:         if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
3103:             && isLowSurrogate(seq.charAt(i + 1)))
3104:           i ++;
3105:       }    
3106:     return count;
3107:   }
3108:   
3109:   /**
3110:    * Returns the number of Unicode code points in the specified range of the
3111:    * given char array.  The first char in the range is at position
3112:    * offset and the length of the range is count.  Paired surrogates
3113:    * (supplementary characters are represented by a pair of chars - 
3114:    * one from the high surrogates and one from the low surrogates) 
3115:    * count as just one code point.
3116:    * @param a the char array to inspect
3117:    * @param offset the beginning of the range
3118:    * @param count the length of the range
3119:    * @return the number of Unicode code points in the given range of the 
3120:    * array
3121:    * @throws NullPointerException if a is null
3122:    * @throws IndexOutOfBoundsException if offset or count is negative or if 
3123:    * offset + countendIndex is larger than the length of a.
3124:    * @since 1.5
3125:    */
3126:   public static int codePointCount(char[] a, int offset,
3127:                                    int count)
3128:   {
3129:     int len = a.length;
3130:     int end = offset + count;
3131:     if (offset < 0 || count < 0 || end > len)
3132:       throw new IndexOutOfBoundsException();
3133:         
3134:     int counter = 0;
3135:     for (int i = offset; i < end; i++)
3136:       {
3137:         counter++;
3138:         // If there is a pairing, count it only once.
3139:         if (isHighSurrogate(a[i]) && (i + 1) < end
3140:             && isLowSurrogate(a[i + 1]))
3141:           i ++;
3142:       }    
3143:     return counter;
3144:   }
3145: 
3146:   /**
3147:    * Determines if a character is a Unicode letter or a Unicode digit. This
3148:    * is the combination of isLetter and isDigit.
3149:    * <br>
3150:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3151:    *
3152:    * @param ch character to test
3153:    * @return true if ch is a Unicode letter or a Unicode digit, else false
3154:    * @see #isDigit(char)
3155:    * @see #isJavaIdentifierPart(char)
3156:    * @see #isJavaLetter(char)
3157:    * @see #isJavaLetterOrDigit(char)
3158:    * @see #isLetter(char)
3159:    * @see #isUnicodeIdentifierPart(char)
3160:    */
3161:   public static boolean isLetterOrDigit(char ch)
3162:   {
3163:     return isLetterOrDigit((int)ch);
3164:   }
3165: 
3166:   /**
3167:    * Determines if a character is a Unicode letter or a Unicode digit. This
3168:    * is the combination of isLetter and isDigit.
3169:    * <br>
3170:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3171:    *
3172:    * @param codePoint character to test
3173:    * @return true if ch is a Unicode letter or a Unicode digit, else false
3174:    * @see #isDigit(char)
3175:    * @see #isJavaIdentifierPart(char)
3176:    * @see #isJavaLetter(char)
3177:    * @see #isJavaLetterOrDigit(char)
3178:    * @see #isLetter(char)
3179:    * @see #isUnicodeIdentifierPart(char)
3180:    * 
3181:    * @since 1.5
3182:    */
3183:   public static boolean isLetterOrDigit(int codePoint)
3184:   {
3185:     return ((1 << getType(codePoint))
3186:         & ((1 << UPPERCASE_LETTER)
3187:            | (1 << LOWERCASE_LETTER)
3188:            | (1 << TITLECASE_LETTER)
3189:            | (1 << MODIFIER_LETTER)
3190:            | (1 << OTHER_LETTER)
3191:            | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
3192:   }
3193:   
3194:   /**
3195:    * Determines if a character can start a Java identifier. This is the
3196:    * combination of isLetter, any character where getType returns
3197:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3198:    * (like '_').
3199:    *
3200:    * @param ch character to test
3201:    * @return true if ch can start a Java identifier, else false
3202:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
3203:    * @see #isJavaLetterOrDigit(char)
3204:    * @see #isJavaIdentifierStart(char)
3205:    * @see #isJavaIdentifierPart(char)
3206:    * @see #isLetter(char)
3207:    * @see #isLetterOrDigit(char)
3208:    * @see #isUnicodeIdentifierStart(char)
3209:    */
3210:   public static boolean isJavaLetter(char ch)
3211:   {
3212:     return isJavaIdentifierStart(ch);
3213:   }
3214: 
3215:   /**
3216:    * Determines if a character can follow the first letter in
3217:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3218:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3219:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3220:    * or isIdentifierIgnorable.
3221:    *
3222:    * @param ch character to test
3223:    * @return true if ch can follow the first letter in a Java identifier
3224:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
3225:    * @see #isJavaLetter(char)
3226:    * @see #isJavaIdentifierStart(char)
3227:    * @see #isJavaIdentifierPart(char)
3228:    * @see #isLetter(char)
3229:    * @see #isLetterOrDigit(char)
3230:    * @see #isUnicodeIdentifierPart(char)
3231:    * @see #isIdentifierIgnorable(char)
3232:    */
3233:   public static boolean isJavaLetterOrDigit(char ch)
3234:   {
3235:     return isJavaIdentifierPart(ch);
3236:   }
3237: 
3238:   /**
3239:    * Determines if a character can start a Java identifier. This is the
3240:    * combination of isLetter, any character where getType returns
3241:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3242:    * (like '_').
3243:    * <br>
3244:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3245:    *
3246:    * @param ch character to test
3247:    * @return true if ch can start a Java identifier, else false
3248:    * @see #isJavaIdentifierPart(char)
3249:    * @see #isLetter(char)
3250:    * @see #isUnicodeIdentifierStart(char)
3251:    * @since 1.1
3252:    */
3253:   public static boolean isJavaIdentifierStart(char ch)
3254:   {
3255:     return isJavaIdentifierStart((int)ch);
3256:   }
3257: 
3258:   /**
3259:    * Determines if a character can start a Java identifier. This is the
3260:    * combination of isLetter, any character where getType returns
3261:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3262:    * (like '_').
3263:    * <br>
3264:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3265:    *
3266:    * @param codePoint character to test
3267:    * @return true if ch can start a Java identifier, else false
3268:    * @see #isJavaIdentifierPart(char)
3269:    * @see #isLetter(char)
3270:    * @see #isUnicodeIdentifierStart(char)
3271:    * @since 1.5
3272:    */
3273:   public static boolean isJavaIdentifierStart(int codePoint)
3274:   {
3275:     return ((1 << getType(codePoint))
3276:             & ((1 << UPPERCASE_LETTER)
3277:                | (1 << LOWERCASE_LETTER)
3278:                | (1 << TITLECASE_LETTER)
3279:                | (1 << MODIFIER_LETTER)
3280:                | (1 << OTHER_LETTER)
3281:                | (1 << LETTER_NUMBER)
3282:                | (1 << CURRENCY_SYMBOL)
3283:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
3284:   }
3285: 
3286:   /**
3287:    * Determines if a character can follow the first letter in
3288:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3289:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3290:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3291:    * or isIdentifierIgnorable.
3292:    * <br>
3293:    * Java identifier extender =
3294:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3295:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3296:    *
3297:    * @param ch character to test
3298:    * @return true if ch can follow the first letter in a Java identifier
3299:    * @see #isIdentifierIgnorable(char)
3300:    * @see #isJavaIdentifierStart(char)
3301:    * @see #isLetterOrDigit(char)
3302:    * @see #isUnicodeIdentifierPart(char)
3303:    * @since 1.1
3304:    */
3305:   public static boolean isJavaIdentifierPart(char ch)
3306:   {
3307:     return isJavaIdentifierPart((int)ch);
3308:   }
3309:   
3310:   /**
3311:    * Determines if a character can follow the first letter in
3312:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3313:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3314:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3315:    * or isIdentifierIgnorable.
3316:    * <br>
3317:    * Java identifier extender =
3318:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3319:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3320:    *
3321:    * @param codePoint character to test
3322:    * @return true if ch can follow the first letter in a Java identifier
3323:    * @see #isIdentifierIgnorable(char)
3324:    * @see #isJavaIdentifierStart(char)
3325:    * @see #isLetterOrDigit(char)
3326:    * @see #isUnicodeIdentifierPart(char)
3327:    * @since 1.5
3328:    */
3329:   public static boolean isJavaIdentifierPart(int codePoint)
3330:   {
3331:     int category = getType(codePoint);
3332:     return ((1 << category)
3333:             & ((1 << UPPERCASE_LETTER)
3334:                | (1 << LOWERCASE_LETTER)
3335:                | (1 << TITLECASE_LETTER)
3336:                | (1 << MODIFIER_LETTER)
3337:                | (1 << OTHER_LETTER)
3338:                | (1 << NON_SPACING_MARK)
3339:                | (1 << COMBINING_SPACING_MARK)
3340:                | (1 << DECIMAL_DIGIT_NUMBER)
3341:                | (1 << LETTER_NUMBER)
3342:                | (1 << CURRENCY_SYMBOL)
3343:                | (1 << CONNECTOR_PUNCTUATION)
3344:                | (1 << FORMAT))) != 0
3345:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3346:   }
3347: 
3348:   /**
3349:    * Determines if a character can start a Unicode identifier.  Only
3350:    * letters can start a Unicode identifier, but this includes characters
3351:    * in LETTER_NUMBER.
3352:    * <br>
3353:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3354:    *
3355:    * @param ch character to test
3356:    * @return true if ch can start a Unicode identifier, else false
3357:    * @see #isJavaIdentifierStart(char)
3358:    * @see #isLetter(char)
3359:    * @see #isUnicodeIdentifierPart(char)
3360:    * @since 1.1
3361:    */
3362:   public static boolean isUnicodeIdentifierStart(char ch)
3363:   {
3364:     return isUnicodeIdentifierStart((int)ch);
3365:   }
3366: 
3367:   /**
3368:    * Determines if a character can start a Unicode identifier.  Only
3369:    * letters can start a Unicode identifier, but this includes characters
3370:    * in LETTER_NUMBER.
3371:    * <br>
3372:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3373:    *
3374:    * @param codePoint character to test
3375:    * @return true if ch can start a Unicode identifier, else false
3376:    * @see #isJavaIdentifierStart(char)
3377:    * @see #isLetter(char)
3378:    * @see #isUnicodeIdentifierPart(char)
3379:    * @since 1.5
3380:    */
3381:   public static boolean isUnicodeIdentifierStart(int codePoint)
3382:   {
3383:     return ((1 << getType(codePoint))
3384:             & ((1 << UPPERCASE_LETTER)
3385:                | (1 << LOWERCASE_LETTER)
3386:                | (1 << TITLECASE_LETTER)
3387:                | (1 << MODIFIER_LETTER)
3388:                | (1 << OTHER_LETTER)
3389:                | (1 << LETTER_NUMBER))) != 0;
3390:   }
3391: 
3392:   /**
3393:    * Determines if a character can follow the first letter in
3394:    * a Unicode identifier. This includes letters, connecting punctuation,
3395:    * digits, numeric letters, combining marks, non-spacing marks, and
3396:    * isIdentifierIgnorable.
3397:    * <br>
3398:    * Unicode identifier extender =
3399:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3400:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3401:    *
3402:    * @param ch character to test
3403:    * @return true if ch can follow the first letter in a Unicode identifier
3404:    * @see #isIdentifierIgnorable(char)
3405:    * @see #isJavaIdentifierPart(char)
3406:    * @see #isLetterOrDigit(char)
3407:    * @see #isUnicodeIdentifierStart(char)
3408:    * @since 1.1
3409:    */
3410:   public static boolean isUnicodeIdentifierPart(char ch)
3411:   {
3412:     return isUnicodeIdentifierPart((int)ch);
3413:   }
3414:   
3415:   /**
3416:    * Determines if a character can follow the first letter in
3417:    * a Unicode identifier. This includes letters, connecting punctuation,
3418:    * digits, numeric letters, combining marks, non-spacing marks, and
3419:    * isIdentifierIgnorable.
3420:    * <br>
3421:    * Unicode identifier extender =
3422:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3423:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3424:    *
3425:    * @param codePoint character to test
3426:    * @return true if ch can follow the first letter in a Unicode identifier
3427:    * @see #isIdentifierIgnorable(char)
3428:    * @see #isJavaIdentifierPart(char)
3429:    * @see #isLetterOrDigit(char)
3430:    * @see #isUnicodeIdentifierStart(char)
3431:    * @since 1.5
3432:    */
3433:   public static boolean isUnicodeIdentifierPart(int codePoint)
3434:   {
3435:     int category = getType(codePoint);
3436:     return ((1 << category)
3437:             & ((1 << UPPERCASE_LETTER)
3438:                | (1 << LOWERCASE_LETTER)
3439:                | (1 << TITLECASE_LETTER)
3440:                | (1 << MODIFIER_LETTER)
3441:                | (1 << OTHER_LETTER)
3442:                | (1 << NON_SPACING_MARK)
3443:                | (1 << COMBINING_SPACING_MARK)
3444:                | (1 << DECIMAL_DIGIT_NUMBER)
3445:                | (1 << LETTER_NUMBER)
3446:                | (1 << CONNECTOR_PUNCTUATION)
3447:                | (1 << FORMAT))) != 0
3448:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3449:   }
3450: 
3451:   /**
3452:    * Determines if a character is ignorable in a Unicode identifier. This
3453:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3454:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3455:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3456:    * <code>'\u009F'</code>), and FORMAT characters.
3457:    * <br>
3458:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3459:    *    |U+007F-U+009F
3460:    *
3461:    * @param ch character to test
3462:    * @return true if ch is ignorable in a Unicode or Java identifier
3463:    * @see #isJavaIdentifierPart(char)
3464:    * @see #isUnicodeIdentifierPart(char)
3465:    * @since 1.1
3466:    */
3467:   public static boolean isIdentifierIgnorable(char ch)
3468:   {
3469:     return isIdentifierIgnorable((int)ch);
3470:   }
3471: 
3472:   /**
3473:    * Determines if a character is ignorable in a Unicode identifier. This
3474:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3475:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3476:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3477:    * <code>'\u009F'</code>), and FORMAT characters.
3478:    * <br>
3479:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3480:    *    |U+007F-U+009F
3481:    *
3482:    * @param codePoint character to test
3483:    * @return true if ch is ignorable in a Unicode or Java identifier
3484:    * @see #isJavaIdentifierPart(char)
3485:    * @see #isUnicodeIdentifierPart(char)
3486:    * @since 1.5
3487:    */
3488:   public static boolean isIdentifierIgnorable(int codePoint)
3489:   {
3490:     if ((codePoint >= 0 && codePoint <= 0x0008)
3491:         || (codePoint >= 0x000E && codePoint <= 0x001B)
3492:         || (codePoint >= 0x007F && codePoint <= 0x009F)
3493:         || getType(codePoint) == FORMAT)
3494:       return true;
3495:     return false;
3496:   }
3497: 
3498:   /**
3499:    * Converts a Unicode character into its lowercase equivalent mapping.
3500:    * If a mapping does not exist, then the character passed is returned.
3501:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3502:    *
3503:    * @param ch character to convert to lowercase
3504:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3505:    *         not exist
3506:    * @see #isLowerCase(char)
3507:    * @see #isUpperCase(char)
3508:    * @see #toTitleCase(char)
3509:    * @see #toUpperCase(char)
3510:    */
3511:   public static char toLowerCase(char ch)
3512:   {
3513:     return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch);
3514:   }
3515:   
3516:   /**
3517:    * Converts a Unicode character into its lowercase equivalent mapping.
3518:    * If a mapping does not exist, then the character passed is returned.
3519:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3520:    *
3521:    * @param codePoint character to convert to lowercase
3522:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3523:    *         not exist
3524:    * @see #isLowerCase(char)
3525:    * @see #isUpperCase(char)
3526:    * @see #toTitleCase(char)
3527:    * @see #toUpperCase(char)
3528:    * 
3529:    * @since 1.5
3530:    */
3531:   public static int toLowerCase(int codePoint)
3532:   {
3533:     // If the code point is unassigned or in one of the private use areas
3534:     // then we delegate the call to the appropriate private static inner class.
3535:     int plane = codePoint >>> 16;
3536:     if (plane > 2 && plane < 14)
3537:       return UnassignedCharacters.toLowerCase(codePoint);
3538:     if (plane > 14)
3539:       return PrivateUseCharacters.toLowerCase(codePoint);
3540:     
3541:     // The short value stored in lower[plane] is the signed difference between
3542:     // codePoint and its lowercase conversion.
3543:     return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3544:   }
3545: 
3546:   /**
3547:    * Converts a Unicode character into its uppercase equivalent mapping.
3548:    * If a mapping does not exist, then the character passed is returned.
3549:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3550:    *
3551:    * @param ch character to convert to uppercase
3552:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3553:    *         not exist
3554:    * @see #isLowerCase(char)
3555:    * @see #isUpperCase(char)
3556:    * @see #toLowerCase(char)
3557:    * @see #toTitleCase(char)
3558:    */
3559:   public static char toUpperCase(char ch)
3560:   {
3561:     return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch);
3562:   }
3563:   
3564:   /**
3565:    * Converts a Unicode character into its uppercase equivalent mapping.
3566:    * If a mapping does not exist, then the character passed is returned.
3567:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3568:    *
3569:    * @param codePoint character to convert to uppercase
3570:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3571:    *         not exist
3572:    * @see #isLowerCase(char)
3573:    * @see #isUpperCase(char)
3574:    * @see #toLowerCase(char)
3575:    * @see #toTitleCase(char)
3576:    * 
3577:    * @since 1.5
3578:    */
3579:   public static int toUpperCase(int codePoint)
3580:   {
3581:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
3582:     // above to PrivateUseCharacters; the tables cover the remaining planes.
3583:     final int planeIndex = codePoint >>> 16;
3584:     if (planeIndex >= 3 && planeIndex <= 13)
3585:       return UnassignedCharacters.toUpperCase(codePoint);
3586:     if (planeIndex >= 15)
3587:       return PrivateUseCharacters.toUpperCase(codePoint);
3588:     // upper[planeIndex] holds the signed 16-bit offset from codePoint to
3589:     // its uppercase form.
3590:     final short delta = (short) upper[planeIndex][readCodePoint(codePoint) >>> 7];
3591:     return codePoint + delta;
3592:   }
3593: 
3594:   /**
3595:    * Converts a Unicode character into its titlecase equivalent mapping.
3596:    * If a mapping does not exist, then the character passed is returned.
3597:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3598:    *
3599:    * @param ch character to convert to titlecase
3600:    * @return titlecase mapping of ch, or ch if titlecase mapping does
3601:    *         not exist
3602:    * @see #isTitleCase(char)
3603:    * @see #toLowerCase(char)
3604:    * @see #toUpperCase(char)
3605:    */
3606:   public static char toTitleCase(char ch)
3607:   {
3608:     // title holds (character, titlecase) pairs; scan them from the end.
3609:     int pair = title.length - 2;
3610:     while (pair >= 0 && title[pair] != ch)
3611:       pair -= 2;
3612:     return pair >= 0 ? title[pair + 1] : toUpperCase(ch);
3613:   }
3614: 
3615:   /**
3616:    * Converts a Unicode character into its titlecase equivalent mapping.
3617:    * If a mapping does not exist, then the character passed is returned.
3618:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3619:    *
3620:    * @param codePoint character to convert to titlecase
3621:    * @return titlecase mapping of codePoint, or codePoint if no titlecase
3622:    *         mapping exists
3623:    * @see #isTitleCase(char)
3624:    * @see #toLowerCase(char)
3625:    * @see #toUpperCase(char)
3626:    * 
3627:    * @since 1.5
3628:    */
3629:   public static int toTitleCase(int codePoint)
3630:   {
3631:     // As of Unicode 4.0.0 only plane 0 has titlecase mappings that differ
3632:     // from the uppercase mapping.  Test the plane with an unsigned shift so
3633:     // that negative (invalid) values are not truncated to a plane 0 char.
3634:     if ((codePoint >>> 16) == 0)
3635:       return toTitleCase((char) codePoint);
3636:     return toUpperCase(codePoint);
3637:   }
3638: 
3639:   /**
3640:    * Converts a character into a digit of the specified radix. If the radix
3641:    * is outside the range MIN_RADIX to MAX_RADIX, or if the value of ch is
3642:    * not less than the radix, or if ch is not a decimal digit or in the case
3643:    * insensitive set of 'a'-'z', the result is -1.
3644:    * <br>
3645:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3646:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3647:    *
3648:    * @param ch character to convert into a digit
3649:    * @param radix radix in which ch is a digit
3650:    * @return digit which ch represents in radix, or -1 not a valid digit
3651:    * @see #MIN_RADIX
3652:    * @see #MAX_RADIX
3653:    * @see #forDigit(int, int)
3654:    * @see #isDigit(char)
3655:    * @see #getNumericValue(char)
3656:    */
3657:   public static int digit(char ch, int radix)
3658:   {
3659:     // Radixes outside [MIN_RADIX, MAX_RADIX] never yield a digit.
3660:     if (radix < MIN_RADIX || radix > MAX_RADIX)
3661:       return -1;
3662:     final char attr = readCodePoint((int) ch);
3663:     // Only letters with case and decimal digits can represent a digit.
3664:     final int digitTypes = (1 << UPPERCASE_LETTER)
3665:                            | (1 << LOWERCASE_LETTER)
3666:                            | (1 << DECIMAL_DIGIT_NUMBER);
3667:     if (((1 << (attr & TYPE_MASK)) & digitTypes) == 0)
3668:       return -1;
3669:     // A "no value" entry is rejected whether it reads as 0xffff or as -1.
3670:     final int value = numValue[0][attr >> 7];
3671:     return value < radix ? value : -1;
3672:   }
3673: 
3674:   /**
3675:    * Converts a character into a digit of the specified radix. If the radix
3676:    * is outside the range MIN_RADIX to MAX_RADIX, or if the value of the
3677:    * character is not less than the radix, or if it is not a decimal digit or
3678:    * in the case insensitive set of 'a'-'z', the result is -1.
3679:    * <br>
3680:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3681:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3682:    *
3683:    * @param codePoint character to convert into a digit
3684:    * @param radix radix in which ch is a digit
3685:    * @return digit which ch represents in radix, or -1 not a valid digit
3686:    * @see #MIN_RADIX
3687:    * @see #MAX_RADIX
3688:    * @see #forDigit(int, int)
3689:    * @see #isDigit(char)
3690:    * @see #getNumericValue(char)
3691:    */
3692:   public static int digit(int codePoint, int radix)
3693:   {
3694:     if (radix < MIN_RADIX || radix > MAX_RADIX)
3695:       return -1;
3696:
3697:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
3698:     // above to PrivateUseCharacters; the tables cover the remaining planes.
3699:     final int planeIndex = codePoint >>> 16;
3700:     if (planeIndex >= 3 && planeIndex <= 13)
3701:       return UnassignedCharacters.digit(codePoint, radix);
3702:     if (planeIndex >= 15)
3703:       return PrivateUseCharacters.digit(codePoint, radix);
3704:
3705:     // Only letters with case and decimal digits can represent a digit.
3706:     final char attr = readCodePoint(codePoint);
3707:     final int digitTypes = (1 << UPPERCASE_LETTER)
3708:                            | (1 << LOWERCASE_LETTER)
3709:                            | (1 << DECIMAL_DIGIT_NUMBER);
3710:     if (((1 << (attr & TYPE_MASK)) & digitTypes) == 0)
3711:       return -1;
3712:
3713:     // A "no value" entry is rejected whether it reads as 0xffff or as -1.
3714:     int value = numValue[planeIndex][attr >> 7];
3715:     // Entries of -3 or less are indices into CharData.LARGENUMS, which
3716:     // holds the values that do not fit into numValue.
3717:     if (value <= -3)
3718:       value = CharData.LARGENUMS[-value - 3];
3719:     return value < radix ? value : -1;
3720:   }
3721:   
3722:   /**
3723:    * Returns the Unicode numeric value property of a character. For example,
3724:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3725:    *
3726:    * <p>This method also returns values for the letters A through Z, (not
3727:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3728:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3729:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3730:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3731:    * <code>'\uFF5A'</code> (full width variants).
3732:    *
3733:    * <p>If the character lacks a numeric value property, -1 is returned.
3734:    * If the character has a numeric value property which is not representable
3735:    * as a nonnegative integer, such as a fraction, -2 is returned.
3736:    *
3737:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3738:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3739:    *
3740:    * @param ch character from which the numeric value property will
3741:    *        be retrieved
3742:    * @return the numeric value property of ch, or -1 if it does not exist, or
3743:    *         -2 if it is not representable as a nonnegative integer
3744:    * @see #forDigit(int, int)
3745:    * @see #digit(char, int)
3746:    * @see #isDigit(char)
3747:    * @since 1.1
3748:    */
3749:   public static int getNumericValue(char ch)
3750:   {
3751:     final char attr = readCodePoint((int) ch);
3752:     return (short) numValue[0][attr >> 7];  // table entries are signed
3753:   }
3754:   
3755:   /**
3756:    * Returns the Unicode numeric value property of a character. For example,
3757:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3758:    *
3759:    * <p>This method also returns values for the letters A through Z, (not
3760:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3761:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3762:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3763:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3764:    * <code>'\uFF5A'</code> (full width variants).
3765:    *
3766:    * <p>If the character lacks a numeric value property, -1 is returned.
3767:    * If the character has a numeric value property which is not representable
3768:    * as a nonnegative integer, such as a fraction, -2 is returned.
3769:    *
3770:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3771:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3772:    *
3773:    * @param codePoint character from which the numeric value property will
3774:    *        be retrieved
3775:    * @return the numeric value property of ch, or -1 if it does not exist, or
3776:    *         -2 if it is not representable as a nonnegative integer
3777:    * @see #forDigit(int, int)
3778:    * @see #digit(char, int)
3779:    * @see #isDigit(char)
3780:    * @since 1.5
3781:    */
3782:   public static int getNumericValue(int codePoint)
3783:   {
3784:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
3785:     // above to PrivateUseCharacters; the tables cover the remaining planes.
3786:     final int planeIndex = codePoint >>> 16;
3787:     if (planeIndex >= 3 && planeIndex <= 13)
3788:       return UnassignedCharacters.getNumericValue(codePoint);
3789:     if (planeIndex >= 15)
3790:       return PrivateUseCharacters.getNumericValue(codePoint);
3791:
3792:     // Read the table entry as a signed 16-bit value.  An entry N of -3 or
3793:     // less means the real value did not fit into 16 bits and is found in
3794:     // CharData.LARGENUMS at index (-N - 3).
3795:     final short entry = (short) numValue[planeIndex][readCodePoint(codePoint) >> 7];
3796:     if (entry > -3)
3797:       return entry;
3798:     return CharData.LARGENUMS[-entry - 3];
3799:   }
3800: 
3801:   /**
3802:    * Determines if a character is an ISO-LATIN-1 space. This is only the five
3803:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3804:    * <code>'\r'</code>, and <code>' '</code>.
3805:    * <br>
3806:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3807:    *
3808:    * @param ch character to test
3809:    * @return true if ch is a space, else false
3810:    * @deprecated Replaced by {@link #isWhitespace(char)}
3811:    * @see #isSpaceChar(char)
3812:    * @see #isWhitespace(char)
3813:    */
3814:   public static boolean isSpace(char ch)
3815:   {
3816:     // Bit N of the mask is set when character N is one of the four space
3817:     // characters below ' '.  ch is compared with ' ' before it is used as
3818:     // a shift distance, so the distance is always in 0..31 and '\0' is
3819:     // correctly rejected.
3820:     final int spaces = (1 << '\t') | (1 << '\n')
3821:                        | (1 << '\f') | (1 << '\r');
3822:     return ch == ' ' || (ch < ' ' && ((1 << ch) & spaces) != 0);
3823:   }
3824: 
3825:   /**
3826:    * Determines if a character is a Unicode space character. This includes
3827:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3828:    * <br>
3829:    * Unicode space = [Zs]|[Zp]|[Zl]
3830:    *
3831:    * @param ch character to test
3832:    * @return true if ch is a Unicode space, else false
3833:    * @see #isWhitespace(char)
3834:    * @since 1.1
3835:    */
3836:   public static boolean isSpaceChar(char ch)
3837:   {
3838:     return Character.isSpaceChar((int) ch);  // cast selects the int overload
3839:   }
3840:   
3841:   /**
3842:    * Determines if a character is a Unicode space character. This includes
3843:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3844:    * <br>
3845:    * Unicode space = [Zs]|[Zp]|[Zl]
3846:    *
3847:    * @param codePoint character to test
3848:    * @return true if ch is a Unicode space, else false
3849:    * @see #isWhitespace(char)
3850:    * @since 1.5
3851:    */
3852:   public static boolean isSpaceChar(int codePoint)
3853:   {
3854:     // One bit per general category that counts as a Unicode space.
3855:     final int separators = (1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR)
3856:                            | (1 << PARAGRAPH_SEPARATOR);
3857:     return ((1 << getType(codePoint)) & separators) != 0;
3858:   }
3859: 
3860:   /**
3861:    * Determines if a character is Java whitespace. This includes Unicode
3862:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3863:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3864:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3865:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3866:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3867:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3868:    * and <code>'\u001F'</code>.
3869:    * <br>
3870:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3871:    *
3872:    * @param ch character to test
3873:    * @return true if ch is Java whitespace, else false
3874:    * @see #isSpaceChar(char)
3875:    * @since 1.1
3876:    */
3877:   public static boolean isWhitespace(char ch)
3878:   {
3879:     return Character.isWhitespace((int) ch);  // cast selects the int overload
3880:   }
3881:   
3882:   /**
3883:    * Determines if a character is Java whitespace. This includes Unicode
3884:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3885:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3886:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3887:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3888:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3889:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3890:    * and <code>'\u001F'</code>.
3891:    * <br>
3892:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3893:    *
3894:    * @param codePoint character to test
3895:    * @return true if ch is Java whitespace, else false
3896:    * @see #isSpaceChar(char)
3897:    * @since 1.5
3898:    */
3899:   public static boolean isWhitespace(int codePoint)
3900:   {
3901:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
3902:     // above to PrivateUseCharacters; the tables cover the remaining planes.
3903:     final int planeIndex = codePoint >>> 16;
3904:     if (planeIndex >= 3 && planeIndex <= 13)
3905:       return UnassignedCharacters.isWhiteSpace(codePoint);
3906:     if (planeIndex >= 15)
3907:       return PrivateUseCharacters.isWhiteSpace(codePoint);
3908:
3909:     // Unicode separators are whitespace unless they are non-breaking.
3910:     final int attr = readCodePoint(codePoint);
3911:     final int separators = (1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR)
3912:                            | (1 << PARAGRAPH_SEPARATOR);
3913:     if (((1 << (attr & TYPE_MASK)) & separators) != 0
3914:         && (attr & NO_BREAK_MASK) == 0)
3915:       return true;
3916:
3917:     // Otherwise only the control characters U+0009-U+000D and
3918:     // U+001C-U+001F qualify; each has its own bit in the mask below.
3919:     final int controls = (1 << '\t') | (1 << '\n') | (1 << 0x0B)
3920:                          | (1 << '\f') | (1 << '\r') | (1 << 0x1C)
3921:                          | (1 << 0x1D) | (1 << 0x1E) | (1 << 0x1F);
3922:     return codePoint <= 0x1F && ((1 << codePoint) & controls) != 0;
3923:   }
3924: 
3925:   /**
3926:    * Determines if a character has the ISO Control property.
3927:    * <br>
3928:    * ISO Control = [Cc]
3929:    *
3930:    * @param ch character to test
3931:    * @return true if ch is an ISO Control character, else false
3932:    * @see #isSpaceChar(char)
3933:    * @see #isWhitespace(char)
3934:    * @since 1.1
3935:    */
3936:   public static boolean isISOControl(char ch)
3937:   {
3938:     return Character.isISOControl((int) ch);  // cast selects the int overload
3939:   }
3940:   
3941:   /**
3942:    * Determines if the character is an ISO Control character.  This is true
3943:    * if the code point is in the range [0, 0x001F] or if it is in the range
3944:    * [0x007F, 0x009F].
3945:    * @param codePoint the character to check
3946:    * @return true if the character is in one of the above ranges
3947:    * 
3948:    * @since 1.5
3949:    */
3950:   public static boolean isISOControl(int codePoint)
3951:   {
3952:     // C0 controls occupy [0, 0x1F]; DEL and the C1 controls [0x7F, 0x9F].
3953:     final boolean c0 = 0 <= codePoint && codePoint <= 0x1F;
3954:     final boolean delOrC1 = 0x7F <= codePoint && codePoint <= 0x9F;
3955:     return c0 || delOrC1;
3956:   }
3957: 
3958:   /**
3959:    * Returns the Unicode general category property of a character.
3960:    *
3961:    * @param ch character from which the general category property will
3962:    *        be retrieved
3963:    * @return the character category property of ch as an integer
3964:    * @see #UNASSIGNED
3965:    * @see #UPPERCASE_LETTER
3966:    * @see #LOWERCASE_LETTER
3967:    * @see #TITLECASE_LETTER
3968:    * @see #MODIFIER_LETTER
3969:    * @see #OTHER_LETTER
3970:    * @see #NON_SPACING_MARK
3971:    * @see #ENCLOSING_MARK
3972:    * @see #COMBINING_SPACING_MARK
3973:    * @see #DECIMAL_DIGIT_NUMBER
3974:    * @see #LETTER_NUMBER
3975:    * @see #OTHER_NUMBER
3976:    * @see #SPACE_SEPARATOR
3977:    * @see #LINE_SEPARATOR
3978:    * @see #PARAGRAPH_SEPARATOR
3979:    * @see #CONTROL
3980:    * @see #FORMAT
3981:    * @see #PRIVATE_USE
3982:    * @see #SURROGATE
3983:    * @see #DASH_PUNCTUATION
3984:    * @see #START_PUNCTUATION
3985:    * @see #END_PUNCTUATION
3986:    * @see #CONNECTOR_PUNCTUATION
3987:    * @see #OTHER_PUNCTUATION
3988:    * @see #MATH_SYMBOL
3989:    * @see #CURRENCY_SYMBOL
3990:    * @see #MODIFIER_SYMBOL
3991:    * @see #INITIAL_QUOTE_PUNCTUATION
3992:    * @see #FINAL_QUOTE_PUNCTUATION
3993:    * @since 1.1
3994:    */
3995:   public static int getType(char ch)
3996:   {
3997:     return Character.getType((int) ch);  // cast selects the int overload
3998:   }
3999:   
4000:   /**
4001:    * Returns the Unicode general category property of a character.
4002:    *
4003:    * @param codePoint character from which the general category property will
4004:    *        be retrieved
4005:    * @return the character category property of ch as an integer
4006:    * @see #UNASSIGNED
4007:    * @see #UPPERCASE_LETTER
4008:    * @see #LOWERCASE_LETTER
4009:    * @see #TITLECASE_LETTER
4010:    * @see #MODIFIER_LETTER
4011:    * @see #OTHER_LETTER
4012:    * @see #NON_SPACING_MARK
4013:    * @see #ENCLOSING_MARK
4014:    * @see #COMBINING_SPACING_MARK
4015:    * @see #DECIMAL_DIGIT_NUMBER
4016:    * @see #LETTER_NUMBER
4017:    * @see #OTHER_NUMBER
4018:    * @see #SPACE_SEPARATOR
4019:    * @see #LINE_SEPARATOR
4020:    * @see #PARAGRAPH_SEPARATOR
4021:    * @see #CONTROL
4022:    * @see #FORMAT
4023:    * @see #PRIVATE_USE
4024:    * @see #SURROGATE
4025:    * @see #DASH_PUNCTUATION
4026:    * @see #START_PUNCTUATION
4027:    * @see #END_PUNCTUATION
4028:    * @see #CONNECTOR_PUNCTUATION
4029:    * @see #OTHER_PUNCTUATION
4030:    * @see #MATH_SYMBOL
4031:    * @see #CURRENCY_SYMBOL
4032:    * @see #MODIFIER_SYMBOL
4033:    * @see #INITIAL_QUOTE_PUNCTUATION
4034:    * @see #FINAL_QUOTE_PUNCTUATION
4035:    * 
4036:    * @since 1.5
4037:    */
4038:   public static int getType(int codePoint)
4039:   {
4040:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
4041:     // above to PrivateUseCharacters.
4042:     final int planeIndex = codePoint >>> 16;
4043:     if (planeIndex >= 3 && planeIndex <= 13)
4044:       return UnassignedCharacters.getType(codePoint);
4045:     if (planeIndex >= 15)
4046:       return PrivateUseCharacters.getType(codePoint);
4047:     // The general category is kept in the TYPE_MASK bits of the attribute.
4048:     return TYPE_MASK & readCodePoint(codePoint);
4049:   }
4050: 
4051:   /**
4052:    * Converts a digit into a character which represents that digit
4053:    * in a specified radix. If the radix is outside MIN_RADIX..MAX_RADIX, or
4054:    * the digit is negative or not less than the radix, then the null character
4055:    * <code>'\0'</code> is returned.  Otherwise the result is in '0'-'9' or 'a'-'z'.
4056:    * <br>
4057:    * return value boundary = U+0030-U+0039|U+0061-U+007A
4058:    *
4059:    * @param digit digit to be converted into a character
4060:    * @param radix radix of digit
4061:    * @return character representing digit in radix, or '\0'
4062:    * @see #MIN_RADIX
4063:    * @see #MAX_RADIX
4064:    * @see #digit(char, int)
4065:    */
4066:   public static char forDigit(int digit, int radix)
4067:   {
4068:     final boolean radixOk = radix >= MIN_RADIX && radix <= MAX_RADIX;
4069:     final boolean digitOk = digit >= 0 && digit < radix;
4070:     // Number.digits maps a digit value to its character.
4071:     return (radixOk && digitOk) ? Number.digits[digit] : '\0';
4072:   }
4073: 
4074:   /**
4075:    * Returns the Unicode directionality property of the character. This
4076:    * is used in the visual ordering of text.
4077:    *
4078:    * @param ch the character to look up
4079:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4080:    * @see #DIRECTIONALITY_UNDEFINED
4081:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4082:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4083:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4084:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4085:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4086:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4087:    * @see #DIRECTIONALITY_ARABIC_NUMBER
4088:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4089:    * @see #DIRECTIONALITY_NONSPACING_MARK
4090:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4091:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4092:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4093:    * @see #DIRECTIONALITY_WHITESPACE
4094:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
4095:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4096:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4097:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4098:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4099:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4100:    * @since 1.4
4101:    */
4102:   public static byte getDirectionality(char ch)
4103:   {
4104:     final int codePoint = ch;
4105:     return getDirectionality(codePoint);
4106:   }
4107: 
4108:   
4109:   /**
4110:    * Returns the Unicode directionality property of the character. This
4111:    * is used in the visual ordering of text.
4112:    *
4113:    * @param codePoint the character to look up
4114:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4115:    * @see #DIRECTIONALITY_UNDEFINED
4116:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4117:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4118:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4119:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4120:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4121:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4122:    * @see #DIRECTIONALITY_ARABIC_NUMBER
4123:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4124:    * @see #DIRECTIONALITY_NONSPACING_MARK
4125:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4126:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4127:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4128:    * @see #DIRECTIONALITY_WHITESPACE
4129:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
4130:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4131:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4132:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4133:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4134:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4135:    * @since 1.5
4136:    */
4137:   public static byte getDirectionality(int codePoint)
4138:   {
4139:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
4140:     // above to PrivateUseCharacters.
4141:     final int planeIndex = codePoint >>> 16;
4142:     if (planeIndex >= 3 && planeIndex <= 13)
4143:       return UnassignedCharacters.getDirectionality(codePoint);
4144:     if (planeIndex >= 15)
4145:       return PrivateUseCharacters.getDirectionality(codePoint);
4146:
4147:     // Drop the two low bits of the table entry; the byte cast makes it signed.
4148:     return (byte) (direction[planeIndex][readCodePoint(codePoint) >> 7] >> 2);
4149:   }
4150:   
4151:   /**
4152:    * Determines whether the character is mirrored according to Unicode. For
4153:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4154:    * left-to-right text, but ')' in right-to-left text.
4155:    *
4156:    * @param ch the character to look up
4157:    * @return true if the character is mirrored
4158:    * @since 1.4
4159:    */
4160:   public static boolean isMirrored(char ch)
4161:   {
4162:     return 0 != (MIRROR_MASK & readCodePoint((int) ch));
4163:   }
4164:   
4165:   /**
4166:    * Determines whether the character is mirrored according to Unicode. For
4167:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4168:    * left-to-right text, but ')' in right-to-left text.
4169:    *
4170:    * @param codePoint the character to look up
4171:    * @return true if the character is mirrored
4172:    * @since 1.5
4173:    */
4174:   public static boolean isMirrored(int codePoint)
4175:   {
4176:     // Planes 3..13 are delegated to UnassignedCharacters and planes 15 and
4177:     // above to PrivateUseCharacters.
4178:     final int planeIndex = codePoint >>> 16;
4179:     if (planeIndex >= 3 && planeIndex <= 13)
4180:       return UnassignedCharacters.isMirrored(codePoint);
4181:     if (planeIndex >= 15)
4182:       return PrivateUseCharacters.isMirrored(codePoint);
4183:
4184:     return 0 != (MIRROR_MASK & readCodePoint(codePoint));
4185:   }
4186: 
4187:   /**
4188:    * Compares another Character to this Character, numerically.
4189:    *
4190:    * @param anotherCharacter Character to compare with this Character
4191:    * @return a negative integer if this Character is less than
4192:    *         anotherCharacter, zero if this Character is equal, and
4193:    *         a positive integer if this Character is greater
4194:    * @throws NullPointerException if anotherCharacter is null
4195:    * @since 1.2
4196:    */
4197:   public int compareTo(Character anotherCharacter)
4198:   {
4199:     return this.value - anotherCharacter.value;
4200:   }
4201: 
4202:   /**
4203:    * Returns a <code>Character</code> object wrapping the value.
4204:    * In contrast to the <code>Character</code> constructor, this method
4205:    * will cache some values.  It is used by boxing conversion.
4206:    *
4207:    * @param val the value to wrap
4208:    * @return the <code>Character</code>
4209:    *
4210:    * @since 1.5
4211:    */
4212:   public static Character valueOf(char val)
4213:   {
4214:     if (val > MAX_CACHE)
4215:       return new Character(val);
4216:     else
4217:       return charCache[val - MIN_VALUE];
4218:   }
4219: 
4220:   /**
4221:    * Reverse the bytes in val.
4222:    * @since 1.5
4223:    */
4224:   public static char reverseBytes(char val)
4225:   {
4226:     return (char) ((val >>> 8) | (val << 8));  // the cast keeps the low 16 bits
4227:   }
4228: 
4229:   /**
4230:    * Converts a unicode code point to a UTF-16 representation of that
4231:    * code point.
4232:    * 
4233:    * @param codePoint the unicode code point
4234:    *
4235:    * @return the UTF-16 representation of that code point
4236:    *
4237:    * @throws IllegalArgumentException if the code point is not a valid
4238:    *         unicode code point
4239:    *
4240:    * @since 1.5
4241:    */
4242:   public static char[] toChars(int codePoint)
4243:   {
4244:     if (!isValidCodePoint(codePoint))
4245:       throw new IllegalArgumentException("Illegal Unicode code point : "
4246:                                          + codePoint);
4247:     final char[] result = new char[charCount(codePoint)];
4248:     toChars(codePoint, result, 0);  // the returned count is not needed
4249:     return result;
4250:   }
4251: 
4252:   /**
4253:    * Converts a unicode code point to its UTF-16 representation.
4254:    *
4255:    * @param codePoint the unicode code point
4256:    * @param dst the target char array
4257:    * @param dstIndex the start index for the target
4258:    *
4259:    * @return number of characters written to <code>dst</code>
4260:    *
4261:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
4262:    *         valid unicode code point
4263:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
4264:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
4265:    *         in <code>dst</code> or if the UTF-16 representation does not
4266:    *         fit into <code>dst</code>
4267:    *
4268:    * @since 1.5
4269:    */
4270:   public static int toChars(int codePoint, char[] dst, int dstIndex)
4271:   {
4272:     if (!isValidCodePoint(codePoint))
4273:       {
4274:         throw new IllegalArgumentException("not a valid code point: "
4275:                                            + codePoint);
4276:       }
4277: 
4278:     int result;
4279:     if (isSupplementaryCodePoint(codePoint))
4280:       {
4281:         // Write second char first to cause IndexOutOfBoundsException
4282:         // immediately.
4283:         final int cp2 = codePoint - 0x10000;
4284:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
4285:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
4286:         result = 2;
4287:       }
4288:     else
4289:       {
4290:         dst[dstIndex] = (char) codePoint;
4291:         result = 1; 
4292:       }
4293:     return result;
4294:   }
4295: 
4296:   /**
4297:    * Return number of 16-bit characters required to represent the given
4298:    * code point.
4299:    *
4300:    * @param codePoint a unicode code point
4301:    *
4302:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
4303:    *
4304:    * @since 1.5
4305:    */
4306:   public static int charCount(int codePoint)
4307:   {
4308:     // Validity is not checked; only the lower bound of the range matters.
4309:     if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT)
4310:       return 1;
4311:     return 2;
4312:   }
4313: 
4314:   /**
4315:    * Determines whether the specified code point is
4316:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
4317:    * supplementary character range.
4318:    *
4319:    * @param codePoint a Unicode code point
4320:    *
4321:    * @return <code>true</code> if code point is in supplementary range
4322:    *
4323:    * @since 1.5
4324:    */
4325:   public static boolean isSupplementaryCodePoint(int codePoint)
4326:   {
4327:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4328:       && codePoint <= MAX_CODE_POINT;
4329:   }
4330: 
4331:   /**
4332:    * Determines whether the specified code point is
4333:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
4334:    *
4335:    * @param codePoint a Unicode code point
4336:    *
4337:    * @return <code>true</code> if code point is valid
4338:    *
4339:    * @since 1.5
4340:    */
4341:   public static boolean isValidCodePoint(int codePoint)
4342:   {
4343:     return !(codePoint < MIN_CODE_POINT || codePoint > MAX_CODE_POINT);
4344:   }
4345: 
4346:   /**
4347:    * Return true if the given character is a high surrogate.
4348:    * @param ch the character
4349:    * @return true if the character is a high surrogate character
4350:    *
4351:    * @since 1.5
4352:    */
4353:   public static boolean isHighSurrogate(char ch)
4354:   {
4355:     return MIN_HIGH_SURROGATE <= ch && ch <= MAX_HIGH_SURROGATE;
4356:   }
4357: 
4358:   /**
4359:    * Return true if the given character is a low surrogate.
4360:    * @param ch the character
4361:    * @return true if the character is a low surrogate character
4362:    *
4363:    * @since 1.5
4364:    */
4365:   public static boolean isLowSurrogate(char ch)
4366:   {
4367:     return MIN_LOW_SURROGATE <= ch && ch <= MAX_LOW_SURROGATE;
4368:   }
4369: 
4370:   /**
4371:    * Return true if the given characters compose a surrogate pair.
4372:    * This is true if the first character is a high surrogate and the
4373:    * second character is a low surrogate.
4374:    * @param ch1 the first character
4375:    * @param ch2 the second character
4376:    * @return true if the characters compose a surrogate pair
4377:    *
4378:    * @since 1.5
4379:    */
4380:   public static boolean isSurrogatePair(char ch1, char ch2)
4381:   {
4382:     return isHighSurrogate(ch1) ? isLowSurrogate(ch2) : false;
4383:   }
4384: 
4385:   /**
4386:    * Given a valid surrogate pair, this returns the corresponding
4387:    * code point.
4388:    * @param high the high character of the pair
4389:    * @param low the low character of the pair
4390:    * @return the corresponding code point
4391:    *
4392:    * @since 1.5
4393:    */
4394:   public static int toCodePoint(char high, char low)
4395:   {
4396:     final int highBits = (high - MIN_HIGH_SURROGATE) << 10;
4397:     return 0x10000 + highBits + (low - MIN_LOW_SURROGATE);
4398:   }
4399: 
4400:   /**
4401:    * Get the code point at the specified index in the CharSequence.
4402:    * This is like CharSequence#charAt(int), but if the character is
4403:    * the start of a surrogate pair, and there is a following
4404:    * character, and this character completes the pair, then the
4405:    * corresponding supplementary code point is returned.  Otherwise,
4406:    * the character at the index is returned.
4407:    *
4408:    * @param sequence the CharSequence
4409:    * @param index the index of the codepoint to get, starting at 0
4410:    * @return the codepoint at the specified index
4411:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4412:    * @since 1.5
4413:    */
4414:   public static int codePointAt(CharSequence sequence, int index)
4415:   {
4416:     int len = sequence.length();
4417:     if (index < 0 || index >= len)
4418:       throw new IndexOutOfBoundsException();
4419:     char high = sequence.charAt(index);
4420:     if (! isHighSurrogate(high) || ++index >= len)
4421:       return high;
4422:     char low = sequence.charAt(index);
4423:     if (! isLowSurrogate(low))
4424:       return high;
4425:     return toCodePoint(high, low);
4426:   }
4427: 
4428:   /**
4429:    * Get the code point at the specified index in the CharSequence.
4430:    * If the character is the start of a surrogate pair, and there is a
4431:    * following character, and this character completes the pair, then
4432:    * the corresponding supplementary code point is returned.
4433:    * Otherwise, the character at the index is returned.
4434:    *
4435:    * @param chars the character array in which to look
4436:    * @param index the index of the codepoint to get, starting at 0
4437:    * @return the codepoint at the specified index
4438:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4439:    * @since 1.5
4440:    */
4441:   public static int codePointAt(char[] chars, int index)
4442:   {
4443:     return codePointAt(chars, index, chars.length);
4444:   }
4445: 
4446:   /**
4447:    * Get the code point at the specified index in the CharSequence.
4448:    * If the character is the start of a surrogate pair, and there is a
4449:    * following character within the specified range, and this
4450:    * character completes the pair, then the corresponding
4451:    * supplementary code point is returned.  Otherwise, the character
4452:    * at the index is returned.
4453:    *
4454:    * @param chars the character array in which to look
4455:    * @param index the index of the codepoint to get, starting at 0
4456:    * @param limit the limit past which characters should not be examined
4457:    * @return the codepoint at the specified index
4458:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
4459:    * limit, or if limit is negative or &gt;= the length of the array
4460:    * @since 1.5
4461:    */
4462:   public static int codePointAt(char[] chars, int index, int limit)
4463:   {
4464:     if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
4465:       throw new IndexOutOfBoundsException();
4466:     char high = chars[index];
4467:     if (! isHighSurrogate(high) || ++index >= limit)
4468:       return high;
4469:     char low = chars[index];
4470:     if (! isLowSurrogate(low))
4471:       return high;
4472:     return toCodePoint(high, low);
4473:   }
4474: 
4475:   /**
4476:    * Get the code point before the specified index.  This is like
4477:    * #codePointAt(char[], int), but checks the characters at
4478:    * <code>index-1</code> and <code>index-2</code> to see if they form
4479:    * a supplementary code point.  If they do not, the character at
4480:    * <code>index-1</code> is returned.
4481:    *
4482:    * @param chars the character array
4483:    * @param index the index just past the codepoint to get, starting at 0
4484:    * @return the codepoint at the specified index
4485:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4486:    * @since 1.5
4487:    */
4488:   public static int codePointBefore(char[] chars, int index)
4489:   {
4490:     return codePointBefore(chars, index, 1);
4491:   }
4492: 
4493:   /**
4494:    * Get the code point before the specified index.  This is like
4495:    * #codePointAt(char[], int), but checks the characters at
4496:    * <code>index-1</code> and <code>index-2</code> to see if they form
4497:    * a supplementary code point.  If they do not, the character at
4498:    * <code>index-1</code> is returned.  The start parameter is used to
4499:    * limit the range of the array which may be examined.
4500:    *
4501:    * @param chars the character array
4502:    * @param index the index just past the codepoint to get, starting at 0
4503:    * @param start the index before which characters should not be examined
4504:    * @return the codepoint at the specified index
4505:    * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
4506:    * the length of the array, or if limit is negative or &gt;= the
4507:    * length of the array
4508:    * @since 1.5
4509:    */
4510:   public static int codePointBefore(char[] chars, int index, int start)
4511:   {
4512:     if (index < start || index > chars.length
4513:     || start < 0 || start >= chars.length)
4514:       throw new IndexOutOfBoundsException();
4515:     --index;
4516:     char low = chars[index];
4517:     if (! isLowSurrogate(low) || --index < start)
4518:       return low;
4519:     char high = chars[index];
4520:     if (! isHighSurrogate(high))
4521:       return low;
4522:     return toCodePoint(high, low);
4523:   }
4524: 
4525:   /**
4526:    * Get the code point before the specified index.  This is like
4527:    * #codePointAt(CharSequence, int), but checks the characters at
4528:    * <code>index-1</code> and <code>index-2</code> to see if they form
4529:    * a supplementary code point.  If they do not, the character at
4530:    * <code>index-1</code> is returned.
4531:    *
4532:    * @param sequence the CharSequence
4533:    * @param index the index just past the codepoint to get, starting at 0
4534:    * @return the codepoint at the specified index
4535:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4536:    * @since 1.5
4537:    */
4538:   public static int codePointBefore(CharSequence sequence, int index)
4539:   {
4540:     int len = sequence.length();
4541:     if (index < 1 || index > len)
4542:       throw new IndexOutOfBoundsException();
4543:     --index;
4544:     char low = sequence.charAt(index);
4545:     if (! isLowSurrogate(low) || --index < 0)
4546:       return low;
4547:     char high = sequence.charAt(index);
4548:     if (! isHighSurrogate(high))
4549:       return low;
4550:     return toCodePoint(high, low);
4551:   }
4552: } // class Character