Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: /*
  39:  * Note: This class must not be merged with Classpath.  Gcj uses C-style
  40:  * arrays (see include/java-chartables.h) to store the Unicode character
  41:  * database, whereas Classpath uses Java objects (char[] extracted from
  42:  * String constants) in gnu.java.lang.CharData.  Gcj's approach is more
  43:  * efficient, because there is no vtable or data relocation to worry about.
  44:  * However, despite the difference in the database interface, the two
  45:  * versions share identical algorithms.
  46:  */
  47: 
  48: package java.lang;
  49: 
  50: import java.io.Serializable;
  51: import java.text.Collator;
  52: import java.util.Locale;
  53: 
  54: /**
  55:  * Wrapper class for the primitive char data type.  In addition, this class
  56:  * allows one to retrieve property information and perform transformations
  57:  * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
  58:  * java.lang.Character is designed to be very dynamic, and as such, it
  59:  * retrieves information on the Unicode character set from a separate
  60:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  61:  *
  62:  * <p>For predicates, boundaries are used to describe
  63:  * the set of characters for which the method will return true.
  64:  * This syntax uses fairly normal regular expression notation.
  65:  * See 5.13 of the Unicode Standard, Version 3.0, for the
  66:  * boundary specification.
  67:  *
  68:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  69:  * for more information on the Unicode Standard.
  70:  *
  71:  * @author Tom Tromey (tromey@cygnus.com)
  72:  * @author Paul N. Fisher
  73:  * @author Jochen Hoenicke
  74:  * @author Eric Blake (ebb9@email.byu.edu)
  75:  * @since 1.0
  76:  * @status updated to 1.4
  77:  */
  78: public final class Character implements Serializable, Comparable
  79: {
  80:   /**
  81:    * A subset of Unicode blocks.
  82:    *
  83:    * @author Paul N. Fisher
  84:    * @author Eric Blake (ebb9@email.byu.edu)
  85:    * @since 1.2
  86:    */
  87:   public static class Subset
  88:   {
  89:     /** The name of the subset. */
  90:     private final String name;
  91: 
  92:     /**
  93:      * Construct a new subset of characters.
  94:      *
  95:      * @param name the name of the subset
  96:      * @throws NullPointerException if name is null
  97:      */
  98:     protected Subset(String name)
  99:     {
 100:       // Note that name.toString() is name, unless name was null.
 101:       this.name = name.toString();
 102:     }
 103: 
 104:     /**
 105:      * Compares two Subsets for equality. This is <code>final</code>, and
 106:      * restricts the comparison on the <code>==</code> operator, so it returns
 107:      * true only for the same object.
 108:      *
 109:      * @param o the object to compare
 110:      * @return true if o is this
 111:      */
 112:     public final boolean equals(Object o)
 113:     {
 114:       return o == this;
 115:     }
 116: 
 117:     /**
 118:      * Makes the original hashCode of Object final, to be consistent with
 119:      * equals.
 120:      *
 121:      * @return the hash code for this object
 122:      */
 123:     public final int hashCode()
 124:     {
 125:       return super.hashCode();
 126:     }
 127: 
 128:     /**
 129:      * Returns the name of the subset.
 130:      *
 131:      * @return the name
 132:      */
 133:     public final String toString()
 134:     {
 135:       return name;
 136:     }
 137:   } // class Subset
 138: 
 139:   /**
 140:    * A family of character subsets in the Unicode specification. A character
 141:    * is in at most one of these blocks.
 142:    *
 143:    * This inner class was generated automatically from
 144:    * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts.
 145:    * This Unicode definition file can be found on the
 146:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 147:    * JDK 1.4 uses Unicode version 3.0.0.
 148:    *
 149:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 150:    * @since 1.2
 151:    */
 152:   public static final class UnicodeBlock extends Subset
 153:   {
 154:     /** The start of the subset. */
 155:     private final int start;
 156: 
 157:     /** The end of the subset. */
 158:     private final int end;
 159: 
 160:     /** The canonical name of the block according to the Unicode standard. */
 161:     private final String canonicalName;
 162: 
 163:     /** Constants for the <code>forName()</code> method */
 164:     private static final int CANONICAL_NAME = 0;
 165:     private static final int NO_SPACES_NAME = 1;
 166:     private static final int CONSTANT_NAME = 2;
 167: 
 168:     /**
 169:      * Constructor for strictly defined blocks.
 170:      *
 171:      * @param start the start character of the range
 172:      * @param end the end character of the range
 173:      * @param name the block name
 174:      */
 175:     private UnicodeBlock(int start, int end, String name,
 176:              String canonicalName)
 177:     {
 178:       super(name);
 179:       this.start = start;
 180:       this.end = end;
 181:       this.canonicalName = canonicalName;
 182:     }
 183: 
 184:     /**
 185:      * Returns the Unicode character block which a character belongs to.
 186:      * <strong>Note</strong>: This method does not support the use of
 187:      * supplementary characters.  For such support, <code>of(int)</code>
 188:      * should be used instead.
 189:      *
 190:      * @param ch the character to look up
 191:      * @return the set it belongs to, or null if it is not in one
 192:      */
 193:     public static UnicodeBlock of(char ch)
 194:     {
 195:       return of((int) ch);
 196:     }
 197: 
 198:     /**
 199:      * Returns the Unicode character block which a code point belongs to.
 200:      *
 201:      * @param codePoint the character to look up
 202:      * @return the set it belongs to, or null if it is not in one.
 203:      * @throws IllegalArgumentException if the specified code point is
 204:      *         invalid.
 205:      * @since 1.5
 206:      */
 207:     public static UnicodeBlock of(int codePoint)
 208:     {
 209:       if (codePoint > MAX_CODE_POINT)
 210:     throw new IllegalArgumentException("The supplied integer value is " +
 211:                        "too large to be a codepoint.");
 212:       // Simple binary search for the correct block.
 213:       int low = 0;
 214:       int hi = sets.length - 1;
 215:       while (low <= hi)
 216:         {
 217:           int mid = (low + hi) >> 1;
 218:           UnicodeBlock b = sets[mid];
 219:           if (codePoint < b.start)
 220:             hi = mid - 1;
 221:           else if (codePoint > b.end)
 222:             low = mid + 1;
 223:           else
 224:             return b;
 225:         }
 226:       return null;
 227:     }
 228: 
 229:     /**
 230:      * <p>
 231:      * Returns the <code>UnicodeBlock</code> with the given name, as defined
 232:      * by the Unicode standard.  The version of Unicode in use is defined by
 233:      * the <code>Character</code> class, and the names are given in the
 234:      * <code>Blocks-<version>.txt</code> file corresponding to that version.
 235:      * The name may be specified in one of three ways:
 236:      * </p>
 237:      * <ol>
 238:      * <li>The canonical, human-readable name used by the Unicode standard.
 239:      * This is the name with all spaces and hyphens retained.  For example,
 240:      * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 241:      * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 242:      * <li>The name used for the constants specified by this class, which
 243:      * is the canonical name with all spaces and hyphens replaced with
 244:      * underscores e.g. `BASIC_LATIN'</li>
 245:      * </ol>
 246:      * <p>
 247:      * The names are compared case-insensitively using the case comparison
 248:      * associated with the U.S. English locale.  The method recognises the
 249:      * previous names used for blocks as well as the current ones.  At
 250:      * present, this simply means that the deprecated `SURROGATES_AREA'
 251:      * will be recognised by this method (the <code>of()</code> methods
 252:      * only return one of the three new surrogate blocks).
 253:      * </p>
 254:      *
 255:      * @param blockName the name of the block to look up.
 256:      * @return the specified block.
 257:      * @throws NullPointerException if the <code>blockName</code> is
 258:      *         <code>null</code>.
 259:      * @throws IllegalArgumentException if the name does not match any Unicode
 260:      *         block.
 261:      * @since 1.5
 262:      */
 263:     public static final UnicodeBlock forName(String blockName)
 264:     {
 265:       int type;
 266:       if (blockName.indexOf(' ') != -1)
 267:         type = CANONICAL_NAME;
 268:       else if (blockName.indexOf('_') != -1)
 269:         type = CONSTANT_NAME;
 270:       else
 271:         type = NO_SPACES_NAME;
 272:       Collator usCollator = Collator.getInstance(Locale.US);
 273:       usCollator.setStrength(Collator.PRIMARY);
 274:       /* Special case for deprecated blocks not in sets */
 275:       switch (type)
 276:       {
 277:         case CANONICAL_NAME:
 278:           if (usCollator.compare(blockName, "Surrogates Area") == 0)
 279:             return SURROGATES_AREA;
 280:           break;
 281:         case NO_SPACES_NAME:
 282:           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 283:             return SURROGATES_AREA;
 284:           break;
 285:         case CONSTANT_NAME:
 286:           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
 287:             return SURROGATES_AREA;
 288:           break;
 289:       }
 290:       /* Other cases */
 291:       int setLength = sets.length;
 292:       switch (type)
 293:       {
 294:         case CANONICAL_NAME:
 295:           for (int i = 0; i < setLength; i++)
 296:             {
 297:               UnicodeBlock block = sets[i];
 298:               if (usCollator.compare(blockName, block.canonicalName) == 0)
 299:                 return block;
 300:             }
 301:           break;
 302:         case NO_SPACES_NAME:
 303:           for (int i = 0; i < setLength; i++)
 304:             {
 305:               UnicodeBlock block = sets[i];
 306:               String nsName = block.canonicalName.replaceAll(" ","");
 307:               if (usCollator.compare(blockName, nsName) == 0)
 308:                 return block;
 309:             }        
 310:           break;
 311:         case CONSTANT_NAME:
 312:           for (int i = 0; i < setLength; i++)
 313:             {
 314:               UnicodeBlock block = sets[i];
 315:               if (usCollator.compare(blockName, block.toString()) == 0)
 316:                 return block;
 317:             }
 318:           break;
 319:       }
 320:       throw new IllegalArgumentException("No Unicode block found for " +
 321:                                          blockName + ".");
 322:     }
 323: 
 324:     /**
 325:      * Basic Latin.
 326:      * 0x0000 - 0x007F.
 327:      */
 328:     public static final UnicodeBlock BASIC_LATIN
 329:       = new UnicodeBlock(0x0000, 0x007F,
 330:                          "BASIC_LATIN", 
 331:                          "Basic Latin");
 332: 
 333:     /**
 334:      * Latin-1 Supplement.
 335:      * 0x0080 - 0x00FF.
 336:      */
 337:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 338:       = new UnicodeBlock(0x0080, 0x00FF,
 339:                          "LATIN_1_SUPPLEMENT", 
 340:                          "Latin-1 Supplement");
 341: 
 342:     /**
 343:      * Latin Extended-A.
 344:      * 0x0100 - 0x017F.
 345:      */
 346:     public static final UnicodeBlock LATIN_EXTENDED_A
 347:       = new UnicodeBlock(0x0100, 0x017F,
 348:                          "LATIN_EXTENDED_A", 
 349:                          "Latin Extended-A");
 350: 
 351:     /**
 352:      * Latin Extended-B.
 353:      * 0x0180 - 0x024F.
 354:      */
 355:     public static final UnicodeBlock LATIN_EXTENDED_B
 356:       = new UnicodeBlock(0x0180, 0x024F,
 357:                          "LATIN_EXTENDED_B", 
 358:                          "Latin Extended-B");
 359: 
 360:     /**
 361:      * IPA Extensions.
 362:      * 0x0250 - 0x02AF.
 363:      */
 364:     public static final UnicodeBlock IPA_EXTENSIONS
 365:       = new UnicodeBlock(0x0250, 0x02AF,
 366:                          "IPA_EXTENSIONS", 
 367:                          "IPA Extensions");
 368: 
 369:     /**
 370:      * Spacing Modifier Letters.
 371:      * 0x02B0 - 0x02FF.
 372:      */
 373:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 374:       = new UnicodeBlock(0x02B0, 0x02FF,
 375:                          "SPACING_MODIFIER_LETTERS", 
 376:                          "Spacing Modifier Letters");
 377: 
 378:     /**
 379:      * Combining Diacritical Marks.
 380:      * 0x0300 - 0x036F.
 381:      */
 382:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 383:       = new UnicodeBlock(0x0300, 0x036F,
 384:                          "COMBINING_DIACRITICAL_MARKS", 
 385:                          "Combining Diacritical Marks");
 386: 
 387:     /**
 388:      * Greek.
 389:      * 0x0370 - 0x03FF.
 390:      */
 391:     public static final UnicodeBlock GREEK
 392:       = new UnicodeBlock(0x0370, 0x03FF,
 393:                          "GREEK", 
 394:                          "Greek");
 395: 
 396:     /**
 397:      * Cyrillic.
 398:      * 0x0400 - 0x04FF.
 399:      */
 400:     public static final UnicodeBlock CYRILLIC
 401:       = new UnicodeBlock(0x0400, 0x04FF,
 402:                          "CYRILLIC", 
 403:                          "Cyrillic");
 404: 
 405:     /**
 406:      * Cyrillic Supplementary.
 407:      * 0x0500 - 0x052F.
 408:      * @since 1.5
 409:      */
 410:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 411:       = new UnicodeBlock(0x0500, 0x052F,
 412:                          "CYRILLIC_SUPPLEMENTARY", 
 413:                          "Cyrillic Supplementary");
 414: 
 415:     /**
 416:      * Armenian.
 417:      * 0x0530 - 0x058F.
 418:      */
 419:     public static final UnicodeBlock ARMENIAN
 420:       = new UnicodeBlock(0x0530, 0x058F,
 421:                          "ARMENIAN", 
 422:                          "Armenian");
 423: 
 424:     /**
 425:      * Hebrew.
 426:      * 0x0590 - 0x05FF.
 427:      */
 428:     public static final UnicodeBlock HEBREW
 429:       = new UnicodeBlock(0x0590, 0x05FF,
 430:                          "HEBREW", 
 431:                          "Hebrew");
 432: 
 433:     /**
 434:      * Arabic.
 435:      * 0x0600 - 0x06FF.
 436:      */
 437:     public static final UnicodeBlock ARABIC
 438:       = new UnicodeBlock(0x0600, 0x06FF,
 439:                          "ARABIC", 
 440:                          "Arabic");
 441: 
 442:     /**
 443:      * Syriac.
 444:      * 0x0700 - 0x074F.
 445:      * @since 1.4
 446:      */
 447:     public static final UnicodeBlock SYRIAC
 448:       = new UnicodeBlock(0x0700, 0x074F,
 449:                          "SYRIAC", 
 450:                          "Syriac");
 451: 
 452:     /**
 453:      * Thaana.
 454:      * 0x0780 - 0x07BF.
 455:      * @since 1.4
 456:      */
 457:     public static final UnicodeBlock THAANA
 458:       = new UnicodeBlock(0x0780, 0x07BF,
 459:                          "THAANA", 
 460:                          "Thaana");
 461: 
 462:     /**
 463:      * Devanagari.
 464:      * 0x0900 - 0x097F.
 465:      */
 466:     public static final UnicodeBlock DEVANAGARI
 467:       = new UnicodeBlock(0x0900, 0x097F,
 468:                          "DEVANAGARI", 
 469:                          "Devanagari");
 470: 
 471:     /**
 472:      * Bengali.
 473:      * 0x0980 - 0x09FF.
 474:      */
 475:     public static final UnicodeBlock BENGALI
 476:       = new UnicodeBlock(0x0980, 0x09FF,
 477:                          "BENGALI", 
 478:                          "Bengali");
 479: 
 480:     /**
 481:      * Gurmukhi.
 482:      * 0x0A00 - 0x0A7F.
 483:      */
 484:     public static final UnicodeBlock GURMUKHI
 485:       = new UnicodeBlock(0x0A00, 0x0A7F,
 486:                          "GURMUKHI", 
 487:                          "Gurmukhi");
 488: 
 489:     /**
 490:      * Gujarati.
 491:      * 0x0A80 - 0x0AFF.
 492:      */
 493:     public static final UnicodeBlock GUJARATI
 494:       = new UnicodeBlock(0x0A80, 0x0AFF,
 495:                          "GUJARATI", 
 496:                          "Gujarati");
 497: 
 498:     /**
 499:      * Oriya.
 500:      * 0x0B00 - 0x0B7F.
 501:      */
 502:     public static final UnicodeBlock ORIYA
 503:       = new UnicodeBlock(0x0B00, 0x0B7F,
 504:                          "ORIYA", 
 505:                          "Oriya");
 506: 
 507:     /**
 508:      * Tamil.
 509:      * 0x0B80 - 0x0BFF.
 510:      */
 511:     public static final UnicodeBlock TAMIL
 512:       = new UnicodeBlock(0x0B80, 0x0BFF,
 513:                          "TAMIL", 
 514:                          "Tamil");
 515: 
 516:     /**
 517:      * Telugu.
 518:      * 0x0C00 - 0x0C7F.
 519:      */
 520:     public static final UnicodeBlock TELUGU
 521:       = new UnicodeBlock(0x0C00, 0x0C7F,
 522:                          "TELUGU", 
 523:                          "Telugu");
 524: 
 525:     /**
 526:      * Kannada.
 527:      * 0x0C80 - 0x0CFF.
 528:      */
 529:     public static final UnicodeBlock KANNADA
 530:       = new UnicodeBlock(0x0C80, 0x0CFF,
 531:                          "KANNADA", 
 532:                          "Kannada");
 533: 
 534:     /**
 535:      * Malayalam.
 536:      * 0x0D00 - 0x0D7F.
 537:      */
 538:     public static final UnicodeBlock MALAYALAM
 539:       = new UnicodeBlock(0x0D00, 0x0D7F,
 540:                          "MALAYALAM", 
 541:                          "Malayalam");
 542: 
 543:     /**
 544:      * Sinhala.
 545:      * 0x0D80 - 0x0DFF.
 546:      * @since 1.4
 547:      */
 548:     public static final UnicodeBlock SINHALA
 549:       = new UnicodeBlock(0x0D80, 0x0DFF,
 550:                          "SINHALA", 
 551:                          "Sinhala");
 552: 
 553:     /**
 554:      * Thai.
 555:      * 0x0E00 - 0x0E7F.
 556:      */
 557:     public static final UnicodeBlock THAI
 558:       = new UnicodeBlock(0x0E00, 0x0E7F,
 559:                          "THAI", 
 560:                          "Thai");
 561: 
 562:     /**
 563:      * Lao.
 564:      * 0x0E80 - 0x0EFF.
 565:      */
 566:     public static final UnicodeBlock LAO
 567:       = new UnicodeBlock(0x0E80, 0x0EFF,
 568:                          "LAO", 
 569:                          "Lao");
 570: 
 571:     /**
 572:      * Tibetan.
 573:      * 0x0F00 - 0x0FFF.
 574:      */
 575:     public static final UnicodeBlock TIBETAN
 576:       = new UnicodeBlock(0x0F00, 0x0FFF,
 577:                          "TIBETAN", 
 578:                          "Tibetan");
 579: 
 580:     /**
 581:      * Myanmar.
 582:      * 0x1000 - 0x109F.
 583:      * @since 1.4
 584:      */
 585:     public static final UnicodeBlock MYANMAR
 586:       = new UnicodeBlock(0x1000, 0x109F,
 587:                          "MYANMAR", 
 588:                          "Myanmar");
 589: 
 590:     /**
 591:      * Georgian.
 592:      * 0x10A0 - 0x10FF.
 593:      */
 594:     public static final UnicodeBlock GEORGIAN
 595:       = new UnicodeBlock(0x10A0, 0x10FF,
 596:                          "GEORGIAN", 
 597:                          "Georgian");
 598: 
 599:     /**
 600:      * Hangul Jamo.
 601:      * 0x1100 - 0x11FF.
 602:      */
 603:     public static final UnicodeBlock HANGUL_JAMO
 604:       = new UnicodeBlock(0x1100, 0x11FF,
 605:                          "HANGUL_JAMO", 
 606:                          "Hangul Jamo");
 607: 
 608:     /**
 609:      * Ethiopic.
 610:      * 0x1200 - 0x137F.
 611:      * @since 1.4
 612:      */
 613:     public static final UnicodeBlock ETHIOPIC
 614:       = new UnicodeBlock(0x1200, 0x137F,
 615:                          "ETHIOPIC", 
 616:                          "Ethiopic");
 617: 
 618:     /**
 619:      * Cherokee.
 620:      * 0x13A0 - 0x13FF.
 621:      * @since 1.4
 622:      */
 623:     public static final UnicodeBlock CHEROKEE
 624:       = new UnicodeBlock(0x13A0, 0x13FF,
 625:                          "CHEROKEE", 
 626:                          "Cherokee");
 627: 
 628:     /**
 629:      * Unified Canadian Aboriginal Syllabics.
 630:      * 0x1400 - 0x167F.
 631:      * @since 1.4
 632:      */
 633:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 634:       = new UnicodeBlock(0x1400, 0x167F,
 635:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 636:                          "Unified Canadian Aboriginal Syllabics");
 637: 
 638:     /**
 639:      * Ogham.
 640:      * 0x1680 - 0x169F.
 641:      * @since 1.4
 642:      */
 643:     public static final UnicodeBlock OGHAM
 644:       = new UnicodeBlock(0x1680, 0x169F,
 645:                          "OGHAM", 
 646:                          "Ogham");
 647: 
 648:     /**
 649:      * Runic.
 650:      * 0x16A0 - 0x16FF.
 651:      * @since 1.4
 652:      */
 653:     public static final UnicodeBlock RUNIC
 654:       = new UnicodeBlock(0x16A0, 0x16FF,
 655:                          "RUNIC", 
 656:                          "Runic");
 657: 
 658:     /**
 659:      * Tagalog.
 660:      * 0x1700 - 0x171F.
 661:      * @since 1.5
 662:      */
 663:     public static final UnicodeBlock TAGALOG
 664:       = new UnicodeBlock(0x1700, 0x171F,
 665:                          "TAGALOG", 
 666:                          "Tagalog");
 667: 
 668:     /**
 669:      * Hanunoo.
 670:      * 0x1720 - 0x173F.
 671:      * @since 1.5
 672:      */
 673:     public static final UnicodeBlock HANUNOO
 674:       = new UnicodeBlock(0x1720, 0x173F,
 675:                          "HANUNOO", 
 676:                          "Hanunoo");
 677: 
 678:     /**
 679:      * Buhid.
 680:      * 0x1740 - 0x175F.
 681:      * @since 1.5
 682:      */
 683:     public static final UnicodeBlock BUHID
 684:       = new UnicodeBlock(0x1740, 0x175F,
 685:                          "BUHID", 
 686:                          "Buhid");
 687: 
 688:     /**
 689:      * Tagbanwa.
 690:      * 0x1760 - 0x177F.
 691:      * @since 1.5
 692:      */
 693:     public static final UnicodeBlock TAGBANWA
 694:       = new UnicodeBlock(0x1760, 0x177F,
 695:                          "TAGBANWA", 
 696:                          "Tagbanwa");
 697: 
 698:     /**
 699:      * Khmer.
 700:      * 0x1780 - 0x17FF.
 701:      * @since 1.4
 702:      */
 703:     public static final UnicodeBlock KHMER
 704:       = new UnicodeBlock(0x1780, 0x17FF,
 705:                          "KHMER", 
 706:                          "Khmer");
 707: 
 708:     /**
 709:      * Mongolian.
 710:      * 0x1800 - 0x18AF.
 711:      * @since 1.4
 712:      */
 713:     public static final UnicodeBlock MONGOLIAN
 714:       = new UnicodeBlock(0x1800, 0x18AF,
 715:                          "MONGOLIAN", 
 716:                          "Mongolian");
 717: 
 718:     /**
 719:      * Limbu.
 720:      * 0x1900 - 0x194F.
 721:      * @since 1.5
 722:      */
 723:     public static final UnicodeBlock LIMBU
 724:       = new UnicodeBlock(0x1900, 0x194F,
 725:                          "LIMBU", 
 726:                          "Limbu");
 727: 
 728:     /**
 729:      * Tai Le.
 730:      * 0x1950 - 0x197F.
 731:      * @since 1.5
 732:      */
 733:     public static final UnicodeBlock TAI_LE
 734:       = new UnicodeBlock(0x1950, 0x197F,
 735:                          "TAI_LE", 
 736:                          "Tai Le");
 737: 
 738:     /**
 739:      * Khmer Symbols.
 740:      * 0x19E0 - 0x19FF.
 741:      * @since 1.5
 742:      */
 743:     public static final UnicodeBlock KHMER_SYMBOLS
 744:       = new UnicodeBlock(0x19E0, 0x19FF,
 745:                          "KHMER_SYMBOLS", 
 746:                          "Khmer Symbols");
 747: 
 748:     /**
 749:      * Phonetic Extensions.
 750:      * 0x1D00 - 0x1D7F.
 751:      * @since 1.5
 752:      */
 753:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 754:       = new UnicodeBlock(0x1D00, 0x1D7F,
 755:                          "PHONETIC_EXTENSIONS", 
 756:                          "Phonetic Extensions");
 757: 
 758:     /**
 759:      * Latin Extended Additional.
 760:      * 0x1E00 - 0x1EFF.
 761:      */
 762:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 763:       = new UnicodeBlock(0x1E00, 0x1EFF,
 764:                          "LATIN_EXTENDED_ADDITIONAL", 
 765:                          "Latin Extended Additional");
 766: 
 767:     /**
 768:      * Greek Extended.
 769:      * 0x1F00 - 0x1FFF.
 770:      */
 771:     public static final UnicodeBlock GREEK_EXTENDED
 772:       = new UnicodeBlock(0x1F00, 0x1FFF,
 773:                          "GREEK_EXTENDED", 
 774:                          "Greek Extended");
 775: 
 776:     /**
 777:      * General Punctuation.
 778:      * 0x2000 - 0x206F.
 779:      */
 780:     public static final UnicodeBlock GENERAL_PUNCTUATION
 781:       = new UnicodeBlock(0x2000, 0x206F,
 782:                          "GENERAL_PUNCTUATION", 
 783:                          "General Punctuation");
 784: 
 785:     /**
 786:      * Superscripts and Subscripts.
 787:      * 0x2070 - 0x209F.
 788:      */
 789:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 790:       = new UnicodeBlock(0x2070, 0x209F,
 791:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 792:                          "Superscripts and Subscripts");
 793: 
 794:     /**
 795:      * Currency Symbols.
 796:      * 0x20A0 - 0x20CF.
 797:      */
 798:     public static final UnicodeBlock CURRENCY_SYMBOLS
 799:       = new UnicodeBlock(0x20A0, 0x20CF,
 800:                          "CURRENCY_SYMBOLS", 
 801:                          "Currency Symbols");
 802: 
 803:     /**
 804:      * Combining Marks for Symbols.
 805:      * 0x20D0 - 0x20FF.
 806:      */
 807:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 808:       = new UnicodeBlock(0x20D0, 0x20FF,
 809:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 810:                          "Combining Marks for Symbols");
 811: 
 812:     /**
 813:      * Letterlike Symbols.
 814:      * 0x2100 - 0x214F.
 815:      */
 816:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 817:       = new UnicodeBlock(0x2100, 0x214F,
 818:                          "LETTERLIKE_SYMBOLS", 
 819:                          "Letterlike Symbols");
 820: 
 821:     /**
 822:      * Number Forms.
 823:      * 0x2150 - 0x218F.
 824:      */
 825:     public static final UnicodeBlock NUMBER_FORMS
 826:       = new UnicodeBlock(0x2150, 0x218F,
 827:                          "NUMBER_FORMS", 
 828:                          "Number Forms");
 829: 
 830:     /**
 831:      * Arrows.
 832:      * 0x2190 - 0x21FF.
 833:      */
 834:     public static final UnicodeBlock ARROWS
 835:       = new UnicodeBlock(0x2190, 0x21FF,
 836:                          "ARROWS", 
 837:                          "Arrows");
 838: 
 839:     /**
 840:      * Mathematical Operators.
 841:      * 0x2200 - 0x22FF.
 842:      */
 843:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 844:       = new UnicodeBlock(0x2200, 0x22FF,
 845:                          "MATHEMATICAL_OPERATORS", 
 846:                          "Mathematical Operators");
 847: 
 848:     /**
 849:      * Miscellaneous Technical.
 850:      * 0x2300 - 0x23FF.
 851:      */
 852:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 853:       = new UnicodeBlock(0x2300, 0x23FF,
 854:                          "MISCELLANEOUS_TECHNICAL", 
 855:                          "Miscellaneous Technical");
 856: 
 857:     /**
 858:      * Control Pictures.
 859:      * 0x2400 - 0x243F.
 860:      */
 861:     public static final UnicodeBlock CONTROL_PICTURES
 862:       = new UnicodeBlock(0x2400, 0x243F,
 863:                          "CONTROL_PICTURES", 
 864:                          "Control Pictures");
 865: 
 866:     /**
 867:      * Optical Character Recognition.
 868:      * 0x2440 - 0x245F.
 869:      */
 870:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 871:       = new UnicodeBlock(0x2440, 0x245F,
 872:                          "OPTICAL_CHARACTER_RECOGNITION", 
 873:                          "Optical Character Recognition");
 874: 
 875:     /**
 876:      * Enclosed Alphanumerics.
 877:      * 0x2460 - 0x24FF.
 878:      */
 879:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 880:       = new UnicodeBlock(0x2460, 0x24FF,
 881:                          "ENCLOSED_ALPHANUMERICS", 
 882:                          "Enclosed Alphanumerics");
 883: 
 884:     /**
 885:      * Box Drawing.
 886:      * 0x2500 - 0x257F.
 887:      */
 888:     public static final UnicodeBlock BOX_DRAWING
 889:       = new UnicodeBlock(0x2500, 0x257F,
 890:                          "BOX_DRAWING", 
 891:                          "Box Drawing");
 892: 
 893:     /**
 894:      * Block Elements.
 895:      * 0x2580 - 0x259F.
 896:      */
 897:     public static final UnicodeBlock BLOCK_ELEMENTS
 898:       = new UnicodeBlock(0x2580, 0x259F,
 899:                          "BLOCK_ELEMENTS", 
 900:                          "Block Elements");
 901: 
 902:     /**
 903:      * Geometric Shapes.
 904:      * 0x25A0 - 0x25FF.
 905:      */
 906:     public static final UnicodeBlock GEOMETRIC_SHAPES
 907:       = new UnicodeBlock(0x25A0, 0x25FF,
 908:                          "GEOMETRIC_SHAPES", 
 909:                          "Geometric Shapes");
 910: 
 911:     /**
 912:      * Miscellaneous Symbols.
 913:      * 0x2600 - 0x26FF.
 914:      */
 915:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 916:       = new UnicodeBlock(0x2600, 0x26FF,
 917:                          "MISCELLANEOUS_SYMBOLS", 
 918:                          "Miscellaneous Symbols");
 919: 
 920:     /**
 921:      * Dingbats.
 922:      * 0x2700 - 0x27BF.
 923:      */
 924:     public static final UnicodeBlock DINGBATS
 925:       = new UnicodeBlock(0x2700, 0x27BF,
 926:                          "DINGBATS", 
 927:                          "Dingbats");
 928: 
 929:     /**
 930:      * Miscellaneous Mathematical Symbols-A.
 931:      * 0x27C0 - 0x27EF.
 932:      * @since 1.5
 933:      */
 934:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 935:       = new UnicodeBlock(0x27C0, 0x27EF,
 936:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 937:                          "Miscellaneous Mathematical Symbols-A");
 938: 
 939:     /**
 940:      * Supplemental Arrows-A.
 941:      * 0x27F0 - 0x27FF.
 942:      * @since 1.5
 943:      */
 944:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 945:       = new UnicodeBlock(0x27F0, 0x27FF,
 946:                          "SUPPLEMENTAL_ARROWS_A", 
 947:                          "Supplemental Arrows-A");
 948: 
 949:     /**
 950:      * Braille Patterns.
 951:      * 0x2800 - 0x28FF.
 952:      * @since 1.4
 953:      */
 954:     public static final UnicodeBlock BRAILLE_PATTERNS
 955:       = new UnicodeBlock(0x2800, 0x28FF,
 956:                          "BRAILLE_PATTERNS", 
 957:                          "Braille Patterns");
 958: 
 959:     /**
 960:      * Supplemental Arrows-B.
 961:      * 0x2900 - 0x297F.
 962:      * @since 1.5
 963:      */
 964:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 965:       = new UnicodeBlock(0x2900, 0x297F,
 966:                          "SUPPLEMENTAL_ARROWS_B", 
 967:                          "Supplemental Arrows-B");
 968: 
 969:     /**
 970:      * Miscellaneous Mathematical Symbols-B.
 971:      * 0x2980 - 0x29FF.
 972:      * @since 1.5
 973:      */
 974:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 975:       = new UnicodeBlock(0x2980, 0x29FF,
 976:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 977:                          "Miscellaneous Mathematical Symbols-B");
 978: 
 979:     /**
 980:      * Supplemental Mathematical Operators.
 981:      * 0x2A00 - 0x2AFF.
 982:      * @since 1.5
 983:      */
 984:     public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 985:       = new UnicodeBlock(0x2A00, 0x2AFF,
 986:                          "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 987:                          "Supplemental Mathematical Operators");
 988: 
 989:     /**
 990:      * Miscellaneous Symbols and Arrows.
 991:      * 0x2B00 - 0x2BFF.
 992:      * @since 1.5
 993:      */
 994:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 995:       = new UnicodeBlock(0x2B00, 0x2BFF,
 996:                          "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 997:                          "Miscellaneous Symbols and Arrows");
 998: 
 999:     /**
1000:      * CJK Radicals Supplement.
1001:      * 0x2E80 - 0x2EFF.
1002:      * @since 1.4
1003:      */
1004:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1005:       = new UnicodeBlock(0x2E80, 0x2EFF,
1006:                          "CJK_RADICALS_SUPPLEMENT", 
1007:                          "CJK Radicals Supplement");
1008: 
1009:     /**
1010:      * Kangxi Radicals.
1011:      * 0x2F00 - 0x2FDF.
1012:      * @since 1.4
1013:      */
1014:     public static final UnicodeBlock KANGXI_RADICALS
1015:       = new UnicodeBlock(0x2F00, 0x2FDF,
1016:                          "KANGXI_RADICALS", 
1017:                          "Kangxi Radicals");
1018: 
1019:     /**
1020:      * Ideographic Description Characters.
1021:      * 0x2FF0 - 0x2FFF.
1022:      * @since 1.4
1023:      */
1024:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1025:       = new UnicodeBlock(0x2FF0, 0x2FFF,
1026:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1027:                          "Ideographic Description Characters");
1028: 
1029:     /**
1030:      * CJK Symbols and Punctuation.
1031:      * 0x3000 - 0x303F.
1032:      */
1033:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1034:       = new UnicodeBlock(0x3000, 0x303F,
1035:                          "CJK_SYMBOLS_AND_PUNCTUATION", 
1036:                          "CJK Symbols and Punctuation");
1037: 
1038:     /**
1039:      * Hiragana.
1040:      * 0x3040 - 0x309F.
1041:      */
1042:     public static final UnicodeBlock HIRAGANA
1043:       = new UnicodeBlock(0x3040, 0x309F,
1044:                          "HIRAGANA", 
1045:                          "Hiragana");
1046: 
1047:     /**
1048:      * Katakana.
1049:      * 0x30A0 - 0x30FF.
1050:      */
1051:     public static final UnicodeBlock KATAKANA
1052:       = new UnicodeBlock(0x30A0, 0x30FF,
1053:                          "KATAKANA", 
1054:                          "Katakana");
1055: 
1056:     /**
1057:      * Bopomofo.
1058:      * 0x3100 - 0x312F.
1059:      */
1060:     public static final UnicodeBlock BOPOMOFO
1061:       = new UnicodeBlock(0x3100, 0x312F,
1062:                          "BOPOMOFO", 
1063:                          "Bopomofo");
1064: 
1065:     /**
1066:      * Hangul Compatibility Jamo.
1067:      * 0x3130 - 0x318F.
1068:      */
1069:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1070:       = new UnicodeBlock(0x3130, 0x318F,
1071:                          "HANGUL_COMPATIBILITY_JAMO", 
1072:                          "Hangul Compatibility Jamo");
1073: 
1074:     /**
1075:      * Kanbun.
1076:      * 0x3190 - 0x319F.
1077:      */
1078:     public static final UnicodeBlock KANBUN
1079:       = new UnicodeBlock(0x3190, 0x319F,
1080:                          "KANBUN", 
1081:                          "Kanbun");
1082: 
1083:     /**
1084:      * Bopomofo Extended.
1085:      * 0x31A0 - 0x31BF.
1086:      * @since 1.4
1087:      */
1088:     public static final UnicodeBlock BOPOMOFO_EXTENDED
1089:       = new UnicodeBlock(0x31A0, 0x31BF,
1090:                          "BOPOMOFO_EXTENDED", 
1091:                          "Bopomofo Extended");
1092: 
1093:     /**
1094:      * Katakana Phonetic Extensions.
1095:      * 0x31F0 - 0x31FF.
1096:      * @since 1.5
1097:      */
1098:     public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1099:       = new UnicodeBlock(0x31F0, 0x31FF,
1100:                          "KATAKANA_PHONETIC_EXTENSIONS", 
1101:                          "Katakana Phonetic Extensions");
1102: 
1103:     /**
1104:      * Enclosed CJK Letters and Months.
1105:      * 0x3200 - 0x32FF.
1106:      */
1107:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1108:       = new UnicodeBlock(0x3200, 0x32FF,
1109:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1110:                          "Enclosed CJK Letters and Months");
1111: 
1112:     /**
1113:      * CJK Compatibility.
1114:      * 0x3300 - 0x33FF.
1115:      */
1116:     public static final UnicodeBlock CJK_COMPATIBILITY
1117:       = new UnicodeBlock(0x3300, 0x33FF,
1118:                          "CJK_COMPATIBILITY", 
1119:                          "CJK Compatibility");
1120: 
1121:     /**
1122:      * CJK Unified Ideographs Extension A.
1123:      * 0x3400 - 0x4DBF.
1124:      * @since 1.4
1125:      */
1126:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1127:       = new UnicodeBlock(0x3400, 0x4DBF,
1128:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1129:                          "CJK Unified Ideographs Extension A");
1130: 
1131:     /**
1132:      * Yijing Hexagram Symbols.
1133:      * 0x4DC0 - 0x4DFF.
1134:      * @since 1.5
1135:      */
1136:     public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1137:       = new UnicodeBlock(0x4DC0, 0x4DFF,
1138:                          "YIJING_HEXAGRAM_SYMBOLS", 
1139:                          "Yijing Hexagram Symbols");
1140: 
1141:     /**
1142:      * CJK Unified Ideographs.
1143:      * 0x4E00 - 0x9FFF.
1144:      */
1145:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1146:       = new UnicodeBlock(0x4E00, 0x9FFF,
1147:                          "CJK_UNIFIED_IDEOGRAPHS", 
1148:                          "CJK Unified Ideographs");
1149: 
1150:     /**
1151:      * Yi Syllables.
1152:      * 0xA000 - 0xA48F.
1153:      * @since 1.4
1154:      */
1155:     public static final UnicodeBlock YI_SYLLABLES
1156:       = new UnicodeBlock(0xA000, 0xA48F,
1157:                          "YI_SYLLABLES", 
1158:                          "Yi Syllables");
1159: 
1160:     /**
1161:      * Yi Radicals.
1162:      * 0xA490 - 0xA4CF.
1163:      * @since 1.4
1164:      */
1165:     public static final UnicodeBlock YI_RADICALS
1166:       = new UnicodeBlock(0xA490, 0xA4CF,
1167:                          "YI_RADICALS", 
1168:                          "Yi Radicals");
1169: 
1170:     /**
1171:      * Hangul Syllables.
1172:      * 0xAC00 - 0xD7AF.
1173:      */
1174:     public static final UnicodeBlock HANGUL_SYLLABLES
1175:       = new UnicodeBlock(0xAC00, 0xD7AF,
1176:                          "HANGUL_SYLLABLES", 
1177:                          "Hangul Syllables");
1178: 
1179:     /**
1180:      * High Surrogates.
1181:      * 0xD800 - 0xDB7F.
1182:      * @since 1.5
1183:      */
1184:     public static final UnicodeBlock HIGH_SURROGATES
1185:       = new UnicodeBlock(0xD800, 0xDB7F,
1186:                          "HIGH_SURROGATES", 
1187:                          "High Surrogates");
1188: 
1189:     /**
1190:      * High Private Use Surrogates.
1191:      * 0xDB80 - 0xDBFF.
1192:      * @since 1.5
1193:      */
1194:     public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1195:       = new UnicodeBlock(0xDB80, 0xDBFF,
1196:                          "HIGH_PRIVATE_USE_SURROGATES", 
1197:                          "High Private Use Surrogates");
1198: 
1199:     /**
1200:      * Low Surrogates.
1201:      * 0xDC00 - 0xDFFF.
1202:      * @since 1.5
1203:      */
1204:     public static final UnicodeBlock LOW_SURROGATES
1205:       = new UnicodeBlock(0xDC00, 0xDFFF,
1206:                          "LOW_SURROGATES", 
1207:                          "Low Surrogates");
1208: 
1209:     /**
1210:      * Private Use Area.
1211:      * 0xE000 - 0xF8FF.
1212:      */
1213:     public static final UnicodeBlock PRIVATE_USE_AREA
1214:       = new UnicodeBlock(0xE000, 0xF8FF,
1215:                          "PRIVATE_USE_AREA", 
1216:                          "Private Use Area");
1217: 
1218:     /**
1219:      * CJK Compatibility Ideographs.
1220:      * 0xF900 - 0xFAFF.
1221:      */
1222:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1223:       = new UnicodeBlock(0xF900, 0xFAFF,
1224:                          "CJK_COMPATIBILITY_IDEOGRAPHS", 
1225:                          "CJK Compatibility Ideographs");
1226: 
1227:     /**
1228:      * Alphabetic Presentation Forms.
1229:      * 0xFB00 - 0xFB4F.
1230:      */
1231:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1232:       = new UnicodeBlock(0xFB00, 0xFB4F,
1233:                          "ALPHABETIC_PRESENTATION_FORMS", 
1234:                          "Alphabetic Presentation Forms");
1235: 
1236:     /**
1237:      * Arabic Presentation Forms-A.
1238:      * 0xFB50 - 0xFDFF.
1239:      */
1240:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1241:       = new UnicodeBlock(0xFB50, 0xFDFF,
1242:                          "ARABIC_PRESENTATION_FORMS_A", 
1243:                          "Arabic Presentation Forms-A");
1244: 
1245:     /**
1246:      * Variation Selectors.
1247:      * 0xFE00 - 0xFE0F.
1248:      * @since 1.5
1249:      */
1250:     public static final UnicodeBlock VARIATION_SELECTORS
1251:       = new UnicodeBlock(0xFE00, 0xFE0F,
1252:                          "VARIATION_SELECTORS", 
1253:                          "Variation Selectors");
1254: 
1255:     /**
1256:      * Combining Half Marks.
1257:      * 0xFE20 - 0xFE2F.
1258:      */
1259:     public static final UnicodeBlock COMBINING_HALF_MARKS
1260:       = new UnicodeBlock(0xFE20, 0xFE2F,
1261:                          "COMBINING_HALF_MARKS", 
1262:                          "Combining Half Marks");
1263: 
1264:     /**
1265:      * CJK Compatibility Forms.
1266:      * 0xFE30 - 0xFE4F.
1267:      */
1268:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1269:       = new UnicodeBlock(0xFE30, 0xFE4F,
1270:                          "CJK_COMPATIBILITY_FORMS", 
1271:                          "CJK Compatibility Forms");
1272: 
1273:     /**
1274:      * Small Form Variants.
1275:      * 0xFE50 - 0xFE6F.
1276:      */
1277:     public static final UnicodeBlock SMALL_FORM_VARIANTS
1278:       = new UnicodeBlock(0xFE50, 0xFE6F,
1279:                          "SMALL_FORM_VARIANTS", 
1280:                          "Small Form Variants");
1281: 
1282:     /**
1283:      * Arabic Presentation Forms-B.
1284:      * 0xFE70 - 0xFEFF.
1285:      */
1286:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1287:       = new UnicodeBlock(0xFE70, 0xFEFF,
1288:                          "ARABIC_PRESENTATION_FORMS_B", 
1289:                          "Arabic Presentation Forms-B");
1290: 
1291:     /**
1292:      * Halfwidth and Fullwidth Forms.
1293:      * 0xFF00 - 0xFFEF.
1294:      */
1295:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1296:       = new UnicodeBlock(0xFF00, 0xFFEF,
1297:                          "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1298:                          "Halfwidth and Fullwidth Forms");
1299: 
1300:     /**
1301:      * Specials.
1302:      * 0xFFF0 - 0xFFFF.
1303:      */
1304:     public static final UnicodeBlock SPECIALS
1305:       = new UnicodeBlock(0xFFF0, 0xFFFF,
1306:                          "SPECIALS", 
1307:                          "Specials");
1308: 
1309:     /**
1310:      * Linear B Syllabary.
1311:      * 0x10000 - 0x1007F.
1312:      * @since 1.5
1313:      */
1314:     public static final UnicodeBlock LINEAR_B_SYLLABARY
1315:       = new UnicodeBlock(0x10000, 0x1007F,
1316:                          "LINEAR_B_SYLLABARY", 
1317:                          "Linear B Syllabary");
1318: 
1319:     /**
1320:      * Linear B Ideograms.
1321:      * 0x10080 - 0x100FF.
1322:      * @since 1.5
1323:      */
1324:     public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1325:       = new UnicodeBlock(0x10080, 0x100FF,
1326:                          "LINEAR_B_IDEOGRAMS", 
1327:                          "Linear B Ideograms");
1328: 
1329:     /**
1330:      * Aegean Numbers.
1331:      * 0x10100 - 0x1013F.
1332:      * @since 1.5
1333:      */
1334:     public static final UnicodeBlock AEGEAN_NUMBERS
1335:       = new UnicodeBlock(0x10100, 0x1013F,
1336:                          "AEGEAN_NUMBERS", 
1337:                          "Aegean Numbers");
1338: 
1339:     /**
1340:      * Old Italic.
1341:      * 0x10300 - 0x1032F.
1342:      * @since 1.5
1343:      */
1344:     public static final UnicodeBlock OLD_ITALIC
1345:       = new UnicodeBlock(0x10300, 0x1032F,
1346:                          "OLD_ITALIC", 
1347:                          "Old Italic");
1348: 
1349:     /**
1350:      * Gothic.
1351:      * 0x10330 - 0x1034F.
1352:      * @since 1.5
1353:      */
1354:     public static final UnicodeBlock GOTHIC
1355:       = new UnicodeBlock(0x10330, 0x1034F,
1356:                          "GOTHIC", 
1357:                          "Gothic");
1358: 
1359:     /**
1360:      * Ugaritic.
1361:      * 0x10380 - 0x1039F.
1362:      * @since 1.5
1363:      */
1364:     public static final UnicodeBlock UGARITIC
1365:       = new UnicodeBlock(0x10380, 0x1039F,
1366:                          "UGARITIC", 
1367:                          "Ugaritic");
1368: 
1369:     /**
1370:      * Deseret.
1371:      * 0x10400 - 0x1044F.
1372:      * @since 1.5
1373:      */
1374:     public static final UnicodeBlock DESERET
1375:       = new UnicodeBlock(0x10400, 0x1044F,
1376:                          "DESERET", 
1377:                          "Deseret");
1378: 
1379:     /**
1380:      * Shavian.
1381:      * 0x10450 - 0x1047F.
1382:      * @since 1.5
1383:      */
1384:     public static final UnicodeBlock SHAVIAN
1385:       = new UnicodeBlock(0x10450, 0x1047F,
1386:                          "SHAVIAN", 
1387:                          "Shavian");
1388: 
1389:     /**
1390:      * Osmanya.
1391:      * 0x10480 - 0x104AF.
1392:      * @since 1.5
1393:      */
1394:     public static final UnicodeBlock OSMANYA
1395:       = new UnicodeBlock(0x10480, 0x104AF,
1396:                          "OSMANYA", 
1397:                          "Osmanya");
1398: 
1399:     /**
1400:      * Cypriot Syllabary.
1401:      * 0x10800 - 0x1083F.
1402:      * @since 1.5
1403:      */
1404:     public static final UnicodeBlock CYPRIOT_SYLLABARY
1405:       = new UnicodeBlock(0x10800, 0x1083F,
1406:                          "CYPRIOT_SYLLABARY", 
1407:                          "Cypriot Syllabary");
1408: 
1409:     /**
1410:      * Byzantine Musical Symbols.
1411:      * 0x1D000 - 0x1D0FF.
1412:      * @since 1.5
1413:      */
1414:     public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1415:       = new UnicodeBlock(0x1D000, 0x1D0FF,
1416:                          "BYZANTINE_MUSICAL_SYMBOLS", 
1417:                          "Byzantine Musical Symbols");
1418: 
1419:     /**
1420:      * Musical Symbols.
1421:      * 0x1D100 - 0x1D1FF.
1422:      * @since 1.5
1423:      */
1424:     public static final UnicodeBlock MUSICAL_SYMBOLS
1425:       = new UnicodeBlock(0x1D100, 0x1D1FF,
1426:                          "MUSICAL_SYMBOLS", 
1427:                          "Musical Symbols");
1428: 
1429:     /**
1430:      * Tai Xuan Jing Symbols.
1431:      * 0x1D300 - 0x1D35F.
1432:      * @since 1.5
1433:      */
1434:     public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1435:       = new UnicodeBlock(0x1D300, 0x1D35F,
1436:                          "TAI_XUAN_JING_SYMBOLS", 
1437:                          "Tai Xuan Jing Symbols");
1438: 
1439:     /**
1440:      * Mathematical Alphanumeric Symbols.
1441:      * 0x1D400 - 0x1D7FF.
1442:      * @since 1.5
1443:      */
1444:     public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1445:       = new UnicodeBlock(0x1D400, 0x1D7FF,
1446:                          "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1447:                          "Mathematical Alphanumeric Symbols");
1448: 
1449:     /**
1450:      * CJK Unified Ideographs Extension B.
1451:      * 0x20000 - 0x2A6DF.
1452:      * @since 1.5
1453:      */
1454:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1455:       = new UnicodeBlock(0x20000, 0x2A6DF,
1456:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1457:                          "CJK Unified Ideographs Extension B");
1458: 
1459:     /**
1460:      * CJK Compatibility Ideographs Supplement.
1461:      * 0x2F800 - 0x2FA1F.
1462:      * @since 1.5
1463:      */
1464:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1465:       = new UnicodeBlock(0x2F800, 0x2FA1F,
1466:                          "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1467:                          "CJK Compatibility Ideographs Supplement");
1468: 
1469:     /**
1470:      * Tags.
1471:      * 0xE0000 - 0xE007F.
1472:      * @since 1.5
1473:      */
1474:     public static final UnicodeBlock TAGS
1475:       = new UnicodeBlock(0xE0000, 0xE007F,
1476:                          "TAGS", 
1477:                          "Tags");
1478: 
1479:     /**
1480:      * Variation Selectors Supplement.
1481:      * 0xE0100 - 0xE01EF.
1482:      * @since 1.5
1483:      */
1484:     public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1485:       = new UnicodeBlock(0xE0100, 0xE01EF,
1486:                          "VARIATION_SELECTORS_SUPPLEMENT", 
1487:                          "Variation Selectors Supplement");
1488: 
1489:     /**
1490:      * Supplementary Private Use Area-A.
1491:      * 0xF0000 - 0xFFFFF.
1492:      * @since 1.5
1493:      */
1494:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1495:       = new UnicodeBlock(0xF0000, 0xFFFFF,
1496:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1497:                          "Supplementary Private Use Area-A");
1498: 
1499:     /**
1500:      * Supplementary Private Use Area-B.
1501:      * 0x100000 - 0x10FFFF.
1502:      * @since 1.5
1503:      */
1504:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1505:       = new UnicodeBlock(0x100000, 0x10FFFF,
1506:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1507:                          "Supplementary Private Use Area-B");
1508: 
1509:     /**
1510:      * Surrogates Area.
1511:      * 0xD800 - 0xDFFF.
1512:      * @deprecated As of 1.5, the three areas,
1513:      * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1514:      * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1515:      * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1516:      * by the Unicode standard, should be used in preference to
1517:      * this.  Those three areas are also the ones returned from calls
1518:      * to <code>of(int)</code> and <code>of(char)</code>.
1519:      */
1520:     public static final UnicodeBlock SURROGATES_AREA
1521:       = new UnicodeBlock(0xD800, 0xDFFF,
1522:                          "SURROGATES_AREA",
1523:              "Surrogates Area");
1524: 
1525:     /**
1526:      * The defined subsets.  The deprecated SURROGATES_AREA is not listed.
1527:      */
1528:     private static final UnicodeBlock sets[] = {
1529:       BASIC_LATIN,
1530:       LATIN_1_SUPPLEMENT,
1531:       LATIN_EXTENDED_A,
1532:       LATIN_EXTENDED_B,
1533:       IPA_EXTENSIONS,
1534:       SPACING_MODIFIER_LETTERS,
1535:       COMBINING_DIACRITICAL_MARKS,
1536:       GREEK,
1537:       CYRILLIC,
1538:       CYRILLIC_SUPPLEMENTARY,
1539:       ARMENIAN,
1540:       HEBREW,
1541:       ARABIC,
1542:       SYRIAC,
1543:       THAANA,
1544:       DEVANAGARI,
1545:       BENGALI,
1546:       GURMUKHI,
1547:       GUJARATI,
1548:       ORIYA,
1549:       TAMIL,
1550:       TELUGU,
1551:       KANNADA,
1552:       MALAYALAM,
1553:       SINHALA,
1554:       THAI,
1555:       LAO,
1556:       TIBETAN,
1557:       MYANMAR,
1558:       GEORGIAN,
1559:       HANGUL_JAMO,
1560:       ETHIOPIC,
1561:       CHEROKEE,
1562:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1563:       OGHAM,
1564:       RUNIC,
1565:       TAGALOG,
1566:       HANUNOO,
1567:       BUHID,
1568:       TAGBANWA,
1569:       KHMER,
1570:       MONGOLIAN,
1571:       LIMBU,
1572:       TAI_LE,
1573:       KHMER_SYMBOLS,
1574:       PHONETIC_EXTENSIONS,
1575:       LATIN_EXTENDED_ADDITIONAL,
1576:       GREEK_EXTENDED,
1577:       GENERAL_PUNCTUATION,
1578:       SUPERSCRIPTS_AND_SUBSCRIPTS,
1579:       CURRENCY_SYMBOLS,
1580:       COMBINING_MARKS_FOR_SYMBOLS,
1581:       LETTERLIKE_SYMBOLS,
1582:       NUMBER_FORMS,
1583:       ARROWS,
1584:       MATHEMATICAL_OPERATORS,
1585:       MISCELLANEOUS_TECHNICAL,
1586:       CONTROL_PICTURES,
1587:       OPTICAL_CHARACTER_RECOGNITION,
1588:       ENCLOSED_ALPHANUMERICS,
1589:       BOX_DRAWING,
1590:       BLOCK_ELEMENTS,
1591:       GEOMETRIC_SHAPES,
1592:       MISCELLANEOUS_SYMBOLS,
1593:       DINGBATS,
1594:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1595:       SUPPLEMENTAL_ARROWS_A,
1596:       BRAILLE_PATTERNS,
1597:       SUPPLEMENTAL_ARROWS_B,
1598:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1599:       SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1600:       MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1601:       CJK_RADICALS_SUPPLEMENT,
1602:       KANGXI_RADICALS,
1603:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1604:       CJK_SYMBOLS_AND_PUNCTUATION,
1605:       HIRAGANA,
1606:       KATAKANA,
1607:       BOPOMOFO,
1608:       HANGUL_COMPATIBILITY_JAMO,
1609:       KANBUN,
1610:       BOPOMOFO_EXTENDED,
1611:       KATAKANA_PHONETIC_EXTENSIONS,
1612:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
1613:       CJK_COMPATIBILITY,
1614:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1615:       YIJING_HEXAGRAM_SYMBOLS,
1616:       CJK_UNIFIED_IDEOGRAPHS,
1617:       YI_SYLLABLES,
1618:       YI_RADICALS,
1619:       HANGUL_SYLLABLES,
1620:       HIGH_SURROGATES,
1621:       HIGH_PRIVATE_USE_SURROGATES,
1622:       LOW_SURROGATES,
1623:       PRIVATE_USE_AREA,
1624:       CJK_COMPATIBILITY_IDEOGRAPHS,
1625:       ALPHABETIC_PRESENTATION_FORMS,
1626:       ARABIC_PRESENTATION_FORMS_A,
1627:       VARIATION_SELECTORS,
1628:       COMBINING_HALF_MARKS,
1629:       CJK_COMPATIBILITY_FORMS,
1630:       SMALL_FORM_VARIANTS,
1631:       ARABIC_PRESENTATION_FORMS_B,
1632:       HALFWIDTH_AND_FULLWIDTH_FORMS,
1633:       SPECIALS,
1634:       LINEAR_B_SYLLABARY,
1635:       LINEAR_B_IDEOGRAMS,
1636:       AEGEAN_NUMBERS,
1637:       OLD_ITALIC,
1638:       GOTHIC,
1639:       UGARITIC,
1640:       DESERET,
1641:       SHAVIAN,
1642:       OSMANYA,
1643:       CYPRIOT_SYLLABARY,
1644:       BYZANTINE_MUSICAL_SYMBOLS,
1645:       MUSICAL_SYMBOLS,
1646:       TAI_XUAN_JING_SYMBOLS,
1647:       MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1648:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1649:       CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1650:       TAGS,
1651:       VARIATION_SELECTORS_SUPPLEMENT,
1652:       SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1653:       SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1654:     };
1655:   } // class UnicodeBlock
1656: 
1657:   /**
1658:    * The immutable value of this Character.
1659:    *
1660:    * @serial the value of this Character
1661:    */
1662:   private final char value;
1663: 
1664:   /**
1665:    * Compatible with JDK 1.0+.
1666:    */
1667:   private static final long serialVersionUID = 3786198910865385080L;
1668: 
1669:   /**
1670:    * Smallest value allowed for radix arguments in Java. This value is 2.
1671:    *
1672:    * @see #digit(char, int)
1673:    * @see #forDigit(int, int)
1674:    * @see Integer#toString(int, int)
1675:    * @see Integer#valueOf(String)
1676:    */
1677:   public static final int MIN_RADIX = 2;
1678: 
1679:   /**
1680:    * Largest value allowed for radix arguments in Java. This value is 36.
1681:    *
1682:    * @see #digit(char, int)
1683:    * @see #forDigit(int, int)
1684:    * @see Integer#toString(int, int)
1685:    * @see Integer#valueOf(String)
1686:    */
1687:   public static final int MAX_RADIX = 36;
1688: 
1689:   /**
1690:    * The minimum value the <code>char</code> data type can hold.
1691:    * This value is <code>'&#92;u0000'</code>.
1692:    */
1693:   public static final char MIN_VALUE = '\u0000';
1694: 
1695:   /**
1696:    * The maximum value the <code>char</code> data type can hold.
1697:    * This value is <code>'&#92;uFFFF'</code>.
1698:    */
1699:   public static final char MAX_VALUE = '\uFFFF';
1700: 
1701:   /**
1702:    * Class object representing the primitive char data type.
1703:    *
1704:    * @since 1.1
1705:    */
1706:   public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1707: 
1708:   /**
1709:    * The number of bits needed to represent a <code>char</code>.
1710:    * @since 1.5
1711:    */
1712:   public static final int SIZE = 16;
1713: 
1714:   // charCache holds Character objects for use by boxing conversions
1715:   // via valueOf().  At least the values 0..127 must be cached;
1716:   // MAX_CACHE sets the cache size (the array has MAX_CACHE + 1 slots).
1717:   private static final int MAX_CACHE = 127;
1718:   private static Character[] charCache = new Character[MAX_CACHE + 1];
1719: 
1720:   /**
1721:    * Lu = Letter, Uppercase (Informative).
1722:    *
1723:    * @since 1.1
1724:    */
1725:   public static final byte UPPERCASE_LETTER = 1;
1726: 
1727:   /**
1728:    * Ll = Letter, Lowercase (Informative).
1729:    *
1730:    * @since 1.1
1731:    */
1732:   public static final byte LOWERCASE_LETTER = 2;
1733: 
1734:   /**
1735:    * Lt = Letter, Titlecase (Informative).
1736:    *
1737:    * @since 1.1
1738:    */
1739:   public static final byte TITLECASE_LETTER = 3;
1740: 
1741:   /**
1742:    * Mn = Mark, Non-Spacing (Normative).
1743:    *
1744:    * @since 1.1
1745:    */
1746:   public static final byte NON_SPACING_MARK = 6;
1747: 
1748:   /**
1749:    * Mc = Mark, Spacing Combining (Normative).
1750:    *
1751:    * @since 1.1
1752:    */
1753:   public static final byte COMBINING_SPACING_MARK = 8;
1754: 
1755:   /**
1756:    * Me = Mark, Enclosing (Normative).
1757:    *
1758:    * @since 1.1
1759:    */
1760:   public static final byte ENCLOSING_MARK = 7;
1761: 
1762:   /**
1763:    * Nd = Number, Decimal Digit (Normative).
1764:    *
1765:    * @since 1.1
1766:    */
1767:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
1768: 
1769:   /**
1770:    * Nl = Number, Letter (Normative).
1771:    *
1772:    * @since 1.1
1773:    */
1774:   public static final byte LETTER_NUMBER = 10;
1775: 
1776:   /**
1777:    * No = Number, Other (Normative).
1778:    *
1779:    * @since 1.1
1780:    */
1781:   public static final byte OTHER_NUMBER = 11;
1782: 
1783:   /**
1784:    * Zs = Separator, Space (Normative).
1785:    *
1786:    * @since 1.1
1787:    */
1788:   public static final byte SPACE_SEPARATOR = 12;
1789: 
1790:   /**
1791:    * Zl = Separator, Line (Normative).
1792:    *
1793:    * @since 1.1
1794:    */
1795:   public static final byte LINE_SEPARATOR = 13;
1796: 
1797:   /**
1798:    * Zp = Separator, Paragraph (Normative).
1799:    *
1800:    * @since 1.1
1801:    */
  public static final byte PARAGRAPH_SEPARATOR = 14;

  /**
   * General category "Cc" (Other, Control) in the Unicode specification;
   * normative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte CONTROL = 15;

  /**
   * General category "Cf" (Other, Format) in the Unicode specification;
   * normative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte FORMAT = 16;

  /**
   * General category "Cs" (Other, Surrogate) in the Unicode specification;
   * normative. Note that the numeric values of the category constants do
   * not follow their declaration order: this constant is 19 while
   * PRIVATE_USE, declared next, is 18.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte SURROGATE = 19;

  /**
   * General category "Co" (Other, Private Use) in the Unicode
   * specification; normative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte PRIVATE_USE = 18;

  /**
   * General category "Cn" (Other, Not Assigned) in the Unicode
   * specification; normative. isDefined reports false for exactly the
   * characters whose type is this value.
   *
   * @see #getType(char)
   * @see #isDefined(char)
   * @since 1.1
   */
  public static final byte UNASSIGNED = 0;

  /**
   * General category "Lm" (Letter, Modifier) in the Unicode specification;
   * informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte MODIFIER_LETTER = 4;

  /**
   * General category "Lo" (Letter, Other) in the Unicode specification;
   * informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte OTHER_LETTER = 5;

  /**
   * General category "Pc" (Punctuation, Connector) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte CONNECTOR_PUNCTUATION = 23;

  /**
   * General category "Pd" (Punctuation, Dash) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte DASH_PUNCTUATION = 20;

  /**
   * General category "Ps" (Punctuation, Open) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte START_PUNCTUATION = 21;

  /**
   * General category "Pe" (Punctuation, Close) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte END_PUNCTUATION = 22;

  /**
   * General category "Pi" (Punctuation, Initial Quote) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.4
   */
  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

  /**
   * General category "Pf" (Punctuation, Final Quote) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.4
   */
  public static final byte FINAL_QUOTE_PUNCTUATION = 30;

  /**
   * General category "Po" (Punctuation, Other) in the Unicode
   * specification; informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte OTHER_PUNCTUATION = 24;

  /**
   * General category "Sm" (Symbol, Math) in the Unicode specification;
   * informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte MATH_SYMBOL = 25;

  /**
   * General category "Sc" (Symbol, Currency) in the Unicode specification;
   * informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte CURRENCY_SYMBOL = 26;

  /**
   * General category "Sk" (Symbol, Modifier) in the Unicode specification;
   * informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte MODIFIER_SYMBOL = 27;

  /**
   * General category "So" (Symbol, Other) in the Unicode specification;
   * informative.
   *
   * @see #getType(char)
   * @since 1.1
   */
  public static final byte OTHER_SYMBOL = 28;
1929: 
  /**
   * Undefined bidirectional character type. Undefined char values have
   * undefined directionality in the Unicode specification.
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_UNDEFINED = -1;

  /**
   * Strong bidirectional character type "L" (left-to-right).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

  /**
   * Strong bidirectional character type "R" (right-to-left).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

  /**
   * Strong bidirectional character type "AL" (right-to-left Arabic).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

  /**
   * Weak bidirectional character type "EN" (European number).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

  /**
   * Weak bidirectional character type "ES" (European number separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

  /**
   * Weak bidirectional character type "ET" (European number terminator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

  /**
   * Weak bidirectional character type "AN" (Arabic number).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

  /**
   * Weak bidirectional character type "CS" (common number separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

  /**
   * Weak bidirectional character type "NSM" (nonspacing mark).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

  /**
   * Weak bidirectional character type "BN" (boundary neutral).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

  /**
   * Neutral bidirectional character type "B" (paragraph separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

  /**
   * Neutral bidirectional character type "S" (segment separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

  /**
   * Neutral bidirectional character type "WS" (whitespace). Like the
   * other separator and neutral types, whitespace has no strong
   * direction of its own.
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_WHITESPACE = 12;

  /**
   * Neutral bidirectional character type "ON" (other neutrals).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

  /**
   * Strong bidirectional character type "LRE" (left-to-right embedding).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

  /**
   * Strong bidirectional character type "LRO" (left-to-right override).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

  /**
   * Strong bidirectional character type "RLE" (right-to-left embedding).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

  /**
   * Strong bidirectional character type "RLO" (right-to-left override).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

  /**
   * Weak bidirectional character type "PDF" (pop directional format).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2070: 
  /**
   * Mask selecting the character type, which occupies the low five bits
   * of the value returned by readChar.
   * @see #readChar(char)
   */
  private static final int TYPE_MASK = 0x1F;

  /**
   * Mask selecting the non-breaking space flag (bit 5) in the value
   * returned by readChar. isWhitespace does not treat a separator
   * character as whitespace when this flag is set.
   * @see #readChar(char)
   * @see #isWhitespace(char)
   */
  private static final int NO_BREAK_MASK = 0x20;

  /**
   * Mask selecting the mirrored directionality flag (bit 6) in the value
   * returned by readChar.
   * @see #readChar(char)
   */
  private static final int MIRROR_MASK = 0x40;
2090: 
  /**
   * The smallest supplementary code point, U+10000, i.e. the first code
   * point that does not fit in a single char.
   *
   * @since 1.5
   */
  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;

  /**
   * The smallest code point, U+0000.
   *
   * @since 1.5
   */
  public static final int MIN_CODE_POINT = 0;


  /**
   * The largest code point, U+10FFFF.
   *
   * @since 1.5
   */
  public static final int MAX_CODE_POINT = 0x010ffff;


  /**
   * Minimum high surrogate code in UTF-16 encoding (U+D800).
   *
   * @since 1.5
   */
  public static final char MIN_HIGH_SURROGATE = '\ud800';

  /**
   * Maximum high surrogate code in UTF-16 encoding (U+DBFF).
   *
   * @since 1.5
   */
  public static final char MAX_HIGH_SURROGATE = '\udbff';

  /**
   * Minimum low surrogate code in UTF-16 encoding (U+DC00).
   *
   * @since 1.5
   */
  public static final char MIN_LOW_SURROGATE = '\udc00';

  /**
   * Maximum low surrogate code in UTF-16 encoding (U+DFFF).
   *
   * @since 1.5
   */
  public static final char MAX_LOW_SURROGATE = '\udfff';

  /**
   * Minimum surrogate code in UTF-16 encoding, high or low. This is the
   * same value as MIN_HIGH_SURROGATE.
   *
   * @since 1.5
   */
  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

  /**
   * Maximum surrogate code in UTF-16 encoding, high or low. This is the
   * same value as MAX_LOW_SURROGATE.
   *
   * @since 1.5
   */
  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
2155: 
  /**
   * Looks up the packed attribute word for a character in the Unicode
   * attribute database. The result is laid out as follows: the lower
   * 5 bits are the character type (see TYPE_MASK), the next 2 bits are
   * the no-break and mirror flags (see NO_BREAK_MASK and MIRROR_MASK),
   * and the top 9 bits are the offset into the attribute tables. The top
   * 9 bits are meaningless to Java callers; they are useful only in the
   * native code.
   *
   * @param ch the character to look up
   * @return the character's attribute offset, flags and type packed
   *         into one char
   * @see #TYPE_MASK
   * @see #NO_BREAK_MASK
   * @see #MIRROR_MASK
   */
  private static native char readChar(char ch);
2170: 
  /**
   * Constructs a Character object wrapping the given primitive value.
   *
   * @param value the character to wrap; it is stored unchanged and is
   *        the value later returned by charValue()
   */
  public Character(char value)
  {
    this.value = value;
  }
2180: 
2181:   /**
2182:    * Returns the character which has been wrapped by this class.
2183:    *
2184:    * @return the character wrapped
2185:    */
2186:   public char charValue()
2187:   {
2188:     return value;
2189:   }
2190: 
2191:   /**
2192:    * Returns the numerical value (unsigned) of the wrapped character.
2193:    * Range of returned values: 0x0000-0xFFFF.
2194:    *
2195:    * @return the value of the wrapped character
2196:    */
2197:   public int hashCode()
2198:   {
2199:     return value;
2200:   }
2201: 
2202:   /**
2203:    * Determines if an object is equal to this object. This is only true for
2204:    * another Character object wrapping the same value.
2205:    *
2206:    * @param o object to compare
2207:    * @return true if o is a Character with the same value
2208:    */
2209:   public boolean equals(Object o)
2210:   {
2211:     return o instanceof Character && value == ((Character) o).value;
2212:   }
2213: 
2214:   /**
2215:    * Converts the wrapped character into a String.
2216:    *
2217:    * @return a String containing one character -- the wrapped character
2218:    *         of this instance
2219:    */
2220:   public String toString()
2221:   {
2222:     // This assumes that String.valueOf(char) can create a single-character
2223:     // String more efficiently than through the public API.
2224:     return String.valueOf(value);
2225:   }
2226: 
2227:   /**
2228:    * Returns a String of length 1 representing the specified character.
2229:    *
2230:    * @param ch the character to convert
2231:    * @return a String containing the character
2232:    * @since 1.4
2233:    */
2234:   public static String toString(char ch)
2235:   {
2236:     // This assumes that String.valueOf(char) can create a single-character
2237:     // String more efficiently than through the public API.
2238:     return String.valueOf(ch);
2239:   }
2240: 
2241:   /**
2242:    * Determines if a character is a Unicode lowercase letter. For example,
2243:    * <code>'a'</code> is lowercase.
2244:    * <br>
2245:    * lowercase = [Ll]
2246:    *
2247:    * @param ch character to test
2248:    * @return true if ch is a Unicode lowercase letter, else false
2249:    * @see #isUpperCase(char)
2250:    * @see #isTitleCase(char)
2251:    * @see #toLowerCase(char)
2252:    * @see #getType(char)
2253:    */
2254:   public static boolean isLowerCase(char ch)
2255:   {
2256:     return getType(ch) == LOWERCASE_LETTER;
2257:   }
2258: 
2259:   /**
2260:    * Determines if a character is a Unicode uppercase letter. For example,
2261:    * <code>'A'</code> is uppercase.
2262:    * <br>
2263:    * uppercase = [Lu]
2264:    *
2265:    * @param ch character to test
2266:    * @return true if ch is a Unicode uppercase letter, else false
2267:    * @see #isLowerCase(char)
2268:    * @see #isTitleCase(char)
2269:    * @see #toUpperCase(char)
2270:    * @see #getType(char)
2271:    */
2272:   public static boolean isUpperCase(char ch)
2273:   {
2274:     return getType(ch) == UPPERCASE_LETTER;
2275:   }
2276: 
2277:   /**
2278:    * Determines if a character is a Unicode titlecase letter. For example,
2279:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
2280:    * <br>
2281:    * titlecase = [Lt]
2282:    *
2283:    * @param ch character to test
2284:    * @return true if ch is a Unicode titlecase letter, else false
2285:    * @see #isLowerCase(char)
2286:    * @see #isUpperCase(char)
2287:    * @see #toTitleCase(char)
2288:    * @see #getType(char)
2289:    */
2290:   public static boolean isTitleCase(char ch)
2291:   {
2292:     return getType(ch) == TITLECASE_LETTER;
2293:   }
2294: 
2295:   /**
2296:    * Determines if a character is a Unicode decimal digit. For example,
2297:    * <code>'0'</code> is a digit.
2298:    * <br>
2299:    * Unicode decimal digit = [Nd]
2300:    *
2301:    * @param ch character to test
2302:    * @return true if ch is a Unicode decimal digit, else false
2303:    * @see #digit(char, int)
2304:    * @see #forDigit(int, int)
2305:    * @see #getType(char)
2306:    */
2307:   public static boolean isDigit(char ch)
2308:   {
2309:     return getType(ch) == DECIMAL_DIGIT_NUMBER;
2310:   }
2311: 
2312:   /**
2313:    * Determines if a character is part of the Unicode Standard. This is an
2314:    * evolving standard, but covers every character in the data file.
2315:    * <br>
2316:    * defined = not [Cn]
2317:    *
2318:    * @param ch character to test
2319:    * @return true if ch is a Unicode character, else false
2320:    * @see #isDigit(char)
2321:    * @see #isLetter(char)
2322:    * @see #isLetterOrDigit(char)
2323:    * @see #isLowerCase(char)
2324:    * @see #isTitleCase(char)
2325:    * @see #isUpperCase(char)
2326:    */
2327:   public static boolean isDefined(char ch)
2328:   {
2329:     return getType(ch) != UNASSIGNED;
2330:   }
2331: 
2332:   /**
2333:    * Determines if a character is a Unicode letter. Not all letters have case,
2334:    * so this may return true when isLowerCase and isUpperCase return false.
2335:    * <br>
2336:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2337:    *
2338:    * @param ch character to test
2339:    * @return true if ch is a Unicode letter, else false
2340:    * @see #isDigit(char)
2341:    * @see #isJavaIdentifierStart(char)
2342:    * @see #isJavaLetter(char)
2343:    * @see #isJavaLetterOrDigit(char)
2344:    * @see #isLetterOrDigit(char)
2345:    * @see #isLowerCase(char)
2346:    * @see #isTitleCase(char)
2347:    * @see #isUnicodeIdentifierStart(char)
2348:    * @see #isUpperCase(char)
2349:    */
2350:   public static boolean isLetter(char ch)
2351:   {
2352:     return ((1 << getType(ch))
2353:             & ((1 << UPPERCASE_LETTER)
2354:                | (1 << LOWERCASE_LETTER)
2355:                | (1 << TITLECASE_LETTER)
2356:                | (1 << MODIFIER_LETTER)
2357:                | (1 << OTHER_LETTER))) != 0;
2358:   }
2359: 
2360:   /**
2361:    * Determines if a character is a Unicode letter or a Unicode digit. This
2362:    * is the combination of isLetter and isDigit.
2363:    * <br>
2364:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
2365:    *
2366:    * @param ch character to test
2367:    * @return true if ch is a Unicode letter or a Unicode digit, else false
2368:    * @see #isDigit(char)
2369:    * @see #isJavaIdentifierPart(char)
2370:    * @see #isJavaLetter(char)
2371:    * @see #isJavaLetterOrDigit(char)
2372:    * @see #isLetter(char)
2373:    * @see #isUnicodeIdentifierPart(char)
2374:    */
2375:   public static boolean isLetterOrDigit(char ch)
2376:   {
2377:     return ((1 << getType(ch))
2378:             & ((1 << UPPERCASE_LETTER)
2379:                | (1 << LOWERCASE_LETTER)
2380:                | (1 << TITLECASE_LETTER)
2381:                | (1 << MODIFIER_LETTER)
2382:                | (1 << OTHER_LETTER)
2383:                | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
2384:   }
2385: 
2386:   /**
2387:    * Determines if a character can start a Java identifier. This is the
2388:    * combination of isLetter, any character where getType returns
2389:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
2390:    * (like '_').
2391:    *
2392:    * @param ch character to test
2393:    * @return true if ch can start a Java identifier, else false
2394:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
2395:    * @see #isJavaLetterOrDigit(char)
2396:    * @see #isJavaIdentifierStart(char)
2397:    * @see #isJavaIdentifierPart(char)
2398:    * @see #isLetter(char)
2399:    * @see #isLetterOrDigit(char)
2400:    * @see #isUnicodeIdentifierStart(char)
2401:    */
2402:   public static boolean isJavaLetter(char ch)
2403:   {
2404:     return isJavaIdentifierStart(ch);
2405:   }
2406: 
2407:   /**
2408:    * Determines if a character can follow the first letter in
2409:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
2410:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
2411:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
2412:    * or isIdentifierIgnorable.
2413:    *
2414:    * @param ch character to test
2415:    * @return true if ch can follow the first letter in a Java identifier
2416:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
2417:    * @see #isJavaLetter(char)
2418:    * @see #isJavaIdentifierStart(char)
2419:    * @see #isJavaIdentifierPart(char)
2420:    * @see #isLetter(char)
2421:    * @see #isLetterOrDigit(char)
2422:    * @see #isUnicodeIdentifierPart(char)
2423:    * @see #isIdentifierIgnorable(char)
2424:    */
2425:   public static boolean isJavaLetterOrDigit(char ch)
2426:   {
2427:     return isJavaIdentifierPart(ch);
2428:   }
2429: 
2430:   /**
2431:    * Determines if a character can start a Java identifier. This is the
2432:    * combination of isLetter, any character where getType returns
2433:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
2434:    * (like '_').
2435:    * <br>
2436:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
2437:    *
2438:    * @param ch character to test
2439:    * @return true if ch can start a Java identifier, else false
2440:    * @see #isJavaIdentifierPart(char)
2441:    * @see #isLetter(char)
2442:    * @see #isUnicodeIdentifierStart(char)
2443:    * @since 1.1
2444:    */
2445:   public static boolean isJavaIdentifierStart(char ch)
2446:   {
2447:     return ((1 << getType(ch))
2448:             & ((1 << UPPERCASE_LETTER)
2449:                | (1 << LOWERCASE_LETTER)
2450:                | (1 << TITLECASE_LETTER)
2451:                | (1 << MODIFIER_LETTER)
2452:                | (1 << OTHER_LETTER)
2453:                | (1 << LETTER_NUMBER)
2454:                | (1 << CURRENCY_SYMBOL)
2455:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
2456:   }
2457: 
2458:   /**
2459:    * Determines if a character can follow the first letter in
2460:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
2461:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
2462:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
2463:    * or isIdentifierIgnorable.
2464:    * <br>
2465:    * Java identifier extender =
2466:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
2467:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2468:    *
2469:    * @param ch character to test
2470:    * @return true if ch can follow the first letter in a Java identifier
2471:    * @see #isIdentifierIgnorable(char)
2472:    * @see #isJavaIdentifierStart(char)
2473:    * @see #isLetterOrDigit(char)
2474:    * @see #isUnicodeIdentifierPart(char)
2475:    * @since 1.1
2476:    */
2477:   public static boolean isJavaIdentifierPart(char ch)
2478:   {
2479:     int category = getType(ch);
2480:     return ((1 << category)
2481:             & ((1 << UPPERCASE_LETTER)
2482:                | (1 << LOWERCASE_LETTER)
2483:                | (1 << TITLECASE_LETTER)
2484:                | (1 << MODIFIER_LETTER)
2485:                | (1 << OTHER_LETTER)
2486:                | (1 << NON_SPACING_MARK)
2487:                | (1 << COMBINING_SPACING_MARK)
2488:                | (1 << DECIMAL_DIGIT_NUMBER)
2489:                | (1 << LETTER_NUMBER)
2490:                | (1 << CURRENCY_SYMBOL)
2491:                | (1 << CONNECTOR_PUNCTUATION)
2492:                | (1 << FORMAT))) != 0
2493:       || (category == CONTROL && isIdentifierIgnorable(ch));
2494:   }
2495: 
2496:   /**
2497:    * Determines if a character can start a Unicode identifier.  Only
2498:    * letters can start a Unicode identifier, but this includes characters
2499:    * in LETTER_NUMBER.
2500:    * <br>
2501:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
2502:    *
2503:    * @param ch character to test
2504:    * @return true if ch can start a Unicode identifier, else false
2505:    * @see #isJavaIdentifierStart(char)
2506:    * @see #isLetter(char)
2507:    * @see #isUnicodeIdentifierPart(char)
2508:    * @since 1.1
2509:    */
2510:   public static boolean isUnicodeIdentifierStart(char ch)
2511:   {
2512:     return ((1 << getType(ch))
2513:             & ((1 << UPPERCASE_LETTER)
2514:                | (1 << LOWERCASE_LETTER)
2515:                | (1 << TITLECASE_LETTER)
2516:                | (1 << MODIFIER_LETTER)
2517:                | (1 << OTHER_LETTER)
2518:                | (1 << LETTER_NUMBER))) != 0;
2519:   }
2520: 
2521:   /**
2522:    * Determines if a character can follow the first letter in
2523:    * a Unicode identifier. This includes letters, connecting punctuation,
2524:    * digits, numeric letters, combining marks, non-spacing marks, and
2525:    * isIdentifierIgnorable.
2526:    * <br>
2527:    * Unicode identifier extender =
2528:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
2529:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2530:    *
2531:    * @param ch character to test
2532:    * @return true if ch can follow the first letter in a Unicode identifier
2533:    * @see #isIdentifierIgnorable(char)
2534:    * @see #isJavaIdentifierPart(char)
2535:    * @see #isLetterOrDigit(char)
2536:    * @see #isUnicodeIdentifierStart(char)
2537:    * @since 1.1
2538:    */
2539:   public static boolean isUnicodeIdentifierPart(char ch)
2540:   {
2541:     int category = getType(ch);
2542:     return ((1 << category)
2543:             & ((1 << UPPERCASE_LETTER)
2544:                | (1 << LOWERCASE_LETTER)
2545:                | (1 << TITLECASE_LETTER)
2546:                | (1 << MODIFIER_LETTER)
2547:                | (1 << OTHER_LETTER)
2548:                | (1 << NON_SPACING_MARK)
2549:                | (1 << COMBINING_SPACING_MARK)
2550:                | (1 << DECIMAL_DIGIT_NUMBER)
2551:                | (1 << LETTER_NUMBER)
2552:                | (1 << CONNECTOR_PUNCTUATION)
2553:                | (1 << FORMAT))) != 0
2554:       || (category == CONTROL && isIdentifierIgnorable(ch));
2555:   }
2556: 
2557:   /**
2558:    * Determines if a character is ignorable in a Unicode identifier. This
2559:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
2560:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
2561:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
2562:    * <code>'\u009F'</code>), and FORMAT characters.
2563:    * <br>
2564:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
2565:    *    |U+007F-U+009F
2566:    *
2567:    * @param ch character to test
2568:    * @return true if ch is ignorable in a Unicode or Java identifier
2569:    * @see #isJavaIdentifierPart(char)
2570:    * @see #isUnicodeIdentifierPart(char)
2571:    * @since 1.1
2572:    */
2573:   public static boolean isIdentifierIgnorable(char ch)
2574:   {
2575:     return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F'
2576:                                || (ch <= '\u001B' && ch >= '\u000E')))
2577:       || getType(ch) == FORMAT;
2578:   }
2579: 
  /**
   * Converts a Unicode character into its lowercase equivalent mapping.
   * If no lowercase mapping exists, the character is returned unchanged.
   * Note that isLowerCase(toLowerCase(ch)) does not always return true,
   * since the result need not be of type LOWERCASE_LETTER.
   *
   * @param ch character to convert to lowercase
   * @return lowercase mapping of ch, or ch if lowercase mapping does
   *         not exist
   * @see #isLowerCase(char)
   * @see #isUpperCase(char)
   * @see #toTitleCase(char)
   * @see #toUpperCase(char)
   */
  public static native char toLowerCase(char ch);
2594: 
  /**
   * Converts a Unicode character into its uppercase equivalent mapping.
   * If no uppercase mapping exists, the character is returned unchanged.
   * Note that isUpperCase(toUpperCase(ch)) does not always return true,
   * since the result need not be of type UPPERCASE_LETTER.
   *
   * @param ch character to convert to uppercase
   * @return uppercase mapping of ch, or ch if uppercase mapping does
   *         not exist
   * @see #isLowerCase(char)
   * @see #isUpperCase(char)
   * @see #toLowerCase(char)
   * @see #toTitleCase(char)
   */
  public static native char toUpperCase(char ch);
2609: 
  /**
   * Converts a Unicode character into its titlecase equivalent mapping.
   * If no titlecase mapping exists, the character is returned unchanged.
   * Note that isTitleCase(toTitleCase(ch)) does not always return true,
   * since the result need not be of type TITLECASE_LETTER.
   *
   * @param ch character to convert to titlecase
   * @return titlecase mapping of ch, or ch if titlecase mapping does
   *         not exist
   * @see #isTitleCase(char)
   * @see #toLowerCase(char)
   * @see #toUpperCase(char)
   */
  public static native char toTitleCase(char ch);
2623: 
  /**
   * Converts a character into a digit of the specified radix. The result
   * is -1 if the radix is outside the range MIN_RADIX to MAX_RADIX, if
   * the result of getNumericValue(ch) is not a valid digit in that
   * radix, or if ch is neither a decimal digit nor in the case
   * insensitive set of 'a'-'z'.
   * <br>
   * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
   *
   * @param ch character to convert into a digit
   * @param radix radix in which ch is a digit
   * @return digit which ch represents in radix, or -1 if ch is not a
   *         valid digit
   * @see #MIN_RADIX
   * @see #MAX_RADIX
   * @see #forDigit(int, int)
   * @see #isDigit(char)
   * @see #getNumericValue(char)
   */
  public static native int digit(char ch, int radix);
2643: 
  /**
   * Returns the Unicode numeric value property of a character. For
   * example, U+216C (the Roman numeral fifty) returns 50.
   *
   * <p>This method also returns values for the letters A through Z (not
   * specified by Unicode), in these ranges: U+0041 through U+005A
   * (uppercase); U+0061 through U+007A (lowercase); and U+FF21 through
   * U+FF3A, U+FF41 through U+FF5A (full width variants).
   *
   * <p>If the character lacks a numeric value property, -1 is returned.
   * If the character has a numeric value property which is not
   * representable as a nonnegative integer, such as a fraction, -2 is
   * returned.
   *
   * <p>character argument boundary =
   *    [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
   *
   * @param ch character from which the numeric value property will
   *        be retrieved
   * @return the numeric value property of ch, or -1 if it does not exist, or
   *         -2 if it is not representable as a nonnegative integer
   * @see #forDigit(int, int)
   * @see #digit(char, int)
   * @see #isDigit(char)
   * @since 1.1
   */
  public static native int getNumericValue(char ch);
2672: 
2673:   /**
2674:    * Determines if a character is a ISO-LATIN-1 space. This is only the five
2675:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
2676:    * <code>'\r'</code>, and <code>' '</code>.
2677:    * <br>
2678:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
2679:    *
2680:    * @param ch character to test
2681:    * @return true if ch is a space, else false
2682:    * @deprecated Replaced by {@link #isWhitespace(char)}
2683:    * @see #isSpaceChar(char)
2684:    * @see #isWhitespace(char)
2685:    */
2686:   public static boolean isSpace(char ch)
2687:   {
2688:     // Performing the subtraction up front alleviates need to compare longs.
2689:     return ch-- <= ' ' && ((1 << ch)
2690:                            & ((1 << (' ' - 1))
2691:                               | (1 << ('\t' - 1))
2692:                               | (1 << ('\n' - 1))
2693:                               | (1 << ('\r' - 1))
2694:                               | (1 << ('\f' - 1)))) != 0;
2695:   }
2696: 
2697:   /**
2698:    * Determines if a character is a Unicode space character. This includes
2699:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
2700:    * <br>
2701:    * Unicode space = [Zs]|[Zp]|[Zl]
2702:    *
2703:    * @param ch character to test
2704:    * @return true if ch is a Unicode space, else false
2705:    * @see #isWhitespace(char)
2706:    * @since 1.1
2707:    */
2708:   public static boolean isSpaceChar(char ch)
2709:   {
2710:     return ((1 << getType(ch))
2711:             & ((1 << SPACE_SEPARATOR)
2712:                | (1 << LINE_SEPARATOR)
2713:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
2714:   }
2715: 
2716:   /**
2717:    * Determines if a character is Java whitespace. This includes Unicode
2718:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
2719:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
2720:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
2721:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
2722:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
2723:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
2724:    * and <code>'\u001F'</code>.
2725:    * <br>
2726:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
2727:    *
2728:    * @param ch character to test
2729:    * @return true if ch is Java whitespace, else false
2730:    * @see #isSpaceChar(char)
2731:    * @since 1.1
2732:    */
2733:   public static boolean isWhitespace(char ch)
2734:   {
2735:     int attr = readChar(ch);
2736:     return ((((1 << (attr & TYPE_MASK))
2737:               & ((1 << SPACE_SEPARATOR)
2738:                  | (1 << LINE_SEPARATOR)
2739:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
2740:             && (attr & NO_BREAK_MASK) == 0)
2741:       || (ch <= '\u001F' && ((1 << ch)
2742:                              & ((1 << '\t')
2743:                                 | (1 << '\n')
2744:                                 | (1 << '\u000B')
2745:                                 | (1 << '\u000C')
2746:                                 | (1 << '\r')
2747:                                 | (1 << '\u001C')
2748:                                 | (1 << '\u001D')
2749:                                 | (1 << '\u001E')
2750:                                 | (1 << '\u001F'))) != 0);
2751:   }
2752: 
2753:   /**
2754:    * Determines if a character has the ISO Control property.
2755:    * <br>
2756:    * ISO Control = [Cc]
2757:    *
2758:    * @param ch character to test
2759:    * @return true if ch is an ISO Control character, else false
2760:    * @see #isSpaceChar(char)
2761:    * @see #isWhitespace(char)
2762:    * @since 1.1
2763:    */
2764:   public static boolean isISOControl(char ch)
2765:   {
2766:     return getType(ch) == CONTROL;
2767:   }
2768: 
  /**
   * Returns the Unicode general category property of a character. The
   * result is one of the category constants listed below.
   *
   * @param ch character from which the general category property will
   *        be retrieved
   * @return the character category property of ch as an integer
   * @see #UNASSIGNED
   * @see #UPPERCASE_LETTER
   * @see #LOWERCASE_LETTER
   * @see #TITLECASE_LETTER
   * @see #MODIFIER_LETTER
   * @see #OTHER_LETTER
   * @see #NON_SPACING_MARK
   * @see #ENCLOSING_MARK
   * @see #COMBINING_SPACING_MARK
   * @see #DECIMAL_DIGIT_NUMBER
   * @see #LETTER_NUMBER
   * @see #OTHER_NUMBER
   * @see #SPACE_SEPARATOR
   * @see #LINE_SEPARATOR
   * @see #PARAGRAPH_SEPARATOR
   * @see #CONTROL
   * @see #FORMAT
   * @see #PRIVATE_USE
   * @see #SURROGATE
   * @see #DASH_PUNCTUATION
   * @see #START_PUNCTUATION
   * @see #END_PUNCTUATION
   * @see #CONNECTOR_PUNCTUATION
   * @see #OTHER_PUNCTUATION
   * @see #MATH_SYMBOL
   * @see #CURRENCY_SYMBOL
   * @see #MODIFIER_SYMBOL
   * @see #OTHER_SYMBOL
   * @see #INITIAL_QUOTE_PUNCTUATION
   * @see #FINAL_QUOTE_PUNCTUATION
   * @since 1.1
   */
  public static native int getType(char ch);
2807: 
2808:   /**
2809:    * Converts a digit into a character which represents that digit
2810:    * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX,
2811:    * or the digit exceeds the radix, then the null character <code>'\0'</code>
2812:    * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
2813:    * <br>
2814:    * return value boundary = U+0030-U+0039|U+0061-U+007A
2815:    *
2816:    * @param digit digit to be converted into a character
2817:    * @param radix radix of digit
2818:    * @return character representing digit in radix, or '\0'
2819:    * @see #MIN_RADIX
2820:    * @see #MAX_RADIX
2821:    * @see #digit(char, int)
2822:    */
2823:   public static char forDigit(int digit, int radix)
2824:   {
2825:     if (radix < MIN_RADIX || radix > MAX_RADIX
2826:         || digit < 0 || digit >= radix)
2827:       return '\0';
2828:     return (char) (digit < 10 ? ('0' + digit) : ('a' - 10 + digit));
2829:   }
2830: 
  /**
   * Returns the Unicode directionality property of the character. This
   * is used in the visual ordering of text.  The lookup itself is
   * implemented by a native method.
   *
   * @param ch the character to look up
   * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
   * @see #DIRECTIONALITY_UNDEFINED
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   * @see #DIRECTIONALITY_ARABIC_NUMBER
   * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   * @see #DIRECTIONALITY_NONSPACING_MARK
   * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
   * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
   * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
   * @see #DIRECTIONALITY_WHITESPACE
   * @see #DIRECTIONALITY_OTHER_NEUTRALS
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   * @since 1.4
   */
  public static native byte getDirectionality(char ch);
2860: 
2861:   /**
2862:    * Determines whether the character is mirrored according to Unicode. For
2863:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
2864:    * left-to-right text, but ')' in right-to-left text.
2865:    *
2866:    * @param ch the character to look up
2867:    * @return true if the character is mirrored
2868:    * @since 1.4
2869:    */
2870:   public static boolean isMirrored(char ch)
2871:   {
2872:     return (readChar(ch) & MIRROR_MASK) != 0;
2873:   }
2874: 
2875:   /**
2876:    * Compares another Character to this Character, numerically.
2877:    *
2878:    * @param anotherCharacter Character to compare with this Character
2879:    * @return a negative integer if this Character is less than
2880:    *         anotherCharacter, zero if this Character is equal, and
2881:    *         a positive integer if this Character is greater
2882:    * @throws NullPointerException if anotherCharacter is null
2883:    * @since 1.2
2884:    */
2885:   public int compareTo(Character anotherCharacter)
2886:   {
2887:     return value - anotherCharacter.value;
2888:   }
2889: 
2890:   /**
2891:    * Compares an object to this Character.  Assuming the object is a
2892:    * Character object, this method performs the same comparison as
2893:    * compareTo(Character).
2894:    *
2895:    * @param o object to compare
2896:    * @return the comparison value
2897:    * @throws ClassCastException if o is not a Character object
2898:    * @throws NullPointerException if o is null
2899:    * @see #compareTo(Character)
2900:    * @since 1.2
2901:    */
2902:   public int compareTo(Object o)
2903:   {
2904:     return compareTo((Character) o);
2905:   }
2906: 
2907:   /**
2908:    * Returns an <code>Character</code> object wrapping the value.
2909:    * In contrast to the <code>Character</code> constructor, this method
2910:    * will cache some values.  It is used by boxing conversion.
2911:    *
2912:    * @param val the value to wrap
2913:    * @return the <code>Character</code>
2914:    * 
2915:    * @since 1.5
2916:    */
2917:   public static Character valueOf(char val)
2918:   {
2919:     if (val > MAX_CACHE)
2920:       return new Character(val);
2921:     synchronized (charCache)
2922:       {
2923:     if (charCache[val - MIN_VALUE] == null)
2924:       charCache[val - MIN_VALUE] = new Character(val);
2925:     return charCache[val - MIN_VALUE];
2926:       }
2927:   }
2928: 
2929:   /**
2930:    * Reverse the bytes in val.
2931:    * @since 1.5
2932:    */
2933:   public static char reverseBytes(char val)
2934:   {
2935:     return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
2936:   }
2937: 
2938:   /**
2939:    * Converts a unicode code point to a UTF-16 representation of that
2940:    * code point.
2941:    * 
2942:    * @param codePoint the unicode code point
2943:    *
2944:    * @return the UTF-16 representation of that code point
2945:    *
2946:    * @throws IllegalArgumentException if the code point is not a valid
2947:    *         unicode code point
2948:    *
2949:    * @since 1.5
2950:    */
2951:   public static char[] toChars(int codePoint)
2952:   {
2953:     char[] result = new char[charCount(codePoint)];
2954:     int ignore = toChars(codePoint, result, 0);
2955:     return result;
2956:   }
2957: 
2958:   /**
2959:    * Converts a unicode code point to its UTF-16 representation.
2960:    *
2961:    * @param codePoint the unicode code point
2962:    * @param dst the target char array
2963:    * @param dstIndex the start index for the target
2964:    *
2965:    * @return number of characters written to <code>dst</code>
2966:    *
2967:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
2968:    *         valid unicode code point
2969:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
2970:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
2971:    *         in <code>dst</code> or if the UTF-16 representation does not
2972:    *         fit into <code>dst</code>
2973:    *
2974:    * @since 1.5
2975:    */
2976:   public static int toChars(int codePoint, char[] dst, int dstIndex)
2977:   {
2978:     if (!isValidCodePoint(codePoint))
2979:       {
2980:         throw new IllegalArgumentException("not a valid code point: "
2981:                                            + codePoint);
2982:       }
2983: 
2984:     int result;
2985:     if (isSupplementaryCodePoint(codePoint))
2986:       {
2987:         // Write second char first to cause IndexOutOfBoundsException
2988:         // immediately.
2989:         final int cp2 = codePoint - 0x10000;
2990:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
2991:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
2992:         result = 2;
2993:       }
2994:     else
2995:       {
2996:         dst[dstIndex] = (char) codePoint;
2997:         result = 1; 
2998:       }
2999:     return result;
3000:   }
3001: 
3002:   /**
3003:    * Return number of 16-bit characters required to represent the given
3004:    * code point.
3005:    *
3006:    * @param codePoint a unicode code point
3007:    *
3008:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
3009:    *
3010:    * @since 1.5
3011:    */
3012:   public static int charCount(int codePoint)
3013:   {
3014:     return 
3015:       (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
3016:       ? 2 
3017:       : 1;
3018:   }
3019: 
3020:   /**
3021:    * Determines whether the specified code point is
3022:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
3023:    * supplementary character range.
3024:    *
3025:    * @param codePoint a Unicode code point
3026:    *
3027:    * @return <code>true</code> if code point is in supplementary range
3028:    *
3029:    * @since 1.5
3030:    */
3031:   public static boolean isSupplementaryCodePoint(int codePoint)
3032:   {
3033:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
3034:       && codePoint <= MAX_CODE_POINT;
3035:   }
3036: 
3037:   /**
3038:    * Determines whether the specified code point is
3039:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
3040:    *
3041:    * @param codePoint a Unicode code point
3042:    *
3043:    * @return <code>true</code> if code point is valid
3044:    *
3045:    * @since 1.5
3046:    */
3047:   public static boolean isValidCodePoint(int codePoint)
3048:   {
3049:     return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
3050:   }
3051: 
3052:   /**
3053:    * Return true if the given character is a high surrogate.
3054:    * @param ch the character
3055:    * @return true if the character is a high surrogate character
3056:    *
3057:    * @since 1.5
3058:    */
3059:   public static boolean isHighSurrogate(char ch)
3060:   {
3061:     return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
3062:   }
3063: 
3064:   /**
3065:    * Return true if the given character is a low surrogate.
3066:    * @param ch the character
3067:    * @return true if the character is a low surrogate character
3068:    *
3069:    * @since 1.5
3070:    */
3071:   public static boolean isLowSurrogate(char ch)
3072:   {
3073:     return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
3074:   }
3075: 
3076:   /**
3077:    * Return true if the given characters compose a surrogate pair.
3078:    * This is true if the first character is a high surrogate and the
3079:    * second character is a low surrogate.
3080:    * @param ch1 the first character
3081:    * @param ch2 the first character
3082:    * @return true if the characters compose a surrogate pair
3083:    *
3084:    * @since 1.5
3085:    */
3086:   public static boolean isSurrogatePair(char ch1, char ch2)
3087:   {
3088:     return isHighSurrogate(ch1) && isLowSurrogate(ch2);
3089:   }
3090: 
3091:   /**
3092:    * Given a valid surrogate pair, this returns the corresponding
3093:    * code point.
3094:    * @param high the high character of the pair
3095:    * @param low the low character of the pair
3096:    * @return the corresponding code point
3097:    *
3098:    * @since 1.5
3099:    */
3100:   public static int toCodePoint(char high, char low)
3101:   {
3102:     return ((high - MIN_HIGH_SURROGATE) * 0x400) +
3103:       (low - MIN_LOW_SURROGATE) + 0x10000;
3104:   }
3105: 
3106:   /**
3107:    * Get the code point at the specified index in the CharSequence.
3108:    * This is like CharSequence#charAt(int), but if the character is
3109:    * the start of a surrogate pair, and there is a following
3110:    * character, and this character completes the pair, then the
3111:    * corresponding supplementary code point is returned.  Otherwise,
3112:    * the character at the index is returned.
3113:    *
3114:    * @param sequence the CharSequence
3115:    * @param index the index of the codepoint to get, starting at 0
3116:    * @return the codepoint at the specified index
3117:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3118:    * @since 1.5
3119:    */
3120:   public static int codePointAt(CharSequence sequence, int index)
3121:   {
3122:     int len = sequence.length();
3123:     if (index < 0 || index >= len)
3124:       throw new IndexOutOfBoundsException();
3125:     char high = sequence.charAt(index);
3126:     if (! isHighSurrogate(high) || ++index >= len)
3127:       return high;
3128:     char low = sequence.charAt(index);
3129:     if (! isLowSurrogate(low))
3130:       return high;
3131:     return toCodePoint(high, low);
3132:   }
3133: 
3134:   /**
3135:    * Get the code point at the specified index in the CharSequence.
3136:    * If the character is the start of a surrogate pair, and there is a
3137:    * following character, and this character completes the pair, then
3138:    * the corresponding supplementary code point is returned.
3139:    * Otherwise, the character at the index is returned.
3140:    *
3141:    * @param chars the character array in which to look
3142:    * @param index the index of the codepoint to get, starting at 0
3143:    * @return the codepoint at the specified index
3144:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3145:    * @since 1.5
3146:    */
3147:   public static int codePointAt(char[] chars, int index)
3148:   {
3149:     return codePointAt(chars, index, chars.length);
3150:   }
3151: 
3152:   /**
3153:    * Get the code point at the specified index in the CharSequence.
3154:    * If the character is the start of a surrogate pair, and there is a
3155:    * following character within the specified range, and this
3156:    * character completes the pair, then the corresponding
3157:    * supplementary code point is returned.  Otherwise, the character
3158:    * at the index is returned.
3159:    *
3160:    * @param chars the character array in which to look
3161:    * @param index the index of the codepoint to get, starting at 0
3162:    * @param limit the limit past which characters should not be examined
3163:    * @return the codepoint at the specified index
3164:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
3165:    * limit, or if limit is negative or &gt;= the length of the array
3166:    * @since 1.5
3167:    */
3168:   public static int codePointAt(char[] chars, int index, int limit)
3169:   {
3170:     if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
3171:       throw new IndexOutOfBoundsException();
3172:     char high = chars[index];
3173:     if (! isHighSurrogate(high) || ++index >= limit)
3174:       return high;
3175:     char low = chars[index];
3176:     if (! isLowSurrogate(low))
3177:       return high;
3178:     return toCodePoint(high, low);
3179:   }
3180: 
3181:   /**
3182:    * Get the code point before the specified index.  This is like
3183:    * #codePointAt(char[], int), but checks the characters at
3184:    * <code>index-1</code> and <code>index-2</code> to see if they form
3185:    * a supplementary code point.  If they do not, the character at
3186:    * <code>index-1</code> is returned.
3187:    *
3188:    * @param chars the character array
3189:    * @param index the index just past the codepoint to get, starting at 0
3190:    * @return the codepoint at the specified index
3191:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3192:    * @since 1.5
3193:    */
3194:   public static int codePointBefore(char[] chars, int index)
3195:   {
3196:     return codePointBefore(chars, index, 1);
3197:   }
3198: 
3199:   /**
3200:    * Get the code point before the specified index.  This is like
3201:    * #codePointAt(char[], int), but checks the characters at
3202:    * <code>index-1</code> and <code>index-2</code> to see if they form
3203:    * a supplementary code point.  If they do not, the character at
3204:    * <code>index-1</code> is returned.  The start parameter is used to
3205:    * limit the range of the array which may be examined.
3206:    *
3207:    * @param chars the character array
3208:    * @param index the index just past the codepoint to get, starting at 0
3209:    * @param start the index before which characters should not be examined
3210:    * @return the codepoint at the specified index
3211:    * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
3212:    * the length of the array, or if limit is negative or &gt;= the
3213:    * length of the array
3214:    * @since 1.5
3215:    */
3216:   public static int codePointBefore(char[] chars, int index, int start)
3217:   {
3218:     if (index < start || index > chars.length
3219:     || start < 0 || start >= chars.length)
3220:       throw new IndexOutOfBoundsException();
3221:     --index;
3222:     char low = chars[index];
3223:     if (! isLowSurrogate(low) || --index < start)
3224:       return low;
3225:     char high = chars[index];
3226:     if (! isHighSurrogate(high))
3227:       return low;
3228:     return toCodePoint(high, low);
3229:   }
3230: 
3231:   /**
3232:    * Get the code point before the specified index.  This is like
3233:    * #codePointAt(CharSequence, int), but checks the characters at
3234:    * <code>index-1</code> and <code>index-2</code> to see if they form
3235:    * a supplementary code point.  If they do not, the character at
3236:    * <code>index-1</code> is returned.
3237:    *
3238:    * @param sequence the CharSequence
3239:    * @param index the index just past the codepoint to get, starting at 0
3240:    * @return the codepoint at the specified index
3241:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3242:    * @since 1.5
3243:    */
3244:   public static int codePointBefore(CharSequence sequence, int index)
3245:   {
3246:     int len = sequence.length();
3247:     if (index < 1 || index > len)
3248:       throw new IndexOutOfBoundsException();
3249:     --index;
3250:     char low = sequence.charAt(index);
3251:     if (! isLowSurrogate(low) || --index < 0)
3252:       return low;
3253:     char high = sequence.charAt(index);
3254:     if (! isHighSurrogate(high))
3255:       return low;
3256:     return toCodePoint(high, low);
3257:   }
3258: } // class Character