Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006, 2007
   3:    Free Software Foundation, Inc.
   4: 
   5: This file is part of GNU Classpath.
   6: 
   7: GNU Classpath is free software; you can redistribute it and/or modify
   8: it under the terms of the GNU General Public License as published by
   9: the Free Software Foundation; either version 2, or (at your option)
  10: any later version.
  11: 
  12: GNU Classpath is distributed in the hope that it will be useful, but
  13: WITHOUT ANY WARRANTY; without even the implied warranty of
  14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15: General Public License for more details.
  16: 
  17: You should have received a copy of the GNU General Public License
  18: along with GNU Classpath; see the file COPYING.  If not, write to the
  19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20: 02110-1301 USA.
  21: 
  22: Linking this library statically or dynamically with other modules is
  23: making a combined work based on this library.  Thus, the terms and
  24: conditions of the GNU General Public License cover the whole
  25: combination.
  26: 
  27: As a special exception, the copyright holders of this library give you
  28: permission to link this library with independent modules to produce an
  29: executable, regardless of the license terms of these independent
  30: modules, and to copy and distribute the resulting executable under
  31: terms of your choice, provided that you also meet, for each linked
  32: independent module, the terms and conditions of the license of that
  33: module.  An independent module is a module which is not derived from
  34: or based on this library.  If you modify this library, you may extend
  35: this exception to your version of the library, but you are not
  36: obligated to do so.  If you do not wish to do so, delete this
  37: exception statement from your version. */
  38: 
  39: /*
  40:  * Note: This class must not be merged with Classpath.  Gcj uses C-style
  41:  * arrays (see include/java-chartables.h) to store the Unicode character
  42:  * database, whereas Classpath uses Java objects (char[] extracted from
  43:  * String constants) in gnu.java.lang.CharData.  Gcj's approach is more
  44:  * efficient, because there is no vtable or data relocation to worry about.
  45:  * However, despite the difference in the database interface, the two
  46:  * versions share identical algorithms.
  47:  */
  48: 
  49: package java.lang;
  50: 
  51: import java.io.Serializable;
  52: import java.text.Collator;
  53: import java.util.Locale;
  54: 
  55: /**
  56:  * Wrapper class for the primitive char data type.  In addition, this class
  57:  * allows one to retrieve property information and perform transformations
  58:  * on the defined characters in the Unicode Standard, Version 4.0.0.
  59:  * java.lang.Character is designed to be very dynamic, and as such, it
  60:  * retrieves information on the Unicode character set from a separate
  61:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  62:  *
  63:  * <p>For predicates, boundaries are used to describe
  64:  * the set of characters for which the method will return true.
  65:  * This syntax uses fairly normal regular expression notation.
  66:  * See 5.13 of the Unicode Standard, Version 4.0, for the
  67:  * boundary specification.
  68:  *
  69:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  70:  * for more information on the Unicode Standard.
  71:  *
  72:  * @author Tom Tromey (tromey@cygnus.com)
  73:  * @author Paul N. Fisher
  74:  * @author Jochen Hoenicke
  75:  * @author Eric Blake (ebb9@email.byu.edu)
  76:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  77:  * @since 1.0
  78:  * @status partly updated to 1.5; some things still missing
  79:  */
  80: public final class Character implements Serializable, Comparable<Character>
  81: {
  82:   /**
  83:    * A subset of Unicode blocks.
  84:    *
  85:    * @author Paul N. Fisher
  86:    * @author Eric Blake (ebb9@email.byu.edu)
  87:    * @since 1.2
  88:    */
  89:   public static class Subset
  90:   {
  91:     /** Name supplied at construction; never null. */
  92:     private final String name;
  93: 
  94:     /**
  95:      * Creates a subset of characters identified by the given name.
  96:      *
  97:      * @param name the name of the subset
  98:      * @throws NullPointerException if name is null
  99:      */
 100:     protected Subset(String name)
 101:     {
 102:       String checked = name.toString(); // the same string, or NPE for null
 103:       this.name = checked;
 104:     }
 105: 
 106:     /**
 107:      * Tests for identity rather than structural equality.  The method is
 108:      * <code>final</code>, so every subclass compares with <code>==</code>
 109:      * and two distinct subsets are never equal, even with the same name.
 110:      *
 111:      * @param o the object to compare
 112:      * @return true if o is this very object
 113:      */
 114:     public final boolean equals(Object o)
 115:     {
 116:       return this == o;
 117:     }
 118: 
 119:     /**
 120:      * Pins the hash code inherited from Object by declaring it
 121:      * <code>final</code>, which keeps it consistent with equals.
 122:      *
 123:      * @return the hash code for this object
 124:      */
 125:     public final int hashCode()
 126:     {
 127:       return super.hashCode();
 128:     }
 129: 
 130:     /**
 131:      * Returns the name this subset was constructed with.
 132:      *
 133:      * @return the name
 134:      */
 135:     public final String toString()
 136:     {
 137:       return this.name;
 138:     }
 139:   } // class Subset
 140: 
 141:   /**
 142:    * A family of character subsets in the Unicode specification. A character
 143:    * is in at most one of these blocks.
 144:    *
 145:    * This inner class was generated automatically from
 146:    * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts.
 147:    * This Unicode definition file can be found on the
 148:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 149:    * JDK 1.4 uses Unicode version 3.0.0.
 150:    *
 151:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 152:    * @since 1.2
 153:    */
 154:   public static final class UnicodeBlock extends Subset
 155:   {
 156:     /** The start of the subset. */
 157:     private final int start;
 158: 
 159:     /** The end of the subset. */
 160:     private final int end;
 161: 
 162:     /** The canonical name of the block according to the Unicode standard. */
 163:     private final String canonicalName;
 164: 
 165:     /** Enumeration for the <code>forName()</code> method */
 166:     private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }
 167: 
 168:     /**
 169:      * Creates a block covering the inclusive code point range start..end.
 170:      * @param start the first code point of the range
 171:      * @param end the last code point of the range
 172:      * @param name the block name, handed on to the Subset constructor
 173:      * @param canonicalName the canonical name of the block
 174:      */
 175:     private UnicodeBlock(int start, int end, String name,
 176:                          String canonicalName)
 177:     {
 178:       super(name);
 179:       this.canonicalName = canonicalName;
 180:       this.end = end;
 181:       this.start = start;
 182:     }
 183: 
 184:     /**
 185:      * Looks up the Unicode block containing the given <code>char</code>.
 186:      * <strong>Note</strong>: a <code>char</code> cannot hold a supplementary
 187:      * character; use <code>of(int)</code> for those.  This method simply
 188:      * widens its argument and delegates to <code>of(int)</code>.
 189:      *
 190:      * @param ch the character to look up
 191:      * @return the block containing ch, or null if no block contains it
 192:      */
 193:     public static UnicodeBlock of(char ch)
 194:     {
 195:       return UnicodeBlock.of((int) ch);
 196:     }
 197: 
 198:     /**
 199:      * Returns the Unicode character block which a code point belongs to.
 200:      * Valid code points lie between 0 and <code>MAX_CODE_POINT</code>.
 201:      * @param codePoint the code point to look up
 202:      * @return the block it belongs to, or null if it is not in one.
 203:      * @throws IllegalArgumentException if the specified code point is
 204:      *         negative or greater than <code>MAX_CODE_POINT</code>.
 205:      * @since 1.5
 206:      */
 207:     public static UnicodeBlock of(int codePoint)
 208:     {
 209:       if (codePoint < 0 || codePoint > MAX_CODE_POINT)
 210:         throw new IllegalArgumentException("The supplied integer value is " +
 211:           (codePoint < 0 ? "negative." : "too large to be a codepoint."));
 212:       // Binary search; this relies on sets being ordered by ascending range.
 213:       int low = 0;
 214:       int hi = sets.length - 1;
 215:       while (low <= hi)
 216:         {
 217:           int mid = (low + hi) >>> 1;
 218:           UnicodeBlock b = sets[mid];
 219:           if (codePoint < b.start)
 220:             hi = mid - 1;
 221:           else if (codePoint > b.end)
 222:             low = mid + 1;
 223:           else
 224:             return b;
 225:         }
 226:       return null;
 227:     }
 228: 
 229:     /**
 230:      * <p>
 231:      * Returns the <code>UnicodeBlock</code> with the given name, as defined
 232:      * by the Unicode standard.  The version of Unicode in use is defined by
 233:      * the <code>Character</code> class, and the names are given in the
 234:      * <code>Blocks-&lt;version&gt;.txt</code> file for that version.
 235:      * The name may be specified in one of three ways:
 236:      * </p>
 237:      * <ol>
 238:      * <li>The canonical, human-readable name used by the Unicode standard,
 239:      * with all spaces and hyphens retained.  For example,
 240:      * <code>Basic Latin</code> retrieves UnicodeBlock.BASIC_LATIN.</li>
 241:      * <li>The canonical name with all spaces removed, e.g. `BasicLatin'.</li>
 242:      * <li>The name used for the constants specified by this class, which
 243:      * is the canonical name with all spaces and hyphens replaced with
 244:      * underscores, e.g. <code>BASIC_LATIN</code>.</li>
 245:      * </ol>
 246:      * <p>
 247:      * The naming style is inferred from the name itself: one containing a
 248:      * space is taken as canonical, otherwise one containing an underscore
 249:      * is taken as a constant name, and anything else as a name without
 250:      * spaces.  Names are compared case-insensitively using the case
 251:      * comparison associated with the U.S. English locale.  Previous block
 252:      * names are recognised too; at present this means only the deprecated
 253:      * `SURROGATES_AREA' (the <code>of()</code> methods do not return it).
 254:      * </p>
 255:      *
 256:      * @param blockName the name of the block to look up.
 257:      * @return the specified block.
 258:      * @throws NullPointerException if <code>blockName</code> is null.
 259:      * @throws IllegalArgumentException if the name does not match any Unicode
 260:      *         block.
 261:      * @since 1.5
 262:      */
 263:     public static final UnicodeBlock forName(String blockName)
 264:     {
 265:       // Classify the name; a null blockName fails here with a
 266:       // NullPointerException, as documented.
 267:       final NameType style;
 268:       if (blockName.indexOf(' ') >= 0)
 269:         style = NameType.CANONICAL;
 270:       else if (blockName.indexOf('_') >= 0)
 271:         style = NameType.CONSTANT;
 272:       else
 273:         style = NameType.NO_SPACES;
 274: 
 275:       // Primary strength: only base-letter differences count, not case.
 276:       Collator matcher = Collator.getInstance(Locale.US);
 277:       matcher.setStrength(Collator.PRIMARY);
 278: 
 279:       // Within each style the deprecated surrogate block, which is not
 280:       // part of sets, is tried before the regular blocks.
 281:       switch (style)
 282:         {
 283:         case CANONICAL:
 284:           if (matcher.compare(blockName, "Surrogates Area") == 0)
 285:             return SURROGATES_AREA;
 286:           for (UnicodeBlock candidate : sets)
 287:             if (matcher.compare(blockName, candidate.canonicalName) == 0)
 288:               return candidate;
 289:           break;
 290:         case NO_SPACES:
 291:           if (matcher.compare(blockName, "SurrogatesArea") == 0)
 292:             return SURROGATES_AREA;
 293:           // Strip the spaces from each canonical name before comparing.
 294:           for (UnicodeBlock candidate : sets)
 295:             {
 296:               String squeezed = candidate.canonicalName.replaceAll(" ","");
 297:               if (matcher.compare(blockName, squeezed) == 0)
 298:                 return candidate;
 299:             }
 300:           break;
 301:         case CONSTANT:
 302:           if (matcher.compare(blockName, "SURROGATES_AREA") == 0)
 303:             return SURROGATES_AREA;
 304:           // toString() yields the constant-style name given at construction.
 305:           for (UnicodeBlock candidate : sets)
 306:             if (matcher.compare(blockName, candidate.toString()) == 0)
 307:               return candidate;
 308:           break;
 309:         }
 310: 
 311:       // Nothing matched in the chosen style.
 312:       throw new IllegalArgumentException("No Unicode block found for " +
 313:                                          blockName + ".");
 314:     }
 315: 
 316:     /**
 317:      * Basic Latin.
 318:      * 0x0000 - 0x007F.
 319:      */
 320:     public static final UnicodeBlock BASIC_LATIN
 321:       = new UnicodeBlock(0x0000, 0x007F,
 322:                          "BASIC_LATIN", 
 323:                          "Basic Latin");
 324: 
 325:     /**
 326:      * Latin-1 Supplement.
 327:      * 0x0080 - 0x00FF.
 328:      */
 329:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 330:       = new UnicodeBlock(0x0080, 0x00FF,
 331:                          "LATIN_1_SUPPLEMENT", 
 332:                          "Latin-1 Supplement");
 333: 
 334:     /**
 335:      * Latin Extended-A.
 336:      * 0x0100 - 0x017F.
 337:      */
 338:     public static final UnicodeBlock LATIN_EXTENDED_A
 339:       = new UnicodeBlock(0x0100, 0x017F,
 340:                          "LATIN_EXTENDED_A", 
 341:                          "Latin Extended-A");
 342: 
 343:     /**
 344:      * Latin Extended-B.
 345:      * 0x0180 - 0x024F.
 346:      */
 347:     public static final UnicodeBlock LATIN_EXTENDED_B
 348:       = new UnicodeBlock(0x0180, 0x024F,
 349:                          "LATIN_EXTENDED_B", 
 350:                          "Latin Extended-B");
 351: 
 352:     /**
 353:      * IPA Extensions.
 354:      * 0x0250 - 0x02AF.
 355:      */
 356:     public static final UnicodeBlock IPA_EXTENSIONS
 357:       = new UnicodeBlock(0x0250, 0x02AF,
 358:                          "IPA_EXTENSIONS", 
 359:                          "IPA Extensions");
 360: 
 361:     /**
 362:      * Spacing Modifier Letters.
 363:      * 0x02B0 - 0x02FF.
 364:      */
 365:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 366:       = new UnicodeBlock(0x02B0, 0x02FF,
 367:                          "SPACING_MODIFIER_LETTERS", 
 368:                          "Spacing Modifier Letters");
 369: 
 370:     /**
 371:      * Combining Diacritical Marks.
 372:      * 0x0300 - 0x036F.
 373:      */
 374:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 375:       = new UnicodeBlock(0x0300, 0x036F,
 376:                          "COMBINING_DIACRITICAL_MARKS", 
 377:                          "Combining Diacritical Marks");
 378: 
 379:     /**
 380:      * Greek.
 381:      * 0x0370 - 0x03FF.
 382:      */
 383:     public static final UnicodeBlock GREEK
 384:       = new UnicodeBlock(0x0370, 0x03FF,
 385:                          "GREEK", 
 386:                          "Greek");
 387: 
 388:     /**
 389:      * Cyrillic.
 390:      * 0x0400 - 0x04FF.
 391:      */
 392:     public static final UnicodeBlock CYRILLIC
 393:       = new UnicodeBlock(0x0400, 0x04FF,
 394:                          "CYRILLIC", 
 395:                          "Cyrillic");
 396: 
 397:     /**
 398:      * Cyrillic Supplementary.
 399:      * 0x0500 - 0x052F.
 400:      * @since 1.5
 401:      */
 402:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 403:       = new UnicodeBlock(0x0500, 0x052F,
 404:                          "CYRILLIC_SUPPLEMENTARY", 
 405:                          "Cyrillic Supplementary");
 406: 
 407:     /**
 408:      * Armenian.
 409:      * 0x0530 - 0x058F.
 410:      */
 411:     public static final UnicodeBlock ARMENIAN
 412:       = new UnicodeBlock(0x0530, 0x058F,
 413:                          "ARMENIAN", 
 414:                          "Armenian");
 415: 
 416:     /**
 417:      * Hebrew.
 418:      * 0x0590 - 0x05FF.
 419:      */
 420:     public static final UnicodeBlock HEBREW
 421:       = new UnicodeBlock(0x0590, 0x05FF,
 422:                          "HEBREW", 
 423:                          "Hebrew");
 424: 
 425:     /**
 426:      * Arabic.
 427:      * 0x0600 - 0x06FF.
 428:      */
 429:     public static final UnicodeBlock ARABIC
 430:       = new UnicodeBlock(0x0600, 0x06FF,
 431:                          "ARABIC", 
 432:                          "Arabic");
 433: 
 434:     /**
 435:      * Syriac.
 436:      * 0x0700 - 0x074F.
 437:      * @since 1.4
 438:      */
 439:     public static final UnicodeBlock SYRIAC
 440:       = new UnicodeBlock(0x0700, 0x074F,
 441:                          "SYRIAC", 
 442:                          "Syriac");
 443: 
 444:     /**
 445:      * Thaana.
 446:      * 0x0780 - 0x07BF.
 447:      * @since 1.4
 448:      */
 449:     public static final UnicodeBlock THAANA
 450:       = new UnicodeBlock(0x0780, 0x07BF,
 451:                          "THAANA", 
 452:                          "Thaana");
 453: 
 454:     /**
 455:      * Devanagari.
 456:      * 0x0900 - 0x097F.
 457:      */
 458:     public static final UnicodeBlock DEVANAGARI
 459:       = new UnicodeBlock(0x0900, 0x097F,
 460:                          "DEVANAGARI", 
 461:                          "Devanagari");
 462: 
 463:     /**
 464:      * Bengali.
 465:      * 0x0980 - 0x09FF.
 466:      */
 467:     public static final UnicodeBlock BENGALI
 468:       = new UnicodeBlock(0x0980, 0x09FF,
 469:                          "BENGALI", 
 470:                          "Bengali");
 471: 
 472:     /**
 473:      * Gurmukhi.
 474:      * 0x0A00 - 0x0A7F.
 475:      */
 476:     public static final UnicodeBlock GURMUKHI
 477:       = new UnicodeBlock(0x0A00, 0x0A7F,
 478:                          "GURMUKHI", 
 479:                          "Gurmukhi");
 480: 
 481:     /**
 482:      * Gujarati.
 483:      * 0x0A80 - 0x0AFF.
 484:      */
 485:     public static final UnicodeBlock GUJARATI
 486:       = new UnicodeBlock(0x0A80, 0x0AFF,
 487:                          "GUJARATI", 
 488:                          "Gujarati");
 489: 
 490:     /**
 491:      * Oriya.
 492:      * 0x0B00 - 0x0B7F.
 493:      */
 494:     public static final UnicodeBlock ORIYA
 495:       = new UnicodeBlock(0x0B00, 0x0B7F,
 496:                          "ORIYA", 
 497:                          "Oriya");
 498: 
 499:     /**
 500:      * Tamil.
 501:      * 0x0B80 - 0x0BFF.
 502:      */
 503:     public static final UnicodeBlock TAMIL
 504:       = new UnicodeBlock(0x0B80, 0x0BFF,
 505:                          "TAMIL", 
 506:                          "Tamil");
 507: 
 508:     /**
 509:      * Telugu.
 510:      * 0x0C00 - 0x0C7F.
 511:      */
 512:     public static final UnicodeBlock TELUGU
 513:       = new UnicodeBlock(0x0C00, 0x0C7F,
 514:                          "TELUGU", 
 515:                          "Telugu");
 516: 
 517:     /**
 518:      * Kannada.
 519:      * 0x0C80 - 0x0CFF.
 520:      */
 521:     public static final UnicodeBlock KANNADA
 522:       = new UnicodeBlock(0x0C80, 0x0CFF,
 523:                          "KANNADA", 
 524:                          "Kannada");
 525: 
 526:     /**
 527:      * Malayalam.
 528:      * 0x0D00 - 0x0D7F.
 529:      */
 530:     public static final UnicodeBlock MALAYALAM
 531:       = new UnicodeBlock(0x0D00, 0x0D7F,
 532:                          "MALAYALAM", 
 533:                          "Malayalam");
 534: 
 535:     /**
 536:      * Sinhala.
 537:      * 0x0D80 - 0x0DFF.
 538:      * @since 1.4
 539:      */
 540:     public static final UnicodeBlock SINHALA
 541:       = new UnicodeBlock(0x0D80, 0x0DFF,
 542:                          "SINHALA", 
 543:                          "Sinhala");
 544: 
 545:     /**
 546:      * Thai.
 547:      * 0x0E00 - 0x0E7F.
 548:      */
 549:     public static final UnicodeBlock THAI
 550:       = new UnicodeBlock(0x0E00, 0x0E7F,
 551:                          "THAI", 
 552:                          "Thai");
 553: 
 554:     /**
 555:      * Lao.
 556:      * 0x0E80 - 0x0EFF.
 557:      */
 558:     public static final UnicodeBlock LAO
 559:       = new UnicodeBlock(0x0E80, 0x0EFF,
 560:                          "LAO", 
 561:                          "Lao");
 562: 
 563:     /**
 564:      * Tibetan.
 565:      * 0x0F00 - 0x0FFF.
 566:      */
 567:     public static final UnicodeBlock TIBETAN
 568:       = new UnicodeBlock(0x0F00, 0x0FFF,
 569:                          "TIBETAN", 
 570:                          "Tibetan");
 571: 
 572:     /**
 573:      * Myanmar.
 574:      * 0x1000 - 0x109F.
 575:      * @since 1.4
 576:      */
 577:     public static final UnicodeBlock MYANMAR
 578:       = new UnicodeBlock(0x1000, 0x109F,
 579:                          "MYANMAR", 
 580:                          "Myanmar");
 581: 
 582:     /**
 583:      * Georgian.
 584:      * 0x10A0 - 0x10FF.
 585:      */
 586:     public static final UnicodeBlock GEORGIAN
 587:       = new UnicodeBlock(0x10A0, 0x10FF,
 588:                          "GEORGIAN", 
 589:                          "Georgian");
 590: 
 591:     /**
 592:      * Hangul Jamo.
 593:      * 0x1100 - 0x11FF.
 594:      */
 595:     public static final UnicodeBlock HANGUL_JAMO
 596:       = new UnicodeBlock(0x1100, 0x11FF,
 597:                          "HANGUL_JAMO", 
 598:                          "Hangul Jamo");
 599: 
 600:     /**
 601:      * Ethiopic.
 602:      * 0x1200 - 0x137F.
 603:      * @since 1.4
 604:      */
 605:     public static final UnicodeBlock ETHIOPIC
 606:       = new UnicodeBlock(0x1200, 0x137F,
 607:                          "ETHIOPIC", 
 608:                          "Ethiopic");
 609: 
 610:     /**
 611:      * Cherokee.
 612:      * 0x13A0 - 0x13FF.
 613:      * @since 1.4
 614:      */
 615:     public static final UnicodeBlock CHEROKEE
 616:       = new UnicodeBlock(0x13A0, 0x13FF,
 617:                          "CHEROKEE", 
 618:                          "Cherokee");
 619: 
 620:     /**
 621:      * Unified Canadian Aboriginal Syllabics.
 622:      * 0x1400 - 0x167F.
 623:      * @since 1.4
 624:      */
 625:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 626:       = new UnicodeBlock(0x1400, 0x167F,
 627:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 628:                          "Unified Canadian Aboriginal Syllabics");
 629: 
 630:     /**
 631:      * Ogham.
 632:      * 0x1680 - 0x169F.
 633:      * @since 1.4
 634:      */
 635:     public static final UnicodeBlock OGHAM
 636:       = new UnicodeBlock(0x1680, 0x169F,
 637:                          "OGHAM", 
 638:                          "Ogham");
 639: 
 640:     /**
 641:      * Runic.
 642:      * 0x16A0 - 0x16FF.
 643:      * @since 1.4
 644:      */
 645:     public static final UnicodeBlock RUNIC
 646:       = new UnicodeBlock(0x16A0, 0x16FF,
 647:                          "RUNIC", 
 648:                          "Runic");
 649: 
 650:     /**
 651:      * Tagalog.
 652:      * 0x1700 - 0x171F.
 653:      * @since 1.5
 654:      */
 655:     public static final UnicodeBlock TAGALOG
 656:       = new UnicodeBlock(0x1700, 0x171F,
 657:                          "TAGALOG", 
 658:                          "Tagalog");
 659: 
 660:     /**
 661:      * Hanunoo.
 662:      * 0x1720 - 0x173F.
 663:      * @since 1.5
 664:      */
 665:     public static final UnicodeBlock HANUNOO
 666:       = new UnicodeBlock(0x1720, 0x173F,
 667:                          "HANUNOO", 
 668:                          "Hanunoo");
 669: 
 670:     /**
 671:      * Buhid.
 672:      * 0x1740 - 0x175F.
 673:      * @since 1.5
 674:      */
 675:     public static final UnicodeBlock BUHID
 676:       = new UnicodeBlock(0x1740, 0x175F,
 677:                          "BUHID", 
 678:                          "Buhid");
 679: 
 680:     /**
 681:      * Tagbanwa.
 682:      * 0x1760 - 0x177F.
 683:      * @since 1.5
 684:      */
 685:     public static final UnicodeBlock TAGBANWA
 686:       = new UnicodeBlock(0x1760, 0x177F,
 687:                          "TAGBANWA", 
 688:                          "Tagbanwa");
 689: 
 690:     /**
 691:      * Khmer.
 692:      * 0x1780 - 0x17FF.
 693:      * @since 1.4
 694:      */
 695:     public static final UnicodeBlock KHMER
 696:       = new UnicodeBlock(0x1780, 0x17FF,
 697:                          "KHMER", 
 698:                          "Khmer");
 699: 
 700:     /**
 701:      * Mongolian.
 702:      * 0x1800 - 0x18AF.
 703:      * @since 1.4
 704:      */
 705:     public static final UnicodeBlock MONGOLIAN
 706:       = new UnicodeBlock(0x1800, 0x18AF,
 707:                          "MONGOLIAN", 
 708:                          "Mongolian");
 709: 
 710:     /**
 711:      * Limbu.
 712:      * 0x1900 - 0x194F.
 713:      * @since 1.5
 714:      */
 715:     public static final UnicodeBlock LIMBU
 716:       = new UnicodeBlock(0x1900, 0x194F,
 717:                          "LIMBU", 
 718:                          "Limbu");
 719: 
 720:     /**
 721:      * Tai Le.
 722:      * 0x1950 - 0x197F.
 723:      * @since 1.5
 724:      */
 725:     public static final UnicodeBlock TAI_LE
 726:       = new UnicodeBlock(0x1950, 0x197F,
 727:                          "TAI_LE", 
 728:                          "Tai Le");
 729: 
 730:     /**
 731:      * Khmer Symbols.
 732:      * 0x19E0 - 0x19FF.
 733:      * @since 1.5
 734:      */
 735:     public static final UnicodeBlock KHMER_SYMBOLS
 736:       = new UnicodeBlock(0x19E0, 0x19FF,
 737:                          "KHMER_SYMBOLS", 
 738:                          "Khmer Symbols");
 739: 
 740:     /**
 741:      * Phonetic Extensions.
 742:      * 0x1D00 - 0x1D7F.
 743:      * @since 1.5
 744:      */
 745:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 746:       = new UnicodeBlock(0x1D00, 0x1D7F,
 747:                          "PHONETIC_EXTENSIONS", 
 748:                          "Phonetic Extensions");
 749: 
 750:     /**
 751:      * Latin Extended Additional.
 752:      * 0x1E00 - 0x1EFF.
 753:      */
 754:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 755:       = new UnicodeBlock(0x1E00, 0x1EFF,
 756:                          "LATIN_EXTENDED_ADDITIONAL", 
 757:                          "Latin Extended Additional");
 758: 
 759:     /**
 760:      * Greek Extended.
 761:      * 0x1F00 - 0x1FFF.
 762:      */
 763:     public static final UnicodeBlock GREEK_EXTENDED
 764:       = new UnicodeBlock(0x1F00, 0x1FFF,
 765:                          "GREEK_EXTENDED", 
 766:                          "Greek Extended");
 767: 
 768:     /**
 769:      * General Punctuation.
 770:      * 0x2000 - 0x206F.
 771:      */
 772:     public static final UnicodeBlock GENERAL_PUNCTUATION
 773:       = new UnicodeBlock(0x2000, 0x206F,
 774:                          "GENERAL_PUNCTUATION", 
 775:                          "General Punctuation");
 776: 
 777:     /**
 778:      * Superscripts and Subscripts.
 779:      * 0x2070 - 0x209F.
 780:      */
 781:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 782:       = new UnicodeBlock(0x2070, 0x209F,
 783:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 784:                          "Superscripts and Subscripts");
 785: 
 786:     /**
 787:      * Currency Symbols.
 788:      * 0x20A0 - 0x20CF.
 789:      */
 790:     public static final UnicodeBlock CURRENCY_SYMBOLS
 791:       = new UnicodeBlock(0x20A0, 0x20CF,
 792:                          "CURRENCY_SYMBOLS", 
 793:                          "Currency Symbols");
 794: 
 795:     /**
 796:      * Combining Marks for Symbols.
 797:      * 0x20D0 - 0x20FF.
 798:      */
 799:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 800:       = new UnicodeBlock(0x20D0, 0x20FF,
 801:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 802:                          "Combining Marks for Symbols");
 803: 
 804:     /**
 805:      * Letterlike Symbols.
 806:      * 0x2100 - 0x214F.
 807:      */
 808:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 809:       = new UnicodeBlock(0x2100, 0x214F,
 810:                          "LETTERLIKE_SYMBOLS", 
 811:                          "Letterlike Symbols");
 812: 
 813:     /**
 814:      * Number Forms.
 815:      * 0x2150 - 0x218F.
 816:      */
 817:     public static final UnicodeBlock NUMBER_FORMS
 818:       = new UnicodeBlock(0x2150, 0x218F,
 819:                          "NUMBER_FORMS", 
 820:                          "Number Forms");
 821: 
 822:     /**
 823:      * Arrows.
 824:      * 0x2190 - 0x21FF.
 825:      */
 826:     public static final UnicodeBlock ARROWS
 827:       = new UnicodeBlock(0x2190, 0x21FF,
 828:                          "ARROWS", 
 829:                          "Arrows");
 830: 
 831:     /**
 832:      * Mathematical Operators.
 833:      * 0x2200 - 0x22FF.
 834:      */
 835:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 836:       = new UnicodeBlock(0x2200, 0x22FF,
 837:                          "MATHEMATICAL_OPERATORS", 
 838:                          "Mathematical Operators");
 839: 
 840:     /**
 841:      * Miscellaneous Technical.
 842:      * 0x2300 - 0x23FF.
 843:      */
 844:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 845:       = new UnicodeBlock(0x2300, 0x23FF,
 846:                          "MISCELLANEOUS_TECHNICAL", 
 847:                          "Miscellaneous Technical");
 848: 
 849:     /**
 850:      * Control Pictures.
 851:      * 0x2400 - 0x243F.
 852:      */
 853:     public static final UnicodeBlock CONTROL_PICTURES
 854:       = new UnicodeBlock(0x2400, 0x243F,
 855:                          "CONTROL_PICTURES", 
 856:                          "Control Pictures");
 857: 
 858:     /**
 859:      * Optical Character Recognition.
 860:      * 0x2440 - 0x245F.
 861:      */
 862:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 863:       = new UnicodeBlock(0x2440, 0x245F,
 864:                          "OPTICAL_CHARACTER_RECOGNITION", 
 865:                          "Optical Character Recognition");
 866: 
 867:     /**
 868:      * Enclosed Alphanumerics.
 869:      * 0x2460 - 0x24FF.
 870:      */
 871:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 872:       = new UnicodeBlock(0x2460, 0x24FF,
 873:                          "ENCLOSED_ALPHANUMERICS", 
 874:                          "Enclosed Alphanumerics");
 875: 
 876:     /**
 877:      * Box Drawing.
 878:      * 0x2500 - 0x257F.
 879:      */
 880:     public static final UnicodeBlock BOX_DRAWING
 881:       = new UnicodeBlock(0x2500, 0x257F,
 882:                          "BOX_DRAWING", 
 883:                          "Box Drawing");
 884: 
 885:     /**
 886:      * Block Elements.
 887:      * 0x2580 - 0x259F.
 888:      */
 889:     public static final UnicodeBlock BLOCK_ELEMENTS
 890:       = new UnicodeBlock(0x2580, 0x259F,
 891:                          "BLOCK_ELEMENTS", 
 892:                          "Block Elements");
 893: 
 894:     /**
 895:      * Geometric Shapes.
 896:      * 0x25A0 - 0x25FF.
 897:      */
 898:     public static final UnicodeBlock GEOMETRIC_SHAPES
 899:       = new UnicodeBlock(0x25A0, 0x25FF,
 900:                          "GEOMETRIC_SHAPES", 
 901:                          "Geometric Shapes");
 902: 
 903:     /**
 904:      * The "Miscellaneous Symbols" Unicode block.
 905:      * Covers code points 0x2600 - 0x26FF (inclusive).
 906:      */
 907:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 908:       = new UnicodeBlock(0x2600, 0x26FF,
 909:                          "MISCELLANEOUS_SYMBOLS", 
 910:                          "Miscellaneous Symbols");
 911: 
 912:     /**
 913:      * The "Dingbats" Unicode block.
 914:      * Covers code points 0x2700 - 0x27BF (inclusive).
 915:      */
 916:     public static final UnicodeBlock DINGBATS
 917:       = new UnicodeBlock(0x2700, 0x27BF,
 918:                          "DINGBATS", 
 919:                          "Dingbats");
 920: 
 921:     /**
 922:      * The "Miscellaneous Mathematical Symbols-A" Unicode block.
 923:      * Covers code points 0x27C0 - 0x27EF (inclusive).
 924:      * @since 1.5
 925:      */
 926:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 927:       = new UnicodeBlock(0x27C0, 0x27EF,
 928:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 929:                          "Miscellaneous Mathematical Symbols-A");
 930: 
 931:     /**
 932:      * The "Supplemental Arrows-A" Unicode block.
 933:      * Covers code points 0x27F0 - 0x27FF (inclusive).
 934:      * @since 1.5
 935:      */
 936:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 937:       = new UnicodeBlock(0x27F0, 0x27FF,
 938:                          "SUPPLEMENTAL_ARROWS_A", 
 939:                          "Supplemental Arrows-A");
 940: 
 941:     /**
 942:      * The "Braille Patterns" Unicode block.
 943:      * Covers code points 0x2800 - 0x28FF (inclusive).
 944:      * @since 1.4
 945:      */
 946:     public static final UnicodeBlock BRAILLE_PATTERNS
 947:       = new UnicodeBlock(0x2800, 0x28FF,
 948:                          "BRAILLE_PATTERNS", 
 949:                          "Braille Patterns");
 950: 
 951:     /**
 952:      * The "Supplemental Arrows-B" Unicode block.
 953:      * Covers code points 0x2900 - 0x297F (inclusive).
 954:      * @since 1.5
 955:      */
 956:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 957:       = new UnicodeBlock(0x2900, 0x297F,
 958:                          "SUPPLEMENTAL_ARROWS_B", 
 959:                          "Supplemental Arrows-B");
 960: 
 961:     /**
 962:      * The "Miscellaneous Mathematical Symbols-B" Unicode block.
 963:      * Covers code points 0x2980 - 0x29FF (inclusive).
 964:      * @since 1.5
 965:      */
 966:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 967:       = new UnicodeBlock(0x2980, 0x29FF,
 968:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 969:                          "Miscellaneous Mathematical Symbols-B");
 970: 
 971:     /**
 972:      * The "Supplemental Mathematical Operators" Unicode block.
 973:      * Covers code points 0x2A00 - 0x2AFF (inclusive).
 974:      * @since 1.5
 975:      */
 976:     public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 977:       = new UnicodeBlock(0x2A00, 0x2AFF,
 978:                          "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 979:                          "Supplemental Mathematical Operators");
 980: 
 981:     /**
 982:      * The "Miscellaneous Symbols and Arrows" Unicode block.
 983:      * Covers code points 0x2B00 - 0x2BFF (inclusive).
 984:      * @since 1.5
 985:      */
 986:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 987:       = new UnicodeBlock(0x2B00, 0x2BFF,
 988:                          "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 989:                          "Miscellaneous Symbols and Arrows");
 990: 
 991:     /**
 992:      * The "CJK Radicals Supplement" Unicode block.
 993:      * Covers code points 0x2E80 - 0x2EFF (inclusive).
 994:      * @since 1.4
 995:      */
 996:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 997:       = new UnicodeBlock(0x2E80, 0x2EFF,
 998:                          "CJK_RADICALS_SUPPLEMENT", 
 999:                          "CJK Radicals Supplement");
1000: 
1001:     /**
1002:      * The "Kangxi Radicals" Unicode block.
1003:      * Covers code points 0x2F00 - 0x2FDF (inclusive).
1004:      * @since 1.4
1005:      */
1006:     public static final UnicodeBlock KANGXI_RADICALS
1007:       = new UnicodeBlock(0x2F00, 0x2FDF,
1008:                          "KANGXI_RADICALS", 
1009:                          "Kangxi Radicals");
1010: 
1011:     /**
1012:      * The "Ideographic Description Characters" Unicode block.
1013:      * Covers code points 0x2FF0 - 0x2FFF (inclusive).
1014:      * @since 1.4
1015:      */
1016:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1017:       = new UnicodeBlock(0x2FF0, 0x2FFF,
1018:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1019:                          "Ideographic Description Characters");
1020: 
1021:     /**
1022:      * The "CJK Symbols and Punctuation" Unicode block.
1023:      * Covers code points 0x3000 - 0x303F (inclusive).
1024:      */
1025:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1026:       = new UnicodeBlock(0x3000, 0x303F,
1027:                          "CJK_SYMBOLS_AND_PUNCTUATION", 
1028:                          "CJK Symbols and Punctuation");
1029: 
1030:     /**
1031:      * The "Hiragana" Unicode block.
1032:      * Covers code points 0x3040 - 0x309F (inclusive).
1033:      */
1034:     public static final UnicodeBlock HIRAGANA
1035:       = new UnicodeBlock(0x3040, 0x309F,
1036:                          "HIRAGANA", 
1037:                          "Hiragana");
1038: 
1039:     /**
1040:      * The "Katakana" Unicode block.
1041:      * Covers code points 0x30A0 - 0x30FF (inclusive).
1042:      */
1043:     public static final UnicodeBlock KATAKANA
1044:       = new UnicodeBlock(0x30A0, 0x30FF,
1045:                          "KATAKANA", 
1046:                          "Katakana");
1047: 
1048:     /**
1049:      * The "Bopomofo" Unicode block.
1050:      * Covers code points 0x3100 - 0x312F (inclusive).
1051:      */
1052:     public static final UnicodeBlock BOPOMOFO
1053:       = new UnicodeBlock(0x3100, 0x312F,
1054:                          "BOPOMOFO", 
1055:                          "Bopomofo");
1056: 
1057:     /**
1058:      * The "Hangul Compatibility Jamo" Unicode block.
1059:      * Covers code points 0x3130 - 0x318F (inclusive).
1060:      */
1061:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1062:       = new UnicodeBlock(0x3130, 0x318F,
1063:                          "HANGUL_COMPATIBILITY_JAMO", 
1064:                          "Hangul Compatibility Jamo");
1065: 
1066:     /**
1067:      * The "Kanbun" Unicode block.
1068:      * Covers code points 0x3190 - 0x319F (inclusive).
1069:      */
1070:     public static final UnicodeBlock KANBUN
1071:       = new UnicodeBlock(0x3190, 0x319F,
1072:                          "KANBUN", 
1073:                          "Kanbun");
1074: 
1075:     /**
1076:      * The "Bopomofo Extended" Unicode block.
1077:      * Covers code points 0x31A0 - 0x31BF (inclusive).
1078:      * @since 1.4
1079:      */
1080:     public static final UnicodeBlock BOPOMOFO_EXTENDED
1081:       = new UnicodeBlock(0x31A0, 0x31BF,
1082:                          "BOPOMOFO_EXTENDED", 
1083:                          "Bopomofo Extended");
1084: 
1085:     /**
1086:      * The "Katakana Phonetic Extensions" Unicode block.
1087:      * Covers code points 0x31F0 - 0x31FF (inclusive).
1088:      * @since 1.5
1089:      */
1090:     public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1091:       = new UnicodeBlock(0x31F0, 0x31FF,
1092:                          "KATAKANA_PHONETIC_EXTENSIONS", 
1093:                          "Katakana Phonetic Extensions");
1094: 
1095:     /**
1096:      * The "Enclosed CJK Letters and Months" Unicode block.
1097:      * Covers code points 0x3200 - 0x32FF (inclusive).
1098:      */
1099:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1100:       = new UnicodeBlock(0x3200, 0x32FF,
1101:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1102:                          "Enclosed CJK Letters and Months");
1103: 
1104:     /**
1105:      * The "CJK Compatibility" Unicode block.
1106:      * Covers code points 0x3300 - 0x33FF (inclusive).
1107:      */
1108:     public static final UnicodeBlock CJK_COMPATIBILITY
1109:       = new UnicodeBlock(0x3300, 0x33FF,
1110:                          "CJK_COMPATIBILITY", 
1111:                          "CJK Compatibility");
1112: 
1113:     /**
1114:      * The "CJK Unified Ideographs Extension A" Unicode block.
1115:      * Covers code points 0x3400 - 0x4DBF (inclusive).
1116:      * @since 1.4
1117:      */
1118:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1119:       = new UnicodeBlock(0x3400, 0x4DBF,
1120:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1121:                          "CJK Unified Ideographs Extension A");
1122: 
1123:     /**
1124:      * The "Yijing Hexagram Symbols" Unicode block.
1125:      * Covers code points 0x4DC0 - 0x4DFF (inclusive).
1126:      * @since 1.5
1127:      */
1128:     public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1129:       = new UnicodeBlock(0x4DC0, 0x4DFF,
1130:                          "YIJING_HEXAGRAM_SYMBOLS", 
1131:                          "Yijing Hexagram Symbols");
1132: 
1133:     /**
1134:      * The "CJK Unified Ideographs" Unicode block.
1135:      * Covers code points 0x4E00 - 0x9FFF (inclusive).
1136:      */
1137:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1138:       = new UnicodeBlock(0x4E00, 0x9FFF,
1139:                          "CJK_UNIFIED_IDEOGRAPHS", 
1140:                          "CJK Unified Ideographs");
1141: 
1142:     /**
1143:      * The "Yi Syllables" Unicode block.
1144:      * Covers code points 0xA000 - 0xA48F (inclusive).
1145:      * @since 1.4
1146:      */
1147:     public static final UnicodeBlock YI_SYLLABLES
1148:       = new UnicodeBlock(0xA000, 0xA48F,
1149:                          "YI_SYLLABLES", 
1150:                          "Yi Syllables");
1151: 
1152:     /**
1153:      * The "Yi Radicals" Unicode block.
1154:      * Covers code points 0xA490 - 0xA4CF (inclusive).
1155:      * @since 1.4
1156:      */
1157:     public static final UnicodeBlock YI_RADICALS
1158:       = new UnicodeBlock(0xA490, 0xA4CF,
1159:                          "YI_RADICALS", 
1160:                          "Yi Radicals");
1161: 
1162:     /**
1163:      * The "Hangul Syllables" Unicode block.
1164:      * Covers code points 0xAC00 - 0xD7AF (inclusive).
1165:      */
1166:     public static final UnicodeBlock HANGUL_SYLLABLES
1167:       = new UnicodeBlock(0xAC00, 0xD7AF,
1168:                          "HANGUL_SYLLABLES", 
1169:                          "Hangul Syllables");
1170: 
1171:     /**
1172:      * The "High Surrogates" Unicode block.
1173:      * Covers code points 0xD800 - 0xDB7F (inclusive).
1174:      * @since 1.5
1175:      */
1176:     public static final UnicodeBlock HIGH_SURROGATES
1177:       = new UnicodeBlock(0xD800, 0xDB7F,
1178:                          "HIGH_SURROGATES", 
1179:                          "High Surrogates");
1180: 
1181:     /**
1182:      * The "High Private Use Surrogates" Unicode block.
1183:      * Covers code points 0xDB80 - 0xDBFF (inclusive).
1184:      * @since 1.5
1185:      */
1186:     public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1187:       = new UnicodeBlock(0xDB80, 0xDBFF,
1188:                          "HIGH_PRIVATE_USE_SURROGATES", 
1189:                          "High Private Use Surrogates");
1190: 
1191:     /**
1192:      * The "Low Surrogates" Unicode block.
1193:      * Covers code points 0xDC00 - 0xDFFF (inclusive).
1194:      * @since 1.5
1195:      */
1196:     public static final UnicodeBlock LOW_SURROGATES
1197:       = new UnicodeBlock(0xDC00, 0xDFFF,
1198:                          "LOW_SURROGATES", 
1199:                          "Low Surrogates");
1200: 
1201:     /**
1202:      * The "Private Use Area" Unicode block.
1203:      * Covers code points 0xE000 - 0xF8FF (inclusive).
1204:      */
1205:     public static final UnicodeBlock PRIVATE_USE_AREA
1206:       = new UnicodeBlock(0xE000, 0xF8FF,
1207:                          "PRIVATE_USE_AREA", 
1208:                          "Private Use Area");
1209: 
1210:     /**
1211:      * The "CJK Compatibility Ideographs" Unicode block.
1212:      * Covers code points 0xF900 - 0xFAFF (inclusive).
1213:      */
1214:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1215:       = new UnicodeBlock(0xF900, 0xFAFF,
1216:                          "CJK_COMPATIBILITY_IDEOGRAPHS", 
1217:                          "CJK Compatibility Ideographs");
1218: 
1219:     /**
1220:      * The "Alphabetic Presentation Forms" Unicode block.
1221:      * Covers code points 0xFB00 - 0xFB4F (inclusive).
1222:      */
1223:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1224:       = new UnicodeBlock(0xFB00, 0xFB4F,
1225:                          "ALPHABETIC_PRESENTATION_FORMS", 
1226:                          "Alphabetic Presentation Forms");
1227: 
1228:     /**
1229:      * The "Arabic Presentation Forms-A" Unicode block.
1230:      * Covers code points 0xFB50 - 0xFDFF (inclusive).
1231:      */
1232:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1233:       = new UnicodeBlock(0xFB50, 0xFDFF,
1234:                          "ARABIC_PRESENTATION_FORMS_A", 
1235:                          "Arabic Presentation Forms-A");
1236: 
1237:     /**
1238:      * The "Variation Selectors" Unicode block.
1239:      * Covers code points 0xFE00 - 0xFE0F (inclusive).
1240:      * @since 1.5
1241:      */
1242:     public static final UnicodeBlock VARIATION_SELECTORS
1243:       = new UnicodeBlock(0xFE00, 0xFE0F,
1244:                          "VARIATION_SELECTORS", 
1245:                          "Variation Selectors");
1246: 
1247:     /**
1248:      * The "Combining Half Marks" Unicode block.
1249:      * Covers code points 0xFE20 - 0xFE2F (inclusive).
1250:      */
1251:     public static final UnicodeBlock COMBINING_HALF_MARKS
1252:       = new UnicodeBlock(0xFE20, 0xFE2F,
1253:                          "COMBINING_HALF_MARKS", 
1254:                          "Combining Half Marks");
1255: 
1256:     /**
1257:      * The "CJK Compatibility Forms" Unicode block.
1258:      * Covers code points 0xFE30 - 0xFE4F (inclusive).
1259:      */
1260:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1261:       = new UnicodeBlock(0xFE30, 0xFE4F,
1262:                          "CJK_COMPATIBILITY_FORMS", 
1263:                          "CJK Compatibility Forms");
1264: 
1265:     /**
1266:      * The "Small Form Variants" Unicode block.
1267:      * Covers code points 0xFE50 - 0xFE6F (inclusive).
1268:      */
1269:     public static final UnicodeBlock SMALL_FORM_VARIANTS
1270:       = new UnicodeBlock(0xFE50, 0xFE6F,
1271:                          "SMALL_FORM_VARIANTS", 
1272:                          "Small Form Variants");
1273: 
1274:     /**
1275:      * The "Arabic Presentation Forms-B" Unicode block.
1276:      * Covers code points 0xFE70 - 0xFEFF (inclusive).
1277:      */
1278:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1279:       = new UnicodeBlock(0xFE70, 0xFEFF,
1280:                          "ARABIC_PRESENTATION_FORMS_B", 
1281:                          "Arabic Presentation Forms-B");
1282: 
1283:     /**
1284:      * The "Halfwidth and Fullwidth Forms" Unicode block.
1285:      * Covers code points 0xFF00 - 0xFFEF (inclusive).
1286:      */
1287:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1288:       = new UnicodeBlock(0xFF00, 0xFFEF,
1289:                          "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1290:                          "Halfwidth and Fullwidth Forms");
1291: 
1292:     /**
1293:      * The "Specials" Unicode block.
1294:      * Covers code points 0xFFF0 - 0xFFFF (inclusive).
1295:      */
1296:     public static final UnicodeBlock SPECIALS
1297:       = new UnicodeBlock(0xFFF0, 0xFFFF,
1298:                          "SPECIALS", 
1299:                          "Specials");
1300: 
1301:     /**
1302:      * The "Linear B Syllabary" Unicode block.
1303:      * Covers code points 0x10000 - 0x1007F (inclusive).
1304:      * @since 1.5
1305:      */
1306:     public static final UnicodeBlock LINEAR_B_SYLLABARY
1307:       = new UnicodeBlock(0x10000, 0x1007F,
1308:                          "LINEAR_B_SYLLABARY", 
1309:                          "Linear B Syllabary");
1310: 
1311:     /**
1312:      * The "Linear B Ideograms" Unicode block.
1313:      * Covers code points 0x10080 - 0x100FF (inclusive).
1314:      * @since 1.5
1315:      */
1316:     public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1317:       = new UnicodeBlock(0x10080, 0x100FF,
1318:                          "LINEAR_B_IDEOGRAMS", 
1319:                          "Linear B Ideograms");
1320: 
1321:     /**
1322:      * The "Aegean Numbers" Unicode block.
1323:      * Covers code points 0x10100 - 0x1013F (inclusive).
1324:      * @since 1.5
1325:      */
1326:     public static final UnicodeBlock AEGEAN_NUMBERS
1327:       = new UnicodeBlock(0x10100, 0x1013F,
1328:                          "AEGEAN_NUMBERS", 
1329:                          "Aegean Numbers");
1330: 
1331:     /**
1332:      * The "Old Italic" Unicode block.
1333:      * Covers code points 0x10300 - 0x1032F (inclusive).
1334:      * @since 1.5
1335:      */
1336:     public static final UnicodeBlock OLD_ITALIC
1337:       = new UnicodeBlock(0x10300, 0x1032F,
1338:                          "OLD_ITALIC", 
1339:                          "Old Italic");
1340: 
1341:     /**
1342:      * The "Gothic" Unicode block.
1343:      * Covers code points 0x10330 - 0x1034F (inclusive).
1344:      * @since 1.5
1345:      */
1346:     public static final UnicodeBlock GOTHIC
1347:       = new UnicodeBlock(0x10330, 0x1034F,
1348:                          "GOTHIC", 
1349:                          "Gothic");
1350: 
1351:     /**
1352:      * The "Ugaritic" Unicode block.
1353:      * Covers code points 0x10380 - 0x1039F (inclusive).
1354:      * @since 1.5
1355:      */
1356:     public static final UnicodeBlock UGARITIC
1357:       = new UnicodeBlock(0x10380, 0x1039F,
1358:                          "UGARITIC", 
1359:                          "Ugaritic");
1360: 
1361:     /**
1362:      * The "Deseret" Unicode block.
1363:      * Covers code points 0x10400 - 0x1044F (inclusive).
1364:      * @since 1.5
1365:      */
1366:     public static final UnicodeBlock DESERET
1367:       = new UnicodeBlock(0x10400, 0x1044F,
1368:                          "DESERET", 
1369:                          "Deseret");
1370: 
1371:     /**
1372:      * The "Shavian" Unicode block.
1373:      * Covers code points 0x10450 - 0x1047F (inclusive).
1374:      * @since 1.5
1375:      */
1376:     public static final UnicodeBlock SHAVIAN
1377:       = new UnicodeBlock(0x10450, 0x1047F,
1378:                          "SHAVIAN", 
1379:                          "Shavian");
1380: 
1381:     /**
1382:      * The "Osmanya" Unicode block.
1383:      * Covers code points 0x10480 - 0x104AF (inclusive).
1384:      * @since 1.5
1385:      */
1386:     public static final UnicodeBlock OSMANYA
1387:       = new UnicodeBlock(0x10480, 0x104AF,
1388:                          "OSMANYA", 
1389:                          "Osmanya");
1390: 
1391:     /**
1392:      * The "Cypriot Syllabary" Unicode block.
1393:      * Covers code points 0x10800 - 0x1083F (inclusive).
1394:      * @since 1.5
1395:      */
1396:     public static final UnicodeBlock CYPRIOT_SYLLABARY
1397:       = new UnicodeBlock(0x10800, 0x1083F,
1398:                          "CYPRIOT_SYLLABARY", 
1399:                          "Cypriot Syllabary");
1400: 
1401:     /**
1402:      * The "Byzantine Musical Symbols" Unicode block.
1403:      * Covers code points 0x1D000 - 0x1D0FF (inclusive).
1404:      * @since 1.5
1405:      */
1406:     public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1407:       = new UnicodeBlock(0x1D000, 0x1D0FF,
1408:                          "BYZANTINE_MUSICAL_SYMBOLS", 
1409:                          "Byzantine Musical Symbols");
1410: 
1411:     /**
1412:      * The "Musical Symbols" Unicode block.
1413:      * Covers code points 0x1D100 - 0x1D1FF (inclusive).
1414:      * @since 1.5
1415:      */
1416:     public static final UnicodeBlock MUSICAL_SYMBOLS
1417:       = new UnicodeBlock(0x1D100, 0x1D1FF,
1418:                          "MUSICAL_SYMBOLS", 
1419:                          "Musical Symbols");
1420: 
1421:     /**
1422:      * The "Tai Xuan Jing Symbols" Unicode block.
1423:      * Covers code points 0x1D300 - 0x1D35F (inclusive).
1424:      * @since 1.5
1425:      */
1426:     public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1427:       = new UnicodeBlock(0x1D300, 0x1D35F,
1428:                          "TAI_XUAN_JING_SYMBOLS", 
1429:                          "Tai Xuan Jing Symbols");
1430: 
1431:     /**
1432:      * The "Mathematical Alphanumeric Symbols" Unicode block.
1433:      * Covers code points 0x1D400 - 0x1D7FF (inclusive).
1434:      * @since 1.5
1435:      */
1436:     public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1437:       = new UnicodeBlock(0x1D400, 0x1D7FF,
1438:                          "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1439:                          "Mathematical Alphanumeric Symbols");
1440: 
1441:     /**
1442:      * The "CJK Unified Ideographs Extension B" Unicode block.
1443:      * Covers code points 0x20000 - 0x2A6DF (inclusive).
1444:      * @since 1.5
1445:      */
1446:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1447:       = new UnicodeBlock(0x20000, 0x2A6DF,
1448:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1449:                          "CJK Unified Ideographs Extension B");
1450: 
1451:     /**
1452:      * The "CJK Compatibility Ideographs Supplement" Unicode block.
1453:      * Covers code points 0x2F800 - 0x2FA1F (inclusive).
1454:      * @since 1.5
1455:      */
1456:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1457:       = new UnicodeBlock(0x2F800, 0x2FA1F,
1458:                          "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1459:                          "CJK Compatibility Ideographs Supplement");
1460: 
1461:     /**
1462:      * The "Tags" Unicode block.
1463:      * Covers code points 0xE0000 - 0xE007F (inclusive).
1464:      * @since 1.5
1465:      */
1466:     public static final UnicodeBlock TAGS
1467:       = new UnicodeBlock(0xE0000, 0xE007F,
1468:                          "TAGS", 
1469:                          "Tags");
1470: 
1471:     /**
1472:      * The "Variation Selectors Supplement" Unicode block.
1473:      * Covers code points 0xE0100 - 0xE01EF (inclusive).
1474:      * @since 1.5
1475:      */
1476:     public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1477:       = new UnicodeBlock(0xE0100, 0xE01EF,
1478:                          "VARIATION_SELECTORS_SUPPLEMENT", 
1479:                          "Variation Selectors Supplement");
1480: 
1481:     /**
1482:      * The "Supplementary Private Use Area-A" Unicode block.
1483:      * Covers code points 0xF0000 - 0xFFFFF (inclusive).
1484:      * @since 1.5
1485:      */
1486:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1487:       = new UnicodeBlock(0xF0000, 0xFFFFF,
1488:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1489:                          "Supplementary Private Use Area-A");
1490: 
1491:     /**
1492:      * The "Supplementary Private Use Area-B" Unicode block.
1493:      * Covers code points 0x100000 - 0x10FFFF (inclusive).
1494:      * @since 1.5
1495:      */
1496:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1497:       = new UnicodeBlock(0x100000, 0x10FFFF,
1498:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1499:                          "Supplementary Private Use Area-B");
1500: 
1501:     /**
1502:      * The "Surrogates Area" Unicode block.
1503:      * Covers code points 0xD800 - 0xDFFF (inclusive).
1504:      * @deprecated As of 1.5, the three areas,
1505:      * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1506:      * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1507:      * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1508:      * by the Unicode standard, should be used in preference to
1509:      * this.  These are also returned from calls to <code>of(int)</code>
1510:      * and <code>of(char)</code>.
1511:      */
1512:     @Deprecated
1513:     public static final UnicodeBlock SURROGATES_AREA
1514:       = new UnicodeBlock(0xD800, 0xDFFF,
1515:                          "SURROGATES_AREA",
1516:              "Surrogates Area");
1517: 
1518:     /**
1519:      * The defined subsets; the deprecated SURROGATES_AREA is not listed.
1520:      */
1521:     private static final UnicodeBlock sets[] = {
1522:       BASIC_LATIN,
1523:       LATIN_1_SUPPLEMENT,
1524:       LATIN_EXTENDED_A,
1525:       LATIN_EXTENDED_B,
1526:       IPA_EXTENSIONS,
1527:       SPACING_MODIFIER_LETTERS,
1528:       COMBINING_DIACRITICAL_MARKS,
1529:       GREEK,
1530:       CYRILLIC,
1531:       CYRILLIC_SUPPLEMENTARY,
1532:       ARMENIAN,
1533:       HEBREW,
1534:       ARABIC,
1535:       SYRIAC,
1536:       THAANA,
1537:       DEVANAGARI,
1538:       BENGALI,
1539:       GURMUKHI,
1540:       GUJARATI,
1541:       ORIYA,
1542:       TAMIL,
1543:       TELUGU,
1544:       KANNADA,
1545:       MALAYALAM,
1546:       SINHALA,
1547:       THAI,
1548:       LAO,
1549:       TIBETAN,
1550:       MYANMAR,
1551:       GEORGIAN,
1552:       HANGUL_JAMO,
1553:       ETHIOPIC,
1554:       CHEROKEE,
1555:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1556:       OGHAM,
1557:       RUNIC,
1558:       TAGALOG,
1559:       HANUNOO,
1560:       BUHID,
1561:       TAGBANWA,
1562:       KHMER,
1563:       MONGOLIAN,
1564:       LIMBU,
1565:       TAI_LE,
1566:       KHMER_SYMBOLS,
1567:       PHONETIC_EXTENSIONS,
1568:       LATIN_EXTENDED_ADDITIONAL,
1569:       GREEK_EXTENDED,
1570:       GENERAL_PUNCTUATION,
1571:       SUPERSCRIPTS_AND_SUBSCRIPTS,
1572:       CURRENCY_SYMBOLS,
1573:       COMBINING_MARKS_FOR_SYMBOLS,
1574:       LETTERLIKE_SYMBOLS,
1575:       NUMBER_FORMS,
1576:       ARROWS,
1577:       MATHEMATICAL_OPERATORS,
1578:       MISCELLANEOUS_TECHNICAL,
1579:       CONTROL_PICTURES,
1580:       OPTICAL_CHARACTER_RECOGNITION,
1581:       ENCLOSED_ALPHANUMERICS,
1582:       BOX_DRAWING,
1583:       BLOCK_ELEMENTS,
1584:       GEOMETRIC_SHAPES,
1585:       MISCELLANEOUS_SYMBOLS,
1586:       DINGBATS,
1587:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1588:       SUPPLEMENTAL_ARROWS_A,
1589:       BRAILLE_PATTERNS,
1590:       SUPPLEMENTAL_ARROWS_B,
1591:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1592:       SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1593:       MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1594:       CJK_RADICALS_SUPPLEMENT,
1595:       KANGXI_RADICALS,
1596:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1597:       CJK_SYMBOLS_AND_PUNCTUATION,
1598:       HIRAGANA,
1599:       KATAKANA,
1600:       BOPOMOFO,
1601:       HANGUL_COMPATIBILITY_JAMO,
1602:       KANBUN,
1603:       BOPOMOFO_EXTENDED,
1604:       KATAKANA_PHONETIC_EXTENSIONS,
1605:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
1606:       CJK_COMPATIBILITY,
1607:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1608:       YIJING_HEXAGRAM_SYMBOLS,
1609:       CJK_UNIFIED_IDEOGRAPHS,
1610:       YI_SYLLABLES,
1611:       YI_RADICALS,
1612:       HANGUL_SYLLABLES,
1613:       HIGH_SURROGATES,
1614:       HIGH_PRIVATE_USE_SURROGATES,
1615:       LOW_SURROGATES,
1616:       PRIVATE_USE_AREA,
1617:       CJK_COMPATIBILITY_IDEOGRAPHS,
1618:       ALPHABETIC_PRESENTATION_FORMS,
1619:       ARABIC_PRESENTATION_FORMS_A,
1620:       VARIATION_SELECTORS,
1621:       COMBINING_HALF_MARKS,
1622:       CJK_COMPATIBILITY_FORMS,
1623:       SMALL_FORM_VARIANTS,
1624:       ARABIC_PRESENTATION_FORMS_B,
1625:       HALFWIDTH_AND_FULLWIDTH_FORMS,
1626:       SPECIALS,
1627:       LINEAR_B_SYLLABARY,
1628:       LINEAR_B_IDEOGRAMS,
1629:       AEGEAN_NUMBERS,
1630:       OLD_ITALIC,
1631:       GOTHIC,
1632:       UGARITIC,
1633:       DESERET,
1634:       SHAVIAN,
1635:       OSMANYA,
1636:       CYPRIOT_SYLLABARY,
1637:       BYZANTINE_MUSICAL_SYMBOLS,
1638:       MUSICAL_SYMBOLS,
1639:       TAI_XUAN_JING_SYMBOLS,
1640:       MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1641:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1642:       CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1643:       TAGS,
1644:       VARIATION_SELECTORS_SUPPLEMENT,
1645:       SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1646:       SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1647:     };
1648:   } // class UnicodeBlock
1649: 
1650:   /**
1651:    * The primitive char wrapped by this Character; being final, it is
1652:    * never reassigned.
1653:    * @serial the value of this Character
1654:    */
1655:   private final char value;
1656: 
1657:   /**
1658:    * Serialization version identifier; compatible with JDK 1.0+.
1659:    */
1660:   private static final long serialVersionUID = 3786198910865385080L;
1661: 
1662:   /**
1663:    * Smallest value allowed for radix arguments in Java, for example in
1664:    * the methods referenced below. This value is <code>2</code>.
1665:    * @see #digit(char, int)
1666:    * @see #forDigit(int, int)
1667:    * @see Integer#toString(int, int)
1668:    * @see Integer#valueOf(String)
1669:    */
1670:   public static final int MIN_RADIX = 2;
1671: 
1672:   /**
1673:    * Largest value allowed for radix arguments in Java, for example in
1674:    * the methods referenced below. This value is <code>36</code>.
1675:    * @see #digit(char, int)
1676:    * @see #forDigit(int, int)
1677:    * @see Integer#toString(int, int)
1678:    * @see Integer#valueOf(String)
1679:    */
1680:   public static final int MAX_RADIX = 36;
1681: 
1682:   /**
1683:    * The smallest value the char data type can hold.
1684:    * This value is <code>'\\u0000'</code>.
1685:    */
1686:   public static final char MIN_VALUE = '\u0000';
1687: 
1688:   /**
1689:    * The largest value the char data type can hold.
1690:    * This value is <code>'\\uFFFF'</code>.
1691:    */
1692:   public static final char MAX_VALUE = '\uFFFF';
1693: 
1694:   /**
1695:    * The smallest Unicode 4.0 code point.  This value is <code>0</code>.
1696:    * @since 1.5
1697:    */
1698:   public static final int MIN_CODE_POINT = 0;
1699: 
1700:   /**
1701:    * The largest Unicode 4.0 code point. It lies outside the range of the
1702:    * char data type, which is why this constant is an int.
1703:    * This value is <code>0x10FFFF</code>.
1704:    * @since 1.5
1705:    */
1706:   public static final int MAX_CODE_POINT = 0x10FFFF;
1707: 
1708:   /**
1709:    * The minimum Unicode high surrogate code unit, or
1710:    * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1711:    * This value is <code>'\\uD800'</code>.
1712:    * @since 1.5
1713:    */
1714:   public static final char MIN_HIGH_SURROGATE = '\uD800';
1715: 
1716:   /**
1717:    * The maximum Unicode high surrogate code unit, or
1718:    * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1719:    * This value is <code>'\\uDBFF'</code>.
1720:    * @since 1.5
1721:    */
1722:   public static final char MAX_HIGH_SURROGATE = '\uDBFF';
1723: 
1724:   /**
1725:    * The minimum Unicode low surrogate code unit, or
1726:    * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
1727:    * This value is <code>'\\uDC00'</code>.
1728:    * @since 1.5
1729:    */
1730:   public static final char MIN_LOW_SURROGATE = '\uDC00';
1731: 
1732:   /**
1733:    * The maximum Unicode low surrogate code unit, or
1734:    * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
1735:    * This value is <code>'\\uDFFF'</code>.
1736:    * @since 1.5
1737:    */
1738:   public static final char MAX_LOW_SURROGATE = '\uDFFF';  
1739: 
1740:   /**
1741:    * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
1742:    * Same as MIN_HIGH_SURROGATE; this value is <code>'\\uD800'</code>.
1743:    * @since 1.5
1744:    */
1745:   public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
1746: 
1747:   /**
1748:    * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
1749:    * Same as MAX_LOW_SURROGATE; this value is <code>'\\uDFFF'</code>.
1750:    * @since 1.5
1751:    */
1752:   public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
1753: 
1754:   /**
1755:    * The lowest supplementary Unicode code point, i.e. the first code point
1756:    * outside the basic multilingual plane (BMP): <code>0x10000</code>.
1757:    * @since 1.5
1758:    */
1759:   public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
1760: 
1761:   /**
1762:    * Class object representing the primitive char data type.
1763:    * It is obtained from <code>VMClassLoader.getPrimitiveClass('C')</code>.
1764:    * @since 1.1
1765:    */
1766:   public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');
1767: 
1768:   /**
1769:    * The number of bits used to represent a <code>char</code> value (16).
1770:    * @since 1.5
1771:    */
1772:   public static final int SIZE = 16;
1773: 
1774:   // Cache of Character instances, used by boxing conversions via
1775:   // valueOf().  At least 0..127 must be cached; MAX_CACHE controls how
1776:   // much is actually cached (the array has MAX_CACHE + 1 slots).
1777:   private static final int MAX_CACHE = 127;
1778:   private static Character[] charCache = new Character[MAX_CACHE + 1];
1779: 
1780:   /**
1781:    * Unicode general category "Lu": Letter, Uppercase (Informative).
1782:    *
1783:    * @since 1.1
1784:    */
1785:   public static final byte UPPERCASE_LETTER = 1;
1786: 
1787:   /**
1788:    * Unicode general category "Ll": Letter, Lowercase (Informative).
1789:    *
1790:    * @since 1.1
1791:    */
1792:   public static final byte LOWERCASE_LETTER = 2;
1793: 
1794:   /**
1795:    * Unicode general category "Lt": Letter, Titlecase (Informative).
1796:    *
1797:    * @since 1.1
1798:    */
1799:   public static final byte TITLECASE_LETTER = 3;
1800: 
1801:   /**
1802:    * Unicode general category "Mn": Mark, Non-Spacing (Normative).
1803:    *
1804:    * @since 1.1
1805:    */
1806:   public static final byte NON_SPACING_MARK = 6;
1807: 
1808:   /**
1809:    * Unicode general category "Mc": Mark, Spacing Combining (Normative).
1810:    *
1811:    * @since 1.1
1812:    */
1813:   public static final byte COMBINING_SPACING_MARK = 8;
1814: 
1815:   /**
1816:    * Unicode general category "Me": Mark, Enclosing (Normative).
1817:    *
1818:    * @since 1.1
1819:    */
1820:   public static final byte ENCLOSING_MARK = 7;
1821: 
1822:   /**
1823:    * Unicode general category "Nd": Number, Decimal Digit (Normative).
1824:    *
1825:    * @since 1.1
1826:    */
1827:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
1828: 
1829:   /**
1830:    * Unicode general category "Nl": Number, Letter (Normative).
1831:    *
1832:    * @since 1.1
1833:    */
1834:   public static final byte LETTER_NUMBER = 10;
1835: 
1836:   /**
1837:    * Unicode general category "No": Number, Other (Normative).
1838:    *
1839:    * @since 1.1
1840:    */
1841:   public static final byte OTHER_NUMBER = 11;
1842: 
1843:   /**
1844:    * Unicode general category "Zs": Separator, Space (Normative).
1845:    *
1846:    * @since 1.1
1847:    */
1848:   public static final byte SPACE_SEPARATOR = 12;
1849: 
1850:   /**
1851:    * Zl = Separator, Line (Normative).
1852:    *
1853:    * @since 1.1
1854:    */
1855:   public static final byte LINE_SEPARATOR = 13;
1856: 
1857:   /**
1858:    * Zp = Separator, Paragraph (Normative).
1859:    *
1860:    * @since 1.1
1861:    */
1862:   public static final byte PARAGRAPH_SEPARATOR = 14;
1863: 
1864:   /**
1865:    * Cc = Other, Control (Normative).
1866:    *
1867:    * @since 1.1
1868:    */
1869:   public static final byte CONTROL = 15;
1870: 
1871:   /**
1872:    * Cf = Other, Format (Normative).
1873:    *
1874:    * @since 1.1
1875:    */
1876:   public static final byte FORMAT = 16;
1877: 
1878:   /**
1879:    * Cs = Other, Surrogate (Normative).
1880:    *
1881:    * @since 1.1
1882:    */
1883:   public static final byte SURROGATE = 19;
1884: 
1885:   /**
1886:    * Co = Other, Private Use (Normative).
1887:    *
1888:    * @since 1.1
1889:    */
1890:   public static final byte PRIVATE_USE = 18;
1891: 
1892:   /**
1893:    * Cn = Other, Not Assigned (Normative).
1894:    *
1895:    * @since 1.1
1896:    */
1897:   public static final byte UNASSIGNED = 0;
1898: 
1899:   /**
1900:    * Lm = Letter, Modifier (Informative).
1901:    *
1902:    * @since 1.1
1903:    */
1904:   public static final byte MODIFIER_LETTER = 4;
1905: 
1906:   /**
1907:    * Lo = Letter, Other (Informative).
1908:    *
1909:    * @since 1.1
1910:    */
1911:   public static final byte OTHER_LETTER = 5;
1912: 
1913:   /**
1914:    * Pc = Punctuation, Connector (Informative).
1915:    *
1916:    * @since 1.1
1917:    */
1918:   public static final byte CONNECTOR_PUNCTUATION = 23;
1919: 
1920:   /**
1921:    * Pd = Punctuation, Dash (Informative).
1922:    *
1923:    * @since 1.1
1924:    */
1925:   public static final byte DASH_PUNCTUATION = 20;
1926: 
1927:   /**
1928:    * Ps = Punctuation, Open (Informative).
1929:    *
1930:    * @since 1.1
1931:    */
1932:   public static final byte START_PUNCTUATION = 21;
1933: 
1934:   /**
1935:    * Pe = Punctuation, Close (Informative).
1936:    *
1937:    * @since 1.1
1938:    */
1939:   public static final byte END_PUNCTUATION = 22;
1940: 
1941:   /**
1942:    * Pi = Punctuation, Initial Quote (Informative).
1943:    *
1944:    * @since 1.4
1945:    */
1946:   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1947: 
1948:   /**
1949:    * Pf = Punctuation, Final Quote (Informative).
1950:    *
1951:    * @since 1.4
1952:    */
1953:   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1954: 
1955:   /**
1956:    * Po = Punctuation, Other (Informative).
1957:    *
1958:    * @since 1.1
1959:    */
1960:   public static final byte OTHER_PUNCTUATION = 24;
1961: 
1962:   /**
1963:    * Sm = Symbol, Math (Informative).
1964:    *
1965:    * @since 1.1
1966:    */
1967:   public static final byte MATH_SYMBOL = 25;
1968: 
1969:   /**
1970:    * Sc = Symbol, Currency (Informative).
1971:    *
1972:    * @since 1.1
1973:    */
1974:   public static final byte CURRENCY_SYMBOL = 26;
1975: 
1976:   /**
1977:    * Sk = Symbol, Modifier (Informative).
1978:    *
1979:    * @since 1.1
1980:    */
1981:   public static final byte MODIFIER_SYMBOL = 27;
1982: 
1983:   /**
1984:    * So = Symbol, Other (Informative).
1985:    *
1986:    * @since 1.1
1987:    */
1988:   public static final byte OTHER_SYMBOL = 28;
1989: 
  // Bidirectional character type codes.  The quoted abbreviation in each
  // comment is the type name used by the Unicode specification.

  /**
   * Undefined bidirectional character type. Undefined char values have
   * undefined directionality in the Unicode specification.
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_UNDEFINED = -1;

  /**
   * Strong bidirectional character type "L" (left-to-right).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

  /**
   * Strong bidirectional character type "R" (right-to-left).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

  /**
   * Strong bidirectional character type "AL" (right-to-left Arabic).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

  /**
   * Weak bidirectional character type "EN" (European number).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

  /**
   * Weak bidirectional character type "ES" (European number separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

  /**
   * Weak bidirectional character type "ET" (European number terminator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

  /**
   * Weak bidirectional character type "AN" (Arabic number).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

  /**
   * Weak bidirectional character type "CS" (common number separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

  /**
   * Weak bidirectional character type "NSM" (non-spacing mark).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

  /**
   * Weak bidirectional character type "BN" (boundary neutral).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

  /**
   * Neutral bidirectional character type "B" (paragraph separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

  /**
   * Neutral bidirectional character type "S" (segment separator).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

  /**
   * Neutral bidirectional character type "WS" (whitespace).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_WHITESPACE = 12;

  /**
   * Neutral bidirectional character type "ON" (other neutrals).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

  /**
   * Strong bidirectional character type "LRE" (left-to-right embedding).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

  /**
   * Strong bidirectional character type "LRO" (left-to-right override).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

  /**
   * Strong bidirectional character type "RLE" (right-to-left embedding).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

  /**
   * Strong bidirectional character type "RLO" (right-to-left override).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

  /**
   * Weak bidirectional character type "PDF" (pop directional format).
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2130: 
  /**
   * Mask for grabbing the type out of the result of readChar.  Selects
   * the low 5 bits (0x1F) of that result.
   * @see #readChar(char)
   */
  private static final int TYPE_MASK = 0x1F;

  /**
   * Mask for grabbing the non-breaking space flag out of the result of
   * readChar.  This is the single bit 0x20, directly above the type bits.
   * @see #readChar(char)
   */
  private static final int NO_BREAK_MASK = 0x20;

  /**
   * Mask for grabbing the mirrored directionality flag out of the result
   * of readChar.  This is the single bit 0x40, directly above the
   * non-breaking space flag.
   * @see #readChar(char)
   */
  private static final int MIRROR_MASK = 0x40;
2150: 
  /**
   * Grabs an attribute offset from the Unicode attribute database. The
   * 16-bit result is packed as follows: the lower 5 bits are the character
   * type, the next 2 bits are flags, and the top 9 bits are the offset into
   * the attribute tables. Note that the top 9 bits are meaningless in this
   * context; they are useful only in the native code.  Use the masks listed
   * below to pick the result apart.
   *
   * @param ch the character to look up
   * @return the character's attribute offset and type
   * @see #TYPE_MASK
   * @see #NO_BREAK_MASK
   * @see #MIRROR_MASK
   */
  private static native char readChar(char ch);
2165: 
  /**
   * Grabs an attribute offset from the Unicode attribute database for a
   * code point given as an <code>int</code>. The 16-bit result is packed as
   * follows: the lower 5 bits are the character type, the next 2 bits are
   * flags, and the top 9 bits are the offset into the attribute tables.
   * Note that the top 9 bits are meaningless in this context; they are
   * useful only in the native code.
   *
   * @param codePoint the code point to look up
   * @return the code point's attribute offset and type
   * @see #TYPE_MASK
   * @see #NO_BREAK_MASK
   * @see #MIRROR_MASK
   */
  private static native char readCodePoint(int codePoint);
2180: 
  /**
   * Wraps up a character by storing it in this instance's
   * <code>value</code> field.
   *
   * @param value the character to wrap
   */
  public Character(char value)
  {
    this.value = value;
  }
2190: 
2191:   /**
2192:    * Returns the character which has been wrapped by this class.
2193:    *
2194:    * @return the character wrapped
2195:    */
2196:   public char charValue()
2197:   {
2198:     return value;
2199:   }
2200: 
2201:   /**
2202:    * Returns the numerical value (unsigned) of the wrapped character.
2203:    * Range of returned values: 0x0000-0xFFFF.
2204:    *
2205:    * @return the value of the wrapped character
2206:    */
2207:   public int hashCode()
2208:   {
2209:     return value;
2210:   }
2211: 
2212:   /**
2213:    * Determines if an object is equal to this object. This is only true for
2214:    * another Character object wrapping the same value.
2215:    *
2216:    * @param o object to compare
2217:    * @return true if o is a Character with the same value
2218:    */
2219:   public boolean equals(Object o)
2220:   {
2221:     return o instanceof Character && value == ((Character) o).value;
2222:   }
2223: 
2224:   /**
2225:    * Converts the wrapped character into a String.
2226:    *
2227:    * @return a String containing one character -- the wrapped character
2228:    *         of this instance
2229:    */
2230:   public String toString()
2231:   {
2232:     // This assumes that String.valueOf(char) can create a single-character
2233:     // String more efficiently than through the public API.
2234:     return String.valueOf(value);
2235:   }
2236: 
2237:   /**
2238:    * Returns a String of length 1 representing the specified character.
2239:    *
2240:    * @param ch the character to convert
2241:    * @return a String containing the character
2242:    * @since 1.4
2243:    */
2244:   public static String toString(char ch)
2245:   {
2246:     // This assumes that String.valueOf(char) can create a single-character
2247:     // String more efficiently than through the public API.
2248:     return String.valueOf(ch);
2249:   }
2250: 
2251:   /**
2252:    * Determines if a character is a Unicode lowercase letter. For example,
2253:    * <code>'a'</code> is lowercase.
2254:    * <br>
2255:    * lowercase = [Ll]
2256:    *
2257:    * @param ch character to test
2258:    * @return true if ch is a Unicode lowercase letter, else false
2259:    * @see #isUpperCase(char)
2260:    * @see #isTitleCase(char)
2261:    * @see #toLowerCase(char)
2262:    * @see #getType(char)
2263:    */
2264:   public static boolean isLowerCase(char ch)
2265:   {
2266:     return getType(ch) == LOWERCASE_LETTER;
2267:   }
2268: 
2269:   /**
2270:    * Determines if a character is a Unicode lowercase letter. For example,
2271:    * <code>'a'</code> is lowercase.  Unlike isLowerCase(char), this method
2272:    * supports supplementary Unicode code points.
2273:    * <br>
2274:    * lowercase = [Ll]
2275:    *
2276:    * @param codePoint character to test
2277:    * @return true if codePoint is a Unicode lowercase letter, else false
2278:    * @see #isUpperCase(int)
2279:    * @see #isTitleCase(int)
2280:    * @see #toLowerCase(int)
2281:    * @see #getType(int)
2282:    * @since 1.5
2283:    */
2284:   public static boolean isLowerCase(int codePoint)
2285:   {
2286:     return getType(codePoint) == LOWERCASE_LETTER;
2287:   }
2288: 
2289:   /**
2290:    * Determines if a character is a Unicode uppercase letter. For example,
2291:    * <code>'A'</code> is uppercase.
2292:    * <br>
2293:    * uppercase = [Lu]
2294:    *
2295:    * @param ch character to test
2296:    * @return true if ch is a Unicode uppercase letter, else false
2297:    * @see #isLowerCase(char)
2298:    * @see #isTitleCase(char)
2299:    * @see #toUpperCase(char)
2300:    * @see #getType(char)
2301:    */
2302:   public static boolean isUpperCase(char ch)
2303:   {
2304:     return getType(ch) == UPPERCASE_LETTER;
2305:   }
2306: 
2307:   /**
2308:    * Determines if a character is a Unicode uppercase letter. For example,
2309:    * <code>'A'</code> is uppercase.  Unlike isUpperCase(char), this method
2310:    * supports supplementary Unicode code points.
2311:    * <br>
2312:    * uppercase = [Lu]
2313:    *
2314:    * @param codePoint character to test
2315:    * @return true if codePoint is a Unicode uppercase letter, else false
2316:    * @see #isLowerCase(int)
2317:    * @see #isTitleCase(int)
2318:    * @see #toUpperCase(int)
2319:    * @see #getType(int)
2320:    * @since 1.5
2321:    */
2322:   public static boolean isUpperCase(int codePoint)
2323:   {
2324:     return getType(codePoint) == UPPERCASE_LETTER;
2325:   }
2326: 
2327:   /**
2328:    * Determines if a character is a Unicode titlecase letter. For example,
2329:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
2330:    * <br>
2331:    * titlecase = [Lt]
2332:    *
2333:    * @param ch character to test
2334:    * @return true if ch is a Unicode titlecase letter, else false
2335:    * @see #isLowerCase(char)
2336:    * @see #isUpperCase(char)
2337:    * @see #toTitleCase(char)
2338:    * @see #getType(char)
2339:    */
2340:   public static boolean isTitleCase(char ch)
2341:   {
2342:     return getType(ch) == TITLECASE_LETTER;
2343:   }
2344: 
2345:   /**
2346:    * Determines if a character is a Unicode titlecase letter. For example,
2347:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
2348:    * Unlike isTitleCase(char), this method supports supplementary Unicode
2349:    * code points.
2350:    * <br>
2351:    * titlecase = [Lt]
2352:    *
2353:    * @param codePoint character to test
2354:    * @return true if codePoint is a Unicode titlecase letter, else false
2355:    * @see #isLowerCase(int)
2356:    * @see #isUpperCase(int)
2357:    * @see #toTitleCase(int)
2358:    * @see #getType(int)
2359:    * @since 1.5
2360:    */
2361:   public static boolean isTitleCase(int codePoint)
2362:   {
2363:     return getType(codePoint) == TITLECASE_LETTER;
2364:   }
2365: 
2366:   /**
2367:    * Determines if a character is a Unicode decimal digit. For example,
2368:    * <code>'0'</code> is a digit.
2369:    * <br>
2370:    * Unicode decimal digit = [Nd]
2371:    *
2372:    * @param ch character to test
2373:    * @return true if ch is a Unicode decimal digit, else false
2374:    * @see #digit(char, int)
2375:    * @see #forDigit(int, int)
2376:    * @see #getType(char)
2377:    */
2378:   public static boolean isDigit(char ch)
2379:   {
2380:     return getType(ch) == DECIMAL_DIGIT_NUMBER;
2381:   }
2382: 
2383:   /**
2384:    * Determines if a character is a Unicode decimal digit. For example,
2385:    * <code>'0'</code> is a digit.  Unlike isDigit(char), this method
2386:    * supports supplementary Unicode code points.
2387:    * <br>
2388:    * Unicode decimal digit = [Nd]
2389:    *
2390:    * @param codePoint character to test
2391:    * @return true if ccodePoint is a Unicode decimal digit, else false
2392:    * @see #digit(int, int)
2393:    * @see #forDigit(int, int)
2394:    * @see #getType(int)
2395:    * @since 1.5
2396:    */
2397:   public static boolean isDigit(int codePoint)
2398:   {
2399:     return getType(codePoint) == DECIMAL_DIGIT_NUMBER;
2400:   }
2401: 
2402:   /**
2403:    * Determines if a character is part of the Unicode Standard. This is an
2404:    * evolving standard, but covers every character in the data file.
2405:    * <br>
2406:    * defined = not [Cn]
2407:    *
2408:    * @param ch character to test
2409:    * @return true if ch is a Unicode character, else false
2410:    * @see #isDigit(char)
2411:    * @see #isLetter(char)
2412:    * @see #isLetterOrDigit(char)
2413:    * @see #isLowerCase(char)
2414:    * @see #isTitleCase(char)
2415:    * @see #isUpperCase(char)
2416:    */
2417:   public static boolean isDefined(char ch)
2418:   {
2419:     return getType(ch) != UNASSIGNED;
2420:   }
2421: 
2422:   /**
2423:    * Determines if a character is part of the Unicode Standard. This is an
2424:    * evolving standard, but covers every character in the data file.  Unlike
2425:    * isDefined(char), this method supports supplementary Unicode code points.
2426:    * <br>
2427:    * defined = not [Cn]
2428:    *
2429:    * @param codePoint character to test
2430:    * @return true if codePoint is a Unicode character, else false
2431:    * @see #isDigit(int)
2432:    * @see #isLetter(int)
2433:    * @see #isLetterOrDigit(int)
2434:    * @see #isLowerCase(int)
2435:    * @see #isTitleCase(int)
2436:    * @see #isUpperCase(int)
2437:    * @since 1.5
2438:    */
2439:   public static boolean isDefined(int codePoint)
2440:   {
2441:     return getType(codePoint) != UNASSIGNED;
2442:   }
2443: 
2444:   /**
2445:    * Determines if a character is a Unicode letter. Not all letters have case,
2446:    * so this may return true when isLowerCase and isUpperCase return false.
2447:    * <br>
2448:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2449:    *
2450:    * @param ch character to test
2451:    * @return true if ch is a Unicode letter, else false
2452:    * @see #isDigit(char)
2453:    * @see #isJavaIdentifierStart(char)
2454:    * @see #isJavaLetter(char)
2455:    * @see #isJavaLetterOrDigit(char)
2456:    * @see #isLetterOrDigit(char)
2457:    * @see #isLowerCase(char)
2458:    * @see #isTitleCase(char)
2459:    * @see #isUnicodeIdentifierStart(char)
2460:    * @see #isUpperCase(char)
2461:    */
2462:   public static boolean isLetter(char ch)
2463:   {
2464:     return ((1 << getType(ch))
2465:             & ((1 << UPPERCASE_LETTER)
2466:                | (1 << LOWERCASE_LETTER)
2467:                | (1 << TITLECASE_LETTER)
2468:                | (1 << MODIFIER_LETTER)
2469:                | (1 << OTHER_LETTER))) != 0;
2470:   }
2471: 
2472:   /**
2473:    * Determines if a character is a Unicode letter. Not all letters have case,
2474:    * so this may return true when isLowerCase and isUpperCase return false.
2475:    * Unlike isLetter(char), this method supports supplementary Unicode code
2476:    * points.
2477:    * <br>
2478:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2479:    *
2480:    * @param codePoint character to test
2481:    * @return true if codePoint is a Unicode letter, else false
2482:    * @see #isDigit(int)
2483:    * @see #isJavaIdentifierStart(int)
2484:    * @see #isJavaLetter(int)
2485:    * @see #isJavaLetterOrDigit(int)
2486:    * @see #isLetterOrDigit(int)
2487:    * @see #isLowerCase(int)
2488:    * @see #isTitleCase(int)
2489:    * @see #isUnicodeIdentifierStart(int)
2490:    * @see #isUpperCase(int)
2491:    * @since 1.5
2492:    */
2493:   public static boolean isLetter(int codePoint)
2494:   {
2495:     return ((1 << getType(codePoint))
2496:             & ((1 << UPPERCASE_LETTER)
2497:                | (1 << LOWERCASE_LETTER)
2498:                | (1 << TITLECASE_LETTER)
2499:                | (1 << MODIFIER_LETTER)
2500:                | (1 << OTHER_LETTER))) != 0;
2501:   }
2502:   
2503:   /**
2504:    * Returns the index into the given CharSequence that is offset
2505:    * <code>codePointOffset</code> code points from <code>index</code>.
2506:    * @param seq the CharSequence
2507:    * @param index the start position in the CharSequence
2508:    * @param codePointOffset the number of code points offset from the start
2509:    * position
2510:    * @return the index into the CharSequence that is codePointOffset code 
2511:    * points offset from index
2512:    * 
2513:    * @throws NullPointerException if seq is null
2514:    * @throws IndexOutOfBoundsException if index is negative or greater than the
2515:    * length of the sequence.
2516:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the 
2517:    * subsequence from index to the end of seq has fewer than codePointOffset
2518:    * code points
2519:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2520:    * subsequence from the start of seq to index has fewer than 
2521:    * (-codePointOffset) code points
2522:    * @since 1.5
2523:    */
2524:   public static int offsetByCodePoints(CharSequence seq,
2525:                                        int index,
2526:                                        int codePointOffset)
2527:   {
2528:     int len = seq.length();
2529:     if (index < 0 || index > len)
2530:       throw new IndexOutOfBoundsException();
2531:     
2532:     int numToGo = codePointOffset;
2533:     int offset = index;
2534:     int adjust = 1;
2535:     if (numToGo >= 0)
2536:       {
2537:         for (; numToGo > 0; offset++)
2538:           {
2539:             numToGo--;
2540:             if (Character.isHighSurrogate(seq.charAt(offset))
2541:                 && (offset + 1) < len
2542:                 && Character.isLowSurrogate(seq.charAt(offset + 1)))
2543:               offset++;
2544:           }
2545:         return offset;
2546:       }
2547:     else
2548:       {
2549:         numToGo *= -1;
2550:         for (; numToGo > 0;)
2551:           {
2552:             numToGo--;
2553:             offset--;
2554:             if (Character.isLowSurrogate(seq.charAt(offset))
2555:                 && (offset - 1) >= 0
2556:                 && Character.isHighSurrogate(seq.charAt(offset - 1)))
2557:               offset--;
2558:           }
2559:         return offset;
2560:       }
2561:   }
2562:   
2563:   /**
2564:    * Returns the index into the given char subarray that is offset
2565:    * <code>codePointOffset</code> code points from <code>index</code>.
2566:    * @param a the char array
2567:    * @param start the start index of the subarray
2568:    * @param count the length of the subarray
2569:    * @param index the index to be offset
2570:    * @param codePointOffset the number of code points offset from <code>index
2571:    * </code>
2572:    * @return the index into the char array
2573:    * 
2574:    * @throws NullPointerException if a is null
2575:    * @throws IndexOutOfBoundsException if start or count is negative or if
2576:    * start + count is greater than the length of the array
2577:    * @throws IndexOutOfBoundsException if index is less than start or larger 
2578:    * than start + count
2579:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the
2580:    * subarray from index to start + count - 1 has fewer than codePointOffset
2581:    * code points.
2582:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2583:    * subarray from start to index - 1 has fewer than (-codePointOffset) code
2584:    * points
2585:    * @since 1.5
2586: 
2587:    */
2588:   public static int offsetByCodePoints(char[] a,
2589:                                        int start,
2590:                                        int count,
2591:                                        int index,
2592:                                        int codePointOffset)
2593:   {
2594:     int len = a.length;
2595:     int end = start + count;
2596:     if (start < 0 || count < 0 || end > len || index < start || index > end)
2597:       throw new IndexOutOfBoundsException();
2598:     
2599:     int numToGo = codePointOffset;
2600:     int offset = index;
2601:     int adjust = 1;
2602:     if (numToGo >= 0)
2603:       {
2604:         for (; numToGo > 0; offset++)
2605:           {
2606:             numToGo--;
2607:             if (Character.isHighSurrogate(a[offset])
2608:                 && (offset + 1) < len
2609:                 && Character.isLowSurrogate(a[offset + 1]))
2610:               offset++;
2611:           }
2612:         return offset;
2613:       }
2614:     else
2615:       {
2616:         numToGo *= -1;
2617:         for (; numToGo > 0;)
2618:           {
2619:             numToGo--;
2620:             offset--;
2621:             if (Character.isLowSurrogate(a[offset])
2622:                 && (offset - 1) >= 0
2623:                 && Character.isHighSurrogate(a[offset - 1]))
2624:               offset--;
2625:             if (offset < start)
2626:               throw new IndexOutOfBoundsException();
2627:           }
2628:         return offset;
2629:       }
2630: 
2631:   }
2632: 
2633:   /**
2634:    * Returns the number of Unicode code points in the specified range of the
2635:    * given CharSequence.  The first char in the range is at position
2636:    * beginIndex and the last one is at position endIndex - 1.  Paired 
2637:    * surrogates (supplementary characters are represented by a pair of chars - 
2638:    * one from the high surrogates and one from the low surrogates) 
2639:    * count as just one code point.
2640:    * @param seq the CharSequence to inspect
2641:    * @param beginIndex the beginning of the range
2642:    * @param endIndex the end of the range
2643:    * @return the number of Unicode code points in the given range of the 
2644:    * sequence
2645:    * @throws NullPointerException if seq is null
2646:    * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
2647:    * larger than the length of seq, or if beginIndex is greater than endIndex.
2648:    * @since 1.5
2649:    */
2650:   public static int codePointCount(CharSequence seq, int beginIndex,
2651:                                    int endIndex)
2652:   {
2653:     int len = seq.length();
2654:     if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
2655:       throw new IndexOutOfBoundsException();
2656:         
2657:     int count = 0;
2658:     for (int i = beginIndex; i < endIndex; i++)
2659:       {
2660:         count++;
2661:         // If there is a pairing, count it only once.
2662:         if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
2663:             && isLowSurrogate(seq.charAt(i + 1)))
2664:           i ++;
2665:       }    
2666:     return count;
2667:   }
2668: 
2669:   /**
2670:    * Returns the number of Unicode code points in the specified range of the
2671:    * given char array.  The first char in the range is at position
2672:    * offset and the length of the range is count.  Paired surrogates
2673:    * (supplementary characters are represented by a pair of chars - 
2674:    * one from the high surrogates and one from the low surrogates) 
2675:    * count as just one code point.
2676:    * @param a the char array to inspect
2677:    * @param offset the beginning of the range
2678:    * @param count the length of the range
2679:    * @return the number of Unicode code points in the given range of the 
2680:    * array
2681:    * @throws NullPointerException if a is null
2682:    * @throws IndexOutOfBoundsException if offset or count is negative or if 
2683:    * offset + countendIndex is larger than the length of a.
2684:    * @since 1.5
2685:    */
2686:   public static int codePointCount(char[] a, int offset,
2687:                                    int count)
2688:   {
2689:     int len = a.length;
2690:     int end = offset + count;
2691:     if (offset < 0 || count < 0 || end > len)
2692:       throw new IndexOutOfBoundsException();
2693:         
2694:     int counter = 0;
2695:     for (int i = offset; i < end; i++)
2696:       {
2697:         counter++;
2698:         // If there is a pairing, count it only once.
2699:         if (isHighSurrogate(a[i]) && (i + 1) < end
2700:             && isLowSurrogate(a[i + 1]))
2701:           i ++;
2702:       }    
2703:     return counter;
2704:   }
2705: 
2706:   /**
2707:    * Determines if a character is a Unicode letter or a Unicode digit. This
2708:    * is the combination of isLetter and isDigit.
2709:    * <br>
2710:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
2711:    *
2712:    * @param ch character to test
2713:    * @return true if ch is a Unicode letter or a Unicode digit, else false
2714:    * @see #isDigit(char)
2715:    * @see #isJavaIdentifierPart(char)
2716:    * @see #isJavaLetter(char)
2717:    * @see #isJavaLetterOrDigit(char)
2718:    * @see #isLetter(char)
2719:    * @see #isUnicodeIdentifierPart(char)
2720:    */
2721:   public static boolean isLetterOrDigit(char ch)
2722:   {
2723:     return ((1 << getType(ch))
2724:             & ((1 << UPPERCASE_LETTER)
2725:                | (1 << LOWERCASE_LETTER)
2726:                | (1 << TITLECASE_LETTER)
2727:                | (1 << MODIFIER_LETTER)
2728:                | (1 << OTHER_LETTER)
2729:                | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
2730:   }
2731: 
2732:   /**
2733:    * Determines if a character is a Unicode letter or a Unicode digit. This
2734:    * is the combination of isLetter and isDigit.  Unlike isLetterOrDigit(char),
2735:    * this method supports supplementary Unicode code points.
2736:    * <br>
2737:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
2738:    *
2739:    * @param codePoint character to test
2740:    * @return true if codePoint is a Unicode letter or a Unicode digit, else false
2741:    * @see #isDigit(int)
2742:    * @see #isJavaIdentifierPart(int)
2743:    * @see #isJavaLetter(int)
2744:    * @see #isJavaLetterOrDigit(int)
2745:    * @see #isLetter(int)
2746:    * @see #isUnicodeIdentifierPart(int)
2747:    * @since 1.5
2748:    */
2749:   public static boolean isLetterOrDigit(int codePoint)
2750:   {
2751:     return ((1 << getType(codePoint)
2752:             & ((1 << UPPERCASE_LETTER)
2753:                | (1 << LOWERCASE_LETTER)
2754:                | (1 << TITLECASE_LETTER)
2755:                | (1 << MODIFIER_LETTER)
2756:                | (1 << OTHER_LETTER)
2757:                | (1 << DECIMAL_DIGIT_NUMBER))) != 0);
2758:   }
2759: 
2760:   /**
2761:    * Determines if a character can start a Java identifier. This is the
2762:    * combination of isLetter, any character where getType returns
2763:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
2764:    * (like '_').
2765:    *
2766:    * @param ch character to test
2767:    * @return true if ch can start a Java identifier, else false
2768:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
2769:    * @see #isJavaLetterOrDigit(char)
2770:    * @see #isJavaIdentifierStart(char)
2771:    * @see #isJavaIdentifierPart(char)
2772:    * @see #isLetter(char)
2773:    * @see #isLetterOrDigit(char)
2774:    * @see #isUnicodeIdentifierStart(char)
2775:    */
2776:   public static boolean isJavaLetter(char ch)
2777:   {
2778:     return isJavaIdentifierStart(ch);
2779:   }
2780: 
2781:   /**
2782:    * Determines if a character can start a Java identifier. This is the
2783:    * combination of isLetter, any character where getType returns
2784:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
2785:    * (like '_'). Unlike isJavaIdentifierStart(char), this method supports
2786:    * supplementary Unicode code points.
2787:    * <br>
2788:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
2789:    *
2790:    * @param codePoint character to test
2791:    * @return true if codePoint can start a Java identifier, else false
2792:    * @see #isJavaIdentifierPart(int)
2793:    * @see #isLetter(int)
2794:    * @see #isUnicodeIdentifierStart(int)
2795:    * @since 1.5
2796:    */
2797:   public static boolean isJavaIdentifierStart(int codePoint)
2798:   {
2799:     return ((1 << getType(codePoint))
2800:             & ((1 << UPPERCASE_LETTER)
2801:                | (1 << LOWERCASE_LETTER)
2802:                | (1 << TITLECASE_LETTER)
2803:                | (1 << MODIFIER_LETTER)
2804:                | (1 << OTHER_LETTER)
2805:                | (1 << LETTER_NUMBER)
2806:                | (1 << CURRENCY_SYMBOL)
2807:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
2808:   }
2809: 
2810:   /**
2811:    * Determines if a character can follow the first letter in
2812:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
2813:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
2814:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
2815:    * or isIdentifierIgnorable.
2816:    *
2817:    * @param ch character to test
2818:    * @return true if ch can follow the first letter in a Java identifier
2819:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
2820:    * @see #isJavaLetter(char)
2821:    * @see #isJavaIdentifierStart(char)
2822:    * @see #isJavaIdentifierPart(char)
2823:    * @see #isLetter(char)
2824:    * @see #isLetterOrDigit(char)
2825:    * @see #isUnicodeIdentifierPart(char)
2826:    * @see #isIdentifierIgnorable(char)
2827:    */
2828:   public static boolean isJavaLetterOrDigit(char ch)
2829:   {
2830:     return isJavaIdentifierPart(ch);
2831:   }
2832: 
2833:   /**
2834:    * Determines if a character can start a Java identifier. This is the
2835:    * combination of isLetter, any character where getType returns
2836:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
2837:    * (like '_').
2838:    * <br>
2839:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
2840:    *
2841:    * @param ch character to test
2842:    * @return true if ch can start a Java identifier, else false
2843:    * @see #isJavaIdentifierPart(char)
2844:    * @see #isLetter(char)
2845:    * @see #isUnicodeIdentifierStart(char)
2846:    * @since 1.1
2847:    */
2848:   public static boolean isJavaIdentifierStart(char ch)
2849:   {
2850:     return ((1 << getType(ch))
2851:             & ((1 << UPPERCASE_LETTER)
2852:                | (1 << LOWERCASE_LETTER)
2853:                | (1 << TITLECASE_LETTER)
2854:                | (1 << MODIFIER_LETTER)
2855:                | (1 << OTHER_LETTER)
2856:                | (1 << LETTER_NUMBER)
2857:                | (1 << CURRENCY_SYMBOL)
2858:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
2859:   }
2860: 
2861:   /**
2862:    * Determines if a character can follow the first letter in
2863:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
2864:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
2865:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
2866:    * or isIdentifierIgnorable.
2867:    * <br>
2868:    * Java identifier extender =
2869:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
2870:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2871:    *
2872:    * @param ch character to test
2873:    * @return true if ch can follow the first letter in a Java identifier
2874:    * @see #isIdentifierIgnorable(char)
2875:    * @see #isJavaIdentifierStart(char)
2876:    * @see #isLetterOrDigit(char)
2877:    * @see #isUnicodeIdentifierPart(char)
2878:    * @since 1.1
2879:    */
2880:   public static boolean isJavaIdentifierPart(char ch)
2881:   {
2882:     int category = getType(ch);
2883:     return ((1 << category)
2884:             & ((1 << UPPERCASE_LETTER)
2885:                | (1 << LOWERCASE_LETTER)
2886:                | (1 << TITLECASE_LETTER)
2887:                | (1 << MODIFIER_LETTER)
2888:                | (1 << OTHER_LETTER)
2889:                | (1 << NON_SPACING_MARK)
2890:                | (1 << COMBINING_SPACING_MARK)
2891:                | (1 << DECIMAL_DIGIT_NUMBER)
2892:                | (1 << LETTER_NUMBER)
2893:                | (1 << CURRENCY_SYMBOL)
2894:                | (1 << CONNECTOR_PUNCTUATION)
2895:                | (1 << FORMAT))) != 0
2896:       || (category == CONTROL && isIdentifierIgnorable(ch));
2897:   }
2898: 
2899:   /**
2900:    * Determines if a character can follow the first letter in
2901:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
2902:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
2903:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
2904:    * or isIdentifierIgnorable. Unlike isJavaIdentifierPart(char), this method
2905:    * supports supplementary Unicode code points.
2906:    * <br>
2907:    * Java identifier extender =
2908:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
2909:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2910:    *
2911:    * @param codePoint character to test
2912:    * @return true if codePoint can follow the first letter in a Java identifier
2913:    * @see #isIdentifierIgnorable(int)
2914:    * @see #isJavaIdentifierStart(int)
2915:    * @see #isLetterOrDigit(int)
2916:    * @see #isUnicodeIdentifierPart(int)
2917:    * @since 1.5
2918:    */
2919:   public static boolean isJavaIdentifierPart(int codePoint)
2920:   {
2921:     int category = getType(codePoint);
2922:     return ((1 << category)
2923:             & ((1 << UPPERCASE_LETTER)
2924:                | (1 << LOWERCASE_LETTER)
2925:                | (1 << TITLECASE_LETTER)
2926:                | (1 << MODIFIER_LETTER)
2927:                | (1 << OTHER_LETTER)
2928:                | (1 << NON_SPACING_MARK)
2929:                | (1 << COMBINING_SPACING_MARK)
2930:                | (1 << DECIMAL_DIGIT_NUMBER)
2931:                | (1 << LETTER_NUMBER)
2932:                | (1 << CURRENCY_SYMBOL)
2933:                | (1 << CONNECTOR_PUNCTUATION)
2934:                | (1 << FORMAT))) != 0
2935:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
2936:   }
2937: 
2938:   /**
2939:    * Determines if a character can start a Unicode identifier.  Only
2940:    * letters can start a Unicode identifier, but this includes characters
2941:    * in LETTER_NUMBER.
2942:    * <br>
2943:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
2944:    *
2945:    * @param ch character to test
2946:    * @return true if ch can start a Unicode identifier, else false
2947:    * @see #isJavaIdentifierStart(char)
2948:    * @see #isLetter(char)
2949:    * @see #isUnicodeIdentifierPart(char)
2950:    * @since 1.1
2951:    */
2952:   public static boolean isUnicodeIdentifierStart(char ch)
2953:   {
2954:     return ((1 << getType(ch))
2955:             & ((1 << UPPERCASE_LETTER)
2956:                | (1 << LOWERCASE_LETTER)
2957:                | (1 << TITLECASE_LETTER)
2958:                | (1 << MODIFIER_LETTER)
2959:                | (1 << OTHER_LETTER)
2960:                | (1 << LETTER_NUMBER))) != 0;
2961:   }
2962: 
2963:   /**
2964:    * Determines if a character can start a Unicode identifier.  Only
2965:    * letters can start a Unicode identifier, but this includes characters
2966:    * in LETTER_NUMBER.  Unlike isUnicodeIdentifierStart(char), this method
2967:    * supports supplementary Unicode code points.
2968:    * <br>
2969:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
2970:    *
2971:    * @param codePoint character to test
2972:    * @return true if codePoint can start a Unicode identifier, else false
2973:    * @see #isJavaIdentifierStart(int)
2974:    * @see #isLetter(int)
2975:    * @see #isUnicodeIdentifierPart(int)
2976:    * @since 1.5
2977:    */
2978:   public static boolean isUnicodeIdentifierStart(int codePoint)
2979:   {
2980:     return ((1 << getType(codePoint))
2981:             & ((1 << UPPERCASE_LETTER)
2982:                | (1 << LOWERCASE_LETTER)
2983:                | (1 << TITLECASE_LETTER)
2984:                | (1 << MODIFIER_LETTER)
2985:                | (1 << OTHER_LETTER)
2986:                | (1 << LETTER_NUMBER))) != 0;
2987:   }
2988: 
2989:   /**
2990:    * Determines if a character can follow the first letter in
2991:    * a Unicode identifier. This includes letters, connecting punctuation,
2992:    * digits, numeric letters, combining marks, non-spacing marks, and
2993:    * isIdentifierIgnorable.
2994:    * <br>
2995:    * Unicode identifier extender =
2996:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
2997:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2998:    *
2999:    * @param ch character to test
3000:    * @return true if ch can follow the first letter in a Unicode identifier
3001:    * @see #isIdentifierIgnorable(char)
3002:    * @see #isJavaIdentifierPart(char)
3003:    * @see #isLetterOrDigit(char)
3004:    * @see #isUnicodeIdentifierStart(char)
3005:    * @since 1.1
3006:    */
3007:   public static boolean isUnicodeIdentifierPart(char ch)
3008:   {
3009:     int category = getType(ch);
3010:     return ((1 << category)
3011:             & ((1 << UPPERCASE_LETTER)
3012:                | (1 << LOWERCASE_LETTER)
3013:                | (1 << TITLECASE_LETTER)
3014:                | (1 << MODIFIER_LETTER)
3015:                | (1 << OTHER_LETTER)
3016:                | (1 << NON_SPACING_MARK)
3017:                | (1 << COMBINING_SPACING_MARK)
3018:                | (1 << DECIMAL_DIGIT_NUMBER)
3019:                | (1 << LETTER_NUMBER)
3020:                | (1 << CONNECTOR_PUNCTUATION)
3021:                | (1 << FORMAT))) != 0
3022:       || (category == CONTROL && isIdentifierIgnorable(ch));
3023:   }
3024: 
3025:   /**
3026:    * Determines if a character can follow the first letter in
3027:    * a Unicode identifier. This includes letters, connecting punctuation,
3028:    * digits, numeric letters, combining marks, non-spacing marks, and
3029:    * isIdentifierIgnorable.  Unlike isUnicodeIdentifierPart(char), this method
3030:    * supports supplementary Unicode code points.
3031:    * <br>
3032:    * Unicode identifier extender =
3033:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
3034:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3035:    *
3036:    * @param codePoint character to test
3037:    * @return true if codePoint can follow the first letter in a Unicode 
3038:    *         identifier
3039:    * @see #isIdentifierIgnorable(int)
3040:    * @see #isJavaIdentifierPart(int)
3041:    * @see #isLetterOrDigit(int)
3042:    * @see #isUnicodeIdentifierStart(int)
3043:    * @since 1.5
3044:    */
3045:   public static boolean isUnicodeIdentifierPart(int codePoint)
3046:   {
3047:     int category = getType(codePoint);
3048:     return ((1 << category)
3049:             & ((1 << UPPERCASE_LETTER)
3050:                | (1 << LOWERCASE_LETTER)
3051:                | (1 << TITLECASE_LETTER)
3052:                | (1 << MODIFIER_LETTER)
3053:                | (1 << OTHER_LETTER)
3054:                | (1 << NON_SPACING_MARK)
3055:                | (1 << COMBINING_SPACING_MARK)
3056:                | (1 << DECIMAL_DIGIT_NUMBER)
3057:                | (1 << LETTER_NUMBER)
3058:                | (1 << CONNECTOR_PUNCTUATION)
3059:                | (1 << FORMAT))) != 0
3060:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3061:   }
3062: 
3063:   /**
3064:    * Determines if a character is ignorable in a Unicode identifier. This
3065:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3066:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3067:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3068:    * <code>'\u009F'</code>), and FORMAT characters.
3069:    * <br>
3070:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3071:    *    |U+007F-U+009F
3072:    *
3073:    * @param ch character to test
3074:    * @return true if ch is ignorable in a Unicode or Java identifier
3075:    * @see #isJavaIdentifierPart(char)
3076:    * @see #isUnicodeIdentifierPart(char)
3077:    * @since 1.1
3078:    */
3079:   public static boolean isIdentifierIgnorable(char ch)
3080:   {
3081:     return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F'
3082:                                || (ch <= '\u001B' && ch >= '\u000E')))
3083:       || getType(ch) == FORMAT;
3084:   }
3085: 
3086:   /**
3087:    * Determines if a character is ignorable in a Unicode identifier. This
3088:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3089:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3090:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3091:    * <code>'\u009F'</code>), and FORMAT characters.  Unlike 
3092:    * isIdentifierIgnorable(char), this method supports supplementary Unicode
3093:    * code points.
3094:    * <br>
3095:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3096:    *    |U+007F-U+009F
3097:    *
3098:    * @param codePoint character to test
3099:    * @return true if codePoint is ignorable in a Unicode or Java identifier
3100:    * @see #isJavaIdentifierPart(int)
3101:    * @see #isUnicodeIdentifierPart(int)
3102:    * @since 1.5
3103:    */
3104:   public static boolean isIdentifierIgnorable(int codePoint)
3105:   {
3106:     return ((codePoint >= 0 && codePoint <= 0x0008)
3107:         || (codePoint >= 0x000E && codePoint <= 0x001B)
3108:         || (codePoint >= 0x007F && codePoint <= 0x009F)
3109:             || getType(codePoint) == FORMAT);
3110:   }
3111: 
3112:   /**
3113:    * Converts a Unicode character into its lowercase equivalent mapping.
3114:    * If a mapping does not exist, then the character passed is returned.
3115:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3116:    *
3117:    * @param ch character to convert to lowercase
3118:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3119:    *         not exist
3120:    * @see #isLowerCase(char)
3121:    * @see #isUpperCase(char)
3122:    * @see #toTitleCase(char)
3123:    * @see #toUpperCase(char)
3124:    */
3125:   public static native char toLowerCase(char ch); // native: no Java body; implemented in native code outside this file
3126: 
3127:   /**
3128:    * Converts a Unicode character into its lowercase equivalent mapping.
3129:    * If a mapping does not exist, then the character passed is returned.
3130:    * Note that isLowerCase(toLowerCase(codePoint)) does not always return true.
3131:    * Unlike toLowerCase(char), this method supports supplementary Unicode
3132:    * code points.
3133:    *
3134:    * @param codePoint character to convert to lowercase
3135:    * @return lowercase mapping of codePoint, or codePoint if lowercase 
3136:    *         mapping does not exist
3137:    * @see #isLowerCase(int)
3138:    * @see #isUpperCase(int)
3139:    * @see #toTitleCase(int)
3140:    * @see #toUpperCase(int)
3141:    * @since 1.5
3142:    */
3143:   public static native int toLowerCase(int codePoint); // native: no Java body; implemented in native code outside this file
3144: 
3145:   /**
3146:    * Converts a Unicode character into its uppercase equivalent mapping.
3147:    * If a mapping does not exist, then the character passed is returned.
3148:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3149:    *
3150:    * @param ch character to convert to uppercase
3151:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3152:    *         not exist
3153:    * @see #isLowerCase(char)
3154:    * @see #isUpperCase(char)
3155:    * @see #toLowerCase(char)
3156:    * @see #toTitleCase(char)
3157:    */
3158:   public static native char toUpperCase(char ch); // native: no Java body; implemented in native code outside this file
3159: 
3160:   /**
3161:    * Converts a Unicode character into its uppercase equivalent mapping.
3162:    * If a mapping does not exist, then the character passed is returned.
3163:    * Note that isUpperCase(toUpperCase(codePoint)) does not always return true.
3164:    * Unlike toUpperCase(char), this method supports supplementary 
3165:    * Unicode code points.
3166:    *
3167:    * @param codePoint character to convert to uppercase
3168:    * @return uppercase mapping of codePoint, or codePoint if uppercase 
3169:    *         mapping does not exist
3170:    * @see #isLowerCase(int)
3171:    * @see #isUpperCase(int)
3172:    * @see #toLowerCase(int)
3173:    * @see #toTitleCase(int)
3174:    * @since 1.5
3175:    */
3176:   public static native int toUpperCase(int codePoint); // native: no Java body; implemented in native code outside this file
3177: 
3178:   /**
3179:    * Converts a Unicode character into its titlecase equivalent mapping.
3180:    * If a mapping does not exist, then the character passed is returned.
3181:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3182:    *
3183:    * @param ch character to convert to titlecase
3184:    * @return titlecase mapping of ch, or ch if titlecase mapping does
3185:    *         not exist
3186:    * @see #isTitleCase(char)
3187:    * @see #toLowerCase(char)
3188:    * @see #toUpperCase(char)
3189:    */
3190:   public static native char toTitleCase(char ch); // native: no Java body; implemented in native code outside this file
3191: 
3192:   /**
3193:    * Converts a Unicode character into its titlecase equivalent mapping.
3194:    * If a mapping does not exist, then the character passed is returned.
3195:    * Note that isTitleCase(toTitleCase(codePoint)) does not always return true.
3196:    * Unlike toTitleCase(char), this method supports supplementary 
3197:    * Unicode code points.
3198:    * 
3199:    * @param codePoint character to convert to titlecase
3200:    * @return titlecase mapping of codePoint, or codePoint if titlecase 
3201:    *         mapping does not exist
3202:    * @see #isTitleCase(int)
3203:    * @see #toLowerCase(int)
3204:    * @see #toUpperCase(int)
3205:    * @since 1.5
3206:    */
3207:   public static native int toTitleCase(int codePoint); // native: no Java body; implemented in native code outside this file
3208: 
3209:   /**
3210:    * Converts a character into a digit of the specified radix. If the radix
3211:    * is outside MIN_RADIX..MAX_RADIX, or if the result of getNumericValue(ch)
3212:    * exceeds the radix, or if ch is not a decimal digit or in the case
3213:    * insensitive set of 'a'-'z', the result is -1.
3214:    * <br>
3215:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3216:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3217:    *
3218:    * @param ch character to convert into a digit
3219:    * @param radix radix in which ch is a digit
3220:    * @return digit which ch represents in radix, or -1 if ch is not a valid digit
3221:    * @see #MIN_RADIX
3222:    * @see #MAX_RADIX
3223:    * @see #forDigit(int, int)
3224:    * @see #isDigit(char)
3225:    * @see #getNumericValue(char)
3226:    */
3227:   public static native int digit(char ch, int radix); // native: no Java body; implemented in native code outside this file
3228: 
3229:   /**
3230:    * Converts a character into a digit of the specified radix. If the radix
3231:    * is outside MIN_RADIX..MAX_RADIX, or if the result of getNumericValue(int)
3232:    * exceeds the radix, or if codePoint is not a decimal digit or in the case
3233:    * insensitive set of 'a'-'z', the result is -1.  Unlike digit(char, int), 
3234:    * this method supports supplementary Unicode code points.
3235:    * <br>
3236:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3237:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3238:    *
3239:    * @param codePoint character to convert into a digit
3240:    * @param radix radix in which codePoint is a digit
3241:    * @return digit which codePoint represents in radix, or -1 if codePoint is not a valid digit
3242:    * @see #MIN_RADIX
3243:    * @see #MAX_RADIX
3244:    * @see #forDigit(int, int)
3245:    * @see #isDigit(int)
3246:    * @see #getNumericValue(int)
3247:    * @since 1.5
3248:    */
3249:   public static native int digit(int codePoint, int radix); // native: no Java body; implemented in native code outside this file
3250: 
3251:   /**
3252:    * Returns the Unicode numeric value property of a character. For example,
3253:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3254:    *
3255:    * <p>This method also returns values for the letters A through Z, (not
3256:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3257:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3258:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3259:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3260:    * <code>'\uFF5A'</code> (full width variants).
3261:    *
3262:    * <p>If the character lacks a numeric value property, -1 is returned.
3263:    * If the character has a numeric value property which is not representable
3264:    * as a nonnegative integer, such as a fraction, -2 is returned.
3265:    *
3266:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3267:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3268:    *
3269:    * @param ch character from which the numeric value property will
3270:    *        be retrieved
3271:    * @return the numeric value property of ch, or -1 if it does not exist, or
3272:    *         -2 if it is not representable as a nonnegative integer
3273:    * @see #forDigit(int, int)
3274:    * @see #digit(char, int)
3275:    * @see #isDigit(char)
3276:    * @since 1.1
3277:    */
3278:   public static native int getNumericValue(char ch); // native: no Java body; implemented in native code outside this file
3279: 
3280:   /**
3281:    * Returns the Unicode numeric value property of a character. For example,
3282:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3283:    *
3284:    * <p>This method also returns values for the letters A through Z, (not
3285:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3286:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3287:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3288:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3289:    * <code>'\uFF5A'</code> (full width variants).
3290:    *
3291:    * <p>If the character lacks a numeric value property, -1 is returned.
3292:    * If the character has a numeric value property which is not representable
3293:    * as a nonnegative integer, such as a fraction, -2 is returned.
3294:    *
3295:    * Unlike getNumericValue(char), this method supports supplementary Unicode
3296:    * code points.
3297:    *
3298:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3299:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3300:    *
3301:    * @param codePoint character from which the numeric value property will
3302:    *        be retrieved
3303:    * @return the numeric value property of codePoint, or -1 if it does not 
3304:    *         exist, or -2 if it is not representable as a nonnegative integer
3305:    * @see #forDigit(int, int)
3306:    * @see #digit(int, int)
3307:    * @see #isDigit(int)
3308:    * @since 1.5
3309:    */
3310:   public static native int getNumericValue(int codePoint); // native: no Java body; implemented in native code outside this file
3311: 
3312:   /**
3313:    * Determines if a character is an ISO-LATIN-1 space. This is only the five
3314:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3315:    * <code>'\r'</code>, and <code>' '</code>.
3316:    * <br>
3317:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3318:    *
3319:    * @param ch character to test
3320:    * @return true if ch is a space, else false
3321:    * @deprecated Replaced by {@link #isWhitespace(char)}
3322:    * @see #isSpaceChar(char)
3323:    * @see #isWhitespace(char)
3324:    */
3325:   public static boolean isSpace(char ch)
3326:   {
3327:     // Performing the subtraction up front alleviates need to compare longs.
3328:     return ch-- <= ' ' && ((1 << ch)
3329:                            & ((1 << (' ' - 1))
3330:                               | (1 << ('\t' - 1))
3331:                               | (1 << ('\n' - 1))
3332:                               | (1 << ('\r' - 1))
3333:                               | (1 << ('\f' - 1)))) != 0;
3334:   }
3335: 
3336:   /**
3337:    * Determines if a character is a Unicode space character. This includes
3338:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3339:    * <br>
3340:    * Unicode space = [Zs]|[Zp]|[Zl]
3341:    *
3342:    * @param ch character to test
3343:    * @return true if ch is a Unicode space, else false
3344:    * @see #isWhitespace(char)
3345:    * @since 1.1
3346:    */
3347:   public static boolean isSpaceChar(char ch)
3348:   {
3349:     return ((1 << getType(ch))
3350:             & ((1 << SPACE_SEPARATOR)
3351:                | (1 << LINE_SEPARATOR)
3352:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
3353:   }
3354: 
3355:   /**
3356:    * Determines if a character is a Unicode space character. This includes
3357:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.  Unlike
3358:    * isSpaceChar(char), this method supports supplementary Unicode code points.
3359:    * <br>
3360:    * Unicode space = [Zs]|[Zp]|[Zl]
3361:    *
3362:    * @param codePoint character to test
3363:    * @return true if codePoint is a Unicode space, else false
3364:    * @see #isWhitespace(int)
3365:    * @since 1.5
3366:    */
3367:   public static boolean isSpaceChar(int codePoint)
3368:   {
3369:     return ((1 << getType(codePoint))
3370:             & ((1 << SPACE_SEPARATOR)
3371:                | (1 << LINE_SEPARATOR)
3372:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
3373:   }
3374: 
3375:   /**
3376:    * Determines if a character is Java whitespace. This includes Unicode
3377:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3378:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3379:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3380:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3381:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3382:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3383:    * and <code>'\u001F'</code>.
3384:    * <br>
3385:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3386:    *
3387:    * @param ch character to test
3388:    * @return true if ch is Java whitespace, else false
3389:    * @see #isSpaceChar(char)
3390:    * @since 1.1
3391:    */
3392:   public static boolean isWhitespace(char ch)
3393:   {
3394:     int attr = readChar(ch);
3395:     return ((((1 << (attr & TYPE_MASK))
3396:               & ((1 << SPACE_SEPARATOR)
3397:                  | (1 << LINE_SEPARATOR)
3398:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3399:             && (attr & NO_BREAK_MASK) == 0)
3400:       || (ch <= '\u001F' && ((1 << ch)
3401:                              & ((1 << '\t')
3402:                                 | (1 << '\n')
3403:                                 | (1 << '\u000B')
3404:                                 | (1 << '\u000C')
3405:                                 | (1 << '\r')
3406:                                 | (1 << '\u001C')
3407:                                 | (1 << '\u001D')
3408:                                 | (1 << '\u001E')
3409:                                 | (1 << '\u001F'))) != 0);
3410:   }
3411: 
3412:   /**
3413:    * Determines if a character is Java whitespace. This includes Unicode
3414:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3415:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3416:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3417:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3418:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3419:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3420:    * and <code>'\u001F'</code>.  Unlike isWhitespace(char), this method
3421:    * supports supplementary Unicode code points.
3422:    * <br>
3423:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3424:    *
3425:    * @param codePoint character to test
3426:    * @return true if codePoint is Java whitespace, else false
3427:    * @see #isSpaceChar(int)
3428:    * @since 1.5
3429:    */
3430:   public static boolean isWhitespace(int codePoint)
3431:   {
3432:     int plane = codePoint >>> 16;
3433:     if (plane > 2 && plane != 14)
3434:       return false;
3435:     int attr = readCodePoint(codePoint);
3436:     return ((((1 << (attr & TYPE_MASK))
3437:               & ((1 << SPACE_SEPARATOR)
3438:                  | (1 << LINE_SEPARATOR)
3439:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3440:             && (attr & NO_BREAK_MASK) == 0)
3441:       || (codePoint <= '\u001F' && ((1 << codePoint)
3442:                              & ((1 << '\t')
3443:                                 | (1 << '\n')
3444:                                 | (1 << '\u000B')
3445:                                 | (1 << '\u000C')
3446:                                 | (1 << '\r')
3447:                                 | (1 << '\u001C')
3448:                                 | (1 << '\u001D')
3449:                                 | (1 << '\u001E')
3450:                                 | (1 << '\u001F'))) != 0);
3451:   }
3452: 
3453:   /**
3454:    * Determines if a character has the ISO Control property.
3455:    * <br>
3456:    * ISO Control = [Cc]
3457:    *
3458:    * @param ch character to test
3459:    * @return true if ch is an ISO Control character, else false
3460:    * @see #isSpaceChar(char)
3461:    * @see #isWhitespace(char)
3462:    * @since 1.1
3463:    */
3464:   public static boolean isISOControl(char ch)
3465:   {
3466:     return getType(ch) == CONTROL;
3467:   }
3468: 
3469:   /**
3470:    * Determines if a character has the ISO Control property.  Unlike
3471:    * isISOControl(char), this method supports supplementary Unicode
3472:    * code points.
3473:    * <br>
3474:    * ISO Control = [Cc]
3475:    *
3476:    * @param codePoint character to test
3477:    * @return true if codePoint is an ISO Control character, else false
3478:    * @see #isSpaceChar(int)
3479:    * @see #isWhitespace(int)
3480:    * @since 1.5
3481:    */
3482:   public static boolean isISOControl(int codePoint)
3483:   {
3484:     return getType(codePoint) == CONTROL;
3485:   }
3486: 
3487:   /**
3488:    * Returns the Unicode general category property of a character.
3489:    *
3490:    * @param ch character from which the general category property will
3491:    *        be retrieved
3492:    * @return the character category property of ch as an integer
3493:    * @see #UNASSIGNED
3494:    * @see #UPPERCASE_LETTER
3495:    * @see #LOWERCASE_LETTER
3496:    * @see #TITLECASE_LETTER
3497:    * @see #MODIFIER_LETTER
3498:    * @see #OTHER_LETTER
3499:    * @see #NON_SPACING_MARK
3500:    * @see #ENCLOSING_MARK
3501:    * @see #COMBINING_SPACING_MARK
3502:    * @see #DECIMAL_DIGIT_NUMBER
3503:    * @see #LETTER_NUMBER
3504:    * @see #OTHER_NUMBER
3505:    * @see #SPACE_SEPARATOR
3506:    * @see #LINE_SEPARATOR
3507:    * @see #PARAGRAPH_SEPARATOR
3508:    * @see #CONTROL
3509:    * @see #FORMAT
3510:    * @see #PRIVATE_USE
3511:    * @see #SURROGATE
3512:    * @see #DASH_PUNCTUATION
3513:    * @see #START_PUNCTUATION
3514:    * @see #END_PUNCTUATION
3515:    * @see #CONNECTOR_PUNCTUATION
3516:    * @see #OTHER_PUNCTUATION
3517:    * @see #MATH_SYMBOL
3518:    * @see #CURRENCY_SYMBOL
3519:    * @see #MODIFIER_SYMBOL
3520:    * @see #INITIAL_QUOTE_PUNCTUATION
3521:    * @see #FINAL_QUOTE_PUNCTUATION
3522:    * @since 1.1
3523:    */
3524:   public static native int getType(char ch); // native: no Java body; implemented in native code outside this file
3525: 
3526:   /**
3527:    * Looks up the Unicode general category of a code point.  Unlike
3528:    * getType(char), this overload supports supplementary Unicode code
3529:    * points.  The lookup itself is implemented in native code.
3530:    *
3531:    * @param codePoint code point whose general category property is wanted
3532:    * @return the general category property of codePoint, as an integer
3533:    * @see #UNASSIGNED
3534:    * @see #UPPERCASE_LETTER
3535:    * @see #LOWERCASE_LETTER
3536:    * @see #TITLECASE_LETTER
3537:    * @see #MODIFIER_LETTER
3538:    * @see #OTHER_LETTER
3539:    * @see #NON_SPACING_MARK
3540:    * @see #ENCLOSING_MARK
3541:    * @see #COMBINING_SPACING_MARK
3542:    * @see #DECIMAL_DIGIT_NUMBER
3543:    * @see #LETTER_NUMBER
3544:    * @see #OTHER_NUMBER
3545:    * @see #SPACE_SEPARATOR
3546:    * @see #LINE_SEPARATOR
3547:    * @see #PARAGRAPH_SEPARATOR
3548:    * @see #CONTROL
3549:    * @see #FORMAT
3550:    * @see #PRIVATE_USE
3551:    * @see #SURROGATE
3552:    * @see #DASH_PUNCTUATION
3553:    * @see #START_PUNCTUATION
3554:    * @see #END_PUNCTUATION
3555:    * @see #CONNECTOR_PUNCTUATION
3556:    * @see #OTHER_PUNCTUATION
3557:    * @see #MATH_SYMBOL
3558:    * @see #CURRENCY_SYMBOL
3559:    * @see #MODIFIER_SYMBOL
3560:    * @see #INITIAL_QUOTE_PUNCTUATION
3561:    * @see #FINAL_QUOTE_PUNCTUATION
3562:    * @since 1.5
3563:    */
3564:   public static native int getType(int codePoint);
3565: 
3566:   /**
3567:    * Maps a digit value to the character denoting it in the given radix:
3568:    * '0'-'9' for values below ten, lowercase 'a'-'z' above.  If the radix
3569:    * is outside MIN_RADIX..MAX_RADIX, or the digit is negative or not less
3570:    * than the radix, the null character <code>'\0'</code> is returned.
3571:    * <br>
3572:    * return value boundary = U+0030-U+0039|U+0061-U+007A
3573:    *
3574:    * @param digit digit value to be converted into a character
3575:    * @param radix radix in which the digit is expressed
3576:    * @return character representing digit in radix, or '\0'
3577:    * @see #MIN_RADIX
3578:    * @see #MAX_RADIX
3579:    * @see #digit(char, int)
3580:    */
3581:   public static char forDigit(int digit, int radix)
3582:   {
3583:     final boolean radixOk = radix >= MIN_RADIX && radix <= MAX_RADIX;
3584:     if (! radixOk || digit < 0 || digit >= radix)
3585:       return '\0';
3586:     return (char) (digit >= 10 ? digit - 10 + 'a' : digit + '0');
3587:   }
3588: 
3589:   /**
3590:    * Looks up the Unicode directionality property of a <code>char</code>,
3591:    * as used in the visual ordering of text.  Implemented in native code.
3592:    *
3593:    * @param ch the character to look up
3594:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
3595:    * @see #DIRECTIONALITY_UNDEFINED
3596:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
3597:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
3598:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
3599:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
3600:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
3601:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
3602:    * @see #DIRECTIONALITY_ARABIC_NUMBER
3603:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
3604:    * @see #DIRECTIONALITY_NONSPACING_MARK
3605:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
3606:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
3607:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
3608:    * @see #DIRECTIONALITY_WHITESPACE
3609:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
3610:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
3611:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
3612:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
3613:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
3614:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
3615:    * @since 1.4
3616:    */
3617:   public static native byte getDirectionality(char ch);
3618: 
3619:   /**
3620:    * Looks up the Unicode directionality property of a code point, as used
3621:    * in the visual ordering of text.  Unlike getDirectionality(char), this
3622:    * overload supports supplementary code points.  Implemented natively.
3623:    *
3624:    * @param codePoint the code point to look up
3625:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
3626:    * @see #DIRECTIONALITY_UNDEFINED
3627:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
3628:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
3629:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
3630:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
3631:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
3632:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
3633:    * @see #DIRECTIONALITY_ARABIC_NUMBER
3634:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
3635:    * @see #DIRECTIONALITY_NONSPACING_MARK
3636:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
3637:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
3638:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
3639:    * @see #DIRECTIONALITY_WHITESPACE
3640:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
3641:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
3642:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
3643:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
3644:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
3645:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
3646:    * @since 1.5
3647:    */
3648:   public static native byte getDirectionality(int codePoint);
3649: 
3650:   /**
3651:    * Tells whether Unicode marks the character as mirrored, i.e. whether
3652:    * its glyph flips with the text direction.  For instance U+0028 (LEFT
3653:    * PARENTHESIS) shows as '(' in left-to-right but ')' in right-to-left text.
3654:    *
3655:    * @param ch the character to look up
3656:    * @return true if the character is mirrored
3657:    * @since 1.4
3658:    */
3659:   public static boolean isMirrored(char ch)
3660:   {
3661:     return 0 != (MIRROR_MASK & readChar(ch));
3662:   }
3663: 
3664:   /**
3665:    * Tells whether Unicode marks the code point as mirrored.  For instance
3666:    * U+0028 (LEFT PARENTHESIS) shows as '(' in left-to-right text but as
3667:    * ')' in right-to-left text.  Unlike isMirrored(char), this overload
3668:    * supports supplementary Unicode code points.
3669:    *
3670:    * @param codePoint the code point to look up
3671:    * @return true if the code point is mirrored
3672:    * @since 1.5
3673:    */
3674:   public static boolean isMirrored(int codePoint)
3675:   {
3676:     final int plane = codePoint >>> 16;
3677:     // Attributes are only looked up for planes 0-2 and 14.
3678:     final boolean covered = plane <= 2 || plane == 14;
3679:     return covered && 0 != (MIRROR_MASK & readCodePoint(codePoint));
3680:   }
3681: 
3682:   /**
3683:    * Orders this Character relative to another one by numeric value.
3684:    *
3685:    * @param anotherCharacter the Character to compare this one against
3686:    * @return the difference between the two wrapped values: negative if
3687:    *         this Character is smaller, zero if both are equal, and
3688:    *         positive if this Character is greater
3689:    * @throws NullPointerException if anotherCharacter is null
3690:    * @since 1.2
3691:    */
3692:   public int compareTo(Character anotherCharacter)
3693:   {
3694:     return this.value - anotherCharacter.value;
3695:   }
3696: 
3697:   /**
3698:    * Returns a <code>Character</code> object wrapping the value.  Unlike
3699:    * the constructor, this method hands out a shared, lazily created
3700:    * instance for every value up to MAX_CACHE.  Used by boxing conversion.
3701:    *
3702:    * @param val the value to wrap
3703:    * @return the <code>Character</code>
3704:    * @since 1.5
3705:    */
3706:   public static Character valueOf(char val)
3707:   {
3708:     if (val > MAX_CACHE)
3709:       return new Character(val);
3710:     final int slot = val - MIN_VALUE;
3711:     synchronized (charCache)
3712:       {
3713:         if (charCache[slot] == null)
3714:           charCache[slot] = new Character(val);
3715:         return charCache[slot];
3716:       }
3717:   }
3718: 
3719:   /**
3720:    * Returns val with its two bytes swapped, e.g. 0x1234 becomes 0x3412.
3721:    * @since 1.5
3722:    */
3723:   public static char reverseBytes(char val)
3724:   {
3725:     return (char) ((val << 8) | (val >>> 8));
3726:   }
3727: 
3728:   /**
3729:    * Converts a unicode code point to a newly allocated array holding
3730:    * its UTF-16 representation.
3731:    *
3732:    * @param codePoint the unicode code point
3733:    * @return the UTF-16 representation of that code point; its length is
3734:    *         charCount(codePoint)
3735:    * @throws IllegalArgumentException if the code point is not a valid
3736:    *         unicode code point
3737:    * @since 1.5
3738:    */
3739:   public static char[] toChars(int codePoint)
3740:   {
3741:     if (isValidCodePoint(codePoint))
3742:       {
3743:         final char[] units = new char[charCount(codePoint)];
3744:         toChars(codePoint, units, 0);
3745:         return units;
3746:       }
3747:     throw new IllegalArgumentException("Illegal Unicode code point : "
3748:                                        + codePoint);
3749:   }
3750: 
3751:   /**
3752:    * Converts a unicode code point to its UTF-16 representation.
3753:    *
3754:    * @param codePoint the unicode code point
3755:    * @param dst the target char array
3756:    * @param dstIndex the start index for the target
3757:    *
3758:    * @return number of characters written to <code>dst</code>
3759:    *
3760:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
3761:    *         valid unicode code point
3762:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
3763:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
3764:    *         in <code>dst</code> or if the UTF-16 representation does not
3765:    *         fit into <code>dst</code>
3766:    *
3767:    * @since 1.5
3768:    */
3769:   public static int toChars(int codePoint, char[] dst, int dstIndex)
3770:   {
3771:     if (!isValidCodePoint(codePoint))
3772:       {
3773:         throw new IllegalArgumentException("not a valid code point: "
3774:                                            + codePoint);
3775:       }
3776: 
3777:     int result;
3778:     if (isSupplementaryCodePoint(codePoint))
3779:       {
3780:         // Write second char first to cause IndexOutOfBoundsException
3781:         // immediately.
3782:         final int cp2 = codePoint - 0x10000;
3783:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
3784:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
3785:         result = 2;
3786:       }
3787:     else
3788:       {
3789:         dst[dstIndex] = (char) codePoint;
3790:         result = 1; 
3791:       }
3792:     return result;
3793:   }
3794: 
3795:   /**
3796:    * Tells how many 16-bit chars the UTF-16 encoding of the given code
3797:    * point occupies.  The argument is not checked for being a valid code
3798:    * point; only its lower bound is compared.
3799:    *
3800:    * @param codePoint a unicode code point
3801:    *
3802:    * @return 2 if codePoint &gt;= 0x10000, 1 otherwise.
3803:    *
3804:    * @since 1.5
3805:    */
3806:   public static int charCount(int codePoint)
3807:   {
3808:     if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT)
3809:       return 1;
3810:     return 2;
3811:   }
3812: 
3813:   /**
3814:    * Tells whether the specified code point lies in the Unicode
3815:    * supplementary character range, 0x10000 .. 0x10FFFF.
3816:    *
3817:    * @param codePoint a Unicode code point
3818:    *
3819:    * @return <code>true</code> if code point is in supplementary range
3820:    *
3821:    * @since 1.5
3822:    */
3823:   public static boolean isSupplementaryCodePoint(int codePoint)
3824:   {
3825:     if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT)
3826:       return false;
3827:     return codePoint <= MAX_CODE_POINT;
3828:   }
3829: 
3830:   /**
3831:    * Tells whether the specified value is a valid Unicode code point,
3832:    * i.e. whether it lies in the range 0x0000 .. 0x10FFFF.
3833:    *
3834:    * @param codePoint a Unicode code point
3835:    *
3836:    * @return <code>true</code> if code point is valid
3837:    *
3838:    * @since 1.5
3839:    */
3840:   public static boolean isValidCodePoint(int codePoint)
3841:   {
3842:     return ! (codePoint < MIN_CODE_POINT || codePoint > MAX_CODE_POINT);
3843:   }
3844: 
3845:   /**
3846:    * Tells whether the given char lies in the high surrogate range.
3847:    * @param ch the character
3848:    * @return true if the character is a high surrogate character
3849:    *
3850:    * @since 1.5
3851:    */
3852:   public static boolean isHighSurrogate(char ch)
3853:   {
3854:     return MIN_HIGH_SURROGATE <= ch && ch <= MAX_HIGH_SURROGATE;
3855:   }
3856: 
3857:   /**
3858:    * Tells whether the given char lies in the low surrogate range.
3859:    * @param ch the character
3860:    * @return true if the character is a low surrogate character
3861:    *
3862:    * @since 1.5
3863:    */
3864:   public static boolean isLowSurrogate(char ch)
3865:   {
3866:     return MIN_LOW_SURROGATE <= ch && ch <= MAX_LOW_SURROGATE;
3867:   }
3868: 
3869:   /**
3870:    * Tells whether two chars form a surrogate pair, which is the case
3871:    * when the first is a high surrogate and the second a low surrogate.
3872:    * @param ch1 the first character
3873:    * @param ch2 the second character
3874:    * @return true if the characters compose a surrogate pair
3875:    * @since 1.5
3876:    */
3877:   public static boolean isSurrogatePair(char ch1, char ch2)
3878:   {
3879:     if (! isHighSurrogate(ch1))
3880:       return false;
3881:     return isLowSurrogate(ch2);
3882:   }
3883: 
3884:   /**
3885:    * Combines a surrogate pair into the supplementary code point it
3886:    * encodes.  The arguments are not checked for being surrogates.
3887:    * @param high the high character of the pair
3888:    * @param low the low character of the pair
3889:    * @return the corresponding code point
3890:    * @since 1.5
3891:    */
3892:   public static int toCodePoint(char high, char low)
3893:   {
3894:     final int highBits = (high - MIN_HIGH_SURROGATE) << 10;
3895:     final int lowBits = low - MIN_LOW_SURROGATE;
3896:     return 0x10000 + highBits + lowBits;
3897:   }
3898: 
3899:   /**
3900:    * Get the code point at the specified index in the CharSequence.
3901:    * This behaves like CharSequence#charAt(int), except that when the
3902:    * char at the index is a high surrogate and the next char exists and
3903:    * is a low surrogate, the supplementary code point encoded by the
3904:    * pair is returned instead of the lone char.
3905:    *
3906:    * @param sequence the CharSequence
3907:    * @param index the index of the codepoint to get, starting at 0
3908:    * @return the codepoint at the specified index
3909:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3910:    * @since 1.5
3911:    */
3912:   public static int codePointAt(CharSequence sequence, int index)
3913:   {
3914:     final int length = sequence.length();
3915:     if (index < 0 || index >= length)
3916:       throw new IndexOutOfBoundsException();
3917:     final char first = sequence.charAt(index);
3918:     if (isHighSurrogate(first) && index + 1 < length)
3919:       {
3920:         final char second = sequence.charAt(index + 1);
3921:         if (isLowSurrogate(second))
3922:           return toCodePoint(first, second);
3923:       }
3924:     return first;
3925:   }
3926: 
3927:   /**
3928:    * Get the code point at the specified index in the char array.
3929:    * If the char at the index starts a surrogate pair that is completed
3930:    * by the following array element, the corresponding supplementary
3931:    * code point is returned.  Otherwise, the char at the index is returned.
3932:    *
3933:    * @param chars the character array in which to look
3934:    * @param index the index of the codepoint to get, starting at 0
3935:    * @return the codepoint at the specified index
3936:    * @throws IndexOutOfBoundsException if index is negative or &gt;= the
3937:    * length of the array
3938:    */
3939:   public static int codePointAt(char[] chars, int index)
3940:   {
3941:     final int limit = chars.length;
3942:     return codePointAt(chars, index, limit);
3943:   }
3944: 
3945:   /**
3946:    * Get the code point at the specified index in the CharSequence.
3947:    * If the character is the start of a surrogate pair, and there is a
3948:    * following character within the specified range, and this
3949:    * character completes the pair, then the corresponding
3950:    * supplementary code point is returned.  Otherwise, the character
3951:    * at the index is returned.
3952:    *
3953:    * @param chars the character array in which to look
3954:    * @param index the index of the codepoint to get, starting at 0
3955:    * @param limit the limit past which characters should not be examined
3956:    * @return the codepoint at the specified index
3957:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
3958:    * limit, or if limit is negative or &gt;= the length of the array
3959:    * @since 1.5
3960:    */
3961:   public static int codePointAt(char[] chars, int index, int limit)
3962:   {
3963:     if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
3964:       throw new IndexOutOfBoundsException();
3965:     char high = chars[index];
3966:     if (! isHighSurrogate(high) || ++index >= limit)
3967:       return high;
3968:     char low = chars[index];
3969:     if (! isLowSurrogate(low))
3970:       return high;
3971:     return toCodePoint(high, low);
3972:   }
3973: 
3974:   /**
3975:    * Get the code point before the specified index.  This is like
3976:    * #codePointAt(char[], int), but checks the characters at
3977:    * <code>index-1</code> and <code>index-2</code> to see if they form
3978:    * a supplementary code point.  If they do not, the character at
3979:    * <code>index-1</code> is returned.  The whole array may be examined.
3980:    *
3981:    * @param chars the character array
3982:    * @param index the index just past the codepoint to get
3983:    * @return the codepoint before the specified index
3984:    * @throws IndexOutOfBoundsException if index is &lt; 1 or &gt; chars.length
3985:    * @since 1.5
3986:    */
3987:   public static int codePointBefore(char[] chars, int index)
3988:   {
3989:     return codePointBefore(chars, index, 0);
3990:   }
3991: 
3992:   /**
3993:    * Get the code point before the specified index.  This is like
3994:    * #codePointAt(char[], int), but checks the characters at
3995:    * <code>index-1</code> and <code>index-2</code> to see if they form
3996:    * a supplementary code point.  If they do not, the character at
3997:    * <code>index-1</code> is returned.  The start parameter is used to
3998:    * limit the range of the array which may be examined.
3999:    *
4000:    * @param chars the character array
4001:    * @param index the index just past the codepoint to get
4002:    * @param start the index before which characters should not be examined
4003:    * @return the codepoint before the specified index
4004:    * @throws IndexOutOfBoundsException if index is &lt;= start or &gt;
4005:    * the length of the array, or if start is negative or &gt;= the
4006:    * length of the array
4007:    * @since 1.5
4008:    */
4009:   public static int codePointBefore(char[] chars, int index, int start)
4010:   {
4011:     if (index <= start || index > chars.length
4012:         || start < 0 || start >= chars.length)
4013:       throw new IndexOutOfBoundsException();
4014:     final char low = chars[index - 1];
4015:     if (isLowSurrogate(low) && index - 2 >= start)
4016:       {
4017:         final char high = chars[index - 2];
4018:         if (isHighSurrogate(high))
4019:           return toCodePoint(high, low);
4020:       }
4021:     return low;
4022:   }
4023: 
4024:   /**
4025:    * Get the code point before the specified index.  This is like
4026:    * #codePointAt(CharSequence, int), but looks backwards: if the chars
4027:    * at <code>index-2</code> and <code>index-1</code> form a surrogate
4028:    * pair, its supplementary code point is returned.  If they do not,
4029:    * the character at <code>index-1</code> is returned.
4030:    *
4031:    * @param sequence the CharSequence
4032:    * @param index the index just past the codepoint to get
4033:    * @return the codepoint before the specified index
4034:    * @throws IndexOutOfBoundsException if index is &lt; 1 or &gt; length()
4035:    * @since 1.5
4036:    */
4037:   public static int codePointBefore(CharSequence sequence, int index)
4038:   {
4039:     final int length = sequence.length();
4040:     if (index < 1 || index > length)
4041:       throw new IndexOutOfBoundsException();
4042:     final char last = sequence.charAt(index - 1);
4043:     if (isLowSurrogate(last) && index >= 2)
4044:       {
4045:         final char previous = sequence.charAt(index - 2);
4046:         if (isHighSurrogate(previous))
4047:           return toCodePoint(previous, last);
4048:       }
4049:     return last;
4050:   }
4051: } // class Character
Overview Package Class Use Source Tree Index Deprecated About
		Frames \| No Frames