00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
#ifndef UNICODE_H
00028
#define UNICODE_H
00029
00030
#include "unicode/utypes.h"
00031
#include "unicode/uchar.h"
00032
00033 U_NAMESPACE_BEGIN
00055 class U_COMMON_API Unicode
00056 {
00057
public:
00058
00059
00060
00061
00062
00063
00064
00065
enum {
00067 MIN_VALUE=0,
00068
00074 MAX_VALUE=0x10ffff,
00075
00083 MAX_CHAR_LENGTH=
UTF_MAX_CHAR_LENGTH,
00084
00095 MIN_RADIX=2,
00096
00107 MAX_RADIX=36
00108 };
00109
00116 enum EUnicodeGeneralTypes
00117 {
00118 UNASSIGNED = 0,
00119 UPPERCASE_LETTER = 1,
00120 LOWERCASE_LETTER = 2,
00121 TITLECASE_LETTER = 3,
00122 MODIFIER_LETTER = 4,
00123 OTHER_LETTER = 5,
00124 NON_SPACING_MARK = 6,
00125 ENCLOSING_MARK = 7,
00126 COMBINING_SPACING_MARK = 8,
00127 DECIMAL_DIGIT_NUMBER = 9,
00128 LETTER_NUMBER = 10,
00129 OTHER_NUMBER = 11,
00130 SPACE_SEPARATOR = 12,
00131 LINE_SEPARATOR = 13,
00132 PARAGRAPH_SEPARATOR = 14,
00133 CONTROL = 15,
00134 FORMAT = 16,
00135 PRIVATE_USE = 17,
00136 SURROGATE = 18,
00137 DASH_PUNCTUATION = 19,
00138 START_PUNCTUATION = 20,
00139 END_PUNCTUATION = 21,
00140 CONNECTOR_PUNCTUATION = 22,
00141 OTHER_PUNCTUATION = 23,
00142 MATH_SYMBOL = 24,
00143 CURRENCY_SYMBOL = 25,
00144 MODIFIER_SYMBOL = 26,
00145 OTHER_SYMBOL = 27,
00146 INITIAL_PUNCTUATION = 28,
00147 FINAL_PUNCTUATION = 29,
00148 GENERAL_TYPES_COUNT = 30
00149 };
00150
00151
00157 enum EUnicodeScript
00158 {
00159 kBasicLatin=
UBLOCK_BASIC_LATIN,
00160 kLatin1Supplement,
00161 kLatinExtendedA,
00162 kLatinExtendedB,
00163 kIPAExtension,
00164 kSpacingModifier,
00165 kCombiningDiacritical,
00166 kGreek,
00167 kCyrillic,
00168 kArmenian,
00169 kHebrew,
00170 kArabic,
00171 kSyriac,
00172 kThaana,
00173 kDevanagari,
00174 kBengali,
00175 kGurmukhi,
00176 kGujarati,
00177 kOriya,
00178 kTamil,
00179 kTelugu,
00180 kKannada,
00181 kMalayalam,
00182 kSinhala,
00183 kThai,
00184 kLao,
00185 kTibetan,
00186 kMyanmar,
00187 kGeorgian,
00188 kHangulJamo,
00189 kEthiopic,
00190 kCherokee,
00191 kUnifiedCanadianAboriginalSyllabics,
00192 kogham,
00193 kRunic,
00194 kKhmer,
00195 kMongolian,
00196 kLatinExtendedAdditional,
00197 kGreekExtended,
00198 kGeneralPunctuation,
00199 kSuperSubScript,
00200 kCurrencySymbolScript,
00201 kSymbolCombiningMark,
00202 kLetterlikeSymbol,
00203 kNumberForm,
00204 kArrow,
00205 kMathOperator,
00206 kMiscTechnical,
00207 kControlPicture,
00208 kOpticalCharacter,
00209 kEnclosedAlphanumeric,
00210 kBoxDrawing,
00211 kBlockElement,
00212 kGeometricShape,
00213 kMiscSymbol,
00214 kDingbat,
00215 kBraillePatterns,
00216 kCJKRadicalsSupplement,
00217 kKangxiRadicals,
00218 kIdeographicDescriptionCharacters,
00219 kCJKSymbolPunctuation,
00220 kHiragana,
00221 kKatakana,
00222 kBopomofo,
00223 kHangulCompatibilityJamo,
00224 kKanbun,
00225 kBopomofoExtended,
00226 kEnclosedCJKLetterMonth,
00227 kCJKCompatibility,
00228 kCJKUnifiedIdeographExtensionA,
00229 kCJKUnifiedIdeograph,
00230 kYiSyllables,
00231 kYiRadicals,
00232 kHangulSyllable,
00233 kHighSurrogate,
00234 kHighPrivateUseSurrogate,
00235 kLowSurrogate,
00236 kPrivateUse,
00237 kCJKCompatibilityIdeograph,
00238 kAlphabeticPresentation,
00239 kArabicPresentationA,
00240 kCombiningHalfMark,
00241 kCJKCompatibilityForm,
00242 kSmallFormVariant,
00243 kArabicPresentationB,
00244 kNoScript,
00245 kHalfwidthFullwidthForm,
00246 kScriptCount=
UBLOCK_COUNT
00247 };
00248
00254 enum EDirectionProperty {
00255 LEFT_TO_RIGHT = 0,
00256 RIGHT_TO_LEFT = 1,
00257 EUROPEAN_NUMBER = 2,
00258 EUROPEAN_NUMBER_SEPARATOR = 3,
00259 EUROPEAN_NUMBER_TERMINATOR = 4,
00260 ARABIC_NUMBER = 5,
00261 COMMON_NUMBER_SEPARATOR = 6,
00262 BLOCK_SEPARATOR = 7,
00263 SEGMENT_SEPARATOR = 8,
00264 WHITE_SPACE_NEUTRAL = 9,
00265 OTHER_NEUTRAL = 10,
00266 LEFT_TO_RIGHT_EMBEDDING = 11,
00267 LEFT_TO_RIGHT_OVERRIDE = 12,
00268 RIGHT_TO_LEFT_ARABIC = 13,
00269 RIGHT_TO_LEFT_EMBEDDING = 14,
00270 RIGHT_TO_LEFT_OVERRIDE = 15,
00271 POP_DIRECTIONAL_FORMAT = 16,
00272 DIR_NON_SPACING_MARK = 17,
00273 BOUNDARY_NEUTRAL = 18
00274 };
00275
00282 enum ECellWidths
00283 {
00284 ZERO_WIDTH = 0,
00285 HALF_WIDTH = 1,
00286 FULL_WIDTH = 2,
00287 NEUTRAL = 3
00288 };
00289
00301
static inline UBool isSingle(UChar c);
00302
00312
static inline UBool isLead(UChar c);
00313
00323
static inline UBool isTrail(UChar c);
00324
00336
static inline UBool isSurrogate(UChar32 c);
00337
00351
static inline UBool isUnicodeChar(UChar32 c);
00352
00365
static inline UBool isError(UChar32 c);
00366
00377
static inline UBool isValid(UChar32 c);
00378
00391
static inline UBool needMultipleUChar(UChar32 c);
00392
00402
static inline int32_t charLength(UChar32 c);
00403
00418
static inline int32_t arraySize(int32_t size);
00419
00433
static inline UBool isLowerCase(UChar32 ch);
00434
00447
static inline UBool isUpperCase(UChar32 ch);
00448
00461
static inline UBool isTitleCase(UChar32 ch);
00462
00475
static inline UBool isDigit(UChar32 ch);
00476
00493
static inline UBool isDefined(UChar32 ch);
00494
00506
static inline UBool isControl(UChar32 ch);
00507
00519
static inline UBool isPrintable(UChar32 ch);
00520
00533
static inline UBool isBaseForm(UChar32 ch);
00534
00551
static inline UBool isLetter(UChar32 ch);
00552
00574
static inline UBool isJavaIdentifierStart(UChar32 ch);
00575
00605
static inline UBool isJavaIdentifierPart(UChar32 ch);
00606
00622
static inline UBool isUnicodeIdentifierStart(UChar32 ch);
00623
00651
static inline UBool isUnicodeIdentifierPart(UChar32 ch);
00652
00679
static inline UBool isIdentifierIgnorable(UChar32 ch);
00680
00706
static inline UChar32 toLowerCase(UChar32 ch);
00707
00730
static inline UChar32 toUpperCase(UChar32 ch);
00731
00750
static inline UChar32 toTitleCase(UChar32 ch);
00751
00766
static inline UChar32
00767 foldCase(UChar32 c, uint32_t options);
00768
00778
static inline UBool isSpaceChar(UChar32 ch);
00779
00809
static inline UBool isWhitespace(UChar32 ch);
00810
00846
static inline int8_t getType(UChar32 ch);
00847
00856
static inline uint8_t getCombiningClass(UChar32 c);
00857
00868
static inline EDirectionProperty characterDirection(UChar32 ch);
00869
00881
static inline UBool isMirrored(UChar32 c);
00882
00900
static inline UChar32 charMirror(UChar32 c);
00901
00907
static inline EUnicodeScript getScript(UChar32 ch);
00908
00961
static inline uint16_t getCellWidth(UChar32 ch);
00962
00991
static inline int32_t
00992 getCharName(uint32_t code,
00993
char *buffer, int32_t bufferLength,
00994 UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME);
00995
01007
static inline int32_t digitValue(UChar32 ch);
01008
01047
static inline int32_t digit(UChar32 ch, int8_t radix);
01048
01077
static inline UChar32 forDigit(int32_t digit, int8_t radix);
01078
01085
static void getUnicodeVersion(UVersionInfo info);
01086
01087
protected:
01088
01089
01090
01091
01092
01093 Unicode();
01094 Unicode(
const Unicode &other);
01095 ~Unicode();
01096
const Unicode &operator=(
const Unicode &other);
01097 };
01098
01099
01100
01101
/** Inline forwarder: yields UTF_IS_SINGLE(c), converted to UBool. */
inline UBool Unicode::isSingle(UChar c)
{
    return UTF_IS_SINGLE(c);
}
01105
01106
/** Inline forwarder: yields UTF_IS_LEAD(c), converted to UBool. */
inline UBool Unicode::isLead(UChar c)
{
    return UTF_IS_LEAD(c);
}
01110
01111
/** Inline forwarder: yields UTF_IS_TRAIL(c), converted to UBool. */
inline UBool Unicode::isTrail(UChar c)
{
    return UTF_IS_TRAIL(c);
}
01115
01116
/** Inline forwarder: yields UTF_IS_SURROGATE(c), converted to UBool. */
inline UBool Unicode::isSurrogate(UChar32 c)
{
    return UTF_IS_SURROGATE(c);
}
01120
01121
/** Inline forwarder: yields UTF_IS_UNICODE_CHAR(c), converted to UBool. */
inline UBool Unicode::isUnicodeChar(UChar32 c)
{
    return UTF_IS_UNICODE_CHAR(c);
}
01125
01126
/** Inline forwarder: yields UTF_IS_ERROR(c), converted to UBool. */
inline UBool Unicode::isError(UChar32 c)
{
    return UTF_IS_ERROR(c);
}
01130
01131
/** Inline forwarder: yields UTF_IS_VALID(c), converted to UBool. */
inline UBool Unicode::isValid(UChar32 c)
{
    return UTF_IS_VALID(c);
}
01135
01136
/** Inline forwarder: yields UTF_NEED_MULTIPLE_UCHAR(c), converted to UBool. */
inline UBool Unicode::needMultipleUChar(UChar32 c)
{
    return UTF_NEED_MULTIPLE_UCHAR(c);
}
01140
01141
inline int32_t
01142 Unicode::charLength(UChar32 c) {
01143
return UTF_CHAR_LENGTH(c);
01144 }
01145
01146
inline int32_t
01147 Unicode::arraySize(int32_t size) {
01148
return UTF_ARRAY_SIZE(size);
01149 }
01150
01151
01152
/** Delegates to u_islower() and returns its result unchanged. */
inline UBool Unicode::isLowerCase(UChar32 ch)
{
    return u_islower(ch);
}
01156
01157
01158
/** Delegates to u_isupper() and returns its result unchanged. */
inline UBool Unicode::isUpperCase(UChar32 ch)
{
    return u_isupper(ch);
}
01162
01163
01164
/** Delegates to u_istitle() and returns its result unchanged. */
inline UBool Unicode::isTitleCase(UChar32 ch)
{
    return u_istitle(ch);
}
01168
01169
01170
/** Delegates to u_isdigit() and returns its result unchanged. */
inline UBool Unicode::isDigit(UChar32 ch)
{
    return u_isdigit(ch);
}
01174
01175
01176
/** Delegates to u_isdefined() and returns its result unchanged. */
inline UBool Unicode::isDefined(UChar32 ch)
{
    return u_isdefined(ch);
}
01180
01181
01182
/** Delegates to u_iscntrl() and returns its result unchanged. */
inline UBool Unicode::isControl(UChar32 ch)
{
    return u_iscntrl(ch);
}
01186
01187
01188
/** Delegates to u_isprint() and returns its result unchanged. */
inline UBool Unicode::isPrintable(UChar32 ch)
{
    return u_isprint(ch);
}
01192
01193
01194
/** Delegates to u_isbase() and returns its result unchanged. */
inline UBool Unicode::isBaseForm(UChar32 ch)
{
    return u_isbase(ch);
}
01198
01199
01200
/** Delegates to u_isalpha() and returns its result unchanged. */
inline UBool Unicode::isLetter(UChar32 ch)
{
    return u_isalpha(ch);
}
01204
01205
01206
/** Delegates to u_isJavaIDStart() and returns its result unchanged. */
inline UBool Unicode::isJavaIdentifierStart(UChar32 ch)
{
    return u_isJavaIDStart(ch);
}
01210
01211
01212
01213
/** Delegates to u_isJavaIDPart() and returns its result unchanged. */
inline UBool Unicode::isJavaIdentifierPart(UChar32 ch)
{
    return u_isJavaIDPart(ch);
}
01217
01218
01219
/** Delegates to u_isIDStart() and returns its result unchanged. */
inline UBool Unicode::isUnicodeIdentifierStart(UChar32 ch)
{
    return u_isIDStart(ch);
}
01223
01224
01225
01226
/** Delegates to u_isIDPart() and returns its result unchanged. */
inline UBool Unicode::isUnicodeIdentifierPart(UChar32 ch)
{
    return u_isIDPart(ch);
}
01230
01231
01232
/** Delegates to u_isIDIgnorable() and returns its result unchanged. */
inline UBool Unicode::isIdentifierIgnorable(UChar32 ch)
{
    return u_isIDIgnorable(ch);
}
01236
01237
01238
/** Returns the mapping that u_tolower() produces for ch. */
inline UChar32 Unicode::toLowerCase(UChar32 ch)
{
    return u_tolower(ch);
}
01242
01243
01244
/** Returns the mapping that u_toupper() produces for ch. */
inline UChar32 Unicode::toUpperCase(UChar32 ch)
{
    return u_toupper(ch);
}
01248
01249
01250
/** Returns the mapping that u_totitle() produces for ch. */
inline UChar32 Unicode::toTitleCase(UChar32 ch)
{
    return u_totitle(ch);
}
01254
01255
01256
/**
 * Returns the mapping that u_foldCase() produces for ch.
 * The options word is handed to u_foldCase() unmodified.
 */
inline UChar32 Unicode::foldCase(UChar32 ch, uint32_t options)
{
    return u_foldCase(ch, options);
}
01260
01261
01262
/** Delegates to u_isspace() and returns its result unchanged. */
inline UBool Unicode::isSpaceChar(UChar32 ch)
{
    return u_isspace(ch);
}
01266
01267
01268
/** Delegates to u_isWhitespace() and returns its result unchanged. */
inline UBool Unicode::isWhitespace(UChar32 ch)
{
    return u_isWhitespace(ch);
}
01272
01273
01274
inline int8_t
01275 Unicode::getType(UChar32 ch) {
01276
return u_charType(ch);
01277 }
01278
01279
inline uint8_t
01280 Unicode::getCombiningClass(UChar32 c) {
01281
return u_getCombiningClass(c);
01282 }
01283
01284
01285
/**
 * Returns the value of u_charDirection(ch) converted to EDirectionProperty.
 * The conversion is a plain value cast; no range check is performed here.
 */
inline Unicode::EDirectionProperty Unicode::characterDirection(UChar32 ch)
{
    return static_cast<EDirectionProperty>(u_charDirection(ch));
}
01289
01290
01291
/** Delegates to u_isMirrored() and returns its result unchanged. */
inline UBool Unicode::isMirrored(UChar32 ch)
{
    return u_isMirrored(ch);
}
01295
01296
01297
/** Returns the value that u_charMirror() produces for ch. */
inline UChar32 Unicode::charMirror(UChar32 ch)
{
    return u_charMirror(ch);
}
01301
01302
01303
/**
 * Returns the value of u_charScript(ch) converted to EUnicodeScript.
 * The conversion is a plain value cast; no range check is performed here.
 */
inline Unicode::EUnicodeScript Unicode::getScript(UChar32 ch)
{
    return static_cast<EUnicodeScript>(u_charScript(ch));
}
01307
01308
01309
inline uint16_t
01310 Unicode::getCellWidth(UChar32 ch) {
01311
return u_charCellWidth(ch);
01312 }
01313
01314
inline int32_t
01315 Unicode::getCharName(uint32_t code,
01316
char *buffer, int32_t bufferLength,
01317
UCharNameChoice nameChoice) {
01318 UErrorCode errorCode=
U_ZERO_ERROR;
01319 int32_t length=
u_charName(code, nameChoice, buffer, bufferLength, &errorCode);
01320
return U_SUCCESS(errorCode) ? length : 0;
01321 }
01322
01323
inline int32_t
01324 Unicode::digitValue(UChar32 ch) {
01325
return u_charDigitValue(ch);
01326 }
01327
01328
inline int32_t
01329 Unicode::digit(UChar32 ch, int8_t radix) {
01330
return u_digit(ch, radix);
01331 }
01332
01333
/** Returns the value of u_forDigit(digit, radix); both arguments are passed through. */
inline UChar32 Unicode::forDigit(int32_t digit, int8_t radix)
{
    return u_forDigit(digit, radix);
}
01337
01338
inline void
01339 Unicode::getUnicodeVersion(UVersionInfo versionArray) {
01340
u_getUnicodeVersion(versionArray);
01341 }
01342 U_NAMESPACE_END
01343
01344
#endif