Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

normlzr.h

00001 /* 00002 ******************************************************************** 00003 * COPYRIGHT: 00004 * Copyright (c) 1996-2001, International Business Machines Corporation and 00005 * others. All Rights Reserved. 00006 ******************************************************************** 00007 */ 00008 00009 #ifndef NORMLZR_H 00010 #define NORMLZR_H 00011 00012 #include "unicode/utypes.h" 00013 #include "unicode/unistr.h" 00014 #include "unicode/chariter.h" 00015 #include "unicode/unorm.h" 00016 00017 struct UCharIterator; 00018 typedef struct UCharIterator UCharIterator; 00019 00020 U_NAMESPACE_BEGIN 00111 class U_COMMON_API Normalizer 00112 { 00113 public: 00119 enum { 00120 DONE=0xffff 00121 }; 00122 00123 // Constructors 00124 00135 Normalizer(const UnicodeString& str, UNormalizationMode mode); 00136 00148 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); 00149 00160 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 00161 00166 Normalizer(const Normalizer& copy); 00167 00172 ~Normalizer(); 00173 00174 00175 //------------------------------------------------------------------------- 00176 // Static utility methods 00177 //------------------------------------------------------------------------- 00178 00196 static void normalize(const UnicodeString& source, 00197 UNormalizationMode mode, int32_t options, 00198 UnicodeString& result, 00199 UErrorCode &status); 00200 00222 static void compose(const UnicodeString& source, 00223 UBool compat, int32_t options, 00224 UnicodeString& result, 00225 UErrorCode &status); 00226 00249 static void decompose(const UnicodeString& source, 00250 UBool compat, int32_t options, 00251 UnicodeString& result, 00252 UErrorCode &status); 00253 00272 static UNormalizationCheckResult 00273 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); 00274 00275 /* 00276 * Concatenate normalized strings, making sure that the result is normalized as well. 00277 * 00278 * If both the left and the right strings are in 00279 * the normalization form according to "mode", 00280 * then the result will be 00281 * 00282 * \code 00283 * dest=normalize(left+right, mode) 00284 * \endcode 00285 * 00286 * For details see unorm_concatenate in unorm.h. 00287 * 00288 * @param left Left source string. 00289 * @param right Right source string. 00290 * @param dest The output string. 00291 * @param mode The normalization mode. 00292 * @param options A bit set of normalization options. 00293 * @param pErrorCode ICU error code in/out parameter. 00294 * Must fulfill U_SUCCESS before the function call. 00295 * @return result 00296 * 00297 * @see unorm_concatenate 00298 * @see normalize 00299 * @see unorm_next 00300 * @see unorm_previous 00301 * 00302 * @draft ICU 2.1 00303 */ 00304 static UnicodeString & 00305 concatenate(UnicodeString &left, UnicodeString &right, 00306 UnicodeString &result, 00307 UNormalizationMode mode, int32_t options, 00308 UErrorCode &errorCode); 00309 00310 //------------------------------------------------------------------------- 00311 // Iteration API 00312 //------------------------------------------------------------------------- 00313 00322 UChar32 current(void); 00323 00332 UChar32 first(void); 00333 00342 UChar32 last(void); 00343 00352 UChar32 next(void); 00353 00362 UChar32 previous(void); 00363 00383 UChar32 setIndex(int32_t index); 00384 00394 void setIndexOnly(int32_t index); 00395 00401 void reset(void); 00402 00417 int32_t getIndex(void) const; 00418 00427 int32_t startIndex(void) const; 00428 00439 int32_t endIndex(void) const; 00440 00449 UBool operator==(const Normalizer& that) const; 00450 00459 inline UBool operator!=(const Normalizer& that) const; 00460 00467 Normalizer* clone(void) const; 00468 00475 int32_t hashCode(void) const; 00476 00477 //------------------------------------------------------------------------- 00478 // Property access methods 00479 //------------------------------------------------------------------------- 00480 00496 void setMode(UNormalizationMode newMode); 00497 00508 UNormalizationMode getUMode(void) const; 00509 00526 void setOption(int32_t option, 00527 UBool value); 00528 00539 UBool getOption(int32_t option) const; 00540 00549 void setText(const UnicodeString& newText, 00550 UErrorCode &status); 00551 00560 void setText(const CharacterIterator& newText, 00561 UErrorCode &status); 00562 00572 void setText(const UChar* newText, 00573 int32_t length, 00574 UErrorCode &status); 00581 void getText(UnicodeString& result); 00582 00583 //------------------------------------------------------------------------- 00584 // Deprecated APIs 00585 //------------------------------------------------------------------------- 00586 00591 enum { 00592 COMPAT_BIT = 1, 00593 DECOMP_BIT = 2, 00594 COMPOSE_BIT = 4, 00595 FCD_BIT = 8 00596 }; 00597 00602 enum EMode { 00616 NO_OP = 0, 00617 00633 COMPOSE = COMPOSE_BIT, 00634 00650 COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT, 00651 00667 DECOMP = DECOMP_BIT, 00668 00684 DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT, 00685 00689 FCD = FCD_BIT 00690 }; 00691 00693 enum { 00712 IGNORE_HANGUL = 0x001 00713 }; 00714 00725 Normalizer(const UnicodeString& str, 00726 EMode mode); 00727 00746 Normalizer(const UnicodeString& str, 00747 EMode mode, 00748 int32_t opt); 00749 00761 Normalizer(const UChar* str, 00762 int32_t length, 00763 EMode mode); 00764 00780 Normalizer(const UChar* str, 00781 int32_t length, 00782 EMode mode, 00783 int32_t option); 00784 00795 Normalizer(const CharacterIterator& iter, 00796 EMode mode); 00797 00813 Normalizer(const CharacterIterator& iter, 00814 EMode mode, 00815 int32_t opt); 00816 00837 inline static void 00838 normalize(const UnicodeString& source, 00839 EMode mode, 00840 int32_t options, 00841 UnicodeString& result, 00842 UErrorCode &status); 00843 00860 inline static UNormalizationCheckResult 00861 quickCheck(const UnicodeString& source, 00862 EMode mode, 00863 UErrorCode& status); 00864 00872 inline static UNormalizationMode getUNormalizationMode(EMode mode, 00873 UErrorCode& status); 00874 00882 inline static EMode getNormalizerEMode(UNormalizationMode mode, 00883 UErrorCode& status); 00884 00911 inline void setMode(EMode newMode); 00912 00919 inline EMode getMode(void) const; 00920 00921 private: 00922 //------------------------------------------------------------------------- 00923 // Private functions 00924 //------------------------------------------------------------------------- 00925 00926 // Private utility methods for iteration 00927 // For documentation, see the source code 00928 UBool nextNormalize(); 00929 UBool previousNormalize(); 00930 00931 void init(CharacterIterator *iter); 00932 void clearBuffer(void); 00933 00934 // Helper, without UErrorCode, for easier transitional code 00935 // remove after 2002-sep-30 with EMode etc. 00936 inline static UNormalizationMode getUMode(EMode mode); 00937 00938 //------------------------------------------------------------------------- 00939 // Private data 00940 //------------------------------------------------------------------------- 00941 00942 UNormalizationMode fUMode; 00943 int32_t fOptions; 00944 00945 // The input text and our position in it 00946 UCharIterator *text; 00947 00948 // The normalization buffer is the result of normalization 00949 // of the source in [currentIndex..nextIndex[ . 00950 int32_t currentIndex, nextIndex; 00951 00952 // A buffer for holding intermediate results 00953 UnicodeString buffer; 00954 int32_t bufferPos; 00955 }; 00956 00957 //------------------------------------------------------------------------- 00958 // Inline implementations 00959 //------------------------------------------------------------------------- 00960 00961 inline UBool 00962 Normalizer::operator!= (const Normalizer& other) const 00963 { return ! operator==(other); } 00964 00965 inline void 00966 Normalizer::normalize(const UnicodeString& source, 00967 EMode mode, int32_t options, 00968 UnicodeString& result, 00969 UErrorCode &status) { 00970 normalize(source, getUNormalizationMode(mode, status), options, result, status); 00971 } 00972 00973 inline UNormalizationCheckResult 00974 Normalizer::quickCheck(const UnicodeString& source, 00975 EMode mode, 00976 UErrorCode &status) { 00977 return quickCheck(source, getUNormalizationMode(mode, status), status); 00978 } 00979 00980 inline void 00981 Normalizer::setMode(EMode newMode) { 00982 UErrorCode status = U_ZERO_ERROR; 00983 fUMode = getUNormalizationMode(newMode, status); 00984 } 00985 00986 inline Normalizer::EMode 00987 Normalizer::getMode() const { 00988 UErrorCode status = U_ZERO_ERROR; 00989 return getNormalizerEMode(fUMode, status); 00990 } 00991 00992 inline UNormalizationMode Normalizer::getUNormalizationMode( 00993 Normalizer::EMode mode, UErrorCode &status) 00994 { 00995 if (U_SUCCESS(status)) 00996 { 00997 switch (mode) 00998 { 00999 case Normalizer::NO_OP : 01000 return UNORM_NONE; 01001 case Normalizer::COMPOSE : 01002 return UNORM_NFC; 01003 case Normalizer::COMPOSE_COMPAT : 01004 return UNORM_NFKC; 01005 case Normalizer::DECOMP : 01006 return UNORM_NFD; 01007 case Normalizer::DECOMP_COMPAT : 01008 return UNORM_NFKD; 01009 case Normalizer::FCD: 01010 return UNORM_FCD; 01011 default : 01012 status = U_ILLEGAL_ARGUMENT_ERROR; 01013 } 01014 } 01015 return UNORM_DEFAULT; 01016 } 01017 01018 inline UNormalizationMode 01019 Normalizer::getUMode(Normalizer::EMode mode) { 01020 switch(mode) { 01021 case Normalizer::NO_OP : 01022 return UNORM_NONE; 01023 case Normalizer::COMPOSE : 01024 return UNORM_NFC; 01025 case Normalizer::COMPOSE_COMPAT : 01026 return UNORM_NFKC; 01027 case Normalizer::DECOMP : 01028 return UNORM_NFD; 01029 case Normalizer::DECOMP_COMPAT : 01030 return UNORM_NFKD; 01031 case Normalizer::FCD: 01032 return UNORM_FCD; 01033 default : 01034 return UNORM_DEFAULT; 01035 } 01036 } 01037 01038 inline Normalizer::EMode Normalizer::getNormalizerEMode( 01039 UNormalizationMode mode, UErrorCode &status) 01040 { 01041 if (U_SUCCESS(status)) 01042 { 01043 switch (mode) 01044 { 01045 case UNORM_NONE : 01046 return Normalizer::NO_OP; 01047 case UNORM_NFD : 01048 return Normalizer::DECOMP; 01049 case UNORM_NFKD : 01050 return Normalizer::DECOMP_COMPAT; 01051 case UNORM_NFC : 01052 return Normalizer::COMPOSE; 01053 case UNORM_NFKC : 01054 return Normalizer::COMPOSE_COMPAT; 01055 case UNORM_FCD: 01056 return Normalizer::FCD; 01057 default : 01058 status = U_ILLEGAL_ARGUMENT_ERROR; 01059 } 01060 } 01061 return Normalizer::DECOMP_COMPAT; 01062 } 01063 01064 U_NAMESPACE_END 01065 #endif // _NORMLZR

Generated on Fri Aug 13 09:53:50 2004 for ICU 2.1 by doxygen 1.3.7