normlzr.h

Go to the documentation of this file.
00001 /*
00002  ********************************************************************
00003  * COPYRIGHT:
00004  * Copyright (c) 1996-2005, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 
00019 #if !UCONFIG_NO_NORMALIZATION
00020 
00021 #include "unicode/uobject.h"
00022 #include "unicode/unistr.h"
00023 #include "unicode/chariter.h"
00024 #include "unicode/unorm.h"
00025 
00026 
/*
 * Forward declaration of the C-level UCharIterator struct, plus a typedef so
 * the type can be named without the `struct` keyword. Within this header the
 * type is only used through a pointer (the private Normalizer::text member),
 * so an incomplete type is sufficient here.
 */
00027 struct UCharIterator;
00028 typedef struct UCharIterator UCharIterator; 
00030 U_NAMESPACE_BEGIN
/**
 * Normalizer: a UObject subclass exported through U_COMMON_API.
 *
 * The class declares three groups of members:
 *  - static utilities that operate on whole UnicodeString arguments
 *    (normalize, compose, decompose, quickCheck, isNormalized, concatenate,
 *    compare);
 *  - an iteration API whose stepping functions return UChar32 values
 *    (current, first, last, next, previous) plus index accessors;
 *  - property accessors for the normalization mode, options and input text.
 *
 * The default constructor and the assignment operator are declared private
 * and, according to the comments on them, are not implemented. Copy
 * construction and clone() are public.
 *
 * Only the inline members defined after the class are visible in this header;
 * the behavior of all other members is defined out of line.
 */
00120 class U_COMMON_API Normalizer : public UObject {
00121 public:
        /**
         * Anonymous enum holding the constant DONE (0xffff).
         * NOTE(review): presumably the sentinel the iteration functions return
         * when there is nothing more to iterate -- confirm against the
         * implementation.
         */
00127   enum {
00128       DONE=0xffff
00129   };
00130 
00131   // Constructors
00132 
        /** Constructs a Normalizer from a UnicodeString and a normalization mode. */
00143   Normalizer(const UnicodeString& str, UNormalizationMode mode);
00144 
        /**
         * Constructs a Normalizer from a UChar array with an explicit length
         * and a normalization mode.
         */
00156   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00157 
        /** Constructs a Normalizer from a CharacterIterator and a normalization mode. */
00168   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00169 
        /** Copy constructor. */
00175   Normalizer(const Normalizer& copy);
00176 
        /** Virtual destructor. */
00181   virtual ~Normalizer();
00182 
00183 
00184   //-------------------------------------------------------------------------
00185   // Static utility methods
00186   //-------------------------------------------------------------------------
00187 
        /**
         * Static utility taking a source string, a mode and an options word;
         * `result` and `status` are non-const references, i.e. output
         * parameters. Defined out of line.
         */
00202   static void U_EXPORT2 normalize(const UnicodeString& source,
00203                         UNormalizationMode mode, int32_t options,
00204                         UnicodeString& result,
00205                         UErrorCode &status);
00206 
        /**
         * Static utility taking a source string, a `compat` flag and an
         * options word; `result` and `status` are output parameters.
         * NOTE(review): `compat` presumably selects compatibility rather than
         * canonical composition -- confirm against the implementation.
         */
00224   static void U_EXPORT2 compose(const UnicodeString& source,
00225                       UBool compat, int32_t options,
00226                       UnicodeString& result,
00227                       UErrorCode &status);
00228 
        /**
         * Static utility with the same parameter list as compose();
         * `result` and `status` are output parameters.
         * NOTE(review): `compat` presumably selects compatibility rather than
         * canonical decomposition -- confirm against the implementation.
         */
00246   static void U_EXPORT2 decompose(const UnicodeString& source,
00247                         UBool compat, int32_t options,
00248                         UnicodeString& result,
00249                         UErrorCode &status);
00250 
        /**
         * Inline wrapper (defined after the class) that forwards to
         * unorm_quickCheck; returns UNORM_MAYBE if `status` is already a
         * failure on entry.
         */
00271   static inline UNormalizationCheckResult
00272   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00273 
        /**
         * Overload of quickCheck with an extra `options` word; forwards to
         * unorm_quickCheckWithOptions (see the inline definition after the class).
         */
00287   static inline UNormalizationCheckResult
00288   quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00289 
        /**
         * Inline wrapper (defined after the class) that forwards to
         * unorm_isNormalized; returns FALSE if `errorCode` is already a
         * failure on entry.
         */
00310   static inline UBool
00311   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00312 
        /**
         * Overload of isNormalized with an extra `options` word; forwards to
         * unorm_isNormalizedWithOptions (see the inline definition after the class).
         */
00328   static inline UBool
00329   isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00330 
        /**
         * Static utility taking two strings, a destination, a mode and an
         * options word, and returning a UnicodeString reference.
         * Note that `left` and `right` are taken by non-const reference.
         * NOTE(review): the returned reference is presumably `result` --
         * confirm against the implementation.
         */
00360   static UnicodeString &
00361   U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
00362               UnicodeString &result,
00363               UNormalizationMode mode, int32_t options,
00364               UErrorCode &errorCode);
00365 
        /**
         * Inline wrapper (defined after the class) that forwards both strings'
         * buffers and lengths to unorm_compare. Unlike the other static
         * utilities, `options` is unsigned (uint32_t) here.
         */
00430   static inline int32_t
00431   compare(const UnicodeString &s1, const UnicodeString &s2,
00432           uint32_t options,
00433           UErrorCode &errorCode);
00434 
00435   //-------------------------------------------------------------------------
00436   // Iteration API
00437   //-------------------------------------------------------------------------
00438 
        // The five stepping functions below are non-const and each returns a
        // UChar32. Their behavior is defined out of line.
00447   UChar32              current(void);
00448 
00457   UChar32              first(void);
00458 
00467   UChar32              last(void);
00468 
00483   UChar32              next(void);
00484 
00499   UChar32              previous(void);
00500 
        // Position control: setIndexOnly() takes an int32_t index, reset()
        // takes nothing; neither returns a value.
00510   void                 setIndexOnly(int32_t index);
00511 
00517   void                reset(void);
00518 
        // Const index accessors, each returning an int32_t.
00533   int32_t            getIndex(void) const;
00534 
00543   int32_t            startIndex(void) const;
00544 
00555   int32_t            endIndex(void) const;
00556 
        /** Equality comparison with another Normalizer; defined out of line. */
00565   UBool        operator==(const Normalizer& that) const;
00566 
        /** Inequality; defined inline after the class as the negation of operator==. */
00575   inline UBool        operator!=(const Normalizer& that) const;
00576 
        /**
         * Returns a pointer to a Normalizer.
         * NOTE(review): presumably a heap-allocated copy owned by the caller --
         * confirm against the implementation.
         */
00583   Normalizer*        clone(void) const;
00584 
        /** Returns an int32_t hash code for this object. */
00591   int32_t                hashCode(void) const;
00592 
00593   //-------------------------------------------------------------------------
00594   // Property access methods
00595   //-------------------------------------------------------------------------
00596 
        // Mode accessors. NOTE(review): presumably these read and write the
        // private fUMode member -- confirm against the implementation.
00612   void setMode(UNormalizationMode newMode);
00613 
00624   UNormalizationMode getUMode(void) const;
00625 
        // Option accessors: setOption() takes an option identifier and a UBool
        // value; getOption() returns a UBool for an option identifier.
        // NOTE(review): presumably backed by the private fOptions member --
        // confirm against the implementation.
00642   void setOption(int32_t option,
00643          UBool value);
00644 
00655   UBool getOption(int32_t option) const;
00656 
        // setText() overloads mirror the three public constructors'
        // input forms (UnicodeString, CharacterIterator, UChar array + length)
        // and report errors through `status`.
00665   void setText(const UnicodeString& newText,
00666            UErrorCode &status);
00667 
00676   void setText(const CharacterIterator& newText,
00677            UErrorCode &status);
00678 
00688   void setText(const UChar* newText,
00689                     int32_t length,
00690             UErrorCode &status);
        /** Writes into `result`; note this member is not declared const. */
00697   void            getText(UnicodeString&  result);
00698 
        // Class-ID accessors: one static, one virtual.
00704   static UClassID U_EXPORT2 getStaticClassID();
00705 
00711   virtual UClassID getDynamicClassID() const;
00712 
00713 private:
00714   //-------------------------------------------------------------------------
00715   // Private functions
00716   //-------------------------------------------------------------------------
00717 
00718   Normalizer(); // default constructor not implemented
00719   Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
00720 
00721   // Private utility methods for iteration
00722   // For documentation, see the source code
00723   UBool nextNormalize();
00724   UBool previousNormalize();
00725 
00726   void    init(CharacterIterator *iter);
00727   void    clearBuffer(void);
00728 
00729   //-------------------------------------------------------------------------
00730   // Private data
00731   //-------------------------------------------------------------------------
00732 
00733   UNormalizationMode  fUMode;
00734   int32_t             fOptions;
00735 
00736   // The input text and our position in it
00737   UCharIterator       *text;
00738 
00739   // The normalization buffer is the result of normalization
00740   // of the source in [currentIndex..nextIndex[ .
00741   int32_t         currentIndex, nextIndex;
00742 
00743   // A buffer for holding intermediate results
00744   UnicodeString       buffer;
00745   int32_t         bufferPos;
00746 
00747 };
00748 
00749 //-------------------------------------------------------------------------
00750 // Inline implementations
00751 //-------------------------------------------------------------------------
00752 
00753 inline UBool
00754 Normalizer::operator!= (const Normalizer& other) const
00755 { return ! operator==(other); }
00756 
00757 inline UNormalizationCheckResult
00758 Normalizer::quickCheck(const UnicodeString& source,
00759                        UNormalizationMode mode,
00760                        UErrorCode &status) {
00761     if(U_FAILURE(status)) {
00762         return UNORM_MAYBE;
00763     }
00764 
00765     return unorm_quickCheck(source.getBuffer(), source.length(),
00766                             mode, &status);
00767 }
00768 
00769 inline UNormalizationCheckResult
00770 Normalizer::quickCheck(const UnicodeString& source,
00771                        UNormalizationMode mode, int32_t options,
00772                        UErrorCode &status) {
00773     if(U_FAILURE(status)) {
00774         return UNORM_MAYBE;
00775     }
00776 
00777     return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
00778                                        mode, options, &status);
00779 }
00780 
00781 inline UBool
00782 Normalizer::isNormalized(const UnicodeString& source,
00783                          UNormalizationMode mode,
00784                          UErrorCode &status) {
00785     if(U_FAILURE(status)) {
00786         return FALSE;
00787     }
00788 
00789     return unorm_isNormalized(source.getBuffer(), source.length(),
00790                               mode, &status);
00791 }
00792 
00793 inline UBool
00794 Normalizer::isNormalized(const UnicodeString& source,
00795                          UNormalizationMode mode, int32_t options,
00796                          UErrorCode &status) {
00797     if(U_FAILURE(status)) {
00798         return FALSE;
00799     }
00800 
00801     return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
00802                                          mode, options, &status);
00803 }
00804 
00805 inline int32_t
00806 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00807                     uint32_t options,
00808                     UErrorCode &errorCode) {
00809   // all argument checking is done in unorm_compare
00810   return unorm_compare(s1.getBuffer(), s1.length(),
00811                        s2.getBuffer(), s2.length(),
00812                        options,
00813                        &errorCode);
00814 }
00815 
00816 U_NAMESPACE_END
00817 
00818 #endif /* #if !UCONFIG_NO_NORMALIZATION */
00819 
00820 #endif // NORMLZR_H

Generated on Mon Mar 24 16:24:20 2008 for ICU 3.4.1 by  doxygen 1.4.7