uniset.h

Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 * Copyright (C) 1999-2005, International Business Machines Corporation
00004 * and others. All Rights Reserved.
00005 ***************************************************************************
00006 *   Date        Name        Description
00007 *   10/20/99    alan        Creation.
00008 ***************************************************************************
00009 */
00010 
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013 
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017 
00023 U_NAMESPACE_BEGIN
00024 
00025 class ParsePosition; // forward declaration: appears below only as ParsePosition& parameters
00026 class SymbolTable; // forward declaration: appears below only as const SymbolTable* parameters
00027 class UVector; // forward declaration: appears below only as the type of the UVector* strings member
00028 class RuleCharacterIterator; // forward declaration: appears below only as RuleCharacterIterator& parameters of private helpers
00029 
00261 class U_COMMON_API UnicodeSet : public UnicodeFilter { // Set type holding UChar32 values (list) and UnicodeString members (strings); derives from UnicodeFilter.
00262     // NOTE(review): the API doc comments are absent from this listing (the listing numbers jump where they stood). Comments added here state only what the declarations show; anything inferred is marked "presumably".
00263     int32_t len; // length of list used; 0 <= len <= capacity
00264     int32_t capacity; // capacity of list
00265     int32_t bufferCapacity; // capacity of buffer
00266     UChar32* list; // MUST be terminated with HIGH
00267     UChar32* buffer; // internal buffer, may be NULL
00268 
00269     UVector* strings; // maintained in sorted order
00270 
00280     UnicodeString pat; // NOTE(review): undocumented in this listing; presumably the pattern text associated with this set — confirm in the implementation
00281 
00282 public:
00283 
00284     enum { // unnamed enum supplying two class-scope integer constants
00289         MIN_VALUE = 0, // lower constant; presumably the smallest value a set may hold — confirm
00290 
00295         MAX_VALUE = 0x10ffff // 0x10ffff is the last Unicode code point (U+10FFFF); presumably the largest value a set may hold — confirm
00296     };
00297 
00298     //----------------------------------------------------------------
00299     // Constructors &c
00300     //----------------------------------------------------------------
00301 
00302 public:
00303 
00308     UnicodeSet(); // default constructor
00309 
00318     UnicodeSet(UChar32 start, UChar32 end); // constructs from a start/end pair; presumably an inclusive range — TODO confirm
00319 
00328     UnicodeSet(const UnicodeString& pattern, // constructs from a pattern string; status is a UErrorCode passed by non-const reference
00329                UErrorCode& status);
00330 
00343     UnicodeSet(const UnicodeString& pattern, // pattern constructor that additionally takes option bits and a const SymbolTable*
00344                uint32_t options,
00345                const SymbolTable* symbols,
00346                UErrorCode& status);
00347 
00361     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, // pattern constructor that additionally takes a ParsePosition by non-const reference
00362                uint32_t options,
00363                const SymbolTable* symbols,
00364                UErrorCode& status);
00365 
00366 #ifdef U_USE_UNICODESET_DEPRECATES // the category constructor below is declared only when this macro is defined
00367 
00373     UnicodeSet(int8_t category, UErrorCode& status);
00374 #endif
00375 
00380     UnicodeSet(const UnicodeSet& o); // copy constructor
00381 
00386     virtual ~UnicodeSet(); // virtual destructor
00387 
00392     UnicodeSet& operator=(const UnicodeSet& o); // copy assignment
00393 
00405     virtual UBool operator==(const UnicodeSet& o) const; // virtual equality comparison against another UnicodeSet
00406 
00412     UBool operator!=(const UnicodeSet& o) const; // defined inline after the class as the negation of operator==(o)
00413 
00420     virtual UnicodeFunctor* clone() const; // returns a UnicodeFunctor*; presumably a newly allocated copy owned by the caller — confirm
00421 
00429     virtual int32_t hashCode(void) const; // returns an int32_t hash value
00430 
00431     //----------------------------------------------------------------
00432     // Public API
00433     //----------------------------------------------------------------
00434     // Many members below return UnicodeSet&; presumably *this, to allow call chaining — confirm in the implementation.
00444     UnicodeSet& set(UChar32 start, UChar32 end); // takes a start/end pair; presumably replaces the contents with that range — confirm
00445 
00451     static UBool resemblesPattern(const UnicodeString& pattern, // static test on a pattern string at position pos; needs no instance
00452                                   int32_t pos);
00453 
00465     UnicodeSet& applyPattern(const UnicodeString& pattern, // public overload 1 of 3: pattern + status
00466                              UErrorCode& status);
00467 
00483     UnicodeSet& applyPattern(const UnicodeString& pattern, // public overload 2 of 3: adds option bits and a const SymbolTable*
00484                              uint32_t options,
00485                              const SymbolTable* symbols,
00486                              UErrorCode& status);
00487 
00518     UnicodeSet& applyPattern(const UnicodeString& pattern, // public overload 3 of 3: adds a ParsePosition passed by non-const reference
00519                              ParsePosition& pos,
00520                              uint32_t options,
00521                              const SymbolTable* symbols,
00522                              UErrorCode& status);
00523 
00536     virtual UnicodeString& toPattern(UnicodeString& result, // takes the output string by reference and returns a UnicodeString& (presumably result itself — confirm); escapeUnprintable defaults to FALSE
00537                              UBool escapeUnprintable = FALSE) const;
00538 
00560     UnicodeSet& applyIntPropertyValue(UProperty prop, // property given as a UProperty plus an int32_t value
00561                                       int32_t value,
00562                                       UErrorCode& ec);
00563 
00592     UnicodeSet& applyPropertyAlias(const UnicodeString& prop, // property and value both given as strings
00593                                    const UnicodeString& value,
00594                                    UErrorCode& ec);
00595 
00604     virtual int32_t size(void) const; // returns an int32_t count
00605 
00612     virtual UBool isEmpty(void) const;
00613 
00620     virtual UBool contains(UChar32 c) const; // membership test: single UChar32
00621 
00630     virtual UBool contains(UChar32 start, UChar32 end) const; // membership test: start/end pair
00631 
00639     UBool contains(const UnicodeString& s) const; // membership test: string argument
00640 
00648     virtual UBool containsAll(const UnicodeSet& c) const; // containsAll: set argument
00649 
00657     UBool containsAll(const UnicodeString& s) const; // containsAll: string argument
00658 
00667     UBool containsNone(UChar32 start, UChar32 end) const; // containsNone: start/end pair
00668 
00676     UBool containsNone(const UnicodeSet& c) const; // containsNone: set argument
00677 
00685     UBool containsNone(const UnicodeString& s) const; // containsNone: string argument
00686 
00695     inline UBool containsSome(UChar32 start, UChar32 end) const; // defined inline after the class as !containsNone(start, end)
00696 
00704     inline UBool containsSome(const UnicodeSet& s) const; // defined inline after the class as !containsNone(s)
00705 
00713     inline UBool containsSome(const UnicodeString& s) const; // defined inline after the class as !containsNone(s)
00714 
00719     virtual UMatchDegree matches(const Replaceable& text, // non-const member; offset is passed by non-const reference, so the callee can update it
00720                          int32_t& offset,
00721                          int32_t limit,
00722                          UBool incremental);
00723 
00724 private:
00746     static int32_t matchRest(const Replaceable& text, // private static helper taking text, a start/limit pair and a string
00747                              int32_t start, int32_t limit,
00748                              const UnicodeString& s);
00749 
00759     int32_t findCodePoint(UChar32 c) const; // private lookup returning an int32_t; presumably an index into list — confirm
00760 
00761 public:
00762 
00770     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; // const member; the destination set is passed by non-const reference
00771 
00780     int32_t indexOf(UChar32 c) const; // UChar32 -> int32_t index
00781 
00791     UChar32 charAt(int32_t index) const; // int32_t index -> UChar32; presumably the inverse of indexOf — confirm
00792 
00806     virtual UnicodeSet& add(UChar32 start, UChar32 end); // add: start/end pair (virtual)
00807 
00814     UnicodeSet& add(UChar32 c); // add: single UChar32
00815 
00826     UnicodeSet& add(const UnicodeString& s); // add: string argument
00827 
00828  private:
00834     static int32_t getSingleCP(const UnicodeString& s); // private static helper: string -> int32_t
00835 
00836     void _add(const UnicodeString& s); // private string-add helper returning void
00837 
00838  public:
00846     UnicodeSet& addAll(const UnicodeString& s); // string-argument forms of addAll/retainAll/complementAll/removeAll follow; set-argument forms are declared further down
00847 
00855     UnicodeSet& retainAll(const UnicodeString& s);
00856 
00864     UnicodeSet& complementAll(const UnicodeString& s);
00865 
00873     UnicodeSet& removeAll(const UnicodeString& s);
00874 
00883     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s); // static factory returning UnicodeSet*; ownership is not stated here — presumably the caller's — confirm
00884 
00885 
00893     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s); // static factory returning UnicodeSet*; same ownership caveat as createFrom
00894 
00907     virtual UnicodeSet& retain(UChar32 start, UChar32 end); // retain: start/end pair (virtual)
00908 
00909 
00914     UnicodeSet& retain(UChar32 c); // retain: single UChar32
00915 
00928     virtual UnicodeSet& remove(UChar32 start, UChar32 end); // remove: start/end pair (virtual)
00929 
00936     UnicodeSet& remove(UChar32 c); // remove: single UChar32
00937 
00946     UnicodeSet& remove(const UnicodeString& s); // remove: string argument
00947 
00954     virtual UnicodeSet& complement(void); // complement: no-argument form (virtual)
00955 
00969     virtual UnicodeSet& complement(UChar32 start, UChar32 end); // complement: start/end pair (virtual)
00970 
00977     UnicodeSet& complement(UChar32 c); // complement: single UChar32
00978 
00988     UnicodeSet& complement(const UnicodeString& s); // complement: string argument
00989 
01001     virtual UnicodeSet& addAll(const UnicodeSet& c); // set-argument forms of addAll/retainAll/removeAll/complementAll; all four are virtual
01002 
01013     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01014 
01025     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01026 
01036     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01037 
01043     virtual UnicodeSet& clear(void);
01044 
01068     UnicodeSet& closeOver(int32_t attribute); // attribute is an int32_t selector; its valid values are not declared in this class
01069 
01077     virtual int32_t getRangeCount(void) const; // range accessors: a count, then start/end looked up by int32_t index
01078 
01086     virtual UChar32 getRangeStart(int32_t index) const;
01087 
01095     virtual UChar32 getRangeEnd(int32_t index) const;
01096 
01145     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; // const member; dest/destCapacity describe a caller-supplied uint16_t buffer; returns int32_t (presumably the length written or needed — confirm)
01146 
01152     virtual UnicodeSet& compact();
01153 
01165     static UClassID U_EXPORT2 getStaticClassID(void); // class-level UClassID accessor (static)
01166 
01175     virtual UClassID getDynamicClassID(void) const; // per-object UClassID accessor (virtual)
01176 
01177 private:
01178 
01179     // Private API for the USet API
01180 
01181     friend class USetAccess; // friendship gives USetAccess access to the private members, including the two accessors below
01182 
01183     int32_t getStringCount() const; // presumably the number of entries in strings — confirm
01184 
01185     const UnicodeString* getString(int32_t index) const; // returns a pointer to const UnicodeString for an int32_t index
01186 
01187     //----------------------------------------------------------------
01188     // RuleBasedTransliterator support
01189     //----------------------------------------------------------------
01190 
01191 private:
01192 
01198     virtual UBool matchesIndexValue(uint8_t v) const; // private virtual taking a uint8_t
01199 
01200 private:
01201 
01202     //----------------------------------------------------------------
01203     // Implementation: Pattern parsing
01204     //----------------------------------------------------------------
01205 
01206     void applyPattern(RuleCharacterIterator& chars, // private overload: reads from a RuleCharacterIterator, takes rebuiltPat by non-const reference, returns void
01207                       const SymbolTable* symbols,
01208                       UnicodeString& rebuiltPat,
01209                       uint32_t options,
01210                       UErrorCode& ec);
01211 
01212     //----------------------------------------------------------------
01213     // Implementation: Utility methods
01214     //----------------------------------------------------------------
01215 
01216     void ensureCapacity(int32_t newLen); // presumably grows list so that capacity >= newLen — confirm
01217 
01218     void ensureBufferCapacity(int32_t newLen); // presumably grows buffer so that bufferCapacity >= newLen — confirm
01219 
01220     void swapBuffers(void); // presumably exchanges list and buffer — confirm
01221 
01222     UBool allocateStrings(); // returns UBool; presumably creates the strings vector and reports success — confirm
01223 
01224     UnicodeString& _toPattern(UnicodeString& result, // private counterpart of toPattern; escapeUnprintable has no default here
01225                               UBool escapeUnprintable) const;
01226 
01227     UnicodeString& _generatePattern(UnicodeString& result, // same parameter list as _toPattern
01228                                     UBool escapeUnprintable) const;
01229 
01230     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable); // static; string form; buf is passed by non-const reference
01231 
01232     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); // static; single-UChar32 form
01233 
01234     //----------------------------------------------------------------
01235     // Implementation: Fundamental operators
01236     //----------------------------------------------------------------
01237 
01238     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity); // the three operators below share one signature: a const UChar32 array, its length, and an int8_t polarity
01239 
01240     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01241 
01242     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01243 
01249     static UBool resemblesPropertyPattern(const UnicodeString& pattern, // static; string + position form
01250                                           int32_t pos);
01251 
01252     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars, // static; RuleCharacterIterator form
01253                                           int32_t iterOpts);
01254 
01293     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern, // string + ParsePosition form; returns UnicodeSet&
01294                                      ParsePosition& ppos,
01295                                      UErrorCode &ec);
01296 
01297     void applyPropertyPattern(RuleCharacterIterator& chars, // RuleCharacterIterator form; returns void
01298                               UnicodeString& rebuiltPat,
01299                               UErrorCode& ec);
01300 
01305     typedef UBool (*Filter)(UChar32 codePoint, void* context); // callback type: takes a code point and an opaque context pointer, returns UBool
01306 
01316     void applyFilter(Filter filter, // takes a Filter callback and the context pointer to hand to it; src is an int32_t whose meaning is not visible here
01317                      void* context,
01318                      int32_t src,
01319                      UErrorCode &status);
01320 
01324     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode); // static; returns a pointer to const UnicodeSet for an int32_t src
01325 
01326     friend class UnicodeSetIterator; // friendship gives UnicodeSetIterator access to the private members
01327 };
01328 
01329 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { // Inequality: the logical negation of operator==.
01330     return !this->operator==(o); // operator== is declared virtual, so the comparison still dispatches dynamically
01331 }
01332 
01333 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { // start/end form: the logical negation of containsNone(start, end).
01334     return !this->containsNone(start, end); // forwards both arguments unchanged
01335 }
01336 
01337 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { // Set form: the logical negation of containsNone(s).
01338     return !this->containsNone(s); // resolves to the containsNone(const UnicodeSet&) overload
01339 }
01340 
01341 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { // String form: the logical negation of containsNone(s).
01342     return !this->containsNone(s); // resolves to the containsNone(const UnicodeString&) overload
01343 }
01344 
01345 U_NAMESPACE_END
01346 
01347 #endif

Generated on Mon Mar 24 16:24:21 2008 for ICU 3.4.1 by  doxygen 1.4.7