ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
unistr.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
19 */
20 
21 #ifndef UNISTR_H
22 #define UNISTR_H
23 
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34 
35 struct UConverter; // unicode/ucnv.h
36 class StringThreadTest;
37 
38 #ifndef U_COMPARE_CODE_POINT_ORDER
39 /* see also ustring.h and unorm.h */
45 #define U_COMPARE_CODE_POINT_ORDER 0x8000
46 #endif
47 
48 #ifndef USTRING_H
49 
52 U_STABLE int32_t U_EXPORT2
53 u_strlen(const UChar *s);
54 #endif
55 
57 
58 class BreakIterator; // unicode/brkiter.h
59 class Locale; // unicode/locid.h
61 class UnicodeStringAppendable; // unicode/appendable.h
62 
63 /* The <iostream> include has been moved to unicode/ustream.h */
64 
75 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
76 
// UNICODE_STRING(cs, _length): builds a UnicodeString from the literal cs with
// the given length. The expansion is chosen at preprocessing time:
//  - U_DECLARE_UTF16 available: wrap cs with U_DECLARE_UTF16 and pass it, cast
//    to const UChar *, to the (TRUE, text, length) constructor.
//  - wchar_t has the same size as UChar (and the charset family is ASCII, or
//    UChar is 2 bytes and U_WCHAR_IS_UTF16 is defined): paste an L prefix onto
//    cs and pass the wide literal, cast to const UChar *, the same way.
//  - UChar is 1 byte and the charset family is ASCII: pass cs itself, cast to
//    const UChar *, the same way.
//  - otherwise: call the (cs, _length, US_INV) constructor.
94 #if defined(U_DECLARE_UTF16)
95 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
96 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
97 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
98 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
99 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
100 #else
101 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
102 #endif
103 
117 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
118 
189 {
190 public:
191 
200  enum EInvariant {
205  kInvariant
206  };
207 
208  //========================================
209  // Read-only operations
210  //========================================
211 
212  /* Comparison - bitwise only - for international comparison use collation */
213 
221  inline UBool operator== (const UnicodeString& text) const;
222 
230  inline UBool operator!= (const UnicodeString& text) const;
231 
239  inline UBool operator> (const UnicodeString& text) const;
240 
248  inline UBool operator< (const UnicodeString& text) const;
249 
257  inline UBool operator>= (const UnicodeString& text) const;
258 
266  inline UBool operator<= (const UnicodeString& text) const;
267 
279  inline int8_t compare(const UnicodeString& text) const;
280 
295  inline int8_t compare(int32_t start,
296  int32_t length,
297  const UnicodeString& text) const;
298 
316  inline int8_t compare(int32_t start,
317  int32_t length,
318  const UnicodeString& srcText,
319  int32_t srcStart,
320  int32_t srcLength) const;
321 
334  inline int8_t compare(const UChar *srcChars,
335  int32_t srcLength) const;
336 
351  inline int8_t compare(int32_t start,
352  int32_t length,
353  const UChar *srcChars) const;
354 
372  inline int8_t compare(int32_t start,
373  int32_t length,
374  const UChar *srcChars,
375  int32_t srcStart,
376  int32_t srcLength) const;
377 
395  inline int8_t compareBetween(int32_t start,
396  int32_t limit,
397  const UnicodeString& srcText,
398  int32_t srcStart,
399  int32_t srcLimit) const;
400 
418  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
419 
439  inline int8_t compareCodePointOrder(int32_t start,
440  int32_t length,
441  const UnicodeString& srcText) const;
442 
464  inline int8_t compareCodePointOrder(int32_t start,
465  int32_t length,
466  const UnicodeString& srcText,
467  int32_t srcStart,
468  int32_t srcLength) const;
469 
488  inline int8_t compareCodePointOrder(const UChar *srcChars,
489  int32_t srcLength) const;
490 
510  inline int8_t compareCodePointOrder(int32_t start,
511  int32_t length,
512  const UChar *srcChars) const;
513 
535  inline int8_t compareCodePointOrder(int32_t start,
536  int32_t length,
537  const UChar *srcChars,
538  int32_t srcStart,
539  int32_t srcLength) const;
540 
562  inline int8_t compareCodePointOrderBetween(int32_t start,
563  int32_t limit,
564  const UnicodeString& srcText,
565  int32_t srcStart,
566  int32_t srcLimit) const;
567 
586  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
587 
608  inline int8_t caseCompare(int32_t start,
609  int32_t length,
610  const UnicodeString& srcText,
611  uint32_t options) const;
612 
635  inline int8_t caseCompare(int32_t start,
636  int32_t length,
637  const UnicodeString& srcText,
638  int32_t srcStart,
639  int32_t srcLength,
640  uint32_t options) const;
641 
661  inline int8_t caseCompare(const UChar *srcChars,
662  int32_t srcLength,
663  uint32_t options) const;
664 
685  inline int8_t caseCompare(int32_t start,
686  int32_t length,
687  const UChar *srcChars,
688  uint32_t options) const;
689 
712  inline int8_t caseCompare(int32_t start,
713  int32_t length,
714  const UChar *srcChars,
715  int32_t srcStart,
716  int32_t srcLength,
717  uint32_t options) const;
718 
741  inline int8_t caseCompareBetween(int32_t start,
742  int32_t limit,
743  const UnicodeString& srcText,
744  int32_t srcStart,
745  int32_t srcLimit,
746  uint32_t options) const;
747 
755  inline UBool startsWith(const UnicodeString& text) const;
756 
767  inline UBool startsWith(const UnicodeString& srcText,
768  int32_t srcStart,
769  int32_t srcLength) const;
770 
779  inline UBool startsWith(const UChar *srcChars,
780  int32_t srcLength) const;
781 
791  inline UBool startsWith(const UChar *srcChars,
792  int32_t srcStart,
793  int32_t srcLength) const;
794 
802  inline UBool endsWith(const UnicodeString& text) const;
803 
814  inline UBool endsWith(const UnicodeString& srcText,
815  int32_t srcStart,
816  int32_t srcLength) const;
817 
826  inline UBool endsWith(const UChar *srcChars,
827  int32_t srcLength) const;
828 
839  inline UBool endsWith(const UChar *srcChars,
840  int32_t srcStart,
841  int32_t srcLength) const;
842 
843 
844  /* Searching - bitwise only */
845 
854  inline int32_t indexOf(const UnicodeString& text) const;
855 
865  inline int32_t indexOf(const UnicodeString& text,
866  int32_t start) const;
867 
879  inline int32_t indexOf(const UnicodeString& text,
880  int32_t start,
881  int32_t length) const;
882 
899  inline int32_t indexOf(const UnicodeString& srcText,
900  int32_t srcStart,
901  int32_t srcLength,
902  int32_t start,
903  int32_t length) const;
904 
916  inline int32_t indexOf(const UChar *srcChars,
917  int32_t srcLength,
918  int32_t start) const;
919 
932  inline int32_t indexOf(const UChar *srcChars,
933  int32_t srcLength,
934  int32_t start,
935  int32_t length) const;
936 
953  int32_t indexOf(const UChar *srcChars,
954  int32_t srcStart,
955  int32_t srcLength,
956  int32_t start,
957  int32_t length) const;
958 
966  inline int32_t indexOf(UChar c) const;
967 
976  inline int32_t indexOf(UChar32 c) const;
977 
986  inline int32_t indexOf(UChar c,
987  int32_t start) const;
988 
998  inline int32_t indexOf(UChar32 c,
999  int32_t start) const;
1000 
1011  inline int32_t indexOf(UChar c,
1012  int32_t start,
1013  int32_t length) const;
1014 
1026  inline int32_t indexOf(UChar32 c,
1027  int32_t start,
1028  int32_t length) const;
1029 
1038  inline int32_t lastIndexOf(const UnicodeString& text) const;
1039 
1049  inline int32_t lastIndexOf(const UnicodeString& text,
1050  int32_t start) const;
1051 
1063  inline int32_t lastIndexOf(const UnicodeString& text,
1064  int32_t start,
1065  int32_t length) const;
1066 
1083  inline int32_t lastIndexOf(const UnicodeString& srcText,
1084  int32_t srcStart,
1085  int32_t srcLength,
1086  int32_t start,
1087  int32_t length) const;
1088 
1099  inline int32_t lastIndexOf(const UChar *srcChars,
1100  int32_t srcLength,
1101  int32_t start) const;
1102 
1115  inline int32_t lastIndexOf(const UChar *srcChars,
1116  int32_t srcLength,
1117  int32_t start,
1118  int32_t length) const;
1119 
1136  int32_t lastIndexOf(const UChar *srcChars,
1137  int32_t srcStart,
1138  int32_t srcLength,
1139  int32_t start,
1140  int32_t length) const;
1141 
1149  inline int32_t lastIndexOf(UChar c) const;
1150 
1159  inline int32_t lastIndexOf(UChar32 c) const;
1160 
1169  inline int32_t lastIndexOf(UChar c,
1170  int32_t start) const;
1171 
1181  inline int32_t lastIndexOf(UChar32 c,
1182  int32_t start) const;
1183 
1194  inline int32_t lastIndexOf(UChar c,
1195  int32_t start,
1196  int32_t length) const;
1197 
1209  inline int32_t lastIndexOf(UChar32 c,
1210  int32_t start,
1211  int32_t length) const;
1212 
1213 
1214  /* Character access */
1215 
1224  inline UChar charAt(int32_t offset) const;
1225 
1233  inline UChar operator[] (int32_t offset) const;
1234 
1246  inline UChar32 char32At(int32_t offset) const;
1247 
1263  inline int32_t getChar32Start(int32_t offset) const;
1264 
1281  inline int32_t getChar32Limit(int32_t offset) const;
1282 
1333  int32_t moveIndex32(int32_t index, int32_t delta) const;
1334 
1335  /* Substring extraction */
1336 
1352  inline void extract(int32_t start,
1353  int32_t length,
1354  UChar *dst,
1355  int32_t dstStart = 0) const;
1356 
1378  int32_t
1379  extract(UChar *dest, int32_t destCapacity,
1380  UErrorCode &errorCode) const;
1381 
1392  inline void extract(int32_t start,
1393  int32_t length,
1394  UnicodeString& target) const;
1395 
1407  inline void extractBetween(int32_t start,
1408  int32_t limit,
1409  UChar *dst,
1410  int32_t dstStart = 0) const;
1411 
1421  virtual void extractBetween(int32_t start,
1422  int32_t limit,
1423  UnicodeString& target) const;
1424 
1446  int32_t extract(int32_t start,
1447  int32_t startLength,
1448  char *target,
1449  int32_t targetCapacity,
1450  enum EInvariant inv) const;
1451 
1452 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1453 
1473  int32_t extract(int32_t start,
1474  int32_t startLength,
1475  char *target,
1476  uint32_t targetLength) const;
1477 
1478 #endif
1479 
1480 #if !UCONFIG_NO_CONVERSION
1481 
1507  inline int32_t extract(int32_t start,
1508  int32_t startLength,
1509  char *target,
1510  const char *codepage = 0) const;
1511 
1541  int32_t extract(int32_t start,
1542  int32_t startLength,
1543  char *target,
1544  uint32_t targetLength,
1545  const char *codepage) const;
1546 
1564  int32_t extract(char *dest, int32_t destCapacity,
1565  UConverter *cnv,
1566  UErrorCode &errorCode) const;
1567 
1568 #endif
1569 
1583  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1584 
1595  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1596 
1608  void toUTF8(ByteSink &sink) const;
1609 
1610 #if U_HAVE_STD_STRING
1611 
// Converts this string to UTF-8 and delivers the bytes to `result`.
// A StringByteSink bound to `result` is handed to toUTF8(ByteSink &), and
// `result` itself is returned so calls can be chained.
// NOTE(review): presumably the bytes are appended to whatever `result`
// already holds (StringByteSink behavior is not visible here) -- confirm.
1624  template<typename StringClass>
1625  StringClass &toUTF8String(StringClass &result) const {
1626  StringByteSink<StringClass> sbs(&result);
1627  toUTF8(sbs);
1628  return result;
1629  }
1630 
1631 #endif
1632 
1648  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1649 
1650  /* Length operations */
1651 
1660  inline int32_t length(void) const;
1661 
1675  int32_t
1676  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1677 
1701  UBool
1702  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1703 
1709  inline UBool isEmpty(void) const;
1710 
1720  inline int32_t getCapacity(void) const;
1721 
1722  /* Other operations */
1723 
1729  inline int32_t hashCode(void) const;
1730 
1743  inline UBool isBogus(void) const;
1744 
1745 
1746  //========================================
1747  // Write operations
1748  //========================================
1749 
1750  /* Assignment operations */
1751 
1759  UnicodeString &operator=(const UnicodeString &srcText);
1760 
1781  UnicodeString &fastCopyFrom(const UnicodeString &src);
1782 
1790  inline UnicodeString& operator= (UChar ch);
1791 
1799  inline UnicodeString& operator= (UChar32 ch);
1800 
1812  inline UnicodeString& setTo(const UnicodeString& srcText,
1813  int32_t srcStart);
1814 
1828  inline UnicodeString& setTo(const UnicodeString& srcText,
1829  int32_t srcStart,
1830  int32_t srcLength);
1831 
1840  inline UnicodeString& setTo(const UnicodeString& srcText);
1841 
1850  inline UnicodeString& setTo(const UChar *srcChars,
1851  int32_t srcLength);
1852 
1861  UnicodeString& setTo(UChar srcChar);
1862 
1871  UnicodeString& setTo(UChar32 srcChar);
1872 
1893  UnicodeString &setTo(UBool isTerminated,
1894  const UChar *text,
1895  int32_t textLength);
1896 
1916  UnicodeString &setTo(UChar *buffer,
1917  int32_t buffLength,
1918  int32_t buffCapacity);
1919 
1960  void setToBogus();
1961 
1969  UnicodeString& setCharAt(int32_t offset,
1970  UChar ch);
1971 
1972 
1973  /* Append operations */
1974 
1982  inline UnicodeString& operator+= (UChar ch);
1983 
1991  inline UnicodeString& operator+= (UChar32 ch);
1992 
2000  inline UnicodeString& operator+= (const UnicodeString& srcText);
2001 
2016  inline UnicodeString& append(const UnicodeString& srcText,
2017  int32_t srcStart,
2018  int32_t srcLength);
2019 
2027  inline UnicodeString& append(const UnicodeString& srcText);
2028 
2042  inline UnicodeString& append(const UChar *srcChars,
2043  int32_t srcStart,
2044  int32_t srcLength);
2045 
2055  inline UnicodeString& append(const UChar *srcChars,
2056  int32_t srcLength);
2057 
2064  inline UnicodeString& append(UChar srcChar);
2065 
2072  inline UnicodeString& append(UChar32 srcChar);
2073 
2074 
2075  /* Insert operations */
2076 
2090  inline UnicodeString& insert(int32_t start,
2091  const UnicodeString& srcText,
2092  int32_t srcStart,
2093  int32_t srcLength);
2094 
2103  inline UnicodeString& insert(int32_t start,
2104  const UnicodeString& srcText);
2105 
2119  inline UnicodeString& insert(int32_t start,
2120  const UChar *srcChars,
2121  int32_t srcStart,
2122  int32_t srcLength);
2123 
2133  inline UnicodeString& insert(int32_t start,
2134  const UChar *srcChars,
2135  int32_t srcLength);
2136 
2145  inline UnicodeString& insert(int32_t start,
2146  UChar srcChar);
2147 
2156  inline UnicodeString& insert(int32_t start,
2157  UChar32 srcChar);
2158 
2159 
2160  /* Replace operations */
2161 
2179  UnicodeString& replace(int32_t start,
2180  int32_t length,
2181  const UnicodeString& srcText,
2182  int32_t srcStart,
2183  int32_t srcLength);
2184 
2197  UnicodeString& replace(int32_t start,
2198  int32_t length,
2199  const UnicodeString& srcText);
2200 
2218  UnicodeString& replace(int32_t start,
2219  int32_t length,
2220  const UChar *srcChars,
2221  int32_t srcStart,
2222  int32_t srcLength);
2223 
2236  inline UnicodeString& replace(int32_t start,
2237  int32_t length,
2238  const UChar *srcChars,
2239  int32_t srcLength);
2240 
2252  inline UnicodeString& replace(int32_t start,
2253  int32_t length,
2254  UChar srcChar);
2255 
2267  inline UnicodeString& replace(int32_t start,
2268  int32_t length,
2269  UChar32 srcChar);
2270 
2280  inline UnicodeString& replaceBetween(int32_t start,
2281  int32_t limit,
2282  const UnicodeString& srcText);
2283 
2298  inline UnicodeString& replaceBetween(int32_t start,
2299  int32_t limit,
2300  const UnicodeString& srcText,
2301  int32_t srcStart,
2302  int32_t srcLimit);
2303 
2314  virtual void handleReplaceBetween(int32_t start,
2315  int32_t limit,
2316  const UnicodeString& text);
2317 
2323  virtual UBool hasMetaData() const;
2324 
2340  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2341 
2342  /* Search and replace operations */
2343 
2352  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2353  const UnicodeString& newText);
2354 
2366  inline UnicodeString& findAndReplace(int32_t start,
2367  int32_t length,
2368  const UnicodeString& oldText,
2369  const UnicodeString& newText);
2370 
2388  UnicodeString& findAndReplace(int32_t start,
2389  int32_t length,
2390  const UnicodeString& oldText,
2391  int32_t oldStart,
2392  int32_t oldLength,
2393  const UnicodeString& newText,
2394  int32_t newStart,
2395  int32_t newLength);
2396 
2397 
2398  /* Remove operations */
2399 
2405  inline UnicodeString& remove(void);
2406 
2415  inline UnicodeString& remove(int32_t start,
2416  int32_t length = (int32_t)INT32_MAX);
2417 
2426  inline UnicodeString& removeBetween(int32_t start,
2427  int32_t limit = (int32_t)INT32_MAX);
2428 
2438  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2439 
2440  /* Length operations */
2441 
2453  UBool padLeading(int32_t targetLength,
2454  UChar padChar = 0x0020);
2455 
2467  UBool padTrailing(int32_t targetLength,
2468  UChar padChar = 0x0020);
2469 
2476  inline UBool truncate(int32_t targetLength);
2477 
2483  UnicodeString& trim(void);
2484 
2485 
2486  /* Miscellaneous operations */
2487 
2493  inline UnicodeString& reverse(void);
2494 
2503  inline UnicodeString& reverse(int32_t start,
2504  int32_t length);
2505 
2512  UnicodeString& toUpper(void);
2513 
2521  UnicodeString& toUpper(const Locale& locale);
2522 
2529  UnicodeString& toLower(void);
2530 
2538  UnicodeString& toLower(const Locale& locale);
2539 
2540 #if !UCONFIG_NO_BREAK_ITERATION
2541 
2568  UnicodeString &toTitle(BreakIterator *titleIter);
2569 
2597  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2598 
2630  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2631 
2632 #endif
2633 
2645  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2646 
2647  //========================================
2648  // Access to the internal buffer
2649  //========================================
2650 
2694  UChar *getBuffer(int32_t minCapacity);
2695 
2716  void releaseBuffer(int32_t newLength=-1);
2717 
2748  inline const UChar *getBuffer() const;
2749 
2783  inline const UChar *getTerminatedBuffer();
2784 
2785  //========================================
2786  // Constructors
2787  //========================================
2788 
2792  UnicodeString();
2793 
2805  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2806 
2812  UnicodeString(UChar ch);
2813 
2819  UnicodeString(UChar32 ch);
2820 
2827  UnicodeString(const UChar *text);
2828 
2836  UnicodeString(const UChar *text,
2837  int32_t textLength);
2838 
2858  UnicodeString(UBool isTerminated,
2859  const UChar *text,
2860  int32_t textLength);
2861 
2880  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2881 
2882 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2883 
2890  UnicodeString(const char *codepageData);
2891 
2898  UnicodeString(const char *codepageData, int32_t dataLength);
2899 
2900 #endif
2901 
2902 #if !UCONFIG_NO_CONVERSION
2903 
2921  UnicodeString(const char *codepageData, const char *codepage);
2922 
2940  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
2941 
2963  UnicodeString(
2964  const char *src, int32_t srcLength,
2965  UConverter *cnv,
2966  UErrorCode &errorCode);
2967 
2968 #endif
2969 
2994  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2995 
2996 
3002  UnicodeString(const UnicodeString& that);
3003 
3010  UnicodeString(const UnicodeString& src, int32_t srcStart);
3011 
3019  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3020 
3037  virtual Replaceable *clone() const;
3038 
3042  virtual ~UnicodeString();
3043 
3057  static UnicodeString fromUTF8(const StringPiece &utf8);
3058 
3070  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3071 
3072  /* Miscellaneous operations */
3073 
3108  UnicodeString unescape() const;
3109 
3129  UChar32 unescapeAt(int32_t &offset) const;
3130 
3136  static UClassID U_EXPORT2 getStaticClassID();
3137 
3143  virtual UClassID getDynamicClassID() const;
3144 
3145  //========================================
3146  // Implementation methods
3147  //========================================
3148 
3149 protected:
3154  virtual int32_t getLength() const;
3155 
3161  virtual UChar getCharAt(int32_t offset) const;
3162 
3168  virtual UChar32 getChar32At(int32_t offset) const;
3169 
3170 private:
3171  // For char* constructors. Could be made public.
3172  UnicodeString &setToUTF8(const StringPiece &utf8);
3173  // For extract(char*).
3174  // We could make a toUTF8(target, capacity, errorCode) public but not
3175  // this version: New API will be cleaner if we make callers create substrings
3176  // rather than having start+length on every method,
3177  // and it should take a UErrorCode&.
3178  int32_t
3179  toUTF8(int32_t start, int32_t len,
3180  char *target, int32_t capacity) const;
3181 
3182 
3183  inline int8_t
3184  doCompare(int32_t start,
3185  int32_t length,
3186  const UnicodeString& srcText,
3187  int32_t srcStart,
3188  int32_t srcLength) const;
3189 
3190  int8_t doCompare(int32_t start,
3191  int32_t length,
3192  const UChar *srcChars,
3193  int32_t srcStart,
3194  int32_t srcLength) const;
3195 
3196  inline int8_t
3197  doCompareCodePointOrder(int32_t start,
3198  int32_t length,
3199  const UnicodeString& srcText,
3200  int32_t srcStart,
3201  int32_t srcLength) const;
3202 
3203  int8_t doCompareCodePointOrder(int32_t start,
3204  int32_t length,
3205  const UChar *srcChars,
3206  int32_t srcStart,
3207  int32_t srcLength) const;
3208 
3209  inline int8_t
3210  doCaseCompare(int32_t start,
3211  int32_t length,
3212  const UnicodeString &srcText,
3213  int32_t srcStart,
3214  int32_t srcLength,
3215  uint32_t options) const;
3216 
3217  int8_t
3218  doCaseCompare(int32_t start,
3219  int32_t length,
3220  const UChar *srcChars,
3221  int32_t srcStart,
3222  int32_t srcLength,
3223  uint32_t options) const;
3224 
3225  int32_t doIndexOf(UChar c,
3226  int32_t start,
3227  int32_t length) const;
3228 
3229  int32_t doIndexOf(UChar32 c,
3230  int32_t start,
3231  int32_t length) const;
3232 
3233  int32_t doLastIndexOf(UChar c,
3234  int32_t start,
3235  int32_t length) const;
3236 
3237  int32_t doLastIndexOf(UChar32 c,
3238  int32_t start,
3239  int32_t length) const;
3240 
3241  void doExtract(int32_t start,
3242  int32_t length,
3243  UChar *dst,
3244  int32_t dstStart) const;
3245 
3246  inline void doExtract(int32_t start,
3247  int32_t length,
3248  UnicodeString& target) const;
3249 
3250  inline UChar doCharAt(int32_t offset) const;
3251 
3252  UnicodeString& doReplace(int32_t start,
3253  int32_t length,
3254  const UnicodeString& srcText,
3255  int32_t srcStart,
3256  int32_t srcLength);
3257 
3258  UnicodeString& doReplace(int32_t start,
3259  int32_t length,
3260  const UChar *srcChars,
3261  int32_t srcStart,
3262  int32_t srcLength);
3263 
3264  UnicodeString& doReverse(int32_t start,
3265  int32_t length);
3266 
3267  // calculate hash code
3268  int32_t doHashCode(void) const;
3269 
3270  // get pointer to start of array
3271  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3272  inline UChar* getArrayStart(void);
3273  inline const UChar* getArrayStart(void) const;
3274 
3275  // A UnicodeString object (not necessarily its current buffer)
3276  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3277  inline UBool isWritable() const;
3278 
3279  // Is the current buffer writable?
3280  inline UBool isBufferWritable() const;
3281 
3282  // None of the following does releaseArray().
3283  inline void setLength(int32_t len); // sets only fShortLength and fLength
3284  inline void setToEmpty(); // sets fFlags=kShortString
3285  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3286 
3287  // allocate the array; result may be fStackBuffer
3288  // sets refCount to 1 if appropriate
3289  // sets fArray, fCapacity, and fFlags
3290  // returns boolean for success or failure
3291  UBool allocate(int32_t capacity);
3292 
3293  // release the array if owned
3294  void releaseArray(void);
3295 
3296  // turn a bogus string into an empty one
3297  void unBogus();
3298 
3299  // implements assignment operator, copy constructor, and fastCopyFrom()
3300  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3301 
3302  // Pin start and limit to acceptable values.
3303  inline void pinIndex(int32_t& start) const;
3304  inline void pinIndices(int32_t& start,
3305  int32_t& length) const;
3306 
3307 #if !UCONFIG_NO_CONVERSION
3308 
3309  /* Internal extract() using UConverter. */
3310  int32_t doExtract(int32_t start, int32_t length,
3311  char *dest, int32_t destCapacity,
3312  UConverter *cnv,
3313  UErrorCode &errorCode) const;
3314 
3315  /*
3316  * Real constructor for converting from codepage data.
3317  * It assumes that it is called with !fRefCounted.
3318  *
3319  * If <code>codepage==0</code>, then the default converter
3320  * is used for the platform encoding.
3321  * If <code>codepage</code> is an empty string (<code>""</code>),
3322  * then a simple conversion is performed on the codepage-invariant
3323  * subset ("invariant characters") of the platform encoding. See utypes.h.
3324  */
3325  void doCodepageCreate(const char *codepageData,
3326  int32_t dataLength,
3327  const char *codepage);
3328 
3329  /*
3330  * Worker function for creating a UnicodeString from
3331  * a codepage string using a UConverter.
3332  */
3333  void
3334  doCodepageCreate(const char *codepageData,
3335  int32_t dataLength,
3336  UConverter *converter,
3337  UErrorCode &status);
3338 
3339 #endif
3340 
3341  /*
3342  * This function is called when write access to the array
3343  * is necessary.
3344  *
3345  * We need to make a copy of the array if
3346  * the buffer is read-only, or
3347  * the buffer is refCounted (shared), and refCount>1, or
3348  * the buffer is too small.
3349  *
3350  * Return FALSE if memory could not be allocated.
3351  */
3352  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3353  int32_t growCapacity = -1,
3354  UBool doCopyArray = TRUE,
3355  int32_t **pBufferToDelete = 0,
3356  UBool forceClone = FALSE);
3357 
3358  // common function for case mappings
3359  UnicodeString &
3360  caseMap(BreakIterator *titleIter,
3361  const char *locale,
3362  uint32_t options,
3363  int32_t toWhichCase);
3364 
3365  // ref counting
3366  void addRef(void);
3367  int32_t removeRef(void);
3368  int32_t refCount(void) const;
3369 
3370  // constants
3371  enum {
3372  // Set the stack buffer size so that sizeof(UnicodeString) is,
3373  // naturally (without padding), a multiple of sizeof(pointer).
3374  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
3375  kInvalidUChar=0xffff, // invalid UChar index
3376  kGrowSize=128, // grow size for this buffer
3377  kInvalidHashCode=0, // invalid hash code
3378  kEmptyHashCode=1, // hash code for empty string
3379 
3380  // bit flag values for fFlags
3381  kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3382  kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
3383  kRefCounted=4, // there is a refCount field before the characters in fArray
3384  kBufferIsReadonly=8,// do not write to this buffer
3385  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3386  // and releaseBuffer(newLength) must be called
3387 
3388  // combined values for convenience
3389  kShortString=kUsingStackBuffer,
3390  kLongString=kRefCounted,
3391  kReadonlyAlias=kBufferIsReadonly,
3392  kWritableAlias=0
3393  };
3394 
3395  friend class StringThreadTest;
3396  friend class UnicodeStringAppendable;
3397 
3398  union StackBufferOrFields; // forward declaration necessary before friend declaration
3399  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3400 
3401  /*
3402  * The following are all the class fields that are stored
3403  * in each UnicodeString object.
3404  * Note that UnicodeString has virtual functions,
3405  * therefore there is an implicit vtable pointer
3406  * as the first real field.
3407  * The fields should be aligned such that no padding is necessary.
3408  * On 32-bit machines, the size should be 32 bytes,
3409  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3410  *
3411  * We use a hack to achieve this.
3412  *
3413  * With at least some compilers, each of the following is forced to
3414  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3415  * rounded up with additional padding if the fields do not already fit that requirement:
3416  * - sizeof(class UnicodeString)
3417  * - offsetof(UnicodeString, fUnion)
3418  * - sizeof(fUnion)
3419  * - sizeof(fFields)
3420  *
3421  * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3422  * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3423  * (Padding at the end of fFields is ok:
3424  * As long as there is no padding after fStackBuffer, it is not wasted space.)
3425  *
3426  * We further assume that the compiler does not reorder the fields,
3427  * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3428  * with at most some padding (but no other field) in between.
3429  * (Padding there would be wasted space, but functionally harmless.)
3430  *
3431  * We use a few more sizeof(pointer)'s chunks of space with
3432  * fRestOfStackBuffer, fShortLength and fFlags,
3433  * to get up exactly to the intended sizeof(UnicodeString).
3434  */
3435  // (implicit) *vtable;
3436  union StackBufferOrFields {
3437  // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3438  // else fFields is used
3439  UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
3440  struct {
3441  UChar *fArray; // the Unicode data
3442  int32_t fCapacity; // capacity of fArray (in UChars)
3443  int32_t fLength; // number of characters in fArray if >127; else undefined
3444  } fFields;
3445  } fUnion;
3446  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
3447  int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
3448  uint8_t fFlags; // bit flags: see constants above
3449 };
3450 
3460 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3461 
3462 //========================================
3463 // Inline members
3464 //========================================
3465 
3466 //========================================
3467 // Privates
3468 //========================================
3469 
// Clamps `start` in place to the valid index range of this string:
// negative values become 0, values beyond length() become length().
3470 inline void
3471 UnicodeString::pinIndex(int32_t& start) const
3472 {
3473  // pin index
3474  if(start < 0) {
3475  start = 0;
3476  } else if(start > length()) {
3477  start = length();
3478  }
3479 }
3480 
// Clamps a (start, length) pair in place so that it describes a range lying
// entirely inside this string.
// `start` is pinned to [0, length()] first; `_length` is then pinned to
// [0, length() - start], i.e. to what remains after the pinned start.
3481 inline void
3482 UnicodeString::pinIndices(int32_t& start,
3483  int32_t& _length) const
3484 {
3485  // pin indices
3486  int32_t len = length(); // read once; both clamps use the same value
3487  if(start < 0) {
3488  start = 0;
3489  } else if(start > len) {
3490  start = len;
3491  }
3492  if(_length < 0) {
3493  _length = 0;
3494  } else if(_length > (len - start)) { // uses the already-pinned start
3495  _length = (len - start);
3496  }
3497 }
3498 
// Returns a mutable pointer to the first UChar of the current storage:
// fUnion.fStackBuffer when the kUsingStackBuffer flag is set, otherwise
// fUnion.fFields.fArray. No kIsBogus/kOpenGetBuffer check is made here.
3499 inline UChar*
3500 UnicodeString::getArrayStart()
3501 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3502 
// Const overload: returns a read-only pointer to the first UChar of the
// current storage, selected by the kUsingStackBuffer flag exactly like the
// non-const version. No kIsBogus/kOpenGetBuffer check is made here.
3503 inline const UChar*
3504 UnicodeString::getArrayStart() const
3505 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3506 
3507 //========================================
3508 // Read-only implementation methods
3509 //========================================
3510 inline int32_t
3512 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3513 
3514 inline int32_t
3516 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3517 
3518 inline int32_t
3520 { return doHashCode(); }
3521 
3522 inline UBool
3524 { return (UBool)(fFlags & kIsBogus); }
3525 
3526 inline UBool
3527 UnicodeString::isWritable() const
3528 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3529 
3530 inline UBool
3531 UnicodeString::isBufferWritable() const
3532 {
3533  return (UBool)(
3534  !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3535  (!(fFlags&kRefCounted) || refCount()==1));
3536 }
3537 
3538 inline const UChar *
3540  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3541  return 0;
3542  } else if(fFlags&kUsingStackBuffer) {
3543  return fUnion.fStackBuffer;
3544  } else {
3545  return fUnion.fFields.fArray;
3546  }
3547 }
3548 
3549 //========================================
3550 // Read-only alias methods
3551 //========================================
3552 inline int8_t
3553 UnicodeString::doCompare(int32_t start,
3554  int32_t thisLength,
3555  const UnicodeString& srcText,
3556  int32_t srcStart,
3557  int32_t srcLength) const
3558 {
3559  if(srcText.isBogus()) {
3560  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3561  } else {
3562  srcText.pinIndices(srcStart, srcLength);
3563  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3564  }
3565 }
3566 
3567 inline UBool
3569 {
3570  if(isBogus()) {
3571  return text.isBogus();
3572  } else {
3573  int32_t len = length(), textLength = text.length();
3574  return
3575  !text.isBogus() &&
3576  len == textLength &&
3577  doCompare(0, len, text, 0, textLength) == 0;
3578  }
3579 }
3580 
3581 inline UBool
3583 { return (! operator==(text)); }
3584 
3585 inline UBool
3587 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3588 
3589 inline UBool
3591 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3592 
3593 inline UBool
3595 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3596 
3597 inline UBool
3599 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3600 
3601 inline int8_t
3603 { return doCompare(0, length(), text, 0, text.length()); }
3604 
3605 inline int8_t
3607  int32_t _length,
3608  const UnicodeString& srcText) const
3609 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3610 
3611 inline int8_t
3613  int32_t srcLength) const
3614 { return doCompare(0, length(), srcChars, 0, srcLength); }
3615 
3616 inline int8_t
3618  int32_t _length,
3619  const UnicodeString& srcText,
3620  int32_t srcStart,
3621  int32_t srcLength) const
3622 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3623 
3624 inline int8_t
3626  int32_t _length,
3627  const UChar *srcChars) const
3628 { return doCompare(start, _length, srcChars, 0, _length); }
3629 
3630 inline int8_t
3632  int32_t _length,
3633  const UChar *srcChars,
3634  int32_t srcStart,
3635  int32_t srcLength) const
3636 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3637 
3638 inline int8_t
3640  int32_t limit,
3641  const UnicodeString& srcText,
3642  int32_t srcStart,
3643  int32_t srcLimit) const
3644 { return doCompare(start, limit - start,
3645  srcText, srcStart, srcLimit - srcStart); }
3646 
3647 inline int8_t
3648 UnicodeString::doCompareCodePointOrder(int32_t start,
3649  int32_t thisLength,
3650  const UnicodeString& srcText,
3651  int32_t srcStart,
3652  int32_t srcLength) const
3653 {
3654  if(srcText.isBogus()) {
3655  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3656  } else {
3657  srcText.pinIndices(srcStart, srcLength);
3658  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3659  }
3660 }
3661 
3662 inline int8_t
3664 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3665 
3666 inline int8_t
3668  int32_t _length,
3669  const UnicodeString& srcText) const
3670 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3671 
3672 inline int8_t
3674  int32_t srcLength) const
3675 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3676 
3677 inline int8_t
3679  int32_t _length,
3680  const UnicodeString& srcText,
3681  int32_t srcStart,
3682  int32_t srcLength) const
3683 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3684 
3685 inline int8_t
3687  int32_t _length,
3688  const UChar *srcChars) const
3689 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3690 
3691 inline int8_t
3693  int32_t _length,
3694  const UChar *srcChars,
3695  int32_t srcStart,
3696  int32_t srcLength) const
3697 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3698 
3699 inline int8_t
3701  int32_t limit,
3702  const UnicodeString& srcText,
3703  int32_t srcStart,
3704  int32_t srcLimit) const
3705 { return doCompareCodePointOrder(start, limit - start,
3706  srcText, srcStart, srcLimit - srcStart); }
3707 
3708 inline int8_t
3709 UnicodeString::doCaseCompare(int32_t start,
3710  int32_t thisLength,
3711  const UnicodeString &srcText,
3712  int32_t srcStart,
3713  int32_t srcLength,
3714  uint32_t options) const
3715 {
3716  if(srcText.isBogus()) {
3717  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3718  } else {
3719  srcText.pinIndices(srcStart, srcLength);
3720  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3721  }
3722 }
3723 
3724 inline int8_t
3725 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3726  return doCaseCompare(0, length(), text, 0, text.length(), options);
3727 }
3728 
3729 inline int8_t
3731  int32_t _length,
3732  const UnicodeString &srcText,
3733  uint32_t options) const {
3734  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3735 }
3736 
3737 inline int8_t
3739  int32_t srcLength,
3740  uint32_t options) const {
3741  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3742 }
3743 
3744 inline int8_t
3746  int32_t _length,
3747  const UnicodeString &srcText,
3748  int32_t srcStart,
3749  int32_t srcLength,
3750  uint32_t options) const {
3751  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3752 }
3753 
3754 inline int8_t
3756  int32_t _length,
3757  const UChar *srcChars,
3758  uint32_t options) const {
3759  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3760 }
3761 
3762 inline int8_t
3764  int32_t _length,
3765  const UChar *srcChars,
3766  int32_t srcStart,
3767  int32_t srcLength,
3768  uint32_t options) const {
3769  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3770 }
3771 
3772 inline int8_t
3774  int32_t limit,
3775  const UnicodeString &srcText,
3776  int32_t srcStart,
3777  int32_t srcLimit,
3778  uint32_t options) const {
3779  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3780 }
3781 
3782 inline int32_t
3784  int32_t srcStart,
3785  int32_t srcLength,
3786  int32_t start,
3787  int32_t _length) const
3788 {
3789  if(!srcText.isBogus()) {
3790  srcText.pinIndices(srcStart, srcLength);
3791  if(srcLength > 0) {
3792  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3793  }
3794  }
3795  return -1;
3796 }
3797 
3798 inline int32_t
3800 { return indexOf(text, 0, text.length(), 0, length()); }
3801 
3802 inline int32_t
3804  int32_t start) const {
3805  pinIndex(start);
3806  return indexOf(text, 0, text.length(), start, length() - start);
3807 }
3808 
3809 inline int32_t
3811  int32_t start,
3812  int32_t _length) const
3813 { return indexOf(text, 0, text.length(), start, _length); }
3814 
3815 inline int32_t
3817  int32_t srcLength,
3818  int32_t start) const {
3819  pinIndex(start);
3820  return indexOf(srcChars, 0, srcLength, start, length() - start);
3821 }
3822 
3823 inline int32_t
3825  int32_t srcLength,
3826  int32_t start,
3827  int32_t _length) const
3828 { return indexOf(srcChars, 0, srcLength, start, _length); }
3829 
3830 inline int32_t
3832  int32_t start,
3833  int32_t _length) const
3834 { return doIndexOf(c, start, _length); }
3835 
3836 inline int32_t
3838  int32_t start,
3839  int32_t _length) const
3840 { return doIndexOf(c, start, _length); }
3841 
3842 inline int32_t
3844 { return doIndexOf(c, 0, length()); }
3845 
3846 inline int32_t
3848 { return indexOf(c, 0, length()); }
3849 
3850 inline int32_t
3852  int32_t start) const {
3853  pinIndex(start);
3854  return doIndexOf(c, start, length() - start);
3855 }
3856 
3857 inline int32_t
3859  int32_t start) const {
3860  pinIndex(start);
3861  return indexOf(c, start, length() - start);
3862 }
3863 
3864 inline int32_t
3866  int32_t srcLength,
3867  int32_t start,
3868  int32_t _length) const
3869 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3870 
3871 inline int32_t
3873  int32_t srcLength,
3874  int32_t start) const {
3875  pinIndex(start);
3876  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3877 }
3878 
3879 inline int32_t
3881  int32_t srcStart,
3882  int32_t srcLength,
3883  int32_t start,
3884  int32_t _length) const
3885 {
3886  if(!srcText.isBogus()) {
3887  srcText.pinIndices(srcStart, srcLength);
3888  if(srcLength > 0) {
3889  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3890  }
3891  }
3892  return -1;
3893 }
3894 
3895 inline int32_t
3897  int32_t start,
3898  int32_t _length) const
3899 { return lastIndexOf(text, 0, text.length(), start, _length); }
3900 
3901 inline int32_t
3903  int32_t start) const {
3904  pinIndex(start);
3905  return lastIndexOf(text, 0, text.length(), start, length() - start);
3906 }
3907 
3908 inline int32_t
3910 { return lastIndexOf(text, 0, text.length(), 0, length()); }
3911 
3912 inline int32_t
3914  int32_t start,
3915  int32_t _length) const
3916 { return doLastIndexOf(c, start, _length); }
3917 
3918 inline int32_t
3920  int32_t start,
3921  int32_t _length) const {
3922  return doLastIndexOf(c, start, _length);
3923 }
3924 
3925 inline int32_t
3927 { return doLastIndexOf(c, 0, length()); }
3928 
3929 inline int32_t
3931  return lastIndexOf(c, 0, length());
3932 }
3933 
3934 inline int32_t
3936  int32_t start) const {
3937  pinIndex(start);
3938  return doLastIndexOf(c, start, length() - start);
3939 }
3940 
3941 inline int32_t
3943  int32_t start) const {
3944  pinIndex(start);
3945  return lastIndexOf(c, start, length() - start);
3946 }
3947 
3948 inline UBool
3950 { return compare(0, text.length(), text, 0, text.length()) == 0; }
3951 
3952 inline UBool
3954  int32_t srcStart,
3955  int32_t srcLength) const
3956 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3957 
3958 inline UBool
3959 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
3960  if(srcLength < 0) {
3961  srcLength = u_strlen(srcChars);
3962  }
3963  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
3964 }
3965 
3966 inline UBool
3967 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
3968  if(srcLength < 0) {
3969  srcLength = u_strlen(srcChars);
3970  }
3971  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
3972 }
3973 
3974 inline UBool
3976 { return doCompare(length() - text.length(), text.length(),
3977  text, 0, text.length()) == 0; }
3978 
3979 inline UBool
3981  int32_t srcStart,
3982  int32_t srcLength) const {
3983  srcText.pinIndices(srcStart, srcLength);
3984  return doCompare(length() - srcLength, srcLength,
3985  srcText, srcStart, srcLength) == 0;
3986 }
3987 
3988 inline UBool
3990  int32_t srcLength) const {
3991  if(srcLength < 0) {
3992  srcLength = u_strlen(srcChars);
3993  }
3994  return doCompare(length() - srcLength, srcLength,
3995  srcChars, 0, srcLength) == 0;
3996 }
3997 
3998 inline UBool
4000  int32_t srcStart,
4001  int32_t srcLength) const {
4002  if(srcLength < 0) {
4003  srcLength = u_strlen(srcChars + srcStart);
4004  }
4005  return doCompare(length() - srcLength, srcLength,
4006  srcChars, srcStart, srcLength) == 0;
4007 }
4008 
4009 //========================================
4010 // replace
4011 //========================================
4012 inline UnicodeString&
4014  int32_t _length,
4015  const UnicodeString& srcText)
4016 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4017 
4018 inline UnicodeString&
4020  int32_t _length,
4021  const UnicodeString& srcText,
4022  int32_t srcStart,
4023  int32_t srcLength)
4024 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4025 
4026 inline UnicodeString&
4028  int32_t _length,
4029  const UChar *srcChars,
4030  int32_t srcLength)
4031 { return doReplace(start, _length, srcChars, 0, srcLength); }
4032 
4033 inline UnicodeString&
4035  int32_t _length,
4036  const UChar *srcChars,
4037  int32_t srcStart,
4038  int32_t srcLength)
4039 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4040 
4041 inline UnicodeString&
4043  int32_t _length,
4044  UChar srcChar)
4045 { return doReplace(start, _length, &srcChar, 0, 1); }
4046 
4047 inline UnicodeString&
4049  int32_t _length,
4050  UChar32 srcChar) {
4051  UChar buffer[U16_MAX_LENGTH];
4052  int32_t count = 0;
4053  UBool isError = FALSE;
4054  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
4055  return doReplace(start, _length, buffer, 0, count);
4056 }
4057 
4058 inline UnicodeString&
4060  int32_t limit,
4061  const UnicodeString& srcText)
4062 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4063 
4064 inline UnicodeString&
4066  int32_t limit,
4067  const UnicodeString& srcText,
4068  int32_t srcStart,
4069  int32_t srcLimit)
4070 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4071 
4072 inline UnicodeString&
4074  const UnicodeString& newText)
4075 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4076  newText, 0, newText.length()); }
4077 
4078 inline UnicodeString&
4080  int32_t _length,
4081  const UnicodeString& oldText,
4082  const UnicodeString& newText)
4083 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4084  newText, 0, newText.length()); }
4085 
4086 // ============================
4087 // extract
4088 // ============================
4089 inline void
4090 UnicodeString::doExtract(int32_t start,
4091  int32_t _length,
4092  UnicodeString& target) const
4093 { target.replace(0, target.length(), *this, start, _length); }
4094 
4095 inline void
4097  int32_t _length,
4098  UChar *target,
4099  int32_t targetStart) const
4100 { doExtract(start, _length, target, targetStart); }
4101 
4102 inline void
4104  int32_t _length,
4105  UnicodeString& target) const
4106 { doExtract(start, _length, target); }
4107 
4108 #if !UCONFIG_NO_CONVERSION
4109 
4110 inline int32_t
4112  int32_t _length,
4113  char *dst,
4114  const char *codepage) const
4115 
4116 {
4117  // This dstSize value will be checked explicitly
4118  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4119 }
4120 
4121 #endif
4122 
4123 inline void
4125  int32_t limit,
4126  UChar *dst,
4127  int32_t dstStart) const {
4128  pinIndex(start);
4129  pinIndex(limit);
4130  doExtract(start, limit - start, dst, dstStart);
4131 }
4132 
4133 inline UnicodeString
4134 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4135  return tempSubString(start, limit - start);
4136 }
4137 
4138 inline UChar
4139 UnicodeString::doCharAt(int32_t offset) const
4140 {
4141  if((uint32_t)offset < (uint32_t)length()) {
4142  return getArrayStart()[offset];
4143  } else {
4144  return kInvalidUChar;
4145  }
4146 }
4147 
4148 inline UChar
4149 UnicodeString::charAt(int32_t offset) const
4150 { return doCharAt(offset); }
4151 
4152 inline UChar
4153 UnicodeString::operator[] (int32_t offset) const
4154 { return doCharAt(offset); }
4155 
4156 inline UChar32
4157 UnicodeString::char32At(int32_t offset) const
4158 {
4159  int32_t len = length();
4160  if((uint32_t)offset < (uint32_t)len) {
4161  const UChar *array = getArrayStart();
4162  UChar32 c;
4163  U16_GET(array, 0, offset, len, c);
4164  return c;
4165  } else {
4166  return kInvalidUChar;
4167  }
4168 }
4169 
4170 inline int32_t
4171 UnicodeString::getChar32Start(int32_t offset) const {
4172  if((uint32_t)offset < (uint32_t)length()) {
4173  const UChar *array = getArrayStart();
4174  U16_SET_CP_START(array, 0, offset);
4175  return offset;
4176  } else {
4177  return 0;
4178  }
4179 }
4180 
4181 inline int32_t
4182 UnicodeString::getChar32Limit(int32_t offset) const {
4183  int32_t len = length();
4184  if((uint32_t)offset < (uint32_t)len) {
4185  const UChar *array = getArrayStart();
4186  U16_SET_CP_LIMIT(array, 0, offset, len);
4187  return offset;
4188  } else {
4189  return len;
4190  }
4191 }
4192 
4193 inline UBool
4195  return fShortLength == 0;
4196 }
4197 
4198 //========================================
4199 // Write implementation methods
4200 //========================================
4201 inline void
4202 UnicodeString::setLength(int32_t len) {
4203  if(len <= 127) {
4204  fShortLength = (int8_t)len;
4205  } else {
4206  fShortLength = (int8_t)-1;
4207  fUnion.fFields.fLength = len;
4208  }
4209 }
4210 
4211 inline void
4212 UnicodeString::setToEmpty() {
4213  fShortLength = 0;
4214  fFlags = kShortString;
4215 }
4216 
4217 inline void
4218 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4219  setLength(len);
4220  fUnion.fFields.fArray = array;
4221  fUnion.fFields.fCapacity = capacity;
4222 }
4223 
4224 inline const UChar *
4226  if(!isWritable()) {
4227  return 0;
4228  } else {
4229  UChar *array = getArrayStart();
4230  int32_t len = length();
4231  if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4232  /*
4233  * kRefCounted: Do not write the NUL if the buffer is shared.
4234  * That is mostly safe, except when the length of one copy was modified
4235  * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4236  * Then the NUL would be written into the middle of another copy's string.
4237  */
4238  if(!(fFlags&kBufferIsReadonly)) {
4239  /*
4240  * We must not write to a readonly buffer, but it is known to be
4241  * NUL-terminated if len<capacity.
4242  * A shared, allocated buffer (refCount()>1) must not have its contents
4243  * modified, but the NUL at [len] is beyond the string contents,
4244  * and multiple string objects and threads writing the same NUL into the
4245  * same location is harmless.
4246  * In all other cases, the buffer is fully writable and it is anyway safe
4247  * to write the NUL.
4248  *
4249  * Note: An earlier version of this code tested whether there is a NUL
4250  * at [len] already, but, while safe, it generated lots of warnings from
4251  * tools like valgrind and Purify.
4252  */
4253  array[len] = 0;
4254  }
4255  return array;
4256  } else if(cloneArrayIfNeeded(len+1)) {
4257  array = getArrayStart();
4258  array[len] = 0;
4259  return array;
4260  } else {
4261  return 0;
4262  }
4263  }
4264 }
4265 
4266 inline UnicodeString&
4268 { return doReplace(0, length(), &ch, 0, 1); }
4269 
4270 inline UnicodeString&
4272 { return replace(0, length(), ch); }
4273 
4274 inline UnicodeString&
4276  int32_t srcStart,
4277  int32_t srcLength)
4278 {
4279  unBogus();
4280  return doReplace(0, length(), srcText, srcStart, srcLength);
4281 }
4282 
4283 inline UnicodeString&
4285  int32_t srcStart)
4286 {
4287  unBogus();
4288  srcText.pinIndex(srcStart);
4289  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4290 }
4291 
4292 inline UnicodeString&
4294 {
4295  return copyFrom(srcText);
4296 }
4297 
4298 inline UnicodeString&
4299 UnicodeString::setTo(const UChar *srcChars,
4300  int32_t srcLength)
4301 {
4302  unBogus();
4303  return doReplace(0, length(), srcChars, 0, srcLength);
4304 }
4305 
4306 inline UnicodeString&
4308 {
4309  unBogus();
4310  return doReplace(0, length(), &srcChar, 0, 1);
4311 }
4312 
4313 inline UnicodeString&
4315 {
4316  unBogus();
4317  return replace(0, length(), srcChar);
4318 }
4319 
4320 inline UnicodeString&
4322  int32_t srcStart,
4323  int32_t srcLength)
4324 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4325 
4326 inline UnicodeString&
4328 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4329 
4330 inline UnicodeString&
4332  int32_t srcStart,
4333  int32_t srcLength)
4334 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4335 
4336 inline UnicodeString&
4338  int32_t srcLength)
4339 { return doReplace(length(), 0, srcChars, 0, srcLength); }
4340 
4341 inline UnicodeString&
4343 { return doReplace(length(), 0, &srcChar, 0, 1); }
4344 
4345 inline UnicodeString&
4347  UChar buffer[U16_MAX_LENGTH];
4348  int32_t _length = 0;
4349  UBool isError = FALSE;
4350  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4351  return doReplace(length(), 0, buffer, 0, _length);
4352 }
4353 
4354 inline UnicodeString&
4356 { return doReplace(length(), 0, &ch, 0, 1); }
4357 
4358 inline UnicodeString&
4360  return append(ch);
4361 }
4362 
4363 inline UnicodeString&
4365 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4366 
4367 inline UnicodeString&
4369  const UnicodeString& srcText,
4370  int32_t srcStart,
4371  int32_t srcLength)
4372 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4373 
4374 inline UnicodeString&
4376  const UnicodeString& srcText)
4377 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4378 
4379 inline UnicodeString&
4381  const UChar *srcChars,
4382  int32_t srcStart,
4383  int32_t srcLength)
4384 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4385 
4386 inline UnicodeString&
4388  const UChar *srcChars,
4389  int32_t srcLength)
4390 { return doReplace(start, 0, srcChars, 0, srcLength); }
4391 
4392 inline UnicodeString&
4394  UChar srcChar)
4395 { return doReplace(start, 0, &srcChar, 0, 1); }
4396 
4397 inline UnicodeString&
4399  UChar32 srcChar)
4400 { return replace(start, 0, srcChar); }
4401 
4402 
4403 inline UnicodeString&
4405 {
4406  // remove() of a bogus string makes the string empty and non-bogus
4407  // we also un-alias a read-only alias to deal with NUL-termination
4408  // issues with getTerminatedBuffer()
4409  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4410  setToEmpty();
4411  } else {
4412  fShortLength = 0;
4413  }
4414  return *this;
4415 }
4416 
4417 inline UnicodeString&
4419  int32_t _length)
4420 {
4421  if(start <= 0 && _length == INT32_MAX) {
4422  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4423  return remove();
4424  }
4425  return doReplace(start, _length, NULL, 0, 0);
4426 }
4427 
4428 inline UnicodeString&
4430  int32_t limit)
4431 { return doReplace(start, limit - start, NULL, 0, 0); }
4432 
4433 inline UnicodeString &
4434 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4435  truncate(limit);
4436  return doReplace(0, start, NULL, 0, 0);
4437 }
4438 
4439 inline UBool
4440 UnicodeString::truncate(int32_t targetLength)
4441 {
4442  if(isBogus() && targetLength == 0) {
4443  // truncate(0) of a bogus string makes the string empty and non-bogus
4444  unBogus();
4445  return FALSE;
4446  } else if((uint32_t)targetLength < (uint32_t)length()) {
4447  setLength(targetLength);
4448  if(fFlags&kBufferIsReadonly) {
4449  fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
4450  }
4451  return TRUE;
4452  } else {
4453  return FALSE;
4454  }
4455 }
4456 
4457 inline UnicodeString&
4459 { return doReverse(0, length()); }
4460 
4461 inline UnicodeString&
4463  int32_t _length)
4464 { return doReverse(start, _length); }
4465 
4467 
4468 #endif