#include <unistr.h>
Inheritance diagram for UnicodeString:
Public Types | |
enum | EInvariant { kInvariant } |
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a Unicode string from an invariant-character char * string. More... | |
Public Member Functions | |
UBool | operator== (const UnicodeString &text) const |
Equality operator. | |
UBool | operator!= (const UnicodeString &text) const |
Inequality operator. | |
UBool | operator> (const UnicodeString &text) const |
Greater than operator. | |
UBool | operator< (const UnicodeString &text) const |
Less than operator. | |
UBool | operator>= (const UnicodeString &text) const |
Greater than or equal operator. | |
UBool | operator<= (const UnicodeString &text) const |
Less than or equal operator. | |
int8_t | compare (const UnicodeString &text) const |
Compare the characters bitwise in this UnicodeString to the characters in text . | |
int8_t | compare (int32_t start, int32_t length, const UnicodeString &text) const |
Compare the characters bitwise in the range [start , start + length ) with the characters in text . | |
int8_t | compare (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const |
Compare the characters bitwise in the range [start , start + length ) with the characters in srcText in the range [srcStart , srcStart + srcLength ). | |
int8_t | compare (const UChar *srcChars, int32_t srcLength) const |
Compare the characters bitwise in this UnicodeString with the first srcLength characters in srcChars . | |
int8_t | compare (int32_t start, int32_t length, const UChar *srcChars) const |
Compare the characters bitwise in the range [start , start + length ) with the first length characters in srcChars . | |
int8_t | compare (int32_t start, int32_t length, const UChar *srcChars, int32_t srcStart, int32_t srcLength) const |
Compare the characters bitwise in the range [start , start + length ) with the characters in srcChars in the range [srcStart , srcStart + srcLength ). | |
int8_t | compareBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const |
Compare the characters bitwise in the range [start , limit ) with the characters in srcText in the range [srcStart , srcLimit ). | |
int8_t | compareCodePointOrder (const UnicodeString &text) const |
Compare two Unicode strings in code point order. | |
int8_t | compareCodePointOrder (int32_t start, int32_t length, const UnicodeString &srcText) const |
Compare two Unicode strings in code point order. | |
int8_t | compareCodePointOrder (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const |
Compare two Unicode strings in code point order. | |
int8_t | compareCodePointOrder (const UChar *srcChars, int32_t srcLength) const |
Compare two Unicode strings in code point order. | |
int8_t | compareCodePointOrder (int32_t start, int32_t length, const UChar *srcChars) const |
Compare two Unicode strings in code point order. | |
int8_t | compareCodePointOrder (int32_t start, int32_t length, const UChar *srcChars, int32_t srcStart, int32_t srcLength) const |
Compare two Unicode strings in code point order. | |
int8_t | compareCodePointOrderBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const |
Compare two Unicode strings in code point order. | |
int8_t | caseCompare (const UnicodeString &text, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
int8_t | caseCompare (int32_t start, int32_t length, const UnicodeString &srcText, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
int8_t | caseCompare (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
int8_t | caseCompare (const UChar *srcChars, int32_t srcLength, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
int8_t | caseCompare (int32_t start, int32_t length, const UChar *srcChars, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
int8_t | caseCompare (int32_t start, int32_t length, const UChar *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
int8_t | caseCompareBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit, uint32_t options) const |
Compare two strings case-insensitively using full case folding. | |
UBool | startsWith (const UnicodeString &text) const |
Determine if this starts with the characters in text . | |
UBool | startsWith (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const |
Determine if this starts with the characters in srcText in the range [srcStart , srcStart + srcLength ). | |
UBool | startsWith (const UChar *srcChars, int32_t srcLength) const |
Determine if this starts with the characters in srcChars . | |
UBool | startsWith (const UChar *srcChars, int32_t srcStart, int32_t srcLength) const |
Determine if this starts with the characters in srcChars in the range [srcStart , srcStart + srcLength ). | |
UBool | endsWith (const UnicodeString &text) const |
Determine if this ends with the characters in text . | |
UBool | endsWith (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const |
Determine if this ends with the characters in srcText in the range [srcStart , srcStart + srcLength ). | |
UBool | endsWith (const UChar *srcChars, int32_t srcLength) const |
Determine if this ends with the characters in srcChars . | |
UBool | endsWith (const UChar *srcChars, int32_t srcStart, int32_t srcLength) const |
Determine if this ends with the characters in srcChars in the range [srcStart , srcStart + srcLength ). | |
int32_t | indexOf (const UnicodeString &text) const |
Locate in this the first occurrence of the characters in text , using bitwise comparison. | |
int32_t | indexOf (const UnicodeString &text, int32_t start) const |
Locate in this the first occurrence of the characters in text starting at offset start , using bitwise comparison. | |
int32_t | indexOf (const UnicodeString &text, int32_t start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in text , using bitwise comparison. | |
int32_t | indexOf (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in srcText in the range [srcStart , srcStart + srcLength ), using bitwise comparison. | |
int32_t | indexOf (const UChar *srcChars, int32_t srcLength, int32_t start) const |
Locate in this the first occurrence of the characters in srcChars starting at offset start , using bitwise comparison. | |
int32_t | indexOf (const UChar *srcChars, int32_t srcLength, int32_t start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in srcChars , using bitwise comparison. | |
int32_t | indexOf (const UChar *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in srcChars in the range [srcStart , srcStart + srcLength ), using bitwise comparison. | |
int32_t | indexOf (UChar c) const |
Locate in this the first occurrence of the BMP code point c , using bitwise comparison. | |
int32_t | indexOf (UChar32 c) const |
Locate in this the first occurrence of the code point c , using bitwise comparison. | |
int32_t | indexOf (UChar c, int32_t start) const |
Locate in this the first occurrence of the BMP code point c , starting at offset start , using bitwise comparison. | |
int32_t | indexOf (UChar32 c, int32_t start) const |
Locate in this the first occurrence of the code point c starting at offset start , using bitwise comparison. | |
int32_t | indexOf (UChar c, int32_t start, int32_t length) const |
Locate in this the first occurrence of the BMP code point c in the range [start , start + length ), using bitwise comparison. | |
int32_t | indexOf (UChar32 c, int32_t start, int32_t length) const |
Locate in this the first occurrence of the code point c in the range [start , start + length ), using bitwise comparison. | |
int32_t | lastIndexOf (const UnicodeString &text) const |
Locate in this the last occurrence of the characters in text , using bitwise comparison. | |
int32_t | lastIndexOf (const UnicodeString &text, int32_t start) const |
Locate in this the last occurrence of the characters in text starting at offset start , using bitwise comparison. | |
int32_t | lastIndexOf (const UnicodeString &text, int32_t start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in text , using bitwise comparison. | |
int32_t | lastIndexOf (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in srcText in the range [srcStart , srcStart + srcLength ), using bitwise comparison. | |
int32_t | lastIndexOf (const UChar *srcChars, int32_t srcLength, int32_t start) const |
Locate in this the last occurrence of the characters in srcChars starting at offset start , using bitwise comparison. | |
int32_t | lastIndexOf (const UChar *srcChars, int32_t srcLength, int32_t start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in srcChars , using bitwise comparison. | |
int32_t | lastIndexOf (const UChar *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in srcChars in the range [srcStart , srcStart + srcLength ), using bitwise comparison. | |
int32_t | lastIndexOf (UChar c) const |
Locate in this the last occurrence of the BMP code point c , using bitwise comparison. | |
int32_t | lastIndexOf (UChar32 c) const |
Locate in this the last occurrence of the code point c , using bitwise comparison. | |
int32_t | lastIndexOf (UChar c, int32_t start) const |
Locate in this the last occurrence of the BMP code point c starting at offset start , using bitwise comparison. | |
int32_t | lastIndexOf (UChar32 c, int32_t start) const |
Locate in this the last occurrence of the code point c starting at offset start , using bitwise comparison. | |
int32_t | lastIndexOf (UChar c, int32_t start, int32_t length) const |
Locate in this the last occurrence of the BMP code point c in the range [start , start + length ), using bitwise comparison. | |
int32_t | lastIndexOf (UChar32 c, int32_t start, int32_t length) const |
Locate in this the last occurrence of the code point c in the range [start , start + length ), using bitwise comparison. | |
UChar | charAt (int32_t offset) const |
Return the code unit at offset offset . | |
UChar | operator[] (int32_t offset) const |
Return the code unit at offset offset . | |
UChar32 | char32At (int32_t offset) const |
Return the code point that contains the code unit at offset offset . | |
int32_t | getChar32Start (int32_t offset) const |
Adjust a random-access offset so that it points to the beginning of a Unicode character. | |
int32_t | getChar32Limit (int32_t offset) const |
Adjust a random-access offset so that it points behind a Unicode character. | |
int32_t | moveIndex32 (int32_t index, int32_t delta) const |
Move the code unit index along the string by delta code points. | |
void | extract (int32_t start, int32_t length, UChar *dst, int32_t dstStart=0) const |
Copy the characters in the range [start , start + length ) into the array dst , beginning at dstStart . | |
int32_t | extract (UChar *dest, int32_t destCapacity, UErrorCode &errorCode) const |
Copy the contents of the string into dest. | |
void | extract (int32_t start, int32_t length, UnicodeString &target) const |
Copy the characters in the range [start , start + length ) into the UnicodeString target . | |
void | extractBetween (int32_t start, int32_t limit, UChar *dst, int32_t dstStart=0) const |
Copy the characters in the range [start , limit ) into the array dst , beginning at dstStart . | |
virtual void | extractBetween (int32_t start, int32_t limit, UnicodeString &target) const |
Copy the characters in the range [start , limit ) into the UnicodeString target . | |
int32_t | extract (int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const |
Copy the characters in the range [start , start + startLength ) into an array of characters. | |
int32_t | extract (int32_t start, int32_t startLength, char *target, const char *codepage=0) const |
Copy the characters in the range [start , start + startLength ) into an array of characters in a specified codepage. | |
int32_t | extract (int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage=0) const |
Copy the characters in the range [start , start + startLength ) into an array of characters in a specified codepage. | |
int32_t | extract (char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const |
Convert the UnicodeString into a codepage string using an existing UConverter. | |
int32_t | length (void) const |
Return the length of the UnicodeString object. | |
int32_t | countChar32 (int32_t start=0, int32_t length=INT32_MAX) const |
Count Unicode code points in the length UChar code units of the string. | |
UBool | hasMoreChar32Than (int32_t start, int32_t length, int32_t number) const |
Check if the length UChar code units of the string contain more Unicode code points than a certain number. | |
UBool | isEmpty (void) const |
Determine if this string is empty. | |
int32_t | getCapacity (void) const |
Return the capacity of the internal buffer of the UnicodeString object. | |
int32_t | hashCode (void) const |
Generate a hash code for this object. | |
UBool | isBogus (void) const |
Determine if this object contains a valid string. | |
UnicodeString & | operator= (const UnicodeString &srcText) |
Assignment operator. | |
UnicodeString & | fastCopyFrom (const UnicodeString &src) |
Almost the same as the assignment operator. | |
UnicodeString & | operator= (UChar ch) |
Assignment operator. | |
UnicodeString & | operator= (UChar32 ch) |
Assignment operator. | |
UnicodeString & | setTo (const UnicodeString &srcText, int32_t srcStart) |
Set the text in the UnicodeString object to the characters in srcText in the range [srcStart , srcText.length() ). | |
UnicodeString & | setTo (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) |
Set the text in the UnicodeString object to the characters in srcText in the range [srcStart , srcStart + srcLength ). | |
UnicodeString & | setTo (const UnicodeString &srcText) |
Set the text in the UnicodeString object to the characters in srcText . | |
UnicodeString & | setTo (const UChar *srcChars, int32_t srcLength) |
Set the characters in the UnicodeString object to the characters in srcChars . | |
UnicodeString & | setTo (UChar srcChar) |
Set the characters in the UnicodeString object to the code unit srcChar . | |
UnicodeString & | setTo (UChar32 srcChar) |
Set the characters in the UnicodeString object to the code point srcChar . | |
UnicodeString & | setTo (UBool isTerminated, const UChar *text, int32_t textLength) |
Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. | |
UnicodeString & | setTo (UChar *buffer, int32_t buffLength, int32_t buffCapacity) |
Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. | |
void | setToBogus () |
Make this UnicodeString object invalid. | |
UnicodeString & | setCharAt (int32_t offset, UChar ch) |
Set the character at the specified offset to the specified character. | |
UnicodeString & | operator+= (UChar ch) |
Append operator. | |
UnicodeString & | operator+= (UChar32 ch) |
Append operator. | |
UnicodeString & | operator+= (const UnicodeString &srcText) |
Append operator. | |
UnicodeString & | append (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) |
Append the characters in srcText in the range [srcStart , srcStart + srcLength ) to the end of the UnicodeString object. | |
UnicodeString & | append (const UnicodeString &srcText) |
Append the characters in srcText to the end of the UnicodeString object. | |
UnicodeString & | append (const UChar *srcChars, int32_t srcStart, int32_t srcLength) |
Append the characters in srcChars in the range [srcStart , srcStart + srcLength ) to the end of the UnicodeString object. | |
UnicodeString & | append (const UChar *srcChars, int32_t srcLength) |
Append the characters in srcChars to the end of the UnicodeString object. | |
UnicodeString & | append (UChar srcChar) |
Append the code unit srcChar to the UnicodeString object. | |
UnicodeString & | append (UChar32 srcChar) |
Append the code point srcChar to the UnicodeString object. | |
UnicodeString & | insert (int32_t start, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) |
Insert the characters in srcText in the range [srcStart , srcStart + srcLength ) into the UnicodeString object at offset start . | |
UnicodeString & | insert (int32_t start, const UnicodeString &srcText) |
Insert the characters in srcText into the UnicodeString object at offset start . | |
UnicodeString & | insert (int32_t start, const UChar *srcChars, int32_t srcStart, int32_t srcLength) |
Insert the characters in srcChars in the range [srcStart , srcStart + srcLength ) into the UnicodeString object at offset start . | |
UnicodeString & | insert (int32_t start, const UChar *srcChars, int32_t srcLength) |
Insert the characters in srcChars into the UnicodeString object at offset start . | |
UnicodeString & | insert (int32_t start, UChar srcChar) |
Insert the code unit srcChar into the UnicodeString object at offset start . | |
UnicodeString & | insert (int32_t start, UChar32 srcChar) |
Insert the code point srcChar into the UnicodeString object at offset start . | |
UnicodeString & | replace (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) |
Replace the characters in the range [start , start + length ) with the characters in srcText in the range [srcStart , srcStart + srcLength ). | |
UnicodeString & | replace (int32_t start, int32_t length, const UnicodeString &srcText) |
Replace the characters in the range [start , start + length ) with the characters in srcText . | |
UnicodeString & | replace (int32_t start, int32_t length, const UChar *srcChars, int32_t srcStart, int32_t srcLength) |
Replace the characters in the range [start , start + length ) with the characters in srcChars in the range [srcStart , srcStart + srcLength ). | |
UnicodeString & | replace (int32_t start, int32_t length, const UChar *srcChars, int32_t srcLength) |
Replace the characters in the range [start , start + length ) with the characters in srcChars . | |
UnicodeString & | replace (int32_t start, int32_t length, UChar srcChar) |
Replace the characters in the range [start , start + length ) with the code unit srcChar . | |
UnicodeString & | replace (int32_t start, int32_t length, UChar32 srcChar) |
Replace the characters in the range [start , start + length ) with the code point srcChar . | |
UnicodeString & | replaceBetween (int32_t start, int32_t limit, const UnicodeString &srcText) |
Replace the characters in the range [start , limit ) with the characters in srcText . | |
UnicodeString & | replaceBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) |
Replace the characters in the range [start , limit ) with the characters in srcText in the range [srcStart , srcLimit ). | |
virtual void | handleReplaceBetween (int32_t start, int32_t limit, const UnicodeString &text) |
Replace a substring of this object with the given text. | |
virtual UBool | hasMetaData () const |
Replaceable API. | |
virtual void | copy (int32_t start, int32_t limit, int32_t dest) |
Copy a substring of this object, retaining attribute (out-of-band) information. | |
UnicodeString & | findAndReplace (const UnicodeString &oldText, const UnicodeString &newText) |
Replace all occurrences of characters in oldText with the characters in newText. | |
UnicodeString & | findAndReplace (int32_t start, int32_t length, const UnicodeString &oldText, const UnicodeString &newText) |
Replace all occurrences of characters in oldText with characters in newText in the range [start , start + length ). | |
UnicodeString & | findAndReplace (int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength) |
Replace all occurrences of characters in oldText in the range [oldStart , oldStart + oldLength ) with the characters in newText in the range [newStart , newStart + newLength ) in the range [start , start + length ). | |
UnicodeString & | remove (void) |
Remove all characters from the UnicodeString object. | |
UnicodeString & | remove (int32_t start, int32_t length=(int32_t) INT32_MAX) |
Remove the characters in the range [start , start + length ) from the UnicodeString object. | |
UnicodeString & | removeBetween (int32_t start, int32_t limit=(int32_t) INT32_MAX) |
Remove the characters in the range [start , limit ) from the UnicodeString object. | |
UBool | padLeading (int32_t targetLength, UChar padChar=0x0020) |
Pad the start of this UnicodeString with the character padChar . | |
UBool | padTrailing (int32_t targetLength, UChar padChar=0x0020) |
Pad the end of this UnicodeString with the character padChar . | |
UBool | truncate (int32_t targetLength) |
Truncate this UnicodeString to the targetLength . | |
UnicodeString & | trim (void) |
Trims leading and trailing whitespace from this UnicodeString. | |
UnicodeString & | reverse (void) |
Reverse this UnicodeString in place. | |
UnicodeString & | reverse (int32_t start, int32_t length) |
Reverse the range [start , start + length ) in this UnicodeString. | |
UnicodeString & | toUpper (void) |
Convert the characters in this to UPPER CASE following the conventions of the default locale. | |
UnicodeString & | toUpper (const Locale &locale) |
Convert the characters in this to UPPER CASE following the conventions of a specific locale. | |
UnicodeString & | toLower (void) |
Convert the characters in this to lower case following the conventions of the default locale. | |
UnicodeString & | toLower (const Locale &locale) |
Convert the characters in this to lower case following the conventions of a specific locale. | |
UnicodeString & | toTitle (BreakIterator *titleIter) |
Titlecase this string, convenience function using the default locale. | |
UnicodeString & | toTitle (BreakIterator *titleIter, const Locale &locale) |
Titlecase this string. | |
UnicodeString & | toTitle (BreakIterator *titleIter, const Locale &locale, uint32_t options) |
Titlecase this string, with options. | |
UnicodeString & | foldCase (uint32_t options=0) |
Case-fold the characters in this string. | |
UChar * | getBuffer (int32_t minCapacity) |
Get a read/write pointer to the internal buffer. | |
void | releaseBuffer (int32_t newLength=-1) |
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity). | |
const UChar * | getBuffer () const |
Get a read-only pointer to the internal buffer. | |
const UChar * | getTerminatedBuffer () |
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated. | |
UnicodeString () | |
Construct an empty UnicodeString. | |
UnicodeString (int32_t capacity, UChar32 c, int32_t count) | |
Construct a UnicodeString with capacity to hold capacity UChars. | |
UnicodeString (UChar ch) | |
Single UChar (code unit) constructor. | |
UnicodeString (UChar32 ch) | |
Single UChar32 (code point) constructor. | |
UnicodeString (const UChar *text) | |
UChar* constructor. | |
UnicodeString (const UChar *text, int32_t textLength) | |
UChar* constructor. | |
UnicodeString (UBool isTerminated, const UChar *text, int32_t textLength) | |
Readonly-aliasing UChar* constructor. | |
UnicodeString (UChar *buffer, int32_t buffLength, int32_t buffCapacity) | |
Writable-aliasing UChar* constructor. | |
UnicodeString (const char *codepageData, const char *codepage=0) | |
char* constructor. | |
UnicodeString (const char *codepageData, int32_t dataLength, const char *codepage=0) | |
char* constructor. | |
UnicodeString (const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode) | |
char * / UConverter constructor. | |
UnicodeString (const char *src, int32_t length, enum EInvariant inv) | |
Constructs a Unicode string from an invariant-character char * string. | |
UnicodeString (const UnicodeString &that) | |
Copy constructor. | |
UnicodeString (const UnicodeString &src, int32_t srcStart) | |
'Substring' constructor from tail of source string. | |
UnicodeString (const UnicodeString &src, int32_t srcStart, int32_t srcLength) | |
'Substring' constructor from subrange of source string. | |
virtual Replaceable * | clone () const |
Clone this object, an instance of a subclass of Replaceable. | |
virtual | ~UnicodeString () |
Destructor. | |
UnicodeString | unescape () const |
Unescape a string of characters and return a string containing the result. | |
UChar32 | unescapeAt (int32_t &offset) const |
Unescape a single escape sequence and return the represented character. | |
virtual UClassID | getDynamicClassID () const |
ICU "poor man's RTTI", returns a UClassID for the actual class. | |
Static Public Member Functions | |
static UClassID | getStaticClassID () |
ICU "poor man's RTTI", returns a UClassID for this class. | |
Protected Member Functions | |
virtual int32_t | getLength () const |
Implement Replaceable::getLength() (see jitterbug 1027). | |
virtual UChar | getCharAt (int32_t offset) const |
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again (see jitterbug 709). | |
virtual UChar32 | getChar32At (int32_t offset) const |
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709). | |
Friends | |
class | StringCharacterIterator |
class | StringThreadTest |
UnicodeString is a concrete implementation of the abstract class Replaceable (for transliteration).
The UnicodeString class is not suitable for subclassing.
For an overview of Unicode strings in C and C++ see the User Guide Strings chapter.
In ICU, a Unicode string consists of 16-bit Unicode code units. A Unicode character may be stored with either one code unit (the most common case) or with a matched pair of special code units ("surrogates"). The data type for code units is UChar. For single-character handling, a Unicode character code point is a value in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
Indexes and offsets into and lengths of strings always count code units, not code points. This is the same as with multi-byte char* strings in traditional string handling. Operations on partial strings typically do not test for code point boundaries. If necessary, the user needs to take care of such boundaries by testing for the code unit values or by using functions like UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
UnicodeString methods are more lenient with regard to input parameter values than other ICU APIs. In particular:
In string comparisons, two UnicodeString objects that are both "bogus" compare equal (to be transitive and prevent endless loops in sorting), and a "bogus" string compares less than any non-"bogus" one.
Const UnicodeString methods are thread-safe. Multiple threads can use const methods on the same UnicodeString object simultaneously, but non-const methods must not be called concurrently (in multiple threads) with any other (const or non-const) methods.
Similarly, const UnicodeString & parameters are thread-safe. One object may be passed in as such a parameter concurrently in multiple threads. This includes the const UnicodeString & parameters for copy construction, assignment, and cloning.
UnicodeString uses several storage methods. String contents can be stored inside the UnicodeString object itself, in an allocated and shared buffer, or in an outside buffer that is "aliased". Most of this is done transparently, but careful aliasing in particular provides significant performance improvements. Also, the internal buffer is accessible via special functions. For details see the User Guide Strings chapter.
Definition at line 183 of file unistr.h.
|
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a Unicode string from an invariant-character char * string. Use the macro US_INV instead of the full qualification for this value.
|
|
Construct an empty UnicodeString.
|
|
Construct a UnicodeString with capacity to hold capacity UChars.
|
|
Single UChar (code unit) constructor.
|
|
Single UChar32 (code point) constructor.
|
|
UChar* constructor.
|
|
UChar* constructor.
|
|
Readonly-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified. In an assignment to another UnicodeString, the text will be aliased again, so that both strings then alias the same readonly-text.
|
|
Writable-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has write-through semantics: For as long as the capacity of the buffer is sufficient, write operations will directly affect the buffer. When more capacity is necessary, then a new buffer will be allocated and the contents copied as with regularly constructed strings. In an assignment to another UnicodeString, the buffer will be copied. The extract(UChar *dst) function detects whether the dst pointer is the same as the string buffer itself and will in this case not copy the contents.
|
|
char* constructor.
If codepage is an empty string ("" ), then a simple conversion is performed on the codepage-invariant subset ("invariant characters") of the platform encoding. See utypes.h. Recommendation: For invariant-character strings use the constructor UnicodeString(const char *src, int32_t length, enum EInvariant inv) because it avoids object code dependencies of UnicodeString on the conversion code.
|
|
char* constructor.
|
|
char * / UConverter constructor. This constructor uses an existing UConverter object to convert the codepage string to Unicode and construct a UnicodeString from that. The converter is reset at first. If the error code indicates a failure before this constructor is called, or if an error occurs during conversion or construction, then the string will be bogus. This function avoids the overhead of opening and closing a converter if multiple strings are constructed.
|
|
Constructs a Unicode string from an invariant-character char * string. About invariant characters see utypes.h. This constructor has no runtime dependency on conversion code and is therefore recommended over ones taking a charset name string (where the empty string "" indicates invariant-character conversion). Use the macro US_INV as the third, signature-distinguishing parameter. For example:
void fn(const char *s) {
  UnicodeString ustr(s, -1, US_INV);
  // use ustr ...
}
|
|
Copy constructor.
|
|
'Substring' constructor from tail of source string.
|
|
'Substring' constructor from subrange of source string.
|
|
Destructor.
|
|
Append the code point
Definition at line 4038 of file unistr.h. References FALSE, U16_APPEND, and U16_MAX_LENGTH. |
|
Append the code unit
|
|
Append the characters in
|
|
Append the characters in
|
|
Append the characters in
Definition at line 4019 of file unistr.h. References fLength. |
|
Append the characters in
Definition at line 4013 of file unistr.h. Referenced by operator+=(), and Transliterator::setID(). |
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
Definition at line 3487 of file unistr.h. References fLength. |
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
Definition at line 3482 of file unistr.h. References fLength. |
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
|
|
Return the code point that contains the code unit at the given offset. If the offset is not valid (0..length()-1) then U+ffff is returned.
Reimplemented from Replaceable. Definition at line 3904 of file unistr.h. References U16_GET. |
|
Return the code unit at the given offset. If the offset is not valid (0..length()-1) then U+ffff is returned.
Reimplemented from Replaceable. |
|
Clone this object, an instance of a subclass of Replaceable. Clones can be used concurrently in multiple threads. If a subclass does not implement clone(), or if an error occurs, then NULL is returned. The clone functions in all subclasses return a pointer to a Replaceable because some compilers do not support covariant (same-as-this) return types; cast to the appropriate subclass if necessary. The caller must delete the clone.
Reimplemented from Replaceable. |
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in this UnicodeString with the first
|
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in the range [
Definition at line 3363 of file unistr.h. References fLength. |
|
Compare the characters bitwise in this UnicodeString to the characters in
Definition at line 3359 of file unistr.h. References fLength. Referenced by startsWith(). |
|
Compare the characters bitwise in the range [
|
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
Definition at line 3424 of file unistr.h. References fLength. |
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
Definition at line 3420 of file unistr.h. References fLength. |
|
Compare two Unicode strings in code point order. The result may be different from the results of compare(), operator<, etc. if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Copy a substring of this object, retaining attribute (out-of-band) information. This method is used to duplicate or reorder substrings. The destination index must not overlap the source range.
Implements Replaceable. |
|
Count Unicode code points in the length UChar code units of the string. A code point may occupy either one or two UChar code units. Counting code points involves reading all code units. This function is basically the inverse of moveIndex32().
|
|
Determine if this ends with the characters in
Definition at line 3751 of file unistr.h. References u_strlen(). |
|
Determine if this ends with the characters in
Definition at line 3741 of file unistr.h. References u_strlen(). |
|
Determine if this ends with the characters in
Definition at line 3732 of file unistr.h. References pinIndices(). |
|
Determine if this ends with the characters in
Definition at line 3727 of file unistr.h. References fLength. |
|
Convert the UnicodeString into a codepage string using an existing UConverter. The output string is NUL-terminated if possible. This function avoids the overhead of opening and closing a converter if multiple strings are extracted.
|
|
Copy the characters in the range [
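This function does not write any more than the given target buffer length in characters, but returns the length of the entire output string so that one can allocate a larger buffer and call the function again if necessary. The output string is NUL-terminated if possible. Recommendation: For invariant-character strings use extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const because it avoids object code dependencies of UnicodeString on the conversion code.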
|
|
Copy the characters in the range [ The output string is NUL-terminated. Recommendation: For invariant-character strings use extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const because it avoids object code dependencies of UnicodeString on the conversion code.
Definition at line 3863 of file unistr.h. References extract(). |
|
Copy the characters in the range [ All characters must be invariant (see utypes.h). Use US_INV as the last, signature-distinguishing parameter.
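This function does not write any more than targetCapacity characters, but returns the length of the entire output string so that one can allocate a larger buffer and call the function again if necessary. The output string is NUL-terminated if possible.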
|
|
Copy the characters in the range [
|
|
Copy the contents of the string into dest. This is a convenience function that checks if there is enough space in dest, extracts the entire string if possible, and NUL-terminates dest if possible. If the string fits into dest but cannot be NUL-terminated (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the string itself does not fit into dest (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
If the string aliases to dest itself as an external buffer, then extract() will not copy the contents.
|
|
Copy the characters in the range [
If the string aliases to the destination buffer itself as an external buffer, then extract() will not copy the contents.
Definition at line 3848 of file unistr.h. Referenced by extract(). |
|
Copy the characters in the range [ Replaceable API.
Implements Replaceable. |
|
Copy the characters in the range [
|
|
Almost the same as the assignment operator.
Replace the characters in this UnicodeString with the characters from the source string. This function works the same for all strings except for ones that are readonly aliases. Starting with ICU 2.4, the assignment operator and the copy constructor allocate a new buffer and copy the buffer contents even for readonly aliases. This function implements the old, more efficient but less safe behavior of making this string also a readonly alias to the same buffer. The fastCopyFrom function must be used only if it is known that the lifetime of this UnicodeString is at least as long as the lifetime of the aliased buffer including its contents, for example for strings from resource bundles or aliases to string contents.
|
|
Replace all occurrences of characters in oldText in the range [
|
|
Replace all occurrences of characters in oldText with characters in newText in the range [
Definition at line 3831 of file unistr.h. References findAndReplace(), and fLength. |
|
Replace all occurrences of characters in oldText with the characters in newText.
Definition at line 3825 of file unistr.h. References fLength. Referenced by findAndReplace(). |
|
Case-fold the characters in this string. Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i that are marked with 'I' in CaseFolding.txt. The result may be longer or shorter than the original.
|
|
Get a read-only pointer to the internal buffer. This can be called at any time on a valid UnicodeString. It returns 0 if the string is bogus, or during an "open" getBuffer(minCapacity). It can be called as many times as desired. The pointer that it returns will remain valid until the UnicodeString object is modified, at which time the pointer is semantically invalidated and must not be used any more. The capacity of the buffer can be determined with getCapacity(). The part after length() may or may not be initialized and valid, depending on the history of the UnicodeString object.
The buffer contents is (probably) not NUL-terminated. You can check if it is with (s.length()<s.getCapacity() && buffer[s.length()]==0). (See getTerminatedBuffer().) The buffer may reside in read-only memory. Its contents must not be modified.
|
|
Get a read/write pointer to the internal buffer. The buffer is guaranteed to be large enough for at least minCapacity UChars, writable, and is still owned by the UnicodeString object. Calls to getBuffer(minCapacity) must not be nested, and must be matched with calls to releaseBuffer(newLength). If the string buffer was read-only or shared, then it will be reallocated and copied. An attempted nested call will return 0, and will not further modify the state of the UnicodeString object. It also returns 0 if the string is bogus. The actual capacity of the string buffer may be larger than minCapacity. getCapacity() returns the actual capacity. For many operations, the full capacity should be used to avoid reallocations. While the buffer is "open" between getBuffer(minCapacity) and releaseBuffer(newLength), the following applies:
Referenced by Normalizer::compare(), Normalizer::isNormalized(), and Normalizer::quickCheck(). |
|
Return the capacity of the internal buffer of the UnicodeString object. This is useful together with the getBuffer functions. See there for details.
|
|
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709).
Implements Replaceable. |
|
Adjust a random-access offset so that it points behind a Unicode character. The offset that is passed in points behind any code unit of a code point, while the returned offset will point behind the last code unit of the same code point. In UTF-16, if the input offset points behind the first surrogate (i.e., to the second surrogate) of a surrogate pair, then the returned offset will point behind the second surrogate (i.e., to the code unit immediately following the surrogate pair).
Definition at line 3926 of file unistr.h. References U16_SET_CP_LIMIT. |
|
Adjust a random-access offset so that it points to the beginning of a Unicode character. The offset that is passed in points to any code unit of a code point, while the returned offset will point to the first code unit of the same code point. In UTF-16, if the input offset points to a second surrogate of a surrogate pair, then the returned offset will point to the first surrogate.
Definition at line 3916 of file unistr.h. References U16_SET_CP_START. |
|
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again (see jitterbug 709).
Implements Replaceable. |
|
ICU "poor man's RTTI", returns a UClassID for the actual class.
Implements UObject. |
|
Implement Replaceable::getLength() (see jitterbug 1027).
Implements Replaceable. |
|
ICU "poor man's RTTI", returns a UClassID for this class.
|
|
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated. This can be called at any time on a valid UnicodeString. It returns 0 if the string is bogus, or during an "open" getBuffer(minCapacity), or if the buffer cannot be NUL-terminated (because memory allocation failed). It can be called as many times as desired. The pointer that it returns will remain valid until the UnicodeString object is modified, at which time the pointer is semantically invalidated and must not be used any more. The capacity of the buffer can be determined with getCapacity(). The part after length()+1 may or may not be initialized and valid, depending on the history of the UnicodeString object. The buffer contents is guaranteed to be NUL-terminated. getTerminatedBuffer() may reallocate the buffer if a terminating NUL is written. For this reason, this function is not const, unlike getBuffer(). Note that a UnicodeString may also contain NUL characters as part of its contents. The buffer may reside in read-only memory. Its contents must not be modified.
|
|
Replace a substring of this object with the given text.
Implements Replaceable. |
|
Generate a hash code for this object.
|
|
Replaceable API.
Reimplemented from Replaceable. |
|
Check if the length UChar code units of the string contain more Unicode code points than a certain number. This is more efficient than counting all code points in this part of the string and comparing that number with a threshold. This function may not need to scan the string at all if the length falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to (countChar32(start, length)>number). A Unicode code point may occupy either one or two UChar code units.
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence of the BMP code point
|
|
Locate in this the first occurrence of the code point
Definition at line 3615 of file unistr.h. References indexOf(). |
|
Locate in this the first occurrence of the BMP code point
|
|
Locate in this the first occurrence of the code point
Definition at line 3604 of file unistr.h. References indexOf(). |
|
Locate in this the first occurrence of the BMP code point
|
|
Locate in this the first occurrence in the range [
|
|
Locate in this the first occurrence in the range [
Definition at line 3581 of file unistr.h. References indexOf(). |
|
Locate in this the first occurrence of the characters in
Definition at line 3573 of file unistr.h. References indexOf(). |
|
Locate in this the first occurrence in the range [
Definition at line 3540 of file unistr.h. References getArrayStart(), indexOf(), isBogus(), and pinIndices(). |
|
Locate in this the first occurrence in the range [
|
|
Locate in this the first occurrence of the characters in
|
|
Locate in this the first occurrence of the characters in
Definition at line 3556 of file unistr.h. References fLength. Referenced by indexOf(). |
|
Insert the code point
Definition at line 4090 of file unistr.h. References replace(). |
|
Insert the code unit
|
|
Insert the characters in
|
|
Insert the characters in
|
|
Insert the characters in
Definition at line 4067 of file unistr.h. References fLength. |
|
Insert the characters in
|
|
Determine if this object contains a valid string. A bogus string has no value. It is different from an empty string. It can be used to indicate that no string value is available. getBuffer() and getTerminatedBuffer() return NULL, and length() returns 0.
Definition at line 3295 of file unistr.h. Referenced by indexOf(), lastIndexOf(), operator==(), remove(), and truncate(). |
|
Determine if this string is empty.
|
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence of the BMP code point
|
|
Locate in this the last occurrence of the code point
Definition at line 3699 of file unistr.h. References lastIndexOf(). |
|
Locate in this the last occurrence of the BMP code point
|
|
Locate in this the last occurrence of the code point
Definition at line 3687 of file unistr.h. References lastIndexOf(). |
|
Locate in this the last occurrence of the BMP code point
|
|
Locate in this the last occurrence in the range [
|
|
Locate in this the last occurrence in the range [
Definition at line 3622 of file unistr.h. References lastIndexOf(). |
|
Locate in this the last occurrence of the characters in
Definition at line 3629 of file unistr.h. References lastIndexOf(). |
|
Locate in this the last occurrence in the range [
Definition at line 3637 of file unistr.h. References getArrayStart(), isBogus(), lastIndexOf(), and pinIndices(). |
|
Locate in this the last occurrence in the range [
Definition at line 3653 of file unistr.h. References fLength, and lastIndexOf(). |
|
Locate in this the last occurrence of the characters in
Definition at line 3659 of file unistr.h. References fLength, and lastIndexOf(). |
|
Locate in this the last occurrence of the characters in
Definition at line 3666 of file unistr.h. References fLength. Referenced by lastIndexOf(). |
|
Return the length of the UnicodeString object. The length is the number of UChar code units in the UnicodeString. If you want the number of code points, please use countChar32().
Reimplemented from Replaceable. Definition at line 3283 of file unistr.h. Referenced by Normalizer::compare(), Normalizer::isNormalized(), Normalizer::quickCheck(), and Transliterator::setID(). |
|
Move the code unit index along the string by delta code points. Interpret the input index as a code unit-based offset into the string, move the index forward or backward by delta code points, and return the resulting index. The input index should point to the first code unit of a code point, if there is more than one. Both input and output indexes are code unit-based as for all string indexes/offsets in ICU (and other libraries, like MBCS char*). If delta<0 then the index is moved backward (toward the start of the string). If delta>0 then the index is moved forward (toward the end of the string). This behaves like CharacterIterator::move32(delta, kCurrent).
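Behavior for out-of-bounds indexes: moveIndex32 pins the input index to 0..length(), i.e., if the input index<0 then it is pinned to 0; if it is index>length() then it is pinned to length(). Afterwards, the index is moved by delta code points forward or backward, but no further backward than to 0 and no further forward than to length(). The resulting index return value will be in between 0 and length(), inclusively.
Examples:
// s has code points 'a' U+10000 'b' U+10ffff U+2029
UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
// initial index: position of U+10000
int32_t index=1;
// the following examples will all result in index==4, position of U+10ffff
// skip 2 code points from some position in the string
index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
// go to the 3rd code point from the start of s (0-based)
index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
// go to the next-to-last code point of s
index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff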
|
|
Inequality operator. Performs only bitwise comparison.
|
|
Append operator.
Append the characters in
Definition at line 4056 of file unistr.h. References fLength. |
|
Append operator.
Append the code point
Definition at line 4051 of file unistr.h. References append(). |
|
Append operator.
Append the code unit
|
|
Less than operator. Performs only bitwise comparison.
Definition at line 3347 of file unistr.h. References fLength. |
|
Less than or equal operator. Performs only bitwise comparison.
Definition at line 3355 of file unistr.h. References fLength. |
|
Assignment operator.
Replace the characters in this UnicodeString with the code point
Definition at line 3962 of file unistr.h. References replace(). |
|
Assignment operator.
Replace the characters in this UnicodeString with the code unit
|
|
Assignment operator.
Replace the characters in this UnicodeString with the characters from
|
|
Equality operator. Performs only bitwise comparison.
|
|
Greater than operator. Performs only bitwise comparison.
Definition at line 3343 of file unistr.h. References fLength. |
|
Greater than or equal operator. Performs only bitwise comparison.
Definition at line 3351 of file unistr.h. References fLength. |
|
Return the code unit at the given offset. If the offset is not valid (0..length()-1) then U+ffff is returned.
|
|
Pad the start of this UnicodeString with the character padChar. If the length of this UnicodeString is less than targetLength, targetLength - length() copies of padChar will be added to the beginning of this UnicodeString.
|
|
Pad the end of this UnicodeString with the character padChar. If the length of this UnicodeString is less than targetLength, targetLength - length() copies of padChar will be added to the end of this UnicodeString.
|
|
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity). This function must be called in a matched pair with getBuffer(minCapacity). releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". It will set the string length to newLength, at most to the current capacity. If newLength==-1 then it will set the length according to the first NUL in the buffer, or to the capacity if there is no NUL. After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
|
|
Remove the characters in the range [
|
|
Remove all characters from the UnicodeString object.
Definition at line 4096 of file unistr.h. References isBogus(). Referenced by remove(). |
|
Remove the characters in the range [
Definition at line 4119 of file unistr.h. References NULL. |
|
Replace the characters in the range [
Definition at line 3800 of file unistr.h. References FALSE, U16_APPEND, and U16_MAX_LENGTH. |
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Replace the characters in the range [
Definition at line 3765 of file unistr.h. References fLength. |
|
Replace the characters in the range [
Definition at line 3771 of file unistr.h. Referenced by insert(), operator=(), and setTo(). |
|
Replace the characters in the range [
|
|
Replace the characters in the range [
Definition at line 3811 of file unistr.h. References fLength. |
|
Reverse the range [
|
|
Reverse this UnicodeString in place.
|
|
Set the character at the specified offset to the specified character.
|
|
Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has write-through semantics: For as long as the capacity of the buffer is sufficient, write operations will directly affect the buffer. When more capacity is necessary, then a new buffer will be allocated and the contents copied as with regularly constructed strings. In an assignment to another UnicodeString, the buffer will be copied. The extract(UChar *dst) function detects whether the dst pointer is the same as the string buffer itself and will in this case not copy the contents.
|
|
Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified. In an assignment to another UnicodeString, the text will be aliased again, so that both strings then alias the same readonly-text.
|
|
Set the characters in the UnicodeString object to the code point
Definition at line 4006 of file unistr.h. References replace(). |
|
Set the characters in the UnicodeString object to the code unit
|
|
Set the characters in the UnicodeString object to the characters in
|
|
Set the text in the UnicodeString object to the characters in
Definition at line 3984 of file unistr.h. References fLength. |
|
Set the text in the UnicodeString object to the characters in
|
|
Set the text in the UnicodeString object to the characters in
Definition at line 3975 of file unistr.h. References fLength, and pinIndex(). |
|
Make this UnicodeString object invalid. The string will test TRUE with isBogus(). A bogus string has no value. It is different from an empty string. It can be used to indicate that no string value is available. getBuffer() and getTerminatedBuffer() return NULL, and length() returns 0. This utility function is used throughout the UnicodeString implementation to indicate that a UnicodeString operation failed, and may be used in other functions, especially but not exclusively when such functions do not take a UErrorCode for simplicity. The following methods, and no others, will clear a string object's bogus flag:
The simplest way to turn a bogus string into an empty one is to use the remove() function. Examples for other functions that are equivalent to "set to empty string":
if(s.isBogus()) {
  s.remove();             // set to an empty string (remove all), or
  s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  s.truncate(0);          // set to an empty string (complete truncation), or
  s=UnicodeString();      // assign an empty string, or
  s.setTo((UChar32)-1);   // set to a pseudo code point that is out of range, or
  static const UChar nul=0;
  s.setTo(&nul, 0);       // set to an empty C Unicode string
}
|
|
Determine if this starts with the characters in
|
|
Determine if this starts with the characters in
|
|
Determine if this starts with the characters in
|
|
Determine if this starts with the characters in
|
|
Convert the characters in this to lower case following the conventions of a specific locale.
|
|
Convert the characters in this to lower case following the conventions of the default locale.
|
|
Titlecase this string, with options. Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others. (This can be modified with options.) The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21. This function uses only the setText(), first() and next() methods of the provided break iterator.
|
|
Titlecase this string. Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others. The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21. This function uses only the setText(), first() and next() methods of the provided break iterator.
|
|
Titlecase this string, convenience function using the default locale. Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others. The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21. This function uses only the setText(), first() and next() methods of the provided break iterator.
|
|
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
|
|
Convert the characters in this to UPPER CASE following the conventions of the default locale.
|
|
Trims leading and trailing whitespace from this UnicodeString.
|
|
Truncate this UnicodeString to the
Definition at line 4124 of file unistr.h. References FALSE, isBogus(), and TRUE. Referenced by Transliterator::setID(). |
|
Unescape a string of characters and return a string containing the result. The following escape sequences are recognized:
\uhhhh       4 hex digits; h in [0-9A-Fa-f]
\Uhhhhhhhh   8 hex digits
\xhh         1-2 hex digits
\ooo         1-3 octal digits; o in [0-7]
\cX          control-X; X is masked with 0x1F
as well as the standard ANSI C escapes:
\a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A, \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B, \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
Anything else following a backslash is generically escaped. For example, "[a\\-z]" returns "[a-z]". If an escape sequence is ill-formed, this method returns an empty string. An example of an ill-formed sequence is "\\u" followed by fewer than 4 hex digits. This function is similar to u_unescape() but not identical to it. The latter takes a source char*, so it does escape recognition and also invariant conversion.
|
|
Unescape a single escape sequence and return the represented character. See unescape() for a listing of the recognized escape sequences. The character at offset-1 is assumed (without checking) to be a backslash. If the escape sequence is ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is returned.
|