52 return mIter == mString->mData.begin();
57 return mIter == mString->mData.end();
62 return mIter - mString->mData.begin();
67 mIter = mString->mData.begin() + index;
73 return mString->getChar( current_index );
79 int change = mString->setChar( current_index, uc );
80 _jump_to( current_index );
87 if ( _test_end() )
return;
93 lead_half = mIter[-1];
103 if ( _test_begin() )
return;
108 lead_half = mIter[-1];
218 return _getCharacter();
223 return _setCharacter( uc );
334 return _getCharacter();
555 assign( str, index, length );
558 #if MYGUI_IS_NATIVE_WCHAR_T
624 return mData.max_size();
629 mData.reserve( size );
634 mData.resize( num, val );
639 mData.swap( from.mData );
644 return mData.empty();
649 return mData.c_str();
659 return mData.capacity();
672 tmp.mData.swap( data );
684 #if MYGUI_IS_NATIVE_WCHAR_T
688 mData.push_back( static_cast<code_point>( val ) );
694 mData.push_back( val );
699 mData.push_back( static_cast<code_point>( val ) );
715 return *m_buffer.mStrBuffer;
721 return m_buffer.mStrBuffer->c_str();
726 _load_buffer_UTF32();
727 return *m_buffer.mUTF32StrBuffer;
732 _load_buffer_UTF32();
733 return m_buffer.mUTF32StrBuffer->c_str();
739 return *m_buffer.mWStrBuffer;
745 return m_buffer.mWStrBuffer->c_str();
750 return mData.at( loc );
755 return mData.at( loc );
768 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
785 if ( newSize > existingSize ) {
787 insert( loc + 1, 1, cp[1] );
790 if ( newSize < existingSize ) {
798 if ( l == 2 )
at( loc + 1 ) = cp[1];
805 i.
mIter = mData.begin();
821 i.
mIter = mData.end();
837 i.
mIter = mData.end();
853 i.
mIter = mData.begin();
874 mData.assign( str.mData );
886 mData.assign( str, num );
892 mData.assign( str.mData, index, len );
898 mData.assign( num, ch );
905 mData.reserve( wstr.length() );
906 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
908 std::wstring::const_iterator i, ie = wstr.end();
909 for ( i = wstr.begin(); i != ie; i++ ) {
911 mData.push_back( tmp );
913 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
916 std::wstring::const_iterator i, ie = wstr.end();
917 for ( i = wstr.begin(); i != ie; i++ ) {
920 if ( l > 0 ) mData.push_back( cp[0] );
921 if ( l > 1 ) mData.push_back( cp[1] );
927 #if MYGUI_IS_NATIVE_WCHAR_T
953 unsigned char utf8buf[7];
960 std::string::const_iterator i, ie = str.end();
961 for ( i = str.begin(); i != ie; i++ ) {
963 for (
size_t j = 0; j < utf8len; j++ ) {
964 utf8buf[j] = (
static_cast<unsigned char>( *( i + j ) ) );
966 utf8buf[utf8len] = 0;
971 append( utf16buff, utf16len );
978 std::string tmp( c_str );
991 mData.append( str.mData );
1003 mData.append( str.mData, index, len );
1009 mData.append( str, num );
1015 mData.append( num, ch );
1025 #if MYGUI_IS_NATIVE_WCHAR_T
1028 std::wstring tmp( w_str, num );
1034 return append( num, static_cast<unicode_char>( ch ) );
1046 append( num, static_cast<code_point>( ch ) );
1076 mData.insert( index, str.mData );
1082 mData.insert( index1, str.mData, index2, num );
1093 mData.insert( index, str, num );
1097 #if MYGUI_IS_NATIVE_WCHAR_T
1115 mData.insert( index, num, ch );
1119 #if MYGUI_IS_NATIVE_WCHAR_T
1122 insert( index, num, static_cast<unicode_char>( ch ) );
1129 insert( index, num, static_cast<code_point>( ch ) );
1138 return insert( index, num, cp[0] );
1142 insert( index, 1, cp[1] );
1143 insert( index, 1, cp[0] );
1150 mData.insert( i.
mIter, num, ch );
1152 #if MYGUI_IS_NATIVE_WCHAR_T
1155 insert( i, num, static_cast<unicode_char>( ch ) );
1161 insert( i, num, static_cast<code_point>( ch ) );
1198 mData.erase( index );
1200 mData.erase( index, num );
1206 mData.replace( index1, num1, str.mData, 0,
npos );
1212 mData.replace( index1, num1, str.mData, 0, num2 );
1218 mData.replace( index1, num1, str.mData, index2, num2 );
1228 return replace( index1, num1, str, 0, num );
1233 mData.replace( index, num1, num2, ch );
1243 return replace( index1, num1, num, ch );
1248 return mData.compare( str.mData );
1253 return mData.compare( str );
1258 return mData.compare( index, length, str.mData );
1263 return mData.compare( index, length, str.mData, index2, length2 );
1268 return mData.compare( index, length, str, length2 );
1271 #if MYGUI_IS_NATIVE_WCHAR_T
1274 UString tmp( w_str, length2 );
1275 return compare( index, length, tmp );
1281 UString tmp( c_str, length2 );
1282 return compare( index, length, tmp );
1287 return mData.find( str.
c_str(), index );
1302 #if MYGUI_IS_NATIVE_WCHAR_T
1306 return mData.find( tmp.c_str(), index,
length );
1312 return find( static_cast<code_point>( ch ), index );
1317 return mData.find( ch, index );
1320 #if MYGUI_IS_NATIVE_WCHAR_T
1323 return find( static_cast<unicode_char>( ch ), index );
1336 return mData.rfind( str.
c_str(), index );
1342 return mData.rfind( tmp.
c_str(), index, num );
1348 return mData.rfind( tmp.
c_str(), index, num );
1351 #if MYGUI_IS_NATIVE_WCHAR_T
1355 return mData.rfind( tmp.c_str(), index, num );
1361 return rfind( static_cast<code_point>( ch ), index );
1366 return mData.rfind( ch, index );
1369 #if MYGUI_IS_NATIVE_WCHAR_T
1372 return rfind( static_cast<unicode_char>( ch ), index );
1387 while ( i < num && ( index + i ) < len ) {
1405 return find_first_of( static_cast<code_point>( ch ), index );
1408 #if MYGUI_IS_NATIVE_WCHAR_T
1411 return find_first_of( static_cast<unicode_char>( ch ), index );
1426 while ( i < num && ( index + i ) < len ) {
1447 #if MYGUI_IS_NATIVE_WCHAR_T
1465 if ( index > len ) index = len - 1;
1467 while ( i < num && ( index - i ) !=
npos ) {
1489 #if MYGUI_IS_NATIVE_WCHAR_T
1492 return find_last_of( static_cast<unicode_char>( ch ), index );
1507 if ( index > len ) index = len - 1;
1509 while ( i < num && ( index - i ) !=
npos ) {
1536 #if MYGUI_IS_NATIVE_WCHAR_T
1577 #if MYGUI_IS_NATIVE_WCHAR_T
1621 UString::operator std::string()
const
1623 return std::string(
asUTF8() );
1627 UString::operator std::wstring()
const
1629 return std::wstring(
asWStr() );
1635 if ( 0xD800 <= cp && cp <= 0xDFFF )
1642 if ( 0xD800 <= cp && cp <= 0xDBFF )
1649 if ( 0xDC00 <= cp && cp <= 0xDFFF )
1656 if ( 0xD800 <= cp && cp <= 0xDBFF )
1672 bool wordPair =
false;
1675 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1677 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1686 unsigned short cU = cp1, cL = cp2;
1690 out_uc = ( cU & 0x03FF ) << 10;
1691 out_uc |= ( cL & 0x03FF );
1699 if ( in_uc <= 0xFFFF ) {
1708 tmp =
static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
1713 tmp =
static_cast<unsigned short>(uc & 0x03FF);
1722 return ( cp & ~_cont_mask ) != _cont;
1727 if ( !( cp & 0x80 ) )
return 1;
1728 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
1729 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
1730 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
1731 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
1732 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
1733 throw invalid_data(
"invalid UTF-8 sequence header value" );
1746 if ( !( uc & ~0x0000007F ) )
return 1;
1747 if ( !( uc & ~0x000007FF ) )
return 2;
1748 if ( !( uc & ~0x0000FFFF ) )
return 3;
1749 if ( !( uc & ~0x001FFFFF ) )
return 4;
1750 if ( !( uc & ~0x03FFFFFF ) )
return 5;
1751 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
1767 c = in_cp[i] & _lead5_mask;
1770 c = in_cp[i] & _lead4_mask;
1773 c = in_cp[i] & _lead3_mask;
1776 c = in_cp[i] & _lead2_mask;
1779 c = in_cp[i] & _lead1_mask;
1783 for ( ++i; i <
len; i++ ) {
1784 if (( in_cp[i] & ~_cont_mask ) != _cont )
1787 c |= ( in_cp[i] & _cont_mask );
1800 for (
size_t i = len - 1; i > 0; i-- ) {
1801 out_cp[i] =
static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1808 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1811 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1814 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1817 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1820 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1824 out_cp[0] =
static_cast<unsigned char>(( c ) & 0x7F);
1834 std::string tmp( reinterpret_cast<const char*>( c_str ) );
1840 std::string::const_iterator i, ie = str.end();
1846 if (( *i ) & 0x80 ) {
1847 unsigned char c = ( *i );
1848 size_t contBytes = 0;
1851 if (( c & ~_lead1_mask ) == _lead1 ) {
1852 if ( c == _lead1 )
throw invalid_data(
"overlong UTF-8 sequence" );
1855 }
else if (( c & ~_lead2_mask ) == _lead2 ) {
1857 if ( c == _lead2 ) {
1859 if (( c & _lead2 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1862 }
else if (( c & ~_lead3_mask ) == _lead3 ) {
1864 if ( c == _lead3 ) {
1866 if (( c & _lead3 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1869 }
else if (( c & ~_lead4_mask ) == _lead4 ) {
1871 if ( c == _lead4 ) {
1873 if (( c & _lead4 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1876 }
else if (( c & ~_lead5_mask ) == _lead5 ) {
1878 if ( c == _lead5 ) {
1880 if (( c & _lead5 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1885 while ( contBytes-- ) {
1887 if (( c & ~_cont_mask ) != _cont )
1897 void UString::_init()
1899 m_buffer.mVoidBuffer = 0;
1900 m_bufferType = bt_none;
1904 void UString::_cleanBuffer()
const
1906 if ( m_buffer.mVoidBuffer != 0 ) {
1907 switch ( m_bufferType ) {
1909 delete m_buffer.mStrBuffer;
1912 delete m_buffer.mWStrBuffer;
1914 case bt_utf32string:
1915 delete m_buffer.mUTF32StrBuffer;
1921 assert(
"This should never happen - mVoidBuffer should never contain something if we "
1922 "don't know the type");
1925 m_buffer.mVoidBuffer = 0;
1927 m_bufferType = bt_none;
1931 void UString::_getBufferStr()
const
1933 if ( m_bufferType != bt_string ) {
1935 m_buffer.mStrBuffer =
new std::string();
1936 m_bufferType = bt_string;
1938 m_buffer.mStrBuffer->clear();
1941 void UString::_getBufferWStr()
const
1943 if ( m_bufferType != bt_wstring ) {
1945 m_buffer.mWStrBuffer =
new std::wstring();
1946 m_bufferType = bt_wstring;
1948 m_buffer.mWStrBuffer->clear();
1951 void UString::_getBufferUTF32Str()
const
1953 if ( m_bufferType != bt_utf32string ) {
1956 m_bufferType = bt_utf32string;
1958 m_buffer.mUTF32StrBuffer->clear();
1961 void UString::_load_buffer_UTF8()
const
1964 std::string& buffer = ( *m_buffer.mStrBuffer );
1965 buffer.reserve(
length() );
1967 unsigned char utf8buf[6];
1968 char* charbuf = (
char* )utf8buf;
1974 c = i.getCharacter();
1978 buffer.push_back( charbuf[j++] );
1982 void UString::_load_buffer_WStr()
const
1985 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
1986 buffer.reserve(
length() );
1987 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
1989 for ( i =
begin(); i != ie; ++i ) {
1990 buffer.push_back((
wchar_t )( *i ) );
1992 #else // wchar_t fits UTF-32
1996 c = i.getCharacter();
1997 buffer.push_back((
wchar_t )c );
2002 void UString::_load_buffer_UTF32()
const
2004 _getBufferUTF32Str();
2005 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2006 buffer.reserve(
length() );
2012 c = i.getCharacter();
2013 buffer.push_back( c );
code_point & operator[](size_type index)
code point dereference operator
std::basic_string< code_point > dstring
base iterator class for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator--()
pre-decrement
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space ...
const value_type & operator*() const
dereference operator
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
void resize(size_type num, const code_point &val=0)
changes the size of the string to size, filling in any new area with val
UString & append(const UString &str)
appends str on to the end of the current string
_rev_iterator & operator--()
pre-decrement
void _seekRev(size_type c)
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
int _setCharacter(unicode_char uc)
bool operator>(const UString &right) const
greater than operator
const code_point * c_str() const
returns a pointer to the first character in the current string
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number ...
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character ...
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that matches any character in str...
UString()
default constructor, creates an empty string
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator*() const
dereference operator
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
_rev_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
size_t size_type
size type used to indicate string size and character positions within the string
This exception is used when invalid data streams are encountered.
_const_rev_iterator & operator++()
pre-increment
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
_const_fwd_iterator const_iterator
const iterator
static const size_type npos
the usual constant representing: not found, no limit, etc
const forward iterator for UString
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
bool operator<(const UString &right) const
less than operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator*() const
dereference operator
_const_rev_iterator operator+(difference_type n)
addition operator
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding, returns the number of code points used to encode the input (always 1 or 2)
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
void push_back(unicode_char val)
appends val to the end of the string
int compare(const UString &str) const
compare str to the current string
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
void clear()
deletes all of the elements in the string
const reverse iterator for UString
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
iterator begin()
returns an iterator to the first element of the string
_fwd_iterator iterator
iterator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream ...
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
_const_fwd_iterator & operator++()
pre-increment
bool operator==(const UString &right) const
equality operator
uint16 code_point
a single UTF-16 code point
_fwd_iterator operator+(difference_type n)
addition operator
void _seekFwd(size_type c)
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
unicode_char _getCharacter() const
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
void swap(UString &from)
exchanges the elements of the current string with those of from
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc...
_fwd_iterator operator-(difference_type n)
subtraction operator
_rev_iterator operator+(difference_type n)
addition operator
size_type length() const
Returns the number of code points in the current string.
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
code_point value_type
value type typedef for use in iterators
value_type & operator[](difference_type n) const
dereference at offset operator
void reserve(size_type size)
sets the capacity of the string to at least size code points
const value_type & operator[](difference_type n) const
dereference at offset operator
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream ...
bool operator<=(const UString &right) const
less than or equal operator
forward iterator for UString
uint32 unicode_char
a single 32-bit Unicode character
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
_const_rev_iterator & operator--()
pre-decrement
void _become(const _base_iterator &i)
_fwd_iterator & operator--()
pre-decrement
_rev_iterator & operator++()
pre-increment
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream ...
forward iterator for UString
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream ...
size_type length_Characters() const
Returns the number of Unicode characters in the string.
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
size_type _get_index() const
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
_rev_iterator & operator+=(difference_type n)
addition assignment operator
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
value_type & operator[](difference_type n) const
dereference at offset operator
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const code_point * data() const
returns a pointer to the first character in the current string
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
iterator end()
returns an iterator just past the end of the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str ...
size_type size() const
Returns the number of code points in the current string.
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str...
value_type & operator*() const
dereference operator
const value_type & operator[](difference_type n) const
dereference at offset operator
bool operator!=(const UString &right) const
inequality operator
float len(float x, float y)
bool empty() const
returns true if the string has no elements, false otherwise
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator & operator++()
pre-increment
void _jump_to(size_type index)
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array