37 return mIter == mString->mData.begin();
42 return mIter == mString->mData.end();
47 return mIter - mString->mData.begin();
52 mIter = mString->mData.begin() + index;
58 return mString->getChar( current_index );
64 int change = mString->setChar( current_index, uc );
65 _jump_to( current_index );
72 if ( _test_end() )
return;
78 lead_half = mIter[-1];
88 if ( _test_begin() )
return;
93 lead_half = mIter[-1];
206 return _getCharacter();
211 return _setCharacter( uc );
325 return _getCharacter();
543#if MYGUI_IS_NATIVE_WCHAR_T
615 return mData.max_size();
620 mData.reserve(
size );
625 mData.resize( num, val );
630 mData.swap( from.mData );
635 return mData.empty();
640 return mData.c_str();
650 return mData.capacity();
663 tmp.mData.swap(
data );
675#if MYGUI_IS_NATIVE_WCHAR_T
679 mData.push_back(
static_cast<code_point>( val ) );
685 mData.push_back( val );
690 mData.push_back(
static_cast<code_point>( val ) );
706 return *m_buffer.mStrBuffer;
712 return m_buffer.mStrBuffer->c_str();
717 _load_buffer_UTF32();
718 return *m_buffer.mUTF32StrBuffer;
723 _load_buffer_UTF32();
724 return m_buffer.mUTF32StrBuffer->c_str();
730 return *m_buffer.mWStrBuffer;
736 return m_buffer.mWStrBuffer->c_str();
741 return mData.at( loc );
746 return mData.at( loc );
759 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
776 if ( newSize > existingSize ) {
778 insert( loc + 1, 1, cp[1] );
781 if ( newSize < existingSize ) {
789 if ( l == 2 )
at( loc + 1 ) = cp[1];
796 i.
mIter = mData.begin();
812 i.
mIter = mData.end();
828 i.
mIter = mData.end();
844 i.
mIter = mData.begin();
865 mData.assign( str.mData );
877 mData.assign( str, num );
883 mData.assign( str.mData, index,
len );
889 mData.assign( num, ch );
896 mData.reserve( wstr.length() );
899 std::wstring::const_iterator i, ie = wstr.end();
900 for ( i = wstr.begin(); i != ie; i++ ) {
902 mData.push_back( tmp );
907 std::wstring::const_iterator i, ie = wstr.end();
908 for ( i = wstr.begin(); i != ie; i++ ) {
911 if ( l > 0 ) mData.push_back( cp[0] );
912 if ( l > 1 ) mData.push_back( cp[1] );
918#if MYGUI_IS_NATIVE_WCHAR_T
944 unsigned char utf8buf[7];
951 std::string::const_iterator i, ie = str.end();
952 for ( i = str.begin(); i != ie; i++ ) {
954 for (
size_t j = 0; j < utf8len; j++ ) {
955 utf8buf[j] = (
static_cast<unsigned char>( *( i + j ) ) );
957 utf8buf[utf8len] = 0;
962 append( utf16buff, utf16len );
969 for (
const auto& character : str)
978 std::string tmp(
c_str );
991 mData.append( str.mData );
1003 mData.append( str.mData, index,
len );
1009 mData.append( str, num );
1015 mData.append( num, ch );
1025#if MYGUI_IS_NATIVE_WCHAR_T
1028 std::wstring tmp( w_str, num );
1076 mData.insert( index, str.mData );
1082 mData.insert( index1, str.mData, index2, num );
1093 mData.insert( index, str, num );
1097#if MYGUI_IS_NATIVE_WCHAR_T
1115 mData.insert( index, num, ch );
1119#if MYGUI_IS_NATIVE_WCHAR_T
1138 return insert( index, num, cp[0] );
1142 insert( index, 1, cp[1] );
1143 insert( index, 1, cp[0] );
1150 mData.insert( i.
mIter, num, ch );
1152#if MYGUI_IS_NATIVE_WCHAR_T
1198 mData.erase( index );
1200 mData.erase( index, num );
1206 mData.replace( index1, num1, str.mData, 0,
npos );
1212 mData.replace( index1, num1, str.mData, 0, num2 );
1218 mData.replace( index1, num1, str.mData, index2, num2 );
1228 return replace( index1, num1, str, 0, num );
1233 mData.replace( index, num1, num2, ch );
1243 return replace( index1, num1, num, ch );
1248 return mData.compare( str.mData );
1253 return mData.compare( str );
1258 return mData.compare( index,
length, str.mData );
1263 return mData.compare( index,
length, str.mData, index2, length2 );
1268 return mData.compare( index,
length, str, length2 );
1271#if MYGUI_IS_NATIVE_WCHAR_T
1274 UString tmp( w_str, length2 );
1287 return mData.find( str.
c_str(), index );
1302#if MYGUI_IS_NATIVE_WCHAR_T
1306 return mData.find( tmp.c_str(), index,
length );
1317 return mData.find( ch, index );
1320#if MYGUI_IS_NATIVE_WCHAR_T
1336 return mData.rfind( str.
c_str(), index );
1342 return mData.rfind( tmp.
c_str(), index, num );
1348 return mData.rfind( tmp.
c_str(), index, num );
1351#if MYGUI_IS_NATIVE_WCHAR_T
1355 return mData.rfind( tmp.c_str(), index, num );
1366 return mData.rfind( ch, index );
1369#if MYGUI_IS_NATIVE_WCHAR_T
1387 while ( i < num && ( index + i ) <
len ) {
1408#if MYGUI_IS_NATIVE_WCHAR_T
1426 while ( i < num && ( index + i ) <
len ) {
1447#if MYGUI_IS_NATIVE_WCHAR_T
1465 if ( index >
len ) index =
len - 1;
1467 while ( i < num && ( index - i ) !=
npos ) {
1489#if MYGUI_IS_NATIVE_WCHAR_T
1507 if ( index >
len ) index =
len - 1;
1509 while ( i < num && ( index - i ) !=
npos ) {
1536#if MYGUI_IS_NATIVE_WCHAR_T
1577#if MYGUI_IS_NATIVE_WCHAR_T
1621 UString::operator std::string()
const
1623 return std::string(
asUTF8() );
1627 UString::operator std::wstring()
const
1629 return std::wstring(
asWStr() );
1635 if ( 0xD800 <= cp && cp <= 0xDFFF )
1642 if ( 0xD800 <= cp && cp <= 0xDBFF )
1649 if ( 0xDC00 <= cp && cp <= 0xDFFF )
1656 if ( 0xD800 <= cp && cp <= 0xDBFF )
1672 bool wordPair =
false;
1675 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1677 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1686 unsigned short cU = cp1, cL = cp2;
1690 out_uc = ( cU & 0x03FF ) << 10;
1691 out_uc |= ( cL & 0x03FF );
1699 if ( in_uc <= 0xFFFF ) {
1708 tmp =
static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
1713 tmp =
static_cast<unsigned short>(uc & 0x03FF);
1722 return ( cp & ~_cont_mask ) != _cont;
1727 if ( !( cp & 0x80 ) )
return 1;
1728 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
1729 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
1730 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
1731 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
1732 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
1748 if ( !( uc & ~0x0000007F ) )
return 1;
1749 if ( !( uc & ~0x000007FF ) )
return 2;
1750 if ( !( uc & ~0x0000FFFF ) )
return 3;
1751 if ( !( uc & ~0x001FFFFF ) )
return 4;
1752 if ( !( uc & ~0x03FFFFFF ) )
return 5;
1753 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
1771 c = in_cp[i] & _lead5_mask;
1774 c = in_cp[i] & _lead4_mask;
1777 c = in_cp[i] & _lead3_mask;
1780 c = in_cp[i] & _lead2_mask;
1783 c = in_cp[i] & _lead1_mask;
1788 for ( ++i; i <
len; i++ )
1790 if (( in_cp[i] & ~_cont_mask ) != _cont )
1797 c |= ( in_cp[i] & _cont_mask );
1810 for (
size_t i =
len - 1; i > 0; i-- ) {
1811 out_cp[i] =
static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1818 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1821 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1824 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1827 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1830 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1834 out_cp[0] =
static_cast<unsigned char>(( c ) & 0x7F);
1844 std::string tmp(
reinterpret_cast<const char*
>(
c_str ) );
1850 std::string::const_iterator i, ie = str.end();
1856 if (( *i ) & 0x80 ) {
1857 unsigned char c = ( *i );
1858 size_t contBytes = 0;
1861 if (( c & ~_lead1_mask ) == _lead1 ) {
1869 }
else if (( c & ~_lead2_mask ) == _lead2 ) {
1871 if ( c == _lead2 ) {
1873 if (( c & _lead2 ) == _cont )
1880 }
else if (( c & ~_lead3_mask ) == _lead3 ) {
1882 if ( c == _lead3 ) {
1884 if (( c & _lead3 ) == _cont )
1891 }
else if (( c & ~_lead4_mask ) == _lead4 ) {
1893 if ( c == _lead4 ) {
1895 if (( c & _lead4 ) == _cont )
1902 }
else if (( c & ~_lead5_mask ) == _lead5 ) {
1904 if ( c == _lead5 ) {
1906 if (( c & _lead5 ) == _cont )
1915 while ( contBytes-- ) {
1917 if (( c & ~_cont_mask ) != _cont )
1930 void UString::_init()
1932 m_buffer.mVoidBuffer =
nullptr;
1933 m_bufferType = bt_none;
1937 void UString::_cleanBuffer()
const
1939 if ( m_buffer.mVoidBuffer !=
nullptr ) {
1940 switch ( m_bufferType ) {
1942 delete m_buffer.mStrBuffer;
1945 delete m_buffer.mWStrBuffer;
1947 case bt_utf32string:
1948 delete m_buffer.mUTF32StrBuffer;
1953 assert(
"This should never happen - mVoidBuffer should never contain something if we "
1954 "don't know the type");
1957 m_buffer.mVoidBuffer =
nullptr;
1959 m_bufferType = bt_none;
1963 void UString::_getBufferStr()
const
1965 if ( m_bufferType != bt_string ) {
1967 m_buffer.mStrBuffer =
new std::string();
1968 m_bufferType = bt_string;
1970 m_buffer.mStrBuffer->clear();
1973 void UString::_getBufferWStr()
const
1975 if ( m_bufferType != bt_wstring ) {
1977 m_buffer.mWStrBuffer =
new std::wstring();
1978 m_bufferType = bt_wstring;
1980 m_buffer.mWStrBuffer->clear();
1983 void UString::_getBufferUTF32Str()
const
1985 if ( m_bufferType != bt_utf32string ) {
1988 m_bufferType = bt_utf32string;
1990 m_buffer.mUTF32StrBuffer->clear();
1993 void UString::_load_buffer_UTF8()
const
1996 std::string& buffer = ( *m_buffer.mStrBuffer );
1997 buffer.reserve(
length() );
1999 unsigned char utf8buf[6];
2000 char* charbuf = (
char* )utf8buf;
2006 c = i.getCharacter();
2010 buffer.push_back( charbuf[j++] );
2014 void UString::_load_buffer_WStr()
const
2017 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2018 buffer.reserve(
length() );
2021 for ( i =
begin(); i != ie; ++i ) {
2022 buffer.push_back((
wchar_t )( *i ) );
2028 c = i.getCharacter();
2029 buffer.push_back((
wchar_t )c );
2034 void UString::_load_buffer_UTF32()
const
2036 _getBufferUTF32Str();
2037 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2038 buffer.reserve(
length() );
2044 c = i.getCharacter();
2045 buffer.push_back( c );
base iterator class for UString
int _setCharacter(unicode_char uc)
size_type _get_index() const
void _become(const _base_iterator &i)
void _seekRev(size_type c)
ptrdiff_t difference_type
void _jump_to(size_type index)
void _seekFwd(size_type c)
unicode_char _getCharacter() const
const forward iterator for UString
_const_fwd_iterator & operator=(const _const_fwd_iterator &i)
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_rev_iterator operator+(difference_type n)
addition operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_rev_iterator & operator--()
pre-decrement
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator=(const _fwd_iterator &i)
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type size() const
Returns the number of code points in the current string.
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number ...
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
void reserve(size_type size)
sets the capacity of the string to at least size code points
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding,...
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
void clear()
deletes all of the elements in the string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int compare(const UString &str) const
compare str to the current string
code_point value_type
value type typedef for use in iterators
bool operator<=(const UString &right) const
less than or equal operator
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that matches any character in str,...
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
void resize(size_type num, const code_point &val=0)
changes the size of the string to num, filling in any new area with val
_const_fwd_iterator const_iterator
const iterator
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
void push_back(unicode_char val)
appends val to the end of the string
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
void swap(UString &from)
exchanges the elements of the current string with those of from
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool empty() const
returns true if the string has no elements, false otherwise
float len(float x, float y)