00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00024
00025
00030 template <typename In, typename Out>
00031 inline Out Unicode::UTF32ToANSI(In Begin, In End, Out Output, char Replacement, const std::locale& Locale)
00032 {
00033 #ifdef __MINGW32__
00034
00035
00036
00037
00038 while (Begin < End)
00039 {
00040 char Char = 0;
00041 if (wctomb(&Char, static_cast<wchar_t>(*Begin++)) >= 0)
00042 *Output++ = Char;
00043 else if (Replacement)
00044 *Output++ = Replacement;
00045 }
00046
00047 #else
00048
00049
00050 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);
00051
00052
00053 while (Begin < End)
00054 *Output++ = Facet.narrow(static_cast<wchar_t>(*Begin++), Replacement);
00055
00056 #endif
00057
00058 return Output;
00059 }
00060
00061
00066 template <typename In, typename Out>
00067 inline Out Unicode::ANSIToUTF32(In Begin, In End, Out Output, const std::locale& Locale)
00068 {
00069 #ifdef __MINGW32__
00070
00071
00072
00073
00074 while (Begin < End)
00075 {
00076 wchar_t Char = 0;
00077 mbtowc(&Char, &*Begin, 1);
00078 Begin++;
00079 *Output++ = static_cast<Uint32>(Char);
00080 }
00081
00082 #else
00083
00084
00085 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);
00086
00087
00088 while (Begin < End)
00089 *Output++ = static_cast<Uint32>(Facet.widen(*Begin++));
00090
00091 #endif
00092
00093 return Output;
00094 }
00095
00096
00101 template <typename In, typename Out>
00102 inline Out Unicode::UTF8ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)
00103 {
00104 while (Begin < End)
00105 {
00106 Uint32 c = 0;
00107 int TrailingBytes = UTF8TrailingBytes[*Begin];
00108 if (Begin + TrailingBytes < End)
00109 {
00110
00111 switch (TrailingBytes)
00112 {
00113 case 5 : c += *Begin++; c <<= 6;
00114 case 4 : c += *Begin++; c <<= 6;
00115 case 3 : c += *Begin++; c <<= 6;
00116 case 2 : c += *Begin++; c <<= 6;
00117 case 1 : c += *Begin++; c <<= 6;
00118 case 0 : c += *Begin++;
00119 }
00120 c -= UTF8Offsets[TrailingBytes];
00121
00122
00123 if (c < 0xFFFF)
00124 {
00125
00126 if ((c >= 0xD800) && (c <= 0xDFFF))
00127 {
00128
00129 if (Replacement)
00130 *Output++ = Replacement;
00131 }
00132 else
00133 {
00134
00135 *Output++ = static_cast<Uint16>(c);
00136 }
00137 }
00138 else if (c > 0x0010FFFF)
00139 {
00140
00141 if (Replacement)
00142 *Output++ = Replacement;
00143 }
00144 else
00145 {
00146
00147 c -= 0x0010000;
00148 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800);
00149 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);
00150 }
00151 }
00152 }
00153
00154 return Output;
00155 }
00156
00157
00162 template <typename In, typename Out>
00163 inline Out Unicode::UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)
00164 {
00165 while (Begin < End)
00166 {
00167 Uint32 c = 0;
00168 int TrailingBytes = UTF8TrailingBytes[*Begin];
00169 if (Begin + TrailingBytes < End)
00170 {
00171
00172 switch (TrailingBytes)
00173 {
00174 case 5 : c += *Begin++; c <<= 6;
00175 case 4 : c += *Begin++; c <<= 6;
00176 case 3 : c += *Begin++; c <<= 6;
00177 case 2 : c += *Begin++; c <<= 6;
00178 case 1 : c += *Begin++; c <<= 6;
00179 case 0 : c += *Begin++;
00180 }
00181 c -= UTF8Offsets[TrailingBytes];
00182
00183
00184 if ((c < 0xD800) || (c > 0xDFFF))
00185 {
00186
00187 *Output++ = c;
00188 }
00189 else
00190 {
00191
00192 if (Replacement)
00193 *Output++ = Replacement;
00194 }
00195 }
00196 }
00197
00198 return Output;
00199 }
00200
00201
00206 template <typename In, typename Out>
00207 inline Out Unicode::UTF16ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)
00208 {
00209 while (Begin < End)
00210 {
00211 Uint32 c = *Begin++;
00212
00213
00214 if ((c >= 0xD800) && (c <= 0xDBFF))
00215 {
00216 if (Begin < End)
00217 {
00218
00219 Uint32 d = *Begin++;
00220 if ((d >= 0xDC00) && (d <= 0xDFFF))
00221 c = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
00222 }
00223 else
00224 {
00225
00226 if (Replacement)
00227 *Output++ = Replacement;
00228 }
00229 }
00230
00231
00232 if (c > 0x0010FFFF)
00233 {
00234
00235 if (Replacement)
00236 *Output++ = Replacement;
00237 }
00238 else
00239 {
00240
00241
00242
00243 int BytesToWrite = 1;
00244 if (c < 0x80) BytesToWrite = 1;
00245 else if (c < 0x800) BytesToWrite = 2;
00246 else if (c < 0x10000) BytesToWrite = 3;
00247 else if (c <= 0x0010FFFF) BytesToWrite = 4;
00248
00249
00250 Uint8 Bytes[4];
00251 switch (BytesToWrite)
00252 {
00253 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00254 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00255 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00256 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);
00257 }
00258
00259
00260 const Uint8* CurByte = Bytes;
00261 switch (BytesToWrite)
00262 {
00263 case 4 : *Output++ = *CurByte++;
00264 case 3 : *Output++ = *CurByte++;
00265 case 2 : *Output++ = *CurByte++;
00266 case 1 : *Output++ = *CurByte++;
00267 }
00268 }
00269 }
00270
00271 return Output;
00272 }
00273
00274
00279 template <typename In, typename Out>
00280 inline Out Unicode::UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)
00281 {
00282 while (Begin < End)
00283 {
00284 Uint16 c = *Begin++;
00285 if ((c >= 0xD800) && (c <= 0xDBFF))
00286 {
00287
00288 if (Begin < End)
00289 {
00290 Uint16 d = *Begin++;
00291 if ((d >= 0xDC00) && (d <= 0xDFFF))
00292 {
00293
00294 *Output++ = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
00295 }
00296 else
00297 {
00298
00299 if (Replacement)
00300 *Output++ = Replacement;
00301 }
00302 }
00303 }
00304 else if ((c >= 0xDC00) && (c <= 0xDFFF))
00305 {
00306
00307 if (Replacement)
00308 *Output++ = Replacement;
00309 }
00310 else
00311 {
00312
00313 *Output++ = static_cast<Uint32>(c);
00314 }
00315 }
00316
00317 return Output;
00318 }
00319
00320
00325 template <typename In, typename Out>
00326 inline Out Unicode::UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)
00327 {
00328 while (Begin < End)
00329 {
00330 Uint32 c = *Begin++;
00331 if (c > 0x0010FFFF)
00332 {
00333
00334 if (Replacement)
00335 *Output++ = Replacement;
00336 }
00337 else
00338 {
00339
00340
00341
00342 int BytesToWrite = 1;
00343 if (c < 0x80) BytesToWrite = 1;
00344 else if (c < 0x800) BytesToWrite = 2;
00345 else if (c < 0x10000) BytesToWrite = 3;
00346 else if (c <= 0x0010FFFF) BytesToWrite = 4;
00347
00348
00349 Uint8 Bytes[4];
00350 switch (BytesToWrite)
00351 {
00352 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00353 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00354 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00355 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);
00356 }
00357
00358
00359 const Uint8* CurByte = Bytes;
00360 switch (BytesToWrite)
00361 {
00362 case 4 : *Output++ = *CurByte++;
00363 case 3 : *Output++ = *CurByte++;
00364 case 2 : *Output++ = *CurByte++;
00365 case 1 : *Output++ = *CurByte++;
00366 }
00367 }
00368 }
00369
00370 return Output;
00371 }
00372
00373
00378 template <typename In, typename Out>
00379 inline Out Unicode::UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)
00380 {
00381 while (Begin < End)
00382 {
00383 Uint32 c = *Begin++;
00384 if (c < 0xFFFF)
00385 {
00386
00387 if ((c >= 0xD800) && (c <= 0xDFFF))
00388 {
00389
00390 if (Replacement)
00391 *Output++ = Replacement;
00392 }
00393 else
00394 {
00395
00396 *Output++ = static_cast<Uint16>(c);
00397 }
00398 }
00399 else if (c > 0x0010FFFF)
00400 {
00401
00402 if (Replacement)
00403 *Output++ = Replacement;
00404 }
00405 else
00406 {
00407
00408 c -= 0x0010000;
00409 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800);
00410 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);
00411 }
00412 }
00413
00414 return Output;
00415 }
00416
00417
00421 template <typename In>
00422 inline std::size_t Unicode::GetUTF8Length(In Begin, In End)
00423 {
00424 std::size_t Length = 0;
00425 while (Begin < End)
00426 {
00427 int NbBytes = UTF8TrailingBytes[*Begin];
00428 if (Begin + NbBytes < End)
00429 ++Length;
00430
00431 Begin += NbBytes + 1;
00432 }
00433
00434 return Length;
00435 }
00436
00437
00441 template <typename In>
00442 inline std::size_t Unicode::GetUTF16Length(In Begin, In End)
00443 {
00444 std::size_t Length = 0;
00445 while (Begin < End)
00446 {
00447 if ((*Begin >= 0xD800) && (*Begin <= 0xDBFF))
00448 {
00449 ++Begin;
00450 if ((Begin < End) && ((*Begin >= 0xDC00) && (*Begin <= 0xDFFF)))
00451 {
00452 ++Length;
00453 }
00454 }
00455 else
00456 {
00457 ++Length;
00458 }
00459
00460 ++Begin;
00461 }
00462
00463 return Length;
00464 }
00465
00466
00470 template <typename In>
00471 inline std::size_t Unicode::GetUTF32Length(In Begin, In End)
00472 {
00473 return End - Begin;
00474 }