// nux-1.14.0
00001 /* 00002 * Copyright 2010 Inalogic® Inc. 00003 * 00004 * This program is free software: you can redistribute it and/or modify it 00005 * under the terms of the GNU Lesser General Public License, as 00006 * published by the Free Software Foundation; either version 2.1 or 3.0 00007 * of the License. 00008 * 00009 * This program is distributed in the hope that it will be useful, but 00010 * WITHOUT ANY WARRANTY; without even the implied warranties of 00011 * MERCHANTABILITY, SATISFACTORY QUALITY or FITNESS FOR A PARTICULAR 00012 * PURPOSE. See the applicable version of the GNU Lesser General Public 00013 * License for more details. 00014 * 00015 * You should have received a copy of both the GNU Lesser General Public 00016 * License along with this program. If not, see <http://www.gnu.org/licenses/> 00017 * 00018 * Authored by: Jay Taoko <jaytaoko@inalogic.com> 00019 * 00020 */ 00021 00022 00023 #ifndef NUNICODE_H 00024 #define NUNICODE_H 00025 00026 00027 namespace nux 00028 { 00029 00030 // UTF-16 is the primary encoding mechanism used by Microsoft Windows 2000, Windows 2000 Server, Windows XP and Windows 2003 Server. 00031 // Unicode Byte Order Mark (BOM) 00032 enum {UNICODE_UTF32_BE = 0x0000FEFF }; 00033 enum {UNICODE_UTF32_LE = 0xFFFE0000 }; 00034 enum {UNICODE_UTF16_BE = 0xFEFF }; 00035 enum {UNICODE_UTF16_LE = 0xFFFE }; 00036 enum {UNICODE_UTF8 = 0xEFBBBF }; 00037 00038 const BYTE UTF32_BE[] = {0x04 /*size*/, 0x00, 0x00, 0xFE, 0xFF }; 00039 const BYTE UTF32_LE[] = {0x04 /*size*/, 0xFF, 0xFE, 0x00, 0x00 }; 00040 const BYTE UTF16_BE[] = {0x02 /*size*/, 0xFE, 0xFF }; 00041 const BYTE UTF16_LE[] = {0x02 /*size*/, 0xFF, 0xFE }; 00042 const BYTE UTF8[] = {0x03 /*size*/, 0xEF, 0xBB, 0xBF }; 00043 00044 enum {UNICODE_BOM = 0xfeff}; 00045 00046 // UTF-16 is the default encoding form of the Unicode Standard 00047 // On Linux and Mac OS X, wchar_t is 4 bytes! 00048 // On windows wchar_t is 2 bytes! 
00049 00050 #ifdef UNICODE 00051 inline TCHAR ConvertAnsiCharToTCHAR (ANSICHAR In) 00052 { 00053 TCHAR output; 00054 const t_UTF8 *source_start = &In; 00055 const t_UTF8 *source_end = source_start + 1; 00056 t_UTF16 *target_start = reinterpret_cast<t_UTF16 *> (&output); 00057 t_UTF16 *target_end = target_start + sizeof (wchar_t); 00058 00059 ConversionResult res = ConvertUTF8toUTF16 (&source_start, source_end, &target_start, target_end, lenientConversion); 00060 00061 if (res != conversionOK) 00062 { 00063 output = 0; 00064 } 00065 00066 return output; 00067 } 00068 00069 inline ANSICHAR ConvertTCHARToAnsiChar (TCHAR In) 00070 { 00071 ANSICHAR output; 00072 const t_UTF16 *source_start = &In; 00073 const t_UTF16 *source_end = source_start + 1; 00074 t_UTF8 *target_start = reinterpret_cast<t_UTF8 *> (&output); 00075 t_UTF8 *target_end = target_start + sizeof (wchar_t); 00076 00077 ConversionResult res = ConvertUTF16toUTF8 (&source_start, source_end, &target_start, target_end, lenientConversion); 00078 00079 if (res != conversionOK) 00080 { 00081 output = 0; 00082 } 00083 00084 return output; 00085 } 00086 inline TCHAR ConvertUnicodeCharToTCHAR (UNICHAR In) 00087 { 00088 return In; 00089 } 00090 inline UNICHAR ConvertTCHARToUnicodeChar (TCHAR In) 00091 { 00092 return In; 00093 } 00094 #else 00095 inline TCHAR ConvertUnicodeCharToTCHAR (UNICHAR In) 00096 { 00097 TCHAR output; 00098 const t_UTF16 *source_start = &In; 00099 const t_UTF16 *source_end = source_start + 1; 00100 t_UTF8 *target_start = reinterpret_cast<t_UTF8 *> (&output); 00101 t_UTF8 *target_end = target_start + sizeof (wchar_t); 00102 00103 ConversionResult res = ConvertUTF16toUTF8 (&source_start, source_end, &target_start, target_end, lenientConversion); 00104 00105 if (res != conversionOK) 00106 { 00107 output = 0; 00108 } 00109 00110 return output; 00111 } 00112 00113 inline UNICHAR ConvertTCHARToUnicodeChar (TCHAR In) 00114 { 00115 UNICHAR output; 00116 const t_UTF8 *source_start = 
reinterpret_cast<const t_UTF8 *> (&In); 00117 const t_UTF8 *source_end = source_start + 1; 00118 t_UTF16 *target_start = reinterpret_cast<t_UTF16 *> (&output); 00119 t_UTF16 *target_end = target_start + sizeof (wchar_t); 00120 00121 ConversionResult res = ConvertUTF8toUTF16 (&source_start, source_end, &target_start, target_end, lenientConversion); 00122 00123 if (res != conversionOK) 00124 { 00125 output = 0; 00126 } 00127 00128 return output; 00129 } 00130 00131 inline TCHAR ConvertAnsiCharToTCHAR (ANSICHAR In) 00132 { 00133 return In; 00134 } 00135 inline ANSICHAR ConvertTCHARToAnsiChar (TCHAR In) 00136 { 00137 return In; 00138 } 00139 #endif 00140 00144 inline ANSICHAR ConvertUnicodeCharToAnsiChar (UNICHAR In) 00145 { 00146 TCHAR output; 00147 const t_UTF16 *source_start = &In; 00148 const t_UTF16 *source_end = source_start + 1; 00149 t_UTF8 *target_start = reinterpret_cast<t_UTF8 *> (&output); 00150 t_UTF8 *target_end = target_start + sizeof (wchar_t); 00151 00152 ConversionResult res = ConvertUTF16toUTF8 (&source_start, source_end, &target_start, target_end, lenientConversion); 00153 00154 if (res != conversionOK) 00155 { 00156 output = 0; 00157 } 00158 00159 return output; 00160 } 00161 00165 inline UNICHAR ConvertAnsiCharToUnicodeChar (ANSICHAR In) 00166 { 00167 UNICHAR output; 00168 const t_UTF8 *source_start = reinterpret_cast<const t_UTF8 *> (&In); 00169 const t_UTF8 *source_end = source_start + 1; 00170 t_UTF16 *target_start = reinterpret_cast<t_UTF16 *> (&output); 00171 t_UTF16 *target_end = target_start + sizeof (wchar_t); 00172 00173 ConversionResult res = ConvertUTF8toUTF16 (&source_start, source_end, &target_start, target_end, lenientConversion); 00174 00175 if (res != conversionOK) 00176 { 00177 output = 0; 00178 } 00179 00180 return output; 00181 } 00182 00183 class UnicharToAnsicharConvertion 00184 { 00185 public: 00186 // Default to ANSI code page 00187 UnicharToAnsicharConvertion() {} 00188 00194 ANSICHAR *Convert (const UNICHAR *Source); 00195 
/*{ 00196 std::wstring utf16string(Source); 00197 size_t utf16size = utf16string.length(); 00198 size_t utf8size = 6 * utf16size; 00199 ANSICHAR *utf8string = new ANSICHAR[utf8size+1]; 00200 00201 const t_UTF16 *source_start = utf16string.c_str(); 00202 const t_UTF16 *source_end = source_start + utf16size; 00203 t_UTF8* target_start = reinterpret_cast<t_UTF8*>(utf8string); 00204 t_UTF8* target_end = target_start + utf8size; 00205 00206 ConversionResult res = ConvertUTF16toUTF8(&source_start, source_end, &target_start, target_end, lenientConversion); 00207 if (res != conversionOK) 00208 { 00209 delete utf8string; 00210 utf8string = 0; 00211 } 00212 // mark end of string 00213 *target_start = 0; 00214 return utf8string; 00215 }*/ 00216 }; 00217 00219 class AnsicharToUnicharConvertion 00220 { 00221 public: 00222 AnsicharToUnicharConvertion() {} 00223 00229 UNICHAR *Convert (const ANSICHAR *Source); 00230 }; 00231 00233 // TCHAR can be ansi or unicode depending if UNICODE is defined or not. 00234 class TCharToAnsiConvertion 00235 { 00236 public: 00237 NUX_INLINE TCharToAnsiConvertion() {} 00238 00244 NUX_INLINE ANSICHAR *Convert (const TCHAR *Source) 00245 { 00246 // Determine whether we need to allocate memory or not 00247 #ifdef UNICODE 00248 UnicharToAnsicharConvertion convert; 00249 return convert.Convert (Source); 00250 #else 00251 size_t length = strlen (Source) + 1; 00252 size_t size = length * sizeof (ANSICHAR); 00253 ANSICHAR *Dest = new ANSICHAR[size]; 00254 STRNCPY_S (Dest, size, Source, length); 00255 return Dest; 00256 #endif 00257 } 00258 }; 00259 00261 // TCHAR can be ansi or unicode depending if UNICODE is defined or not. 
00262 class TCharToUnicharConvertion 00263 { 00264 public: 00265 NUX_INLINE TCharToUnicharConvertion() {} 00266 00272 NUX_INLINE UNICHAR *Convert (const TCHAR *Source) 00273 { 00274 // Determine whether we need to allocate memory or not 00275 #ifdef UNICODE 00276 size_t length = strlen (Source) + 1; 00277 size_t size = length * sizeof (UNICHAR); 00278 UNICHAR *Dest = new UNICHAR[size]; 00279 STRNCPY_S (Dest, size, Source, length); 00280 return Dest; 00281 #else 00282 AnsicharToUnicharConvertion convert; 00283 return convert.Convert (Source); 00284 #endif 00285 } 00286 }; 00287 00289 // TCHAR can be ansi or unicode depending if UNICODE is defined or not. 00290 class AnsiToTCharConversion 00291 { 00292 public: 00293 NUX_INLINE AnsiToTCharConversion() {} 00294 00300 NUX_INLINE TCHAR *Convert (const ANSICHAR *Source) 00301 { 00302 #ifdef UNICODE 00303 AnsicharToUnicharConvertion convert; 00304 return convert.Convert (Source); 00305 #else 00306 size_t length = strlen (Source) + 1; 00307 size_t size = length; 00308 TCHAR *Dest = new TCHAR[size]; 00309 STRNCPY_S (Dest, size, Source, length); 00310 return Dest; 00311 #endif 00312 } 00313 }; 00314 00318 template < typename CONVERT_TO, typename CONVERT_FROM, typename BASE_CONVERTER, DWORD DefaultConversionSize = 128 > 00319 class NCharacterConversion: public BASE_CONVERTER 00320 { 00321 CONVERT_TO *ConvertedString; 00322 00323 // Hide the default constructor 00324 NCharacterConversion(); 00325 00326 public: 00330 explicit inline NCharacterConversion (const CONVERT_FROM *Source) 00331 { 00332 if (Source != NULL) 00333 { 00334 // Use base class' convert method 00335 ConvertedString = BASE_CONVERTER::Convert (Source); 00336 } 00337 else 00338 { 00339 ConvertedString = NULL; 00340 } 00341 } 00342 00346 inline ~NCharacterConversion() 00347 { 00348 if (ConvertedString != NULL) 00349 { 00350 delete [] ConvertedString; 00351 } 00352 } 00353 00354 // Operator to get access to the converted string 00355 inline operator CONVERT_TO* 
(void) const 00356 { 00357 return ConvertedString; 00358 } 00359 }; 00360 00361 // Conversion typedefs 00362 // typedef NCharacterConversion<TCHAR, ANSICHAR, AnsiToTCharConversion> ANSI_To_TCHAR_Conversion; 00363 // typedef NCharacterConversion<ANSICHAR, TCHAR, TCharToAnsiConvertion> TCHAR_To_ANSI_Conversion; 00364 // typedef NCharacterConversion<ANSICHAR, UNICHAR, UnicharToAnsicharConvertion> UNICHAR_To_ANSICHAR_Conversion; 00365 // typedef NCharacterConversion<UNICHAR, ANSICHAR, AnsicharToUnicharConvertion> ANSICHAR_To_UNICHAR_Conversion; 00366 00367 } 00368 00369 #endif // NUNICODE_H