nux-1.14.0
|
00001 /* 00002 * Copyright 2010 Inalogic® Inc. 00003 * 00004 * This program is free software: you can redistribute it and/or modify it 00005 * under the terms of the GNU Lesser General Public License, as 00006 * published by the Free Software Foundation; either version 2.1 or 3.0 00007 * of the License. 00008 * 00009 * This program is distributed in the hope that it will be useful, but 00010 * WITHOUT ANY WARRANTY; without even the implied warranties of 00011 * MERCHANTABILITY, SATISFACTORY QUALITY or FITNESS FOR A PARTICULAR 00012 * PURPOSE. See the applicable version of the GNU Lesser General Public 00013 * License for more details. 00014 * 00015 * You should have received a copy of both the GNU Lesser General Public 00016 * License along with this program. If not, see <http://www.gnu.org/licenses/> 00017 * 00018 * Authored by: Jay Taoko <jaytaoko@inalogic.com> 00019 * 00020 */ 00021 00022 00023 #include "NuxCore.h" 00024 00025 namespace nux 00026 { 00027 // 00028 // Load a binary file to a dynamic array. 00029 // 00030 bool LoadFileToArray (std::vector<BYTE>& Result, const TCHAR *Filename, NFileManager &FileManager ) 00031 { 00032 NSerializer *Reader = FileManager.CreateFileReader ( Filename ); 00033 00034 if ( !Reader ) 00035 return FALSE; 00036 00037 Result.clear(); 00038 00039 if (Reader->GetFileSize() < 0) 00040 { 00041 Reader->Close(); 00042 delete Reader; 00043 return FALSE; 00044 } 00045 00046 Result.resize (Reader->GetFileSize() ); 00047 Reader->Serialize (&Result[0], Result.size() ); 00048 bool Success = Reader->Close(); 00049 delete Reader; 00050 return Success; 00051 } 00052 00056 bool LoadTextFileToAnsiArray ( std::vector<ANSICHAR>& Result, const TCHAR *Filename, NFileManager &FileManager ) 00057 { 00058 Result.clear(); 00059 NSerializer *Reader = FileManager.CreateFileReader ( Filename ); 00060 00061 if ( !Reader ) 00062 return FALSE; 00063 00064 int Size = Reader->GetFileSize(); 00065 00066 if (Size < 0) 00067 { 00068 Reader->Close(); 00069 delete Reader; 00070 return FALSE; 00071 } 00072 00073 std::vector<BYTE> ByteArray; 00074 ByteArray.clear(); 00075 ByteArray.resize (Size); 00076 Reader->Serialize (&ByteArray[0], Result.size() ); 00077 bool Success = Reader->Close(); 00078 delete Reader; 00079 00080 if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0) // (BYTE)ByteArray[0]==0xff && (BYTE)ByteArray[1]==0xfe ) 00081 { 00082 // UTF16 - Little Endian 00083 int numElement = Size / sizeof (UNICHAR) + 1; // +1 for null char 00084 Result.resize (numElement); 00085 00086 for ( int i = 0; i < numElement - 1; i++ ) 00087 Result[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+2] + (WORD) (ANSIUCHAR) ByteArray[i*2+3] * 256 ); 00088 00089 Result[numElement] = 0; 00090 } 00091 else if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0) 00092 { 00093 // UTF16 - Big Endian. 00094 int numElement = Size / sizeof (TCHAR) + 1; // +1 for null char 00095 Result.resize (numElement); 00096 00097 for ( int i = 0; i < numElement - 1; i++ ) 00098 Result[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+3] + (WORD) (ANSIUCHAR) ByteArray[i*2+2] * 256 ); 00099 00100 Result[numElement] = 0; 00101 } 00102 else 00103 { 00104 // ANSI. 00105 Result.clear(); 00106 Result.resize (Size + 1); // +1 for null char 00107 00108 for (int i = 0; i < Size; i++) 00109 Result[i] = ByteArray[i]; 00110 00111 Result[Size] = 0; 00112 } 00113 00114 return Success; 00115 } 00116 00120 bool LoadTextFileToUnicodeArray ( std::vector<UNICHAR>& Result, const TCHAR *Filename, NFileManager &FileManager ) 00121 { 00122 Result.clear(); 00123 NSerializer *Reader = FileManager.CreateFileReader ( Filename ); 00124 00125 if ( !Reader ) 00126 return FALSE; 00127 00128 int Size = Reader->GetFileSize(); 00129 00130 if (Size < 0) 00131 { 00132 Reader->Close(); 00133 delete Reader; 00134 return FALSE; 00135 } 00136 00137 std::vector<BYTE> ByteArray; 00138 ByteArray.clear(); 00139 ByteArray.resize (Size); 00140 Reader->Serialize ( &ByteArray[0], Result.size() ); 00141 bool Success = Reader->Close(); 00142 delete Reader; 00143 00144 if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0) // (BYTE)ByteArray[0]==0xff && (BYTE)ByteArray[1]==0xfe ) 00145 { 00146 // UTF16 - Little Endian 00147 int numElement = Size + 1; // +1 for null char 00148 Result.resize (numElement); 00149 00150 for ( int i = 0; i < numElement - 1; i++ ) 00151 Result[i] = ( (WORD) (ANSIUCHAR) ByteArray[i*2+2] + (WORD) (ANSIUCHAR) ByteArray[i*2+3] * 256 ); 00152 00153 Result[numElement] = 0; 00154 } 00155 else if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0) 00156 { 00157 // UTF16 - Big Endian. 00158 int numElement = Size + 1; // +1 for null char 00159 Result.resize (numElement); 00160 00161 for ( int i = 0; i < numElement - 1; i++ ) 00162 Result[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+3] + (WORD) (ANSIUCHAR) ByteArray[i*2+2] * 256 ); 00163 00164 Result[numElement] = 0; 00165 } 00166 else 00167 { 00168 // There is no way to detect that a file really contains ascii. Or is there? 00169 // Make sure this file is really ascii. 00170 /* 00171 However as an additional check to 00172 the heuristic of looking for unprintable characters, another trick is to 00173 check if the newline string is consistent. It should always be either "\n" 00174 (for UNIX-like systems), "\r" (for Mac-like systems) or "\r\n" (for 00175 Windows-like systems). If the file starts switching around between these, it 00176 probably isn't a valid ASCII file on any of the above three platforms. 00177 */ 00178 00179 BOOL isASCII = TRUE; 00180 00181 for ( int i = 0; i < Size; i++ ) 00182 { 00183 if (Result[i] == 0 && (i != Size - 1) ) 00184 { 00185 isASCII = FALSE; 00186 } 00187 00188 if ( (Result[i] < 0x20 || Result[i] >= 0xFF) && ( (Result[i] != 0x0A/*New Line, Line feed*/) && (Result[i] != 0x0D/*Carriage return*/) ) ) 00189 { 00190 isASCII = FALSE; 00191 } 00192 00193 // http://www.websiterepairguy.com/articles/os/crlf.html 00194 /* 00195 The carriage return is often referred to by the capital letters CR. 00196 On a Macintosh, every line has a CR at the end. 00197 00198 Under Linux (a variant of Unix), the end of a line is indicated by 00199 a line feed. Every line ends with a line feed or LF. 00200 00201 Calling the end of a line an LF versus a CR is not just semantics. 00202 These are 2 very real characters with 2 very real and very separate 00203 numeric representations on a computer. A CR is a 13 in the ASCII table 00204 of characters and an LF is a 10 in the ASCII table of characters. 00205 00206 Contributing to the confusion is that fact that Microsoft Windows does 00207 things yet another way. Under Microsoft Windows, lines end with a combination 00208 of 2 characters -- a CR followed by a LF. Symbolically, this is represented 00209 as CRLF or carriage return, line feed. 00210 */ 00211 // Todo. Check if the file mixes a combination of \n (Linux) \r (Mac) and \r\n (Windows). 00212 // If it does, the file is not ASCII. 00213 00214 if (isASCII == FALSE) 00215 return FALSE; 00216 } 00217 00218 Result.clear(); 00219 Result.resize (Size + 1); 00220 00221 for ( int i = 0; i < Size; i++ ) 00222 Result[i] = ConvertAnsiCharToUnicodeChar (ByteArray[i]); 00223 00224 Result[Size] = 0; 00225 } 00226 00227 00228 return Success; 00229 } 00230 00235 bool LoadFileToString ( NString &Result, const TCHAR *Filename, NFileManager &FileManager ) 00236 { 00237 NSerializer *Reader = FileManager.CreateFileReader (Filename); 00238 00239 if ( !Reader ) 00240 { 00241 nuxDebugMsg (TEXT ("[LoadFileToString] Cannot read from file: %s"), Filename); 00242 return false; 00243 } 00244 00245 t_u32 Size = Reader->GetFileSize(); 00246 std::vector<ANSICHAR> ByteArray (Size + 2); 00247 Reader->Serialize (&ByteArray[0], Size); 00248 bool Success = Reader->Close(); 00249 delete Reader; 00250 ByteArray[Size+0] = 0; 00251 ByteArray[Size+1] = 0; 00252 std::vector<TCHAR> ResultArray; 00253 00254 // Detect Unicode Byte Order Mark 00255 // EF BB BF UTF-8 00256 // FF FE UTF-16, little endian 00257 // FE FF UTF-16, big endian 00258 // FF FE 00 00 UTF-32, little endian 00259 // 00 00 FE FF UTF-32, big-endian 00260 // Note: Microsoft uses UTF-16, little endian byte order. 00261 00262 // Little Endian UTF-16: size should be >=2, even, and the first two bytes should be 0xFF followed by 0xFE 00263 if ( (Size >= 2) && ! (Size & 1) && ( (BYTE) ByteArray[0] == 0xff) && ( (BYTE) ByteArray[1] == 0xfe) ) 00264 { 00265 // UTF16 - Little Endian 00266 int numElement = Size / sizeof (TCHAR); 00267 ResultArray.clear(); 00268 ResultArray.resize (numElement); 00269 00270 for ( int i = 0; i < numElement - 1; i++ ) 00271 ResultArray[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+2] + (WORD) (ANSIUCHAR) ByteArray[i*2+3] * 256 ); 00272 00273 ResultArray[numElement] = 0; 00274 } 00275 else if ( (Size >= 2) && ! (Size & 1) && ( (BYTE) ByteArray[0] == 0xfe) && ( (BYTE) ByteArray[1] == 0xff) ) 00276 { 00277 // UTF16 - Big Endian. 00278 int numElement = Size / sizeof (TCHAR); 00279 ResultArray.clear(); 00280 ResultArray.resize (numElement); 00281 00282 for (int i = 0; i < numElement - 1; i++) 00283 ResultArray[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+3] + (WORD) (ANSIUCHAR) ByteArray[i*2+2] * 256 ); 00284 00285 ResultArray[numElement] = 0; 00286 } 00287 else 00288 { 00289 // ANSI. 00290 ResultArray.clear(); 00291 ResultArray.resize (Size + 1); 00292 00293 for (t_u32 i = 0; i < Size; i++) 00294 ResultArray[i] = ConvertAnsiCharToTCHAR (ByteArray[i]); 00295 00296 ResultArray[Size] = 0; 00297 } 00298 00299 Result = &ResultArray[0]; 00300 return Success; 00301 } 00302 00306 bool SaveArrayToFile ( const std::vector<BYTE>& Array, const TCHAR *Filename, NFileManager &FileManager ) 00307 { 00308 NSerializer *Ar = FileManager.CreateFileWriter ( Filename ); 00309 00310 if ( !Ar ) 00311 return 0; 00312 00313 Ar->Serialize ( const_cast<BYTE *> (&Array[0]), Array.size() ); 00314 delete Ar; 00315 return 1; 00316 } 00317 00321 bool SaveStringToFile ( const NString &String, const TCHAR *Filename, NFileManager &FileManager ) 00322 { 00323 if ( !String.Length() ) 00324 return 0; 00325 00326 NSerializer *Ar = FileManager.CreateFileWriter ( Filename ); 00327 00328 if ( !Ar ) 00329 return 0; 00330 00331 bool SaveAsUnicode = false, Success = true; 00332 #if UNICODE 00333 00334 for ( int i = 0; i < String.Length(); i++ ) 00335 { 00336 // Test if the UNICODE 0xABCD is the same as the ASCII 0x00CB. 00337 if ( (*String) [i] != (TCHAR) (ANSIUCHAR) ConvertTCHARToAnsiChar ( (*String) [i]) ) 00338 { 00339 //The string need to be written in ASCII. We write the string as UTF16-BigEndian 00340 Ar->Serialize (NUX_CONST_CAST (BYTE *, &UTF16_BE[1]), UTF16_BE[0] /*size*/); 00341 SaveAsUnicode = true; 00342 break; 00343 } 00344 } 00345 00346 #endif 00347 00348 if ( SaveAsUnicode || (sizeof (TCHAR) == 1) ) 00349 { 00350 t_u32 s = (t_u32) String.Length() * sizeof (TCHAR); 00351 Ar->Serialize ( NUX_CONST_CAST (TCHAR *, String.GetTCharPtr() ), (t_u32) s); 00352 } 00353 else 00354 { 00355 t_u32 s = (t_u32) String.Length(); 00356 std::vector<ANSICHAR> AnsiBuffer ( (t_u32) s); 00357 00358 // Cast all character down from UTF16 to ANSI 00359 for (t_u32 i = 0; i < (t_u32) String.Length(); i++ ) 00360 AnsiBuffer[i] = ConvertTCHARToAnsiChar ( (t_u32) String[i]); 00361 00362 // serialize 00363 s = (t_u32) String.Length(); 00364 Ar->Serialize ( NUX_CONST_CAST (ANSICHAR *, &AnsiBuffer[0]), s); 00365 } 00366 00367 delete Ar; 00368 00369 if ( !Success ) 00370 GFileManager.Delete ( Filename ); 00371 00372 return Success; 00373 } 00374 00375 }