Back to index

nux  3.0.0
FileIO.cpp
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Inalogic® Inc.
00003  *
00004  * This program is free software: you can redistribute it and/or modify it
00005  * under the terms of the GNU Lesser General Public License, as
00006  * published by the  Free Software Foundation; either version 2.1 or 3.0
00007  * of the License.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranties of
00011  * MERCHANTABILITY, SATISFACTORY QUALITY or FITNESS FOR A PARTICULAR
00012  * PURPOSE.  See the applicable version of the GNU Lesser General Public
00013  * License for more details.
00014  *
00015  * You should have received a copy of both the GNU Lesser General Public
00016  * License along with this program. If not, see <http://www.gnu.org/licenses/>
00017  *
00018  * Authored by: Jay Taoko <jaytaoko@inalogic.com>
00019  *
00020  */
00021 
00022 
00023 #include "NuxCore.h"
00024 
00025 namespace nux
00026 {
00027 //
00028 // Load a binary file to a dynamic array.
00029 //
00030   bool LoadFileToArray (std::vector<BYTE>& Result, const TCHAR *Filename, NFileManager &FileManager )
00031   {
00032     NSerializer *Reader = FileManager.CreateFileReader ( Filename );
00033 
00034     if ( !Reader )
00035       return FALSE;
00036 
00037     Result.clear();
00038 
00039     if (Reader->GetFileSize() < 0)
00040     {
00041       Reader->Close();
00042       delete Reader;
00043       return FALSE;
00044     }
00045 
00046     Result.resize (Reader->GetFileSize() );
00047     Reader->Serialize (&Result[0], Result.size() );
00048     bool Success = Reader->Close();
00049     delete Reader;
00050     return Success;
00051   }
00052 
00056   bool LoadTextFileToAnsiArray ( std::vector<ANSICHAR>& Result, const TCHAR *Filename, NFileManager &FileManager )
00057   {
00058     Result.clear();
00059     NSerializer *Reader = FileManager.CreateFileReader ( Filename );
00060 
00061     if ( !Reader )
00062       return FALSE;
00063 
00064     int Size = Reader->GetFileSize();
00065 
00066     if (Size < 0)
00067     {
00068       Reader->Close();
00069       delete Reader;
00070       return FALSE;
00071     }
00072 
00073     std::vector<BYTE> ByteArray;
00074     ByteArray.clear();
00075     ByteArray.resize (Size);
00076     Reader->Serialize (&ByteArray[0], Result.size() );
00077     bool Success = Reader->Close();
00078     delete Reader;
00079 
00080     if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0) // (BYTE)ByteArray[0]==0xff && (BYTE)ByteArray[1]==0xfe )
00081     {
00082       // UTF16 - Little Endian
00083       int numElement = Size / sizeof (UNICHAR) + 1; // +1 for null char
00084       Result.resize (numElement);
00085 
00086       for ( int i = 0; i < numElement - 1; i++ )
00087         Result[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+2] + (WORD) (ANSIUCHAR) ByteArray[i*2+3] * 256 );
00088 
00089       Result[numElement] = 0;
00090     }
00091     else if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0)
00092     {
00093       // UTF16 - Big Endian.
00094       int numElement = Size / sizeof (TCHAR) + 1; // +1 for null char
00095       Result.resize (numElement);
00096 
00097       for ( int i = 0; i < numElement - 1; i++ )
00098         Result[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+3] + (WORD) (ANSIUCHAR) ByteArray[i*2+2] * 256 );
00099 
00100       Result[numElement] = 0;
00101     }
00102     else
00103     {
00104       // ANSI.
00105       Result.clear();
00106       Result.resize (Size + 1); // +1 for null char
00107 
00108       for (int i = 0; i < Size; i++)
00109         Result[i] = ByteArray[i];
00110 
00111       Result[Size] = 0;
00112     }
00113 
00114     return Success;
00115   }
00116 
00120   bool LoadTextFileToUnicodeArray ( std::vector<UNICHAR>& Result, const TCHAR *Filename, NFileManager &FileManager )
00121   {
00122     Result.clear();
00123     NSerializer *Reader = FileManager.CreateFileReader ( Filename );
00124 
00125     if ( !Reader )
00126       return FALSE;
00127 
00128     int Size = Reader->GetFileSize();
00129 
00130     if (Size < 0)
00131     {
00132       Reader->Close();
00133       delete Reader;
00134       return FALSE;
00135     }
00136 
00137     std::vector<BYTE> ByteArray;
00138     ByteArray.clear();
00139     ByteArray.resize (Size);
00140     Reader->Serialize ( &ByteArray[0], Result.size() );
00141     bool Success = Reader->Close();
00142     delete Reader;
00143 
00144     if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0) // (BYTE)ByteArray[0]==0xff && (BYTE)ByteArray[1]==0xfe )
00145     {
00146       // UTF16 - Little Endian
00147       int numElement = Size + 1; // +1 for null char
00148       Result.resize (numElement);
00149 
00150       for ( int i = 0; i < numElement - 1; i++ )
00151         Result[i] = ( (WORD) (ANSIUCHAR) ByteArray[i*2+2] + (WORD) (ANSIUCHAR) ByteArray[i*2+3] * 256 );
00152 
00153       Result[numElement] = 0;
00154     }
00155     else if ( Size >= 2 && ! (Size & 1) && Memcmp (&Result[0], &NUX_UTF16_LE[1], NUX_UTF16_LE[0]) == 0)
00156     {
00157       // UTF16 - Big Endian.
00158       int numElement = Size + 1; // +1 for null char
00159       Result.resize (numElement);
00160 
00161       for ( int i = 0; i < numElement - 1; i++ )
00162         Result[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+3] + (WORD) (ANSIUCHAR) ByteArray[i*2+2] * 256 );
00163 
00164       Result[numElement] = 0;
00165     }
00166     else
00167     {
00168       // There is no way to detect that a file really contains ascii. Or is there?
00169       // Make sure this file is really ascii.
00170       /*
00171       However as an additional check to
00172       the heuristic of looking for unprintable characters, another trick is to
00173       check if the newline string is consistent. It should always be either "\n"
00174       (for UNIX-like systems), "\r" (for Mac-like systems) or "\r\n" (for
00175       Windows-like systems). If the file starts switching around between these, it
00176       probably isn't a valid ASCII file on any of the above three platforms.
00177       */
00178 
00179       BOOL isASCII = TRUE;
00180 
00181       for ( int i = 0; i < Size; i++ )
00182       {
00183         if (Result[i] == 0 && (i != Size - 1) )
00184         {
00185           isASCII = FALSE;
00186         }
00187 
00188         if ( (Result[i] < 0x20 || Result[i] >= 0xFF) && ( (Result[i] != 0x0A/*New Line, Line feed*/) && (Result[i] != 0x0D/*Carriage return*/) ) )
00189         {
00190           isASCII = FALSE;
00191         }
00192 
00193         // http://www.websiterepairguy.com/articles/os/crlf.html
00194         /*
00195             The carriage return is often referred to by the capital letters CR.
00196             On a Macintosh, every line has a CR at the end.
00197 
00198             Under Linux (a variant of Unix), the end of a line is indicated by
00199             a line feed. Every line ends with a line feed or LF.
00200 
00201             Calling the end of a line an LF versus a CR is not just semantics.
00202             These are 2 very real characters with 2 very real and very separate
00203             numeric representations on a computer. A CR is a 13 in the ASCII table
00204             of characters and an LF is a 10 in the ASCII table of characters.
00205 
00206             Contributing to the confusion is that fact that Microsoft Windows does
00207             things yet another way. Under Microsoft Windows, lines end with a combination
00208             of 2 characters -- a CR followed by a LF. Symbolically, this is represented
00209             as CRLF or carriage return, line feed.
00210         */
00211         // Todo. Check if the file mixes a combination of \n (Linux) \r (Mac) and \r\n (Windows).
00212         // If it does, the file is not ASCII.
00213 
00214         if (isASCII == FALSE)
00215           return FALSE;
00216       }
00217 
00218       Result.clear();
00219       Result.resize (Size + 1);
00220 
00221       for ( int i = 0; i < Size; i++ )
00222         Result[i] = ConvertAnsiCharToUnicodeChar (ByteArray[i]);
00223 
00224       Result[Size] = 0;
00225     }
00226 
00227 
00228     return Success;
00229   }
00230 
00235   bool LoadFileToString ( NString &Result, const TCHAR *Filename, NFileManager &FileManager )
00236   {
00237     NSerializer *Reader = FileManager.CreateFileReader (Filename);
00238 
00239     if ( !Reader )
00240     {
00241       nuxDebugMsg (TEXT ("[LoadFileToString] Cannot read from file: %s"), Filename);
00242       return false;
00243     }
00244 
00245     unsigned int Size = Reader->GetFileSize();
00246     std::vector<ANSICHAR> ByteArray (Size + 2);
00247     Reader->Serialize (&ByteArray[0], Size);
00248     bool Success = Reader->Close();
00249     delete Reader;
00250     ByteArray[Size+0] = 0;
00251     ByteArray[Size+1] = 0;
00252     std::vector<TCHAR> ResultArray;
00253 
00254     // Detect Unicode Byte Order Mark
00255     //      EF BB BF            UTF-8
00256     //      FF FE            UTF-16, little endian
00257     //      FE FF            UTF-16, big endian
00258     //      FF FE 00 00     UTF-32, little endian
00259     //      00 00 FE FF     UTF-32, big-endian
00260     // Note: Microsoft uses UTF-16, little endian byte order.
00261 
00262     // Little Endian UTF-16: size should be >=2, even, and the first two bytes should be 0xFF followed by 0xFE
00263     if ( (Size >= 2) && ! (Size & 1) && ( (BYTE) ByteArray[0] == 0xff) && ( (BYTE) ByteArray[1] == 0xfe) )
00264     {
00265       // UTF16 - Little Endian
00266       int numElement = Size / sizeof (TCHAR);
00267       ResultArray.clear();
00268       ResultArray.resize (numElement);
00269 
00270       for ( int i = 0; i < numElement - 1; i++ )
00271         ResultArray[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+2] + (WORD) (ANSIUCHAR) ByteArray[i*2+3] * 256 );
00272 
00273       ResultArray[numElement] = 0;
00274     }
00275     else if ( (Size >= 2) && ! (Size & 1) && ( (BYTE) ByteArray[0] == 0xfe) && ( (BYTE) ByteArray[1] == 0xff) )
00276     {
00277       // UTF16 - Big Endian.
00278       int numElement = Size / sizeof (TCHAR);
00279       ResultArray.clear();
00280       ResultArray.resize (numElement);
00281 
00282       for (int i = 0; i < numElement - 1; i++)
00283         ResultArray[i] = ConvertUnicodeCharToTCHAR ( (WORD) (ANSIUCHAR) ByteArray[i*2+3] + (WORD) (ANSIUCHAR) ByteArray[i*2+2] * 256 );
00284 
00285       ResultArray[numElement] = 0;
00286     }
00287     else
00288     {
00289       // ANSI.
00290       ResultArray.clear();
00291       ResultArray.resize (Size + 1);
00292 
00293       for (unsigned int i = 0; i < Size; i++)
00294         ResultArray[i] = ConvertAnsiCharToTCHAR (ByteArray[i]);
00295 
00296       ResultArray[Size] = 0;
00297     }
00298 
00299     Result = &ResultArray[0];
00300     return Success;
00301   }
00302 
00306   bool SaveArrayToFile ( const std::vector<BYTE>& Array, const TCHAR *Filename, NFileManager &FileManager )
00307   {
00308     NSerializer *Ar = FileManager.CreateFileWriter ( Filename );
00309 
00310     if ( !Ar )
00311       return 0;
00312 
00313     Ar->Serialize ( const_cast<BYTE *> (&Array[0]), Array.size() );
00314     delete Ar;
00315     return 1;
00316   }
00317 
00321   bool SaveStringToFile ( const NString &String, const TCHAR *Filename, NFileManager &FileManager )
00322   {
00323     if ( !String.Length() )
00324       return 0;
00325 
00326     NSerializer *Ar = FileManager.CreateFileWriter ( Filename );
00327 
00328     if ( !Ar )
00329       return 0;
00330 
00331     bool SaveAsUnicode = false, Success = true;
00332 #if UNICODE
00333 
00334     for ( int i = 0; i < String.Length(); i++ )
00335     {
00336       // Test if the UNICODE 0xABCD is the same as the ASCII 0x00CB.
00337       if ( (*String) [i] != (TCHAR) (ANSIUCHAR) ConvertTCHARToAnsiChar ( (*String) [i]) )
00338       {
00339         //The string need to be written in ASCII. We write the string as UTF16-BigEndian
00340         Ar->Serialize (NUX_CONST_CAST (BYTE *, &UTF16_BE[1]), UTF16_BE[0] /*size*/);
00341         SaveAsUnicode = true;
00342         break;
00343       }
00344     }
00345 
00346 #endif
00347 
00348     if ( SaveAsUnicode || (sizeof (TCHAR) == 1) )
00349     {
00350       unsigned int s = (unsigned int) String.Length() * sizeof (TCHAR);
00351       Ar->Serialize ( NUX_CONST_CAST (TCHAR *, String.GetTCharPtr() ), (unsigned int) s);
00352     }
00353     else
00354     {
00355       unsigned int s = (unsigned int) String.Length();
00356       std::vector<ANSICHAR> AnsiBuffer ( (unsigned int) s);
00357 
00358       // Cast all character down from UTF16 to ANSI
00359       for (unsigned int i = 0; i < (unsigned int) String.Length(); i++ )
00360         AnsiBuffer[i] = ConvertTCHARToAnsiChar ( (unsigned int) String[i]);
00361 
00362       // serialize
00363       s = (unsigned int) String.Length();
00364       Ar->Serialize ( NUX_CONST_CAST (ANSICHAR *, &AnsiBuffer[0]), s);
00365     }
00366 
00367     delete Ar;
00368 
00369     if ( !Success )
00370       GFileManager.Delete ( Filename );
00371 
00372     return Success;
00373   }
00374 
00375 }