Back to index

nux  3.0.0
NUnicode.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Inalogic® Inc.
00003  *
00004  * This program is free software: you can redistribute it and/or modify it
00005  * under the terms of the GNU Lesser General Public License, as
00006  * published by the  Free Software Foundation; either version 2.1 or 3.0
00007  * of the License.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranties of
00011  * MERCHANTABILITY, SATISFACTORY QUALITY or FITNESS FOR A PARTICULAR
00012  * PURPOSE.  See the applicable version of the GNU Lesser General Public
00013  * License for more details.
00014  *
00015  * You should have received a copy of both the GNU Lesser General Public
00016  * License along with this program. If not, see <http://www.gnu.org/licenses/>
00017  *
00018  * Authored by: Jay Taoko <jaytaoko@inalogic.com>
00019  *
00020  */
00021 
00022 
00023 #ifndef NUNICODE_H
00024 #define NUNICODE_H
00025 
00026 
00027 namespace nux
00028 {
00029 
00030 // UTF-16 is the primary encoding mechanism used by Microsoft Windows 2000, Windows 2000 Server, Windows XP and Windows 2003 Server.
00031 // Unicode Byte Order Mark (BOM)
00032   enum {UNICODE_UTF32_BE   = 0x0000FEFF };
00033   enum {UNICODE_UTF32_LE   = 0xFFFE0000 };
00034   enum {UNICODE_UTF16_BE   = 0xFEFF };
00035   enum {UNICODE_UTF16_LE   = 0xFFFE };
00036   enum {UNICODE_UTF8       = 0xEFBBBF };
00037 
00038   const BYTE UTF32_BE[]   = {0x04 /*size*/, 0x00, 0x00, 0xFE, 0xFF };
00039   const BYTE UTF32_LE[]   = {0x04 /*size*/, 0xFF, 0xFE, 0x00, 0x00 };
00040   const BYTE UTF16_BE[]   = {0x02 /*size*/, 0xFE, 0xFF };
00041   const BYTE UTF16_LE[]   = {0x02 /*size*/, 0xFF, 0xFE };
00042   const BYTE UTF8[]       = {0x03 /*size*/, 0xEF, 0xBB, 0xBF };
00043 
00044   enum {UNICODE_BOM   = 0xfeff};
00045 
00046 // UTF-16 is the default encoding form of the Unicode Standard
00047 // On Linux and Mac OS X, wchar_t is 4 bytes!
00048 // On windows wchar_t is 2 bytes!
00049 
00050 #ifdef UNICODE
00051   inline TCHAR ConvertAnsiCharToTCHAR (ANSICHAR In)
00052   {
00053     TCHAR output;
00054     const unsigned char *source_start = &In;
00055     const unsigned char *source_end = source_start + 1;
00056     wchar_t *target_start = reinterpret_cast<wchar_t *> (&output);
00057     wchar_t *target_end = target_start + sizeof (wchar_t);
00058 
00059     ConversionResult res = ConvertUTF8toUTF16 (&source_start, source_end, &target_start, target_end, lenientConversion);
00060 
00061     if (res != conversionOK)
00062     {
00063       output = 0;
00064     }
00065 
00066     return output;
00067   }
00068 
00069   inline ANSICHAR ConvertTCHARToAnsiChar (TCHAR In)
00070   {
00071     ANSICHAR output;
00072     const wchar_t *source_start = &In;
00073     const wchar_t *source_end = source_start + 1;
00074     unsigned char *target_start = reinterpret_cast<unsigned char *> (&output);
00075     unsigned char *target_end = target_start + sizeof (wchar_t);
00076 
00077     ConversionResult res = ConvertUTF16toUTF8 (&source_start, source_end, &target_start, target_end, lenientConversion);
00078 
00079     if (res != conversionOK)
00080     {
00081       output = 0;
00082     }
00083 
00084     return output;
00085   }
00086   inline TCHAR ConvertUnicodeCharToTCHAR (UNICHAR In)
00087   {
00088     return In;
00089   }
00090   inline UNICHAR  ConvertTCHARToUnicodeChar (TCHAR In)
00091   {
00092     return In;
00093   }
00094 #else
00095   inline TCHAR ConvertUnicodeCharToTCHAR (UNICHAR In)
00096   {
00097     TCHAR output;
00098     const wchar_t *source_start = &In;
00099     const wchar_t *source_end = source_start + 1;
00100     unsigned char *target_start = reinterpret_cast<unsigned char *> (&output);
00101     unsigned char *target_end = target_start + sizeof (wchar_t);
00102 
00103     ConversionResult res = ConvertUTF16toUTF8 (&source_start, source_end, &target_start, target_end, lenientConversion);
00104 
00105     if (res != conversionOK)
00106     {
00107       output = 0;
00108     }
00109 
00110     return output;
00111   }
00112 
00113   inline UNICHAR ConvertTCHARToUnicodeChar (TCHAR In)
00114   {
00115     UNICHAR output;
00116     const unsigned char *source_start = reinterpret_cast<const unsigned char *> (&In);
00117     const unsigned char *source_end = source_start + 1;
00118     wchar_t *target_start = reinterpret_cast<wchar_t *> (&output);
00119     wchar_t *target_end = target_start + sizeof (wchar_t);
00120 
00121     ConversionResult res = ConvertUTF8toUTF16 (&source_start, source_end, &target_start, target_end, lenientConversion);
00122 
00123     if (res != conversionOK)
00124     {
00125       output = 0;
00126     }
00127 
00128     return output;
00129   }
00130 
00131   inline TCHAR ConvertAnsiCharToTCHAR (ANSICHAR In)
00132   {
00133     return In;
00134   }
00135   inline ANSICHAR ConvertTCHARToAnsiChar (TCHAR In)
00136   {
00137     return In;
00138   }
00139 #endif
00140 
00144   inline ANSICHAR ConvertUnicodeCharToAnsiChar (UNICHAR In)
00145   {
00146     TCHAR output;
00147     const wchar_t *source_start = &In;
00148     const wchar_t *source_end = source_start + 1;
00149     unsigned char *target_start = reinterpret_cast<unsigned char *> (&output);
00150     unsigned char *target_end = target_start + sizeof (wchar_t);
00151 
00152     ConversionResult res = ConvertUTF16toUTF8 (&source_start, source_end, &target_start, target_end, lenientConversion);
00153 
00154     if (res != conversionOK)
00155     {
00156       output = 0;
00157     }
00158 
00159     return output;
00160   }
00161 
00165   inline UNICHAR ConvertAnsiCharToUnicodeChar (ANSICHAR In)
00166   {
00167     UNICHAR output;
00168     const unsigned char *source_start = reinterpret_cast<const unsigned char *> (&In);
00169     const unsigned char *source_end = source_start + 1;
00170     wchar_t *target_start = reinterpret_cast<wchar_t *> (&output);
00171     wchar_t *target_end = target_start + sizeof (wchar_t);
00172 
00173     ConversionResult res = ConvertUTF8toUTF16 (&source_start, source_end, &target_start, target_end, lenientConversion);
00174 
00175     if (res != conversionOK)
00176     {
00177       output = 0;
00178     }
00179 
00180     return output;
00181   }
00182 
00183   class UnicharToAnsicharConvertion
00184   {
00185   public:
00186     // Default to ANSI code page
00187     UnicharToAnsicharConvertion() {}
00188 
00194     ANSICHAR *Convert (const UNICHAR *Source);
00195     /*{
00196         std::wstring utf16string(Source);
00197         size_t utf16size = utf16string.length();
00198         size_t utf8size = 6 * utf16size;
00199         ANSICHAR *utf8string = new ANSICHAR[utf8size+1];
00200 
00201         const wchar_t *source_start = utf16string.c_str();
00202         const wchar_t *source_end = source_start + utf16size;
00203         unsigned char* target_start = reinterpret_cast<unsigned char*>(utf8string);
00204         unsigned char* target_end = target_start + utf8size;
00205 
00206         ConversionResult res = ConvertUTF16toUTF8(&source_start, source_end, &target_start, target_end, lenientConversion);
00207         if (res != conversionOK)
00208         {
00209             delete utf8string;
00210             utf8string = 0;
00211         }
00212         // mark end of string
00213         *target_start = 0;
00214         return utf8string;
00215     }*/
00216   };
00217 
00219   class AnsicharToUnicharConvertion
00220   {
00221   public:
00222     AnsicharToUnicharConvertion() {}
00223 
00229     UNICHAR *Convert (const ANSICHAR *Source);
00230   };
00231 
00233 // TCHAR can be ansi or unicode depending if UNICODE is defined or not.
00234   class TCharToAnsiConvertion
00235   {
00236   public:
00237     NUX_INLINE TCharToAnsiConvertion() {}
00238 
00244     NUX_INLINE ANSICHAR *Convert (const TCHAR *Source)
00245     {
00246       // Determine whether we need to allocate memory or not
00247 #ifdef UNICODE
00248       UnicharToAnsicharConvertion convert;
00249       return convert.Convert (Source);
00250 #else
00251       size_t length = strlen (Source) + 1;
00252       size_t size = length * sizeof (ANSICHAR);
00253       ANSICHAR *Dest = new ANSICHAR[size];
00254       STRNCPY_S (Dest, size, Source, length);
00255       return Dest;
00256 #endif
00257     }
00258   };
00259 
00261   // TCHAR can be ansi or unicode depending if UNICODE is defined or not.
00262   class TCharToUnicharConvertion
00263   {
00264   public:
00265     NUX_INLINE TCharToUnicharConvertion() {}
00266 
00272     NUX_INLINE UNICHAR *Convert (const TCHAR *Source)
00273     {
00274       // Determine whether we need to allocate memory or not
00275 #ifdef UNICODE
00276       size_t length = strlen (Source) + 1;
00277       size_t size = length * sizeof (UNICHAR);
00278       UNICHAR *Dest = new UNICHAR[size];
00279       STRNCPY_S (Dest, size, Source, length);
00280       return Dest;
00281 #else
00282       AnsicharToUnicharConvertion convert;
00283       return convert.Convert (Source);
00284 #endif
00285     }
00286   };
00287 
00289 // TCHAR can be ansi or unicode depending if UNICODE is defined or not.
00290   class AnsiToTCharConversion
00291   {
00292   public:
00293     NUX_INLINE AnsiToTCharConversion() {}
00294 
00300     NUX_INLINE TCHAR *Convert (const ANSICHAR *Source)
00301     {
00302 #ifdef UNICODE
00303       AnsicharToUnicharConvertion convert;
00304       return convert.Convert (Source);
00305 #else
00306       size_t length = strlen (Source) + 1;
00307       size_t size = length;
00308       TCHAR *Dest = new TCHAR[size];
00309       STRNCPY_S (Dest, size, Source, length);
00310       return Dest;
00311 #endif
00312     }
00313   };
00314 
00318   template < typename CONVERT_TO, typename CONVERT_FROM, typename BASE_CONVERTER, DWORD DefaultConversionSize = 128 >
00319   class NCharacterConversion:   public BASE_CONVERTER
00320   {
00321     CONVERT_TO *ConvertedString;
00322 
00323     // Hide the default constructor
00324     NCharacterConversion();
00325 
00326   public:
00330     explicit inline NCharacterConversion (const CONVERT_FROM *Source)
00331     {
00332       if (Source != NULL)
00333       {
00334         // Use base class' convert method
00335         ConvertedString = BASE_CONVERTER::Convert (Source);
00336       }
00337       else
00338       {
00339         ConvertedString = NULL;
00340       }
00341     }
00342 
00346     inline ~NCharacterConversion()
00347     {
00348       if (ConvertedString != NULL)
00349       {
00350         delete [] ConvertedString;
00351       }
00352     }
00353 
00354     // Operator to get access to the converted string
00355     inline operator CONVERT_TO* (void) const
00356     {
00357       return ConvertedString;
00358     }
00359   };
00360 
00361 // Conversion typedefs
00362 // typedef NCharacterConversion<TCHAR, ANSICHAR, AnsiToTCharConversion>           ANSI_To_TCHAR_Conversion;
00363 // typedef NCharacterConversion<ANSICHAR, TCHAR, TCharToAnsiConvertion>           TCHAR_To_ANSI_Conversion;
00364 // typedef NCharacterConversion<ANSICHAR, UNICHAR, UnicharToAnsicharConvertion>   UNICHAR_To_ANSICHAR_Conversion;
00365 // typedef NCharacterConversion<UNICHAR, ANSICHAR, AnsicharToUnicharConvertion>   ANSICHAR_To_UNICHAR_Conversion;
00366 
00367 }
00368 
00369 #endif // NUNICODE_H