Back to index

libsfml  1.6+dfsg2
Unicode.cpp
Go to the documentation of this file.
00001 
00002 //
00003 // SFML - Simple and Fast Multimedia Library
00004 // Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
00005 //
00006 // This software is provided 'as-is', without any express or implied warranty.
00007 // In no event will the authors be held liable for any damages arising from the use of this software.
00008 //
00009 // Permission is granted to anyone to use this software for any purpose,
00010 // including commercial applications, and to alter it and redistribute it freely,
00011 // subject to the following restrictions:
00012 //
00013 // 1. The origin of this software must not be misrepresented;
00014 //    you must not claim that you wrote the original software.
00015 //    If you use this software in a product, an acknowledgment
00016 //    in the product documentation would be appreciated but is not required.
00017 //
00018 // 2. Altered source versions must be plainly marked as such,
00019 //    and must not be misrepresented as being the original software.
00020 //
00021 // 3. This notice may not be removed or altered from any source distribution.
00022 //
00024 
00026 // Headers
00028 #include <SFML/System/Unicode.hpp>
00029 #include <stdexcept>
00030 #include <string.h>
00031 
00032 
00034 // References :
00035 //
00036 // http://www.unicode.org/
00037 // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
00038 // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
00039 // http://people.w3.org/rishida/scripts/uniview/conversion
00040 //
00042 
00043 namespace
00044 {
00046     // Generic utility function to compute the number
00047     // of characters in a null-terminated string of any type
00049     template <typename T>
00050     std::size_t StrLen(const T* Str)
00051     {
00052         std::size_t Length = 0;
00053         while (*Str++) Length++;
00054         return Length;
00055     }
00056 
00058     // Get the current system locale
00060     std::locale GetCurrentLocale()
00061     {
00062         try
00063         {
00064             return std::locale("");
00065         }
00066         catch (std::runtime_error&)
00067         {
00068             // It seems some implementations don't know the "" locale
00069             // (Mac OS, MinGW)
00070 
00071             return std::locale();
00072         }
00073     }
00074 }
00075 
00076 namespace sf
00077 {
00079 // Static member data
00081 const int Unicode::UTF8TrailingBytes[256] =
00082 {
00083     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00084     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00085     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00086     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00087     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00088     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00089     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00090     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
00091 };
00092 const Uint32 Unicode::UTF8Offsets[6] =
00093 {
00094     0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
00095 };
00096 const Uint8 Unicode::UTF8FirstBytes[7] =
00097 {
00098     0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
00099 };
00100 
00101 
00105 Unicode::Text::Text()
00106 {
00107     // Nothing to do
00108 }
00109 
00110 
00114 Unicode::Text::Text(const char* Str)
00115 {
00116     if (Str)
00117     {
00118         std::size_t Length = StrLen(Str);
00119         if (Length > 0)
00120         {
00121             myUTF32String.reserve(Length + 1);
00122             Unicode::ANSIToUTF32(Str, Str + Length, std::back_inserter(myUTF32String));
00123         }
00124     }
00125 }
00126 Unicode::Text::Text(const wchar_t* Str)
00127 {
00128     if (Str)
00129     {
00130         std::size_t Length = StrLen(Str);
00131         if (Length > 0)
00132         {
00133             // See comments below, in Unicode::Text::Text(const std::wstring&)
00134             myUTF32String.reserve(Length + 1);
00135             switch (sizeof(wchar_t))
00136             {
00137                 case 2 : Unicode::UTF16ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0); break;
00138                 case 4 : std::copy(Str, Str + Length, std::back_inserter(myUTF32String));                break;
00139                 default : break;
00140             }
00141         }
00142     }
00143 }
00144 Unicode::Text::Text(const Uint8* Str)
00145 {
00146     if (Str)
00147     {
00148         std::size_t Length = StrLen(Str);
00149         if (Length > 0)
00150         {
00151             myUTF32String.reserve(Length + 1);
00152             Unicode::UTF8ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0);
00153         }
00154     }
00155 }
00156 Unicode::Text::Text(const Uint16* Str)
00157 {
00158     if (Str)
00159     {
00160         std::size_t Length = StrLen(Str);
00161         if (Length > 0)
00162         {
00163             myUTF32String.reserve(Length+ 1);
00164             Unicode::UTF16ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0);
00165         }
00166     }
00167 }
00168 Unicode::Text::Text(const Uint32* Str)
00169 {
00170     if (Str)
00171         myUTF32String = Str;
00172 }
00173 Unicode::Text::Text(const std::string& Str)
00174 {
00175     myUTF32String.reserve(Str.length() + 1);
00176     Unicode::ANSIToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String));
00177 }
00178 Unicode::Text::Text(const std::wstring& Str)
00179 {
00180     // This function assumes that 2-byte large wchar_t are encoded in UTF-16 (Windows), and
00181     // 4-byte large wchar_t are encoded using UTF-32 (Unix)
00182     // Is that always true ? (some platforms may use JIS Japanese encoding)
00183     // The macro __STDC_ISO_10646__ should help identifying UTF-32 compliant implementations
00184 
00185     myUTF32String.reserve(Str.length() + 1);
00186 
00187     // Select the proper function according to the (supposed) wchar_t system encoding
00188     switch (sizeof(wchar_t))
00189     {
00190         // wchar_t uses UTF-16 -- need a conversion
00191         case 2 :
00192         {
00193             Unicode::UTF16ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0);
00194             break;
00195         }
00196 
00197         // wchar_t uses UTF-32 -- direct copy
00198         case 4 :
00199         {
00200             std::copy(Str.begin(), Str.end(), std::back_inserter(myUTF32String));
00201             break;
00202         }
00203 
00204         // This should never happen
00205         default : break;
00206     }
00207 }
00208 Unicode::Text::Text(const Unicode::UTF8String& Str)
00209 {
00210     myUTF32String.reserve(Str.length() + 1);
00211     Unicode::UTF8ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0);
00212 }
00213 Unicode::Text::Text(const Unicode::UTF16String& Str)
00214 {
00215     myUTF32String.reserve(Str.length() + 1);
00216     Unicode::UTF16ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0);
00217 }
00218 Unicode::Text::Text(const Unicode::UTF32String& Str)
00219 {
00220     myUTF32String = Str;
00221 }
00222 
00223 
00227 Unicode::Text::operator std::string() const
00228 {
00229     std::string Output;
00230     Output.reserve(myUTF32String.length() + 1);
00231     Unicode::UTF32ToANSI(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0, Unicode::GetDefaultLocale());
00232     return Output;
00233 }
00234 Unicode::Text::operator std::wstring() const
00235 {
00236     // This function assumes that 2-byte large wchar_t are encoded in UTF-16 (Windows), and
00237     // 4-byte large wchar_t are encoded using UTF-32 (Unix)
00238     // Is that always true ? (some platforms may use JIS Japanese encoding)
00239     // The macro __STDC_ISO_10646__ should help identifying UTF-32 compliant implementations
00240 
00241     std::wstring Output;
00242     Output.reserve(myUTF32String.length() + 1);
00243 
00244     // Select the proper function according to the (supposed) wchar_t system encoding
00245     switch (sizeof(wchar_t))
00246     {
00247         // wchar_t uses UTF-16 -- need a conversion
00248         case 2 :
00249         {
00250             UTF32ToUTF16(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0);
00251             break;
00252         }
00253 
00254         // wchar_t uses UTF-32 -- direct copy
00255         case 4 :
00256         {
00257             std::copy(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output));
00258             break;
00259         }
00260 
00261         // This should never happen
00262         default : break;
00263     }
00264     return Output;
00265 }
00266 Unicode::Text::operator sf::Unicode::UTF8String() const
00267 {
00268     Unicode::UTF8String Output;
00269     Output.reserve(myUTF32String.length() * 4 + 1);
00270     Unicode::UTF32ToUTF8(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0);
00271     return Output;
00272 }
00273 Unicode::Text::operator sf::Unicode::UTF16String() const
00274 {
00275     Unicode::UTF16String Output;
00276     Output.reserve(myUTF32String.length() * 2 + 1);
00277     Unicode::UTF32ToUTF16(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0);
00278     return Output;
00279 }
00280 Unicode::Text::operator const sf::Unicode::UTF32String&() const
00281 {
00282     return myUTF32String;
00283 }
00284 
00285 
00289 const std::locale& Unicode::GetDefaultLocale()
00290 {
00291     // It seems getting the default locale is a very expensive operation,
00292     // so we only do it once and then store the locale for reuse.
00293     // Warning : this code won't be aware of any change of the default locale during runtime
00294 
00295     static std::locale DefaultLocale = GetCurrentLocale();
00296 
00297     return DefaultLocale;
00298 }
00299 
00300 } // namespace sf