Back to index

lightning-sunbird  0.9+nobinonly
nsUNIXCharset.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include <locale.h>
00039 #include "nsIPlatformCharset.h"
00040 #include "pratom.h"
00041 #include "nsGREResProperties.h"
00042 #include "nsCOMPtr.h"
00043 #include "nsReadableUtils.h"
00044 #include "nsLocaleCID.h"
00045 #include "nsUConvDll.h"
00046 #include "nsIComponentManager.h"
00047 #include "nsIServiceManager.h"
00048 #include "nsIUnicodeDecoder.h"
00049 #include "nsIUnicodeEncoder.h"
00050 #include "nsICharsetConverterManager.h"
00051 #include "nsEncoderDecoderUtils.h"
00052 #if HAVE_GNU_LIBC_VERSION_H
00053 #include <gnu/libc-version.h>
00054 #endif
00055 #ifdef HAVE_NL_TYPES_H
00056 #include <nl_types.h>
00057 #endif
00058 #if HAVE_LANGINFO_CODESET
00059 #include <langinfo.h>
00060 #endif
00061 #include "nsPlatformCharset.h"
00062 #include "nsAutoLock.h"
00063 #include "prinit.h"
00064 #include "nsUnicharUtils.h"
00065 
00066 NS_IMPL_THREADSAFE_ISUPPORTS1(nsPlatformCharset, nsIPlatformCharset)
00067 
00068 static nsGREResProperties *gNLInfo = nsnull;
00069 static nsGREResProperties *gInfo_deprecated = nsnull;
00070 static PRInt32 gCnt=0;
00071 
00072 //this lock is for protecting above static variable operation
00073 static PRLock  *gLock = nsnull;
00074 
00075 static PRStatus InitLock(void)
00076 {
00077   gLock = PR_NewLock();
00078   if (gLock)
00079     return PR_SUCCESS;
00080   return PR_FAILURE;
00081 }
00082 
00083 nsPlatformCharset::nsPlatformCharset()
00084 {
00085   PR_AtomicIncrement(&gCnt);
00086   static PRCallOnceType once;
00087   PR_CallOnce(&once, InitLock);
00088   NS_ASSERTION(gLock, "Can't allocate a lock?!");
00089 }
00090 
00091 nsresult
00092 nsPlatformCharset::ConvertLocaleToCharsetUsingDeprecatedConfig(nsAString& locale, nsACString& oResult)
00093 {
00094 
00095   // locked for thread safety 
00096   {
00097     nsAutoLock guard(gLock);
00098     if (!gInfo_deprecated) {
00099       nsGREResProperties *info =
00100           new nsGREResProperties(NS_LITERAL_CSTRING("unixcharset.properties"));
00101       NS_ASSERTION(info, "cannot create nsGREResProperties");
00102       gInfo_deprecated = info;
00103     }
00104   }
00105 
00106   if (gInfo_deprecated && !(locale.IsEmpty())) {
00107     nsAutoString platformLocaleKey;
00108     // note: NS_LITERAL_STRING("locale." OSTYPE ".") does not compile on AIX
00109     platformLocaleKey.AssignLiteral("locale.");
00110     platformLocaleKey.AppendWithConversion(OSTYPE);
00111     platformLocaleKey.AppendLiteral(".");
00112     platformLocaleKey.Append(locale);
00113 
00114     nsAutoString charset;
00115     nsresult res = gInfo_deprecated->Get(platformLocaleKey, charset);
00116     if (NS_SUCCEEDED(res))  {
00117       LossyCopyUTF16toASCII(charset, oResult);
00118       return NS_OK;
00119     }
00120     nsAutoString localeKey;
00121     localeKey.AssignLiteral("locale.all.");
00122     localeKey.Append(locale);
00123     res = gInfo_deprecated->Get(localeKey, charset);
00124     if (NS_SUCCEEDED(res))  {
00125       LossyCopyUTF16toASCII(charset, oResult);
00126       return NS_OK;
00127     }
00128    }
00129    NS_ASSERTION(0, "unable to convert locale to charset using deprecated config");
00130    mCharset.AssignLiteral("ISO-8859-1");
00131    oResult.AssignLiteral("ISO-8859-1");
00132    return NS_SUCCESS_USING_FALLBACK_LOCALE;
00133 }
00134 
00135 nsPlatformCharset::~nsPlatformCharset()
00136 {
00137   PR_AtomicDecrement(&gCnt);
00138   if (!gCnt) {
00139     if (gNLInfo) {
00140       delete gNLInfo;
00141       gNLInfo = nsnull;
00142       PR_DestroyLock(gLock);
00143       gLock = nsnull;
00144     }
00145     if (gInfo_deprecated) {
00146       delete gInfo_deprecated;
00147       gInfo_deprecated = nsnull;
00148     }
00149   }
00150 }
00151 
00152 NS_IMETHODIMP 
00153 nsPlatformCharset::GetCharset(nsPlatformCharsetSel selector, nsACString& oResult)
00154 {
00155   oResult = mCharset; 
00156   return NS_OK;
00157 }
00158 
00159 NS_IMETHODIMP 
00160 nsPlatformCharset::GetDefaultCharsetForLocale(const nsAString& localeName, nsACString &oResult)
00161 {
00162   // 
00163   // if this locale is the user's locale then use the charset 
00164   // we already determined at initialization
00165   // 
00166   if (mLocale.Equals(localeName) ||
00167     // support the 4.x behavior
00168     (mLocale.LowerCaseEqualsLiteral("en_us") && 
00169      localeName.LowerCaseEqualsLiteral("c"))) {
00170     oResult = mCharset;
00171     return NS_OK;
00172   }
00173 
00174 #if HAVE_LANGINFO_CODESET
00175   //
00176   // This locale appears to be a different locale from the user's locale. 
00177   // To do this we would need to lock the global resource we are currently 
00178   // using or use a library that provides multi locale support. 
00179   // ICU is a possible example of a multi locale library.
00180   //     http://oss.software.ibm.com/icu/
00181   //
00182   // A more common cause of hitting this warning than the above is that 
00183   // Mozilla is launched under an ll_CC.UTF-8 locale. In xpLocale, 
00184   // we only store the language and the region (ll-CC) losing 'UTF-8', which
00185   // leads |mLocale| to be different from |localeName|. Although we lose
00186   // 'UTF-8', we init'd |mCharset| with the value obtained via 
00187   // |nl_langinfo(CODESET)| so that we're all right here.
00188   // 
00189   NS_WARNING("GetDefaultCharsetForLocale: need to add multi locale support");
00190 #ifdef DEBUG_jungshik
00191   printf("localeName=%s mCharset=%s\n", NS_ConvertUTF16toUTF8(localeName).get(),
00192          mCharset.get());
00193 #endif
00194   // until we add multi locale support: use the the charset of the user's locale
00195   oResult = mCharset;
00196   return NS_SUCCESS_USING_FALLBACK_LOCALE;
00197 #endif
00198 
00199   //
00200   // convert from locale to charset
00201   // using the deprecated locale to charset mapping 
00202   //
00203   nsAutoString localeStr(localeName);
00204   nsresult res = ConvertLocaleToCharsetUsingDeprecatedConfig(localeStr, oResult);
00205   if (NS_SUCCEEDED(res))
00206     return res;
00207 
00208   NS_ASSERTION(0, "unable to convert locale to charset using deprecated config");
00209   oResult.AssignLiteral("ISO-8859-1");
00210   return NS_SUCCESS_USING_FALLBACK_LOCALE;
00211 }
00212 
00213 nsresult
00214 nsPlatformCharset::InitGetCharset(nsACString &oString)
00215 {
00216   char* nl_langinfo_codeset = nsnull;
00217   nsCString aCharset;
00218   nsresult res;
00219 
00220 #if HAVE_LANGINFO_CODESET
00221   nl_langinfo_codeset = nl_langinfo(CODESET);
00222   NS_ASSERTION(nl_langinfo_codeset, "cannot get nl_langinfo(CODESET)");
00223 
00224   //
00225   // see if we can use nl_langinfo(CODESET) directly
00226   //
00227   if (nl_langinfo_codeset) {
00228     aCharset.Assign(nl_langinfo_codeset);
00229     res = VerifyCharset(aCharset);
00230     if (NS_SUCCEEDED(res)) {
00231       oString = aCharset;
00232       return res;
00233     }
00234   }
00235 
00236   // locked for thread safety 
00237   {
00238     nsAutoLock guard(gLock);
00239 
00240     if (!gNLInfo) {
00241       nsCAutoString propertyFile;
00242       // note: NS_LITERAL_CSTRING("unixcharset." OSARCH ".properties") does not compile on AIX
00243       propertyFile.AssignLiteral("unixcharset.");
00244       propertyFile.Append(OSARCH);
00245       propertyFile.AppendLiteral(".properties");
00246       nsGREResProperties *info = new nsGREResProperties(propertyFile);
00247       NS_ASSERTION(info, "cannot create nsGREResProperties");
00248       if (info) {
00249         PRBool didLoad = info->DidLoad();
00250         if (!didLoad) {
00251           delete info;
00252           info = nsnull;
00253         }
00254       }
00255       gNLInfo = info;
00256     }
00257   }
00258 
00259   //
00260   // See if we are remapping nl_langinfo(CODESET)
00261   //
00262   if (gNLInfo && nl_langinfo_codeset) {
00263     nsAutoString localeKey;
00264 
00265 #if HAVE_GNU_GET_LIBC_VERSION
00266     //
00267     // look for an glibc version specific charset remap
00268     //
00269     const char *glibc_version = gnu_get_libc_version();
00270     if ((glibc_version != nsnull) && (strlen(glibc_version))) {
00271       localeKey.AssignLiteral("nllic.");
00272       localeKey.AppendWithConversion(glibc_version);
00273       localeKey.AppendLiteral(".");
00274       localeKey.AppendWithConversion(nl_langinfo_codeset);
00275       nsAutoString uCharset;
00276       res = gNLInfo->Get(localeKey, uCharset);
00277       if (NS_SUCCEEDED(res)) {
00278         aCharset.AssignWithConversion(uCharset);
00279         res = VerifyCharset(aCharset);
00280         if (NS_SUCCEEDED(res)) {
00281           oString = aCharset;
00282           return res;
00283         }
00284       }
00285     }
00286 #endif
00287 
00288     //
00289     // look for a charset specific charset remap
00290     //
00291     localeKey.AssignLiteral("nllic.");
00292     localeKey.AppendWithConversion(nl_langinfo_codeset);
00293     nsAutoString uCharset;
00294     res = gNLInfo->Get(localeKey, uCharset);
00295     if (NS_SUCCEEDED(res)) {
00296       aCharset.AssignWithConversion(uCharset);
00297       res = VerifyCharset(aCharset);
00298       if (NS_SUCCEEDED(res)) {
00299         oString = aCharset;
00300         return res;
00301       }
00302     }
00303   }
00304 
00305   NS_ASSERTION(0, "unable to use nl_langinfo(CODESET)");
00306 #endif
00307 
00308   //
00309   // try falling back on a deprecated (locale based) name
00310   //
00311   char* locale = setlocale(LC_CTYPE, nsnull);
00312   nsAutoString localeStr;
00313   localeStr.AssignWithConversion(locale);
00314   res = ConvertLocaleToCharsetUsingDeprecatedConfig(localeStr, oString);
00315   if (NS_SUCCEEDED(res)) {
00316     return res; // succeeded
00317   }
00318 
00319   oString.Truncate();
00320   return res;
00321 }
00322 
00323 NS_IMETHODIMP 
00324 nsPlatformCharset::Init()
00325 {
00326   nsCAutoString charset;
00327   nsresult res = NS_OK;
00328 
00329   //
00330   // remember default locale so we can use the
00331   // same charset when asked for the same locale
00332   //
00333   char* locale = setlocale(LC_CTYPE, nsnull);
00334   NS_ASSERTION(locale, "cannot setlocale");
00335   if (locale) {
00336     CopyASCIItoUTF16(locale, mLocale); 
00337   } else {
00338     mLocale.AssignLiteral("en_US");
00339   }
00340 
00341   res = InitGetCharset(charset);
00342   if (NS_SUCCEEDED(res)) {
00343     mCharset = charset;
00344     return res; // succeeded
00345   }
00346 
00347   // last resort fallback
00348   NS_ASSERTION(0, "unable to convert locale to charset using deprecated config");
00349   mCharset.AssignLiteral("ISO-8859-1");
00350   return NS_SUCCESS_USING_FALLBACK_LOCALE;
00351 }
00352 
00353 nsresult
00354 nsPlatformCharset::VerifyCharset(nsCString &aCharset)
00355 {
00356   nsresult res;
00357   //
00358   // get the convert manager
00359   //
00360   nsCOMPtr <nsICharsetConverterManager>  charsetConverterManager;
00361   charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
00362   if (NS_FAILED(res))
00363     return res;
00364 
00365   //
00366   // check if we can get an input converter
00367   //
00368   nsCOMPtr <nsIUnicodeEncoder> enc;
00369   res = charsetConverterManager->GetUnicodeEncoder(aCharset.get(), getter_AddRefs(enc));
00370   if (NS_FAILED(res)) {
00371     NS_ASSERTION(0, "failed to create encoder");
00372     return res;
00373   }
00374 
00375   //
00376   // check if we can get an output converter
00377   //
00378   nsCOMPtr <nsIUnicodeDecoder> dec;
00379   res = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), getter_AddRefs(dec));
00380   if (NS_FAILED(res)) {
00381     NS_ASSERTION(0, "failed to create decoder");
00382     return res;
00383   }
00384 
00385   //
00386   // check if we recognize the charset string
00387   //
00388 
00389   nsCAutoString result;
00390   res = charsetConverterManager->GetCharsetAlias(aCharset.get(), result);
00391   if (NS_FAILED(res)) {
00392     return res;
00393   }
00394 
00395   //
00396   // return the preferred string
00397   //
00398 
00399   aCharset.Assign(result);
00400   NS_ASSERTION(NS_SUCCEEDED(res), "failed to get preferred charset name, using non-preferred");
00401   return NS_OK;
00402 }
00403 
00404 nsresult 
00405 nsPlatformCharset::InitInfo()
00406 {  
00407   return NS_OK;
00408 }