Back to index

lightning-sunbird  0.9+nobinonly
nsUTF8ConverterService.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* vim:expandtab:shiftwidth=2:tabstop=4: 
00003  */
00004 /* ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is mozilla.org code.
00018  *
00019  * The Initial Developers of the Original Code are
00020  * Naoki Hotta <nhotta@netscape.com> and Jungshik Shin <jshin@mailaps.org>.
00021  * Portions created by the Initial Developer are Copyright (C) 2002, 2003
00022  * the Initial Developers. All Rights Reserved.
00023  *
00024  * Contributor(s):
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either the GNU General Public License Version 2 or later (the "GPL"), or
00028  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 #include "nsString.h"
00040 #include "nsIUnicodeEncoder.h"
00041 #include "nsICharsetConverterManager.h"
00042 #include "nsReadableUtils.h"
00043 #include "nsIServiceManager.h"
00044 #include "nsUConvDll.h"
00045 #include "prmem.h"
00046 #include "nsUTF8ConverterService.h"
00047 #include "nsEscape.h"
00048 #include "nsAutoPtr.h"
00049 
// XPCOM boilerplate macro: supplies the nsISupports methods
// (AddRef/Release/QueryInterface) for nsUTF8ConverterService, with
// nsIUTF8ConverterService as the single interface it answers to.
00050 NS_IMPL_ISUPPORTS1(nsUTF8ConverterService, nsIUTF8ConverterService)
00051 
00052 static nsresult 
00053 ToUTF8(const nsACString &aString, const char *aCharset, nsACString &aResult)
00054 {
00055   nsresult rv;
00056   if (!aCharset || !*aCharset)
00057     return NS_ERROR_INVALID_ARG;
00058 
00059   nsCOMPtr<nsICharsetConverterManager> ccm;
00060 
00061   ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
00062   NS_ENSURE_SUCCESS(rv, rv);
00063 
00064   nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
00065   rv = ccm->GetUnicodeDecoder(aCharset,
00066                               getter_AddRefs(unicodeDecoder));
00067   NS_ENSURE_SUCCESS(rv, rv);
00068 
00069   PRInt32 srcLen = aString.Length();
00070   PRInt32 dstLen;
00071   const nsAFlatCString& inStr = PromiseFlatCString(aString);
00072   rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
00073   NS_ENSURE_SUCCESS(rv, rv);
00074 
00075   nsAutoArrayPtr<PRUnichar> ustr(new PRUnichar[dstLen]);
00076   NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
00077 
00078   rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen);
00079   if (NS_SUCCEEDED(rv)){
00080     // Tru64 Cxx and IRIX MIPSpro 7.3  need an explicit get()
00081     CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult);
00082   }
00083   return rv;
00084 }
00085 
00086 NS_IMETHODIMP  
00087 nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, 
00088                                             const char *aCharset, 
00089                                             PRBool aSkipCheck, 
00090                                             nsACString &aUTF8String)
00091 {
00092   // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
00093   // check is requested. It may not be asked for if a caller suspects
00094   // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or 
00095   // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
00096   if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
00097     aUTF8String = aString;
00098     return NS_OK;
00099   }
00100 
00101   aUTF8String.Truncate();
00102 
00103   nsresult rv = ToUTF8(aString, aCharset, aUTF8String);
00104 
00105   // additional protection for cases where check is skipped and  the input
00106   // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
00107   // was wrong.) We don't check ASCIIness assuming there's no charset
00108   // incompatible with ASCII (we don't support EBCDIC).
00109   if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
00110     aUTF8String = aString;
00111     return NS_OK;
00112   }
00113 
00114   return rv;
00115 }
00116 
00117 NS_IMETHODIMP  
00118 nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, 
00119                                              const char *aCharset, 
00120                                              nsACString &aUTF8Spec)
00121 {
00122   // assume UTF-8 if the spec contains unescaped non-ASCII characters.
00123   // No valid spec in Mozilla would break this assumption.
00124   if (!IsASCII(aSpec)) {
00125     aUTF8Spec = aSpec;
00126     return NS_OK;
00127   }
00128 
00129   aUTF8Spec.Truncate();
00130 
00131   nsCAutoString unescapedSpec; 
00132   // NS_UnescapeURL does not fill up unescapedSpec unless there's at least 
00133   // one character to unescape.
00134   PRBool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), 
00135                                   esc_OnlyNonASCII, unescapedSpec);
00136 
00137   if (!written) {
00138     aUTF8Spec = aSpec;
00139     return NS_OK;
00140   }
00141   // return if ASCII only or escaped UTF-8
00142   if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
00143     aUTF8Spec = unescapedSpec;
00144     return NS_OK;
00145   }
00146 
00147   return ToUTF8(unescapedSpec, aCharset, aUTF8Spec);
00148 }
00149