Back to index

lightning-sunbird  0.9+nobinonly
nsTextToSubURI.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Pierre Phaneuf <pp@ludusdesign.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 #include "nsString.h"
00039 #include "nsIUnicodeEncoder.h"
00040 #include "nsICharsetConverterManager.h"
00041 #include "nsReadableUtils.h"
00042 #include "nsITextToSubURI.h"
00043 #include "nsIServiceManager.h"
00044 #include "nsUConvDll.h"
00045 #include "nsEscape.h"
00046 #include "prmem.h"
00047 #include "nsTextToSubURI.h"
00048 #include "nsCRT.h"
00049 
00050 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
00051 
00052 nsTextToSubURI::nsTextToSubURI()
00053 {
00054 }
00055 nsTextToSubURI::~nsTextToSubURI()
00056 {
00057 }
00058 
00059 NS_IMPL_ISUPPORTS1(nsTextToSubURI, nsITextToSubURI)
00060 
00061 NS_IMETHODIMP  nsTextToSubURI::ConvertAndEscape(
00062   const char *charset, const PRUnichar *text, char **_retval) 
00063 {
00064   if(nsnull == _retval)
00065     return NS_ERROR_NULL_POINTER;
00066   *_retval = nsnull;
00067   nsresult rv = NS_OK;
00068   
00069   // Get Charset, get the encoder.
00070   nsICharsetConverterManager *ccm;
00071   rv = CallGetService(kCharsetConverterManagerCID, &ccm);
00072   if(NS_SUCCEEDED(rv)) {
00073      nsIUnicodeEncoder *encoder;
00074      rv = ccm->GetUnicodeEncoder(charset, &encoder);
00075      NS_RELEASE(ccm);
00076      if (NS_SUCCEEDED(rv)) {
00077        rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, (PRUnichar)'?');
00078        if(NS_SUCCEEDED(rv))
00079        {
00080           char buf[256];
00081           char *pBuf = buf;
00082           PRInt32 ulen = nsCRT::strlen(text);
00083           PRInt32 outlen = 0;
00084           if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) 
00085           {
00086              if(outlen >= 256) {
00087                 pBuf = (char*)PR_Malloc(outlen+1);
00088              }
00089              if(nsnull == pBuf) {
00090                 outlen = 255;
00091                 pBuf = buf;
00092              }
00093              PRInt32 bufLen = outlen;
00094              if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
00095                 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
00096                 PRInt32 finLen = bufLen - outlen;
00097                 if (finLen > 0) {
00098                   if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
00099                     outlen += finLen;
00100                 }
00101                 pBuf[outlen] = '\0';
00102                 *_retval = nsEscape(pBuf, url_XPAlphas);
00103                 if(nsnull == *_retval)
00104                   rv = NS_ERROR_OUT_OF_MEMORY;
00105              }
00106           }
00107           if(pBuf != buf)
00108              PR_Free(pBuf);
00109        }
00110        NS_RELEASE(encoder);
00111      }
00112   }
00113   
00114   return rv;
00115 }
00116 
00117 NS_IMETHODIMP  nsTextToSubURI::UnEscapeAndConvert(
00118   const char *charset, const char *text, PRUnichar **_retval) 
00119 {
00120   if(nsnull == _retval)
00121     return NS_ERROR_NULL_POINTER;
00122   *_retval = nsnull;
00123   nsresult rv = NS_OK;
00124   
00125   // unescape the string, unescape changes the input
00126   char *unescaped = nsCRT::strdup((char *) text);
00127   if (nsnull == unescaped)
00128     return NS_ERROR_OUT_OF_MEMORY;
00129   unescaped = nsUnescape(unescaped);
00130   NS_ASSERTION(unescaped, "nsUnescape returned null");
00131 
00132   // Convert from the charset to unicode
00133   nsCOMPtr<nsICharsetConverterManager> ccm = 
00134            do_GetService(kCharsetConverterManagerCID, &rv); 
00135   if (NS_SUCCEEDED(rv)) {
00136     nsIUnicodeDecoder *decoder;
00137     rv = ccm->GetUnicodeDecoder(charset, &decoder);
00138     if (NS_SUCCEEDED(rv)) {
00139       PRUnichar *pBuf = nsnull;
00140       PRInt32 len = strlen(unescaped);
00141       PRInt32 outlen = 0;
00142       if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
00143         pBuf = (PRUnichar *) PR_Malloc((outlen+1)*sizeof(PRUnichar*));
00144         if (nsnull == pBuf)
00145           rv = NS_ERROR_OUT_OF_MEMORY;
00146         else {
00147           if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
00148             pBuf[outlen] = 0;
00149             *_retval = pBuf;
00150           }
00151         }
00152       }
00153       NS_RELEASE(decoder);
00154     }
00155   }
00156   PR_Free(unescaped);
00157 
00158   return rv;
00159 }
00160 
00161 static PRBool statefulCharset(const char *charset)
00162 {
00163   if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
00164       !nsCRT::strcasecmp(charset, "UTF-7") ||
00165       !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
00166     return PR_TRUE;
00167 
00168   return PR_FALSE;
00169 }
00170 
00171 nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
00172                                              const nsAFlatCString &aURI, 
00173                                              PRBool aIRI, 
00174                                              nsAString &_retval)
00175 {
00176   nsresult rv = NS_OK;
00177 
00178   // check for 7bit encoding the data may not be ASCII after we decode
00179   PRBool isStatefulCharset = statefulCharset(aCharset.get());
00180 
00181   if (!isStatefulCharset && IsASCII(aURI)) {
00182     CopyASCIItoUTF16(aURI, _retval);
00183     return rv;
00184   }
00185 
00186   if (!isStatefulCharset && aIRI) {
00187     if (IsUTF8(aURI)) {
00188       CopyUTF8toUTF16(aURI, _retval);
00189       return rv;
00190     }
00191   }
00192 
00193   // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
00194   NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
00195 
00196   nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
00197 
00198   charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
00199   NS_ENSURE_SUCCESS(rv, rv);
00200 
00201   nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
00202   rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), 
00203                                                   getter_AddRefs(unicodeDecoder));
00204   NS_ENSURE_SUCCESS(rv, rv);
00205 
00206   PRInt32 srcLen = aURI.Length();
00207   PRInt32 dstLen;
00208   rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
00209   NS_ENSURE_SUCCESS(rv, rv);
00210 
00211   PRUnichar *ustr = (PRUnichar *) nsMemory::Alloc(dstLen * sizeof(PRUnichar));
00212   NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
00213 
00214   rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
00215 
00216   if (NS_SUCCEEDED(rv))
00217     _retval.Assign(ustr, dstLen);
00218   
00219   nsMemory::Free(ustr);
00220 
00221   return rv;
00222 }
00223 
00224 NS_IMETHODIMP  nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, 
00225                                                 const nsACString &aURIFragment, 
00226                                                 nsAString &_retval)
00227 {
00228   nsCAutoString unescapedSpec;
00229   // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
00230   NS_UnescapeURL(PromiseFlatCString(aURIFragment), 
00231                  esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
00232 
00233   // in case of failure, return escaped URI
00234   if (NS_FAILED(convertURItoUnicode(
00235                 PromiseFlatCString(aCharset), unescapedSpec, PR_TRUE, _retval)))
00236     // assume UTF-8 instead of ASCII  because hostname (IDN) may be in UTF-8
00237     CopyUTF8toUTF16(aURIFragment, _retval); 
00238   return NS_OK;
00239 }
00240 
00241 NS_IMETHODIMP  nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, 
00242                                                    const nsACString &aURIFragment, 
00243                                                    nsAString &_retval)
00244 {
00245   nsCAutoString unescapedSpec;
00246   NS_UnescapeURL(PromiseFlatCString(aURIFragment),
00247                  esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
00248 
00249   return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, PR_TRUE, _retval);
00250 }
00251 
00252 //----------------------------------------------------------------------