Back to index

lightning-sunbird  0.9+nobinonly
nsMIMEHeaderParamImpl.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* vim:expandtab:shiftwidth=2:tabstop=4:
00003  */
00004 /* ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is mozilla.org code.
00018  *
00019  * The Initial Developer of the Original Code is
00020  * Netscape Communications Corporation.
00021  * Portions created by the Initial Developer are Copyright (C) 1998
00022  * the Initial Developer. All Rights Reserved.
00023  *
00024  * Contributor(s):
00025  *   rhp@netscape.com
00026  *   Jungshik Shin <jshin@mailaps.org>
00027  *   John G Myers   <jgmyers@netscape.com>
00028  *   Takayuki Tei   <taka@netscape.com>
00029  *
00030  * Alternatively, the contents of this file may be used under the terms of
00031  * either the GNU General Public License Version 2 or later (the "GPL"), or
00032  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00033  * in which case the provisions of the GPL or the LGPL are applicable instead
00034  * of those above. If you wish to allow use of your version of this file only
00035  * under the terms of either the GPL or the LGPL, and not to allow others to
00036  * use your version of this file under the terms of the MPL, indicate your
00037  * decision by deleting the provisions above and replace them with the notice
00038  * and other provisions required by the GPL or the LGPL. If you do not delete
00039  * the provisions above, a recipient may use your version of this file under
00040  * the terms of any one of the MPL, the GPL or the LGPL.
00041  *
00042  * ***** END LICENSE BLOCK ***** */
00043 
00044 #include <string.h>
00045 #include "prtypes.h"
00046 #include "prmem.h"
00047 #include "prprf.h"
00048 #include "plstr.h"
00049 #include "plbase64.h"
00050 #include "nsCRT.h"
00051 #include "nsMemory.h"
00052 #include "nsCOMPtr.h"
00053 #include "nsEscape.h"
00054 #include "nsIUTF8ConverterService.h"
00055 #include "nsUConvCID.h"
00056 #include "nsIServiceManager.h"
00057 #include "nsMIMEHeaderParamImpl.h"
00058 #include "nsReadableUtils.h"
00059 #include "nsNativeCharsetUtils.h"
00060 
00061 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
00062   
00063 static char *DecodeQ(const char *, PRUint32);
00064 static PRBool Is7bitNonAsciiString(const char *, PRUint32);
00065 static void CopyRawHeader(const char *, PRUint32, const char *, nsACString &);
00066 static nsresult DecodeRFC2047Str(const char *, const char *, PRBool, nsACString&);
00067 
00068 // XXX The chance of UTF-7 being used in the message header is really
00069 // low, but in theory it's possible. 
00070 #define IS_7BIT_NON_ASCII_CHARSET(cset)            \
00071     (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
00072      !nsCRT::strncasecmp((cset), "HZ-GB", 5)    || \
00073      !nsCRT::strncasecmp((cset), "UTF-7", 5))   
00074 
00075 NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
00076 
00077 // XXX : aTryLocaleCharset is not yet effective.
00078 NS_IMETHODIMP 
00079 nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, 
00080                                     const char *aParamName,
00081                                     const nsACString& aFallbackCharset, 
00082                                     PRBool aTryLocaleCharset, 
00083                                     char **aLang, nsAString& aResult)
00084 {
00085     aResult.Truncate();
00086     nsresult rv;
00087 
00088     // get parameter (decode RFC 2231 if it's RFC 2231-encoded and 
00089     // return charset.)
00090     nsXPIDLCString med;
00091     nsXPIDLCString charset;
00092     rv = GetParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 
00093                               getter_Copies(charset), aLang, getter_Copies(med));
00094     if (NS_FAILED(rv))
00095         return rv; 
00096 
00097     // convert to UTF-8 after charset conversion and RFC 2047 decoding 
00098     // if necessary.
00099     
00100     nsCAutoString str1;
00101     rv = DecodeParameter(med, charset.get(), nsnull, PR_FALSE, str1);
00102     NS_ENSURE_SUCCESS(rv, rv);
00103 
00104     if (!aFallbackCharset.IsEmpty())
00105     {
00106         nsCAutoString str2;
00107         nsCOMPtr<nsIUTF8ConverterService> 
00108           cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
00109         if (NS_SUCCEEDED(rv) &&
00110             NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 
00111                 PromiseFlatCString(aFallbackCharset).get(), PR_FALSE, str2))) {
00112           CopyUTF8toUTF16(str2, aResult);
00113           return NS_OK;
00114         }
00115     }
00116 
00117     if (IsUTF8(str1)) {
00118       CopyUTF8toUTF16(str1, aResult);
00119       return NS_OK;
00120     }
00121 
00122     if (aTryLocaleCharset) 
00123       return NS_CopyNativeToUnicode(str1, aResult);
00124 
00125     return NS_ERROR_FAILURE;
00126 }
00127 
00128 // moved almost verbatim from mimehdrs.cpp
00129 // char *
00130 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
00131 //                            char **charset, char **language)
00132 //
00133 // The format of these header lines  is
00134 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
00135 NS_IMETHODIMP 
00136 nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, 
00137                                             const char *aParamName,
00138                                             char **aCharset,
00139                                             char **aLang,
00140                                             char **aResult)
00141 {
00142   if (!aHeaderValue ||  !*aHeaderValue || !aResult)
00143     return NS_ERROR_INVALID_ARG;
00144 
00145   *aResult = nsnull;
00146 
00147   if (aCharset) *aCharset = nsnull;
00148   if (aLang) *aLang = nsnull;
00149 
00150   const char *str = aHeaderValue;
00151 
00152   // skip leading white space.
00153   for (; *str &&  nsCRT::IsAsciiSpace(*str); ++str)
00154     ;
00155   const char *start = str;
00156   
00157   // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
00158   // For instance, return 'inline' in the following case:
00159   // Content-Disposition: inline; filename=.....
00160   if (!aParamName || !*aParamName) 
00161     {
00162       for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
00163         ;
00164       if (str == start)
00165         return NS_ERROR_UNEXPECTED;
00166       *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
00167       (*aResult)[str - start] = '\0';  // null-terminate
00168       NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
00169       return NS_OK;
00170     }
00171 
00172   /* Skip forward to first ';' */
00173   for (; *str && *str != ';' && *str != ','; ++str)
00174     ;
00175   if (*str)
00176     str++;
00177   /* Skip over following whitespace */
00178   for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
00179     ;
00180 
00181   // Some broken http servers just specify parameters
00182   // like 'filename' without sepcifying disposition
00183   // method. Rewind to the first non-white-space
00184   // character.
00185   
00186   if (!*str)
00187     str = start;
00188 
00189   // RFC2231 - The legitimate parm format can be:
00190   // A. title=ThisIsTitle 
00191   // B. title*=us-ascii'en-us'This%20is%20wierd.
00192   // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
00193   //    title*1*=have%20to%20support%20this.
00194   //    title*2="Else..."
00195   // D. title*0="Hey, what you think you are doing?"
00196   //    title*1="There is no charset and lang info."
00197 
00198   PRInt32 paramLen = strlen(aParamName);
00199 
00200   while (*str) {
00201     const char *tokenStart = str;
00202     const char *tokenEnd = 0;
00203     const char *valueStart = str;
00204     const char *valueEnd = 0;
00205 
00206     NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
00207 
00208     // Skip forward to the end of this token. 
00209     for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
00210       ;
00211     tokenEnd = str;
00212 
00213     // Skip over whitespace, '=', and whitespace
00214     while (nsCRT::IsAsciiSpace(*str)) ++str;
00215     if (*str == '=') ++str;
00216     while (nsCRT::IsAsciiSpace(*str)) ++str;
00217 
00218     if (*str != '"')
00219     {
00220       // The value is a token, not a quoted string.
00221       valueStart = str;
00222       for (valueEnd = str;
00223            *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
00224            valueEnd++)
00225         ;
00226       str = valueEnd;
00227     }
00228     else
00229     {
00230       // The value is a quoted string. 
00231       ++str;
00232       valueStart = str;
00233       for (valueEnd = str; *valueEnd; ++valueEnd)
00234       {
00235         if (*valueEnd == '\\')
00236           ++valueEnd;
00237         else if (*valueEnd == '"')
00238           break;
00239       }
00240       str = valueEnd + 1;
00241     }
00242 
00243     // See if this is the simplest case (case A above),
00244     // a 'single' line value with no charset and lang.
00245     // If so, copy it and return.
00246     if (tokenEnd - tokenStart == paramLen &&
00247         !nsCRT::strncasecmp(tokenStart, aParamName, paramLen))
00248     {
00249       // if the parameter spans across multiple lines we have to strip out the
00250       //     line continuation -- jht 4/29/98 
00251       nsCAutoString tempStr(valueStart, valueEnd - valueStart);
00252       tempStr.StripChars("\r\n");
00253       *aResult = ToNewCString(tempStr);
00254       NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
00255       return NS_OK;
00256     }
00257     // case B, C, and D
00258     else if (tokenEnd - tokenStart > paramLen &&
00259              !nsCRT::strncasecmp(tokenStart, aParamName, paramLen) &&
00260              *(tokenStart + paramLen) == '*')
00261     {
00262       const char *cp = tokenStart + paramLen + 1; // 1st char pass '*'
00263       PRBool needUnescape = *(tokenEnd - 1) == '*';
00264       // the 1st line of a multi-line parameter or a single line  that needs 
00265       // unescaping. ( title*0*=  or  title*= )
00266       if ((*cp == '0' && needUnescape) || (tokenEnd - tokenStart == paramLen + 1))
00267       {
00268         // look for single quotation mark(')
00269         const char *sQuote1 = PL_strchr(valueStart, 0x27);
00270         const char *sQuote2 = (char *) (sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nsnull);
00271 
00272         // Two single quotation marks must be present even in
00273         // absence of charset and lang. 
00274         if (!sQuote1 || !sQuote2)
00275           NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
00276         if (aCharset && sQuote1 > valueStart && sQuote1 < valueEnd)
00277         {
00278           *aCharset = (char *) nsMemory::Clone(valueStart, sQuote1 - valueStart + 1);
00279           if (*aCharset) 
00280             *(*aCharset + (sQuote1 - valueStart)) = 0;
00281         }
00282         if (aLang && sQuote1 && sQuote2 && sQuote2 > sQuote1 + 1 &&
00283             sQuote2 < valueEnd)
00284         {
00285           *aLang = (char *) nsMemory::Clone(sQuote1 + 1, sQuote2 - (sQuote1 + 1) + 1);
00286           if (*aLang) 
00287             *(*aLang + (sQuote2 - (sQuote1 + 1))) = 0;
00288         }
00289 
00290         // Be generous and handle gracefully when required 
00291         // single quotes are absent.
00292         if (sQuote1)
00293         {
00294           if(!sQuote2)
00295             sQuote2 = sQuote1;
00296         }
00297         else
00298           sQuote2 = valueStart - 1;
00299 
00300         if (sQuote2 && sQuote2 + 1 < valueEnd)
00301         {
00302           NS_ASSERTION(!*aResult, "This is the 1st line. result buffer should be null.");
00303           *aResult = (char *) nsMemory::Alloc(valueEnd - (sQuote2 + 1) + 1);
00304           if (*aResult)
00305           {
00306             memcpy(*aResult, sQuote2 + 1, valueEnd - (sQuote2 + 1));
00307             *(*aResult + (valueEnd - (sQuote2 + 1))) = 0;
00308             if (needUnescape)
00309             {
00310               nsUnescape(*aResult);
00311               if (tokenEnd - tokenStart == paramLen + 1)
00312                 // we're done; this is case B 
00313                 return NS_OK; 
00314             }
00315           }
00316         }
00317       }  // end of if-block :  title*0*=  or  title*= 
00318       // a line of multiline param with no need for unescaping : title*[0-9]=
00319       // or 2nd or later lines of a multiline param : title*[1-9]*= 
00320       else if (nsCRT::IsAsciiDigit(PRUnichar(*cp)))
00321       {
00322         PRInt32 len = 0;
00323         if (*aResult) // 2nd or later lines of multiline parameter
00324         {
00325           len = strlen(*aResult);
00326           char *ns = (char *) nsMemory::Realloc(*aResult, len + (valueEnd - valueStart) + 1);
00327           if (!ns)
00328           {
00329             nsMemory::Free(*aResult);
00330           }
00331           *aResult = ns;
00332         }
00333         else if (*cp == '0') // must be; 1st line :  title*0=
00334         {
00335           *aResult = (char *) nsMemory::Alloc(valueEnd - valueStart + 1);
00336         }
00337         // else {} something is really wrong; out of memory
00338         if (*aResult)
00339         {
00340           // append a partial value
00341           memcpy(*aResult + len, valueStart, valueEnd - valueStart);
00342           *(*aResult + len + (valueEnd - valueStart)) = 0;
00343           if (needUnescape)
00344             nsUnescape(*aResult + len);
00345         }
00346         else 
00347           return NS_ERROR_OUT_OF_MEMORY;
00348       } // end of if-block :  title*[0-9]= or title*[1-9]*=
00349     }
00350 
00351     // str now points after the end of the value.
00352     //   skip over whitespace, ';', whitespace.
00353       
00354     while (nsCRT::IsAsciiSpace(*str)) ++str;
00355     if (*str == ';') ++str;
00356     while (nsCRT::IsAsciiSpace(*str)) ++str;
00357   }
00358 
00359   if (*aResult) 
00360     return NS_OK;
00361   else
00362     return NS_ERROR_INVALID_ARG; // aParameter not found !!
00363 }
00364 
00365 
00366 NS_IMETHODIMP
00367 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, 
00368                                            const char* aDefaultCharset, 
00369                                            PRBool aOverrideCharset, 
00370                                            PRBool aEatContinuations,
00371                                            nsACString& aResult)
00372 {
00373   aResult.Truncate();
00374   if (!aHeaderVal)
00375     return NS_ERROR_INVALID_ARG;
00376   if (!*aHeaderVal)
00377     return NS_OK;
00378 
00379 
00380   // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string  but
00381   // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
00382   // to UTF-8. Otherwise, just strips away CRLF. 
00383   if (PL_strstr(aHeaderVal, "=?") || 
00384       aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || 
00385       Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal)))) {
00386     DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
00387   } else if (aEatContinuations && 
00388              (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
00389     aResult = aHeaderVal;
00390   } else {
00391     aEatContinuations = PR_FALSE;
00392     aResult = aHeaderVal;
00393   }
00394 
00395   if (aEatContinuations) {
00396     nsCAutoString temp(aResult);
00397     temp.StripChars("\r\n");
00398     aResult = temp;
00399   }
00400 
00401   return NS_OK;
00402 }
00403 
00404 NS_IMETHODIMP 
00405 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
00406                                        const char* aCharset,
00407                                        const char* aDefaultCharset,
00408                                        PRBool aOverrideCharset, 
00409                                        nsACString& aResult)
00410 {
00411   aResult.Truncate();
00412   nsresult rv;  
00413   // If aCharset is given, aParamValue was obtained from RFC2231 
00414   // encoding and we're pretty sure that it's in aCharset.
00415   if (aCharset && *aCharset)
00416   {
00417     nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
00418     if (NS_SUCCEEDED(rv)) 
00419       // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
00420       return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
00421           IS_7BIT_NON_ASCII_CHARSET(aCharset), aResult);
00422   }
00423 
00424   const nsAFlatCString& param = PromiseFlatCString(aParamValue);
00425   nsCAutoString unQuoted;
00426   nsACString::const_iterator s, e;
00427   param.BeginReading(s);
00428   param.EndReading(e);
00429 
00430   // strip '\' when used to quote CR, LF, '"' and '\'
00431   for ( ; s != e; ++s) {
00432     if ((*s == '\\')) {
00433       if (++s == e) {
00434         --s; // '\' is at the end. move back and append '\'.
00435       }
00436       else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
00437         --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
00438       }
00439       // else : skip '\' and append the quoted character.
00440     }
00441     unQuoted.Append(*s);
00442   }
00443 
00444   aResult = unQuoted;
00445   
00446   nsCAutoString decoded;
00447 
00448   // Try RFC 2047 encoding, instead.
00449   rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset, 
00450                            aOverrideCharset, PR_TRUE, decoded);
00451   
00452   if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
00453     aResult = decoded;
00454   
00455   return rv;
00456 }
00457 
00458 #define ISHEXCHAR(c) \
00459         (0x30 <= PRUint8(c) && PRUint8(c) <= 0x39  ||  \
00460          0x41 <= PRUint8(c) && PRUint8(c) <= 0x46  ||  \
00461          0x61 <= PRUint8(c) && PRUint8(c) <= 0x66)
00462 
00463 // Decode Q encoding (RFC 2047).
00464 // static
00465 char *DecodeQ(const char *in, PRUint32 length)
00466 {
00467   char *out, *dest = 0;
00468 
00469   out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
00470   if (dest == nsnull)
00471     return nsnull;
00472   while (length > 0) {
00473     PRUintn c = 0;
00474     switch (*in) {
00475     case '=':
00476       // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
00477       if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
00478         goto badsyntax;
00479       PR_sscanf(in + 1, "%2X", &c);
00480       *out++ = (char) c;
00481       in += 3;
00482       length -= 3;
00483       break;
00484 
00485     case '_':
00486       *out++ = ' ';
00487       in++;
00488       length--;
00489       break;
00490 
00491     default:
00492       if (*in & 0x80) goto badsyntax;
00493       *out++ = *in++;
00494       length--;
00495     }
00496   }
00497   *out++ = '\0';
00498 
00499   for (out = dest; *out ; ++out) {
00500     if (*out == '\t')
00501       *out = ' ';
00502   }
00503 
00504   return dest;
00505 
00506  badsyntax:
00507   PR_Free(dest);
00508   return nsnull;
00509 }
00510 
00511 // check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)) 
00512 // or has  ESC which may be an  indication that  it's in one of many ISO 
00513 // 2022 7bit  encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
00514 // static
00515 PRBool Is7bitNonAsciiString(const char *input, PRUint32 len)
00516 {
00517   PRInt32 c;
00518 
00519   enum { hz_initial, // No HZ seen yet
00520          hz_escaped, // Inside an HZ ~{ escape sequence 
00521          hz_seen, // Have seen at least one complete HZ sequence 
00522          hz_notpresent // Have seen something that is not legal HZ
00523   } hz_state;
00524 
00525   hz_state = hz_initial;
00526   while (len) {
00527     c = PRUint8(*input++);
00528     len--;
00529     if (c & 0x80) return PR_FALSE;
00530     if (c == 0x1B) return PR_TRUE;
00531     if (c == '~') {
00532       switch (hz_state) {
00533       case hz_initial:
00534       case hz_seen:
00535         if (*input == '{') {
00536           hz_state = hz_escaped;
00537         } else if (*input == '~') {
00538           // ~~ is the HZ encoding of ~.  Skip over second ~ as well
00539           hz_state = hz_seen;
00540           input++;
00541           len--;
00542         } else {
00543           hz_state = hz_notpresent;
00544         }
00545         break;
00546 
00547       case hz_escaped:
00548         if (*input == '}') hz_state = hz_seen;
00549         break;
00550       default:
00551         break;
00552       }
00553     }
00554   }
00555   return hz_state == hz_seen;
00556 }
00557 
00558 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
00559 
00560 // copy 'raw' sequences of octets in aInput to aOutput.
00561 // If aDefaultCharset is specified, the input is assumed to be in the
00562 // charset and converted to UTF-8. Otherwise, a blind copy is made.
00563 // If aDefaultCharset is specified, but the conversion to UTF-8
00564 // is not successful, each octet is replaced by Unicode replacement
00565 // chars. *aOutput is advanced by the number of output octets.
00566 // static
00567 void CopyRawHeader(const char *aInput, PRUint32 aLen, 
00568                    const char *aDefaultCharset, nsACString &aOutput)
00569 {
00570   PRInt32 c;
00571 
00572   // If aDefaultCharset is not specified, make a blind copy.
00573   if (!aDefaultCharset || !*aDefaultCharset) {
00574     aOutput.Append(aInput, aLen);
00575     return;
00576   }
00577 
00578   // Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022
00579   // A ~ may indicate it is HZ
00580   while (aLen && (c = PRUint8(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
00581     aOutput.Append(char(c));
00582     aLen--;
00583   }
00584   if (!aLen) {
00585     return;
00586   }
00587   aInput--;
00588 
00589   // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
00590   // string and aDefaultCharset is a 7bit non-ascii charset.
00591   PRBool skipCheck = (c == 0x1B || c == '~') && 
00592                      IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
00593 
00594   nsresult rv;
00595   // If not UTF-8, treat as default charset
00596   nsCOMPtr<nsIUTF8ConverterService> 
00597     cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
00598   nsCAutoString utf8Text;
00599   if (NS_SUCCEEDED(rv) &&
00600       NS_SUCCEEDED(
00601       cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), 
00602       aDefaultCharset, skipCheck, utf8Text))) {
00603     aOutput.Append(utf8Text);
00604   } else { // replace each octet with Unicode replacement char in UTF-8.
00605     for (PRUint32 i = 0; i < aLen; i++) {
00606       aOutput.Append(REPLACEMENT_CHAR);
00607     }
00608   }
00609 }
00610 
00611 static const char especials[] = "()<>@,;:\\\"/[]?.=";
00612 
00613 // |decode_mime_part2_str| taken from comi18n.c
00614 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
00615 // If aOverrideCharset is true, charset in RFC2047-encoded words is 
00616 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
00617 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
00618 //static
00619 nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, 
00620                           PRBool aOverrideCharset, nsACString &aResult)
00621 {
00622   const char *p, *q, *r;
00623   char *decodedText;
00624   const char *begin; // tracking pointer for where we are in the input buffer
00625   PRInt32 isLastEncodedWord = 0;
00626   const char *charsetStart, *charsetEnd;
00627   char charset[80];
00628 
00629   // initialize charset name to an empty string
00630   charset[0] = '\0';
00631 
00632   begin = aHeader;
00633 
00634   // To avoid buffer realloc, if possible, set capacity in advance. No 
00635   // matter what,  more than 3x expansion can never happen for all charsets
00636   // supported by Mozilla. SCSU/BCSU with the sliding window set to a
00637   // non-BMP block may be exceptions, but Mozilla does not support them. 
00638   // Neither any known mail/news program use them. Even if there's, we're
00639   // safe because we don't use a raw *char any more.
00640   aResult.SetCapacity(3 * strlen(aHeader));
00641 
00642   while ((p = PL_strstr(begin, "=?")) != 0) {
00643     if (isLastEncodedWord) {
00644       // See if it's all whitespace.
00645       for (q = begin; q < p; ++q) {
00646         if (!PL_strchr(" \t\r\n", *q)) break;
00647       }
00648     }
00649 
00650     if (!isLastEncodedWord || q < p) {
00651       // copy the part before the encoded-word
00652       CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
00653       begin = p;
00654     }
00655 
00656     p += 2;
00657 
00658     // Get charset info
00659     charsetStart = p;
00660     charsetEnd = 0;
00661     for (q = p; *q != '?'; q++) {
00662       if (*q <= ' ' || PL_strchr(especials, *q)) {
00663         goto badsyntax;
00664       }
00665 
00666       // RFC 2231 section 5
00667       if (!charsetEnd && *q == '*') {
00668         charsetEnd = q; 
00669       }
00670     }
00671     if (!charsetEnd) {
00672       charsetEnd = q;
00673     }
00674 
00675     // Check for too-long charset name
00676     if (PRUint32(charsetEnd - charsetStart) >= sizeof(charset)) 
00677       goto badsyntax;
00678     
00679     memcpy(charset, charsetStart, charsetEnd - charsetStart);
00680     charset[charsetEnd - charsetStart] = 0;
00681 
00682     q++;
00683     if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b')
00684       goto badsyntax;
00685 
00686     if (q[1] != '?')
00687       goto badsyntax;
00688 
00689     r = q;
00690     for (r = q + 2; *r != '?'; r++) {
00691       if (*r < ' ') goto badsyntax;
00692     }
00693     if (r[1] != '=')
00694         goto badsyntax;
00695     else if (r == q + 2) {
00696         // it's empty, skip
00697         begin = r + 2;
00698         isLastEncodedWord = 1;
00699         continue;
00700     }
00701 
00702     if(*q == 'Q' || *q == 'q')
00703       decodedText = DecodeQ(q + 2, r - (q + 2));
00704     else {
00705       // bug 227290. ignore an extraneous '=' at the end.
00706       // (# of characters in B-encoded part has to be a multiple of 4)
00707       PRInt32 n = r - (q + 2);
00708       n -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
00709       decodedText = PL_Base64Decode(q + 2, n, nsnull);
00710     }
00711 
00712     if (decodedText == nsnull)
00713       goto badsyntax;
00714 
00715     // Override charset if requested.  Never override labeled UTF-8.
00716     // Use default charset instead of UNKNOWN-8BIT
00717     if ((aOverrideCharset && 0 != nsCRT::strcasecmp(charset, "UTF-8")) ||
00718         (aDefaultCharset && 0 == nsCRT::strcasecmp(charset, "UNKNOWN-8BIT"))) {
00719       PL_strncpy(charset, aDefaultCharset, sizeof(charset) - 1);
00720       charset[sizeof(charset) - 1] = '\0';
00721     }
00722 
00723     {
00724       nsresult rv;
00725       nsCOMPtr<nsIUTF8ConverterService> 
00726         cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
00727       nsCAutoString utf8Text;
00728       // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
00729       if (NS_SUCCEEDED(rv) && 
00730           NS_SUCCEEDED(
00731             cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
00732             charset, IS_7BIT_NON_ASCII_CHARSET(charset), utf8Text))) {
00733         aResult.Append(utf8Text);
00734       } else {
00735         aResult.Append(REPLACEMENT_CHAR);
00736       }
00737     }
00738     PR_Free(decodedText);
00739     begin = r + 2;
00740     isLastEncodedWord = 1;
00741     continue;
00742 
00743   badsyntax:
00744     // copy the part before the encoded-word
00745     aResult.Append(begin, p - begin);
00746     begin = p;
00747     isLastEncodedWord = 0;
00748   }
00749 
00750   // put the tail back
00751   CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
00752 
00753   nsCAutoString tempStr(aResult);
00754   tempStr.ReplaceChar('\t', ' ');
00755   aResult = tempStr;
00756 
00757   return NS_OK;
00758 }
00759