Back to index

lightning-sunbird  0.9+nobinonly
nsMetaCharsetObserver.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1999
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 #include "nsDeque.h"
00038 #include "nsICharsetAlias.h"
00039 #include "nsMetaCharsetObserver.h"
00040 #include "nsIMetaCharsetService.h"
00041 #include "nsIElementObserver.h"
00042 #include "nsIObserver.h"
00043 #include "nsIObserverService.h"
00044 #include "nsISupports.h"
00045 #include "nsCRT.h"
00046 #include "nsIParser.h"
00047 #include "pratom.h"
00048 #include "nsCharDetDll.h"
00049 #include "nsIServiceManager.h"
00050 #include "nsObserverBase.h"
00051 #include "nsWeakReference.h"
00052 #include "nsIParserService.h"
00053 #include "nsParserCIID.h"
00054 #include "nsMetaCharsetCID.h"
00055 #include "nsReadableUtils.h"
00056 #include "nsUnicharUtils.h"
00057 
00058 static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
00059  
00060 static const eHTMLTags gWatchTags[] = 
00061 { eHTMLTag_meta,
00062   eHTMLTag_unknown
00063 };
00064 
00065 //-------------------------------------------------------------------------
00066 nsMetaCharsetObserver::nsMetaCharsetObserver()
00067 {
00068   bMetaCharsetObserverStarted = PR_FALSE;
00069   nsresult res;
00070   mAlias = nsnull;
00071   nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res));
00072   if(NS_SUCCEEDED(res)) {
00073      mAlias = calias;
00074   }
00075 }
00076 //-------------------------------------------------------------------------
00077 nsMetaCharsetObserver::~nsMetaCharsetObserver()
00078 {
00079 }
00080 
00081 //-------------------------------------------------------------------------
00082 NS_IMPL_ADDREF ( nsMetaCharsetObserver )
00083 NS_IMPL_RELEASE ( nsMetaCharsetObserver )
00084 
00085 // Use the new scheme
00086 NS_IMPL_QUERY_INTERFACE4(nsMetaCharsetObserver, 
00087                          nsIElementObserver, 
00088                          nsIObserver, 
00089                          nsIMetaCharsetService, 
00090                          nsISupportsWeakReference)
00091 
00092 //-------------------------------------------------------------------------
00093 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
00094                      PRUint32 aDocumentID, 
00095                      const PRUnichar* aTag, 
00096                      PRUint32 numOfAttributes, 
00097                      const PRUnichar* nameArray[], 
00098                      const PRUnichar* valueArray[])
00099 {
00100   
00101     if(!nsDependentString(aTag).LowerCaseEqualsLiteral("meta")) 
00102         return NS_ERROR_ILLEGAL_VALUE;
00103     else
00104         return Notify(aDocumentID, numOfAttributes, nameArray, valueArray);
00105 }
00106 //-------------------------------------------------------------------------
00107 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
00108                      PRUint32 aDocumentID, 
00109                      eHTMLTags aTag, 
00110                      PRUint32 numOfAttributes, 
00111                      const PRUnichar* nameArray[], 
00112                      const PRUnichar* valueArray[])
00113 {
00114     if(eHTMLTag_meta != aTag) 
00115         return NS_ERROR_ILLEGAL_VALUE;
00116     else 
00117         return Notify(aDocumentID, numOfAttributes, nameArray, valueArray);
00118 }
00119 
00120 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
00121                      PRUint32 aDocumentID, 
00122                      PRUint32 numOfAttributes, 
00123                      const PRUnichar* nameArray[], 
00124                      const PRUnichar* valueArray[])
00125 {
00126    nsDeque keys(0);
00127    nsDeque values(0);
00128    PRUint32 i;
00129    for(i=0;i<numOfAttributes;i++)
00130    {
00131        keys.Push((void*)nameArray[i]);
00132        values.Push((void*)valueArray[i]);
00133    }
00134    return NS_OK;//Notify((nsISupports*)aDocumentID, &keys, &values);
00135 }
00136 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
00137                      nsISupports* aWebShell,
00138                      nsISupports* aChannel,
00139                      const PRUnichar* aTag, 
00140                      const nsStringArray* keys, 
00141                      const nsStringArray* values,
00142                      const PRUint32 aFlags)
00143 {
00144   nsresult result = NS_OK;
00145   // bug 125317 - document.write content is already an unicode content.
00146   if (!(aFlags & nsIElementObserver::IS_DOCUMENT_WRITE)) {
00147     if(!nsDependentString(aTag).LowerCaseEqualsLiteral("meta")) {
00148         result = NS_ERROR_ILLEGAL_VALUE;
00149     }
00150     else {
00151         result = Notify(aWebShell, aChannel, keys, values);
00152     }
00153   }
00154   return result;
00155 }
00156 
// Characters skipped as whitespace around META attribute names and values:
// space, tab, CR and LF. Backspace is still accepted because the previous
// definition of this macro accepted it. The argument is evaluated several
// times, so it must not have side effects.
#define IS_SPACE_CHARS(ch)  ((ch) == ' ' || (ch) == '\t' || (ch) == '\b' || \
                             (ch) == '\r' || (ch) == '\n')
00158 
00159 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
00160                     nsISupports* aWebShell,
00161                     nsISupports* aChannel,
00162                     const nsStringArray* keys, 
00163                     const nsStringArray* values)
00164 {
00165     NS_PRECONDITION(keys!=nsnull && values!=nsnull,"Need key-value pair");
00166 
00167     PRInt32 numOfAttributes = keys->Count();
00168     NS_ASSERTION( numOfAttributes == values->Count(), "size mismatch");
00169     nsresult res=NS_OK;
00170 #ifdef DEBUG
00171 
00172     PRUnichar Uxcommand[]={'X','_','C','O','M','M','A','N','D','\0'};
00173     PRUnichar UcharsetSource[]={'c','h','a','r','s','e','t','S','o','u','r','c','e','\0'};
00174     PRUnichar Ucharset[]={'c','h','a','r','s','e','t','\0'};
00175     
00176     NS_ASSERTION(numOfAttributes >= 3, "should have at least 3 private attribute");
00177     NS_ASSERTION(0==nsCRT::strcmp(Uxcommand,(keys->StringAt(numOfAttributes-1))->get()),"last name should be 'X_COMMAND'" );
00178     NS_ASSERTION(0==nsCRT::strcmp(UcharsetSource,(keys->StringAt(numOfAttributes-2))->get()),"2nd last name should be 'charsetSource'" );
00179     NS_ASSERTION(0==nsCRT::strcmp(Ucharset,(keys->StringAt(numOfAttributes-3))->get()),"3rd last name should be 'charset'" );
00180 
00181 #endif
00182     NS_ASSERTION(mAlias, "Didn't get nsICharsetAlias in constructor");
00183 
00184     if(nsnull == mAlias)
00185       return NS_ERROR_ABORT;
00186 
00187     // we need at least 5 - HTTP-EQUIV, CONTENT and 3 private
00188     if(numOfAttributes >= 5 ) 
00189     {
00190       const PRUnichar *charset = (values->StringAt(numOfAttributes-3))->get();
00191       const PRUnichar *source =  (values->StringAt(numOfAttributes-2))->get();
00192       PRInt32 err;
00193       nsAutoString srcStr(source);
00194       PRInt32  src = srcStr.ToInteger(&err);
00195       // if we cannot convert the string into PRInt32, return error
00196       NS_ASSERTION(NS_SUCCEEDED(err), "cannot get charset source");
00197       if(NS_FAILED(err))
00198           return NS_ERROR_ILLEGAL_VALUE;
00199 
00200       if(kCharsetFromMetaTag <= src)
00201           return NS_OK; // current charset has higher priority. don't bother to do the following
00202 
00203       PRInt32 i;
00204       const PRUnichar *httpEquivValue=nsnull;
00205       const PRUnichar *contentValue=nsnull;
00206       const PRUnichar *charsetValue=nsnull;
00207 
00208       for(i=0;i<(numOfAttributes-3);i++)
00209       {
00210         const PRUnichar *keyStr;
00211         keyStr = (keys->StringAt(i))->get();
00212 
00213         //Change 3.190 in nsHTMLTokens.cpp allow  ws/tab/cr/lf exist before 
00214         // and after text value, this need to be skipped before comparison
00215         while(IS_SPACE_CHARS(*keyStr)) 
00216           keyStr++;
00217 
00218         if(Substring(keyStr, keyStr+10).LowerCaseEqualsLiteral("http-equiv"))
00219               httpEquivValue = values->StringAt(i)->get();
00220         else if(Substring(keyStr, keyStr+7).LowerCaseEqualsLiteral("content"))
00221               contentValue = values->StringAt(i)->get();
00222         else if (Substring(keyStr, keyStr+7).LowerCaseEqualsLiteral("charset"))
00223               charsetValue = values->StringAt(i)->get();
00224       }
00225       NS_NAMED_LITERAL_STRING(contenttype, "Content-Type");
00226       NS_NAMED_LITERAL_STRING(texthtml, "text/html");
00227 
00228       if(nsnull == httpEquivValue || nsnull == contentValue)
00229         return NS_OK;
00230 
00231       while(IS_SPACE_CHARS(*httpEquivValue))
00232         ++httpEquivValue;
00233       // skip opening quote
00234       if (*httpEquivValue == '\'' || *httpEquivValue == '\"')
00235         ++httpEquivValue;
00236 
00237       while(IS_SPACE_CHARS(*contentValue))
00238         ++contentValue;
00239       // skip opening quote
00240       if (*contentValue == '\'' || *contentValue == '\"')
00241         ++contentValue;
00242 
00243       if(
00244          Substring(httpEquivValue,
00245                    httpEquivValue+contenttype.Length()).Equals(contenttype,
00246                                                                nsCaseInsensitiveStringComparator())
00247          &&
00248          Substring(contentValue,
00249                    contentValue+texthtml.Length()).Equals(texthtml,
00250                                                           nsCaseInsensitiveStringComparator())
00251         )
00252       {
00253 
00254          nsCAutoString newCharset;
00255 
00256          if (nsnull == charsetValue) 
00257          {
00258            nsAutoString contentPart1(contentValue+9); // after "text/html"
00259            PRInt32 start = contentPart1.RFind("charset=", PR_TRUE ) ;
00260            PRInt32 end = contentPart1.Length();
00261            if(kNotFound != start)
00262            {
00263              start += 8; // 8 = "charset=".length 
00264              while (start < end && contentPart1.CharAt(start) == PRUnichar(' '))
00265                ++start;
00266              if (start < end) {
00267                end = contentPart1.FindCharInSet("\'\"; ", start);
00268                if(kNotFound == end ) 
00269                  end = contentPart1.Length();
00270                NS_ASSERTION(end>=start, "wrong index");
00271                LossyCopyUTF16toASCII(Substring(contentPart1, start, end-start),
00272                                      newCharset);
00273              }
00274            } 
00275          }
00276          else   
00277          {
00278              LossyCopyUTF16toASCII(nsDependentString(charsetValue), newCharset);
00279          } 
00280 
00281          nsCAutoString charsetString; charsetString.AssignWithConversion(charset);
00282          
00283          if (!newCharset.IsEmpty())
00284          {    
00285              if(! newCharset.Equals(charsetString, nsCaseInsensitiveCStringComparator()))
00286              {
00287                  PRBool same = PR_FALSE;
00288                  nsresult res2 = mAlias->Equals( newCharset, charsetString , &same);
00289                  if(NS_SUCCEEDED(res2) && (! same))
00290                  {
00291                      nsCAutoString preferred;
00292                      res2 = mAlias->GetPreferred(newCharset, preferred);
00293                      if(NS_SUCCEEDED(res2))
00294                      {
00295                         // following charset should have been detected by parser
00296                         if (!preferred.EqualsLiteral("UTF-16") &&
00297                             !preferred.EqualsLiteral("UTF-16BE") &&
00298                             !preferred.EqualsLiteral("UTF-16LE") &&
00299                             !preferred.EqualsLiteral("UTF-32BE") &&
00300                             !preferred.EqualsLiteral("UTF-32LE")) {
00301                           // Propagate the error message so that the parser can
00302                           // shutdown correctly. - Ref. Bug 96440
00303                           res = NotifyWebShell(aWebShell,
00304                                                aChannel,
00305                                                preferred.get(),
00306                                                kCharsetFromMetaTag);
00307                         }
00308                      } // if(NS_SUCCEEDED(res)
00309                  }
00310              }
00311              else {
00312                res = NS_HTMLPARSER_VALID_META_CHARSET;
00313              } // if EqualIgnoreCase 
00314          } // if !newCharset.IsEmpty()
00315       } // if
00316     }
00317     else
00318     {
00319       nsAutoString compatCharset;
00320       if (NS_SUCCEEDED(GetCharsetFromCompatibilityTag(keys, values, compatCharset)))
00321       {
00322         if (!compatCharset.IsEmpty()) {
00323           res = NotifyWebShell(aWebShell,
00324                                aChannel,
00325                                NS_ConvertUCS2toUTF8(compatCharset).get(), 
00326                                kCharsetFromMetaTag);
00327         }
00328       }
00329     }
00330     return res;
00331 }
00332 
00333 //-------------------------------------------------------------------------
00334 NS_IMETHODIMP nsMetaCharsetObserver::GetCharsetFromCompatibilityTag(
00335                      const nsStringArray* keys, 
00336                      const nsStringArray* values, 
00337                      nsAString& aCharset)
00338 {
00339     if (!mAlias)
00340         return NS_ERROR_ABORT;
00341 
00342     aCharset.Truncate(0);
00343     nsresult res = NS_OK;
00344 
00345 
00346     // support for non standard case for compatibility
00347     // e.g. <META charset="ISO-8859-1">
00348     PRInt32 numOfAttributes = keys->Count();
00349     if ((numOfAttributes >= 3) &&
00350         (keys->StringAt(0)->LowerCaseEqualsLiteral("charset")))
00351     {
00352       nsAutoString srcStr((values->StringAt(numOfAttributes-2))->get());
00353       PRInt32 err;
00354       PRInt32  src = srcStr.ToInteger(&err);
00355       // if we cannot convert the string into PRInt32, return error
00356       if (NS_FAILED(err))
00357           return NS_ERROR_ILLEGAL_VALUE;
00358       
00359       // current charset have a lower priority
00360       if (kCharsetFromMetaTag > src)
00361       {
00362           nsCAutoString newCharset;
00363           newCharset.AssignWithConversion(values->StringAt(0)->get());
00364           
00365           nsCAutoString preferred;
00366           res = mAlias->GetPreferred(newCharset,
00367                                      preferred);
00368           if (NS_SUCCEEDED(res))
00369           {
00370               // compare against the current charset, 
00371               // also some charsets which should have been found in
00372               // the BOM detection.
00373               nsString* currentCharset = values->StringAt(numOfAttributes-3);
00374               if (!preferred.Equals(NS_LossyConvertUCS2toASCII(*currentCharset)) &&
00375                   !preferred.EqualsLiteral("UTF-16") &&
00376                   !preferred.EqualsLiteral("UTF-16BE") &&
00377                   !preferred.EqualsLiteral("UTF-16LE") &&
00378                   !preferred.EqualsLiteral("UTF-32BE") &&
00379                   !preferred.EqualsLiteral("UTF-32LE"))
00380                   AppendASCIItoUTF16(preferred, aCharset);
00381           }
00382       }
00383     }
00384 
00385   return res;
00386 }
00387 
00388 //-------------------------------------------------------------------------
00389 NS_IMETHODIMP nsMetaCharsetObserver::Observe(nsISupports *aSubject,
00390                             const char *aTopic,
00391                                const PRUnichar *aData) 
00392 {
00393   nsresult rv = NS_OK;
00394   if (!nsCRT::strcmp(aTopic, "parser-service-start")) {
00395     rv = Start();
00396   }
00397   return rv;
00398 }
00399 
00400 //-------------------------------------------------------------------------
00401 NS_IMETHODIMP nsMetaCharsetObserver::Start() 
00402 {
00403   nsresult rv = NS_OK;
00404 
00405   if (bMetaCharsetObserverStarted == PR_FALSE)  {
00406     bMetaCharsetObserverStarted = PR_TRUE;
00407 
00408     nsCOMPtr<nsIParserService> parserService(do_GetService(NS_PARSERSERVICE_CONTRACTID, &rv));
00409 
00410     if (NS_FAILED(rv))
00411       return rv;
00412 
00413     rv = parserService->RegisterObserver(this,
00414                                          NS_LITERAL_STRING("text/html"),
00415                                          gWatchTags);
00416   }
00417 
00418   return rv;
00419 }
00420 //-------------------------------------------------------------------------
00421 NS_IMETHODIMP nsMetaCharsetObserver::End() 
00422 {
00423   nsresult rv = NS_OK;
00424   if (bMetaCharsetObserverStarted == PR_TRUE)  {
00425     bMetaCharsetObserverStarted = PR_FALSE;
00426 
00427     nsCOMPtr<nsIParserService> parserService(do_GetService(NS_PARSERSERVICE_CONTRACTID, &rv));
00428 
00429     if (NS_FAILED(rv))
00430       return rv;
00431     
00432     rv = parserService->UnregisterObserver(this, NS_LITERAL_STRING("text/html"));
00433   }
00434   return rv;
00435 }
00436 //========================================================================== 
00437 
00438 
00439 
00440