Back to index

lightning-sunbird  0.9+nobinonly
mozSanitizingSerializer.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org HTML Sanitizer code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Ben Bucksch <mozilla@bucksch.org>.
00019  * Portions created by the Initial Developer are Copyright (C) 2002
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Netscape
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 // Removes potentially insecure or offending HTML
00040 
00041 /* I used nsPlaintextSerializer as base for this class. I don't understand
00042    all of the functions in the beginning. Possible that I fail to do
00043    something or do something useless.
00044    I am not proud about the implementation here at all.
00045    Feel free to fix it :-).
00046 */
00047 
00048 #include "mozSanitizingSerializer.h"
00049 #include "nsIServiceManager.h"
00050 #include "nsHTMLAtoms.h"
00051 #include "nsIDOMText.h"
00052 #include "nsIDOMElement.h"
00053 #include "nsITextContent.h"
00054 #include "nsTextFragment.h"
00055 #include "nsContentUtils.h"
00056 #include "nsReadableUtils.h"
00057 #include "plstr.h"
00058 #include "nsIProperties.h"
00059 #include "nsUnicharUtils.h"
00060 #include "nsIURI.h"
00061 #include "nsNetUtil.h"
00062 #include "nsEscape.h"
00063 
00064 //#define DEBUG_BenB
00065 
00066 static inline PRUnichar* escape(const nsString& source)
00067 {
00068   return nsEscapeHTML2(source.get(), source.Length()); 
00069 }
00070 
00071 /* XXX: |printf|s in some error conditions. They are intended as information
00072    for the user, because they complain about malformed pref values.
00073    Not sure, if popping up dialog boxes is the right thing for such code
00074    (and if so, how to do it).
00075  */
00076 
00077 #define TEXT_REMOVED "&lt;Text removed&gt;"
00078 #define TEXT_BREAKER "|"
00079 
00080 nsresult NS_NewSanitizingHTMLSerializer(nsIContentSerializer** aSerializer)
00081 {
00082   mozSanitizingHTMLSerializer* it = new mozSanitizingHTMLSerializer();
00083   if (!it) {
00084     return NS_ERROR_OUT_OF_MEMORY;
00085   }
00086   NS_ADDREF(it);
00087   *aSerializer = it;
00088   return NS_OK;
00089 }
00090 
00091 mozSanitizingHTMLSerializer::mozSanitizingHTMLSerializer()
00092   : mAllowedTags(30) // Just some initial buffer size
00093 {
00094   mOutputString = nsnull;
00095 }
00096 
00097 mozSanitizingHTMLSerializer::~mozSanitizingHTMLSerializer()
00098 {
00099 #ifdef DEBUG_BenB
00100   printf("Output:\n%s\n", NS_LossyConvertUCS2toASCII(*mOutputString).get());
00101 #endif
00102   mAllowedTags.Enumerate(ReleaseProperties);
00103 }
00104 
00105 //<copy from="xpcom/ds/nsProperties.cpp">
00106 PRBool PR_CALLBACK 
00107 mozSanitizingHTMLSerializer::ReleaseProperties(nsHashKey* key, void* data,
00108                                                void* closure)
00109 {
00110   nsIProperties* prop = (nsIProperties*)data;
00111   NS_IF_RELEASE(prop);
00112   return PR_TRUE;
00113 }
00114 //</copy>
00115 
00116 NS_IMPL_ISUPPORTS4(mozSanitizingHTMLSerializer,
00117                    nsIContentSerializer,
00118                    nsIContentSink,
00119                    nsIHTMLContentSink,
00120                    mozISanitizingHTMLSerializer)
00121 
00122 
00123 NS_IMETHODIMP 
00124 mozSanitizingHTMLSerializer::Init(PRUint32 aFlags, PRUint32 dummy,
00125                                   const char* aCharSet, PRBool aIsCopying)
00126 {
00127   NS_ENSURE_TRUE(nsContentUtils::GetParserServiceWeakRef(),
00128                  NS_ERROR_UNEXPECTED);
00129 
00130   return NS_OK;
00131 }
00132 
00133 NS_IMETHODIMP
00134 mozSanitizingHTMLSerializer::Initialize(nsAString* aOutString,
00135                                         PRUint32 aFlags,
00136                                         const nsAString& allowedTags)
00137 {
00138   nsresult rv = Init(aFlags, 0, nsnull, PR_FALSE);
00139   NS_ENSURE_SUCCESS(rv, rv);
00140 
00141   // XXX This is wrong. It violates XPCOM string ownership rules.
00142   // We're only getting away with this because instances of this
00143   // class are restricted to single function scope.
00144   // (Comment copied from nsPlaintextSerializer)
00145   mOutputString = aOutString;
00146 
00147   ParsePrefs(allowedTags);
00148 
00149   return NS_OK;
00150 }
00151 
00152 // This is not used within the class, but maybe called from somewhere else?
00153 NS_IMETHODIMP
00154 mozSanitizingHTMLSerializer::Flush(nsAString& aStr)
00155 {
00156 #ifdef DEBUG_BenB
00157   printf("Flush: -%s-", NS_LossyConvertUCS2toASCII(aStr).get());
00158 #endif
00159   Write(aStr);
00160   return NS_OK;
00161 }
00162 
00163 NS_IMETHODIMP
00164 mozSanitizingHTMLSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
00165                                                  nsAString& aStr)
00166 {
00167   return NS_OK;
00168 }
00169 
00170 void
00171 mozSanitizingHTMLSerializer::Write(const nsAString& aString)
00172 {
00173   mOutputString->Append(aString);
00174 }
00175 
00176 
00177 NS_IMETHODIMP
00178 mozSanitizingHTMLSerializer::IsEnabled(PRInt32 aTag, PRBool* aReturn)
00179 {
00180   *aReturn = PR_FALSE;
00181   return NS_OK;
00182 }
00183 
00184 
00188 PRBool
00189 mozSanitizingHTMLSerializer::IsContainer(PRInt32 aId)
00190 {
00191   PRBool isContainer = PR_FALSE;
00192 
00193   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
00194   if (parserService) {
00195     parserService->IsContainer(aId, isContainer);
00196   }
00197 
00198   return isContainer;
00199 }
00200 
00201 
00202 /* XXX I don't really know, what these functions do, but they seem to be
00203    needed ;-). Mostly copied from nsPlaintextSerializer. */
00204 /* akk says:
00205    "I wonder if the sanitizing class could inherit from nsHTMLSerializer,
00206    so that at least these methods that none of us understand only have to be
00207    written once?" */
00208 
00209 // static
00210 PRInt32
00211 mozSanitizingHTMLSerializer::GetIdForContent(nsIContent* aContent)
00212 {
00213   if (!aContent->IsContentOfType(nsIContent::eHTML)) {
00214     return eHTMLTag_unknown;
00215   }
00216 
00217   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
00218 
00219   return parserService ? parserService->HTMLAtomTagToId(aContent->Tag()) :
00220                          eHTMLTag_unknown;
00221 }
00222 
00223 NS_IMETHODIMP 
00224 mozSanitizingHTMLSerializer::AppendText(nsIDOMText* aText, 
00225                                         PRInt32 aStartOffset,
00226                                         PRInt32 aEndOffset, 
00227                                         nsAString& aStr)
00228 {
00229   nsresult rv = NS_OK;
00230 
00231   mOutputString = &aStr;
00232 
00233   nsAutoString linebuffer;
00234   rv = DoAddLeaf(eHTMLTag_text, linebuffer);
00235 
00236   return rv;
00237 }
00238 
00239 NS_IMETHODIMP 
00240 mozSanitizingHTMLSerializer::AppendElementStart(nsIDOMElement *aElement,
00241                                                 PRBool aHasChildren,
00242                                                 nsAString& aStr)
00243 {
00244   NS_ENSURE_ARG(aElement);
00245 
00246   mContent = do_QueryInterface(aElement);
00247   NS_ENSURE_TRUE(mContent, NS_ERROR_FAILURE);
00248 
00249   mOutputString = &aStr;
00250 
00251   PRInt32 id = GetIdForContent(mContent);
00252 
00253   PRBool isContainer = IsContainer(id);
00254 
00255   nsresult rv;
00256   if (isContainer) {
00257     rv = DoOpenContainer(id);
00258   }
00259   else {
00260     rv = DoAddLeaf(id, EmptyString());
00261   }
00262 
00263   mContent = 0;
00264   mOutputString = nsnull;
00265 
00266   return rv;
00267 } 
00268  
00269 NS_IMETHODIMP 
00270 mozSanitizingHTMLSerializer::AppendElementEnd(nsIDOMElement *aElement,
00271                                               nsAString& aStr)
00272 {
00273   NS_ENSURE_ARG(aElement);
00274 
00275   mContent = do_QueryInterface(aElement);
00276   NS_ENSURE_TRUE(mContent, NS_ERROR_FAILURE);
00277 
00278   mOutputString = &aStr;
00279 
00280   nsresult rv = NS_OK;
00281   PRInt32 id = GetIdForContent(mContent);
00282 
00283   PRBool isContainer = IsContainer(id);
00284 
00285   if (isContainer) {
00286     rv = DoCloseContainer(id);
00287   }
00288 
00289   mContent = 0;
00290   mOutputString = nsnull;
00291 
00292   return rv;
00293 }
00294 
00295 NS_IMETHODIMP
00296 mozSanitizingHTMLSerializer::OpenContainer(const nsIParserNode& aNode)
00297 {
00298   PRInt32 type = aNode.GetNodeType();
00299 
00300   mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode);
00301   return DoOpenContainer(type);
00302 }
00303 
00304 NS_IMETHODIMP 
00305 mozSanitizingHTMLSerializer::CloseContainer(const nsHTMLTag aTag)
00306 {
00307   // XXX Why do we need this?
00308   // mParserNode = NS_CONST_CAST(nsIParserNode*, &aNode);
00309   return DoCloseContainer(aTag);
00310 }
00311 
00312 NS_IMETHODIMP 
00313 mozSanitizingHTMLSerializer::AddHeadContent(const nsIParserNode& aNode)
00314 {
00315   nsresult rv = NS_OK;
00316   eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
00317   if (eHTMLTag_whitespace == type || 
00318       eHTMLTag_newline == type    ||
00319       eHTMLTag_text == type       ||
00320       eHTMLTag_entity == type) {
00321     rv = AddLeaf(aNode);
00322   }
00323   else if (eHTMLTag_title == type) {
00324     NS_ASSERTION(mParser, "Only CNavDTD treats title this way.");
00325 
00326     nsString skippedContent;
00327     PRInt32 lineNo;
00328 
00329     nsCOMPtr<nsIDTD> dtd;
00330     mParser->GetDTD(getter_AddRefs(dtd));
00331     NS_ENSURE_TRUE(dtd, NS_ERROR_UNEXPECTED);
00332 
00333     dtd->CollectSkippedContent(type, skippedContent, lineNo);
00334     SetTitle(skippedContent);
00335   }
00336   else {
00337     rv = OpenContainer(aNode);
00338     NS_ENSURE_SUCCESS(rv, rv);
00339     rv = CloseContainer(type);
00340   }
00341   return rv;
00342 }
00343 
00344 NS_IMETHODIMP 
00345 mozSanitizingHTMLSerializer::AddLeaf(const nsIParserNode& aNode)
00346 {
00347   eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
00348   const nsAString& text = aNode.GetText();
00349 
00350   mParserNode = NS_CONST_CAST(nsIParserNode*, &aNode);
00351   return DoAddLeaf(type, text);
00352 }
00353 
00354 NS_IMETHODIMP
00355 mozSanitizingHTMLSerializer::OpenHTML(const nsIParserNode& aNode)
00356 {
00357   return OpenContainer(aNode);
00358 }
00359 
00360 NS_IMETHODIMP 
00361 mozSanitizingHTMLSerializer::CloseHTML()
00362 {
00363   return CloseContainer(eHTMLTag_html);
00364 }
00365 
00366 NS_IMETHODIMP
00367 mozSanitizingHTMLSerializer::SetTitle(const nsString& aValue)
00368 {
00369   if (IsAllowedTag(eHTMLTag_title))
00370   {
00371     // See bug 195020 for a good reason to output the tags.
00372     // It will make sure we have a closing tag, and a
00373     // missing </title> tag won't result in everything
00374     // being eaten up as the title.
00375     Write(NS_LITERAL_STRING("<title>"));
00376     Write(nsAdoptingString(escape(aValue)));
00377     Write(NS_LITERAL_STRING("</title>"));
00378   }
00379   return NS_OK;
00380 }
00381 
00382 NS_IMETHODIMP 
00383 mozSanitizingHTMLSerializer::AddDocTypeDecl(const nsIParserNode& aNode)
00384 {
00385   return NS_OK;
00386 }
00387 
00388 NS_IMETHODIMP 
00389 mozSanitizingHTMLSerializer::SetDocumentCharset(nsACString& aCharset)
00390 {
00391   // No idea, if this works - it isn't invoked by |TestOutput|.
00392   Write(NS_LITERAL_STRING("\n<meta http-equiv=\"Context-Type\" content=\"text/html; charset=")
00393         /* Danger: breaking the line within the string literal, like
00394            "foo"\n"bar", breaks win32! */
00395         + nsAdoptingString(escape(NS_ConvertASCIItoUCS2(aCharset)))
00396         + NS_LITERAL_STRING("\">\n"));
00397   return NS_OK;
00398 }
00399 
00400 NS_IMETHODIMP 
00401 mozSanitizingHTMLSerializer::OpenHead(const nsIParserNode& aNode)
00402 {
00403   return OpenContainer(aNode);
00404 }
00405 
00406 NS_IMETHODIMP 
00407 mozSanitizingHTMLSerializer::CloseHead()
00408 {
00409   return CloseContainer(eHTMLTag_head);
00410 }
00411 
00412 NS_IMETHODIMP 
00413 mozSanitizingHTMLSerializer::OpenBody(const nsIParserNode& aNode)
00414 {
00415   return OpenContainer(aNode);
00416 }
00417 
00418 NS_IMETHODIMP 
00419 mozSanitizingHTMLSerializer::CloseBody()
00420 {
00421   return CloseContainer(eHTMLTag_body);
00422 }
00423 
00424 NS_IMETHODIMP 
00425 mozSanitizingHTMLSerializer::OpenForm(const nsIParserNode& aNode)
00426 {
00427   return OpenContainer(aNode);
00428 }
00429 
00430 NS_IMETHODIMP 
00431 mozSanitizingHTMLSerializer::CloseForm()
00432 {
00433   return CloseContainer(eHTMLTag_form);
00434 }
00435 
00436 NS_IMETHODIMP 
00437 mozSanitizingHTMLSerializer::OpenMap(const nsIParserNode& aNode)
00438 {
00439   return OpenContainer(aNode);
00440 }
00441 
00442 NS_IMETHODIMP 
00443 mozSanitizingHTMLSerializer::CloseMap()
00444 {
00445   return CloseContainer(eHTMLTag_map);
00446 }
00447 
00448 NS_IMETHODIMP 
00449 mozSanitizingHTMLSerializer::OpenFrameset(const nsIParserNode& aNode)
00450 {
00451   return OpenContainer(aNode);
00452 }
00453 
00454 NS_IMETHODIMP 
00455 mozSanitizingHTMLSerializer::CloseFrameset()
00456 {
00457   return CloseContainer(eHTMLTag_frameset);
00458 }
00459 
00460 
00461 // Here comes the actual code...
00462 
00463 nsresult
00464 mozSanitizingHTMLSerializer::DoOpenContainer(PRInt32 aTag)
00465 {
00466   eHTMLTags type = (eHTMLTags)aTag;
00467 
00468   if (IsAllowedTag(type))
00469   {
00470     nsIParserService* parserService =
00471       nsContentUtils::GetParserServiceWeakRef();
00472     if (!parserService)
00473       return NS_ERROR_OUT_OF_MEMORY;
00474     const PRUnichar* tag_name = parserService->HTMLIdToStringTag(aTag);
00475     NS_ENSURE_TRUE(tag_name, NS_ERROR_INVALID_POINTER);
00476 
00477     Write(NS_LITERAL_STRING("<") + nsDependentString(tag_name));
00478 
00479     // Attributes
00480     if (mParserNode)
00481     {
00482       PRInt32 count = mParserNode->GetAttributeCount();
00483       for (PRInt32 i = 0; i < count; i++)
00484       {
00485         const nsAString& key = mParserNode->GetKeyAt(i);
00486         if(IsAllowedAttribute(type, key))
00487         {
00488           // Ensure basic sanity of value
00489           nsAutoString value(mParserNode->GetValueAt(i));
00490                     // SanitizeAttrValue() modifies |value|
00491           if (NS_SUCCEEDED(SanitizeAttrValue(type, key, value)))
00492           {
00493             // Write out
00494             Write(NS_LITERAL_STRING(" "));
00495             Write(key); // I get an infinive loop with | + key + | !!!
00496             Write(NS_LITERAL_STRING("=\"") + value + NS_LITERAL_STRING("\""));
00497           }
00498         }
00499       }
00500     }
00501 
00502     Write(NS_LITERAL_STRING(">"));
00503   }
00504   else
00505     Write(NS_LITERAL_STRING(" "));
00506 
00507   return NS_OK;
00508 
00509 }
00510 
00511 nsresult
00512 mozSanitizingHTMLSerializer::DoCloseContainer(PRInt32 aTag)
00513 {
00514   eHTMLTags type = (eHTMLTags)aTag;
00515 
00516   if (IsAllowedTag(type)) {
00517     nsIParserService* parserService =
00518       nsContentUtils::GetParserServiceWeakRef();
00519     if (!parserService)
00520       return NS_ERROR_OUT_OF_MEMORY;
00521     const PRUnichar* tag_name = parserService->HTMLIdToStringTag(aTag);
00522     NS_ENSURE_TRUE(tag_name, NS_ERROR_INVALID_POINTER);
00523 
00524     Write(NS_LITERAL_STRING("</") + nsDependentString(tag_name)
00525           + NS_LITERAL_STRING(">"));
00526   }
00527   else
00528     Write(NS_LITERAL_STRING(" "));
00529 
00530   return NS_OK;
00531 }
00532 
00533 nsresult
00534 mozSanitizingHTMLSerializer::DoAddLeaf(PRInt32 aTag,
00535                                        const nsAString& aText)
00536 {
00537   eHTMLTags type = (eHTMLTags)aTag;
00538 
00539   nsresult rv = NS_OK;
00540 
00541   if (type == eHTMLTag_whitespace ||
00542       type == eHTMLTag_newline)
00543   {
00544     Write(aText); // sure to be safe?
00545   }
00546   else if (type == eHTMLTag_text)
00547   {
00548     nsAutoString text(aText);
00549     if(NS_SUCCEEDED(SanitizeTextNode(text)))
00550       Write(text);
00551     else
00552       Write(NS_LITERAL_STRING(TEXT_REMOVED)); // Does not happen (yet)
00553     NS_ENSURE_SUCCESS(rv, rv);
00554   }
00555   else if (type == eHTMLTag_entity)
00556   {
00557     Write(NS_LITERAL_STRING("&"));
00558     Write(aText); // sure to be safe?
00559     // using + operator here might give an infinitive loop, see above.
00560     // not adding ";", because Gecko delivers that as part of |aText| (freaky)
00561   }
00562   else if (type == eHTMLTag_script ||
00563            type == eHTMLTag_style ||
00564            type == eHTMLTag_server)
00565   {
00566     // These special tags require some extra care. The parser gives them
00567     // to us as leaves, but they're really containers. Their content is
00568     // contained in the "skipped content" of the parser. This code is
00569     // adapted from nsHTMLContentSink.cpp
00570     nsString skippedContent;
00571     PRInt32 lineNo;
00572 
00573     NS_ASSERTION(mParser, "We are receiving containers as leaves with "
00574                           "no skipped content.");
00575 
00576     nsCOMPtr<nsIDTD> dtd;
00577     mParser->GetDTD(getter_AddRefs(dtd));
00578     NS_ENSURE_TRUE(dtd, NS_ERROR_UNEXPECTED);
00579 
00580     // Note: we want to collect the skipped content no matter what. We
00581     // may end up throwing it away anyway, but the DTD doesn't care
00582     // about that.
00583     dtd->CollectSkippedContent(type, skippedContent, lineNo);
00584 
00585     DoOpenContainer(type);
00586     if (IsAllowedTag(type))
00587     {
00588       Write(skippedContent);
00589     }
00590     DoCloseContainer(type);
00591   }
00592   else
00593   {
00594     DoOpenContainer(type);
00595   }
00596 
00597   return rv;
00598 }
00599 
00600 
00604 nsresult
00605 mozSanitizingHTMLSerializer::SanitizeTextNode(nsString& aText /*inout*/)
00606 {
00607   aText.Adopt(escape(aText));
00608   return NS_OK;
00609 }
00610 
00622 nsresult
00623 mozSanitizingHTMLSerializer::SanitizeAttrValue(nsHTMLTag aTag,
00624                                                const nsAString& anAttrName,
00625                                                nsString& aValue /*inout*/)
00626 {
00627   /* First, cut the attribute to 1000 chars.
00628      Attributes with values longer than 1000 chars seem bogus,
00629      considering that we don't support any JS. The longest attributes
00630      I can think of are URLs, and URLs with 1000 chars are likely to be
00631      bogus, too. */
00632   aValue = Substring(aValue, 0, 1000);
00633   //aValue.Truncate(1000); //-- this cuts half of the document !!?!!
00634 
00635   aValue.Adopt(escape(aValue));
00636 
00637   /* Check some known bad stuff. Add more!
00638      I don't care too much, if it happens to trigger in some innocent cases
00639      (like <img alt="Statistical data: Mortage rates and newspapers">) -
00640      security first. */
00641   if (aValue.Find("javascript:") != kNotFound ||
00642       aValue.Find("data:") != kNotFound ||
00643       aValue.Find("base64") != kNotFound)
00644     return NS_ERROR_ILLEGAL_VALUE;
00645 
00646   // Check img src scheme
00647   if (aTag == eHTMLTag_img && 
00648       anAttrName.LowerCaseEqualsLiteral("src"))
00649   {
00650     nsresult rv;
00651     nsCOMPtr<nsIIOService> ioService = do_GetIOService(&rv);
00652     NS_ENSURE_SUCCESS(rv, rv);
00653     nsCAutoString scheme;
00654     rv = ioService->ExtractScheme(NS_LossyConvertUCS2toASCII(aValue), scheme);
00655     NS_ENSURE_SUCCESS(rv, rv);
00656 
00657     if (!scheme.Equals("cid", nsCaseInsensitiveCStringComparator()))
00658       return NS_ERROR_ILLEGAL_VALUE;
00659   }
00660 
00661 #ifdef DEBUG_BenB
00662   printf("attribute value for %s: -%s-\n",
00663          NS_LossyConvertUCS2toASCII(anAttrName).get(),
00664          NS_LossyConvertUCS2toASCII(aValue).get());
00665 #endif
00666 
00667   return NS_OK;
00668 }
00669 
00672 PRBool
00673 mozSanitizingHTMLSerializer::IsAllowedTag(nsHTMLTag aTag)
00674 {
00675 
00676   nsPRUint32Key tag_key(aTag);
00677 #ifdef DEBUG_BenB
00678   printf("IsAllowedTag %d: %s\n",
00679          aTag,
00680          mAllowedTags.Exists(&tag_key)?"yes":"no");
00681 #endif
00682   return mAllowedTags.Exists(&tag_key);
00683 }
00684 
00685 
00688 PRBool
00689 mozSanitizingHTMLSerializer::IsAllowedAttribute(nsHTMLTag aTag,
00690                                              const nsAString& anAttributeName)
00691 {
00692 #ifdef DEBUG_BenB
00693   printf("IsAllowedAttribute %d, -%s-\n",
00694          aTag,
00695          NS_LossyConvertUCS2toASCII(anAttributeName).get());
00696 #endif
00697   nsresult rv;
00698 
00699   nsPRUint32Key tag_key(aTag);
00700   nsIProperties* attr_bag = (nsIProperties*)mAllowedTags.Get(&tag_key);
00701   NS_ENSURE_TRUE(attr_bag, PR_FALSE);
00702 
00703   PRBool allowed;
00704   nsAutoString attr(anAttributeName);
00705   ToLowerCase(attr);
00706   rv = attr_bag->Has(NS_LossyConvertUCS2toASCII(attr).get(),
00707                      &allowed);
00708   if (NS_FAILED(rv))
00709     return PR_FALSE;
00710 
00711 #ifdef DEBUG_BenB
00712   printf(" Allowed: %s\n", allowed?"yes":"no");
00713 #endif
00714   return allowed;
00715 }
00716 
00717 
00735 nsresult
00736 mozSanitizingHTMLSerializer::ParsePrefs(const nsAString& aPref)
00737 {
00738   char* pref = ToNewCString(aPref);
00739   char* tags_lasts;
00740   for (char* iTag = PL_strtok_r(pref, " ", &tags_lasts);
00741        iTag;
00742        iTag = PL_strtok_r(NULL, " ", &tags_lasts))
00743   {
00744     ParseTagPref(nsCAutoString(iTag));
00745   }
00746   delete[] pref;
00747 
00748   return NS_OK;
00749 }
00750 
00751 
00755 nsresult
00756 mozSanitizingHTMLSerializer::ParseTagPref(const nsCAutoString& tagpref)
00757 {
00758   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
00759   if (!parserService)
00760     return NS_ERROR_OUT_OF_MEMORY;
00761 
00762   // Parsing tag
00763   PRInt32 bracket = tagpref.FindChar('(');
00764   if (bracket == 0)
00765   {
00766     printf(" malformed pref: %s\n", tagpref.get());
00767     return NS_ERROR_CANNOT_CONVERT_DATA;
00768   }
00769 
00770   nsAutoString tag;
00771   CopyUTF8toUTF16(StringHead(tagpref, bracket), tag);
00772 
00773   // Create key
00774   PRInt32 tag_id = parserService->HTMLStringTagToId(tag);
00775   if (tag_id == eHTMLTag_userdefined)
00776   {
00777     printf(" unknown tag <%s>, won't add.\n",
00778            NS_ConvertUTF16toUTF8(tag).get());
00779     return NS_ERROR_CANNOT_CONVERT_DATA;
00780   }
00781   nsPRUint32Key tag_key(tag_id);
00782 
00783   if (mAllowedTags.Exists(&tag_key))
00784   {
00785     printf(" duplicate tag: %s\n", NS_ConvertUTF16toUTF8(tag).get());
00786     return NS_ERROR_CANNOT_CONVERT_DATA;
00787   }
00788   if (bracket == kNotFound)
00789     /* There are no attributes in the pref. So, allow none; only the tag
00790        itself */
00791   {
00792     mAllowedTags.Put(&tag_key, 0);
00793   }
00794   else
00795   {
00796     // Attributes
00797 
00798     // where is the macro for non-fatal errors in opt builds?
00799     if(tagpref[tagpref.Length() - 1] != ')' ||
00800        tagpref.Length() < PRUint32(bracket) + 3)
00801     {
00802       printf(" malformed pref: %s\n", tagpref.get());
00803       return NS_ERROR_CANNOT_CONVERT_DATA;
00804     }
00805     nsCOMPtr<nsIProperties> attr_bag =
00806                                  do_CreateInstance(NS_PROPERTIES_CONTRACTID);
00807     NS_ENSURE_TRUE(attr_bag, NS_ERROR_INVALID_POINTER);
00808     nsCAutoString attrList;
00809     attrList.Append(Substring(tagpref,
00810                               bracket + 1,
00811                               tagpref.Length() - 2 - bracket));
00812     char* attrs_lasts;
00813     for (char* iAttr = PL_strtok_r(attrList.BeginWriting(),
00814                                    ",", &attrs_lasts);
00815          iAttr;
00816          iAttr = PL_strtok_r(NULL, ",", &attrs_lasts))
00817     {
00818       attr_bag->Set(iAttr, 0);
00819     }
00820 
00821     nsIProperties* attr_bag_raw = attr_bag;
00822     NS_ADDREF(attr_bag_raw);
00823     mAllowedTags.Put(&tag_key, attr_bag_raw);
00824   }
00825 
00826   return NS_OK;
00827 }
00828 
00829 /*
00830   might be useful:
00831   htmlparser/public/nsHTMLTokens.h for tag categories
00832 */