Back to index

lightning-sunbird  0.9+nobinonly
nsScriptableUnescapeHTML.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is Robert Sayre.
00018  * Portions created by the Initial Developer are Copyright (C) 2006
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPL"), or
00025  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00026  * in which case the provisions of the GPL or the LGPL are applicable instead
00027  * of those above. If you wish to allow use of your version of this file only
00028  * under the terms of either the GPL or the LGPL, and not to allow others to
00029  * use your version of this file under the terms of the MPL, indicate your
00030  * decision by deleting the provisions above and replace them with the notice
00031  * and other provisions required by the GPL or the LGPL. If you do not delete
00032  * the provisions above, a recipient may use your version of this file under
00033  * the terms of any one of the MPL, the GPL or the LGPL.
00034  *
00035  * ***** END LICENSE BLOCK ***** */
00036 
00037 #include "nsString.h"
00038 #include "nsCRT.h"
00039 #include "nsISupportsArray.h"
00040 #include "nsIComponentManager.h"
00041 #include "nsCOMPtr.h"
00042 #include "nsXPCOM.h"
00043 #include "nsISupportsPrimitives.h"
00044 #include "nsXPIDLString.h"
00045 #include "nsIScriptLoader.h"
00046 #include "nsEscape.h"
00047 #include "nsIParser.h"
00048 #include "nsIDTD.h"
00049 #include "nsNetCID.h"
00050 #include "nsNetUtil.h"
00051 #include "nsParserCIID.h"
00052 #include "nsParserCIID.h"
00053 #include "nsIContentSink.h"
00054 #include "nsIHTMLToTextSink.h"
00055 #include "nsIDocumentEncoder.h"
00056 #include "nsIDOMDocumentFragment.h"
00057 #include "nsIFragmentContentSink.h"
00058 #include "nsIDOMDocument.h"
00059 #include "nsIDOMNodeList.h"
00060 #include "nsIDOMNode.h"
00061 #include "nsIDOMElement.h"
00062 #include "nsIDocument.h"
00063 #include "nsIContent.h"
00064 #include "nsHTMLParts.h"
00065 #include "nsContentCID.h"
00066 #include "nsIScriptableUnescapeHTML.h"
00067 #include "nsScriptableUnescapeHTML.h"
00068 
// Wrapper "tags" pushed onto the parser's tag stack by ParseFragment().
// They are written without angle brackets. The XHTML variant carries the
// XHTML namespace declaration and is used when the fragment is parsed as XML.
00069 #define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\""
// Wrapper tag used when the fragment is parsed as HTML.
00070 #define HTML_BODY_TAG "BODY"
// Tag name used to build a BASE element with an href when a base URI is
// supplied for an HTML fragment.
00071 #define HTML_BASE_TAG "BASE"
00072 
// XPCOM boilerplate: nsScriptableUnescapeHTML exposes the single interface
// nsIScriptableUnescapeHTML.
00073 NS_IMPL_ISUPPORTS1(nsScriptableUnescapeHTML, nsIScriptableUnescapeHTML)
00074 
// Class ID passed to do_CreateInstance() to create the nsIParser instances
// used by Unescape() and ParseFragment().
00075 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
00076 
00077 // From /widget/HTMLConverter
00078 //
00079 // Takes HTML and converts it to plain text but in unicode.
00080 //
00081 NS_IMETHODIMP
00082 nsScriptableUnescapeHTML::Unescape(const nsAString & aFromStr, 
00083                                    nsAString & aToStr)
00084 {
00085   // create the parser to do the conversion.
00086   aToStr.SetLength(0);
00087   nsresult rv;
00088   nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID, &rv);
00089   if (NS_FAILED(rv)) return rv;
00090 
00091   // convert it!
00092   nsCOMPtr<nsIContentSink> sink;
00093 
00094   sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
00095   NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
00096 
00097   nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
00098   NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
00099 
00100   textSink->Initialize(&aToStr, nsIDocumentEncoder::OutputSelectionOnly
00101                        | nsIDocumentEncoder::OutputAbsoluteLinks, 0);
00102 
00103   parser->SetContentSink(sink);
00104 
00105   parser->Parse(aFromStr, 0, NS_LITERAL_CSTRING("text/html"),
00106                 PR_TRUE, eDTDMode_fragment);
00107   
00108   return NS_OK;
00109 }
00110 
00111 // The feed version of nsContentUtils::CreateContextualFragment It
00112 // creates a fragment, but doesn't go to all the effort to preserve
00113 // context like innerHTML does, because feed DOMs shouldn't have that.
00114 NS_IMETHODIMP
00115 nsScriptableUnescapeHTML::ParseFragment(const nsAString &aFragment,
00116                                         PRBool aIsXML,
00117                                         nsIURI* aBaseURI,
00118                                         nsIDOMElement* aContextElement,
00119                                         nsIDOMDocumentFragment** aReturn)
00120 {
00121   NS_ENSURE_ARG(aContextElement);
00122   *aReturn = nsnull;
00123 
00124   nsresult rv;
00125   nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID, &rv);
00126   NS_ENSURE_SUCCESS(rv, rv);
00127 
00128   nsCOMPtr<nsIDocument> document;
00129   nsCOMPtr<nsIDOMDocument> domDocument;
00130   nsCOMPtr<nsIDOMNode> contextNode;
00131   contextNode = do_QueryInterface(aContextElement);
00132   contextNode->GetOwnerDocument(getter_AddRefs(domDocument));
00133   document = do_QueryInterface(domDocument);
00134   NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE);
00135   
00136   // stop scripts
00137   nsCOMPtr<nsIScriptLoader> loader;
00138   PRBool scripts_enabled = PR_FALSE;
00139   if (document) {
00140     loader = document->GetScriptLoader();
00141     if (loader) {
00142       loader->GetEnabled(&scripts_enabled);
00143     }
00144   }
00145   if (scripts_enabled) {
00146     loader->SetEnabled(PR_FALSE);
00147   }
00148 
00149   // Wrap things in a div or body for parsing, but it won't show up in
00150   // the fragment.
00151   nsVoidArray tagStack;
00152   nsCAutoString base, spec;
00153   if (aIsXML) {
00154     // XHTML
00155     if (aBaseURI) {
00156       base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG));
00157       base.Append(NS_LITERAL_CSTRING(" xml:base=\""));
00158       aBaseURI->GetSpec(spec);
00159       // nsEscapeHTML is good enough, because we only need to get
00160       // quotes, ampersands, and angle brackets
00161       char* escapedSpec = nsEscapeHTML(spec.get());
00162       if (escapedSpec)
00163         base += escapedSpec;
00164       NS_Free(escapedSpec);
00165       base.Append(NS_LITERAL_CSTRING("\""));
00166       tagStack.AppendElement(ToNewUnicode(base));
00167     }  else {
00168       tagStack.AppendElement(ToNewUnicode(NS_LITERAL_CSTRING(XHTML_DIV_TAG)));
00169     }
00170   } else {
00171     // HTML
00172     tagStack.AppendElement(ToNewUnicode(NS_LITERAL_CSTRING(HTML_BODY_TAG)));
00173     if (aBaseURI) {
00174       base.Append(NS_LITERAL_CSTRING((HTML_BASE_TAG)));
00175       base.Append(NS_LITERAL_CSTRING(" href=\""));
00176       aBaseURI->GetSpec(spec);
00177       base = base + spec;
00178       base.Append(NS_LITERAL_CSTRING("\""));
00179       tagStack.AppendElement(ToNewUnicode(base));
00180     }
00181   }
00182 
00183   if (NS_SUCCEEDED(rv)) {
00184     nsCAutoString contentType;
00185     nsDTDMode mode;
00186     nsCOMPtr<nsIFragmentContentSink> sink;
00187     if (aIsXML) {
00188       mode = eDTDMode_full_standards;
00189       contentType = NS_LITERAL_CSTRING("application/xhtml+xml");
00190       sink = do_CreateInstance(NS_XHTMLPARANOIDFRAGMENTSINK_CONTRACTID);
00191     } else {
00192       mode = eDTDMode_fragment;
00193       contentType = NS_LITERAL_CSTRING("text/html");
00194       sink = do_CreateInstance(NS_HTMLPARANOIDFRAGMENTSINK_CONTRACTID);
00195     }
00196     if (sink) {
00197       sink->SetTargetDocument(document);
00198       nsCOMPtr<nsIContentSink> contentsink(do_QueryInterface(sink));
00199       parser->SetContentSink(contentsink);
00200       rv = parser->ParseFragment(aFragment, nsnull, tagStack,
00201                                  aIsXML, contentType, mode);
00202       if (NS_SUCCEEDED(rv))
00203         rv = sink->GetFragment(aReturn);
00204 
00205     } else {
00206       rv = NS_ERROR_FAILURE;
00207     }
00208   }
00209 
00210   // from nsContentUtils XXX Ick! Delete strings we allocated above.
00211   PRInt32 count = tagStack.Count();
00212   for (PRInt32 i = 0; i < count; i++) {
00213     PRUnichar* str = (PRUnichar*)tagStack.ElementAt(i);
00214     if (str)
00215       NS_Free(str);
00216   }
00217 
00218   if (scripts_enabled)
00219       loader->SetEnabled(PR_TRUE);
00220   
00221   return rv;
00222 }