Back to index

lightning-sunbird  0.9+nobinonly
txXMLParser.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is TransforMiiX XSLT processor code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * The MITRE Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1999
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Tom Kneeland <tomk@mitre.org> (Original Author)
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either the GNU General Public License Version 2 or later (the "GPL"), or
00027  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #include "txXMLParser.h"
00040 #include "txURIUtils.h"
00041 #include "txXPathTreeWalker.h"
00042 
00043 #ifndef TX_EXE
00044 #include "nsIDocument.h"
00045 #include "nsIDOMDocument.h"
00046 #include "nsISyncLoadDOMService.h"
00047 #include "nsNetUtil.h"
00048 #else
00049 #include "expat_config.h"
00050 #include "expat.h"
00051 #endif
00052 
00053 #ifdef TX_EXE
00054 
00058 class txXMLParser
00059 {
00060   public:
00061     nsresult parse(istream& aInputStream, const nsAString& aUri,
00062                    txXPathNode** aResultDoc);
00063     const nsAString& getErrorString();
00064 
00068     int StartElement(const XML_Char *aName, const XML_Char **aAtts);
00069     int EndElement(const XML_Char* aName);
00070     void CharacterData(const XML_Char* aChars, int aLength);
00071     void Comment(const XML_Char* aChars);
00072     int ProcessingInstruction(const XML_Char *aTarget, const XML_Char *aData);
00073     int ExternalEntityRef(const XML_Char *aContext, const XML_Char *aBase,
00074                           const XML_Char *aSystemId,
00075                           const XML_Char *aPublicId);
00076 
00077   protected:
00078     void createErrorString();
00079     nsString  mErrorString;
00080     Document* mDocument;
00081     Node*  mCurrentNode;
00082     XML_Parser mExpatParser;
00083 };
00084 #endif
00085 
00086 nsresult
00087 txParseDocumentFromURI(const nsAString& aHref, const txXPathNode& aLoader,
00088                        nsAString& aErrMsg, txXPathNode** aResult)
00089 {
00090     NS_ENSURE_ARG_POINTER(aResult);
00091     *aResult = nsnull;
00092 #ifndef TX_EXE
00093     nsCOMPtr<nsIURI> documentURI;
00094     nsresult rv = NS_NewURI(getter_AddRefs(documentURI), aHref);
00095     NS_ENSURE_SUCCESS(rv, rv);
00096 
00097     nsIDocument* loaderDocument = txXPathNativeNode::getDocument(aLoader);
00098 
00099     nsCOMPtr<nsILoadGroup> loadGroup = loaderDocument->GetDocumentLoadGroup();
00100     nsIURI *loaderUri = loaderDocument->GetDocumentURI();
00101     NS_ENSURE_TRUE(loaderUri, NS_ERROR_FAILURE);
00102 
00103     nsCOMPtr<nsIChannel> channel;
00104     rv = NS_NewChannel(getter_AddRefs(channel), documentURI, nsnull,
00105                        loadGroup);
00106     NS_ENSURE_SUCCESS(rv, rv);
00107 
00108     nsCOMPtr<nsIHttpChannel> http = do_QueryInterface(channel);
00109     if (http) {
00110         http->SetReferrer(loaderUri);
00111     }
00112 
00113     nsCOMPtr<nsISyncLoadDOMService> loader =
00114       do_GetService("@mozilla.org/content/syncload-dom-service;1", &rv);
00115     NS_ENSURE_SUCCESS(rv, rv);
00116 
00117     // Raw pointer, we want the resulting txXPathNode to hold a reference to
00118     // the document.
00119     nsIDOMDocument* theDocument = nsnull;
00120     rv = loader->LoadDocumentAsXML(channel, loaderUri, &theDocument);
00121     if (NS_FAILED(rv)) {
00122         aErrMsg.Append(NS_LITERAL_STRING("Document load of ") + 
00123                        aHref + NS_LITERAL_STRING(" failed."));
00124         return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE;
00125     }
00126 
00127     *aResult = txXPathNativeNode::createXPathNode(theDocument);
00128     if (!*aResult) {
00129         NS_RELEASE(theDocument);
00130         return NS_ERROR_FAILURE;
00131     }
00132 
00133     return NS_OK;
00134 #else
00135     istream* xslInput = URIUtils::getInputStream(aHref, aErrMsg);
00136     if (!xslInput) {
00137         return NS_ERROR_FAILURE;
00138     }
00139     return txParseFromStream(*xslInput, aHref, aErrMsg, aResult);
00140 #endif
00141 }
00142 
00143 #ifdef TX_EXE
00144 nsresult
00145 txParseFromStream(istream& aInputStream, const nsAString& aUri,
00146                   nsAString& aErrorString, txXPathNode** aResult)
00147 {
00148     NS_ENSURE_ARG_POINTER(aResult);
00149     txXMLParser parser;
00150     nsresult rv = parser.parse(aInputStream, aUri, aResult);
00151     aErrorString = parser.getErrorString();
00152     return rv;
00153 }
00154 
// Shortcut macro for redirection into txXMLParser method calls: casts the
// opaque expat user-data pointer back to the parser object.
#define TX_XMLPARSER(_userData) NS_STATIC_CAST(txXMLParser*, _userData)
// Leaves a void expat callback early when no user data was supplied.
// The check is done on the macro argument itself, so the macro works no
// matter what the caller's variable is named.
#define TX_ENSURE_DATA(_userData)                       \
  PR_BEGIN_MACRO                                        \
    if (!(_userData)) {                                 \
        NS_WARNING("no userData in expat handler");     \
        return;                                         \
    }                                                   \
  PR_END_MACRO
00168 
00169 PR_STATIC_CALLBACK(void)
00170 startElement(void *aUserData, const XML_Char *aName, const XML_Char **aAtts)
00171 {
00172     TX_ENSURE_DATA(aUserData);
00173     TX_XMLPARSER(aUserData)->StartElement(aName, aAtts);
00174 }
00175 
00176 PR_STATIC_CALLBACK(void)
00177 endElement(void *aUserData, const XML_Char* aName)
00178 {
00179     TX_ENSURE_DATA(aUserData);
00180     TX_XMLPARSER(aUserData)->EndElement(aName);
00181 }
00182 
00183 PR_STATIC_CALLBACK(void)
00184 charData(void* aUserData, const XML_Char* aChars, int aLength)
00185 {
00186     TX_ENSURE_DATA(aUserData);
00187     TX_XMLPARSER(aUserData)->CharacterData(aChars, aLength);
00188 }
00189 
00190 PR_STATIC_CALLBACK(void)
00191 commentHandler(void* aUserData, const XML_Char* aChars)
00192 {
00193     TX_ENSURE_DATA(aUserData);
00194     TX_XMLPARSER(aUserData)->Comment(aChars);
00195 }
00196 
00197 PR_STATIC_CALLBACK(void)
00198 piHandler(void *aUserData, const XML_Char *aTarget, const XML_Char *aData)
00199 {
00200     TX_ENSURE_DATA(aUserData);
00201     TX_XMLPARSER(aUserData)->ProcessingInstruction(aTarget, aData);
00202 }
00203 
00204 PR_STATIC_CALLBACK(int)
00205 externalEntityRefHandler(XML_Parser aParser,
00206                          const XML_Char *aContext,
00207                          const XML_Char *aBase,
00208                          const XML_Char *aSystemId,
00209                          const XML_Char *aPublicId)
00210 {
00211     // aParser is aUserData is the txXMLParser,
00212     // we set that in txXMLParser::parse
00213     NS_ENSURE_TRUE(aParser, XML_ERROR_NONE);
00214     return ((txXMLParser*)aParser)->ExternalEntityRef(aContext, aBase,
00215                                                       aSystemId, aPublicId);
00216 }
00217 
00218 
00223 nsresult
00224 txXMLParser::parse(istream& aInputStream, const nsAString& aUri,
00225                    txXPathNode** aResultDoc)
00226 {
00227     mErrorString.Truncate();
00228     *aResultDoc = nsnull;
00229     if (!aInputStream) {
00230         mErrorString.AppendLiteral("unable to parse xml: invalid or unopen stream encountered.");
00231         return NS_ERROR_FAILURE;
00232     }
00233     mExpatParser = XML_ParserCreate(nsnull);
00234     if (!mExpatParser) {
00235         return NS_ERROR_OUT_OF_MEMORY;
00236     }
00237     mDocument = new Document();
00238     if (!mDocument) {
00239         XML_ParserFree(mExpatParser);
00240         return NS_ERROR_OUT_OF_MEMORY;
00241     }
00242     mDocument->documentBaseURI = aUri;
00243     mCurrentNode = mDocument;
00244 
00245     XML_SetUserData(mExpatParser, this);
00246     XML_SetElementHandler(mExpatParser, startElement, endElement);
00247     XML_SetCharacterDataHandler(mExpatParser, charData);
00248     XML_SetProcessingInstructionHandler(mExpatParser, piHandler);
00249     XML_SetCommentHandler(mExpatParser, commentHandler);
00250 #ifdef XML_DTD
00251     XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_ALWAYS);
00252 #endif
00253     XML_SetExternalEntityRefHandler(mExpatParser, externalEntityRefHandler);
00254     XML_SetExternalEntityRefHandlerArg(mExpatParser, this);
00255     XML_SetBase(mExpatParser,
00256                 (const XML_Char*)(PromiseFlatString(aUri).get()));
00257 
00258     const int bufferSize = 1024;
00259     char buf[bufferSize];
00260     PRBool done;
00261     do {
00262         aInputStream.read(buf, bufferSize);
00263         done = aInputStream.eof();
00264 
00265         if (!XML_Parse(mExpatParser, buf, aInputStream.gcount(), done)) {
00266             createErrorString();
00267             done = MB_TRUE;
00268             delete mDocument;
00269             mDocument = nsnull;
00270         }
00271     } while (!done);
00272     aInputStream.clear();
00273 
00274     // clean up
00275     XML_ParserFree(mExpatParser);
00276     // ownership to the caller
00277     *aResultDoc = txXPathNativeNode::createXPathNode(mDocument);
00278     mDocument = nsnull;
00279     return *aResultDoc ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
00280 }
00281 
00282 const nsAString&
00283 txXMLParser::getErrorString()
00284 {
00285     return mErrorString;
00286 }
00287 
00288 
00289 int
00290 txXMLParser::StartElement(const XML_Char *aName, const XML_Char **aAtts)
00291 {
00292     const XML_Char** theAtts = aAtts;
00293 
00294     Element* newElement =
00295         mDocument->createElement(nsDependentString((const PRUnichar*)aName));
00296     if (!newElement) {
00297         return XML_ERROR_NO_MEMORY;
00298     }
00299 
00300     while (*theAtts) {
00301         nsDependentString attName((const PRUnichar*)*theAtts++);
00302         nsDependentString attValue((const PRUnichar*)*theAtts++);
00303         newElement->setAttribute(attName, attValue);
00304     }
00305 
00306     int idx;
00307     if ((idx = XML_GetIdAttributeIndex(mExpatParser)) > -1) {
00308         nsDependentString idName((const PRUnichar*)*(aAtts + idx));
00309         nsDependentString idValue((const PRUnichar*)*(aAtts + idx + 1));
00310         // make sure IDs are unique
00311         if (!idValue.IsEmpty()) {
00312             mDocument->setElementID(idValue, newElement);
00313         }
00314     }
00315     mCurrentNode->appendChild(newElement);
00316     mCurrentNode = newElement;
00317 
00318     return XML_ERROR_NONE;
00319 }
00320 
00321 int
00322 txXMLParser::EndElement(const XML_Char* aName)
00323 {
00324     if (mCurrentNode->getParentNode()) {
00325         mCurrentNode = mCurrentNode->getParentNode();
00326     }
00327     return XML_ERROR_NONE;
00328 }
00329 
00330 void
00331 txXMLParser::CharacterData(const XML_Char* aChars, int aLength)
00332 {
00333     Node* prevSib = mCurrentNode->getLastChild();
00334     const PRUnichar* pChars = NS_STATIC_CAST(const PRUnichar*, aChars);
00335     if (prevSib && prevSib->getNodeType() == Node::TEXT_NODE) {
00336         NS_STATIC_CAST(NodeDefinition*, prevSib)->appendData(pChars, aLength);
00337     }
00338     else {
00339         // aChars is not null-terminated so we use Substring here.
00340         Node* node = mDocument->createTextNode(Substring(pChars,
00341                                                          pChars + aLength));
00342         mCurrentNode->appendChild(node);
00343     }
00344 }
00345 
00346 void
00347 txXMLParser::Comment(const XML_Char* aChars)
00348 {
00349     Node* node = mDocument->createComment(
00350         nsDependentString(NS_STATIC_CAST(const PRUnichar*, aChars)));
00351     mCurrentNode->appendChild(node);
00352 }
00353 
00354 int
00355 txXMLParser::ProcessingInstruction(const XML_Char *aTarget,
00356                                    const XML_Char *aData)
00357 {
00358     nsDependentString target((const PRUnichar*)aTarget);
00359     nsDependentString data((const PRUnichar*)aData);
00360     Node* node = mDocument->createProcessingInstruction(target, data);
00361     mCurrentNode->appendChild(node);
00362 
00363     return XML_ERROR_NONE;
00364 }
00365 
00366 int
00367 txXMLParser::ExternalEntityRef(const XML_Char *aContext,
00368                                const XML_Char *aBase,
00369                                const XML_Char *aSystemId,
00370                                const XML_Char *aPublicId)
00371 {
00372     if (aPublicId) {
00373         // not supported, this is "http://some.site.net/foo.dtd" stuff
00374         return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
00375     }
00376     nsAutoString absUrl;
00377     URIUtils::resolveHref(nsDependentString((PRUnichar*)aSystemId),
00378                           nsDependentString((PRUnichar*)aBase), absUrl);
00379     istream* extInput = URIUtils::getInputStream(absUrl, mErrorString);
00380     if (!extInput) {
00381         return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
00382     }
00383     XML_Parser parent = mExpatParser;
00384     mExpatParser = 
00385         XML_ExternalEntityParserCreate(mExpatParser, aContext, nsnull);
00386     if (!mExpatParser) {
00387         mExpatParser = parent;
00388         delete extInput;
00389         return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
00390     }
00391     XML_SetBase(mExpatParser, absUrl.get());
00392 
00393     const int bufSize = 1024;
00394     char buffer[bufSize];
00395     int result;
00396     PRBool done;
00397     do {
00398         extInput->read(buffer, bufSize);
00399         done = extInput->eof();
00400         if (!(result =
00401               XML_Parse(mExpatParser, buffer,  extInput->gcount(), done))) {
00402             createErrorString();
00403             mErrorString.Append(PRUnichar('\n'));
00404             done = MB_TRUE;
00405         }
00406     } while (!done);
00407 
00408     delete extInput;
00409     XML_ParserFree(mExpatParser);
00410 
00411     mExpatParser = parent;
00412 
00413     return result;
00414 }
00415 
00416 void
00417 txXMLParser::createErrorString()
00418 {
00419     XML_Error errCode = XML_GetErrorCode(mExpatParser);
00420     mErrorString.AppendWithConversion(XML_ErrorString(errCode));
00421     mErrorString.AppendLiteral(" at line ");
00422     mErrorString.AppendInt(XML_GetCurrentLineNumber(mExpatParser));
00423     mErrorString.AppendLiteral(" in ");
00424     mErrorString.Append((const PRUnichar*)XML_GetBase(mExpatParser));
00425 }
00426 #endif