Back to index

lightning-sunbird  0.9+nobinonly
ExprLexer.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is TransforMiiX XSLT processor code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * The MITRE Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1999
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Keith Visco <kvisco@ziplink.net> (Original Author)
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either the GNU General Public License Version 2 or later (the "GPL"), or
00027  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00043 #include "ExprLexer.h"
00044 #include "txAtoms.h"
00045 #include "nsString.h"
00046 #include "txError.h"
00047 #include "XMLUtils.h"
00048 
/**
 * Creates a lexer whose token list is empty: the list head, the list tail
 * and the read cursor are all null and the token count is zero.
 */
00052 txExprLexer::txExprLexer()
00053   : mCurrentItem(nsnull), mFirstItem(nsnull), mLastItem(nsnull),
00054     mTokenCount(0)
00055 {
00056   // Nothing else to set up; the initializer list covers every member used here.
00057 }
00059 
/**
 * Deletes every token in the list, starting at mFirstItem and following the
 * mNext links, and then clears the read cursor.
 */
00063 txExprLexer::~txExprLexer()
00064 {
00065   // Read each node's successor before the node itself is freed.
00066   for (Token* doomed = mFirstItem; doomed; ) {
00067     Token* const following = doomed->mNext;
00068     delete doomed;
00069     doomed = following;
00070   }
00071   mCurrentItem = nsnull;
00072 }
00074 
/**
 * Returns the token under the read cursor and moves the cursor on to that
 * token's mNext.
 *
 * @return the current token, or nsnull when the cursor is already null (for
 *         instance before any token has been added). Calling in that state
 *         is a caller error and still trips the assertion; the null check
 *         only prevents a null-pointer dereference in builds where
 *         NS_ASSERTION does not stop execution.
 */
00075 Token*
00076 txExprLexer::nextToken()
00077 {
00078   NS_ASSERTION(mCurrentItem, "nextToken called beyoned the end");
00079   Token* token = mCurrentItem;
00080   mCurrentItem = token ? token->mNext : nsnull; // never deref a null cursor
00081   return token;
00082 }
00083 
/**
 * Moves the read cursor one token backwards. When the cursor is null it is
 * placed on the last token of the list instead.
 */
00084 void
00085 txExprLexer::pushBack()
00086 {
00087   mCurrentItem = !mCurrentItem ? mLastItem : mCurrentItem->mPrevious;
00088 }
00089 
/**
 * Appends aToken to the end of the token list and increments the token
 * count. The first token ever added also becomes the list head and the
 * read cursor. Tokens in the list are deleted by the lexer's destructor.
 *
 * @param aToken token to append; it is dereferenced without a null check
 *               whenever the list already has a tail.
 */
00090 void
00091 txExprLexer::addToken(Token* aToken)
00092 {
00093   if (!mFirstItem) {
00094     mFirstItem = mCurrentItem = aToken;
00095   }
00096   if (mLastItem) {
00097     mLastItem->mNext = aToken;
00098     aToken->mPrevious = mLastItem;
00099   }
00100   mLastItem = aToken;
00101   mTokenCount += 1;
00102 }
00104 
/**
 * Decides, from the token that precedes the current position, whether the
 * text scanned next has to be read as an operator. parse() uses the answer
 * to choose between an operator name and a plain name, and between
 * Token::MULTIPLY_OP and a wildcard for '*'.
 *
 * @param aToken the preceding token; may be null
 * @return PR_FALSE when aToken is null or has type Token::NULL_TOKEN;
 *         otherwise PR_TRUE exactly when aToken's type lies outside the
 *         enumerator range Token::COMMA .. Token::UNION_OP.
 *
 * NOTE(review): this looks like the XPath 1.0 lexical disambiguation rule,
 * but that holds only if the enumerators inside that range are the right
 * ones -- confirm against ExprLexer.h.
 */
00110 PRBool
00111 txExprLexer::nextIsOperatorToken(Token* aToken)
00112 {
00113   // A missing or placeholder predecessor never calls for an operator.
00114   const PRBool hasPredecessor =
00115     aToken && aToken->mType != Token::NULL_TOKEN;
00116   /* This relies on the tokens having the right order in ExprLexer.h */
00117   return hasPredecessor &&
00118     !(aToken->mType >= Token::COMMA && aToken->mType <= Token::UNION_OP);
00119 }
00121 
/**
 * Scans aPattern from start to end and appends one Token per lexical item
 * to this lexer's list via addToken(), finishing with a Token::END marker.
 * Whitespace produces no token. A "::" produces no token either; it retags
 * the preceding name token as Token::AXIS_IDENTIFIER. Likewise a name token
 * directly before "(" is retagged as a node-type test or a function name.
 *
 * mPosition is the scan cursor. When a quoted literal is not closed it is
 * moved back to the opening quote before the error is returned.
 *
 * @param aPattern the expression text to tokenize
 * @return NS_OK on success. Scanning stops at the first problem and returns
 *         one of NS_ERROR_XPATH_INVALID_VAR_NAME,
 *         NS_ERROR_XPATH_UNEXPECTED_END, NS_ERROR_XPATH_OPERATOR_EXPECTED,
 *         NS_ERROR_XPATH_UNCLOSED_LITERAL, NS_ERROR_XPATH_BAD_COLON,
 *         NS_ERROR_XPATH_BAD_BANG, NS_ERROR_XPATH_ILLEGAL_CHAR,
 *         NS_ERROR_OUT_OF_MEMORY or NS_ERROR_FAILURE. Tokens appended
 *         before the failure stay in the list.
 */
00125 nsresult
00126 txExprLexer::parse(const nsASingleFragmentString& aPattern)
00127 {
00128   iterator start, end;
00129   start = aPattern.BeginReading(mPosition);
00130   aPattern.EndReading(end);
00131 
00132   //-- placeholder "previous token" for the first pass; it is a local
00133   //-- object, so it is destroyed automatically when parse() returns
00134   Token nullToken(nsnull, nsnull, Token::NULL_TOKEN);
00135 
00136   Token::Type defType;
00137   Token* newToken = nsnull;
00138   Token* prevToken = &nullToken;
00139   PRBool isToken;
00140 
00141   while (mPosition < end) {
00142     // per-pass defaults; the branches below override them as needed
00143     defType = Token::CNAME;
00144     isToken = PR_TRUE;
00145 
00146     if (*mPosition == DOLLAR_SIGN) {
00147       if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
00148         return NS_ERROR_XPATH_INVALID_VAR_NAME;
00149       }
00150       defType = Token::VAR_REFERENCE;
00151     } 
00152     // just reuse the QName parsing below, which uses defType as the
00153     // type of the token it constructs
00154 
00155     if (XMLUtils::isLetter(*mPosition)) {
00156       // NCName, can get QName or OperatorName;
00157       //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
00158       //  and are dealt with below
00159       start = mPosition;
00160       while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
00161         /* just go */
00162       }
00163       if (mPosition < end && *mPosition == COLON) {
00164         // try QName or wildcard, might need to step back for axis
00165         if (++mPosition == end) {
00166           return NS_ERROR_XPATH_UNEXPECTED_END;
00167         }
00168         if (XMLUtils::isLetter(*mPosition)) {
00169           while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
00170             /* just go */
00171           }
00172         }
00173         else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
00174           // eat wildcard for NameTest, bail for var ref at COLON
00175           ++mPosition;
00176         }
00177         else {
00178           --mPosition; // step back onto the colon; it is not part of this name
00179         }
00180       }
00181       if (nextIsOperatorToken(prevToken)) {
00182         NS_ConvertUTF16toUTF8 opUTF8(Substring(start, mPosition));
00183         if (txXPathAtoms::_and->EqualsUTF8(opUTF8)) {
00184           defType = Token::AND_OP;
00185         }
00186         else if (txXPathAtoms::_or->EqualsUTF8(opUTF8)) {
00187           defType = Token::OR_OP;
00188         }
00189         else if (txXPathAtoms::mod->EqualsUTF8(opUTF8)) {
00190           defType = Token::MODULUS_OP;
00191         }
00192         else if (txXPathAtoms::div->EqualsUTF8(opUTF8)) {
00193           defType = Token::DIVIDE_OP;
00194         }
00195         else {
00196           // XXX QUESTION: spec is not too precise
00197           // badops is sure an error, but is bad:ops, too? We say yes!
00198           return NS_ERROR_XPATH_OPERATOR_EXPECTED;
00199         }
00200       }
00201       newToken = new Token(start, mPosition, defType);
00202     }
00203     else if (isXPathDigit(*mPosition)) {
00204       start = mPosition;
00205       while (++mPosition < end && isXPathDigit(*mPosition)) {
00206         /* just go */
00207       }
00208       if (mPosition < end && *mPosition == '.') { // optional fraction; may have no digits
00209         while (++mPosition < end && isXPathDigit(*mPosition)) {
00210           /* just go */
00211         }
00212       }
00213       newToken = new Token(start, mPosition, Token::NUMBER);
00214     }
00215     else {
00216       switch (*mPosition) {
00217         //-- ignore whitespace
00218       case SPACE:
00219       case TX_TAB:
00220       case TX_CR:
00221       case TX_LF:
00222         ++mPosition;
00223         isToken = PR_FALSE;
00224         break;
00225       case S_QUOTE :
00226       case D_QUOTE :
00227         start = mPosition;
00228         while (++mPosition < end && *mPosition != *start) {
00229           // eat literal up to the matching quote character
00230         }
00231         if (mPosition == end) {
00232           mPosition = start; // rewind to the opening quote
00233           return NS_ERROR_XPATH_UNCLOSED_LITERAL;
00234         }
00235         newToken = new Token(start + 1, mPosition, Token::LITERAL); // quotes excluded
00236         ++mPosition;
00237         break;
00238       case PERIOD:
00239         // period can be .., .(DIGITS)+ or ., check next
00240         if (++mPosition == end) {
00241           newToken = new Token(mPosition - 1, Token::SELF_NODE);
00242         }
00243         else if (isXPathDigit(*mPosition)) {
00244           start = mPosition - 1;
00245           while (++mPosition < end && isXPathDigit(*mPosition)) {
00246             /* just go */
00247           }
00248           newToken = new Token(start, mPosition, Token::NUMBER);
00249         }
00250         else if (*mPosition == PERIOD) {
00251           ++mPosition;
00252           newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
00253         }
00254         else {
00255           newToken = new Token(mPosition - 1, Token::SELF_NODE);
00256         }
00257         break;
00258       case COLON: // QNames are dealt with above, so this must be the "::" of an axis
00259         if (++mPosition >= end || *mPosition != COLON ||
00260             prevToken->mType != Token::CNAME) {
00261           return NS_ERROR_XPATH_BAD_COLON;
00262         }
00263         prevToken->mType = Token::AXIS_IDENTIFIER; // retag the name; "::" adds no token
00264         ++mPosition;
00265         isToken = PR_FALSE;
00266         break;
00267       case FORWARD_SLASH :
00268         if (++mPosition < end && *mPosition == FORWARD_SLASH) {
00269           ++mPosition;
00270           newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
00271         }
00272         else {
00273           newToken = new Token(mPosition - 1, Token::PARENT_OP);
00274         }
00275         break;
00276       case BANG : // can only be !=
00277         if (++mPosition < end && *mPosition == EQUAL) {
00278           ++mPosition;
00279           newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
00280           break;
00281         }
00282         // Error ! is not not()
00283         return NS_ERROR_XPATH_BAD_BANG;
00284       case EQUAL:
00285         newToken = new Token(mPosition, Token::EQUAL_OP);
00286         ++mPosition;
00287         break;
00288       case L_ANGLE:
00289         if (++mPosition == end) {
00290           return NS_ERROR_XPATH_UNEXPECTED_END;
00291         }
00292         if (*mPosition == EQUAL) {
00293           ++mPosition;
00294           newToken = new Token(mPosition - 2, mPosition,
00295                                Token::LESS_OR_EQUAL_OP);
00296         }
00297         else {
00298           newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
00299         }
00300         break;
00301       case R_ANGLE:
00302         if (++mPosition == end) {
00303           return NS_ERROR_XPATH_UNEXPECTED_END;
00304         }
00305         if (*mPosition == EQUAL) {
00306           ++mPosition;
00307           newToken = new Token(mPosition - 2, mPosition,
00308                                Token::GREATER_OR_EQUAL_OP);
00309         }
00310         else {
00311           newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
00312         }
00313         break;
00314       case HYPHEN :
00315         newToken = new Token(mPosition, Token::SUBTRACTION_OP);
00316         ++mPosition;
00317         break;
00318       case ASTERIX: // multiplication where an operator is due, otherwise a wildcard name
00319         if (nextIsOperatorToken(prevToken)) {
00320           newToken = new Token(mPosition, Token::MULTIPLY_OP);
00321         }
00322         else {
00323           newToken = new Token(mPosition, Token::CNAME);
00324         }
00325         ++mPosition;
00326         break;
00327       case L_PAREN: // a name directly before "(" is a node-type test or a function name
00328         if (prevToken->mType == Token::CNAME) {
00329           NS_ConvertUTF16toUTF8 utf8Value(prevToken->Value());
00330           if (txXPathAtoms::comment->EqualsUTF8(utf8Value)) {
00331             prevToken->mType = Token::COMMENT;
00332           }
00333           else if (txXPathAtoms::node->EqualsUTF8(utf8Value)) {
00334             prevToken->mType = Token::NODE;
00335           }
00336           else if (txXPathAtoms::processingInstruction->EqualsUTF8(utf8Value)) {
00337             prevToken->mType = Token::PROC_INST;
00338           }
00339           else if (txXPathAtoms::text->EqualsUTF8(utf8Value)) {
00340             prevToken->mType = Token::TEXT;
00341           }
00342           else {
00343             prevToken->mType = Token::FUNCTION_NAME;
00344           }
00345         }
00346         newToken = new Token(mPosition, Token::L_PAREN);
00347         ++mPosition;
00348         break;
00349       case R_PAREN:
00350         newToken = new Token(mPosition, Token::R_PAREN);
00351         ++mPosition;
00352         break;
00353       case L_BRACKET:
00354         newToken = new Token(mPosition, Token::L_BRACKET);
00355         ++mPosition;
00356         break;
00357       case R_BRACKET:
00358         newToken = new Token(mPosition, Token::R_BRACKET);
00359         ++mPosition;
00360         break;
00361       case COMMA:
00362         newToken = new Token(mPosition, Token::COMMA);
00363         ++mPosition;
00364         break;
00365       case AT_SIGN :
00366         newToken = new Token(mPosition, Token::AT_SIGN);
00367         ++mPosition;
00368         break;
00369       case PLUS:
00370         newToken = new Token(mPosition, Token::ADDITION_OP);
00371         ++mPosition;
00372         break;
00373       case VERT_BAR:
00374         newToken = new Token(mPosition, Token::UNION_OP);
00375         ++mPosition;
00376         break;
00377       default:
00378         // Error, don't grok character :-(
00379         return NS_ERROR_XPATH_ILLEGAL_CHAR;
00380       }
00381     }
00382     if (isToken) { // this pass produced a token: check it, then append it
00383       NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
00384       NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE); // stale token from an earlier pass
00385       prevToken = newToken;
00386       addToken(newToken);
00387     }
00388   }
00389 
00390   // append an END token to mark the end of the list
00391   newToken = new Token(end, end, Token::END);
00392   if (!newToken) {
00393     return NS_ERROR_OUT_OF_MEMORY;
00394   }
00395   addToken(newToken);
00396 
00397   return NS_OK;
00398 }