Back to index

lightning-sunbird  0.9+nobinonly
nsScanner.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* vim: set ts=2 sw=2 et tw=78: */
00003 /* ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is mozilla.org code.
00017  *
00018  * The Initial Developer of the Original Code is
00019  * Netscape Communications Corporation.
00020  * Portions created by the Initial Developer are Copyright (C) 1998
00021  * the Initial Developer. All Rights Reserved.
00022  *
00023  * Contributor(s):
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 //#define __INCREMENTAL 1
00040 
00041 #include "nsScanner.h"
00042 #include "nsDebug.h"
00043 #include "nsIServiceManager.h"
00044 #include "nsICharsetConverterManager.h"
00045 #include "nsICharsetAlias.h"
00046 #include "nsReadableUtils.h"
00047 #include "nsIInputStream.h"
00048 #include "nsILocalFile.h"
00049 #include "nsNetUtil.h"
00050 #include "nsUTF8Utils.h" // for LossyConvertEncoding
00051 #include "nsCRT.h"
00052 #include "nsParser.h"
00053 
00054 static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
00055 
00056 // We replace NUL characters with this character.
00057 static PRUnichar sInvalid = UCS2_REPLACEMENT_CHAR;
00058 
00059 nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) :
00060   mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set
00061 {
00062   // Build filter that will be used to filter out characters with
00063   // bits that none of the terminal chars have. This works very well
00064   // because terminal chars often have only the last 4-6 bits set and
00065   // normal ascii letters have bit 7 set. Other letters have even higher
00066   // bits set.
00067   
00068   // Calculate filter
00069   const PRUnichar *current = aTerminateChars;
00070   PRUnichar terminalChar = *current;
00071   while (terminalChar) {
00072     mFilter &= ~terminalChar;
00073     ++current;
00074     terminalChar = *current;
00075   }
00076 }
00077 
00078 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
00079 
// Number of bytes FillBuffer() requests from the input stream per call.
// Defining __INCREMENTAL shrinks it to a single byte per read.
#ifndef __INCREMENTAL
const int   kBufsize=64;
#else
const int   kBufsize=1;
#endif
00085 
00086 MOZ_DECL_CTOR_COUNTER(nsScanner)
00087 
00088 
00097 nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
00098                      PRInt32 aSource)
00099   : mParser(nsnull)
00100 {
00101   MOZ_COUNT_CTOR(nsScanner);
00102 
00103   mTotalRead = anHTMLString.Length();
00104   mSlidingBuffer = nsnull;
00105   mCountRemaining = 0;
00106   mFirstNonWhitespacePosition = -1;
00107   AppendToBuffer(anHTMLString);
00108   mSlidingBuffer->BeginReading(mCurrentPosition);
00109   mMarkPosition = mCurrentPosition;
00110   mIncremental = PR_FALSE;
00111   mUnicodeDecoder = 0;
00112   mCharsetSource = kCharsetUninitialized;
00113   SetDocumentCharset(aCharset, aSource);
00114 }
00115 
00125 nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream,
00126                      const nsACString& aCharset, PRInt32 aSource)
00127   : mFilename(aFilename), mParser(nsnull)
00128 {
00129   MOZ_COUNT_CTOR(nsScanner);
00130 
00131   mSlidingBuffer = nsnull;
00132 
00133   // XXX This is a big hack.  We need to initialize the iterators to something.
00134   // What matters is that mCurrentPosition == mEndPosition, so that our methods
00135   // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
00136   // so that we have some hope of catching null pointer dereferences associated
00137   // with this hack. --darin
00138   memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
00139   mMarkPosition = mCurrentPosition;
00140   mEndPosition = mCurrentPosition;
00141 
00142   mIncremental = PR_TRUE;
00143   mFirstNonWhitespacePosition = -1;
00144   mCountRemaining = 0;
00145   mTotalRead=0;
00146 
00147   if(aCreateStream) {
00148     nsCOMPtr<nsILocalFile> file;
00149     nsCOMPtr<nsIInputStream> fileStream;
00150     
00151     NS_NewLocalFile(aFilename, PR_TRUE, getter_AddRefs(file));
00152     if (file)
00153       NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), file);
00154 
00155   } //if
00156   mUnicodeDecoder = 0;
00157   mCharsetSource = kCharsetUninitialized;
00158   SetDocumentCharset(aCharset, aSource);
00159 }
00160 
00170 nsScanner::nsScanner(const nsAString& aFilename, nsIInputStream* aStream,
00171                      const nsACString& aCharset, PRInt32 aSource)
00172   : mFilename(aFilename), mParser(nsnull)
00173 {  
00174   MOZ_COUNT_CTOR(nsScanner);
00175 
00176   mSlidingBuffer = nsnull;
00177 
00178   // XXX This is a big hack.  We need to initialize the iterators to something.
00179   // What matters is that mCurrentPosition == mEndPosition, so that our methods
00180   // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
00181   // so that we have some hope of catching null pointer dereferences associated
00182   // with this hack. --darin
00183   memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
00184   mMarkPosition = mCurrentPosition;
00185   mEndPosition = mCurrentPosition;
00186 
00187   mIncremental = PR_FALSE;
00188   mFirstNonWhitespacePosition = -1;
00189   mCountRemaining = 0;
00190   mTotalRead=0;
00191   mInputStream=aStream;
00192   mUnicodeDecoder = 0;
00193   mCharsetSource = kCharsetUninitialized;
00194   SetDocumentCharset(aCharset, aSource);
00195 }
00196 
00197 
00198 nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource) {
00199 
00200   nsresult res = NS_OK;
00201 
00202   if( aSource < mCharsetSource) // priority is lower the the current one , just
00203     return res;
00204 
00205   nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res));
00206   NS_ASSERTION( nsnull != calias, "cannot find charset alias");
00207   if( NS_SUCCEEDED(res) && (nsnull != calias))
00208   {
00209     PRBool same = PR_FALSE;
00210     res = calias->Equals(aCharset, mCharset, &same);
00211     if(NS_SUCCEEDED(res) && same)
00212     {
00213       return NS_OK; // no difference, don't change it
00214     }
00215     // different, need to change it
00216     nsCAutoString charsetName;
00217     res = calias->GetPreferred(aCharset, charsetName);
00218 
00219     if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
00220     {
00221        // failed - unknown alias , fallback to ISO-8859-1
00222       charsetName.AssignLiteral("ISO-8859-1");
00223     }
00224     mCharset = charsetName;
00225     mCharsetSource = aSource;
00226 
00227     nsCOMPtr<nsICharsetConverterManager> ccm = 
00228              do_GetService(kCharsetConverterManagerCID, &res);
00229     if(NS_SUCCEEDED(res) && (nsnull != ccm))
00230     {
00231       nsIUnicodeDecoder * decoder = nsnull;
00232       res = ccm->GetUnicodeDecoderRaw(mCharset.get(), &decoder);
00233       if(NS_SUCCEEDED(res) && (nsnull != decoder))
00234       {
00235          NS_IF_RELEASE(mUnicodeDecoder);
00236 
00237          mUnicodeDecoder = decoder;
00238       }    
00239     }
00240   }
00241   return res;
00242 }
00243 
00244 
00252 nsScanner::~nsScanner() {
00253 
00254   if (mSlidingBuffer) {
00255     delete mSlidingBuffer;
00256   }
00257 
00258   MOZ_COUNT_DTOR(nsScanner);
00259 
00260   if(mInputStream) {
00261     mInputStream->Close();
00262     mInputStream = 0;
00263   }
00264 
00265   NS_IF_RELEASE(mUnicodeDecoder);
00266 }
00267 
00278 void nsScanner::RewindToMark(void){
00279   if (mSlidingBuffer) {
00280     mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));
00281     mCurrentPosition = mMarkPosition;
00282   }
00283 }
00284 
00285 
00295 void nsScanner::Mark() {
00296   if (mSlidingBuffer) {
00297     mSlidingBuffer->DiscardPrefix(mCurrentPosition);
00298     mSlidingBuffer->BeginReading(mCurrentPosition);
00299     mMarkPosition = mCurrentPosition;
00300   }
00301 }
00302  
00303 
00311 PRBool nsScanner::UngetReadable(const nsAString& aBuffer) {
00312   if (!mSlidingBuffer) {
00313     return PR_FALSE;
00314   }
00315 
00316   mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
00317   mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
00318   mSlidingBuffer->EndReading(mEndPosition);
00319  
00320   PRUint32 length = aBuffer.Length();
00321   mCountRemaining += length; // Ref. bug 117441
00322   mTotalRead += length;
00323   return PR_TRUE;
00324 }
00325 
00333 nsresult nsScanner::Append(const nsAString& aBuffer) {
00334   mTotalRead += aBuffer.Length();
00335   AppendToBuffer(aBuffer);
00336   return NS_OK;
00337 }
00338 
00346 nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
00347                            nsIRequest *aRequest)
00348 {
00349   nsresult res=NS_OK;
00350   PRUnichar *unichars, *start;
00351   if(mUnicodeDecoder) {
00352     PRInt32 unicharBufLen = 0;
00353     mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
00354     nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
00355     NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
00356     start = unichars = buffer->DataStart();
00357 
00358     PRInt32 totalChars = 0;
00359     PRInt32 unicharLength = unicharBufLen;
00360     do {
00361       PRInt32 srcLength = aLen;
00362       res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
00363 
00364       totalChars += unicharLength;
00365       // Continuation of failure case
00366       if(NS_FAILED(res)) {
00367         // if we failed, we consume one byte, replace it with U+FFFD
00368         // and try the conversion again.
00369 
00370         // This is only needed because some decoders don't follow the
00371         // nsIUnicodeDecoder contract: they return a failure when *aDestLength
00372         // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177
00373         if ((unichars + unicharLength) >= buffer->DataEnd()) {
00374           NS_ERROR("Unexpected end of destination buffer");
00375           break;
00376         }
00377 
00378         unichars[unicharLength++] = (PRUnichar)0xFFFD;
00379         unichars = unichars + unicharLength;
00380         unicharLength = unicharBufLen - (++totalChars);
00381 
00382         mUnicodeDecoder->Reset();
00383 
00384         if(((PRUint32) (srcLength + 1)) > aLen) {
00385           srcLength = aLen;
00386         }
00387         else {
00388           ++srcLength;
00389         }
00390 
00391         aBuffer += srcLength;
00392         aLen -= srcLength;
00393       }
00394     } while (NS_FAILED(res) && (aLen > 0));
00395 
00396     buffer->SetDataLength(totalChars);
00397     AppendToBuffer(buffer, aRequest);
00398     mTotalRead += totalChars;
00399 
00400     // Don't propagate return code of unicode decoder
00401     // since it doesn't reflect on our success or failure
00402     // - Ref. bug 87110
00403     res = NS_OK; 
00404   }
00405   else {
00406     AppendASCIItoBuffer(aBuffer, aLen, aRequest);
00407     mTotalRead+=aLen;
00408   }
00409 
00410   return res;
00411 }
00412 
00413 
00420 nsresult nsScanner::FillBuffer(void) {
00421   nsresult result=NS_OK;
00422 
00423   if(!mInputStream) {
00424     result=kEOF;
00425   }
00426   else {
00427     PRUint32 numread=0;
00428     char buf[kBufsize+1];
00429     buf[kBufsize]=0;
00430 
00431     // XXX use ReadSegments to avoid extra buffer copy? --darin
00432 
00433     result = mInputStream->Read(buf, kBufsize, &numread);
00434     if (0 == numread) {
00435       return kEOF;
00436     }
00437 
00438     if((0<numread) && NS_SUCCEEDED(result)) {
00439       AppendASCIItoBuffer(buf, numread, nsnull);
00440     }
00441     mTotalRead+=numread;
00442   }
00443 
00444   return result;
00445 }
00446 
00454 nsresult nsScanner::GetChar(PRUnichar& aChar) {
00455   nsresult result=NS_OK;
00456   aChar=0;  
00457 
00458   if (!mSlidingBuffer) {
00459     return kEOF;
00460   }
00461 
00462   if (mCurrentPosition == mEndPosition) {
00463     result = FillBuffer();
00464   }
00465 
00466   if(NS_OK == result){
00467     aChar = *mCurrentPosition++;
00468     --mCountRemaining;
00469   }
00470   return result;
00471 }
00472 
00473 
00482 nsresult nsScanner::Peek(PRUnichar& aChar, PRUint32 aOffset) {
00483   nsresult result=NS_OK;
00484   aChar=0;  
00485   
00486   if (!mSlidingBuffer) {
00487     return kEOF;
00488   }
00489 
00490   if (mCurrentPosition == mEndPosition) {
00491     result = FillBuffer();
00492   }
00493 
00494   if(NS_OK == result){
00495     if (aOffset) {
00496       while ((NS_OK == result) && (mCountRemaining <= aOffset)) {
00497         result = FillBuffer();
00498       }
00499 
00500       if (NS_OK == result) {
00501         nsScannerIterator pos = mCurrentPosition;
00502         pos.advance(aOffset);
00503         aChar=*pos;
00504       }
00505     }
00506     else {
00507       aChar=*mCurrentPosition;
00508     }
00509   }
00510 
00511   return result;
00512 }
00513 
00514 nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset)
00515 {
00516   if (!mSlidingBuffer) {
00517     return kEOF;
00518   }
00519 
00520   if (mCurrentPosition == mEndPosition) {
00521     return FillBuffer();
00522   }    
00523   
00524   nsScannerIterator start, end;
00525 
00526   start = mCurrentPosition;
00527 
00528   if ((PRInt32)mCountRemaining <= aOffset) {
00529     return kEOF;
00530   }
00531 
00532   if (aOffset > 0) {
00533     start.advance(aOffset);
00534   }
00535 
00536   if (mCountRemaining < PRUint32(aNumChars + aOffset)) {
00537     end = mEndPosition;
00538   }
00539   else {
00540     end = start;
00541     end.advance(aNumChars);
00542   }
00543 
00544   CopyUnicodeTo(start, end, aStr);
00545 
00546   return NS_OK;
00547 }
00548 
00549 
00557 nsresult nsScanner::SkipWhitespace(PRInt32& aNewlinesSkipped) {
00558 
00559   if (!mSlidingBuffer) {
00560     return kEOF;
00561   }
00562 
00563   PRUnichar theChar = 0;
00564   nsresult  result = Peek(theChar);
00565   
00566   if (NS_FAILED(result)) {
00567     return result;
00568   }
00569   
00570   nsScannerIterator current = mCurrentPosition;
00571   PRBool    done = PR_FALSE;
00572   PRBool    skipped = PR_FALSE;
00573   
00574   while (!done && current != mEndPosition) {
00575     switch(theChar) {
00576       case '\n':
00577       case '\r': ++aNewlinesSkipped;
00578       case ' ' :
00579       case '\t':
00580         {
00581           skipped = PR_TRUE;
00582           PRUnichar thePrevChar = theChar;
00583           theChar = (++current != mEndPosition) ? *current : '\0';
00584           if ((thePrevChar == '\r' && theChar == '\n') ||
00585               (thePrevChar == '\n' && theChar == '\r')) {
00586             theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF
00587           }
00588         }
00589         break;
00590       default:
00591         done = PR_TRUE;
00592         break;
00593     }
00594   }
00595 
00596   if (skipped) {
00597     SetPosition(current);
00598     if (current == mEndPosition) {
00599       result = FillBuffer();
00600     }
00601   }
00602 
00603   return result;
00604 }
00605 
00613 nsresult nsScanner::SkipOver(PRUnichar aSkipChar){
00614 
00615   if (!mSlidingBuffer) {
00616     return kEOF;
00617   }
00618 
00619   PRUnichar ch=0;
00620   nsresult   result=NS_OK;
00621 
00622   while(NS_OK==result) {
00623     result=Peek(ch);
00624     if(NS_OK == result) {
00625       if(ch!=aSkipChar) {
00626         break;
00627       }
00628       GetChar(ch);
00629     } 
00630     else break;
00631   } //while
00632   return result;
00633 
00634 }
00635 
00643 nsresult nsScanner::SkipOver(nsString& aSkipSet){
00644 
00645   if (!mSlidingBuffer) {
00646     return kEOF;
00647   }
00648 
00649   PRUnichar theChar=0;
00650   nsresult  result=NS_OK;
00651 
00652   while(NS_OK==result) {
00653     result=Peek(theChar);
00654     if(NS_OK == result) {
00655       PRInt32 pos=aSkipSet.FindChar(theChar);
00656       if(kNotFound==pos) {
00657         break;
00658       }
00659       GetChar(theChar);
00660     } 
00661     else break;
00662   } //while
00663   return result;
00664 
00665 }
00666 
00667 
00676 nsresult nsScanner::SkipTo(nsString& aValidSet){
00677   if (!mSlidingBuffer) {
00678     return kEOF;
00679   }
00680 
00681   PRUnichar ch=0;
00682   nsresult  result=NS_OK;
00683 
00684   while(NS_OK==result) {
00685     result=Peek(ch);
00686     if(NS_OK == result) {
00687       PRInt32 pos=aValidSet.FindChar(ch);
00688       if(kNotFound!=pos) {
00689         break;
00690       }
00691       GetChar(ch);
00692     } 
00693     else break;
00694   } //while
00695   return result;
00696 }
00697 
#if 0
// Disabled debugging aids. Each looks for a NUL character that is not the
// last character of the string; the branch that would react to one is empty.
void DoErrTest(nsString& aString) {
  const PRInt32 nulPos = aString.FindChar(0);
  if (kNotFound < nulPos && aString.Length() - 1 != nulPos) {
  }
}

void DoErrTest(nsCString& aString) {
  const PRInt32 nulPos = aString.FindChar(0);
  if (kNotFound < nulPos && aString.Length() - 1 != nulPos) {
  }
}
#endif
00715 
00724 nsresult nsScanner::SkipPast(nsString& aValidSet){
00725   NS_NOTYETIMPLEMENTED("Error: SkipPast not yet implemented.");
00726   return NS_OK;
00727 }
00728 
00735 nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) {
00736 
00737   if (!mSlidingBuffer) {
00738     return kEOF;
00739   }
00740 
00741   PRUnichar         theChar=0;
00742   nsresult          result=Peek(theChar);
00743   nsScannerIterator current, end;
00744   PRBool            found=PR_FALSE;  
00745   
00746   current = mCurrentPosition;
00747   end = mEndPosition;
00748 
00749   // Loop until we find an illegal character. Everything is then appended
00750   // later.
00751   while(current != end && !found) {
00752     theChar=*current;
00753 
00754     switch(theChar) {
00755       case '\n':
00756       case '\r':
00757       case ' ' :
00758       case '\t':
00759       case '\v':
00760       case '\f':
00761       case '<':
00762       case '>':
00763       case '/':
00764         found = PR_TRUE;
00765         break;
00766 
00767       case '\0':
00768         ReplaceCharacter(current, sInvalid);
00769         break;
00770 
00771       default:
00772         break;
00773     }
00774 
00775     if (!found) {
00776       ++current;
00777     }
00778   }
00779 
00780   // Don't bother appending nothing.
00781   if (current != mCurrentPosition) {
00782     AppendUnicodeTo(mCurrentPosition, current, aString);
00783   }
00784 
00785   SetPosition(current);  
00786   if (current == end) {
00787     result = FillBuffer();
00788   }
00789 
00790   //DoErrTest(aString);
00791 
00792   return result;
00793 }
00794 
00802 nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
00803 
00804   if (!mSlidingBuffer) {
00805     return kEOF;
00806   }
00807 
00808   PRUnichar         theChar=0;
00809   nsresult          result=Peek(theChar);
00810   nsScannerIterator origin, current, end;
00811   PRBool            found=PR_FALSE;  
00812 
00813   origin = mCurrentPosition;
00814   current = mCurrentPosition;
00815   end = mEndPosition;
00816 
00817   while(current != end) {
00818  
00819     theChar=*current;
00820     if(theChar) {
00821       found=PR_FALSE;
00822       switch(theChar) {
00823         case '_':
00824         case '-':
00825         case '.':
00826           // Don't allow ':' in entity names.  See bug 23791
00827           found = PR_TRUE;
00828           break;
00829         default:
00830           found = ('a'<=theChar && theChar<='z') ||
00831                   ('A'<=theChar && theChar<='Z') ||
00832                   ('0'<=theChar && theChar<='9');
00833           break;
00834       }
00835 
00836       if(!found) {
00837         AppendUnicodeTo(mCurrentPosition, current, aString);
00838         break;
00839       }
00840     }
00841     ++current;
00842   }
00843   
00844   SetPosition(current);
00845   if (current == end) {
00846     AppendUnicodeTo(origin, current, aString);
00847     return FillBuffer();
00848   }
00849 
00850   //DoErrTest(aString);
00851 
00852   return result;
00853 }
00854 
00861 nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {
00862 
00863   if (!mSlidingBuffer) {
00864     return kEOF;
00865   }
00866 
00867   NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
00868 
00869   PRUnichar         theChar=0;
00870   nsresult          result=Peek(theChar);
00871   nsScannerIterator origin, current, end;
00872 
00873   origin = mCurrentPosition;
00874   current = origin;
00875   end = mEndPosition;
00876 
00877   PRBool done = PR_FALSE;
00878   while(current != end) {
00879     theChar=*current;
00880     if(theChar) {
00881       done = (theChar < '0' || theChar > '9') && 
00882              ((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
00883                              (theChar < 'a' || theChar > 'f')
00884                              :PR_TRUE);
00885       if(done) {
00886         AppendUnicodeTo(origin, current, aString);
00887         break;
00888       }
00889     }
00890     ++current;
00891   }
00892 
00893   SetPosition(current);
00894   if (current == end) {
00895     AppendUnicodeTo(origin, current, aString);
00896     return FillBuffer();
00897   }
00898 
00899   //DoErrTest(aString);
00900 
00901   return result;
00902 }
00903 
00912 nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,
00913                                    PRInt32& aNewlinesSkipped,
00914                                    PRBool& aHaveCR) {
00915 
00916   aHaveCR = PR_FALSE;
00917 
00918   if (!mSlidingBuffer) {
00919     return kEOF;
00920   }
00921 
00922   PRUnichar theChar = 0;
00923   nsresult  result = Peek(theChar);
00924   
00925   if (NS_FAILED(result)) {
00926     return result;
00927   }
00928   
00929   nsScannerIterator origin, current, end;
00930   PRBool done = PR_FALSE;  
00931 
00932   origin = mCurrentPosition;
00933   current = origin;
00934   end = mEndPosition;
00935 
00936   PRBool haveCR = PR_FALSE;
00937 
00938   while(!done && current != end) {
00939     switch(theChar) {
00940       case '\n':
00941       case '\r':
00942         {
00943           ++aNewlinesSkipped;
00944           PRUnichar thePrevChar = theChar;
00945           theChar = (++current != end) ? *current : '\0';
00946           if ((thePrevChar == '\r' && theChar == '\n') ||
00947               (thePrevChar == '\n' && theChar == '\r')) {
00948             theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
00949             haveCR = PR_TRUE;
00950           } else if (thePrevChar == '\r') {
00951             // Lone CR becomes CRLF; callers should know to remove extra CRs
00952             AppendUnicodeTo(origin, current, aString);
00953             aString.writable().Append(PRUnichar('\n'));
00954             origin = current;
00955             haveCR = PR_TRUE;
00956           }
00957         }
00958         break;
00959       case ' ' :
00960       case '\t':
00961         theChar = (++current != end) ? *current : '\0';
00962         break;
00963       default:
00964         done = PR_TRUE;
00965         AppendUnicodeTo(origin, current, aString);
00966         break;
00967     }
00968   }
00969 
00970   SetPosition(current);
00971   if (current == end) {
00972     AppendUnicodeTo(origin, current, aString);
00973     result = FillBuffer();
00974   }
00975 
00976   aHaveCR = haveCR;
00977   return result;
00978 }
00979 
00980 //XXXbz callers of this have to manage their lone '\r' themselves if they want
00981 //it to work.  Good thing they're all in view-source and it deals.
00982 nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, 
00983                                    nsScannerIterator& aEnd,
00984                                    PRInt32& aNewlinesSkipped) {
00985 
00986   if (!mSlidingBuffer) {
00987     return kEOF;
00988   }
00989 
00990   PRUnichar theChar = 0;
00991   nsresult  result = Peek(theChar);
00992   
00993   if (NS_FAILED(result)) {
00994     return result;
00995   }
00996   
00997   nsScannerIterator origin, current, end;
00998   PRBool done = PR_FALSE;  
00999 
01000   origin = mCurrentPosition;
01001   current = origin;
01002   end = mEndPosition;
01003 
01004   while(!done && current != end) {
01005     switch(theChar) {
01006       case '\n':
01007       case '\r': ++aNewlinesSkipped;
01008       case ' ' :
01009       case '\t':
01010         {
01011           PRUnichar thePrevChar = theChar;
01012           theChar = (++current != end) ? *current : '\0';
01013           if ((thePrevChar == '\r' && theChar == '\n') ||
01014               (thePrevChar == '\n' && theChar == '\r')) {
01015             theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
01016           }
01017         }
01018         break;
01019       default:
01020         done = PR_TRUE;
01021         aStart = origin;
01022         aEnd = current;
01023         break;
01024     }
01025   }
01026 
01027   SetPosition(current);
01028   if (current == end) {
01029     aStart = origin;
01030     aEnd = current;
01031     result = FillBuffer();
01032   }
01033 
01034   return result;
01035 }
01036 
01047 nsresult nsScanner::ReadWhile(nsString& aString,
01048                              nsString& aValidSet,
01049                              PRBool addTerminal){
01050 
01051   if (!mSlidingBuffer) {
01052     return kEOF;
01053   }
01054 
01055   PRUnichar         theChar=0;
01056   nsresult          result=Peek(theChar);
01057   nsScannerIterator origin, current, end;
01058 
01059   origin = mCurrentPosition;
01060   current = origin;
01061   end = mEndPosition;
01062 
01063   while(current != end) {
01064  
01065     theChar=*current;
01066     if (theChar == '\0') {
01067       ReplaceCharacter(current, sInvalid);
01068       theChar = sInvalid;
01069     }
01070     if(theChar) {
01071       PRInt32 pos=aValidSet.FindChar(theChar);
01072       if(kNotFound==pos) {
01073         if(addTerminal)
01074           ++current;
01075         AppendUnicodeTo(origin, current, aString);
01076         break;
01077       }
01078     }
01079     ++current;
01080   }
01081 
01082   SetPosition(current);
01083   if (current == end) {
01084     AppendUnicodeTo(origin, current, aString);
01085     return FillBuffer();
01086   }
01087 
01088   //DoErrTest(aString);
01089 
01090   return result;
01091 
01092 }
01093 
01104 nsresult nsScanner::ReadUntil(nsAString& aString,
01105                               const nsReadEndCondition& aEndCondition,
01106                               PRBool addTerminal)
01107 {  
01108   if (!mSlidingBuffer) {
01109     return kEOF;
01110   }
01111 
01112   nsScannerIterator origin, current;
01113   const PRUnichar* setstart = aEndCondition.mChars;
01114   const PRUnichar* setcurrent;
01115 
01116   origin = mCurrentPosition;
01117   current = origin;
01118 
01119   PRUnichar         theChar=0;
01120   nsresult          result=Peek(theChar);
01121 
01122   if (NS_FAILED(result)) {
01123     return result;
01124   }
01125   
01126   while (current != mEndPosition) {
01127     theChar = *current;
01128     if (theChar == '\0') {
01129       ReplaceCharacter(current, sInvalid);
01130       theChar = sInvalid;
01131     }
01132 
01133     // Filter out completely wrong characters
01134     // Check if all bits are in the required area
01135     if(!(theChar & aEndCondition.mFilter)) {
01136       // They were. Do a thorough check.
01137 
01138       setcurrent = setstart;
01139       while (*setcurrent) {
01140         if (*setcurrent == theChar) {
01141           if(addTerminal)
01142             ++current;
01143           AppendUnicodeTo(origin, current, aString);
01144           SetPosition(current);
01145 
01146           //DoErrTest(aString);
01147 
01148           return NS_OK;
01149         }
01150         ++setcurrent;
01151       }
01152     }
01153     
01154     ++current;
01155   }
01156 
01157   // If we are here, we didn't find any terminator in the string and
01158   // current = mEndPosition
01159   SetPosition(current);
01160   AppendUnicodeTo(origin, current, aString);
01161   return FillBuffer();
01162 }
01163 
01164 nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,
01165                               const nsReadEndCondition& aEndCondition,
01166                               PRBool addTerminal)
01167 {  
01168   if (!mSlidingBuffer) {
01169     return kEOF;
01170   }
01171 
01172   nsScannerIterator origin, current;
01173   const PRUnichar* setstart = aEndCondition.mChars;
01174   const PRUnichar* setcurrent;
01175 
01176   origin = mCurrentPosition;
01177   current = origin;
01178 
01179   PRUnichar         theChar=0;
01180   nsresult          result=Peek(theChar);
01181 
01182   if (NS_FAILED(result)) {
01183     return result;
01184   }
01185   
01186   while (current != mEndPosition) {
01187     theChar = *current;
01188     if (theChar == '\0') {
01189       ReplaceCharacter(current, sInvalid);
01190       theChar = sInvalid;
01191     }
01192 
01193     // Filter out completely wrong characters
01194     // Check if all bits are in the required area
01195     if(!(theChar & aEndCondition.mFilter)) {
01196       // They were. Do a thorough check.
01197 
01198       setcurrent = setstart;
01199       while (*setcurrent) {
01200         if (*setcurrent == theChar) {
01201           if(addTerminal)
01202             ++current;
01203           AppendUnicodeTo(origin, current, aString);
01204           SetPosition(current);
01205 
01206           //DoErrTest(aString);
01207 
01208           return NS_OK;
01209         }
01210         ++setcurrent;
01211       }
01212     }
01213     
01214     ++current;
01215   }
01216 
01217   // If we are here, we didn't find any terminator in the string and
01218   // current = mEndPosition
01219   SetPosition(current);
01220   AppendUnicodeTo(origin, current, aString);
01221   return FillBuffer();
01222 }
01223 
01224 nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, 
01225                               nsScannerIterator& aEnd,
01226                               const nsReadEndCondition &aEndCondition,
01227                               PRBool addTerminal)
01228 {
01229   if (!mSlidingBuffer) {
01230     return kEOF;
01231   }
01232 
01233   nsScannerIterator origin, current;
01234   const PRUnichar* setstart = aEndCondition.mChars;
01235   const PRUnichar* setcurrent;
01236 
01237   origin = mCurrentPosition;
01238   current = origin;
01239 
01240   PRUnichar         theChar=0;
01241   nsresult          result=Peek(theChar);
01242   
01243   if (NS_FAILED(result)) {
01244     aStart = aEnd = current;
01245     return result;
01246   }
01247   
01248   while (current != mEndPosition) {
01249     if (theChar == '\0') {
01250       ReplaceCharacter(current, sInvalid);
01251       theChar = sInvalid;
01252     }
01253 
01254     // Filter out completely wrong characters
01255     // Check if all bits are in the required area
01256     if(!(theChar & aEndCondition.mFilter)) {
01257       // They were. Do a thorough check.
01258       setcurrent = setstart;
01259       while (*setcurrent) {
01260         if (*setcurrent == theChar) {
01261           if(addTerminal)
01262             ++current;
01263           aStart = origin;
01264           aEnd = current;
01265           SetPosition(current);
01266 
01267           return NS_OK;
01268         }
01269         ++setcurrent;
01270       }
01271     }
01272     
01273     ++current;
01274     theChar = *current;
01275   }
01276 
01277   // If we are here, we didn't find any terminator in the string and
01278   // current = mEndPosition
01279   SetPosition(current);
01280   aStart = origin;
01281   aEnd = current;
01282   return FillBuffer();
01283 }
01284 
01292 nsresult nsScanner::ReadUntil(nsAString& aString,
01293                               PRUnichar aTerminalChar,
01294                               PRBool addTerminal)
01295 {
01296   if (!mSlidingBuffer) {
01297     return kEOF;
01298   }
01299 
01300   nsScannerIterator origin, current;
01301 
01302   origin = mCurrentPosition;
01303   current = origin;
01304 
01305   PRUnichar theChar;
01306   nsresult result = Peek(theChar);
01307 
01308   if (NS_FAILED(result)) {
01309     return result;
01310   }
01311 
01312   while (current != mEndPosition) {
01313     if (theChar == '\0') {
01314       ReplaceCharacter(current, sInvalid);
01315       theChar = sInvalid;
01316     }
01317 
01318     if (aTerminalChar == theChar) {
01319       if(addTerminal)
01320         ++current;
01321       AppendUnicodeTo(origin, current, aString);
01322       SetPosition(current);
01323       return NS_OK;
01324     }
01325     ++current;
01326     theChar = *current;
01327   }
01328 
01329   // If we are here, we didn't find any terminator in the string and
01330   // current = mEndPosition
01331   AppendUnicodeTo(origin, current, aString);
01332   SetPosition(current);
01333   return FillBuffer();
01334 
01335 }
01336 
01337 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
01338 {
01339   aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
01340 }
01341 
01342 void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
01343 {
01344   aPosition = mCurrentPosition;
01345 }
01346 
01347 void nsScanner::EndReading(nsScannerIterator& aPosition)
01348 {
01349   aPosition = mEndPosition;
01350 }
01351  
01352 void nsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate, PRBool aReverse)
01353 {
01354   if (mSlidingBuffer) {
01355 #ifdef DEBUG
01356     PRUint32 origRemaining = mCountRemaining;
01357 #endif
01358 
01359     if (aReverse) {
01360       mCountRemaining += (Distance(aPosition, mCurrentPosition));
01361     }
01362     else {
01363       mCountRemaining -= (Distance(mCurrentPosition, aPosition));
01364     }
01365 
01366     NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) ||
01367                  (mCountRemaining <= origRemaining && !aReverse),
01368                  "Improper use of nsScanner::SetPosition. Make sure to set the"
01369                  " aReverse parameter correctly");
01370 
01371     mCurrentPosition = aPosition;
01372     if (aTerminate && (mCurrentPosition == mEndPosition)) {
01373       mMarkPosition = mCurrentPosition;
01374       mSlidingBuffer->DiscardPrefix(mCurrentPosition);
01375     }
01376   }
01377 }
01378 
01379 void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
01380                                  PRUnichar aChar)
01381 {
01382   if (mSlidingBuffer) {
01383     mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
01384   }
01385 }
01386 
01387 void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
01388                                nsIRequest *aRequest)
01389 {
01390   if (nsParser::sParserDataListeners && mParser &&
01391       NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
01392                                              aBuf->DataEnd()), aRequest))) {
01393     // Don't actually append on failure.
01394 
01395     return;
01396   }
01397 
01398   if (!mSlidingBuffer) {
01399     mSlidingBuffer = new nsScannerString(aBuf);
01400     mSlidingBuffer->BeginReading(mCurrentPosition);
01401     mMarkPosition = mCurrentPosition;
01402     mSlidingBuffer->EndReading(mEndPosition);
01403     mCountRemaining = aBuf->DataLength();
01404   }
01405   else {
01406     mSlidingBuffer->AppendBuffer(aBuf);
01407     if (mCurrentPosition == mEndPosition) {
01408       mSlidingBuffer->BeginReading(mCurrentPosition);
01409     }
01410     mSlidingBuffer->EndReading(mEndPosition);
01411     mCountRemaining += aBuf->DataLength();
01412   }
01413 
01414   if (mFirstNonWhitespacePosition == -1) {
01415     nsScannerIterator iter(mCurrentPosition);
01416     nsScannerIterator end(mEndPosition);
01417 
01418     while (iter != end) {
01419       if (!nsCRT::IsAsciiSpace(*iter)) {
01420         mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
01421 
01422         break;
01423       }
01424 
01425       ++iter;
01426     }
01427   }
01428 }
01429 
01430 void nsScanner::AppendASCIItoBuffer(const char* aData, PRUint32 aLen,
01431                                     nsIRequest *aRequest)
01432 {
01433   nsScannerString::Buffer* buf = nsScannerString::AllocBuffer(aLen);
01434   if (buf)
01435   {
01436     LossyConvertEncoding<char, PRUnichar> converter(buf->DataStart());
01437     converter.write(aData, aLen);
01438     converter.write_terminator();
01439     AppendToBuffer(buf, aRequest);
01440   }
01441 }
01442 
01451 void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
01452   if (!mSlidingBuffer) {
01453     aCopyBuffer.Truncate();
01454     return;
01455   }
01456 
01457   nsScannerIterator start, end;
01458   start = mCurrentPosition;
01459   end = mEndPosition;
01460 
01461   CopyUnicodeTo(start, end, aCopyBuffer);
01462 }
01463 
01472 nsString& nsScanner::GetFilename(void) {
01473   return mFilename;
01474 }
01475 
01485 void nsScanner::SelfTest(void) {
01486 #ifdef _DEBUG
01487 #endif
01488 }
01489 
01490 
01491