Back to index

lightning-sunbird  0.9+nobinonly
nsScannerString.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* vim:set ts=2 sw=2 sts=2 et cindent: */
00003 /* ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is Mozilla.
00017  *
00018  * The Initial Developer of the Original Code is IBM Corporation.
00019  * Portions created by IBM Corporation are Copyright (C) 2003
00020  * IBM Corporation. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Darin Fisher <darin@meer.net>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either the GNU General Public License Version 2 or later (the "GPL"), or
00027  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #include <stdlib.h>
00040 #include "nsScannerString.h"
00041 
00042 
00047 nsScannerBufferList::Buffer*
00048 nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
00049   {
00050     PRUint32 len = aString.Length();
00051 
00052     Buffer* buf = (Buffer*) malloc(sizeof(Buffer) + (len + 1) * sizeof(PRUnichar));
00053     if (buf)
00054       {
00055         // leave PRCList members of Buffer uninitialized
00056 
00057         buf->mUsageCount = 0;
00058         buf->mDataEnd = buf->DataStart() + len;
00059 
00060         nsAString::const_iterator source;
00061         aString.BeginReading(source);
00062         nsCharTraits<PRUnichar>::copy(buf->DataStart(), source.get(), len);
00063 
00064         // XXX null terminate.  this shouldn't be required, but we do it because
00065         // nsScanner erroneously thinks it can dereference DataEnd :-(
00066         *buf->mDataEnd = PRUnichar(0);
00067       }
00068     return buf;
00069   }
00070 
00071 nsScannerBufferList::Buffer*
00072 nsScannerBufferList::AllocBuffer( PRUint32 capacity )
00073   {
00074     Buffer* buf = (Buffer*) malloc(sizeof(Buffer) + (capacity + 1) * sizeof(PRUnichar));
00075     if (buf)
00076       {
00077         // leave PRCList members of Buffer uninitialized
00078 
00079         buf->mUsageCount = 0;
00080         buf->mDataEnd = buf->DataStart() + capacity;
00081 
00082         // XXX null terminate.  this shouldn't be required, but we do it because
00083         // nsScanner erroneously thinks it can dereference DataEnd :-(
00084         *buf->mDataEnd = PRUnichar(0);
00085       }
00086     return buf;
00087   }
00088 
00089 void
00090 nsScannerBufferList::ReleaseAll()
00091   {
00092     while (!PR_CLIST_IS_EMPTY(&mBuffers))
00093       {
00094         PRCList* node = PR_LIST_HEAD(&mBuffers);
00095         PR_REMOVE_LINK(node);
00096         //printf(">>> freeing buffer @%p\n", node);
00097         free(NS_STATIC_CAST(Buffer*, node));
00098       }
00099   }
00100 
00101 void
00102 nsScannerBufferList::SplitBuffer( const Position& pos )
00103   {
00104     // splitting to the right keeps the work string and any extant token
00105     // pointing to and holding a reference count on the same buffer.
00106 
00107     Buffer* bufferToSplit = pos.mBuffer;
00108     NS_ASSERTION(bufferToSplit, "null pointer");
00109 
00110     PRUint32 splitOffset = pos.mPosition - bufferToSplit->DataStart();
00111     NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
00112                  splitOffset <= bufferToSplit->DataLength(),
00113                  "split offset is outside buffer");
00114     
00115     PRUint32 len = bufferToSplit->DataLength() - splitOffset;
00116     Buffer* new_buffer = AllocBuffer(len);
00117     if (new_buffer)
00118       {
00119         nsCharTraits<PRUnichar>::copy(new_buffer->DataStart(),
00120                                       bufferToSplit->DataStart() + splitOffset,
00121                                       len);
00122         InsertAfter(new_buffer, bufferToSplit);
00123         bufferToSplit->SetDataLength(splitOffset);
00124       }
00125   }
00126 
00127 void
00128 nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
00129   {
00130     if (aBuf == Head())
00131       {
00132         while (!PR_CLIST_IS_EMPTY(&mBuffers) && !Head()->IsInUse())
00133           {
00134             Buffer* buffer = Head();
00135             PR_REMOVE_LINK(buffer);
00136             free(buffer);
00137           }
00138       }
00139   }
00140 
00141 size_t
00142 nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
00143   {
00144     size_t result = 0;
00145     if (aStart.mBuffer == aEnd.mBuffer)
00146       {
00147         result = aEnd.mPosition - aStart.mPosition;
00148       }
00149     else
00150       {
00151         result = aStart.mBuffer->DataEnd() - aStart.mPosition;
00152         for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
00153           result += b->DataLength();
00154         result += aEnd.mPosition - aEnd.mBuffer->DataStart();
00155       }
00156     return result;
00157   }
00158 
00159 
00164 nsScannerSubstring::nsScannerSubstring()
00165   : mStart(nsnull, nsnull)
00166   , mEnd(nsnull, nsnull)
00167   , mBufferList(nsnull)
00168   , mLength(0)
00169   , mIsDirty(PR_TRUE)
00170   {
00171   }
00172 
00173 nsScannerSubstring::nsScannerSubstring( const nsAString& s )
00174   : mBufferList(nsnull)
00175   , mIsDirty(PR_TRUE)
00176   {
00177     Rebind(s);
00178   }
00179 
00180 nsScannerSubstring::~nsScannerSubstring()
00181   {
00182     release_ownership_of_buffer_list();
00183   }
00184 
00185 PRInt32
00186 nsScannerSubstring::CountChar( PRUnichar c ) const
00187   {
00188       /*
00189         re-write this to use a counting sink
00190        */
00191 
00192     size_type result = 0;
00193     size_type lengthToExamine = Length();
00194 
00195     nsScannerIterator iter;
00196     for ( BeginReading(iter); ; )
00197       {
00198         PRInt32 lengthToExamineInThisFragment = iter.size_forward();
00199         const PRUnichar* fromBegin = iter.get();
00200         result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
00201         if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
00202           return result;
00203         iter.advance(lengthToExamineInThisFragment);
00204       }
00205       // never reached; quiets warnings
00206     return 0;
00207   }
00208 
00209 void
00210 nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
00211                             const nsScannerIterator& aStart, 
00212                             const nsScannerIterator& aEnd )
00213   {
00214     // allow for the case where &aString == this
00215 
00216     aString.acquire_ownership_of_buffer_list();
00217     release_ownership_of_buffer_list();
00218 
00219     mStart      = aStart;
00220     mEnd        = aEnd;
00221     mBufferList = aString.mBufferList;
00222     mLength     = Distance(aStart, aEnd);
00223     mIsDirty    = PR_TRUE;
00224   }
00225 
00226 void
00227 nsScannerSubstring::Rebind( const nsAString& aString )
00228   {
00229     release_ownership_of_buffer_list();
00230 
00231     mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
00232     mIsDirty    = PR_TRUE;
00233 
00234     init_range_from_buffer_list();
00235     acquire_ownership_of_buffer_list();
00236   }
00237 
00238 const nsSubstring&
00239 nsScannerSubstring::AsString() const
00240   {
00241     if (mIsDirty)
00242       {
00243         nsScannerSubstring* mutable_this = NS_CONST_CAST(nsScannerSubstring*, this);
00244 
00245         if (mStart.mBuffer == mEnd.mBuffer) {
00246           // We only have a single fragment to deal with, so just return it
00247           // as a substring.
00248           mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
00249         } else {
00250           // Otherwise, we need to copy the data into a flattened buffer.
00251           nsScannerIterator start, end;
00252           CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep);
00253         }
00254 
00255         mutable_this->mIsDirty = PR_FALSE;
00256       }
00257 
00258     return mFlattenedRep;
00259   }
00260 
00261 nsScannerIterator&
00262 nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
00263   {
00264     iter.mOwner = this;
00265 
00266     iter.mFragment.mBuffer = mStart.mBuffer;
00267     iter.mFragment.mFragmentStart = mStart.mPosition;
00268     if (mStart.mBuffer == mEnd.mBuffer)
00269       iter.mFragment.mFragmentEnd = mEnd.mPosition;
00270     else
00271       iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
00272 
00273     iter.mPosition = mStart.mPosition;
00274     iter.normalize_forward();
00275     return iter;
00276   }
00277 
00278 nsScannerIterator&
00279 nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
00280   {
00281     iter.mOwner = this;
00282 
00283     iter.mFragment.mBuffer = mEnd.mBuffer;
00284     iter.mFragment.mFragmentEnd = mEnd.mPosition;
00285     if (mStart.mBuffer == mEnd.mBuffer)
00286       iter.mFragment.mFragmentStart = mStart.mPosition;
00287     else
00288       iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
00289 
00290     iter.mPosition = mEnd.mPosition;
00291     // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
00292     return iter;
00293   }
00294 
00295 PRBool
00296 nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
00297   {
00298     // check to see if we are at the end of the buffer list
00299     if (frag.mBuffer == mEnd.mBuffer)
00300       return PR_FALSE;
00301 
00302     frag.mBuffer = NS_STATIC_CAST(const Buffer*, PR_NEXT_LINK(frag.mBuffer));
00303 
00304     if (frag.mBuffer == mStart.mBuffer)
00305       frag.mFragmentStart = mStart.mPosition;
00306     else
00307       frag.mFragmentStart = frag.mBuffer->DataStart();
00308 
00309     if (frag.mBuffer == mEnd.mBuffer)
00310       frag.mFragmentEnd = mEnd.mPosition;
00311     else
00312       frag.mFragmentEnd = frag.mBuffer->DataEnd();
00313 
00314     return PR_TRUE;
00315   }
00316 
00317 PRBool
00318 nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
00319   {
00320     // check to see if we are at the beginning of the buffer list
00321     if (frag.mBuffer == mStart.mBuffer)
00322       return PR_FALSE;
00323 
00324     frag.mBuffer = NS_STATIC_CAST(const Buffer*, PR_PREV_LINK(frag.mBuffer));
00325 
00326     if (frag.mBuffer == mStart.mBuffer)
00327       frag.mFragmentStart = mStart.mPosition;
00328     else
00329       frag.mFragmentStart = frag.mBuffer->DataStart();
00330 
00331     if (frag.mBuffer == mEnd.mBuffer)
00332       frag.mFragmentEnd = mEnd.mPosition;
00333     else
00334       frag.mFragmentEnd = frag.mBuffer->DataEnd();
00335 
00336     return PR_TRUE;
00337   }
00338 
00339 
00344 nsScannerString::nsScannerString( Buffer* aBuf )
00345   {
00346     mBufferList = new nsScannerBufferList(aBuf);
00347 
00348     init_range_from_buffer_list();
00349     acquire_ownership_of_buffer_list();
00350   }
00351 
00352 void
00353 nsScannerString::AppendBuffer( Buffer* aBuf )
00354   {
00355     mBufferList->Append(aBuf);
00356     mLength += aBuf->DataLength();
00357 
00358     mEnd.mBuffer = aBuf;
00359     mEnd.mPosition = aBuf->DataEnd();
00360 
00361     mIsDirty = PR_TRUE;
00362   }
00363 
00364 void
00365 nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
00366   {
00367     Position old_start(mStart);
00368     mStart = aIter;
00369     mLength -= Position::Distance(old_start, mStart);
00370     
00371     mStart.mBuffer->IncrementUsageCount();
00372     old_start.mBuffer->DecrementUsageCount();
00373 
00374     mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
00375 
00376     mIsDirty = PR_TRUE;
00377   }
00378 
00379 void
00380 nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
00381     /*
00382      * Warning: this routine manipulates the shared buffer list in an unexpected way.
00383      *  The original design did not really allow for insertions, but this call promises
00384      *  that if called for a point after the end of all extant token strings, that no token string
00385      *  or the work string will be invalidated.
00386      *
00387      *  This routine is protected because it is the responsibility of the derived class to keep those promises.
00388      */
00389   {
00390     Position insertPos(aInsertPoint);
00391 
00392     mBufferList->SplitBuffer(insertPos);
00393       // splitting to the right keeps the work string and any extant token pointing to and
00394       //  holding a reference count on the same buffer
00395 
00396     Buffer* new_buffer = AllocBufferFromString(aReadable);
00397       // make a new buffer with all the data to insert...
00398       //  BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
00399       //  of this and decide if we should do the work to fill it
00400 
00401     Buffer* buffer_to_split = insertPos.mBuffer;
00402     mBufferList->InsertAfter(new_buffer, buffer_to_split);
00403     mLength += aReadable.Length();
00404 
00405     mEnd.mBuffer = mBufferList->Tail();
00406     mEnd.mPosition = mEnd.mBuffer->DataEnd();
00407 
00408     mIsDirty = PR_TRUE;
00409   }
00410 
00411 void
00412 nsScannerString::ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar)
00413   {
00414     // XXX Casting a const to non-const. Unless the base class
00415     // provides support for writing iterators, this is the best
00416     // that can be done.
00417     PRUnichar* pos = NS_CONST_CAST(PRUnichar*, aPosition.get());
00418     *pos = aChar;
00419 
00420     mIsDirty = PR_TRUE;
00421   }
00422 
00423 
00428 void
00429 nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
00430                               const nsScannerIterator &aEnd)
00431 {
00432   // If the start and end positions are inside the same buffer, we must
00433   // acquire ownership of the buffer.  If not, we can optimize by not holding
00434   // onto it.
00435 
00436   Buffer *buffer = NS_CONST_CAST(Buffer*, aStart.buffer());
00437   PRBool sameBuffer = buffer == aEnd.buffer();
00438 
00439   nsScannerBufferList *bufferList;
00440 
00441   if (sameBuffer) {
00442     bufferList = aStart.mOwner->mBufferList;
00443     bufferList->AddRef();
00444     buffer->IncrementUsageCount();
00445   }
00446 
00447   if (mBufferList)
00448     ReleaseBuffer();
00449 
00450   if (sameBuffer) {
00451     mBuffer = buffer;
00452     mBufferList = bufferList;
00453     mString.Rebind(aStart.mPosition, aEnd.mPosition);
00454   } else {
00455     mBuffer = nsnull;
00456     mBufferList = nsnull;
00457     CopyUnicodeTo(aStart, aEnd, mString);
00458   }
00459 }
00460 
00461 void
00462 nsScannerSharedSubstring::ReleaseBuffer()
00463 {
00464   NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
00465   mBuffer->DecrementUsageCount();
00466   mBufferList->DiscardUnreferencedPrefix(mBuffer);
00467   mBufferList->Release();
00468 }
00469 
00470 void
00471 nsScannerSharedSubstring::MakeMutable()
00472 {
00473   nsString temp(mString); // this will force a copy of the data
00474   mString.Assign(temp);   // mString will now share the just-allocated buffer
00475 
00476   ReleaseBuffer();
00477 
00478   mBuffer = nsnull;
00479   mBufferList = nsnull;
00480 }
00481 
00486 void
00487 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
00488                const nsScannerIterator& aSrcEnd,
00489                nsAString& aDest )
00490   {
00491     nsAString::iterator writer;
00492     if (!EnsureStringLength(aDest, Distance(aSrcStart, aSrcEnd))) {
00493       aDest.Truncate();
00494       return; // out of memory
00495     }
00496     aDest.BeginWriting(writer);
00497     nsScannerIterator fromBegin(aSrcStart);
00498     
00499     copy_string(fromBegin, aSrcEnd, writer);
00500   }
00501 
00502 void
00503 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
00504                  const nsScannerIterator& aSrcEnd,
00505                  nsScannerSharedSubstring& aDest )
00506   {
00507     // Check whether we can just create a dependent string.
00508     if (aDest.str().IsEmpty()) {
00509       // We can just make |aDest| point to the buffer.
00510       // This will take care of copying if the buffer spans fragments.
00511       aDest.Rebind(aSrcStart, aSrcEnd);
00512     } else {
00513       // The dest string is not empty, so it can't be a dependent substring.
00514       AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
00515     }
00516   }
00517 
00518 void
00519 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
00520                  const nsScannerIterator& aSrcEnd,
00521                  nsAString& aDest )
00522   {
00523     nsAString::iterator writer;
00524     PRUint32 oldLength = aDest.Length();
00525     if (!EnsureStringLength(aDest, oldLength + Distance(aSrcStart, aSrcEnd)))
00526       return; // out of memory
00527     aDest.BeginWriting(writer).advance(oldLength);
00528     nsScannerIterator fromBegin(aSrcStart);
00529     
00530     copy_string(fromBegin, aSrcEnd, writer);
00531   }
00532 
00533 PRBool
00534 FindCharInReadable( PRUnichar aChar,
00535                     nsScannerIterator& aSearchStart,
00536                     const nsScannerIterator& aSearchEnd )
00537   {
00538     while ( aSearchStart != aSearchEnd )
00539       {
00540         PRInt32 fragmentLength;
00541         if ( SameFragment(aSearchStart, aSearchEnd) ) 
00542           fragmentLength = aSearchEnd.get() - aSearchStart.get();
00543         else
00544           fragmentLength = aSearchStart.size_forward();
00545 
00546         const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
00547         if ( charFoundAt ) {
00548           aSearchStart.advance( charFoundAt - aSearchStart.get() );
00549           return PR_TRUE;
00550         }
00551 
00552         aSearchStart.advance(fragmentLength);
00553       }
00554 
00555     return PR_FALSE;
00556   }
00557 
00558 PRBool
00559 FindInReadable( const nsAString& aPattern,
00560                 nsScannerIterator& aSearchStart,
00561                 nsScannerIterator& aSearchEnd,
00562                 const nsStringComparator& compare )
00563   {
00564     PRBool found_it = PR_FALSE;
00565 
00566       // only bother searching at all if we're given a non-empty range to search
00567     if ( aSearchStart != aSearchEnd )
00568       {
00569         nsAString::const_iterator aPatternStart, aPatternEnd;
00570         aPattern.BeginReading(aPatternStart);
00571         aPattern.EndReading(aPatternEnd);
00572 
00573           // outer loop keeps searching till we find it or run out of string to search
00574         while ( !found_it )
00575           {
00576               // fast inner loop (that's what it's called, not what it is) looks for a potential match
00577             while ( aSearchStart != aSearchEnd &&
00578                     compare(*aPatternStart, *aSearchStart) )
00579               ++aSearchStart;
00580 
00581               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
00582             if ( aSearchStart == aSearchEnd )
00583               break;
00584 
00585               // otherwise, we're at a potential match, let's see if we really hit one
00586             nsAString::const_iterator testPattern(aPatternStart);
00587             nsScannerIterator testSearch(aSearchStart);
00588 
00589               // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
00590             for(;;)
00591               {
00592                   // we already compared the first character in the outer loop,
00593                   //  so we'll advance before the next comparison
00594                 ++testPattern;
00595                 ++testSearch;
00596 
00597                   // if we verified all the way to the end of the pattern, then we found it!
00598                 if ( testPattern == aPatternEnd )
00599                   {
00600                     found_it = PR_TRUE;
00601                     aSearchEnd = testSearch; // return the exact found range through the parameters
00602                     break;
00603                   }
00604 
00605                   // if we got to end of the string we're searching before we hit the end of the
00606                   //  pattern, we'll never find what we're looking for
00607                 if ( testSearch == aSearchEnd )
00608                   {
00609                     aSearchStart = aSearchEnd;
00610                     break;
00611                   }
00612 
00613                   // else if we mismatched ... it's time to advance to the next search position
00614                   //  and get back into the `fast' loop
00615                 if ( compare(*testPattern, *testSearch) )
00616                   {
00617                     ++aSearchStart;
00618                     break;
00619                   }
00620               }
00621           }
00622       }
00623 
00624     return found_it;
00625   }
00626 
00632 PRBool
00633 RFindInReadable( const nsAString& aPattern,
00634                  nsScannerIterator& aSearchStart,
00635                  nsScannerIterator& aSearchEnd,
00636                  const nsStringComparator& aComparator )
00637   {
00638     PRBool found_it = PR_FALSE;
00639 
00640     nsScannerIterator savedSearchEnd(aSearchEnd);
00641     nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
00642 
00643     while ( searchStart != searchEnd )
00644       {
00645         if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
00646           {
00647             found_it = PR_TRUE;
00648 
00649               // this is the best match so far, so remember it
00650             aSearchStart = searchStart;
00651             aSearchEnd = searchEnd;
00652 
00653               // ...and get ready to search some more
00654               //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
00655             ++searchStart;
00656             searchEnd = savedSearchEnd;
00657           }
00658       }
00659 
00660       // if we never found it, return an empty range
00661     if ( !found_it )
00662       aSearchStart = aSearchEnd;
00663 
00664     return found_it;
00665   }