Back to index

lightning-sunbird  0.9+nobinonly
nsReadableUtils.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 2000
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Scott Collins <scc@mozilla.org> (original author)
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #include "nsReadableUtils.h"
00040 #include "nsMemory.h"
00041 #include "nsString.h"
00042 #include "nsUTF8Utils.h"
00043 
00044 NS_COM
00045 void
00046 LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
00047   {
00048     aDest.Truncate();
00049     LossyAppendUTF16toASCII(aSource, aDest);
00050   }
00051 
00052 NS_COM
00053 void
00054 CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
00055   {
00056     aDest.Truncate();
00057     AppendASCIItoUTF16(aSource, aDest);
00058   }
00059 
00060 NS_COM
00061 void
00062 LossyCopyUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
00063   {
00064     aDest.Truncate();
00065     if (aSource) {
00066       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
00067     }
00068   }
00069 
00070 NS_COM
00071 void
00072 CopyASCIItoUTF16( const char* aSource, nsAString& aDest )
00073   {
00074     aDest.Truncate();
00075     if (aSource) {
00076       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
00077     }
00078   }
00079 
00080 NS_COM
00081 void
00082 CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
00083   {
00084     aDest.Truncate();
00085     AppendUTF16toUTF8(aSource, aDest);
00086   }
00087 
00088 NS_COM
00089 void
00090 CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
00091   {
00092     aDest.Truncate();
00093     AppendUTF8toUTF16(aSource, aDest);
00094   }
00095 
00096 NS_COM
00097 void
00098 CopyUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
00099   {
00100     aDest.Truncate();
00101     AppendUTF16toUTF8(aSource, aDest);
00102   }
00103 
00104 NS_COM
00105 void
00106 CopyUTF8toUTF16( const char* aSource, nsAString& aDest )
00107   {
00108     aDest.Truncate();
00109     AppendUTF8toUTF16(aSource, aDest);
00110   }
00111 
00112 NS_COM
00113 void
00114 LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )
00115   {
00116     PRUint32 old_dest_length = aDest.Length();
00117     aDest.SetLength(old_dest_length + aSource.Length());
00118 
00119     nsAString::const_iterator fromBegin, fromEnd;
00120 
00121     nsACString::iterator dest;
00122     aDest.BeginWriting(dest);
00123 
00124     dest.advance(old_dest_length);
00125 
00126       // right now, this won't work on multi-fragment destinations
00127     LossyConvertEncoding<PRUnichar, char> converter(dest.get());
00128     
00129     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
00130   }
00131 
00132 NS_COM
00133 void
00134 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
00135   {
00136     PRUint32 old_dest_length = aDest.Length();
00137     aDest.SetLength(old_dest_length + aSource.Length());
00138 
00139     nsACString::const_iterator fromBegin, fromEnd;
00140 
00141     nsAString::iterator dest;
00142     aDest.BeginWriting(dest);
00143 
00144     dest.advance(old_dest_length);
00145 
00146       // right now, this won't work on multi-fragment destinations
00147     LossyConvertEncoding<char, PRUnichar> converter(dest.get());
00148 
00149     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
00150   }
00151 
00152 NS_COM
00153 void
00154 LossyAppendUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
00155   {
00156     if (aSource) {
00157       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
00158     }
00159   }
00160 
00161 NS_COM
00162 void
00163 AppendASCIItoUTF16( const char* aSource, nsAString& aDest )
00164   {
00165     if (aSource) {
00166       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
00167     }
00168   }
00169 
00170 NS_COM
00171 void
00172 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
00173   {
00174     nsAString::const_iterator source_start, source_end;
00175     CalculateUTF8Size calculator;
00176     copy_string(aSource.BeginReading(source_start),
00177                 aSource.EndReading(source_end), calculator);
00178 
00179     PRUint32 count = calculator.Size();
00180 
00181     if (count)
00182       {
00183         PRUint32 old_dest_length = aDest.Length();
00184 
00185         // Grow the buffer if we need to.
00186         aDest.SetLength(old_dest_length + count);
00187 
00188         nsACString::iterator dest;
00189         aDest.BeginWriting(dest);
00190 
00191         dest.advance(old_dest_length);
00192 
00193         if (count <= (PRUint32)dest.size_forward())
00194           {
00195             // aDest has enough room in the fragment just past the end
00196             // of its old data that it can hold what we're about to
00197             // append. Append using copy_string().
00198 
00199             // All ready? Time to convert
00200 
00201             ConvertUTF16toUTF8 converter(dest.get());
00202             copy_string(aSource.BeginReading(source_start),
00203                         aSource.EndReading(source_end), converter);
00204 
00205             if (converter.Size() != count)
00206               {
00207                 NS_ERROR("Input invalid or incorrect length was calculated");
00208 
00209                 aDest.SetLength(old_dest_length);
00210               }
00211           }
00212         else
00213           {
00214             // This isn't the fastest way to do this, but it gets
00215             // complicated to convert UTF16 into a fragmented UTF8
00216             // string, so we'll take the easy way out here in this
00217             // rare situation.
00218 
00219             aDest.Replace(old_dest_length, count,
00220                           NS_ConvertUTF16toUTF8(aSource));
00221           }
00222       }
00223   }
00224 
00225 NS_COM
00226 void
00227 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
00228   {
00229     nsACString::const_iterator source_start, source_end;
00230     CalculateUTF8Length calculator;
00231     copy_string(aSource.BeginReading(source_start),
00232                 aSource.EndReading(source_end), calculator);
00233 
00234     PRUint32 count = calculator.Length();
00235 
00236     if (count)
00237       {
00238         PRUint32 old_dest_length = aDest.Length();
00239 
00240         // Grow the buffer if we need to.
00241         aDest.SetLength(old_dest_length + count);
00242 
00243         nsAString::iterator dest;
00244         aDest.BeginWriting(dest);
00245 
00246         dest.advance(old_dest_length);
00247 
00248         if (count <= (PRUint32)dest.size_forward())
00249           {
00250             // aDest has enough room in the fragment just past the end
00251             // of its old data that it can hold what we're about to
00252             // append. Append using copy_string().
00253 
00254             // All ready? Time to convert
00255 
00256             ConvertUTF8toUTF16 converter(dest.get());
00257             copy_string(aSource.BeginReading(source_start),
00258                         aSource.EndReading(source_end), converter);
00259 
00260             if (converter.Length() != count)
00261               {
00262                 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
00263                 aDest.SetLength(old_dest_length);
00264               }
00265           }
00266         else
00267           {
00268             // This isn't the fastest way to do this, but it gets
00269             // complicated to convert parts of a UTF8 string into a
00270             // UTF16 string, so we'll take the easy way out here in
00271             // this rare situation.
00272 
00273             aDest.Replace(old_dest_length, count,
00274                           NS_ConvertUTF8toUTF16(aSource));
00275           }
00276       }
00277   }
00278 
00279 NS_COM
00280 void
00281 AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
00282   {
00283     if (aSource) {
00284       AppendUTF16toUTF8(nsDependentString(aSource), aDest);
00285     }
00286   }
00287 
00288 NS_COM
00289 void
00290 AppendUTF8toUTF16( const char* aSource, nsAString& aDest )
00291   {
00292     if (aSource) {
00293       AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
00294     }
00295   }
00296 
00297 
00305 template <class FromStringT, class ToCharT>
00306 inline
00307 ToCharT*
00308 AllocateStringCopy( const FromStringT& aSource, ToCharT* )
00309   {
00310     return NS_STATIC_CAST(ToCharT*, nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));
00311   }
00312 
00313 
00314 NS_COM
00315 char*
00316 ToNewCString( const nsAString& aSource )
00317   {
00318     char* result = AllocateStringCopy(aSource, (char*)0);
00319     if (!result)
00320       return nsnull;
00321 
00322     nsAString::const_iterator fromBegin, fromEnd;
00323     LossyConvertEncoding<PRUnichar, char> converter(result);
00324     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
00325     return result;
00326   }
00327 
00328 NS_COM
00329 char*
00330 ToNewUTF8String( const nsAString& aSource, PRUint32 *aUTF8Count )
00331   {
00332     nsAString::const_iterator start, end;
00333     CalculateUTF8Size calculator;
00334     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
00335                 calculator);
00336 
00337     if (aUTF8Count)
00338       *aUTF8Count = calculator.Size();
00339 
00340     char *result = NS_STATIC_CAST(char*,
00341         nsMemory::Alloc(calculator.Size() + 1));
00342     if (!result)
00343       return nsnull;
00344 
00345     ConvertUTF16toUTF8 converter(result);
00346     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
00347                 converter).write_terminator();
00348     NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
00349 
00350     return result;
00351   }
00352 
00353 NS_COM
00354 char*
00355 ToNewCString( const nsACString& aSource )
00356   {
00357     // no conversion needed, just allocate a buffer of the correct length and copy into it
00358 
00359     char* result = AllocateStringCopy(aSource, (char*)0);
00360     if (!result)
00361       return nsnull;
00362 
00363     nsACString::const_iterator fromBegin, fromEnd;
00364     char* toBegin = result;
00365     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);
00366     return result;
00367   }
00368 
00369 NS_COM
00370 PRUnichar*
00371 ToNewUnicode( const nsAString& aSource )
00372   {
00373     // no conversion needed, just allocate a buffer of the correct length and copy into it
00374 
00375     PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
00376     if (!result)
00377       return nsnull;
00378 
00379     nsAString::const_iterator fromBegin, fromEnd;
00380     PRUnichar* toBegin = result;
00381     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = PRUnichar(0);
00382     return result;
00383   }
00384 
00385 NS_COM
00386 PRUnichar*
00387 ToNewUnicode( const nsACString& aSource )
00388   {
00389     PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
00390     if (!result)
00391       return nsnull;
00392 
00393     nsACString::const_iterator fromBegin, fromEnd;
00394     LossyConvertEncoding<char, PRUnichar> converter(result);
00395     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
00396     return result;
00397   }
00398 
00399 NS_COM
00400 PRUnichar*
00401 UTF8ToNewUnicode( const nsACString& aSource, PRUint32 *aUTF16Count )
00402   {
00403     nsACString::const_iterator start, end;
00404     CalculateUTF8Length calculator;
00405     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
00406                 calculator);
00407 
00408     if (aUTF16Count)
00409       *aUTF16Count = calculator.Length();
00410 
00411     PRUnichar *result = NS_STATIC_CAST(PRUnichar*,
00412         nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
00413     if (!result)
00414       return nsnull;
00415 
00416     ConvertUTF8toUTF16 converter(result);
00417     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
00418                 converter).write_terminator();
00419     NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
00420 
00421     return result;
00422   }
00423 
00424 NS_COM
00425 PRUnichar*
00426 CopyUnicodeTo( const nsAString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength )
00427   {
00428     nsAString::const_iterator fromBegin, fromEnd;
00429     PRUnichar* toBegin = aDest;    
00430     copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), toBegin);
00431     return aDest;
00432   }
00433 
00434 NS_COM 
00435 void 
00436 CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
00437                const nsAString::const_iterator& aSrcEnd,
00438                nsAString& aDest )
00439   {
00440     nsAString::iterator writer;
00441     aDest.SetLength(Distance(aSrcStart, aSrcEnd));
00442     aDest.BeginWriting(writer);
00443     nsAString::const_iterator fromBegin(aSrcStart);
00444     
00445     copy_string(fromBegin, aSrcEnd, writer);
00446   }
00447 
00448 NS_COM 
00449 void 
00450 AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
00451                  const nsAString::const_iterator& aSrcEnd,
00452                  nsAString& aDest )
00453   {
00454     nsAString::iterator writer;
00455     PRUint32 oldLength = aDest.Length();
00456     aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
00457     aDest.BeginWriting(writer).advance(oldLength);
00458     nsAString::const_iterator fromBegin(aSrcStart);
00459     
00460     copy_string(fromBegin, aSrcEnd, writer);
00461   }
00462 
00463 NS_COM
00464 PRBool
00465 IsASCII( const nsAString& aString )
00466   {
00467     static const PRUnichar NOT_ASCII = PRUnichar(~0x007F);
00468 
00469 
00470     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
00471 
00472     nsAString::const_iterator done_reading;
00473     aString.EndReading(done_reading);
00474 
00475       // for each chunk of |aString|...
00476     PRUint32 fragmentLength = 0;
00477     nsAString::const_iterator iter;
00478     for ( aString.BeginReading(iter); iter != done_reading; iter.advance( PRInt32(fragmentLength) ) )
00479       {
00480         fragmentLength = PRUint32(iter.size_forward());
00481         const PRUnichar* c = iter.get();
00482         const PRUnichar* fragmentEnd = c + fragmentLength;
00483 
00484           // for each character in this chunk...
00485         while ( c < fragmentEnd )
00486           if ( *c++ & NOT_ASCII )
00487             return PR_FALSE;
00488       }
00489 
00490     return PR_TRUE;
00491   }
00492 
00493 NS_COM
00494 PRBool
00495 IsASCII( const nsACString& aString )
00496   {
00497     static const char NOT_ASCII = char(~0x7F);
00498 
00499 
00500     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
00501 
00502     nsACString::const_iterator done_reading;
00503     aString.EndReading(done_reading);
00504 
00505       // for each chunk of |aString|...
00506     PRUint32 fragmentLength = 0;
00507     nsACString::const_iterator iter;
00508     for ( aString.BeginReading(iter); iter != done_reading; iter.advance( PRInt32(fragmentLength) ) )
00509       {
00510         fragmentLength = PRUint32(iter.size_forward());
00511         const char* c = iter.get();
00512         const char* fragmentEnd = c + fragmentLength;
00513 
00514           // for each character in this chunk...
00515         while ( c < fragmentEnd )
00516           if ( *c++ & NOT_ASCII )
00517             return PR_FALSE;
00518       }
00519 
00520     return PR_TRUE;
00521   }
00522 
// Byte-level validator: returns PR_TRUE when |aString| passes every check
// below, PR_FALSE at the first violation. An empty string returns PR_TRUE.
//
// Rejected inputs, as implemented here:
//   - a byte in [80-C1] where a lead byte is expected (stray trail byte, or
//     the C0/C1 overlong leads);
//   - a lead byte above F4;
//   - a trail byte for which |UTF8traits::isInSeq| is false;
//   - overlong forms E0 [80-9F] .. and F0 [80-8F] ..;
//   - ED [A0-BF] .. (surrogate range) and F4 [90-BF] .. (above 0x10FFFF);
//   - the sequences flagged by |nonchar|: EF BF [BE-BF], and 4-byte
//     sequences whose trail bytes are xF BF [BE-BF];
//   - a multi-byte sequence still incomplete when the input ends.
//
// |state| is the number of trail bytes still owed by the current sequence.
// It is kept across fragments, so a sequence may straddle a fragment edge.
00523 NS_COM
00524 PRBool
00525 IsUTF8( const nsACString& aString )
00526   {
00527     nsReadingIterator<char> done_reading;
00528     aString.EndReading(done_reading);
00529 
00530     PRInt32 state = 0;
    // |overlong| and |surrogate| constrain only the first trail byte (both
    // are cleared after every trail byte); |nonchar| stays armed for the
    // whole sequence.
00531     PRBool overlong = PR_FALSE;
00532     PRBool surrogate = PR_FALSE;
00533     PRBool nonchar = PR_FALSE;
00534     PRUint16 olupper = 0; // overlong byte upper bound.
00535     PRUint16 slower = 0;  // surrogate byte lower bound.
00536 
00537       // for each chunk of |aString|...
00538     PRUint32 fragmentLength = 0;
00539     nsReadingIterator<char> iter;
00540 
00541     for ( aString.BeginReading(iter); iter != done_reading; iter.advance( PRInt32(fragmentLength) ) )
00542       {
00543         fragmentLength = PRUint32(iter.size_forward());
00544         const char* ptr = iter.get();
00545         const char* fragmentEnd = ptr + fragmentLength;
00546 
00547           // for each character in this chunk...
00548         while ( ptr < fragmentEnd )
00549           {
00550             PRUint8 c;
00551             
            // state == 0: a new character starts here, so classify its
            // lead byte and arm the checks for its trail bytes.
00552             if (0 == state)
00553               {
00554                 c = *ptr++;
00555 
00556                 if ( UTF8traits::isASCII(c) ) 
00557                   continue;
00558 
00559                 if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
00560                   return PR_FALSE;
00561                 else if ( UTF8traits::is2byte(c) ) 
00562                     state = 1;
00563                 else if ( UTF8traits::is3byte(c) ) 
00564                   {
00565                     state = 2;
00566                     if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF] 
00567                       {
00568                         overlong = PR_TRUE;
00569                         olupper = 0x9F;
00570                       }
00571                     else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
00572                       {
00573                         surrogate = PR_TRUE;
00574                         slower = 0xA0;
00575                       }
00576                     else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
00577                       nonchar = PR_TRUE;
00578                   }
00579                 else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
00580                   {
00581                     state = 3;
00582                     nonchar = PR_TRUE;
00583                     if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
00584                       {
00585                         overlong = PR_TRUE;
00586                         olupper = 0x8F;
00587                       }
00588                     else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF] 
00589                       {
00590                         // actually not surrogates but codepoints beyond 0x10FFFF
00591                         surrogate = PR_TRUE;
00592                         slower = 0x90;
00593                       }
00594                   }
00595                 else
00596                   return PR_FALSE; // Not UTF-8 string
00597               }
00598               
              // Consume as many owed trail bytes as this fragment holds;
              // any remainder is picked up in the next fragment because
              // |state| is still non-zero.
00599               while (ptr < fragmentEnd && state)
00600                 {
00601                   c = *ptr++;
00602                   --state;
00603 
                  // |nonchar| survives only while every trail byte seen so
                  // far still matches the non-character pattern.
00604                   // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
00605                   if ( nonchar &&  ( !state &&  c < 0xBE ||
00606                        state == 1 && c != 0xBF  ||
00607                        state == 2 && 0x0F != (0x0F & c) ))
00608                      nonchar = PR_FALSE;
00609 
00610                   if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper || 
00611                        surrogate && slower <= c || nonchar && !state )
00612                     return PR_FALSE; // Not UTF-8 string
00613                   overlong = surrogate = PR_FALSE;
00614                 }
00615             }
00616         }
00617     return !state; // state != 0 at the end indicates an invalid UTF-8 seq. 
00618   }
00619 
00623 class ConvertToUpperCase
00624   {
00625     public:
00626       typedef char value_type;
00627 
00628       PRUint32
00629       write( const char* aSource, PRUint32 aSourceLength )
00630         {
00631           char* cp = NS_CONST_CAST(char*,aSource);
00632           const char* end = aSource + aSourceLength;
00633           while (cp != end) {
00634             char ch = *cp;
00635             if ((ch >= 'a') && (ch <= 'z'))
00636               *cp = ch - ('a' - 'A');
00637             ++cp;
00638           }
00639           return aSourceLength;
00640         }
00641   };
00642 
#ifdef MOZ_V1_STRING_ABI
/**
 * Upper-case 'a'..'z' in |aCString| in place by running a
 * |ConvertToUpperCase| sink over its writable range.
 * Only compiled when |MOZ_V1_STRING_ABI| is defined.
 */
NS_COM
void
ToUpperCase( nsACString& aCString )
{
  nsACString::iterator cursor, limit;
  aCString.BeginWriting(cursor);
  aCString.EndWriting(limit);

  ConvertToUpperCase upcaser;
  copy_string(cursor, limit, upcaser);
}
#endif
00653 
00654 NS_COM
00655 void
00656 ToUpperCase( nsCSubstring& aCString )
00657   {
00658     ConvertToUpperCase converter;
00659     char* start;
00660     converter.write(aCString.BeginWriting(start), aCString.Length());
00661   }
00662 
00666 class CopyToUpperCase
00667   {
00668     public:
00669       typedef char value_type;
00670 
00671       CopyToUpperCase( nsACString::iterator& aDestIter )
00672         : mIter(aDestIter)
00673         {
00674         }
00675 
00676       PRUint32
00677       write( const char* aSource, PRUint32 aSourceLength )
00678         {
00679           PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
00680           char* cp = mIter.get();
00681           const char* end = aSource + len;
00682           while (aSource != end) {
00683             char ch = *aSource;
00684             if ((ch >= 'a') && (ch <= 'z'))
00685               *cp = ch - ('a' - 'A');
00686             else
00687               *cp = ch;
00688             ++aSource;
00689             ++cp;
00690           }
00691           mIter.advance(len);
00692           return len;
00693         }
00694 
00695     protected:
00696       nsACString::iterator& mIter;
00697   };
00698 
00699 NS_COM
00700 void
00701 ToUpperCase( const nsACString& aSource, nsACString& aDest )
00702   {
00703     nsACString::const_iterator fromBegin, fromEnd;
00704     nsACString::iterator toBegin;
00705     aDest.SetLength(aSource.Length());
00706     CopyToUpperCase converter(aDest.BeginWriting(toBegin));
00707     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
00708   }
00709 
00713 class ConvertToLowerCase
00714   {
00715     public:
00716       typedef char value_type;
00717 
00718       PRUint32
00719       write( const char* aSource, PRUint32 aSourceLength )
00720         {
00721           char* cp = NS_CONST_CAST(char*,aSource);
00722           const char* end = aSource + aSourceLength;
00723           while (cp != end) {
00724             char ch = *cp;
00725             if ((ch >= 'A') && (ch <= 'Z'))
00726               *cp = ch + ('a' - 'A');
00727             ++cp;
00728           }
00729           return aSourceLength;
00730         }
00731   };
00732 
#ifdef MOZ_V1_STRING_ABI
/**
 * Lower-case 'A'..'Z' in |aCString| in place by running a
 * |ConvertToLowerCase| sink over its writable range.
 * Only compiled when |MOZ_V1_STRING_ABI| is defined.
 */
NS_COM
void
ToLowerCase( nsACString& aCString )
{
  nsACString::iterator cursor, limit;
  aCString.BeginWriting(cursor);
  aCString.EndWriting(limit);

  ConvertToLowerCase downcaser;
  copy_string(cursor, limit, downcaser);
}
#endif
00743 
00744 NS_COM
00745 void
00746 ToLowerCase( nsCSubstring& aCString )
00747   {
00748     ConvertToLowerCase converter;
00749     char* start;
00750     converter.write(aCString.BeginWriting(start), aCString.Length());
00751   }
00752 
00756 class CopyToLowerCase
00757   {
00758     public:
00759       typedef char value_type;
00760 
00761       CopyToLowerCase( nsACString::iterator& aDestIter )
00762         : mIter(aDestIter)
00763         {
00764         }
00765 
00766       PRUint32
00767       write( const char* aSource, PRUint32 aSourceLength )
00768         {
00769           PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
00770           char* cp = mIter.get();
00771           const char* end = aSource + len;
00772           while (aSource != end) {
00773             char ch = *aSource;
00774             if ((ch >= 'A') && (ch <= 'Z'))
00775               *cp = ch + ('a' - 'A');
00776             else
00777               *cp = ch;
00778             ++aSource;
00779             ++cp;
00780           }
00781           mIter.advance(len);
00782           return len;
00783         }
00784 
00785     protected:
00786       nsACString::iterator& mIter;
00787   };
00788 
00789 NS_COM
00790 void
00791 ToLowerCase( const nsACString& aSource, nsACString& aDest )
00792   {
00793     nsACString::const_iterator fromBegin, fromEnd;
00794     nsACString::iterator toBegin;
00795     aDest.SetLength(aSource.Length());
00796     CopyToLowerCase converter(aDest.BeginWriting(toBegin));
00797     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
00798   }
00799 
// Shared forward-search implementation behind the |FindInReadable| overloads.
//
// Looks for the first occurrence of |aPattern| inside
// [aSearchStart, aSearchEnd).
//
//   aPattern      the string to look for
//   aSearchStart  in: start of the range to search;
//                 out: start of the match (or == aSearchEnd on failure)
//   aSearchEnd    in: end of the range to search;
//                 out: end of the match on success, unchanged on failure
//   compare       called as compare(patternChar, searchChar); a non-zero
//                 result is treated as "these characters differ"
//
// Returns PR_TRUE when a match was found, PR_FALSE otherwise. An empty
// search range returns PR_FALSE without touching either iterator.
//
// NOTE(review): |*aPatternStart| is read without first checking that the
// pattern is non-empty; presumably callers never pass an empty pattern --
// confirm.
00800 template <class StringT, class IteratorT, class Comparator>
00801 PRBool
00802 FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
00803   {
00804     PRBool found_it = PR_FALSE;
00805 
00806       // only bother searching at all if we're given a non-empty range to search
00807     if ( aSearchStart != aSearchEnd )
00808       {
00809         IteratorT aPatternStart, aPatternEnd;
00810         aPattern.BeginReading(aPatternStart);
00811         aPattern.EndReading(aPatternEnd);
00812 
00813           // outer loop keeps searching till we find it or run out of string to search
00814         while ( !found_it )
00815           {
00816               // fast inner loop (that's what it's called, not what it is) looks for a potential match
00817             while ( aSearchStart != aSearchEnd &&
00818                     compare(*aPatternStart, *aSearchStart) )
00819               ++aSearchStart;
00820 
00821               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
00822             if ( aSearchStart == aSearchEnd )
00823               break;
00824 
00825               // otherwise, we're at a potential match, let's see if we really hit one
00826             IteratorT testPattern(aPatternStart);
00827             IteratorT testSearch(aSearchStart);
00828 
00829               // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
00830             for(;;)
00831               {
00832                   // we already compared the first character in the outer loop,
00833                   //  so we'll advance before the next comparison
00834                 ++testPattern;
00835                 ++testSearch;
00836 
00837                   // if we verified all the way to the end of the pattern, then we found it!
00838                 if ( testPattern == aPatternEnd )
00839                   {
00840                     found_it = PR_TRUE;
00841                     aSearchEnd = testSearch; // return the exact found range through the parameters
00842                     break;
00843                   }
00844 
00845                   // if we got to end of the string we're searching before we hit the end of the
00846                   //  pattern, we'll never find what we're looking for
00847                 if ( testSearch == aSearchEnd )
00848                   {
                    // collapsing the range makes the outer loop's
                    // "out of string" check fire and end the search
00849                     aSearchStart = aSearchEnd;
00850                     break;
00851                   }
00852 
00853                   // else if we mismatched ... it's time to advance to the next search position
00854                   //  and get back into the `fast' loop
00855                 if ( compare(*testPattern, *testSearch) )
00856                   {
00857                     ++aSearchStart;
00858                     break;
00859                   }
00860               }
00861           }
00862       }
00863 
00864     return found_it;
00865   }
00866 
00867 
00868 NS_COM
00869 PRBool
00870 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
00871   {
00872     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
00873   }
00874 
00875 NS_COM
00876 PRBool
00877 FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
00878   {
00879     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
00880   }
00881 
00882 NS_COM
00883 PRBool
00884 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
00885   {
00886     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
00887   }
00888 
00894 NS_COM
00895 PRBool
00896 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
00897   {
00898     PRBool found_it = PR_FALSE;
00899 
00900     nsAString::const_iterator savedSearchEnd(aSearchEnd);
00901     nsAString::const_iterator searchStart(aSearchStart), searchEnd(aSearchEnd);
00902 
00903     while ( searchStart != searchEnd )
00904       {
00905         if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
00906           {
00907             found_it = PR_TRUE;
00908 
00909               // this is the best match so far, so remember it
00910             aSearchStart = searchStart;
00911             aSearchEnd = searchEnd;
00912 
00913               // ...and get ready to search some more
00914               //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
00915             ++searchStart;
00916             searchEnd = savedSearchEnd;
00917           }
00918       }
00919 
00920       // if we never found it, return an empty range
00921     if ( !found_it )
00922       aSearchStart = aSearchEnd;
00923 
00924     return found_it;
00925   }
00926 
00927 NS_COM
00928 PRBool
00929 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
00930   {
00931     PRBool found_it = PR_FALSE;
00932 
00933     nsACString::const_iterator savedSearchEnd(aSearchEnd);
00934     nsACString::const_iterator searchStart(aSearchStart), searchEnd(aSearchEnd);
00935 
00936     while ( searchStart != searchEnd )
00937       {
00938         if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
00939           {
00940             found_it = PR_TRUE;
00941 
00942               // this is the best match so far, so remember it
00943             aSearchStart = searchStart;
00944             aSearchEnd = searchEnd;
00945 
00946               // ...and get ready to search some more
00947               //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
00948             ++searchStart;
00949             searchEnd = savedSearchEnd;
00950           }
00951       }
00952 
00953       // if we never found it, return an empty range
00954     if ( !found_it )
00955       aSearchStart = aSearchEnd;
00956 
00957     return found_it;
00958   }
00959 
00960 NS_COM 
00961 PRBool 
00962 FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
00963   {
00964     PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
00965 
00966     const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
00967     if ( charFoundAt ) {
00968       aSearchStart.advance( charFoundAt - aSearchStart.get() );
00969       return PR_TRUE;
00970     }
00971 
00972     aSearchStart.advance(fragmentLength);
00973     return PR_FALSE;
00974   }
00975 
00976 NS_COM 
00977 PRBool 
00978 FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
00979   {
00980     PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
00981 
00982     const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
00983     if ( charFoundAt ) {
00984       aSearchStart.advance( charFoundAt - aSearchStart.get() );
00985       return PR_TRUE;
00986     }
00987 
00988     aSearchStart.advance(fragmentLength);
00989     return PR_FALSE;
00990   } 
00991 
00992 NS_COM 
00993 PRUint32 
00994 CountCharInReadable( const nsAString& aStr,
00995                      PRUnichar aChar )
00996 {
00997   PRUint32 count = 0;
00998   nsAString::const_iterator begin, end;
00999   
01000   aStr.BeginReading(begin);
01001   aStr.EndReading(end);
01002   
01003   while (begin != end) {
01004     if (*begin == aChar) {
01005       ++count;
01006     }
01007     ++begin;
01008   }
01009 
01010   return count;
01011 }
01012 
01013 NS_COM 
01014 PRUint32 
01015 CountCharInReadable( const nsACString& aStr,
01016                      char aChar )
01017 {
01018   PRUint32 count = 0;
01019   nsACString::const_iterator begin, end;
01020   
01021   aStr.BeginReading(begin);
01022   aStr.EndReading(end);
01023   
01024   while (begin != end) {
01025     if (*begin == aChar) {
01026       ++count;
01027     }
01028     ++begin;
01029   }
01030 
01031   return count;
01032 }
01033 
01034 NS_COM PRBool
01035 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
01036                   const nsStringComparator& aComparator )
01037   {
01038     nsAString::size_type src_len = aSource.Length(),
01039                          sub_len = aSubstring.Length();
01040     if (sub_len > src_len)
01041       return PR_FALSE;
01042     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
01043   }
01044 
01045 NS_COM PRBool
01046 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
01047                   const nsCStringComparator& aComparator )
01048   {
01049     nsACString::size_type src_len = aSource.Length(),
01050                           sub_len = aSubstring.Length();
01051     if (sub_len > src_len)
01052       return PR_FALSE;
01053     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
01054   }
01055 
01056 NS_COM PRBool
01057 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
01058                 const nsStringComparator& aComparator )
01059   {
01060     nsAString::size_type src_len = aSource.Length(),
01061                          sub_len = aSubstring.Length();
01062     if (sub_len > src_len)
01063       return PR_FALSE;
01064     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
01065                                                                  aComparator);
01066   }
01067 
01068 NS_COM PRBool
01069 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
01070                 const nsCStringComparator& aComparator )
01071   {
01072     nsACString::size_type src_len = aSource.Length(),
01073                           sub_len = aSubstring.Length();
01074     if (sub_len > src_len)
01075       return PR_FALSE;
01076     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
01077                                                                  aComparator);
01078   }
01079 
01080 
01081 
01082 static const PRUnichar empty_buffer[1] = { '\0' };
01083 
01084 NS_COM const nsAFlatString& EmptyString()
01085   {
01086     static const nsDependentString sEmpty(empty_buffer);
01087 
01088     return sEmpty;
01089   }
01090 
01091 NS_COM const nsAFlatCString& EmptyCString()
01092   {
01093     static const nsDependentCString sEmpty((const char *)empty_buffer);
01094 
01095     return sEmpty;
01096   }
01097 
01098 NS_COM
01099 void
01100 AppendUCS4ToUTF16(const PRUint32 aSource, nsAString& aDest)
01101   {
01102     NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
01103     if (IS_IN_BMP(aSource))
01104       {
01105         aDest.Append(PRUnichar(aSource));
01106       }
01107     else
01108       {
01109         aDest.Append(H_SURROGATE(aSource));
01110         aDest.Append(L_SURROGATE(aSource));
01111       }
01112   }