Back to index

lightning-sunbird  0.9+nobinonly
nsLinebreakConverter.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "nsLinebreakConverter.h"
00039 
00040 #include "nsMemory.h"
00041 #include "nsCRT.h"
00042 
00043 
// Argument adapters used where source pointers are handed to the templated
// converters. On Windows with MSVC whose _MSC_VER is 1100 or lower the
// argument is explicitly cast to a non-const pointer; on every other
// toolchain the argument is passed through unchanged.
#if !defined(XP_WIN) || !defined(_MSC_VER) || (_MSC_VER > 1100)
#define LOSER_CHAR_CAST(t)       (t)
#define LOSER_UNICHAR_CAST(t)    (t)
#else
#define LOSER_CHAR_CAST(t)       (char *)(t)
#define LOSER_UNICHAR_CAST(t)    (PRUnichar *)(t)
#endif
00051 
00052 /*----------------------------------------------------------------------------
00053        GetLinebreakString 
00054        
00055        Could make this inline
00056 ----------------------------------------------------------------------------*/
00057 static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType)
00058 {
00059   static const char* const sLinebreaks[] = {
00060     "",             // any
00061     NS_LINEBREAK,   // platform
00062     LFSTR,          // content
00063     CRLF,           // net
00064     CRSTR,          // Mac
00065     LFSTR,          // Unix
00066     CRLF,           // Windows
00067     nsnull  
00068   };
00069   
00070   return sLinebreaks[aBreakType];
00071 }
00072 
00073 
/*----------------------------------------------------------------------------
  AppendLinebreak

  Small helper that appends a line break to an output buffer.

  ioDest        in/out write cursor; advanced past the characters written.
  lineBreakStr  null-terminated break string to copy (terminator excluded).

  Exactly strlen(lineBreakStr) characters are written. In particular an
  empty break string writes nothing and never reads past its terminator, so
  the number of characters emitted always matches a length that was
  computed with strlen() on the same string.
----------------------------------------------------------------------------*/
template<class T>
void AppendLinebreak(T*& ioDest, const char* lineBreakStr)
{
  while (*lineBreakStr)
    *ioDest++ = *lineBreakStr++;
}
00087 
00088 /*----------------------------------------------------------------------------
00089        CountChars 
00090        
00091        Counts occurrences of breakStr in aSrc
00092 ----------------------------------------------------------------------------*/
00093 template<class T>
00094 PRInt32 CountLinebreaks(const T* aSrc, PRInt32 inLen, const char* breakStr)
00095 {
00096   const T* src = aSrc;
00097   const T* srcEnd = aSrc + inLen;
00098   PRInt32 theCount = 0;
00099 
00100   while (src < srcEnd)
00101   {
00102     if (*src == *breakStr)
00103     {
00104       src++;
00105 
00106       if (breakStr[1])
00107       {
00108         if (src < srcEnd && *src == breakStr[1])
00109         {
00110           src++;
00111           theCount++;
00112         }
00113       }
00114       else
00115       {
00116         theCount++;
00117       }
00118     }
00119     else
00120     {
00121       src++;
00122     }
00123   }
00124   
00125   return theCount;
00126 }
00127 
00128 
00129 /*----------------------------------------------------------------------------
00130        ConvertBreaks 
00131        
00132        ioLen *includes* a terminating null, if any
00133 ----------------------------------------------------------------------------*/
00134 template<class T>
00135 static T* ConvertBreaks(const T* inSrc, PRInt32& ioLen, const char* srcBreak, const char* destBreak)
00136 {
00137   NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string");
00138   
00139   T* resultString = nsnull;
00140    
00141   // handle the no conversion case
00142   if (nsCRT::strcmp(srcBreak, destBreak) == 0)
00143   {
00144     resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
00145     if (!resultString) return nsnull;
00146     memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any
00147     return resultString;
00148   }
00149     
00150   PRInt32 srcBreakLen = strlen(srcBreak);
00151   PRInt32 destBreakLen = strlen(destBreak);
00152 
00153   // handle the easy case, where the string length does not change, and the
00154   // breaks are only 1 char long, i.e. CR <-> LF
00155   if (srcBreakLen == destBreakLen && srcBreakLen == 1)
00156   {
00157     resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
00158     if (!resultString) return nsnull;
00159     
00160     const T* src = inSrc;
00161     const T* srcEnd = inSrc + ioLen;             // includes null, if any
00162     T*       dst = resultString;
00163     
00164     char srcBreakChar = *srcBreak;        // we know it's one char long already
00165     char dstBreakChar = *destBreak;
00166     
00167     while (src < srcEnd)
00168     {
00169       if (*src == srcBreakChar)
00170       {
00171         *dst++ = dstBreakChar;
00172         src++;
00173       }
00174       else
00175       {
00176         *dst++ = *src++;
00177       }
00178     }
00179 
00180     // ioLen does not change
00181   }
00182   else
00183   {
00184     // src and dest termination is different length. Do it a slower way.
00185     
00186     // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique.
00187     PRInt32 numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak);
00188     
00189     PRInt32 newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
00190     resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen);
00191     if (!resultString) return nsnull;
00192     
00193     const T* src = inSrc;
00194     const T* srcEnd = inSrc + ioLen;             // includes null, if any
00195     T*       dst = resultString;
00196     
00197     while (src < srcEnd)
00198     {
00199       if (*src == *srcBreak)
00200       {
00201         *dst++ = *destBreak;
00202         if (destBreak[1])
00203           *dst++ = destBreak[1];
00204       
00205         src++;
00206         if (src < srcEnd && srcBreak[1] && *src == srcBreak[1])
00207           src++;
00208       }
00209       else
00210       {
00211         *dst++ = *src++;
00212       }
00213     }
00214     
00215     ioLen = newBufLen;
00216   }
00217   
00218   return resultString;
00219 }
00220 
00221 
00222 /*----------------------------------------------------------------------------
00223   ConvertBreaksInSitu 
00224        
00225   Convert breaks in situ. Can only do this if the linebreak length
00226   does not change.
00227 ----------------------------------------------------------------------------*/
00228 template<class T>
00229 static void ConvertBreaksInSitu(T* inSrc, PRInt32 inLen, char srcBreak, char destBreak)
00230 {
00231   T* src = inSrc;
00232   T* srcEnd = inSrc + inLen;
00233 
00234   while (src < srcEnd)
00235   {
00236     if (*src == srcBreak)
00237       *src = destBreak;
00238     
00239     src++;
00240   }
00241 }
00242 
00243 
00244 /*----------------------------------------------------------------------------
00245   ConvertUnknownBreaks 
00246        
00247   Convert unknown line breaks to the specified break.
00248        
00249   This will convert CRLF pairs to one break, and single CR or LF to a break.
00250 ----------------------------------------------------------------------------*/
00251 template<class T>
00252 static T* ConvertUnknownBreaks(const T* inSrc, PRInt32& ioLen, const char* destBreak)
00253 {
00254   const T* src = inSrc;
00255   const T* srcEnd = inSrc + ioLen;        // includes null, if any
00256   
00257   PRInt32 destBreakLen = strlen(destBreak);
00258   PRInt32 finalLen = 0;
00259 
00260   while (src < srcEnd)
00261   {
00262     if (*src == nsCRT::CR)
00263     {
00264       if (src < srcEnd && src[1] == nsCRT::LF)
00265       {
00266         // CRLF
00267         finalLen += destBreakLen;
00268         src++;
00269       }
00270       else
00271       {
00272         // Lone CR
00273         finalLen += destBreakLen;
00274       }
00275     }
00276     else if (*src == nsCRT::LF)
00277     {
00278       // Lone LF
00279       finalLen += destBreakLen;
00280     }
00281     else
00282     {
00283       finalLen++;
00284     }
00285     src++;
00286   }
00287   
00288   T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen);
00289   if (!resultString) return nsnull;
00290 
00291   src = inSrc;
00292   srcEnd = inSrc + ioLen;          // includes null, if any
00293 
00294   T* dst = resultString;
00295   
00296   while (src < srcEnd)
00297   {
00298     if (*src == nsCRT::CR)
00299     {
00300       if (src < srcEnd && src[1] == nsCRT::LF)
00301       {
00302         // CRLF
00303         AppendLinebreak(dst, destBreak);
00304         src++;
00305       }
00306       else
00307       {
00308         // Lone CR
00309         AppendLinebreak(dst, destBreak);
00310       }
00311     }
00312     else if (*src == nsCRT::LF)
00313     {
00314       // Lone LF
00315       AppendLinebreak(dst, destBreak);
00316     }
00317     else
00318     {
00319       *dst++ = *src;
00320     }
00321     src++;
00322   }
00323 
00324   ioLen = finalLen;
00325   return resultString;
00326 }
00327 
00328 
00329 #ifdef XP_MAC
00330 #pragma mark -
00331 #endif
00332 
00333 
00334 /*----------------------------------------------------------------------------
00335        ConvertLineBreaks 
00336        
00337 ----------------------------------------------------------------------------*/
00338 char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
00339             ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
00340 {
00341   NS_ASSERTION(aDestBreaks != eLinebreakAny, "Invalid parameter");
00342   if (!aSrc) return nsnull;
00343   
00344   PRInt32 sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
00345 
00346   char* resultString;
00347   if (aSrcBreaks == eLinebreakAny)
00348     resultString = ConvertUnknownBreaks(LOSER_CHAR_CAST(aSrc), sourceLen, GetLinebreakString(aDestBreaks));
00349   else
00350     resultString = ConvertBreaks(LOSER_CHAR_CAST(aSrc), sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
00351   
00352   if (outLen)
00353     *outLen = sourceLen;
00354   return resultString;
00355 }
00356 
00357 
00358 /*----------------------------------------------------------------------------
00359        ConvertLineBreaksInSitu 
00360        
00361 ----------------------------------------------------------------------------*/
00362 nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks,
00363             ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
00364 {
00365   NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
00366   if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
00367   
00368   NS_ASSERTION(aDestBreaks != eLinebreakAny, "Invalid parameter");
00369 
00370   PRInt32 sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen;
00371   
00372   // can we convert in-place?
00373   const char* srcBreaks = GetLinebreakString(aSrcBreaks);
00374   const char* dstBreaks = GetLinebreakString(aDestBreaks);
00375   
00376   if ( (aSrcBreaks != eLinebreakAny) &&
00377        (strlen(srcBreaks) == 1) &&
00378        (strlen(dstBreaks) == 1) )
00379   {
00380     ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
00381     if (outLen)
00382       *outLen = sourceLen;
00383   }
00384   else
00385   {
00386     char* destBuffer;
00387     
00388     if (aSrcBreaks == eLinebreakAny)
00389       destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
00390     else
00391       destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
00392 
00393     if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
00394     *ioBuffer = destBuffer;
00395     if (outLen)
00396       *outLen = sourceLen;
00397   }
00398   
00399   return NS_OK;
00400 }
00401 
00402 
00403 /*----------------------------------------------------------------------------
00404        ConvertUnicharLineBreaks 
00405        
00406 ----------------------------------------------------------------------------*/
00407 PRUnichar* nsLinebreakConverter::ConvertUnicharLineBreaks(const PRUnichar* aSrc,
00408             ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
00409 {
00410   NS_ASSERTION(aDestBreaks != eLinebreakAny, "Invalid parameter");
00411   if (!aSrc) return nsnull;
00412   
00413   PRInt32 bufLen = (aSrcLen == kIgnoreLen) ? nsCRT::strlen(aSrc) + 1 : aSrcLen;
00414 
00415   PRUnichar* resultString;
00416   if (aSrcBreaks == eLinebreakAny)
00417     resultString = ConvertUnknownBreaks(LOSER_UNICHAR_CAST(aSrc), bufLen, GetLinebreakString(aDestBreaks));
00418   else
00419     resultString = ConvertBreaks(LOSER_UNICHAR_CAST(aSrc), bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
00420   
00421   if (outLen)
00422     *outLen = bufLen;
00423   return resultString;
00424 }
00425 
00426 
00427 /*----------------------------------------------------------------------------
00428        ConvertStringLineBreaks 
00429        
00430 ----------------------------------------------------------------------------*/
00431 nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(PRUnichar **ioBuffer,
00432             ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, PRInt32 aSrcLen, PRInt32* outLen)
00433 {
00434   NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
00435   if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
00436   NS_ASSERTION(aDestBreaks != eLinebreakAny, "Invalid parameter");
00437 
00438   PRInt32 sourceLen = (aSrcLen == kIgnoreLen) ? nsCRT::strlen(*ioBuffer) + 1 : aSrcLen;
00439 
00440   // can we convert in-place?
00441   const char* srcBreaks = GetLinebreakString(aSrcBreaks);
00442   const char* dstBreaks = GetLinebreakString(aDestBreaks);
00443   
00444   if ( (aSrcBreaks != eLinebreakAny) &&
00445        (strlen(srcBreaks) == 1) &&
00446        (strlen(dstBreaks) == 1) )
00447   {
00448     ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
00449     if (outLen)
00450       *outLen = sourceLen;
00451   }
00452   else
00453   {
00454     PRUnichar* destBuffer;
00455     
00456     if (aSrcBreaks == eLinebreakAny)
00457       destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
00458     else
00459       destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
00460 
00461     if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
00462     *ioBuffer = destBuffer;
00463     if (outLen)
00464       *outLen = sourceLen;
00465   }
00466   
00467   return NS_OK;
00468 }
00469 
00470 /*----------------------------------------------------------------------------
00471        ConvertStringLineBreaks 
00472        
00473 ----------------------------------------------------------------------------*/
00474 nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString,
00475           ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks)
00476 {
00477 
00478   NS_ASSERTION(aDestBreaks != eLinebreakAny, "Invalid parameter");
00479 
00480   // nothing to do
00481   if (ioString.IsEmpty()) return NS_OK;
00482   
00483   nsresult rv;
00484   
00485   // remember the old buffer in case
00486   // we blow it away later
00487   nsString::char_iterator stringBuf;
00488   ioString.BeginWriting(stringBuf);
00489   
00490   PRInt32    newLen;
00491     
00492   rv = ConvertUnicharLineBreaksInSitu(&stringBuf,
00493                                       aSrcBreaks, aDestBreaks,
00494                                       ioString.Length() + 1, &newLen);
00495   if (NS_FAILED(rv)) return rv;
00496 
00497   if (stringBuf != ioString.get())
00498     ioString.Adopt(stringBuf);
00499   
00500   return NS_OK;
00501 }
00502 
00503 
00504