Back to index

lightning-sunbird  0.9+nobinonly
nsConverterInputStream.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "nsConverterInputStream.h"
00039 #include "nsIInputStream.h"
00040 #include "nsICharsetConverterManager.h"
00041 #include "nsIServiceManager.h"
00042 
00043 #define CONVERTER_BUFFER_SIZE 8192
00044 
00045 NS_IMPL_ISUPPORTS3(nsConverterInputStream, nsIConverterInputStream,
00046                    nsIUnicharInputStream, nsIUnicharLineInputStream)
00047     
00048 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
00049 
00050 NS_IMETHODIMP
00051 nsConverterInputStream::Init(nsIInputStream* aStream,
00052                              const char *aCharset,
00053                              PRInt32 aBufferSize,
00054                              PRUnichar aReplacementChar)
00055 {
00056     if (!aCharset)
00057         aCharset = "UTF-8";
00058 
00059     nsresult rv;
00060 
00061     if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE;
00062     
00063     // get the decoder
00064     nsCOMPtr<nsICharsetConverterManager> ccm =
00065         do_GetService(kCharsetConverterManagerCID, &rv);
00066     if (NS_FAILED(rv)) return nsnull;
00067 
00068     rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter));
00069     if (NS_FAILED(rv)) return rv;
00070  
00071     // set up our buffers
00072     rv = NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull, aBufferSize);
00073     if (NS_FAILED(rv)) return rv;
00074 
00075     rv = NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull, aBufferSize);
00076     if (NS_FAILED(rv)) return rv;
00077 
00078     mInput = aStream;
00079     mReplacementChar = aReplacementChar;
00080     
00081     return NS_OK;
00082 }
00083 
00084 NS_IMETHODIMP
00085 nsConverterInputStream::Close()
00086 {
00087     nsresult rv = mInput ? mInput->Close() : NS_OK;
00088     PR_FREEIF(mLineBuffer);
00089     mInput = nsnull;
00090     mConverter = nsnull;
00091     mByteData = nsnull;
00092     mUnicharData = nsnull;
00093     return rv;
00094 }
00095 
00096 NS_IMETHODIMP
00097 nsConverterInputStream::Read(PRUnichar* aBuf,
00098                              PRUint32 aCount,
00099                              PRUint32 *aReadCount)
00100 {
00101   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
00102   PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
00103   if (0 == readCount) {
00104     // Fill the unichar buffer
00105     readCount = Fill(&mLastErrorCode);
00106     if (readCount == 0) {
00107       *aReadCount = 0;
00108       return mLastErrorCode;
00109     }
00110   }
00111   if (readCount > aCount) {
00112     readCount = aCount;
00113   }
00114   memcpy(aBuf, mUnicharData->GetBuffer() + mUnicharDataOffset,
00115          readCount * sizeof(PRUnichar));
00116   mUnicharDataOffset += readCount;
00117   *aReadCount = readCount;
00118   return NS_OK;
00119 }
00120 
00121 NS_IMETHODIMP
00122 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
00123                                      void* aClosure,
00124                                      PRUint32 aCount, PRUint32 *aReadCount)
00125 {
00126   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
00127   PRUint32 bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
00128   nsresult rv;
00129   if (0 == bytesToWrite) {
00130     // Fill the unichar buffer
00131     bytesToWrite = Fill(&rv);
00132     if (bytesToWrite <= 0) {
00133       *aReadCount = 0;
00134       return rv;
00135     }
00136   }
00137   
00138   if (bytesToWrite > aCount)
00139     bytesToWrite = aCount;
00140   
00141   PRUint32 bytesWritten;
00142   PRUint32 totalBytesWritten = 0;
00143 
00144   while (bytesToWrite) {
00145     rv = aWriter(this, aClosure,
00146                  mUnicharData->GetBuffer() + mUnicharDataOffset,
00147                  totalBytesWritten, bytesToWrite, &bytesWritten);
00148     if (NS_FAILED(rv)) {
00149       // don't propagate errors to the caller
00150       break;
00151     }
00152     
00153     bytesToWrite -= bytesWritten;
00154     totalBytesWritten += bytesWritten;
00155     mUnicharDataOffset += bytesWritten;
00156     
00157   }
00158 
00159   *aReadCount = totalBytesWritten;
00160 
00161   return NS_OK;
00162 }
00163 
00164 NS_IMETHODIMP
00165 nsConverterInputStream::ReadString(PRUint32 aCount, nsAString& aString,
00166                                    PRUint32* aReadCount)
00167 {
00168   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
00169   PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
00170   if (0 == readCount) {
00171     // Fill the unichar buffer
00172     readCount = Fill(&mLastErrorCode);
00173     if (readCount == 0) {
00174       *aReadCount = 0;
00175       return mLastErrorCode;
00176     }
00177   }
00178   if (readCount > aCount) {
00179     readCount = aCount;
00180   }
00181   const PRUnichar* buf = NS_REINTERPRET_CAST(const PRUnichar*, 
00182                                              mUnicharData->GetBuffer() +
00183                                              mUnicharDataOffset);
00184   aString.Assign(buf, readCount);
00185   mUnicharDataOffset += readCount;
00186   *aReadCount = readCount;
00187   return NS_OK;
00188 }
00189 
00190 PRUint32
00191 nsConverterInputStream::Fill(nsresult * aErrorCode)
00192 {
00193   if (nsnull == mInput) {
00194     // We already closed the stream!
00195     *aErrorCode = NS_BASE_STREAM_CLOSED;
00196     return 0;
00197   }
00198 
00199   if (NS_FAILED(mLastErrorCode)) {
00200     // We failed to completely convert last time, and error-recovery
00201     // is disabled.  We will fare no better this time, so...
00202     *aErrorCode = mLastErrorCode;
00203     return 0;
00204   }
00205   
00206   // We assume a many to one conversion and are using equal sizes for
00207   // the two buffers.  However if an error happens at the very start
00208   // of a byte buffer we may end up in a situation where n bytes lead
00209   // to n+1 unicode chars.  Thus we need to keep track of the leftover
00210   // bytes as we convert.
00211   
00212   PRInt32 nb = mByteData->Fill(aErrorCode, mInput, mLeftOverBytes);
00213 #if defined(DEBUG_bzbarsky) && 0
00214   for (unsigned int foo = 0; foo < mByteData->GetLength(); ++foo) {
00215     fprintf(stderr, "%c", mByteData->GetBuffer()[foo]);
00216   }
00217   fprintf(stderr, "\n");
00218 #endif
00219   if (nb <= 0 && mLeftOverBytes == 0) {
00220     // No more data 
00221     *aErrorCode = NS_OK;
00222     return 0;
00223   }
00224 
00225   NS_ASSERTION(PRUint32(nb) + mLeftOverBytes == mByteData->GetLength(),
00226                "mByteData is lying to us somewhere");
00227   
00228   // Now convert as much of the byte buffer to unicode as possible
00229   mUnicharDataOffset = 0;
00230   mUnicharDataLength = 0;
00231   PRUint32 srcConsumed = 0;
00232   do {
00233     PRInt32 srcLen = mByteData->GetLength() - srcConsumed;
00234     PRInt32 dstLen = mUnicharData->GetBufferSize() - mUnicharDataLength;
00235     *aErrorCode = mConverter->Convert(mByteData->GetBuffer()+srcConsumed,
00236                                       &srcLen,
00237                                       mUnicharData->GetBuffer()+mUnicharDataLength,
00238                                       &dstLen);
00239     mUnicharDataLength += dstLen;
00240     // XXX if srcLen is negative, we want to drop the _first_ byte in
00241     // the erroneous byte sequence and try again.  This is not quite
00242     // possible right now -- see bug 160784
00243     srcConsumed += srcLen;
00244     if (NS_FAILED(*aErrorCode) && mReplacementChar) {
00245       NS_ASSERTION(0 < mUnicharData->GetBufferSize() - mUnicharDataLength,
00246                    "Decoder returned an error but filled the output buffer! "
00247                    "Should not happen.");
00248       mUnicharData->GetBuffer()[mUnicharDataLength++] = mReplacementChar;
00249       ++srcConsumed;
00250       // XXX this is needed to make sure we don't underrun our buffer;
00251       // bug 160784 again
00252       srcConsumed = PR_MAX(srcConsumed, 0);
00253       mConverter->Reset();
00254     }
00255     NS_ASSERTION(srcConsumed <= mByteData->GetLength(),
00256                  "Whoa.  The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
00257   } while (mReplacementChar &&
00258            NS_FAILED(*aErrorCode));
00259 
00260   mLeftOverBytes = mByteData->GetLength() - srcConsumed;
00261 
00262   return mUnicharDataLength;
00263 }
00264 
00265 NS_IMETHODIMP
00266 nsConverterInputStream::ReadLine(nsAString& aLine, PRBool* aResult)
00267 {
00268   if (!mLineBuffer) {
00269     nsresult rv = NS_InitLineBuffer(&mLineBuffer);
00270     if (NS_FAILED(rv)) return rv;
00271   }
00272   return NS_ReadLine(this, mLineBuffer, aLine, aResult);
00273 }