Back to index

lightning-sunbird  0.9+nobinonly
nsUnicharInputStream.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "nsIUnicharInputStream.h"
00039 #include "nsIInputStream.h"
00040 #include "nsIByteBuffer.h"
00041 #include "nsIUnicharBuffer.h"
00042 #include "nsIServiceManager.h"
00043 #include "nsString.h"
00044 #include "nsCRT.h"
00045 #include "nsUTF8Utils.h"
00046 #include <fcntl.h>
00047 #if defined(NS_WIN32)
00048 #include <io.h>
00049 #else
00050 #include <unistd.h>
00051 #endif
00052 
00053 class StringUnicharInputStream : public nsIUnicharInputStream {
00054 public:
00055   StringUnicharInputStream(const nsAString* aString,
00056                            PRBool aTakeOwnership);
00057 
00058   NS_DECL_ISUPPORTS
00059   NS_DECL_NSIUNICHARINPUTSTREAM
00060 
00061   const nsAString* mString;
00062   PRUint32 mPos;
00063   PRUint32 mLen;
00064   PRBool mOwnsString;
00065 
00066 private:
00067   ~StringUnicharInputStream();
00068 };
00069 
00070 StringUnicharInputStream::StringUnicharInputStream(const nsAString* aString,
00071                                                    PRBool aTakeOwnership)
00072   : mString(aString),
00073     mPos(0),
00074     mLen(aString->Length()),
00075     mOwnsString(aTakeOwnership)
00076 {
00077 }
00078 
00079 StringUnicharInputStream::~StringUnicharInputStream()
00080 {
00081   if (mString && mOwnsString) {
00082     // Some compilers dislike deleting const pointers
00083     nsAString* mutable_string = NS_CONST_CAST(nsAString*, mString);
00084     delete mutable_string;
00085   }
00086 }
00087 
00088 NS_IMETHODIMP
00089 StringUnicharInputStream::Read(PRUnichar* aBuf,
00090                                PRUint32 aCount,
00091                                PRUint32 *aReadCount)
00092 {
00093   if (mPos >= mLen) {
00094     *aReadCount = 0;
00095     return NS_OK;
00096   }
00097   nsAString::const_iterator iter;
00098   mString->BeginReading(iter);
00099   const PRUnichar* us = iter.get();
00100   PRUint32 amount = mLen - mPos;
00101   if (amount > aCount) {
00102     amount = aCount;
00103   }
00104   memcpy(aBuf, us + mPos, sizeof(PRUnichar) * amount);
00105   mPos += amount;
00106   *aReadCount = amount;
00107   return NS_OK;
00108 }
00109 
00110 NS_IMETHODIMP
00111 StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
00112                                        void* aClosure,
00113                                        PRUint32 aCount, PRUint32 *aReadCount)
00114 {
00115   PRUint32 bytesWritten;
00116   PRUint32 totalBytesWritten = 0;
00117 
00118   nsresult rv;
00119   aCount = PR_MIN(mString->Length() - mPos, aCount);
00120   
00121   nsAString::const_iterator iter;
00122   mString->BeginReading(iter);
00123   
00124   while (aCount) {
00125     rv = aWriter(this, aClosure, iter.get() + mPos,
00126                  totalBytesWritten, aCount, &bytesWritten);
00127     
00128     if (NS_FAILED(rv)) {
00129       // don't propagate errors to the caller
00130       break;
00131     }
00132     
00133     aCount -= bytesWritten;
00134     totalBytesWritten += bytesWritten;
00135     mPos += bytesWritten;
00136   }
00137   
00138   *aReadCount = totalBytesWritten;
00139   
00140   return NS_OK;
00141 }
00142 
00143 NS_IMETHODIMP
00144 StringUnicharInputStream::ReadString(PRUint32 aCount, nsAString& aString,
00145                                      PRUint32* aReadCount)
00146 {
00147   if (mPos >= mLen) {
00148     *aReadCount = 0;
00149     return NS_OK;
00150   }
00151   PRUint32 amount = mLen - mPos;
00152   if (amount > aCount) {
00153     amount = aCount;
00154   }
00155   aString = Substring(*mString, mPos, amount);
00156   mPos += amount;
00157   *aReadCount = amount;
00158   return NS_OK;
00159 }
00160 
00161 nsresult StringUnicharInputStream::Close()
00162 {
00163   mPos = mLen;
00164   if (mString && mOwnsString) {
00165     // Some compilers dislike deleting const pointers
00166     nsAString* mutable_string = NS_CONST_CAST(nsAString*, mString);
00167     delete mutable_string;
00168   }
00169   mString = nsnull;
00170   return NS_OK;
00171 }
00172 
00173 NS_IMPL_ISUPPORTS1(StringUnicharInputStream, nsIUnicharInputStream)
00174 
00175 NS_COM nsresult
00176 NS_NewStringUnicharInputStream(nsIUnicharInputStream** aInstancePtrResult,
00177                                const nsAString* aString,
00178                                PRBool aTakeOwnership)
00179 {
00180   NS_ENSURE_ARG_POINTER(aString);
00181   NS_PRECONDITION(aInstancePtrResult, "null ptr");
00182 
00183   StringUnicharInputStream* it = new StringUnicharInputStream(aString,
00184                                                               aTakeOwnership);
00185   if (!it) {
00186     return NS_ERROR_OUT_OF_MEMORY;
00187   }
00188 
00189   NS_ADDREF(*aInstancePtrResult = it);
00190   return NS_OK;
00191 }
00192 
00193 //----------------------------------------------------------------------
00194 
00195 class UTF8InputStream : public nsIUnicharInputStream {
00196 public:
00197   UTF8InputStream();
00198   nsresult Init(nsIInputStream* aStream, PRUint32 aBufSize);
00199 
00200   NS_DECL_ISUPPORTS
00201   NS_DECL_NSIUNICHARINPUTSTREAM
00202 
00203 private:
00204   ~UTF8InputStream();
00205 
00206 protected:
00207   PRInt32 Fill(nsresult * aErrorCode);
00208 
00209   static void CountValidUTF8Bytes(const char *aBuf, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUCS2bytes);
00210 
00211   nsCOMPtr<nsIInputStream> mInput;
00212   nsCOMPtr<nsIByteBuffer> mByteData;
00213   nsCOMPtr<nsIUnicharBuffer> mUnicharData;
00214   
00215   PRUint32 mByteDataOffset;
00216   PRUint32 mUnicharDataOffset;
00217   PRUint32 mUnicharDataLength;
00218 };
00219 
00220 UTF8InputStream::UTF8InputStream() :
00221   mByteDataOffset(0),
00222   mUnicharDataOffset(0),
00223   mUnicharDataLength(0)
00224 {
00225 }
00226 
00227 nsresult 
00228 UTF8InputStream::Init(nsIInputStream* aStream, PRUint32 aBufferSize)
00229 {
00230   if (aBufferSize == 0) {
00231     aBufferSize = 8192;
00232   }
00233 
00234   nsresult rv = NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull, aBufferSize);
00235   if (NS_FAILED(rv)) return rv;
00236   rv = NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull, aBufferSize);
00237   if (NS_FAILED(rv)) return rv;
00238 
00239   mInput = aStream;
00240 
00241   return NS_OK;
00242 }
00243 
00244 NS_IMPL_ISUPPORTS1(UTF8InputStream,nsIUnicharInputStream)
00245 
00246 UTF8InputStream::~UTF8InputStream()
00247 {
00248   Close();
00249 }
00250 
00251 nsresult UTF8InputStream::Close()
00252 {
00253   mInput = nsnull;
00254   mByteData = nsnull;
00255   mUnicharData = nsnull;
00256 
00257   return NS_OK;
00258 }
00259 
00260 nsresult UTF8InputStream::Read(PRUnichar* aBuf,
00261                                PRUint32 aCount,
00262                                PRUint32 *aReadCount)
00263 {
00264   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
00265   PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
00266   nsresult errorCode;
00267   if (0 == readCount) {
00268     // Fill the unichar buffer
00269     readCount = Fill(&errorCode);
00270     if (readCount <= 0) {
00271       *aReadCount = 0;
00272       return errorCode;
00273     }
00274   }
00275   if (readCount > aCount) {
00276     readCount = aCount;
00277   }
00278   memcpy(aBuf, mUnicharData->GetBuffer() + mUnicharDataOffset,
00279          readCount * sizeof(PRUnichar));
00280   mUnicharDataOffset += readCount;
00281   *aReadCount = readCount;
00282   return NS_OK;
00283 }
00284 
00285 NS_IMETHODIMP
00286 UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
00287                               void* aClosure,
00288                               PRUint32 aCount, PRUint32 *aReadCount)
00289 {
00290   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
00291   PRUint32 bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
00292   nsresult rv = NS_OK;
00293   if (0 == bytesToWrite) {
00294     // Fill the unichar buffer
00295     bytesToWrite = Fill(&rv);
00296     if (bytesToWrite <= 0) {
00297       *aReadCount = 0;
00298       return rv;
00299     }
00300   }
00301   
00302   if (bytesToWrite > aCount)
00303     bytesToWrite = aCount;
00304   
00305   PRUint32 bytesWritten;
00306   PRUint32 totalBytesWritten = 0;
00307 
00308   while (bytesToWrite) {
00309     rv = aWriter(this, aClosure,
00310                  mUnicharData->GetBuffer() + mUnicharDataOffset,
00311                  totalBytesWritten, bytesToWrite, &bytesWritten);
00312 
00313     if (NS_FAILED(rv)) {
00314       // don't propagate errors to the caller
00315       break;
00316     }
00317     
00318     bytesToWrite -= bytesWritten;
00319     totalBytesWritten += bytesWritten;
00320     mUnicharDataOffset += bytesWritten;
00321   }
00322 
00323   *aReadCount = totalBytesWritten;
00324   
00325   return NS_OK;
00326 }
00327 
00328 NS_IMETHODIMP
00329 UTF8InputStream::ReadString(PRUint32 aCount, nsAString& aString,
00330                             PRUint32* aReadCount)
00331 {
00332   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
00333   PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset;
00334   nsresult errorCode;
00335   if (0 == readCount) {
00336     // Fill the unichar buffer
00337     readCount = Fill(&errorCode);
00338     if (readCount <= 0) {
00339       *aReadCount = 0;
00340       return errorCode;
00341     }
00342   }
00343   if (readCount > aCount) {
00344     readCount = aCount;
00345   }
00346   const PRUnichar* buf = NS_REINTERPRET_CAST(const PRUnichar*, 
00347                                              mUnicharData->GetBuffer() +
00348                                              mUnicharDataOffset);
00349   aString.Assign(buf, readCount);
00350 
00351   mUnicharDataOffset += readCount;
00352   *aReadCount = readCount;
00353   return NS_OK;
00354 }
00355 
00356 
00357 PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)
00358 {
00359   if (nsnull == mInput) {
00360     // We already closed the stream!
00361     *aErrorCode = NS_BASE_STREAM_CLOSED;
00362     return -1;
00363   }
00364 
00365   NS_ASSERTION(mByteData->GetLength() >= mByteDataOffset, "unsigned madness");
00366   PRUint32 remainder = mByteData->GetLength() - mByteDataOffset;
00367   mByteDataOffset = remainder;
00368   PRInt32 nb = mByteData->Fill(aErrorCode, mInput, remainder);
00369   if (nb <= 0) {
00370     // Because we assume a many to one conversion, the lingering data
00371     // in the byte buffer must be a partial conversion
00372     // fragment. Because we know that we have recieved no more new
00373     // data to add to it, we can't convert it. Therefore, we discard
00374     // it.
00375     return nb;
00376   }
00377   NS_ASSERTION(remainder + nb == mByteData->GetLength(), "bad nb");
00378 
00379   // Now convert as much of the byte buffer to unicode as possible
00380   PRUint32 srcLen, dstLen;
00381   CountValidUTF8Bytes(mByteData->GetBuffer(),remainder + nb, srcLen, dstLen);
00382 
00383   // the number of UCS2 characters should always be <= the number of
00384   // UTF8 chars
00385   NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer");
00386   NS_ASSERTION(PRInt32(dstLen) <= mUnicharData->GetBufferSize(),
00387                "Ouch. I would overflow my buffer if I wasn't so careful.");
00388   if (PRInt32(dstLen) > mUnicharData->GetBufferSize()) return 0;
00389   
00390   ConvertUTF8toUTF16 converter(mUnicharData->GetBuffer());
00391   
00392   nsASingleFragmentCString::const_char_iterator start = mByteData->GetBuffer();
00393   nsASingleFragmentCString::const_char_iterator end = mByteData->GetBuffer() + srcLen;
00394             
00395   copy_string(start, end, converter);
00396   NS_ASSERTION(converter.Length() == dstLen, "length mismatch");
00397                
00398   mUnicharDataOffset = 0;
00399   mUnicharDataLength = converter.Length();
00400   mByteDataOffset = srcLen;
00401   
00402   return mUnicharDataLength;
00403 }
00404 
00405 void
00406 UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUCS2chars)
00407 {
00408   const char *c = aBuffer;
00409   const char *end = aBuffer + aMaxBytes;
00410   const char *lastchar = c;     // pre-initialize in case of 0-length buffer
00411   PRUint32 ucs2bytes = 0;
00412   while (c < end && *c) {
00413     lastchar = c;
00414     ucs2bytes++;
00415     
00416     if (UTF8traits::isASCII(*c))
00417       c++;
00418     else if (UTF8traits::is2byte(*c))
00419       c += 2;
00420     else if (UTF8traits::is3byte(*c))
00421       c += 3;
00422     else if (UTF8traits::is4byte(*c))
00423       c += 4;
00424     else if (UTF8traits::is5byte(*c))
00425       c += 5;
00426     else if (UTF8traits::is6byte(*c))
00427       c += 6;
00428     else {
00429       NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
00430       break; // Otherwise we go into an infinite loop.  But what happens now?
00431     }
00432   }
00433   if (c > end) {
00434     c = lastchar;
00435     ucs2bytes--;
00436   }
00437 
00438   aValidUTF8bytes = c - aBuffer;
00439   aValidUCS2chars = ucs2bytes;
00440 }
00441 
00442 NS_COM nsresult
00443 NS_NewUTF8ConverterStream(nsIUnicharInputStream** aInstancePtrResult,
00444                           nsIInputStream* aStreamToWrap,
00445                           PRInt32 aBufferSize)
00446 {
00447   // Create converter input stream
00448   UTF8InputStream* it = new UTF8InputStream();
00449   if (nsnull == it) {
00450     return NS_ERROR_OUT_OF_MEMORY;
00451   }
00452 
00453   nsresult rv = it->Init(aStreamToWrap, aBufferSize);
00454   if (NS_FAILED(rv))
00455     return rv;
00456 
00457   return it->QueryInterface(NS_GET_IID(nsIUnicharInputStream), 
00458                             (void **) aInstancePtrResult);
00459 }