Back to index

lightning-sunbird  0.9+nobinonly
nsUnicodeToUTF32.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* vim:expandtab:shiftwidth=2:tabstop=2: 
00003  */
00004 /* ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is Mozilla Communicator client code.
00018  *
00019  * The Initial Developer of the Original Code is
00020  * Netscape Communications Corporation.
00021  * Portions created by the Initial Developer are Copyright (C) 1998
00022  * the Initial Developer. All Rights Reserved.
00023  *
00024  * Contributor(s):
00025  *   Jungshik Shin <jshin@mailaps.org>
00026  *
00027  * Alternatively, the contents of this file may be used under the terms of
00028  * either of the GNU General Public License Version 2 or later (the "GPL"),
00029  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00030  * in which case the provisions of the GPL or the LGPL are applicable instead
00031  * of those above. If you wish to allow use of your version of this file only
00032  * under the terms of either the GPL or the LGPL, and not to allow others to
00033  * use your version of this file under the terms of the MPL, indicate your
00034  * decision by deleting the provisions above and replace them with the notice
00035  * and other provisions required by the GPL or the LGPL. If you do not delete
00036  * the provisions above, a recipient may use your version of this file under
00037  * the terms of any one of the MPL, the GPL or the LGPL.
00038  *
00039  * ***** END LICENSE BLOCK ***** */
00040 
00041 #include <string.h>
00042 #include "nsUCSupport.h"
00043 #include "nsUnicodeToUTF32.h"
00044 
// Stores the 32-bit value |u| into the four bytes at |s|, least significant
// byte first (UTF-32LE order).
//
// The bytes are written one at a time on every host.  That produces the same
// output on big- and little-endian machines, so no IS_BIG_ENDIAN switch is
// needed, and it avoids storing through a PRUint32 pointer cast from a char
// buffer, which is only valid when |s| happens to be 4-byte aligned.
//
// Both |u| and |s| are evaluated four times; pass expressions without side
// effects.
#define UCS4_TO_LE_STRING(u, s)             \
  PR_BEGIN_MACRO                            \
    (s)[0] = PRUint8((u) & 0xffL);          \
    (s)[1] = PRUint8(((u) >> 8) & 0xffL);   \
    (s)[2] = PRUint8(((u) >> 16) & 0xffL);  \
    (s)[3] = PRUint8(((u) >> 24) & 0xffL);  \
  PR_END_MACRO
00059 
// Stores the 32-bit value |u| into the four bytes at |s|, most significant
// byte first (UTF-32BE order).
//
// The bytes are written one at a time on every host.  That produces the same
// output on big- and little-endian machines, so no IS_BIG_ENDIAN switch is
// needed, and it avoids storing through a PRUint32 pointer cast from a char
// buffer, which is only valid when |s| happens to be 4-byte aligned.
//
// Both |u| and |s| are evaluated four times; pass expressions without side
// effects.
#define UCS4_TO_BE_STRING(u, s)             \
  PR_BEGIN_MACRO                            \
    (s)[0] = PRUint8(((u) >> 24) & 0xffL);  \
    (s)[1] = PRUint8(((u) >> 16) & 0xffL);  \
    (s)[2] = PRUint8(((u) >> 8) & 0xffL);   \
    (s)[3] = PRUint8((u) & 0xffL);          \
  PR_END_MACRO
00074 
00075 //----------------------------------------------------------------------
00076 // Static functions common to nsUnicodeToUTF32LE and nsUnicodeToUTF32BE
00077  
00078 static nsresult ConvertCommon(const PRUnichar * aSrc, 
00079                               PRInt32 * aSrcLength, 
00080                               char * aDest, 
00081                               PRInt32 * aDestLength,
00082                               PRUnichar * aHighSurrogate,
00083                               PRBool aIsLE)
00084 {
00085   const PRUnichar * src = aSrc;
00086   const PRUnichar * srcEnd = aSrc + *aSrcLength;
00087   char * dest = aDest;
00088   const char * destEnd = aDest + *aDestLength; 
00089   PRUint32 ucs4;
00090 
00091 
00092   // left-over high surroage code point from the prev. run.
00093   if (*aHighSurrogate) 
00094   {
00095     if (! *aSrcLength)
00096     {
00097       *aDestLength = 0;
00098       return NS_OK_UENC_MOREINPUT;
00099     }
00100     if (*aDestLength < 4) 
00101     {
00102       *aSrcLength = 0;
00103       *aDestLength = 0;
00104       return NS_OK_UENC_MOREOUTPUT;
00105     }
00106     if ((*src & 0xfc00) != 0xdc00) // Not a low surrogate codepoint. Unpaird.
00107       ucs4 = PRUint32(*aHighSurrogate);
00108     else 
00109       ucs4 = (((*aHighSurrogate & 0x3ffL) << 10) | (*src & 0x3ffL)) + 0x10000;
00110 
00111     ++src;
00112     if (aIsLE)
00113       UCS4_TO_LE_STRING(ucs4, dest);
00114     else
00115       UCS4_TO_BE_STRING(ucs4, dest);
00116     dest += 4;
00117     *aHighSurrogate = 0;
00118   }
00119 
00120   while (src < srcEnd) {
00121     // regular codepoint or an unpaired low surrogate
00122     if ((src[0] & 0xfc00) != 0xd800) 
00123     {
00124       if (destEnd - dest < 4)
00125         goto error_more_output;
00126       ucs4 = PRUint32(src[0]);  
00127     }
00128     else  // high surrogate
00129     {
00130       if ((src+1) >= srcEnd) {
00131         //we need another surrogate to complete this unicode char
00132         *aHighSurrogate = src[0];
00133         *aDestLength = dest - aDest;
00134         return NS_OK_UENC_MOREINPUT;
00135       }
00136       //handle surrogate
00137       if (destEnd - dest < 4)
00138         goto error_more_output;
00139       if ((src[1] & 0xfc00) != 0xdc00)  // unpaired 
00140         ucs4 = PRUint32(src[0]);  
00141       else 
00142       {  // convert surrogate pair to UCS4
00143         ucs4 = (((src[0] & 0x3ffL) << 10) | (src[1] & 0x3ffL)) + 0x10000;
00144         *aHighSurrogate = 0;
00145         ++src;
00146       }
00147     }
00148     if (aIsLE)
00149       UCS4_TO_LE_STRING(ucs4, dest);
00150     else
00151       UCS4_TO_BE_STRING(ucs4, dest);
00152     dest += 4;
00153     ++src;
00154   }
00155 
00156   *aDestLength = dest - aDest;
00157   return NS_OK;
00158 
00159 error_more_output:
00160   *aSrcLength = src - aSrc;
00161   *aDestLength = dest - aDest;
00162   return NS_OK_UENC_MOREOUTPUT;
00163 
00164 }
00165 
00166 static nsresult FinishCommon(char * aDest, 
00167                              PRInt32 * aDestLength, 
00168                              PRUnichar * aHighSurrogate,
00169                              PRBool aIsLE)
00170 {
00171   char * dest = aDest;
00172 
00173   if (*aHighSurrogate) {
00174     if (*aDestLength < 4) {
00175       *aDestLength = 0;
00176       return NS_OK_UENC_MOREOUTPUT;
00177     }
00178     PRUint32 high = PRUint32(*aHighSurrogate);
00179     if (aIsLE)
00180       UCS4_TO_LE_STRING(high, dest);
00181     else
00182       UCS4_TO_BE_STRING(high, dest);
00183     *aHighSurrogate = 0;
00184     *aDestLength = 4;
00185     return NS_OK;
00186   } 
00187 
00188   *aDestLength  = 0;
00189   return NS_OK;
00190 }
00191 
00192 
00193 
00194 //----------------------------------------------------------------------
00195 // Class nsUnicodeToUTF32 [implementation]
00196 
// NOTE(review): presumably expands to the nsISupports boilerplate for
// nsUnicodeToUTF32, with nsIUnicodeEncoder as its one advertised interface --
// confirm against the macro's definition, which is not visible in this file.
00197 NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32, nsIUnicodeEncoder)
00198 
00199 
00200 //----------------------------------------------------------------------
00201 // Subclassing of nsIUnicodeEncoder class [implementation]
00202 
00203 NS_IMETHODIMP nsUnicodeToUTF32::GetMaxLength(const PRUnichar * aSrc, 
00204                                             PRInt32 aSrcLength, 
00205                                             PRInt32 * aDestLength)
00206 {
00207   *aDestLength = aSrcLength * 4;
00208   return NS_OK;
00209 }
00210 
00211 NS_IMETHODIMP nsUnicodeToUTF32::FillInfo(PRUint32 *aInfo)
00212 {
00213   memset(aInfo, 0xFF, (0x10000L >> 3));
00214   return NS_OK;
00215 }
00216 
00217 
00218 //----------------------------------------------------------------------
00219 // Class nsUnicodeToUTF32BE [implementation]
00220 
00221 //----------------------------------------------------------------------
00222 // Subclassing of nsUnicodeToUTF32 class [implementation]
00223   
00224 
00225 NS_IMETHODIMP nsUnicodeToUTF32BE::Convert(const PRUnichar * aSrc, 
00226                                           PRInt32 * aSrcLength, 
00227                                           char * aDest, 
00228                                           PRInt32 * aDestLength)
00229 {
00230   return ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, 
00231                        &mHighSurrogate, PR_FALSE);
00232 }
00233 
00234 NS_IMETHODIMP nsUnicodeToUTF32BE::Finish(char * aDest, 
00235                                          PRInt32 * aDestLength)
00236 {
00237   return FinishCommon(aDest, aDestLength, &mHighSurrogate, PR_FALSE);
00238 }
00239 
00240 
00241 //----------------------------------------------------------------------
00242 // Class nsUnicodeToUTF32LE [implementation]
00243   
00244 //----------------------------------------------------------------------
00245 // Subclassing of nsUnicodeToUTF32 class [implementation]
00246 
00247 
00248 NS_IMETHODIMP nsUnicodeToUTF32LE::Convert(const PRUnichar * aSrc, 
00249                                           PRInt32 * aSrcLength, 
00250                                           char * aDest, 
00251                                           PRInt32 * aDestLength)
00252 {
00253   return ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, 
00254                        &mHighSurrogate, PR_TRUE);
00255 }
00256 
00257 NS_IMETHODIMP nsUnicodeToUTF32LE::Finish(char * aDest, 
00258                                          PRInt32 * aDestLength)
00259 {
00260   return FinishCommon(aDest, aDestLength, &mHighSurrogate, PR_TRUE);
00261 }
00262