Back to index

lightning-sunbird  0.9+nobinonly
nsUnicodeToUTF8.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 //----------------------------------------------------------------------
00039 // Global functions and data [declaration]
00040 #include "nsUCSupport.h"
00041 #include "nsUnicodeToUTF8.h"
00042 #include <string.h>
00043 
00044 NS_IMPL_ISUPPORTS1(nsUnicodeToUTF8, nsIUnicodeEncoder) // macro defined outside this file; presumably generates the nsISupports boilerplate exposing nsIUnicodeEncoder
00045 
00046 //----------------------------------------------------------------------
00047 // nsUnicodeToUTF8 class [implementation]
00048 
00049 NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const PRUnichar * aSrc, 
00050                                               PRInt32 aSrcLength,
00051                                               PRInt32 * aDestLength)
00052 {
00053   // aSrc is interpreted as UTF16, 3 is normally enough.
00054   // But when previous buffer only contains part of the surrogate pair, we 
00055   // need to complete it here. If the first word in following buffer is not
00056   // in valid surrogate rang, we need to convert the remaining of last buffer 
00057   // to 3 bytes.
00058   *aDestLength = 3*aSrcLength + 3;
00059   return NS_OK;
00060 }
00061 
00062 NS_IMETHODIMP nsUnicodeToUTF8::FillInfo(PRUint32 *aInfo)
00063 {
00064   memset(aInfo, 0xFF, (0x10000L >> 3));
00065   return NS_OK;
00066 }
00067 
00068 NS_IMETHODIMP nsUnicodeToUTF8::Convert(const PRUnichar * aSrc, 
00069                                 PRInt32 * aSrcLength, 
00070                                 char * aDest, 
00071                                 PRInt32 * aDestLength)
00072 {
00073   const PRUnichar * src = aSrc;
00074   const PRUnichar * srcEnd = aSrc + *aSrcLength;
00075   char * dest = aDest;
00076   PRInt32 destLen = *aDestLength;
00077   PRUint32 n;
00078 
00079   //complete remaining of last conversion
00080   if (mHighSurrogate) {
00081     if (src < srcEnd) {
00082       *aDestLength = 0;
00083       return NS_OK_UENC_MOREINPUT;
00084     }
00085     if (*aDestLength < 4) {
00086       *aSrcLength = 0;
00087       *aDestLength = 0;
00088       return NS_OK_UENC_MOREOUTPUT;
00089     }
00090     if (*src < (PRUnichar)0xdc00 || *src > (PRUnichar)0xdfff) { //not a pair
00091       *dest++ = (char)0xe0 | (mHighSurrogate >> 12);
00092       *dest++ = (char)0x80 | ((mHighSurrogate >> 6) & 0x003f);
00093       *dest++ = (char)0x80 | (mHighSurrogate & 0x003f);
00094       destLen -= 3;
00095     } else { 
00096       n = ((mHighSurrogate - (PRUnichar)0xd800) << 10) + 
00097               (*src - (PRUnichar)0xdc00) + 0x10000;
00098       *dest++ = (char)0xf0 | (n >> 18);
00099       *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
00100       *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
00101       *dest++ = (char)0x80 | (n & 0x3f);
00102       ++src;
00103       destLen -= 4;
00104     }
00105     mHighSurrogate = 0;
00106   }
00107 
00108   while (src < srcEnd) {
00109     if ( *src <= 0x007f) {
00110       if (destLen < 1)
00111         goto error_more_output;
00112       *dest++ = (char)*src;
00113       --destLen;
00114     } else if (*src <= 0x07ff) {
00115       if (destLen < 2)
00116         goto error_more_output;
00117       *dest++ = (char)0xc0 | (*src >> 6);
00118       *dest++ = (char)0x80 | (*src & 0x003f);
00119       destLen -= 2;
00120     } else if (*src >= (PRUnichar)0xD800 && *src < (PRUnichar)0xDC00) {
00121       if ((src+1) >= srcEnd) {
00122         //we need another surrogate to complete this unicode char
00123         mHighSurrogate = *src;
00124         *aDestLength = dest - aDest;
00125         return NS_OK_UENC_MOREINPUT;
00126       }
00127       //handle surrogate
00128       if (destLen < 4)
00129         goto error_more_output;
00130       if (*(src+1) < (PRUnichar)0xdc00 || *(src+1) > 0xdfff) { //not a pair
00131         *dest++ = (char)0xe0 | (*src >> 12);
00132         *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
00133         *dest++ = (char)0x80 | (*src & 0x003f);
00134         destLen -= 3;
00135       } else {
00136         n = ((*src - (PRUnichar)0xd800) << 10) + (*(src+1) - (PRUnichar)0xdc00) + (PRUint32)0x10000;
00137         *dest++ = (char)0xf0 | (n >> 18);
00138         *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
00139         *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
00140         *dest++ = (char)0x80 | (n & 0x3f);
00141         destLen -= 4;
00142         ++src;
00143       }
00144     } else { 
00145       if (destLen < 3)
00146         goto error_more_output;
00147       //treat rest of the character as BMP
00148       *dest++ = (char)0xe0 | (*src >> 12);
00149       *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
00150       *dest++ = (char)0x80 | (*src & 0x003f);
00151       destLen -= 3;
00152     }
00153     ++src;
00154   }
00155 
00156   *aDestLength = dest - aDest;
00157   return NS_OK;
00158 
00159 error_more_output:
00160   *aSrcLength = src - aSrc;
00161   *aDestLength = dest - aDest;
00162   return NS_OK_UENC_MOREOUTPUT;
00163 }
00164 
00165 NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, PRInt32 * aDestLength)
00166 {
00167   char * dest = aDest;
00168 
00169   if (mHighSurrogate) {
00170     if (*aDestLength < 3) {
00171       *aDestLength = 0;
00172       return NS_OK_UENC_MOREOUTPUT;
00173     }
00174     *dest++ = (char)0xe0 | (mHighSurrogate >> 12);
00175     *dest++ = (char)0x80 | ((mHighSurrogate >> 6) & 0x003f);
00176     *dest++ = (char)0x80 | (mHighSurrogate & 0x003f);
00177     mHighSurrogate = 0;
00178     *aDestLength = 3;
00179     return NS_OK;
00180   } 
00181 
00182   *aDestLength  = 0;
00183   return NS_OK;
00184 }