Back to index

lightning-sunbird  0.9+nobinonly
nsISO2022CNToUnicode.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Ervin Yan <ervin.yan@sun.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 #include "nsISO2022CNToUnicode.h"
00039 #include "nsUCSupport.h"
00040 #include "nsICharsetConverterManager.h"
00041 #include "nsIServiceManager.h"
00042 
// Class ID handed to do_GetService() when looking up the charset converter
// manager, from which the GB2312 and x-euc-tw delegate decoders in this file
// are requested.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
00044 
00045 NS_IMETHODIMP nsISO2022CNToUnicode::GB2312_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
00046 {
00047     nsresult rv;
00048 
00049     if(!mGB2312_Decoder) {
00050        // creating a delegate converter (GB2312)
00051        nsCOMPtr<nsICharsetConverterManager> ccm =
00052               do_GetService(kCharsetConverterManagerCID, &rv);
00053        if(NS_FAILED(rv))
00054           return NS_ERROR_UNEXPECTED;
00055 
00056        rv = ccm->GetUnicodeDecoderRaw("GB2312", getter_AddRefs(mGB2312_Decoder));
00057        if(NS_FAILED(rv))
00058           return NS_ERROR_UNEXPECTED;
00059     }
00060 
00061     if(!mGB2312_Decoder) // failed creating a delegate converter
00062        return NS_ERROR_UNEXPECTED;
00063 
00064     rv = mGB2312_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
00065     return rv;
00066 }
00067 
00068 NS_IMETHODIMP nsISO2022CNToUnicode::EUCTW_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
00069 {
00070     nsresult rv;
00071 
00072     if(!mEUCTW_Decoder) {
00073        // creating a delegate converter (x-euc-tw)
00074        nsCOMPtr<nsICharsetConverterManager> ccm =
00075               do_GetService(kCharsetConverterManagerCID, &rv);
00076        if(NS_FAILED(rv))
00077           return NS_ERROR_UNEXPECTED;
00078 
00079        rv = ccm->GetUnicodeDecoderRaw("x-euc-tw", getter_AddRefs(mEUCTW_Decoder));
00080        if(NS_FAILED(rv))
00081           return NS_ERROR_UNEXPECTED;
00082     }
00083 
00084     if(!mEUCTW_Decoder) // failed creating a delegate converter
00085        return NS_ERROR_UNEXPECTED;
00086 
00087     rv = mEUCTW_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
00088     return(rv);
00089 }
00090 
00091 NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
00092 {
00093   const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
00094   const unsigned char * src = (unsigned char *) aSrc;
00095   PRUnichar* destEnd = aDest + *aDestLen;
00096   PRUnichar* dest = aDest;
00097   nsresult rv;
00098   PRInt32 aLen; 
00099 
00100   while ((src < srcEnd))
00101   {
00102     switch (mState)
00103     {
00104       case eState_ASCII:
00105         if(ESC == *src) {
00106            mState = eState_ESC;
00107         } else {
00108            if(dest+1 >= destEnd)
00109               goto error1;
00110            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00111 
00112            mState = eState_ASCII;
00113         }
00114         break;
00115 
00116       case eState_ESC:    // ESC
00117         if('$' == *src) {
00118            mState = eState_ESC_24;
00119         } else {
00120            if(dest+2 >= destEnd)
00121               goto error1;
00122            *dest++ = (PRUnichar) ESC;
00123            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00124 
00125            mState = eState_ASCII;
00126         }
00127         break;
00128 
00129       case eState_ESC_24: // ESC $
00130         if(')' == *src) {
00131            mState = eState_ESC_24_29;
00132         } else if('*' == *src) {
00133            mState = eState_ESC_24_2A;
00134         } else if('+' == *src) {
00135            mState = eState_ESC_24_2B;
00136         } else {
00137            if(dest+3 >= destEnd)
00138               goto error1;
00139            *dest++ = (PRUnichar) ESC;
00140            *dest++ = (PRUnichar) '$';
00141            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00142 
00143            mState = eState_ASCII;
00144         }
00145         break;
00146 
00147       case eState_ESC_24_29: // ESC $ )
00148         if('A' == *src) {
00149            mState = eState_ESC_24_29_A;
00150         } else if('G' == *src) {
00151            mState = eState_ESC_24_29_G;
00152         } else {
00153            if(dest+4 >= destEnd)
00154               goto error1;
00155            *dest++ = (PRUnichar) ESC;
00156            *dest++ = (PRUnichar) '$';
00157            *dest++ = (PRUnichar) ')';
00158            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00159 
00160            mState = eState_ASCII;
00161         }
00162         break;
00163 
00164       case eState_ESC_24_29_A:  // ESC $ ) A
00165         if(SO == *src) {
00166            mState = eState_GB2312_1980;
00167            mRunLength = 0;
00168         } else {
00169            if(dest+5 >= destEnd)
00170               goto error1;
00171            *dest++ = (PRUnichar) ESC;
00172            *dest++ = (PRUnichar) '$';
00173            *dest++ = (PRUnichar) ')';
00174            *dest++ = (PRUnichar) 'A';
00175            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00176 
00177            mState = eState_ASCII;
00178         }
00179         break;
00180 
00181       case eState_GB2312_1980:   // ESC $ ) A SO
00182         if(SI == *src) { // Shift-In (SI)
00183            mState = eState_ESC_24_29_A_SO_SI;
00184            if (mRunLength == 0) {
00185               if(dest+1 >= destEnd)
00186                  goto error1;
00187               *dest++ = 0xFFFD;
00188            }
00189            mRunLength = 0;
00190         } else if(ESC == *src) {
00191            mState = eState_ESC;
00192         } else {
00193            if(0x20 < *src && *src < 0x7f) {
00194               mData = *src;
00195               mState = eState_GB2312_1980_2ndbyte;
00196            } else {
00197               if(dest+1 >= destEnd)
00198                  goto error1;
00199               *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00200            }
00201         }
00202         break; 
00203 
00204       case eState_GB2312_1980_2ndbyte:  // ESC $ ) A SO
00205         if(0x20 < *src && *src < 0x7f) {
00206            unsigned char gb[2];
00207            PRInt32 gbLen = 2;
00208 
00209            gb[0] = mData | 0x80;
00210            gb[1] = *src | 0x80;
00211 
00212            aLen = destEnd - dest;
00213            rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
00214            ++mRunLength;
00215            if(rv == NS_OK_UDEC_MOREOUTPUT) {
00216               goto error1;
00217            } else if(NS_FAILED(rv)) {
00218               goto error2;
00219            }
00220 
00221            dest += aLen;
00222         } else {
00223            if(dest+2 >= destEnd)
00224               goto error1;
00225            *dest++ = (PRUnichar) mData;
00226            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00227         }
00228         mState = eState_GB2312_1980;
00229         break;
00230 
00231       case eState_ESC_24_29_A_SO_SI:  // ESC $ ) A SO SI
00232         if(SO == *src) {
00233            mState = eState_GB2312_1980;
00234            mRunLength = 0;
00235         } else if(ESC == *src) {
00236            mState = eState_ESC;
00237         } else {
00238            if(dest+1 >= destEnd)
00239               goto error1;
00240            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00241 
00242            mState = eState_ESC_24_29_A_SO_SI;
00243         }
00244         break;
00245 
00246       case eState_ESC_24_29_G:   // ESC $ ) G
00247         if(SO == *src) {
00248            mState = eState_CNS11643_1;
00249            mRunLength = 0;
00250         } else {
00251            if(dest+5 >= destEnd)
00252               goto error1;
00253            *dest++ = (PRUnichar) ESC;
00254            *dest++ = (PRUnichar) '$';
00255            *dest++ = (PRUnichar) ')';
00256            *dest++ = (PRUnichar) 'G';
00257            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00258 
00259            mState = eState_ASCII;
00260         }
00261         break;
00262 
00263       case eState_CNS11643_1:   // ESC $ ) G SO
00264         if(SI == *src) { // Shift-In (SI)
00265            mState = eState_ESC_24_29_G_SO_SI;
00266            if (mRunLength == 0) {
00267               if(dest+1 >= destEnd)
00268                  goto error1;
00269               *dest++ = 0xFFFD;
00270            }
00271            mRunLength = 0;
00272         } else if(ESC == *src) {
00273            mState = eState_ESC;
00274         } else {
00275            if(0x20 < *src && *src < 0x7f) {
00276               mData = *src;
00277               mState = eState_CNS11643_1_2ndbyte;
00278            } else {
00279               if(dest+1 >= destEnd)
00280                  goto error1;
00281               *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00282            }
00283         }
00284         break;
00285 
00286       case eState_CNS11643_1_2ndbyte:  // ESC $ ) G SO
00287         if(0x20 < *src && *src < 0x7f) {
00288            unsigned char cns[4];
00289            PRInt32 cnsLen = 2;
00290 
00291            cns[0] = mData | 0x80;
00292            cns[1] = *src | 0x80;
00293 
00294            aLen = destEnd - dest;
00295            rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
00296            ++mRunLength;
00297            if(rv == NS_OK_UDEC_MOREOUTPUT) {
00298               goto error1;
00299            } else if(NS_FAILED(rv)) {
00300               goto error2;
00301            }
00302 
00303            dest += aLen;
00304         } else {
00305            if(dest+2 >= destEnd)
00306               goto error1;
00307            *dest++ = (PRUnichar) mData;
00308            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00309         }
00310         mState = eState_CNS11643_1;
00311         break;
00312 
00313       case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
00314         if(SO == *src) {
00315            mState = eState_CNS11643_1;
00316            mRunLength = 0;
00317         } else if(ESC == *src) {
00318            mState = eState_ESC;
00319         } else {
00320            if(dest+1 >= destEnd)
00321               goto error1;
00322            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00323 
00324            mState = eState_ESC_24_29_G_SO_SI;
00325         }
00326         break;
00327 
00328       case eState_ESC_24_2A: // ESC $ *
00329         if('H' == *src) {
00330            mState = eState_ESC_24_2A_H;
00331         } else {
00332            if(dest+4 >= destEnd)
00333               goto error1;
00334            *dest++ = (PRUnichar) ESC;
00335            *dest++ = (PRUnichar) '$';
00336            *dest++ = (PRUnichar) '*';
00337            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00338 
00339            mState = eState_ASCII;
00340         }
00341         break;
00342 
00343       case eState_ESC_24_2A_H:  // ESC $ * H
00344         if(ESC == *src) {
00345            mState = eState_ESC_24_2A_H_ESC;
00346         } else {
00347            if(dest+5 >= destEnd)
00348               goto error1;
00349            *dest++ = (PRUnichar) ESC;
00350            *dest++ = (PRUnichar) '$';
00351            *dest++ = (PRUnichar) '*';
00352            *dest++ = (PRUnichar) 'H';
00353            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00354 
00355            mState = eState_ASCII;
00356         } 
00357         break;
00358 
00359       case eState_ESC_24_2A_H_ESC:  // ESC $ * H ESC
00360         if(SS2 == *src) {
00361            mState = eState_CNS11643_2;
00362            mRunLength = 0;
00363         } else if('$' == *src) {
00364            mState = eState_ESC_24;
00365         } else {
00366            if(dest+6 >= destEnd)
00367               goto error1;
00368            *dest++ = (PRUnichar) ESC;
00369            *dest++ = (PRUnichar) '$';
00370            *dest++ = (PRUnichar) '*';
00371            *dest++ = (PRUnichar) 'H';
00372            *dest++ = (PRUnichar) ESC;
00373            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00374 
00375            mState = eState_ASCII;
00376         }
00377         break;
00378 
00379       case eState_CNS11643_2:  // ESC $ * H ESC SS2
00380         if(SI == *src) { // Shift-In (SI)
00381            mState = eState_ESC_24_2A_H_ESC_SS2_SI;
00382            if (mRunLength == 0) {
00383               if(dest+1 >= destEnd)
00384                  goto error1;
00385               *dest++ = 0xFFFD;
00386            }
00387            mRunLength = 0;
00388         } else if(ESC == *src) {
00389            mState = eState_ESC_24_2A_H_ESC;
00390         } else {
00391            if(0x20 < *src && *src < 0x7f) {
00392               mData = *src;
00393               mState = eState_CNS11643_2_2ndbyte;
00394            } else {
00395               if(dest+1 >= destEnd)
00396                  goto error1;
00397               *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00398            }
00399         }
00400         break;
00401 
00402       case eState_CNS11643_2_2ndbyte:   // ESC $ * H ESC SS2
00403         if(0x20 < *src && *src < 0x7f) {
00404            unsigned char cns[4];
00405            PRInt32 cnsLen = 4;
00406  
00407            cns[0] = (unsigned char) MBYTE;
00408            cns[1] = (unsigned char) (PMASK + 2);
00409            cns[2] = mData | 0x80;
00410            cns[3] = *src | 0x80;
00411  
00412            aLen = destEnd - dest;
00413            rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
00414            ++mRunLength;
00415            if(rv == NS_OK_UDEC_MOREOUTPUT) {
00416               goto error1;
00417            } else if(NS_FAILED(rv)) {
00418               goto error2;
00419            }
00420 
00421            dest += aLen;
00422         } else {
00423            if(dest+2 >= destEnd)
00424               goto error1;
00425            *dest++ = (PRUnichar) mData;
00426            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00427         }
00428         mState = eState_CNS11643_2;
00429         break;
00430 
00431       case eState_ESC_24_2A_H_ESC_SS2_SI:  // ESC $ * H ESC SS2 SI
00432         if(ESC == *src) {
00433            mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
00434         } else {
00435            if(dest+1 >= destEnd)
00436               goto error1;
00437            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00438 
00439            mState = eState_ESC_24_2A_H_ESC_SS2_SI;
00440         }
00441         break;
00442 
00443       case eState_ESC_24_2A_H_ESC_SS2_SI_ESC:  // ESC $ * H ESC SS2 SI ESC
00444         if(SS2 == *src) {
00445            mState = eState_CNS11643_2;
00446            mRunLength = 0;
00447         } else if('$' == *src) {
00448            mState = eState_ESC_24;
00449         } else {
00450            if(dest+1 >= destEnd)
00451               goto error1;
00452            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00453 
00454            mState = eState_ESC_24_2A_H_ESC_SS2_SI;
00455         }
00456         break;
00457 
00458       case eState_ESC_24_2B: // ESC $ +
00459         if('I' <= *src && *src <= 'M') {
00460             mState = eState_ESC_24_2B_I;
00461             mPlaneID = *src - 'I' + 3;
00462         } else {
00463            if(dest+4 >= destEnd)
00464               goto error1;
00465            *dest++ = (PRUnichar) ESC;
00466            *dest++ = (PRUnichar) '$';
00467            *dest++ = (PRUnichar) '+';
00468            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00469 
00470            mState = eState_ASCII;
00471         }
00472         break;
00473 
00474       case eState_ESC_24_2B_I:  // ESC $ + I
00475         if(ESC == *src) {
00476            mState = eState_ESC_24_2B_I_ESC;
00477         } else {
00478            if(dest+5 >= destEnd)
00479               goto error1;
00480            *dest++ = (PRUnichar) ESC;
00481            *dest++ = (PRUnichar) '$';
00482            *dest++ = (PRUnichar) '+';
00483            *dest++ = (PRUnichar) 'I' + mPlaneID - 3;
00484            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00485 
00486            mState = eState_ASCII;
00487         }
00488         break;
00489 
00490       case eState_ESC_24_2B_I_ESC:  // ESC $ + I ESC
00491         if(SS3 == *src) {
00492            mState = eState_CNS11643_3;
00493            mRunLength = 0;
00494         } else if('$' == *src) {
00495            mState = eState_ESC_24;
00496         } else {
00497            if(dest+6 >= destEnd)
00498               goto error1;
00499            *dest++ = (PRUnichar) ESC;
00500            *dest++ = (PRUnichar) '$';
00501            *dest++ = (PRUnichar) '+';
00502            *dest++ = (PRUnichar) 'I' + mPlaneID - 3;
00503            *dest++ = (PRUnichar) ESC;
00504            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00505 
00506            mState = eState_ASCII;
00507         }
00508         break;
00509 
00510       case eState_CNS11643_3:   // ESC $ + I ESC SS3
00511         if(SI == *src) { // Shift-In (SI)
00512            mState = eState_ESC_24_2B_I_ESC_SS3_SI;
00513            if (mRunLength == 0) {
00514               if(dest+1 >= destEnd)
00515                  goto error1;
00516               *dest++ = 0xFFFD;
00517            }
00518            mRunLength = 0;
00519         } else if(ESC == *src) {
00520            mState = eState_ESC_24_2B_I_ESC;
00521         } else {
00522            if(0x20 < *src && *src < 0x7f) {
00523               mData = *src;
00524               mState = eState_CNS11643_3_2ndbyte;
00525            } else {
00526               if(dest+1 >= destEnd)
00527                  goto error1;
00528               *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00529            }
00530         }
00531 
00532         break;
00533 
00534       case eState_CNS11643_3_2ndbyte:  // ESC $ + I ESC SS3
00535         if(0x20 < *src && *src < 0x7f) {
00536            unsigned char cns[4];
00537            PRInt32 cnsLen = 4;
00538 
00539            cns[0] = (unsigned char) MBYTE;
00540            cns[1] = (unsigned char) (PMASK + mPlaneID);
00541            cns[2] = mData | 0x80;
00542            cns[3] = *src | 0x80;
00543 
00544            aLen = destEnd - dest;
00545            rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
00546            ++mRunLength;
00547            if(rv == NS_OK_UDEC_MOREOUTPUT) {
00548               goto error1;
00549            } else if(NS_FAILED(rv)) {
00550               goto error2;
00551            }
00552 
00553            dest += aLen;
00554         } else {
00555            if(dest+2 >= destEnd)
00556               goto error1;
00557            *dest++ = (PRUnichar) mData;
00558            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00559         }
00560         mState = eState_CNS11643_3;
00561         break;
00562 
00563       case eState_ESC_24_2B_I_ESC_SS3_SI:  // ESC $ + I ESC SS3 SI
00564         if(ESC == *src) {
00565            mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
00566         } else {
00567            if(dest+1 >= destEnd)
00568               goto error1;
00569            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00570 
00571            mState = eState_ESC_24_2B_I_ESC_SS3_SI;
00572         }
00573         break;
00574 
00575       case eState_ESC_24_2B_I_ESC_SS3_SI_ESC:  // ESC $ + I ESC SS3 SI ESC
00576         if(SS3 == *src) {
00577            mState = eState_CNS11643_3;
00578            mRunLength = 0;
00579         } else if('$' == *src) {
00580            mState = eState_ESC_24;
00581         } else {
00582            if(dest+1 >= destEnd)
00583               goto error1;
00584            *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00585 
00586            mState = eState_ESC_24_2B_I_ESC_SS3_SI;
00587         }
00588         break;
00589 
00590     } // switch
00591     src++;
00592   }
00593 
00594   *aDestLen = dest- aDest;
00595   return NS_OK;
00596 
00597 error1:
00598   *aDestLen = dest-aDest;
00599   src++;
00600   if ((mState == eState_ASCII) && (src == srcEnd)) {
00601     return NS_OK;
00602   }
00603   *aSrcLen = src - (const unsigned char*)aSrc;
00604   return NS_OK_UDEC_MOREOUTPUT;
00605 
00606 error2:
00607   *aSrcLen = src - (const unsigned char*)aSrc;
00608   *aDestLen = dest-aDest;
00609   mState = eState_ASCII;
00610   return NS_ERROR_UNEXPECTED;
00611 }