Back to index

lightning-sunbird  0.9+nobinonly
nsJapaneseToUnicode.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 #include "nsJapaneseToUnicode.h"
00038 
00039 #include "nsUCSupport.h"
00040 
00041 #include "nsIPrefBranch.h"
00042 #include "nsIPrefService.h"
00043 
00044 #include "japanese.map"
00045 
00046 #include "nsICharsetConverterManager.h"
00047 #include "nsIServiceManager.h"
00048 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
00049 
// Shorthands for the lead-byte index tables used by the decoders below.
// SJIS_INDEX and JIS0208_INDEX select entries of the member mMapIndex, which
// setMapMode() points at gIndex, gCP932Index or gIBM943Index; JIS0212_INDEX
// always refers to the global gJIS0212Index. Each is subscripted with
// (lead byte & 0x7F) by the Convert() implementations in this file.
00050 #define SJIS_INDEX mMapIndex[0]
00051 #define JIS0208_INDEX mMapIndex[1]
00052 #define JIS0212_INDEX gJIS0212Index
00053 
00054 void nsJapaneseToUnicode::setMapMode()
00055 {
00056   nsresult res;
00057 
00058   mMapIndex = gIndex;
00059 
00060   nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
00061   if (!prefBranch) return;
00062   nsXPIDLCString prefMap;
00063   res = prefBranch->GetCharPref("intl.jis0208.map", getter_Copies(prefMap));
00064   if (!NS_SUCCEEDED(res)) return;
00065   nsCaseInsensitiveCStringComparator comparator;
00066   if ( prefMap.Equals(NS_LITERAL_CSTRING("cp932"), comparator) ) {
00067     mMapIndex = gCP932Index;
00068   } else if ( prefMap.Equals(NS_LITERAL_CSTRING("ibm943"), comparator) ) {
00069     mMapIndex = gIBM943Index;
00070   }
00071 }
00072 
00073 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
00074    const char * aSrc, PRInt32 * aSrcLen,
00075      PRUnichar * aDest, PRInt32 * aDestLen)
00076 {
00077    static const PRUint8 sbIdx[256] =
00078    {
00079      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x00 */
00080      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x08 */
00081      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x10 */
00082      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x18 */
00083      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x20 */
00084      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x28 */
00085      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x30 */
00086      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x38 */
00087         0,    1,    2,    3,    4,    5,    6,    7,  /* 0x40 */
00088         8,    9,   10,   11,   12,   13,   14,   15,  /* 0x48 */
00089        16,   17,   18,   19,   20,   21,   22,   23,  /* 0x50 */
00090        24,   25,   26,   27,   28,   29,   30,   31,  /* 0x58 */
00091        32,   33,   34,   35,   36,   37,   38,   39,  /* 0x60 */
00092        40,   41,   42,   43,   44,   45,   46,   47,  /* 0x68 */
00093        48,   49,   50,   51,   52,   53,   54,   55,  /* 0x70 */
00094        56,   57,   58,   59,   60,   61,   62, 0xFF,  /* 0x78 */
00095        63,   64,   65,   66,   67,   68,   69,   70,  /* 0x80 */
00096        71,   72,   73,   74,   75,   76,   77,   78,  /* 0x88 */
00097        79,   80,   81,   82,   83,   84,   85,   86,  /* 0x90 */
00098        87,   88,   89,   90,   91,   92,   93,   94,  /* 0x98 */
00099        95,   96,   97,   98,   99,  100,  101,  102,  /* 0xa0 */
00100       103,  104,  105,  106,  107,  108,  109,  110,  /* 0xa8 */
00101       111,  112,  113,  114,  115,  116,  117,  118,  /* 0xb0 */
00102       119,  120,  121,  122,  123,  124,  125,  126,  /* 0xb8 */
00103       127,  128,  129,  130,  131,  132,  133,  134,  /* 0xc0 */
00104       135,  136,  137,  138,  139,  140,  141,  142,  /* 0xc8 */
00105       143,  144,  145,  146,  147,  148,  149,  150,  /* 0xd0 */
00106       151,  152,  153,  154,  155,  156,  157,  158,  /* 0xd8 */
00107       159,  160,  161,  162,  163,  164,  165,  166,  /* 0xe0 */
00108       167,  168,  169,  170,  171,  172,  173,  174,  /* 0xe8 */
00109       175,  176,  177,  178,  179,  180,  181,  182,  /* 0xf0 */
00110       183,  184,  185,  186,  187, 0xFF, 0xFF, 0xFF,  /* 0xf8 */
00111    };
00112 
00113    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
00114    const unsigned char* src =(unsigned char*) aSrc;
00115    PRUnichar* destEnd = aDest + *aDestLen;
00116    PRUnichar* dest = aDest;
00117    while((src < srcEnd))
00118    {
00119        switch(mState)
00120        {
00121 
00122           case 0:
00123           if(*src & 0x80)
00124           {
00125             mData = SJIS_INDEX[*src & 0x7F];
00126             if(mData < 0xE000 )
00127             {
00128                mState = 1; // two bytes 
00129             } else {
00130                if( mData > 0xFF00)
00131                {
00132                  if(0xFFFD == mData) {
00133                    // IE-compatible handling of undefined codepoints:
00134                    // 0x80 --> U+0080
00135                    // 0xa0 --> U+F8F0
00136                    // 0xfd --> U+F8F1
00137                    // 0xfe --> U+F8F2
00138                    // 0xff --> U+F8F3
00139                    switch (*src) {
00140                      case 0x80:
00141                        *dest++ = (PRUnichar) *src;
00142                        break;
00143 
00144                      case 0xa0:
00145                        *dest++ = (PRUnichar) 0xf8f0;
00146                        break;
00147 
00148                      case 0xfd:
00149                      case 0xfe:
00150                      case 0xff:
00151                        *dest++ = (PRUnichar) 0xf8f1 + 
00152                                    (*src - (unsigned char)(0xfd));
00153                        break;
00154 
00155                      default:
00156                        *dest++ = 0x30FB;
00157                    }
00158                    if(dest >= destEnd)
00159                      goto error1;
00160                  } else {
00161                    *dest++ = mData; // JIS 0201
00162                    if(dest >= destEnd)
00163                      goto error1;
00164                  }
00165                } else {
00166                  mState = 2; // EUDC
00167                }
00168             }
00169           } else {
00170             // ASCII
00171             *dest++ = (PRUnichar) *src;
00172             if(dest >= destEnd)
00173               goto error1;
00174           }
00175           break;
00176 
00177           case 1: // Index to table
00178           {
00179             PRUint8 off = sbIdx[*src];
00180             if(0xFF == off) {
00181                *dest++ = 0x30FB;
00182             } else {
00183                PRUnichar ch = gJapaneseMap[mData+off];
00184                if(ch == 0xfffd) 
00185                  ch = 0x30fb;
00186                *dest++ = ch;
00187             }
00188             mState = 0;
00189             if(dest >= destEnd)
00190               goto error1;
00191           }
00192           break;
00193 
00194           case 2: // EUDC
00195           {
00196             PRUint8 off = sbIdx[*src];
00197             if(0xFF == off) {
00198                *dest++ = 0x30fb;
00199             } else {
00200                *dest++ = mData + off;
00201             }
00202             mState = 0;
00203             if(dest >= destEnd)
00204               goto error1;
00205           }
00206           break;
00207 
00208        }
00209        src++;
00210    }
00211    *aDestLen = dest - aDest;
00212    return NS_OK;
00213 error1:
00214    *aDestLen = dest-aDest;
00215    src++;
00216    if ((mState == 0) && (src == srcEnd)) {
00217      return NS_OK;
00218    }
00219    *aSrcLen = src - (const unsigned char*)aSrc;
00220    return NS_OK_UDEC_MOREOUTPUT;
00221 }
00222 
00223 
00224 
00225 
00226 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
00227    const char * aSrc, PRInt32 * aSrcLen,
00228      PRUnichar * aDest, PRInt32 * aDestLen)
00229 {
00230    static const PRUint8 sbIdx[256] =
00231    {
00232 /* 0x0X */
00233      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
00234      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00235 /* 0x1X */
00236      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00237      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00238 /* 0x2X */
00239      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00240      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00241 /* 0x3X */
00242      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00243      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00244 /* 0x4X */
00245      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00246      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00247 /* 0x5X */
00248      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00249      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00250 /* 0x6X */
00251      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00252      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00253 /* 0x7X */
00254      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00255      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00256 /* 0x8X */
00257      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00258      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00259 /* 0x9X */
00260      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00261      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00262 /* 0xAX */
00263      0xFF, 0,    1,    2,    3,    4,    5,    6,  
00264      7,    8 ,   9,    10,   11,   12,   13,   14,
00265 /* 0xBX */
00266      15,   16,   17,   18,   19,   20,   21,   22, 
00267      23,   24,   25,   26,   27,   28,   29,   30, 
00268 /* 0xCX */
00269      31,   32,   33,   34,   35,   36,   37,   38, 
00270      39,   40,   41,   42,   43,   44,   45,   46, 
00271 /* 0xDX */
00272      47,   48,   49,   50,   51,   52,   53,   54, 
00273      55,   56,   57,   58,   59,   60,   61,   62, 
00274 /* 0xEX */
00275      63,   64,   65,   66,   67,   68,   69,   70, 
00276      71,   72,   73,   74,   75,   76,   77,   78, 
00277 /* 0xFX */
00278      79,   80,   81,   82,   83,   84,   85,   86, 
00279      87,   88,   89,   90,   91,   92,   93,   0xFF, 
00280    };
00281 
00282    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
00283    const unsigned char* src =(unsigned char*) aSrc;
00284    PRUnichar* destEnd = aDest + *aDestLen;
00285    PRUnichar* dest = aDest;
00286    while((src < srcEnd))
00287    {
00288        switch(mState)
00289        {
00290           case 0:
00291           if(*src & 0x80  && *src != (unsigned char)0xa0)
00292           {
00293             mData = JIS0208_INDEX[*src & 0x7F];
00294             if(mData != 0xFFFD )
00295             {
00296                mState = 1; // two byte JIS0208
00297             } else {
00298                if( 0x8e == *src) {
00299                  // JIS 0201
00300                  mState = 2; // JIS0201
00301                } else if(0x8f == *src) {
00302                  // JIS 0212
00303                  mState = 3; // JIS0212
00304                } else {
00305                  // others 
00306                  *dest++ = 0xFFFD;
00307                  if(dest >= destEnd)
00308                    goto error1;
00309                }
00310             }
00311           } else {
00312             // ASCII
00313             *dest++ = (PRUnichar) *src;
00314             if(dest >= destEnd)
00315               goto error1;
00316           }
00317           break;
00318 
00319           case 1: // Index to table
00320           {
00321             PRUint8 off = sbIdx[*src];
00322             if(0xFF == off) {
00323               *dest++ = 0xFFFD;
00324                // if the first byte is valid for EUC-JP but the second 
00325                // is not while being a valid US-ASCII(i.e. < 0xc0), save it
00326                // instead of eating it up !
00327                if ( ! (*src & 0xc0)  )
00328                  *dest++ = (PRUnichar) *src;;
00329             } else {
00330                *dest++ = gJapaneseMap[mData+off];
00331             }
00332             mState = 0;
00333             if(dest >= destEnd)
00334               goto error1;
00335           }
00336           break;
00337 
00338           case 2: // JIS 0201
00339           {
00340             if((0xA1 <= *src) && (*src <= 0xDF)) {
00341               *dest++ = (0xFF61-0x00A1) + *src;
00342             } else {
00343               *dest++ = 0xFFFD;             
00344               // if 0x8e is not followed by a valid JIS X 0201 byte
00345               // but by a valid US-ASCII, save it instead of eating it up.
00346               if ( (PRUint8)*src < (PRUint8)0x7f )
00347                  *dest++ = (PRUnichar) *src;
00348             }
00349             mState = 0;
00350             if(dest >= destEnd)
00351               goto error1;
00352           }
00353           break;
00354 
00355           case 3: // JIS 0212
00356           {
00357             if(*src & 0x80)
00358             {
00359               mData = JIS0212_INDEX[*src & 0x7F];
00360               if(mData != 0xFFFD )
00361               {
00362                  mState = 4; 
00363               } else {
00364                  mState = 5; // error
00365               }
00366             } else {
00367               mState = 5; // error
00368             }
00369           }
00370           break;
00371           case 4:
00372           {
00373             PRUint8 off = sbIdx[*src];
00374             if(0xFF == off) {
00375                *dest++ = 0xFFFD;
00376             } else {
00377                *dest++ = gJapaneseMap[mData+off];
00378             }
00379             mState = 0;
00380             if(dest >= destEnd)
00381               goto error1;
00382           }
00383           break;
00384           case 5: // two bytes undefined
00385           {
00386             *dest++ = 0xFFFD;
00387             mState = 0;
00388             if(dest >= destEnd)
00389               goto error1;
00390           }
00391           break;
00392        }
00393        src++;
00394    }
00395    *aDestLen = dest - aDest;
00396    return NS_OK;
00397 error1:
00398    *aDestLen = dest-aDest;
00399    src++;
00400    if ((mState == 0) && (src == srcEnd)) {
00401      return NS_OK;
00402    } 
00403    *aSrcLen = src - (const unsigned char*)aSrc;
00404    return NS_OK_UDEC_MOREOUTPUT;
00405 }
00406 
00407 
00408 
00409 NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
00410    const char * aSrc, PRInt32 * aSrcLen,
00411      PRUnichar * aDest, PRInt32 * aDestLen)
00412 {
00413    static const PRUint16 fbIdx[128] =
00414    {
00415 /* 0x8X */
00416      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
00417      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
00418 /* 0x9X */
00419      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
00420      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
00421 /* 0xAX */
00422      0xFFFD, 0,      94,     94* 2,  94* 3,  94* 4,  94* 5,  94* 6,  
00423      94* 7,  94* 8 , 94* 9,  94*10,  94*11,  94*12,  94*13,  94*14,
00424 /* 0xBX */
00425      94*15,  94*16,  94*17,  94*18,  94*19,  94*20,  94*21,  94*22,
00426      94*23,  94*24,  94*25,  94*26,  94*27,  94*28,  94*29,  94*30,
00427 /* 0xCX */
00428      94*31,  94*32,  94*33,  94*34,  94*35,  94*36,  94*37,  94*38,
00429      94*39,  94*40,  94*41,  94*42,  94*43,  94*44,  94*45,  94*46,
00430 /* 0xDX */
00431      94*47,  94*48,  94*49,  94*50,  94*51,  94*52,  94*53,  94*54,
00432      94*55,  94*56,  94*57,  94*58,  94*59,  94*60,  94*61,  94*62,
00433 /* 0xEX */
00434      94*63,  94*64,  94*65,  94*66,  94*67,  94*68,  94*69,  94*70,
00435      94*71,  94*72,  94*73,  94*74,  94*75,  94*76,  94*77,  94*78,
00436 /* 0xFX */
00437      94*79,  94*80,  94*81,  94*82,  94*83,  94*84,  94*85,  94*86,
00438      94*87,  94*88,  94*89,  94*90,  94*91,  94*92,  94*93,  0xFFFD,
00439    };
00440    static const PRUint8 sbIdx[256] =
00441    {
00442 /* 0x0X */
00443      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00444      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00445 /* 0x1X */
00446      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00447      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00448 /* 0x2X */
00449      0xFF, 0,    1,    2,    3,    4,    5,    6,  
00450      7,    8 ,   9,    10,   11,   12,   13,   14,
00451 /* 0x3X */
00452      15,   16,   17,   18,   19,   20,   21,   22, 
00453      23,   24,   25,   26,   27,   28,   29,   30, 
00454 /* 0x4X */
00455      31,   32,   33,   34,   35,   36,   37,   38, 
00456      39,   40,   41,   42,   43,   44,   45,   46, 
00457 /* 0x5X */
00458      47,   48,   49,   50,   51,   52,   53,   54, 
00459      55,   56,   57,   58,   59,   60,   61,   62, 
00460 /* 0x6X */
00461      63,   64,   65,   66,   67,   68,   69,   70, 
00462      71,   72,   73,   74,   75,   76,   77,   78, 
00463 /* 0x7X */
00464      79,   80,   81,   82,   83,   84,   85,   86, 
00465      87,   88,   89,   90,   91,   92,   93,   0xFF, 
00466 /* 0x8X */
00467      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00468      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00469 /* 0x9X */
00470      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00471      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00472 /* 0xAX */
00473      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00474      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00475 /* 0xBX */
00476      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00477      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00478 /* 0xCX */
00479      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00480      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00481 /* 0xDX */
00482      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00483      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00484 /* 0xEX */
00485      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00486      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00487 /* 0xFX */
00488      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00489      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
00490    };
00491 
00492    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
00493    const unsigned char* src =(unsigned char*) aSrc;
00494    PRUnichar* destEnd = aDest + *aDestLen;
00495    PRUnichar* dest = aDest;
00496    while((src < srcEnd))
00497    {
00498      
00499        switch(mState)
00500        {
00501           case mState_ASCII:
00502             if(0x1b == *src)
00503             {
00504               mLastLegalState = mState;
00505               mState = mState_ESC;
00506             } else if(*src & 0x80) {
00507               goto error2;
00508             } else {
00509               *dest++ = (PRUnichar) *src;
00510               if(dest >= destEnd)
00511                 goto error1;
00512             }
00513           break;
00514           
00515           case mState_ESC:
00516             if( '(' == *src) {
00517               mState = mState_ESC_28;
00518             } else if ('$' == *src)  {
00519               mState = mState_ESC_24;
00520             } else if ('.' == *src)  { // for ISO-2022-JP-2
00521               mState = mState_ESC_2e;
00522             } else if ('N' == *src)  { // for ISO-2022-JP-2
00523               mState = mState_ESC_4e;
00524             } else  {
00525               if((dest+2) >= destEnd)
00526                 goto error1;
00527               *dest++ = (PRUnichar) 0x1b;
00528               if(0x80 & *src)
00529                 goto error2;
00530               *dest++ = (PRUnichar) *src;
00531               mState = mLastLegalState;
00532             }
00533           break;
00534 
00535           case mState_ESC_28: // ESC (
00536             if( 'B' == *src) {
00537               mState = mState_ASCII;
00538               if (mRunLength == 0) {
00539                 if((dest+1) >= destEnd)
00540                   goto error1;
00541                 *dest++ = 0xFFFD;
00542               }
00543               mRunLength = 0;
00544             } else if ('J' == *src)  {
00545               mState = mState_JISX0201_1976Roman;
00546               if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
00547                 if((dest+1) >= destEnd)
00548                   goto error1;
00549                 *dest++ = 0xFFFD;
00550               }
00551               mRunLength = 0;
00552             } else if ('I' == *src)  {
00553               mState = mState_JISX0201_1976Kana;
00554               mRunLength = 0;
00555             } else  {
00556               if((dest+3) >= destEnd)
00557                 goto error1;
00558               *dest++ = (PRUnichar) 0x1b;
00559               *dest++ = (PRUnichar) '(';
00560               if(0x80 & *src)
00561                 goto error2;
00562               *dest++ = (PRUnichar) *src;
00563               mState = mLastLegalState;
00564             }
00565           break;
00566 
00567           case mState_ESC_24: // ESC $
00568             if( '@' == *src) {
00569               mState = mState_JISX0208_1978;
00570               mRunLength = 0;
00571             } else if ('A' == *src)  {
00572               mState = mState_GB2312_1980;
00573               mRunLength = 0;
00574             } else if ('B' == *src)  {
00575               mState = mState_JISX0208_1983;
00576               mRunLength = 0;
00577             } else if ('(' == *src)  {
00578               mState = mState_ESC_24_28;
00579             } else  {
00580               if((dest+3) >= destEnd)
00581                 goto error1;
00582               *dest++ = (PRUnichar) 0x1b;
00583               *dest++ = (PRUnichar) '$';
00584               if(0x80 & *src)
00585                 goto error2;
00586               *dest++ = (PRUnichar) *src;
00587               mState = mLastLegalState;
00588             }
00589           break;
00590 
00591           case mState_ESC_24_28: // ESC $ (
00592             if( 'C' == *src) {
00593               mState = mState_KSC5601_1987;
00594               mRunLength = 0;
00595             } else if ('D' == *src) {
00596               mState = mState_JISX0212_1990;
00597               mRunLength = 0;
00598             } else  {
00599               if((dest+4) >= destEnd)
00600                 goto error1;
00601               *dest++ = (PRUnichar) 0x1b;
00602               *dest++ = (PRUnichar) '$';
00603               *dest++ = (PRUnichar) '(';
00604               if(0x80 & *src)
00605                 goto error2;
00606               *dest++ = (PRUnichar) *src;
00607               mState = mLastLegalState;
00608             }
00609           break;
00610 
00611           case mState_JISX0201_1976Roman:
00612             if(0x1b == *src) {
00613               mLastLegalState = mState;
00614               mState = mState_ESC;
00615             } else if(*src & 0x80) {
00616               goto error2;
00617             } else {
00618               // XXX We need to  decide how to handle \ and ~ here
00619               // we may need a if statement here for '\' and '~' 
00620               // to map them to Yen and Overbar
00621               *dest++ = (PRUnichar) *src;
00622               ++mRunLength;
00623               if(dest >= destEnd)
00624                 goto error1;
00625             }
00626           break;
00627 
00628           case mState_JISX0201_1976Kana:
00629             if(0x1b == *src) {
00630               mLastLegalState = mState;
00631               mState = mState_ESC;
00632             } else {
00633               if((0x21 <= *src) && (*src <= 0x5F)) {
00634                 *dest++ = (0xFF61-0x0021) + *src;
00635                 ++mRunLength;
00636               } else {
00637                 goto error2;
00638               }
00639               if(dest >= destEnd)
00640                 goto error1;
00641             }
00642           break;
00643 
00644           case mState_JISX0208_1978:
00645             if(0x1b == *src) {
00646               mLastLegalState = mState;
00647               mState = mState_ESC;
00648             } else if(*src & 0x80) {
00649               mLastLegalState = mState;
00650               mState = mState_ERROR;
00651             } else {
00652               mData = JIS0208_INDEX[*src & 0x7F];
00653               if(0xFFFD == mData)
00654                 goto error2;
00655               mState = mState_JISX0208_1978_2ndbyte;
00656             }
00657           break;
00658 
00659           case mState_GB2312_1980:
00660             if(0x1b == *src) {
00661               mLastLegalState = mState;
00662               mState = mState_ESC;
00663             } else if(*src & 0x80) {
00664               mLastLegalState = mState;
00665               mState = mState_ERROR;
00666             } else {
00667               mData = fbIdx[*src & 0x7F];
00668               if(0xFFFD == mData)
00669                 goto error2;
00670               mState = mState_GB2312_1980_2ndbyte;
00671             }
00672           break;
00673 
00674           case mState_JISX0208_1983:
00675             if(0x1b == *src) {
00676               mLastLegalState = mState;
00677               mState = mState_ESC;
00678             } else if(*src & 0x80) {
00679               mLastLegalState = mState;
00680               mState = mState_ERROR;
00681             } else {
00682               mData = JIS0208_INDEX[*src & 0x7F];
00683               if(0xFFFD == mData)
00684                 goto error2;
00685               mState = mState_JISX0208_1983_2ndbyte;
00686             }
00687           break;
00688 
00689           case mState_KSC5601_1987:
00690             if(0x1b == *src) {
00691               mLastLegalState = mState;
00692               mState = mState_ESC;
00693             } else if(*src & 0x80) {
00694               mLastLegalState = mState;
00695               mState = mState_ERROR;
00696             } else {
00697               mData = fbIdx[*src & 0x7F];
00698               if(0xFFFD == mData)
00699                 goto error2;
00700               mState = mState_KSC5601_1987_2ndbyte;
00701             }
00702           break;
00703 
00704           case mState_JISX0212_1990:
00705             if(0x1b == *src) {
00706               mLastLegalState = mState;
00707               mState = mState_ESC;
00708             } else if(*src & 0x80) {
00709               mLastLegalState = mState;
00710               mState = mState_ERROR;
00711             } else {
00712               mData = JIS0212_INDEX[*src & 0x7F];
00713               if(0xFFFD == mData)
00714                 goto error2;
00715               mState = mState_JISX0212_1990_2ndbyte;
00716             }
00717           break;
00718 
00719           case mState_JISX0208_1978_2ndbyte:
00720           {
00721             PRUint8 off = sbIdx[*src];
00722             if(0xFF == off) {
00723                goto error2;
00724             } else {
00725                // XXX We need to map from JIS X 0208 1983 to 1987 
00726                // in the next line before pass to *dest++
00727                *dest++ = gJapaneseMap[mData+off];
00728                ++mRunLength;
00729             }
00730             mState = mState_JISX0208_1978;
00731             if(dest >= destEnd)
00732               goto error1;
00733           }
00734           break;
00735 
00736           case mState_GB2312_1980_2ndbyte:
00737           {
00738             PRUint8 off = sbIdx[*src];
00739             if(0xFF == off) {
00740                goto error2;
00741             } else {
00742               if (!mGB2312Decoder) {
00743                 // creating a delegate converter (GB2312)
00744                 nsresult rv;
00745                 nsCOMPtr<nsICharsetConverterManager> ccm = 
00746                          do_GetService(kCharsetConverterManagerCID, &rv);
00747                 if (NS_SUCCEEDED(rv)) {
00748                   rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
00749                 }
00750               }
00751               if (!mGB2312Decoder) {// failed creating a delegate converter
00752                 goto error2;
00753               } else {
00754                 unsigned char gb[2];
00755                 PRUnichar uni;
00756                 PRInt32 gbLen = 2, uniLen = 1;
00757                 // ((mData/94)+0x21) is the original 1st byte.
00758                 // *src is the present 2nd byte.
00759                 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
00760                 gb[0] = ((mData / 94) + 0x21) | 0x80;
00761                 gb[1] = *src | 0x80;
00762                 // Convert GB2312 to unicode.
00763                 mGB2312Decoder->Convert((const char *)gb, &gbLen,
00764                                         &uni, &uniLen);
00765                 *dest++ = uni;
00766                 ++mRunLength;
00767               }
00768             }
00769             mState = mState_GB2312_1980;
00770             if(dest >= destEnd)
00771               goto error1;
00772           }
00773           break;
00774 
00775           case mState_JISX0208_1983_2ndbyte:
00776           {
00777             PRUint8 off = sbIdx[*src];
00778             if(0xFF == off) {
00779                goto error2;
00780             } else {
00781                *dest++ = gJapaneseMap[mData+off];
00782                ++mRunLength;
00783             }
00784             mState = mState_JISX0208_1983;
00785             if(dest >= destEnd)
00786               goto error1;
00787           }
00788           break;
00789 
00790           case mState_KSC5601_1987_2ndbyte:
00791           {
00792             PRUint8 off = sbIdx[*src];
00793             if(0xFF == off) {
00794                goto error2;
00795             } else {
00796               if (!mEUCKRDecoder) {
00797                 // creating a delegate converter (EUC-KR)
00798                 nsresult rv;
00799                 nsCOMPtr<nsICharsetConverterManager> ccm = 
00800                          do_GetService(kCharsetConverterManagerCID, &rv);
00801                 if (NS_SUCCEEDED(rv)) {
00802                   rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
00803                 }
00804               }
00805               if (!mEUCKRDecoder) {// failed creating a delegate converter
00806                 goto error2;
00807               } else {              
00808                 unsigned char ksc[2];
00809                 PRUnichar uni;
00810                 PRInt32 kscLen = 2, uniLen = 1;
00811                 // ((mData/94)+0x21) is the original 1st byte.
00812                 // *src is the present 2nd byte.
00813                 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
00814                 ksc[0] = ((mData / 94) + 0x21) | 0x80;
00815                 ksc[1] = *src | 0x80;
00816                 // Convert EUC-KR to unicode.
00817                 mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
00818                                        &uni, &uniLen);
00819                 *dest++ = uni;
00820                 ++mRunLength;
00821               }
00822             }
00823             mState = mState_KSC5601_1987;
00824             if(dest >= destEnd)
00825               goto error1;
00826           }
00827           break;
00828 
00829           case mState_JISX0212_1990_2ndbyte:
00830           {
00831             PRUint8 off = sbIdx[*src];
00832             if(0xFF == off) {
00833                goto error2;
00834             } else {
00835                *dest++ = gJapaneseMap[mData+off];
00836                ++mRunLength;
00837             }
00838             mState = mState_JISX0212_1990;
00839             if(dest >= destEnd)
00840               goto error1;
00841           }
00842           break;
00843 
00844           case mState_ESC_2e: // ESC .
00845             // "ESC ." designates a 96-character set to G2.
00846             mState = mLastLegalState;
00847             if( 'A' == *src) {
00848               G2charset = G2_ISO88591;
00849             } else if ('F' == *src) {
00850               G2charset = G2_ISO88597;
00851             } else  {
00852               if((dest+3) >= destEnd)
00853                 goto error1;
00854               *dest++ = (PRUnichar) 0x1b;
00855               *dest++ = (PRUnichar) '.';
00856               if(0x80 & *src)
00857                 goto error2;
00858               *dest++ = (PRUnichar) *src;
00859             }
00860           break;
00861 
00862           case mState_ESC_4e: // ESC N
00863             // "ESC N" is the SS2 sequence, which invokes the character set
00864             // designated to G2.  Since SS2 affects only the next single
00865             // character, mState is restored to the last legal state.
00866             mState = mLastLegalState;
00867             if((0x20 <= *src) && (*src <= 0x7F)) {
00868               if (G2_ISO88591 == G2charset) {
00869                 *dest++ = *src | 0x80;
00870                 ++mRunLength;
00871               } else if (G2_ISO88597 == G2charset) {
00872                 if (!mISO88597Decoder) {
00873                   // creating a delegate converter (ISO-8859-7)
00874                   nsresult rv;
00875                   nsCOMPtr<nsICharsetConverterManager> ccm = 
00876                            do_GetService(kCharsetConverterManagerCID, &rv);
00877                   if (NS_SUCCEEDED(rv)) {
00878                     rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
00879                   }
00880                 }
00881                 if (!mISO88597Decoder) {// failed creating a delegate converter
00882                   goto error2;
00883                 } else {
00884                   // Put one character with ISO-8859-7 encoding.
00885                   unsigned char gr = *src | 0x80;
00886                   PRUnichar uni;
00887                   PRInt32 grLen = 1, uniLen = 1;
00888                   // Convert ISO-8859-7 to unicode.
00889                   mISO88597Decoder->Convert((const char *)&gr, &grLen,
00890                                             &uni, &uniLen);
00891                   *dest++ = uni;
00892                   ++mRunLength;
00893                 }
00894               } else {// G2charset is G2_unknown (not designated yet)
00895                 goto error2;
00896               }
00897               if(dest >= destEnd)
00898                 goto error1;
00899             } else {
00900               if((dest+3) >= destEnd)
00901                 goto error1;
00902               *dest++ = (PRUnichar) 0x1b;
00903               *dest++ = (PRUnichar) 'N';
00904               if(0x80 & *src)
00905                 goto error2;
00906               *dest++ = (PRUnichar) *src;
00907             }
00908           break;
00909 
00910           case mState_ERROR:
00911              mState = mLastLegalState;
00912              mRunLength = 0;
00913              goto error2;
00914           break;
00915 
00916        } // switch
00917        src++;
00918    }
00919    *aDestLen = dest - aDest;
00920    return NS_OK;
00921 error1:
00922    *aDestLen = dest-aDest;
00923    src++;
00924    if ((mState == 0) && (src == srcEnd)) {
00925      return NS_OK;
00926    }
00927    *aSrcLen = src - (const unsigned char*)aSrc;
00928    return NS_OK_UDEC_MOREOUTPUT;
00929 error2:
00930    *aSrcLen = src - (const unsigned char*)aSrc;
00931    *aDestLen = dest-aDest;
00932    return NS_ERROR_UNEXPECTED;
00933 }