Back to index

lightning-sunbird  0.9+nobinonly
nsISO2022KRToUnicode.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Jungshik Shin <jshin@mailaps.org>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 #include "nsISO2022KRToUnicode.h"
00039 #include "nsUCSupport.h"
00040 #include "nsICharsetConverterManager.h"
00041 #include "nsIServiceManager.h"
00042 
00043 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); // class ID handed to do_GetService() in Convert() to obtain the nsICharsetConverterManager
00044 
00045 NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
00046 {
00047   const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
00048   const unsigned char* src =(unsigned char*) aSrc;
00049   PRUnichar* destEnd = aDest + *aDestLen;
00050   PRUnichar* dest = aDest;
00051   while((src < srcEnd))
00052   {
00053     switch(mState)
00054     {
00055       case mState_Init:
00056         if(0x1b == *src) {
00057           mLastLegalState = mState_ASCII;
00058           mState = mState_ESC;
00059           break;
00060         }
00061         mState = mState_ASCII;
00062         // fall through
00063 
00064       case mState_ASCII:
00065         if(0x0e == *src) { // Shift-Out 
00066           mState = mState_KSX1001_1992;
00067           mRunLength = 0;
00068         } 
00069         else if(*src & 0x80) {
00070           *dest++ = 0xFFFD;
00071           if(dest >= destEnd)
00072             goto error1;
00073         } 
00074         else {
00075           *dest++ = (PRUnichar) *src;
00076           if(dest >= destEnd)
00077             goto error1;
00078         }
00079         break;
00080           
00081       case mState_ESC:
00082         if('$' == *src) {
00083           mState = mState_ESC_24;
00084         } 
00085         else  {
00086           if((dest+2) >= destEnd)
00087             goto error1;
00088           *dest++ = (PRUnichar) 0x1b;
00089           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00090           mState =  mLastLegalState;
00091         }
00092         break;
00093 
00094       case mState_ESC_24: // ESC $
00095         if(')' == *src) {
00096           mState = mState_ESC_24_29;
00097         } 
00098         else  {
00099           if((dest+3) >= destEnd)
00100             goto error1;
00101           *dest++ = (PRUnichar) 0x1b;
00102           *dest++ = (PRUnichar) '$';
00103           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00104           mState = mLastLegalState;
00105         }
00106         break;
00107 
00108       case mState_ESC_24_29: // ESC $ )
00109         mState = mLastLegalState;
00110         if('C' == *src) {
00111           mState = mState_ASCII;
00112           mRunLength = 0;
00113         } 
00114         else  {
00115           if((dest+4) >= destEnd)
00116             goto error1;
00117           *dest++ = (PRUnichar) 0x1b;
00118           *dest++ = (PRUnichar) '$';
00119           *dest++ = (PRUnichar) ')';
00120           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
00121           mState = mLastLegalState;
00122         }
00123         break;
00124 
00125       case mState_KSX1001_1992:
00126         if (0x20 < (PRUint8) *src  && (PRUint8) *src < 0x7f) {
00127           mData = (PRUint8) *src;
00128           mState = mState_KSX1001_1992_2ndbyte;
00129         } 
00130         else if (0x0f == *src) { // Shift-In (SI)
00131           mState = mState_ASCII;
00132           if (mRunLength == 0) {
00133             if(dest+1 >= destEnd)
00134               goto error1;
00135             *dest++ = 0xFFFD;
00136           }
00137           mRunLength = 0;
00138         } 
00139         else if ((PRUint8) *src == 0x20 || (PRUint8) *src == 0x09) {
00140           // Allow space and tab between SO and SI (i.e. in Hangul segment)
00141           mState = mState_KSX1001_1992;
00142           *dest++ = (PRUnichar) *src;
00143           ++mRunLength;
00144           if(dest >= destEnd)
00145           goto error1;
00146         } 
00147         else {         // Everything else is invalid.
00148           *dest++ = 0xFFFD;
00149           if(dest >= destEnd)
00150              goto error1;
00151         }
00152         break;
00153 
00154       case mState_KSX1001_1992_2ndbyte:
00155         if ( 0x20 < (PRUint8) *src && (PRUint8) *src < 0x7f  ) {
00156           if (!mEUCKRDecoder) {
00157             // creating a delegate converter (EUC-KR)
00158             nsresult rv;
00159             nsCOMPtr<nsICharsetConverterManager> ccm = 
00160                   do_GetService(kCharsetConverterManagerCID, &rv);
00161             if (NS_SUCCEEDED(rv)) {
00162               rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
00163             }
00164           }
00165 
00166           if (!mEUCKRDecoder) {// failed creating a delegate converter
00167            *dest++ = 0xFFFD;
00168           } 
00169           else {              
00170             unsigned char ksx[2];
00171             PRUnichar uni;
00172             PRInt32 ksxLen = 2, uniLen = 1;
00173             // mData is the original 1st byte.
00174             // *src is the present 2nd byte.
00175             // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
00176             ksx[0] = mData | 0x80;
00177             ksx[1] = *src | 0x80;
00178             // Convert EUC-KR to unicode.
00179             mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
00180             *dest++ = uni;
00181             ++mRunLength;
00182           }
00183           if(dest >= destEnd)
00184             goto error1;
00185           mState = mState_KSX1001_1992;
00186         } 
00187         else {        // Invalid 
00188           if ( 0x0f == *src ) {   // Shift-In (SI)
00189             mState = mState_ASCII;
00190           } 
00191           else {
00192             mState = mState_KSX1001_1992;
00193           }
00194           *dest++ = 0xFFFD;
00195           if(dest >= destEnd)
00196            goto error1;
00197         }
00198         break;
00199 
00200       case mState_ERROR:
00201         mState = mLastLegalState;
00202         *dest++ = 0xFFFD;
00203         if(dest >= destEnd)
00204           goto error1;
00205         break;
00206 
00207     } // switch
00208     src++;
00209     if ( *src == 0x0a || *src == 0x0d )   // if LF/CR, return to US-ASCII unconditionally.
00210       mState = mState_Init;
00211    }
00212    *aDestLen = dest - aDest;
00213    return NS_OK;
00214 
00215 error1:
00216    *aDestLen = dest-aDest;
00217    *aSrcLen = src-(unsigned char*)aSrc;
00218    return NS_OK_UDEC_MOREOUTPUT;
00219 }
00220