Back to index

lightning-sunbird  0.9+nobinonly
nsGBKConvUtil.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "nsGBKConvUtil.h"
00039 #include "gbku.h"
00040 #include "nsCRT.h"
00041 #include "nsICharRepresentable.h"
00042 #define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */
00043 //--------------------------------------------------------------------
00044 // nsGBKConvUtil
00045 //--------------------------------------------------------------------
00046 
00047 static PRBool gInitToGBKTable = PR_FALSE; // becomes PR_TRUE once InitToGBKTable() has filled gUnicodeToGBKTable
00048 static const PRUnichar gGBKToUnicodeTable[MAX_GBK_LENGTH] = { // GBK -> Unicode; index is (byte1 - 0x81) * 0xBF + (byte2 - 0x40)
00049 #include "cp936map.h"
00050 };
00051 static PRUint16 gUnicodeToGBKTable[0xA000-0x4e00]; // Unicode -> GBK for U+4E00..U+9FFF; index is (char - 0x4E00), value is (byte1 << 8) | byte2, 0 means no mapping
00052 
00053 PRBool nsGBKConvUtil::UnicodeToGBKChar(
00054   PRUnichar aChar, PRBool aToGL, char* 
00055   aOutByte1, char* aOutByte2)
00056 {
00057   NS_ASSERTION(gInitToGBKTable, "gGBKToUnicodeTable is not init yet. need to call InitToGBKTable first");
00058   PRBool found=PR_FALSE;
00059   *aOutByte1 = *aOutByte2 = 0;
00060   if(UNICHAR_IN_RANGE(0xd800, aChar, 0xdfff))
00061   {
00062     // surrogate is not in here
00063     return PR_FALSE;
00064   }
00065   if(UNICHAR_IN_RANGE(0x4e00, aChar, 0x9FFF))
00066   {
00067     PRUint16 item = gUnicodeToGBKTable[aChar - 0x4e00];
00068     if(item != 0) 
00069     {
00070       *aOutByte1 = item >> 8;
00071       *aOutByte2 = item & 0x00FF;
00072       found = PR_TRUE;
00073     } else {
00074       return PR_FALSE;
00075     }
00076   } else {
00077     // ugly linear search
00078     for( PRInt32 i = 0; i < MAX_GBK_LENGTH; i++ )
00079     {
00080       if( aChar == gGBKToUnicodeTable[i])
00081       {
00082         *aOutByte1 = (i /  0x00BF + 0x0081) ;
00083         *aOutByte2 = (i %  0x00BF + 0x0040) ;
00084         found = PR_TRUE;
00085         break;
00086       }
00087     }
00088   }
00089   if(! found)
00090     return PR_FALSE;
00091 
00092   if(aToGL) {
00093     // to GL, we only return if it is in the range 
00094     if(UINT8_IN_RANGE(0xA1, *aOutByte1, 0xFE) &&
00095        UINT8_IN_RANGE(0xA1, *aOutByte2, 0xFE))
00096     {
00097       // mask them to GL 
00098       *aOutByte1 &= 0x7F;
00099       *aOutByte2 &= 0x7F;
00100     } else {
00101       // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean
00102       // it is not a GB2312 character, we cannot map to GL 
00103       *aOutByte1 = 0x00;
00104       *aOutByte2 = 0x00;
00105       return PR_FALSE;
00106     }
00107   }
00108   return PR_TRUE;
00109 }
00110 PRUnichar nsGBKConvUtil::GBKCharToUnicode(char aByte1, char aByte2)
00111 {
00112   NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1, 0xFE), "first byte out of range");
00113   NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2, 0xFE), "second byte out of range");
00114 
00115   PRUint8 i1 = (PRUint8)aByte1;
00116   PRUint8 i2 = (PRUint8)aByte2;
00117   PRUint16 idx = (i1 - 0x0081) * 0x00bf + i2 - 0x0040 ;
00118 
00119   NS_ASSERTION(idx < MAX_GBK_LENGTH, "ARB");
00120   // play it safe- add if statement here ot protect ARB
00121   // probably not necessary
00122   if(idx < MAX_GBK_LENGTH)
00123     return gGBKToUnicodeTable[ idx ];
00124   else
00125     return UCS2_NO_MAPPING;
00126 }
00127 void nsGBKConvUtil::InitToGBKTable()
00128 {
00129   if ( gInitToGBKTable )
00130    return;
00131 
00132   PRUnichar unicode;
00133   PRUnichar i;
00134   // zap it to zero first
00135   memset(gUnicodeToGBKTable,0, sizeof(gUnicodeToGBKTable));
00136 
00137   for ( i=0; i<MAX_GBK_LENGTH; i++ )
00138   {
00139     unicode = gGBKToUnicodeTable[i];
00140     // to reduce size of gUnicodeToGBKTable, we only do direct unicode to GB 
00141     // table mapping between unicode 0x4E00 and 0xA000. Others by searching
00142     // gGBKToUnicodeTable. There is a trade off between memory usage and speed.
00143     if(UNICHAR_IN_RANGE(0x4e00, unicode, 0x9fff))
00144     {
00145       unicode -= 0x4E00; 
00146       gUnicodeToGBKTable[unicode] =  (( i / 0x00BF + 0x0081) << 8) | 
00147                                     ( i % 0x00BF+ 0x0040);
00148     }
00149   }
00150   gInitToGBKTable = PR_TRUE;
00151 }
00152 void nsGBKConvUtil::FillInfo( 
00153   PRUint32 *aInfo, 
00154   PRUint8 aStart1, PRUint8 aEnd1, 
00155   PRUint8 aStart2, PRUint8 aEnd2
00156 )
00157 {
00158   PRUint16 i,j, k;
00159   PRUnichar unicode;
00160 
00161   for ( i=aStart1; i<=aEnd1; i++) 
00162   {
00163     for( j=aStart2; j<=aEnd2; j++)
00164     {
00165       k = (i - 0x0081)*0x00BF +(j-0x0040);    
00166       unicode = gGBKToUnicodeTable[k];
00167       NS_ASSERTION(unicode != 0xFFFF, "somehow the table still use 0xffff");
00168       if (unicode != UCS2_NO_MAPPING) 
00169       {
00170         SET_REPRESENTABLE(aInfo, unicode);
00171       }               
00172     }
00173   }                   
00174 }
00175 void nsGBKConvUtil::FillGB2312Info( 
00176   PRUint32 *aInfo
00177 )
00178 {
00179   // The following range is coded by looking at the GB2312 standard
00180   // and make sure we do not call FillInfo for undefined code point
00181   // Symbol
00182   // row 1 - 1 range (full)
00183   FillInfo(aInfo, 0x21|0x80, 0x21|0x80, 0x21|0x80, 0x7E|0x80);
00184   // row 2 - 3 range
00185   FillInfo(aInfo, 0x22|0x80, 0x22|0x80, (0x20+17)|0x80, (0x20+66)|0x80);
00186   FillInfo(aInfo, 0x22|0x80, 0x22|0x80, (0x20+69)|0x80, (0x20+78)|0x80);
00187   FillInfo(aInfo, 0x22|0x80, 0x22|0x80, (0x20+81)|0x80, (0x20+92)|0x80);
00188   // row 3 - 1 range (full)
00189   FillInfo(aInfo, 0x23|0x80, 0x23|0x80, 0x21|0x80, 0x7E|0x80);
00190   // row 4 - 1 range
00191   FillInfo(aInfo, 0x24|0x80, 0x24|0x80, (0x20+ 1)|0x80, (0x20+83)|0x80);
00192   // row 5 - 1 range
00193   FillInfo(aInfo, 0x25|0x80, 0x25|0x80, (0x20+ 1)|0x80, (0x20+86)|0x80);
00194   // row 6 - 2 range
00195   FillInfo(aInfo, 0x26|0x80, 0x26|0x80, (0x20+ 1)|0x80, (0x20+24)|0x80);
00196   FillInfo(aInfo, 0x26|0x80, 0x26|0x80, (0x20+33)|0x80, (0x20+56)|0x80);
00197   // row 7
00198   FillInfo(aInfo, 0x27|0x80, 0x27|0x80, (0x20+ 1)|0x80, (0x20+33)|0x80);
00199   FillInfo(aInfo, 0x27|0x80, 0x27|0x80, (0x20+49)|0x80, (0x20+81)|0x80);
00200   // row 8
00201   FillInfo(aInfo, 0x28|0x80, 0x28|0x80, (0x20+ 1)|0x80, (0x20+26)|0x80);
00202   FillInfo(aInfo, 0x28|0x80, 0x28|0x80, (0x20+36)|0x80, (0x20+73)|0x80);
00203   // row 9
00204   FillInfo(aInfo, 0x29|0x80, 0x29|0x80, (0x20+ 4)|0x80, (0x20+79)|0x80);
00205  
00206   // Frequent used Hanzi
00207   // 3021-567e
00208   FillInfo(aInfo, 0x30|0x80, 0x56|0x80, 0x21|0x80, 0x7E|0x80);
00209   // 5721-5779
00210   FillInfo(aInfo, 0x57|0x80, 0x57|0x80, 0x21|0x80, 0x79|0x80);
00211 
00212   // Infrequent used Hanzi
00213   // 5821-777e
00214   FillInfo(aInfo, 0x58|0x80, 0x77|0x80, 0x21|0x80, 0x7E|0x80);
00215 }