Back to index

lightning-sunbird  0.9+nobinonly
uscan.c
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   jeroen.dobbelaere@acunia.com
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 #include "unicpriv.h"
/*
 * CHK_GR94(b): non-zero when byte b lies strictly between 0xa0 and 0xff,
 * i.e. in [0xa1, 0xfe].
 *
 * The argument is evaluated exactly once, so expressions with side effects
 * (e.g. *p++) are safe.  Subtracting 0xa1 and truncating to 8 bits maps
 * [0xa1, 0xfe] onto [0x00, 0x5d]; every other byte value wraps to 0x5e or
 * above and is rejected by the single comparison.
 *
 * CHK_GR94_2Byte(b1,b2): non-zero when both bytes pass CHK_GR94.
 */
#define CHK_GR94(b) ( (PRUint8) ((PRUint8) (b) - 0xa1) < 0x5e )
#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2))
00041 /*=================================================================================
00042 
00043 =================================================================================*/
00044 typedef  PRBool (*uSubScannerFunc) (unsigned char* in, PRUint16* out);
00045 /*=================================================================================
00046 
00047 =================================================================================*/
00048 
00049 typedef PRBool (*uScannerFunc) (
00050                                 uShiftTable    *shift,
00051                                 PRInt32*    state,
00052                                 unsigned char  *in,
00053                                 PRUint16    *out,
00054                                 PRUint32     inbuflen,
00055                                 PRUint32*    inscanlen
00056                                 );
00057 
00058 MODULE_PRIVATE PRBool uScan(  
00059                             uShiftTable    *shift,
00060                             PRInt32*    state,
00061                             unsigned char  *in,
00062                             PRUint16    *out,
00063                             PRUint32     inbuflen,
00064                             PRUint32*    inscanlen
00065                             );
00066 
00067 #define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out))
00068 
00069 PRIVATE PRBool uCheckAndScanAlways1Byte(
00070                                         uShiftTable    *shift,
00071                                         PRInt32*    state,
00072                                         unsigned char  *in,
00073                                         PRUint16    *out,
00074                                         PRUint32     inbuflen,
00075                                         PRUint32*    inscanlen
00076                                         );
00077 PRIVATE PRBool uCheckAndScanAlways2Byte(
00078                                         uShiftTable    *shift,
00079                                         PRInt32*    state,
00080                                         unsigned char  *in,
00081                                         PRUint16    *out,
00082                                         PRUint32     inbuflen,
00083                                         PRUint32*    inscanlen
00084                                         );
00085 PRIVATE PRBool uCheckAndScanAlways2ByteShiftGR(
00086                                                uShiftTable    *shift,
00087                                                PRInt32*    state,
00088                                                unsigned char  *in,
00089                                                PRUint16    *out,
00090                                                PRUint32     inbuflen,
00091                                                PRUint32*    inscanlen
00092                                                );
00093 PRIVATE PRBool uCheckAndScanAlways2ByteGR128(
00094                                                uShiftTable    *shift,
00095                                                PRInt32*    state,
00096                                                unsigned char  *in,
00097                                                PRUint16    *out,
00098                                                PRUint32     inbuflen,
00099                                                PRUint32*    inscanlen
00100                                                );
00101 PRIVATE PRBool uCheckAndScanByTable(
00102                                     uShiftTable    *shift,
00103                                     PRInt32*    state,
00104                                     unsigned char  *in,
00105                                     PRUint16    *out,
00106                                     PRUint32     inbuflen,
00107                                     PRUint32*    inscanlen
00108                                     );
00109 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8F(
00110                                             uShiftTable    *shift,
00111                                             PRInt32*    state,
00112                                             unsigned char  *in,
00113                                             PRUint16    *out,
00114                                             PRUint32     inbuflen,
00115                                             PRUint32*    inscanlen
00116                                             );
00117 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA2(
00118                                               uShiftTable    *shift,
00119                                               PRInt32*    state,
00120                                               unsigned char  *in,
00121                                               PRUint16    *out,
00122                                               PRUint32     inbuflen,
00123                                               PRUint32*    inscanlen
00124                                               );
00125 
00126 PRIVATE PRBool uCheckAndScanAlways2ByteSwap(
00127                                             uShiftTable    *shift,
00128                                             PRInt32*    state,
00129                                             unsigned char  *in,
00130                                             PRUint16    *out,
00131                                             PRUint32     inbuflen,
00132                                             PRUint32*    inscanlen
00133                                             );
00134 PRIVATE PRBool uCheckAndScanAlways4Byte(
00135                                         uShiftTable    *shift,
00136                                         PRInt32*    state,
00137                                         unsigned char  *in,
00138                                         PRUint16    *out,
00139                                         PRUint32     inbuflen,
00140                                         PRUint32*    inscanlen
00141                                         );
00142 PRIVATE PRBool uCheckAndScanAlways4ByteSwap(
00143                                             uShiftTable    *shift,
00144                                             PRInt32*    state,
00145                                             unsigned char  *in,
00146                                             PRUint16    *out,
00147                                             PRUint32     inbuflen,
00148                                             PRUint32*    inscanlen
00149                                             );
00150 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA3(
00151                                               uShiftTable    *shift,
00152                                               PRInt32*    state,
00153                                               unsigned char  *in,
00154                                               PRUint16    *out,
00155                                               PRUint32     inbuflen,
00156                                               PRUint32*    inscanlen
00157                                               );
00158 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA4(
00159                                               uShiftTable    *shift,
00160                                               PRInt32*    state,
00161                                               unsigned char  *in,
00162                                               PRUint16    *out,
00163                                               PRUint32     inbuflen,
00164                                               PRUint32*    inscanlen
00165                                               );
00166 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA5(
00167                                               uShiftTable    *shift,
00168                                               PRInt32*    state,
00169                                               unsigned char  *in,
00170                                               PRUint16    *out,
00171                                               PRUint32     inbuflen,
00172                                               PRUint32*    inscanlen
00173                                               );
00174 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA6(
00175                                               uShiftTable    *shift,
00176                                               PRInt32*    state,
00177                                               unsigned char  *in,
00178                                               PRUint16    *out,
00179                                               PRUint32     inbuflen,
00180                                               PRUint32*    inscanlen
00181                                               );
00182 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA7(
00183                                               uShiftTable    *shift,
00184                                               PRInt32*    state,
00185                                               unsigned char  *in,
00186                                               PRUint16    *out,
00187                                               PRUint32     inbuflen,
00188                                               PRUint32*    inscanlen
00189                                               );
00190 PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL(
00191                                                uShiftTable    *shift,
00192                                                PRInt32*    state,
00193                                                unsigned char  *in,
00194                                                PRUint16    *out,
00195                                                PRUint32     inbuflen,
00196                                                PRUint32*    inscanlen
00197                                                );
00198 
00199 PRIVATE PRBool uCnSAlways8BytesDecomposedHangul(
00200                                               uShiftTable    *shift,
00201                                               PRInt32*    state,
00202                                               unsigned char  *in,
00203                                               PRUint16    *out,
00204                                               PRUint32     inbuflen,
00205                                               PRUint32*    inscanlen
00206                                               );
00207 PRIVATE PRBool uCheckAndScanJohabHangul(
00208                                         uShiftTable    *shift,
00209                                         PRInt32*    state,
00210                                         unsigned char  *in,
00211                                         PRUint16    *out,
00212                                         PRUint32     inbuflen,
00213                                         PRUint32*    inscanlen
00214                                         );
00215 PRIVATE PRBool uCheckAndScanJohabSymbol(
00216                                         uShiftTable    *shift,
00217                                         PRInt32*    state,
00218                                         unsigned char  *in,
00219                                         PRUint16    *out,
00220                                         PRUint32     inbuflen,
00221                                         PRUint32*    inscanlen
00222                                         );
00223 
00224 PRIVATE PRBool uCheckAndScan4BytesGB18030(
00225                                           uShiftTable    *shift,
00226                                           PRInt32*    state,
00227                                           unsigned char  *in,
00228                                           PRUint16    *out,
00229                                           PRUint32     inbuflen,
00230                                           PRUint32*    inscanlen
00231                                           );
00232 
00233 PRIVATE PRBool uScanAlways2Byte(
00234                                 unsigned char*  in,
00235                                 PRUint16*    out
00236                                 );
00237 PRIVATE PRBool uScanAlways2ByteShiftGR(
00238                                        unsigned char*  in,
00239                                        PRUint16*    out
00240                                        );
00241 PRIVATE PRBool uScanAlways1Byte(
00242                                 unsigned char*  in,
00243                                 PRUint16*    out
00244                                 );
00245 PRIVATE PRBool uScanAlways1BytePrefix8E(
00246                                         unsigned char*  in,
00247                                         PRUint16*    out
00248                                         );
00249 PRIVATE PRBool uScanAlways2ByteUTF8(
00250                                     unsigned char*  in,
00251                                     PRUint16*    out
00252                                     );
00253 PRIVATE PRBool uScanAlways3ByteUTF8(
00254                                     unsigned char*  in,
00255                                     PRUint16*    out
00256                                     );
00257                                     /*=================================================================================
00258                                     
00259 =================================================================================*/
00260 PRIVATE const uScannerFunc m_scanner[uNumOfCharsetType] =
00261 {
00262     uCheckAndScanAlways1Byte,
00263     uCheckAndScanAlways2Byte,
00264     uCheckAndScanByTable,
00265     uCheckAndScanAlways2ByteShiftGR,
00266     uCheckAndScan2ByteGRPrefix8F,
00267     uCheckAndScan2ByteGRPrefix8EA2,
00268     uCheckAndScanAlways2ByteSwap,
00269     uCheckAndScanAlways4Byte,
00270     uCheckAndScanAlways4ByteSwap,
00271     uCheckAndScan2ByteGRPrefix8EA3,
00272     uCheckAndScan2ByteGRPrefix8EA4,
00273     uCheckAndScan2ByteGRPrefix8EA5,
00274     uCheckAndScan2ByteGRPrefix8EA6,
00275     uCheckAndScan2ByteGRPrefix8EA7,
00276     uCheckAndScanAlways1ByteShiftGL,
00277     uCnSAlways8BytesDecomposedHangul,
00278     uCheckAndScanJohabHangul,
00279     uCheckAndScanJohabSymbol,
00280     uCheckAndScan4BytesGB18030,
00281     uCheckAndScanAlways2ByteGR128
00282 };
00283 
00284 /*=================================================================================
00285 
00286 =================================================================================*/
00287 
00288 PRIVATE const uSubScannerFunc m_subscanner[uNumOfCharType] =
00289 {
00290     uScanAlways1Byte,
00291     uScanAlways2Byte,
00292     uScanAlways2ByteShiftGR,
00293     uScanAlways1BytePrefix8E,
00294     uScanAlways2ByteUTF8,
00295     uScanAlways3ByteUTF8
00296 };
00297 /*=================================================================================
00298 
00299 =================================================================================*/
00300 MODULE_PRIVATE PRBool uScan(  
00301                             uShiftTable    *shift,
00302                             PRInt32*    state,
00303                             unsigned char  *in,
00304                             PRUint16    *out,
00305                             PRUint32     inbuflen,
00306                             PRUint32*    inscanlen
00307                             )
00308 {
00309   return (* m_scanner[shift->classID]) (shift,state,in,out,inbuflen,inscanlen);
00310 }
00311 /*=================================================================================
00312 
00313 =================================================================================*/
00314 PRIVATE PRBool uScanAlways1Byte(
00315                                 unsigned char*  in,
00316                                 PRUint16*    out
00317                                 )
00318 {
00319   *out = (PRUint16) in[0];
00320   return PR_TRUE;
00321 }
00322 
00323 /*=================================================================================
00324 
00325 =================================================================================*/
00326 PRIVATE PRBool uScanAlways2Byte(
00327                                 unsigned char*  in,
00328                                 PRUint16*    out
00329                                 )
00330 {
00331   *out = (PRUint16) (( in[0] << 8) | (in[1]));
00332   return PR_TRUE;
00333 }
00334 /*=================================================================================
00335 
00336 =================================================================================*/
00337 PRIVATE PRBool uScanAlways2ByteShiftGR(
00338                                        unsigned char*  in,
00339                                        PRUint16*    out
00340                                        )
00341 {
00342   *out = (PRUint16) ((( in[0] << 8) | (in[1])) &  0x7F7F);
00343   return PR_TRUE;
00344 }
00345 
00346 /*=================================================================================
00347 
00348 =================================================================================*/
00349 PRIVATE PRBool uScanAlways1BytePrefix8E(
00350                                         unsigned char*  in,
00351                                         PRUint16*    out
00352                                         )
00353 {
00354   *out = (PRUint16) in[1];
00355   return PR_TRUE;
00356 }
00357 /*=================================================================================
00358 
00359 =================================================================================*/
00360 PRIVATE PRBool uScanAlways2ByteUTF8(
00361                                     unsigned char*  in,
00362                                     PRUint16*    out
00363                                     )
00364 {
00365   *out = (PRUint16) (((in[0] & 0x001F) << 6 )| (in[1] & 0x003F));
00366   return PR_TRUE;
00367 }
00368 
00369 /*=================================================================================
00370 
00371 =================================================================================*/
00372 PRIVATE PRBool uScanAlways3ByteUTF8(
00373                                     unsigned char*  in,
00374                                     PRUint16*    out
00375                                     )
00376 {
00377   *out = (PRUint16) (((in[0] & 0x000F) << 12 ) | ((in[1] & 0x003F) << 6)
00378     | (in[2] & 0x003F));
00379   return PR_TRUE;
00380 }
00381 /*=================================================================================
00382 
00383 =================================================================================*/
00384 PRIVATE PRBool uCheckAndScanAlways1Byte(
00385                                         uShiftTable    *shift,
00386                                         PRInt32*    state,
00387                                         unsigned char  *in,
00388                                         PRUint16    *out,
00389                                         PRUint32     inbuflen,
00390                                         PRUint32*    inscanlen
00391                                         )
00392 {
00393   /* Don't check inlen. The caller should ensure it is larger than 0 */
00394   *inscanlen = 1;
00395   *out = (PRUint16) in[0];
00396   
00397   return PR_TRUE;
00398 }
00399 
00400 /*=================================================================================
00401 
00402 =================================================================================*/
00403 PRIVATE PRBool uCheckAndScanAlways2Byte(
00404                                         uShiftTable    *shift,
00405                                         PRInt32*    state,
00406                                         unsigned char  *in,
00407                                         PRUint16    *out,
00408                                         PRUint32     inbuflen,
00409                                         PRUint32*    inscanlen
00410                                         )
00411 {
00412   if(inbuflen < 2)
00413     return PR_FALSE;
00414   else
00415   {
00416     *inscanlen = 2;
00417     *out = ((in[0] << 8) | ( in[1])) ;
00418     return PR_TRUE;
00419   }
00420 }
00421 /*=================================================================================
00422 
00423 =================================================================================*/
00424 PRIVATE PRBool uCheckAndScanAlways2ByteShiftGR(
00425                                                uShiftTable    *shift,
00426                                                PRInt32*    state,
00427                                                unsigned char  *in,
00428                                                PRUint16    *out,
00429                                                PRUint32     inbuflen,
00430                                                PRUint32*    inscanlen
00431                                                )
00432 {
00433   /*
00434    * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets
00435    * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets.
00436    * Only 2nd byte range needs to be checked because 
00437    * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp 
00438    */
00439   if(inbuflen < 2)    /* will lead to NS_OK_UDEC_MOREINPUT */
00440     return PR_FALSE;
00441   else if (! CHK_GR94(in[1]))  
00442   {
00443     *inscanlen = 2; 
00444     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
00445     return PR_TRUE;
00446   }
00447   else
00448   {
00449     *inscanlen = 2;
00450     *out = (((in[0] << 8) | ( in[1]))  & 0x7F7F);
00451     return PR_TRUE;
00452   }
00453 }
00454 /*=================================================================================
00455 
00456 =================================================================================*/
00457 PRIVATE PRBool uCheckAndScanAlways2ByteGR128(
00458                                                uShiftTable    *shift,
00459                                                PRInt32*    state,
00460                                                unsigned char  *in,
00461                                                PRUint16    *out,
00462                                                PRUint32     inbuflen,
00463                                                PRUint32*    inscanlen
00464                                                )
00465 {
00466   /*
00467    * The first byte should be in  [0xa1,0xfe] 
00468    * and the second byte can take any value with MSB = 1.
00469    * Used by CP949 -> Unicode converter.
00470    * Only 2nd byte range needs to be checked because 
00471    * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp 
00472    */
00473   if(inbuflen < 2)    /* will lead to NS_OK_UDEC_MOREINPUT */
00474     return PR_FALSE;
00475   else if (! in[1] & 0x80)     /* 2nd byte range check */
00476   {
00477     *inscanlen = 2; 
00478     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
00479     return PR_TRUE;
00480   }
00481   else
00482   {
00483     *inscanlen = 2;
00484     *out = (in[0] << 8) |  in[1];
00485     return PR_TRUE;
00486   }
00487 }
00488 /*=================================================================================
00489 
00490 =================================================================================*/
00491 PRIVATE PRBool uCheckAndScanByTable(
00492                                     uShiftTable    *shift,
00493                                     PRInt32*    state,
00494                                     unsigned char  *in,
00495                                     PRUint16    *out,
00496                                     PRUint32     inbuflen,
00497                                     PRUint32*    inscanlen
00498                                     )
00499 {
00500   PRInt16 i;
00501   const uShiftCell* cell = &(shift->shiftcell[0]);
00502   PRInt16 itemnum = shift->numOfItem;
00503   for(i=0;i<itemnum;i++)
00504   {
00505     if( ( in[0] >=  cell[i].shiftin_Min) &&
00506       ( in[0] <=  cell[i].shiftin_Max))
00507     {
00508       if(inbuflen < cell[i].reserveLen)
00509         return PR_FALSE;
00510       else
00511       {
00512         *inscanlen = cell[i].reserveLen;
00513         return (uSubScanner(cell[i].classID,in,out));
00514       }
00515     }
00516   }
00517   return PR_FALSE;
00518 }
00519 /*=================================================================================
00520 
00521 =================================================================================*/
00522 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8F(
00523                                             uShiftTable    *shift,
00524                                             PRInt32*    state,
00525                                             unsigned char  *in,
00526                                             PRUint16    *out,
00527                                             PRUint32     inbuflen,
00528                                             PRUint32*    inscanlen
00529                                             )
00530 {
00531   if((inbuflen < 3) ||(in[0] != 0x8F)) 
00532     return PR_FALSE;
00533   else if (! CHK_GR94(in[1]))  /* 2nd byte range check */
00534   {
00535     *inscanlen = 2; 
00536     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
00537     return PR_TRUE;
00538   }
00539   else if (! CHK_GR94(in[2]))  /* 3rd byte range check */
00540   {
00541     *inscanlen = 3; 
00542     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
00543     return PR_TRUE;
00544   }
00545   else
00546   {
00547     *inscanlen = 3;
00548     *out = (((in[1] << 8) | ( in[2]))  & 0x7F7F);
00549     return PR_TRUE;
00550   }
00551 }
00552 /*=================================================================================
00553 
00554 =================================================================================*/
00555 
/*
 * Shared body of the uCheckAndScan2ByteGRPrefix8EAX() scanners
 * (X = 2,3,4,5,6,7).  It expands to a single if/else statement that returns
 * on every path and uses the enclosing function's in, out, inbuflen and
 * inscanlen parameters.
 *
 *   - fewer than 4 bytes, or in[0] != 0x8E : return PR_FALSE
 *   - in[1] != PREFIX                      : *inscanlen = 2, *out = 0xFF
 *   - in[2] fails CHK_GR94                 : *inscanlen = 3, *out = 0xFF
 *   - in[3] fails CHK_GR94                 : *inscanlen = 4, *out = 0xFF
 *   - otherwise      : *inscanlen = 4, *out = (in[2],in[3]) & 0x7F7F
 * The last four cases return PR_TRUE.
 */
#define CNS_8EAX_4BYTE(PREFIX)                                  \
  if ((inbuflen < 4) || (in[0] != 0x8E))                        \
    return PR_FALSE;                                            \
  else if (in[1] != (PREFIX))                                   \
  {                                                             \
    *inscanlen = 2;                                             \
    *out = 0xFF;                                                \
    return PR_TRUE;                                             \
  }                                                             \
  else if (! CHK_GR94(in[2]))                                   \
  {                                                             \
    *inscanlen = 3;                                             \
    *out = 0xFF;                                                \
    return PR_TRUE;                                             \
  }                                                             \
  else                                                          \
  {                                                             \
    *inscanlen = 4;                                             \
    if (CHK_GR94(in[3]))                                        \
      *out = (((in[2] << 8) | in[3]) & 0x7F7F);                 \
    else                                                        \
      *out = 0xFF;                                              \
    return PR_TRUE;                                             \
  }
00586 
00587 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA2(
00588                                               uShiftTable    *shift,
00589                                               PRInt32*    state,
00590                                               unsigned char  *in,
00591                                               PRUint16    *out,
00592                                               PRUint32     inbuflen,
00593                                               PRUint32*    inscanlen
00594                                               )
00595 {
00596   CNS_8EAX_4BYTE(0xA2)
00597 }
00598 
00599 /*=================================================================================
00600 
00601 =================================================================================*/
00602 PRIVATE PRBool uCheckAndScanAlways2ByteSwap(
00603                                             uShiftTable    *shift,
00604                                             PRInt32*    state,
00605                                             unsigned char  *in,
00606                                             PRUint16    *out,
00607                                             PRUint32     inbuflen,
00608                                             PRUint32*    inscanlen
00609                                             )
00610 {
00611   if(inbuflen < 2)
00612     return PR_FALSE;
00613   else
00614   {
00615     *inscanlen = 2;
00616     *out = ((in[1] << 8) | ( in[0])) ;
00617     return PR_TRUE;
00618   }
00619 }
00620 /*=================================================================================
00621 
00622 =================================================================================*/
00623 PRIVATE PRBool uCheckAndScanAlways4Byte(
00624                                         uShiftTable    *shift,
00625                                         PRInt32*    state,
00626                                         unsigned char  *in,
00627                                         PRUint16    *out,
00628                                         PRUint32     inbuflen,
00629                                         PRUint32*    inscanlen
00630                                         )
00631 {
00632   if(inbuflen < 4)
00633     return PR_FALSE;
00634   else
00635   {
00636     *inscanlen = 4;
00637     if((0 ==in[0]) && ( 0==in[1]))
00638       *out = ((in[2] << 8) | ( in[3])) ;
00639     else
00640       *out = 0xFFFD ;
00641     return PR_TRUE;
00642   }
00643 }
00644 
00645 /*=================================================================================
00646 
00647 =================================================================================*/
00648 PRIVATE PRBool uCheckAndScanAlways4ByteSwap(
00649                                             uShiftTable    *shift,
00650                                             PRInt32*    state,
00651                                             unsigned char  *in,
00652                                             PRUint16    *out,
00653                                             PRUint32     inbuflen,
00654                                             PRUint32*    inscanlen
00655                                             )
00656 {
00657   if(inbuflen < 4)
00658     return PR_FALSE;
00659   else
00660   {
00661     *inscanlen = 4;
00662     if((0 ==in[2]) && ( 0==in[3]))
00663       *out = ((in[1] << 8) | ( in[0])) ;
00664     else
00665       *out = 0xFFFD ;
00666     return PR_TRUE;
00667   }
00668 }
00669 /*=================================================================================
00670 
00671 =================================================================================*/
00672 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA3(
00673                                               uShiftTable    *shift,
00674                                               PRInt32*    state,
00675                                               unsigned char  *in,
00676                                               PRUint16    *out,
00677                                               PRUint32     inbuflen,
00678                                               PRUint32*    inscanlen
00679                                               )
00680 {
00681   CNS_8EAX_4BYTE(0xA3)
00682 }
00683 /*=================================================================================
00684 
00685 =================================================================================*/
00686 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA4(
00687                                               uShiftTable    *shift,
00688                                               PRInt32*    state,
00689                                               unsigned char  *in,
00690                                               PRUint16    *out,
00691                                               PRUint32     inbuflen,
00692                                               PRUint32*    inscanlen
00693                                               )
00694 {
00695   CNS_8EAX_4BYTE(0xA4)
00696 }
00697 /*=================================================================================
00698 
00699 =================================================================================*/
00700 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA5(
00701                                               uShiftTable    *shift,
00702                                               PRInt32*    state,
00703                                               unsigned char  *in,
00704                                               PRUint16    *out,
00705                                               PRUint32     inbuflen,
00706                                               PRUint32*    inscanlen
00707                                               )
00708 {
00709   CNS_8EAX_4BYTE(0xA5)
00710 }
00711 /*=================================================================================
00712 
00713 =================================================================================*/
00714 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA6(
00715                                               uShiftTable    *shift,
00716                                               PRInt32*    state,
00717                                               unsigned char  *in,
00718                                               PRUint16    *out,
00719                                               PRUint32     inbuflen,
00720                                               PRUint32*    inscanlen
00721                                               )
00722 {
00723   CNS_8EAX_4BYTE(0xA6)
00724 }
00725 /*=================================================================================
00726 
00727 =================================================================================*/
00728 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA7(
00729                                               uShiftTable    *shift,
00730                                               PRInt32*    state,
00731                                               unsigned char  *in,
00732                                               PRUint16    *out,
00733                                               PRUint32     inbuflen,
00734                                               PRUint32*    inscanlen
00735                                               )
00736 {
00737   CNS_8EAX_4BYTE(0xA7)
00738 }
00739 /*=================================================================================
00740 
00741 =================================================================================*/
00742 PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL(
00743                                                uShiftTable    *shift,
00744                                                PRInt32*    state,
00745                                                unsigned char  *in,
00746                                                PRUint16    *out,
00747                                                PRUint32     inbuflen,
00748                                                PRUint32*    inscanlen
00749                                                )
00750 {
00751   /* Don't check inlen. The caller should ensure it is larger than 0 */
00752   *inscanlen = 1;
00753   *out = (PRUint16) in[0] | 0x80;
00754   
00755   return PR_TRUE;
00756 }
00757 /*=================================================================================
00758 
00759 =================================================================================*/
/*
 * Constants for composing a precomposed Hangul syllable from the indices of
 * its leading consonant (L), vowel (V) and trailing consonant (T):
 *   syllable = (L * VCount + V) * TCount + T + SBase
 */
#define SBase   0xAC00               /* code of the first composed syllable */
#define SCount  11172                /* LCount * VCount * TCount            */
#define LCount  19                   /* number of leading consonants        */
#define VCount  21                   /* number of vowels                    */
#define TCount  28                   /* trailing consonants, incl. "none"   */
#define NCount  (VCount * TCount)    /* syllables per leading consonant     */
00766 
00767 PRIVATE PRBool uCnSAlways8BytesDecomposedHangul(
00768                                               uShiftTable    *shift,
00769                                               PRInt32*    state,
00770                                               unsigned char  *in,
00771                                               PRUint16    *out,
00772                                               PRUint32     inbuflen,
00773                                               PRUint32*    inscanlen
00774                                               )
00775 {
00776   
00777   PRUint16 LIndex, VIndex, TIndex;
00778   /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */
00779   if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
00780     (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6]))
00781     return PR_FALSE;
00782   
00783   /* Compute LIndex  */
00784   if((in[3] < 0xa1) && (in[3] > 0xbe)) { /* illegal leading consonant */
00785     return PR_FALSE;
00786   } 
00787   else {
00788     static const PRUint8 lMap[] = {
00789       /*        A1   A2   A3   A4   A5   A6   A7  */
00790       0,   1,0xff,   2,0xff,0xff,   3,
00791         /*   A8   A9   AA   AB   AC   AD   AE   AF  */
00792         4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
00793         /*   B0   B1   B2   B3   B4   B5   B6   B7  */
00794         0xff,   6,   7,   8,0xff,   9,  10,  11,
00795         /*   B8   B9   BA   BB   BC   BD   BE       */
00796         12,  13,  14,  15,  16,  17,  18     
00797     };
00798     
00799     LIndex = lMap[in[3] - 0xa1];
00800     if(0xff == (0xff & LIndex))
00801       return PR_FALSE;
00802   }
00803   
00804   /* Compute VIndex  */
00805   if((in[5] < 0xbf) && (in[5] > 0xd3)) { /* illegal medial vowel */
00806     return PR_FALSE;
00807   } 
00808   else {
00809     VIndex = in[5] - 0xbf;
00810   }
00811   
00812   /* Compute TIndex  */
00813   if(0xd4 == in[7])  
00814   {
00815     TIndex = 0;
00816   } 
00817   else if((in[7] < 0xa1) && (in[7] > 0xbe)) {/* illegal trailling consonant */
00818     return PR_FALSE;
00819   } 
00820   else {
00821     static const PRUint8 tMap[] = {
00822       /*        A1   A2   A3   A4   A5   A6   A7  */
00823       1,   2,   3,   4,   5,   6,   7,
00824         /*   A8   A9   AA   AB   AC   AD   AE   AF  */
00825         0xff,   8,   9,  10,  11,  12,  13,  14,
00826         /*   B0   B1   B2   B3   B4   B5   B6   B7  */
00827         15,  16,  17,0xff,  18,  19,  20,  21,
00828         /*   B8   B9   BA   BB   BC   BD   BE       */
00829         22,0xff,  23,  24,  25,  26,  27     
00830     };
00831     TIndex = tMap[in[7] - 0xa1];
00832     if(0xff == (0xff & TIndex))
00833       return PR_FALSE;
00834   }
00835   
00836   *inscanlen = 8;
00837   /* the following line is from Unicode 2.0 page 3-13 item 5 */
00838   *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
00839   
00840   return PR_TRUE;
00841 }
00842 /*=================================================================================
00843 
00844 =================================================================================*/
00845 
00846 PRIVATE PRBool uCheckAndScanJohabHangul(
00847                                         uShiftTable    *shift,
00848                                         PRInt32*    state,
00849                                         unsigned char  *in,
00850                                         PRUint16    *out,
00851                                         PRUint32     inbuflen,
00852                                         PRUint32*    inscanlen
00853                                         )
00854 {
00855 /* since we don't have code to convert Johab to Unicode right now     *
00856   * make this part of code #if 0 to save space untill we fully test it */
00857   if(inbuflen < 2)
00858     return PR_FALSE;
00859   else {
00860   /*
00861   * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183
00862   * of "CJKV Information Processing" for details
00863     */
00864     static const PRUint8 lMap[32]={ /* totaly 19  */
00865       0xff,0xff,0,   1,   2,   3,   4,   5,    /* 0-7    */
00866         6,   7,   8,   9,   10,  11,  12,  13,   /* 8-15   */
00867         14,  15,  16,  17,  18,  0xff,0xff,0xff, /* 16-23  */
00868         0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff  /* 24-31  */
00869     };
00870     static const PRUint8 vMap[32]={ /* totaly 21 */
00871       0xff,0xff,0xff,0,   1,   2,   3,   4,    /* 0-7   */
00872         0xff,0xff,5,   6,   7,   8,   9,   10,   /* 8-15  */
00873         0xff,0xff,11,  12,  13,  14,  15,  16,   /* 16-23 */
00874         0xff,0xff,17,  18,  19,  20,  0xff,0xff  /* 24-31 */
00875     };
00876     static const PRUint8 tMap[32]={ /* totaly 29 */
00877       0xff,0,   1,   2,   3,   4,   5,   6,    /* 0-7   */
00878         7,   8,   9,   10,  11,  12,  13,  14,   /* 8-15  */
00879         15,  16,  0xff,17,  18,  19,  20,  21,   /* 16-23 */
00880         22,  23,  24,  25,  26,  27,  0xff,0xff  /* 24-31 */
00881     };
00882     PRUint16 ch = (in[0] << 8) | in[1];
00883     PRUint16 LIndex, VIndex, TIndex;
00884     if(0 == (0x8000 & ch))
00885       return PR_FALSE;
00886     LIndex=lMap[(ch>>10)& 0x1F];
00887     VIndex=vMap[(ch>>5) & 0x1F];
00888     TIndex=tMap[(ch>>0) & 0x1F];
00889     if((0xff==(LIndex)) || 
00890       (0xff==(VIndex)) || 
00891       (0xff==(TIndex)))
00892       return PR_FALSE;
00893     /* the following line is from Unicode 2.0 page 3-13 item 5 */
00894     *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
00895     *inscanlen = 2;
00896     return PR_TRUE;
00897   }
00898 }
00899 PRIVATE PRBool uCheckAndScanJohabSymbol(
00900                                         uShiftTable    *shift,
00901                                         PRInt32*    state,
00902                                         unsigned char  *in,
00903                                         PRUint16    *out,
00904                                         PRUint32     inbuflen,
00905                                         PRUint32*    inscanlen
00906                                         )
00907 {
00908   if(inbuflen < 2)
00909     return PR_FALSE;
00910   else {
00911   /*
00912   * The following code are based on the Perl code lised under
00913   * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of
00914   * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>
00915   *
00916   * sub johab2ks ($) { # Convert Johab to ISO-2022-KR
00917   *   my @johab = unpack("C*", $_[0]);
00918   *   my ($offset, $d8_off) = (0,0);
00919   *   my @out = ();
00920   *   while(($hi, $lo) = splice($johab, 0, 2)) {
00921   *     $offset = 1 if ($hi > 223 and $hi < 250);
00922   *     $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
00923   *     push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
00924   *            ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
00925   *            $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
00926   *   }
00927   *   return pack ("C*", @out);
00928   * }
00929   * additional comments from Ken Lunde
00930   * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
00931   * has three possible return values:
00932   * 0  if $hi is not equal to 216
00933   * 94 if $hi is euqal to 216 and if $lo is greater than 160
00934   * 42 if $hi is euqal to 216 and if $lo is not greater than 160
00935     */ 
00936     unsigned char hi = in[0];
00937     unsigned char lo = in[1];
00938     PRUint16 offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0;
00939     PRUint16 d8_off = 0;
00940     if(216 == hi) {
00941       if( lo > 160)
00942         d8_off = 94;
00943       else
00944         d8_off = 42;
00945     }
00946     
00947     *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) -
00948       (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) |
00949       (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : 
00950     128));
00951     *inscanlen = 2;
00952     return PR_TRUE;
00953   }
00954 }
00955 PRIVATE PRBool uCheckAndScan4BytesGB18030(
00956                                           uShiftTable    *shift,
00957                                           PRInt32*    state,
00958                                           unsigned char  *in,
00959                                           PRUint16    *out,
00960                                           PRUint32     inbuflen,
00961                                           PRUint32*    inscanlen
00962                                           )
00963 {
00964   PRUint32  data;
00965   if(inbuflen < 4) 
00966     return PR_FALSE;
00967   
00968   if((in[0] < 0x81 ) || (0xfe < in[0])) 
00969     return PR_FALSE;
00970   if((in[1] < 0x30 ) || (0x39 < in[1])) 
00971     return PR_FALSE;
00972   if((in[2] < 0x81 ) || (0xfe < in[2])) 
00973     return PR_FALSE;
00974   if((in[3] < 0x30 ) || (0x39 < in[3])) 
00975     return PR_FALSE;
00976   
00977   data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) + 
00978     (in[2] - 0x81)) * 10 ) + (in[3] - 0x30);
00979   
00980   *inscanlen = 4;
00981   *out = (data < 0x00010000) ? data : 0xFFFD;
00982   return PR_TRUE;
00983 }