Back to index

im-sdk  12.3.91
ProcGbk.c
Go to the documentation of this file.
00001 #include  <stdio.h>
00002 #include  <string.h>
00003 #include  <stdlib.h>
00004 #include  "GeneType.h"
00005 #include  "PyBasic.h"
00006 #include  "ciku.h"
00007 
00008 JINT   GbkHzcodeToYjcode(JINT nHzcode);  /* Not defined in this listing. TestGbkHzcodeToYj() treats 0x01FF as "no Yinjie". */
00009 JINT   GetNextLine(FILE* pfFile, CHAR* szBuf);  /* Not defined in this listing. ReadGbkLine() keeps reading while this returns 1. */
00010 VOID   ProcGbkHz(VOID);
00011 VOID   ReadGbkLine(VOID);
00012 VOID   CreateGbkHzcodeToYj();
00013 VOID   TestGbkHzcodeToYj();
00014 JINT   FastMatchYinJieStr(CHAR* szPystr);  /* Not defined in this listing. ReadGbkLine() treats -1 as "not a standard Yinjie". */
00015 JINT   HzcodeToYjcode(JINT nHzcode);  /* Not defined in this listing. Queried for codes 0xB0A1..0xF7FE by CreateGbkHzcodeToYj(). */
00016 VOID   Show32To9Int(JUINT *pnHzYj32);  /* Prints 32 Yinjie codes as nine hexadecimal words. */
00017 
00018 static UCHAR  szGbkLine[30000][40];  /* Lines of GBK.txt longer than 2 bytes, stored by ReadGbkLine(). */
00019 static JWORD  wGbkCode[30000];  /* Per line: Hanzi code built from bytes 7 (high) and 8 (low) of the line. */
00020 static JINT   nGbkYj[30000];  /* Per line: FastMatchYinJieStr() result, or 0xFFFF when it returned -1. */
00021 static JINT   nGbkLine;  /* Number of rows of szGbkLine[] in use. */
00022 
00023 JINT   nGbkNumByYj[NUM_YINJIE];  /* Per Yinjie: number of Hanzi collected by ProcGbkHz(). */
00024 JWORD  wGbkHzByYj[NUM_YINJIE][400];  /* Per Yinjie: the collected Hanzi codes; each row holds at most 400. */
00025 JINT   nYj[0xFFFF];  /* Hanzi code -> Yinjie table filled by CreateGbkHzcodeToYj(); 0x01FF means none. Valid indices are 0..0xFFFE. */
00026 
00027 VOID ProcGbkHz(VOID)
00028 {
00029        JINT   i, nTmp, nTmp2, nTmp3;
00030 
00031        ReadGbkLine();
00032 
00033        memset (nGbkNumByYj, '\0', NUM_YINJIE * sizeof(JINT));
00034        for (i = 0; i < NUM_YINJIE; i++)
00035               memset (wGbkHzByYj[i], '\0', 400 * sizeof(JWORD));
00036 
00037        nTmp2 = nTmp3 = 0;
00038        nTmp = 0;
00039        for (i = 0; i < nGbkLine; i++)
00040        {
00041               if (nGbkYj[i] != 0xFFFF)
00042               {
00043                      /* From 0xB0A1 to 0xF7FF was processed as Normal GB2312 Hanzi */
00044                      if( (wGbkCode[i] >= 0x8140) && ( ! ( ((wGbkCode[i] & 0xFF00) >= 0xB000) && ((wGbkCode[i] & 0xFF00) <= 0xF700)
00045                                                    && ((wGbkCode[i] & 0x00FF) >= 0x00A1))) )
00046                      {
00047                             wGbkHzByYj[nGbkYj[i]][nGbkNumByYj[nGbkYj[i]]] = wGbkCode[i];
00048                             nGbkNumByYj[nGbkYj[i]] ++;
00049                             nTmp2 ++;
00050                      }
00051               }
00052               else
00053               {
00054               /*     printf ("!!%s", szGbkLine[i]);     */
00055                      nTmp ++;
00056               }
00057        }
00058        printf ("Total %d Non_Standard Yinjie in < GBK.txt >\n", nTmp);
00059 
00060        for (i = 0; i < NUM_YINJIE; i++)
00061        {
00062               if ((i != 0) && (i % 5 == 0))
00063                      printf ("\n");
00064               printf("%3d|%-6s[%3d] ", i, YINJIESTR_CSZ[i], nGbkNumByYj[i]);
00065        }
00066        printf("\n");
00067 
00072 }
00073 
00074 
00075 VOID ReadGbkLine(VOID)
00076 {
00077        FILE*  pfGbk;
00078        CHAR   szGbkName[] = "GBK.txt";
00079        UCHAR  szTmp[40];
00080        JINT   i, k, m, t2, t3;
00081        JINT   nTmp, nInvalidNum;
00082 
00083        pfGbk = fopen (szGbkName, "rb");
00084        if (pfGbk == NULL)
00085        {
00086               printf ("Failed to Open File %s\n", szGbkName);
00087               exit (FALSE);
00088        }
00089 
00090        k = 0;
00091        for (t2 = 1; t2 == 1;  )
00092        {
00093               memset(szTmp, '\0', 40);
00094               t2 = GetNextLine(pfGbk, (CHAR*)szTmp);
00095               t3 = strlen((CHAR*)szTmp);
00096 
00097               if (t3 > 2)
00098               {
00099                      for (m = 0; m < t3; m++)
00100                             szGbkLine[k][m] = szTmp[m];
00101                      k++;
00102               }
00103        }
00104 
00105        fclose (pfGbk);
00106        nGbkLine = k;
00107        printf("nGbkLine is %d\n", nGbkLine);
00108 
00109        /* Get GBK Hanzi Code. */
00110        memset (wGbkCode, 0, sizeof(JWORD) * 30000);
00111        for (i = 0; i < nGbkLine; i++)
00112               wGbkCode[i] = (JWORD)(((JWORD)szGbkLine[i][7] << 8) + szGbkLine[i][8]);
00113 
00114        /* Get GBK Yinjie Code. If not a standard Yinjie, use 0xFFFF instead */
00115        nInvalidNum = 0;
00116        memset (nGbkYj, 0, sizeof(JINT) * 30000);
00117        for (i = 0; i < nGbkLine; i++)
00118        {
00119               memset(szTmp, '\0', 40);
00120               for (k = 10; (szGbkLine[i][k] >= 'a') && (szGbkLine[i][k] <= 'z'); k++)
00121                      szTmp[k - 10] = szGbkLine[i][k];
00122 
00123               nTmp = FastMatchYinJieStr((CHAR*)szTmp);
00124               if (nTmp == -1)
00125               {
00126                      nGbkYj[i] = 0xFFFF;
00127                      nInvalidNum ++;
00128               }
00129               else
00130                      nGbkYj[i] = nTmp;
00131        }
00132 
00133        printf("nInvalidNum in GBK.txt is %d\n", nInvalidNum);
00134 }
00135 
00136 
00137 /*
00138 **  Create (JUINT)GBKHZCODETOYJ[] array to replace GBHZCODETOYJ[].
00139 **  From 0x8140 to 0xFEFE, not include 0xA140 to 0xA9FE.
00140 **  Notice: Reserved Area I and Reserved Area II are also included
00141 **  In this Array.
00142 **
00143 **  This data is needed in file < PyBasic.h >
00144 */
00145 VOID CreateGbkHzcodeToYj()
00146 {
00147        JINT   i, j, k, w, nTmp, nHzcode;
00148        JUINT  nHzYj32[32];
00149 
00150        /* Init nYj[] Array */
00151        for (i = 0; i < 0xFFFF; i++)
00152               nYj[i] = 0x01FF;
00153 
00154        /* Fill nYj[] with data come from nGbkYj[] */
00155        w = 0;
00156        for (i = 0; i < nGbkLine; i++)
00157               if ((nGbkYj[i] >= 0) && (nGbkYj[i] < NUM_YINJIE))
00158               {
00159                      nYj[wGbkCode[i]] = nGbkYj[i];
00160                      w++;
00161               }
00162        printf("Valid Yinjie in nGbkYj is %d\n", w);
00163 
00164        /* I => High, J => Low */
00165        /* Recover these GB Info by GOOD data */
00166        w = 0;
00167        for (i = 0xB0; i <= 0xF7; i++)
00168               for (j = 0xA1; j <= 0xFE; j++)
00169               {
00170                      nHzcode = (i << 8) + j;
00171                      nTmp   = HzcodeToYjcode(nHzcode);
00172                      if ((nTmp >= 0) && (nTmp < NUM_YINJIE))
00173                      {
00174                             nYj[nHzcode] = nTmp;
00175                             w++;
00176                      }
00177               }
00178        printf("Valid Yinjie in HzcodeToYjcode() is %d\n", w);
00179 
00180 
00181        /* [0x8140 ~ 0xA0FE], include 0x??7F */
00182        /* 32 * 191 */
00183        w = 0;
00184        for (k = 0; k < 32; k++)
00185               nHzYj32[k] = 0x01FF;
00186 
00187        for (i = 0x81; i <= 0xA0; i++)
00188        {
00189               for (j = 0x40; j <= 0xFE; j++)
00190               {
00191                      nHzcode          = (i << 8) + j;
00192                      nHzYj32[w] = nYj[nHzcode];
00193                      w++;
00194                      if (w == 32)
00195                      {
00196                             Show32To9Int(nHzYj32);
00197                             w = 0;
00198                             for (k = 0; k < 32; k++)
00199                                    nHzYj32[k] = 0x01FF;
00200                      }
00201               }
00202        }
00203        if (w != 0)
00204               Show32To9Int(nHzYj32);
00205        printf("\n**0x8140 ~ 0xA0FE**\n");
00206 
00207        /* 85 * 191 */
00208        w = 0;
00209        for (k = 0; k < 32; k++)
00210               nHzYj32[k] = 0x01FF;
00211 
00212        for (i = 0xAA; i <= 0xFE; i++)
00213        {
00214               for (j = 0x40; j <= 0xFE; j++)
00215               {
00216                      nHzcode          = (i << 8) + j;
00217                      nHzYj32[w] = nYj[nHzcode];
00218                      w++;
00219                      if (w == 32)
00220                      {
00221                             Show32To9Int(nHzYj32);
00222                             w = 0;
00223                             for (k = 0; k < 32; k++)
00224                                    nHzYj32[k] = 0x01FF;
00225                      }
00226               }
00227        }
00228        if (w != 0)
00229               Show32To9Int(nHzYj32);
00230        printf("\n**0xAA40 ~ 0xFEFE**\n");
00231 }
00232 
00233 
00234 /*
00235 **  pnHzYj32 point to a 32 int array which indicates the Yinjie code.
00236 */
00237 VOID Show32To9Int(JUINT *pnHzYj32)
00238 {
00239        JUINT  nNine[9];
00240        JUINT  i, nFrom, nTo, nToMode, nTmp1, nTmp2;
00241 
00242        for (i = 0; i < 9; i++)
00243               nNine[i] = 0x00000000;
00244 
00245        for (i = 0; i < 32; i++)
00246        {
00247               nFrom = (i * 9) / 32;
00248               nTo   = ((i + 1) * 9) / 32;
00249               if (nFrom == nTo)
00250               {
00251                      nToMode = 32 - (((i + 1) * 9) % 32);
00252                      nTmp1  = nNine[nFrom];
00253                      nTmp2  = pnHzYj32[i] << nToMode;
00254                      nTmp2  |= nTmp1;
00255                      nNine[nFrom] |= nTmp2;
00256               }
00257               else /* (nTo > nFrom) */
00258               {
00259                      nToMode = ((i + 1) * 9) % 32;
00260 
00261                      nTmp1  = nNine[nFrom];
00262                      nTmp2  = pnHzYj32[i] >> nToMode;
00263                      nTmp2  |= nTmp1;
00264                      nNine[nFrom] |= nTmp2;
00265 
00266                      nTmp1  = nNine[nTo];
00267                      nTmp2  = pnHzYj32[i] << (32 - nToMode);
00268                      nTmp2  |= nTmp1;
00269                      nNine[nTo] |= nTmp2;
00270               }
00271        }
00272 
00273        printf("    ");
00274        for (i = 0; i < 9; i++)
00275               printf("0x%08X, ", nNine[i]);
00276        printf("\n");
00277 }
00278 
00279 
00280 VOID TestGbkHzcodeToYj()
00281 {
00282        UCHAR  chHz[3];
00283        JINT   nHzcode;
00284        JINT   nYj;
00285        JINT   w;
00286 
00287        w = 0;
00288        chHz[2] = '\0';
00289        for (chHz[0] = 0x81; chHz[0] <= 0xFE; chHz[0]++)
00290        {
00291               for (chHz[1] = 0x40; chHz[1] <= 0xFE; chHz[1]++)
00292               {
00293                      nHzcode = ((JINT)chHz[0] << 8) + (JINT)chHz[1];
00294                      nYj    = GbkHzcodeToYjcode(nHzcode);
00295                      printf("0x%02X%02X  %s      ", (JUINT)chHz[0], (JUINT)chHz[1], chHz);
00296                      if (nYj == 0x01FF)
00297                             printf("[NONE]\n");
00298                      else if ((nYj >= 0) && (nYj < NUM_YINJIE ))
00299                      {
00300                             printf("%s\n", YINJIESTR_CSZ[nYj]);
00301                             w++;
00302                      }
00303                      else
00304                             printf("*Error*\n");
00305               }
00306        }
00307 
00308        printf("Valid Yinjie in GBKHZCODETOYJ[] is %d", w);
00309 }
00310 
00311