Back to index

im-sdk  12.3.91
md.c
Go to the documentation of this file.
00001 /* @(#)md.c   1.7 97/10/31 */
00002 #include <stdio.h>
00003 #include <iconv.h>
00004 #include <assert.h>
00005 #include <stdlib.h>
00006 #include <string.h>
00007 #include "utfchar.h"
00008 #include "hhentry.h"
00009 #include "md_hanja_dic.h"
00010 
00011 void
00012 hash_table_init (HHList table[], int size);
00013 
00014 iconv_t prepare_iconv (const char *to_encoding,
00015                      const char *from_encoding);
00016 
00017 extern void
00018 test_read_first (FILE *fp);
00019 
00020 HHList hash_table[HASH_TABLE_NUMBER];
00021 
00022 HHList hhlist;
00023 
00024 int
00025 main(argc,argv)
00026      int argc;
00027      char **argv;
00028 {
00029   char sname[40],sword[40]; /* Dictionary Source */
00030   char tname[40],tword[40]; /* Dictionary Target */
00031   
00032   int i,j,k,count,bad;
00033   unsigned char c,info[80];
00034   FILE *s;
00035   FILE *fdic;
00036   HH *hh;
00037 
00038   iconv_t cd;
00039   iconv_t cd_utf8_to_utf16;
00040   int hval;
00041 
00042   if (argc != 3){
00043     fprintf (stderr, "you should provide one data file to process\n");
00044     exit (-1);
00045   }
00046   
00047   cd = prepare_iconv ("UTF-8", "EUC-KR");
00048   cd_utf8_to_utf16 = prepare_iconv ("UTF-16", "UTF-8");
00049   
00050   if (cd == (iconv_t) -1){
00051     fprintf (stderr, "Failed open iconv, esiting...\n");
00052     exit (-1);
00053   }
00054     
00055 
00056   sprintf(sname, argv[1]);
00057   if ((s = fopen(sname,"r")) != NULL){
00058     printf("Processing %s...\n",sname);
00059     count=0; bad=0;
00060     while (1)
00061       {
00062        j = 0;
00063        while (c = getc(s),(!feof(s) && (c != '\n')))
00064          info[j++] = c;
00065        if (feof(s))
00066          break;
00067        j = 0;
00068        if (k = 0,info[j++] == '[') {
00069          if (strchr(info+j,']'))
00070            while (c = info[j++],c != ']')
00071              if (c > 0x7f)
00072               sword[k++] = c;
00073              else { bad = 1; break; }
00074          else
00075            bad = 1;
00076        }
00077        else bad = 1;
00078        sword[k]=0;
00079        if (k = 0,info[j++] == '[') {
00080          if (strchr(info+j,']'))
00081            while (c = info[j++],c != ']')
00082              if (c > 0x7f) tword[k++] = c; else { bad = 1; break; }
00083          else bad = 1;
00084        }
00085        else
00086          bad = 1;
00087        tword[k] = 0;
00088        count++;
00089        if (bad)
00090          {
00091            printf("Bad data on %d\n",count); bad = 0;
00092          }
00093        else if (strlen(sword)>MAXLENGTH) {
00094          printf("Too long word on %d\n",count);
00095        } else if (strlen(sword)!=strlen(tword)) {
00096          printf("Incompatible data on %d\n",count);
00097        } else {
00098          
00099          hh = process_hangul_hanja_pair ((iconv_t) -1, sword, tword);
00100          /* verification purpose */
00101          if (!hh){
00102            fprintf (stderr,
00103                    "error in processing %s, %s\n", sword, tword);
00104            exit (-1);
00105          } else {
00106            UTFCHAR *myutf16;
00107            
00108            char myinbuf[100], myoutbuf[100];
00109            char *myinptr, *myoutptr;
00110            size_t myinlen, myoutlen;
00111            size_t myiconv_ret;
00112            int utf16len = 0;
00113            
00114            memset (myinbuf, 0, sizeof (myinbuf));
00115            memset (myoutbuf, 0, sizeof (myoutbuf));
00116            myinlen = strlen (hh->utf_hangul);
00117            myoutlen = sizeof (myoutbuf);
00118            myinptr = myinbuf, myoutptr = myoutbuf;
00119 
00120            memcpy (myinbuf, hh->utf_hangul, myinlen);
00121            myiconv_ret =
00122              iconv (cd_utf8_to_utf16,
00123                    &myinptr, &myinlen, &myoutptr, &myoutlen);
00124            utf16len = sizeof (myoutbuf) - myoutlen;
00125            myutf16 =
00126              (UTFCHAR *) calloc (utf16len + 1,
00127                               sizeof (UTFCHAR));
00128            memcpy (myutf16, myoutbuf, utf16len);
00129            
00130            hval = hash (myutf16);
00131            hhlist_add_hh (&hash_table[hval], hh);
00132            free (myutf16);
00133            
00134          }
00135          /* verification purpose */
00136          hh_free (hh);
00137        }
00138       }
00139 
00140     fclose(s);
00141   }
00142 
00143   iconv_close (cd);
00144   iconv_close (cd_utf8_to_utf16);
00145 
00146   
00147   fdic = fopen (argv[2], "w");
00148   hash_table_dump_content (hash_table, HASH_TABLE_NUMBER, fdic);
00149         
00150   fclose (fdic);
00151 
00152   for (i = 0; i < HASH_TABLE_NUMBER; i++){
00153     printf ("array [%d]: %d items\n", i, hash_table[i].n_count);
00154     /*
00155     hhlist_print_string (hash_table + i, NULL);
00156     */
00157     printf ("==================================\n");
00158   }
00159 
00160 
00161   return(0);
00162 }
00163 
00164 HH *
00165 process_hangul_hanja_pair (iconv_t cd, char *str_hangul, char *str_hanja)
00166 {
00167   int len_hangul, len_hanja;
00168   char *ptr_from, *ptr_to;
00169   char euc_hangul[100], euc_hanja[100];
00170   char utf_hangul[100], utf_hanja[100];
00171   int len_from, len_to;
00172   size_t iconv_return;
00173 
00174 
00175   HH *hh_return;
00176   
00177   assert (str_hangul != NULL);
00178   assert (str_hanja != NULL);
00179   
00180   len_hangul = strlen (str_hangul);
00181 
00182   len_hanja = strlen (str_hanja);
00183   if (cd == (iconv_t)-1){
00184     hh_return = (HH *) calloc (1, sizeof (HH));
00185     hh_return->utf_hangul = (unsigned char *)strdup (str_hangul);
00186     hh_return->utf_hanja = (unsigned char *)strdup (str_hanja);
00187     return hh_return;
00188     
00189   } else {
00190   
00191     strcpy (euc_hangul, str_hangul);
00192     strcpy (euc_hanja, str_hanja);
00193 
00194     hh_return = (HH *) calloc (1, sizeof (HH));
00195     hh_return->utf_hangul = NULL;
00196     hh_return->utf_hanja = NULL;
00197   
00198     ptr_from = euc_hangul, ptr_to = utf_hangul;
00199     len_from = strlen (euc_hangul), len_to = sizeof (utf_hangul);
00200                 
00201     iconv_return =
00202       iconv (cd, &ptr_from, &len_from, &ptr_to, &len_to);
00203     if (iconv_return == (size_t) -1){
00204       fprintf (stderr, "There was an error doing iconv with %s\n",
00205               euc_hangul);
00206       perror ("Following error:\n");
00207       return NULL;
00208     } else {
00209       /*
00210        int u8_len = 0;
00211       */
00212       int j;
00213 
00214       j = 100 - len_to;
00215 
00216       hh_return->utf_hangul =
00217        (unsigned char *) calloc (j +1, sizeof (unsigned char));
00218       memcpy (hh_return->utf_hangul, utf_hangul, j);
00219     }
00220   
00221     ptr_from = euc_hanja, ptr_to = utf_hanja;
00222     len_from = strlen (euc_hanja), len_to = sizeof (utf_hanja);
00223                 
00224     iconv_return =
00225       iconv (cd, &ptr_from, &len_from, &ptr_to, &len_to);
00226     if (iconv_return == (size_t) -1){
00227       fprintf (stderr, "There was an error doing iconv with %s\n",
00228               euc_hanja);
00229       perror ("Following error:\n");
00230       return NULL;
00231     } else {
00232       int j;
00233 
00234       j = 100 - len_to;
00235     
00236       hh_return->utf_hanja =
00237        (unsigned char *) calloc (j + 1, sizeof (unsigned char));
00238       memcpy (hh_return->utf_hanja, utf_hanja, j);
00239     }
00240     return hh_return;
00241   }
00242 
00243 
00244 
00245 }
00246 
00247 
/*
 * Open a conversion descriptor from FROM_ENCODING to TO_ENCODING.
 *
 * Returns the descriptor from iconv_open(), or (iconv_t) -1 after printing
 * a diagnostic to stderr when either name is NULL or the conversion cannot
 * be opened.  The caller releases a valid descriptor with iconv_close().
 */
iconv_t
prepare_iconv
(const char *to_encoding, const char *from_encoding)
{
  iconv_t cd;

  /* iconv_open() must not be handed NULL names. */
  if (to_encoding == NULL || from_encoding == NULL) {
    fprintf (stderr, "cannot open iconv: missing encoding name\n");
    return (iconv_t) -1;
  }

  cd = iconv_open (to_encoding, from_encoding);
  if (cd == (iconv_t) -1) {
    /* perror() first, so errno still belongs to iconv_open(). */
    perror ("cannot open iconv");
    fprintf (stderr, "cannot open iconv (%s -> %s)\n",
             from_encoding, to_encoding);
  }
  return cd;
}
00261 
00262 
00263     
00264   
00265 void
00266 hash_table_init (HHList table[], int size)
00267 {
00268   int i;
00269   for (i = 0; i < size; i++){
00270     hhlist_init (&table[i]);
00271   }
00272 }