Back to index

im-sdk  12.3.91
hhentry.c
Go to the documentation of this file.
00001 #include <assert.h>
00002 #include <stdio.h>
00003 #include <string.h>
00004 #include <stdlib.h>
00005 #include "utfchar.h"
00006 #include "hhentry.h"
00007 
00008 /* hh basic functions */
00009 void print_utfchar_hex_value (unsigned char *str, FILE *fp);
00010 void put_int24_to_buffer (int i, char *buffer);
00011 void put_int24_to_file (int i, FILE *fp);
00012 void get_int24_from_buffer (int *val, char *buffer);
00013 void get_int24_from_file (int *val, FILE *fp);
00014 
00015 HH *
00016 hh_new ()
00017 {
00018   HH *new_hh;
00019   new_hh = (HH *) calloc (1, sizeof (HH));
00020   return new_hh;
00021 }
00022 
00023 void
00024 hh_free (HH *hh)
00025 {
00026   assert (hh != NULL);
00027   if (hh == NULL)
00028     return;
00029   free (hh->utf_hangul);
00030   free (hh->utf_hanja);
00031   free (hh);
00032   hh = NULL;
00033 }
00034 
00035 HH *
00036 hh_new_with_data (unsigned char *utf_hangul, unsigned char *utf_hanja)
00037 {
00038   
00039   HH *new_hh;
00040   
00041   assert (utf_hangul != NULL);
00042   assert (utf_hanja != NULL);
00043 
00044   if (!utf_hangul || !utf_hanja)
00045     return NULL;
00046 
00047   new_hh = (HH *) calloc (1, sizeof (HH));
00048   new_hh->utf_hangul = (unsigned char *) strdup (utf_hangul);
00049   new_hh->utf_hanja =  (unsigned char *) strdup (utf_hanja);
00050   
00051   return new_hh;
00052 }
00053 
00054 /* hhitem basic functions */
00055 HHItem *
00056 hhitem_new ()
00057 {
00058   HHItem *new_entry;
00059   new_entry = (HHItem *) calloc (1, sizeof (HHItem));
00060   new_entry->n_hanja = 0;
00061   new_entry->hanja_list = NULL;
00062   new_entry->hangul = NULL;
00063 
00064   return new_entry;
00065 }
00066 
00067 void
00068 hhitem_free (HHItem *entry)
00069 {
00070   int i;
00071 
00072   assert (entry != NULL);
00073   
00074   if (entry == NULL){
00075     fprintf (stderr, "hhitem_free error: entry or *entry is null\n");
00076     return;
00077   }
00078   if (entry->hangul){
00079     free (entry->hangul);
00080   }
00081   for (i = 0 ; i < entry->n_hanja; i++){
00082     free (entry->hanja_list[i]);
00083   }
00084   free (entry->hanja_list);
00085   free (entry);
00086 }
00087 
00088 HHItem *
00089 hhitem_new_with_data (HH *hh)
00090 {
00091   HHItem *new_entry;
00092 
00093   assert (hh != NULL);
00094   assert (hh->utf_hangul != NULL);
00095   assert (hh->utf_hanja != NULL);
00096   
00097   new_entry = (HHItem *) calloc (1, sizeof (HHItem));
00098   new_entry->n_hanja = 1;
00099   new_entry->hangul = (unsigned char *) strdup (hh->utf_hangul);
00100   new_entry->hanja_list =
00101     (unsigned char **) calloc (1, sizeof (unsigned char *));
00102   new_entry->hanja_list[0] =
00103     (unsigned char *) strdup (hh->utf_hanja);;
00104 
00105   return new_entry;
00106 }
00107 
00108 void
00109 hhitem_init (HHItem *hhentry)
00110 {
00111   int n;
00112   assert (hhentry != NULL);
00113   
00114   free (hhentry->hangul);
00115   hhentry->hangul = NULL;
00116   
00117   for (n = 0 ; n < hhentry->n_hanja; n++){
00118     free (hhentry->hanja_list[n]);
00119   }
00120   free (hhentry->hanja_list);
00121   hhentry->hanja_list = NULL;
00122   hhentry->n_hanja = 0;
00123 }
00124 
00125 int
00126 hhitem_comp (HHItem *a, HHItem *b)
00127 {
00128   return strcmp (a->hangul, b->hangul);
00129 
00130 }
00131 
00132 HHItem *
00133 hhitem_add_hanja (HHItem *hhentry, unsigned char *utf_hanja)
00134 {
00135   int n_hanja;
00136   unsigned char **tmp;
00137   
00138   assert (hhentry != NULL);
00139   assert (utf_hanja != NULL);
00140   if (hhentry == NULL || utf_hanja == NULL)
00141     return NULL;
00142   n_hanja = hhentry->n_hanja;
00143 
00144   tmp = (unsigned char **) calloc (n_hanja + 1, sizeof (unsigned char *));
00145   memcpy (tmp, hhentry->hanja_list, n_hanja * sizeof (unsigned char *));
00146   tmp[n_hanja] = (unsigned char *) strdup (utf_hanja);
00147   
00148   free (hhentry->hanja_list);
00149   hhentry->hanja_list = tmp;
00150   hhentry->n_hanja += 1;
00151   return hhentry;
00152 }
00153 
00154 /* src, dst is a pointer to the existing buffer */
00155 void
00156 hhitem_copy (HHItem *dst, HHItem *src)
00157 {
00158   int n_hanja;
00159   int utf_len;
00160   assert (dst != NULL);
00161   assert (src != NULL);
00162 
00163   if (dst == NULL || src == NULL)
00164     return ;
00165   utf_len = strlen (src->hangul);
00166   if (dst->hangul)
00167     free (dst->hangul);
00168   
00169   dst->hangul = (unsigned char *) calloc (utf_len +1, sizeof (unsigned char));
00170   strcpy (dst->hangul, src->hangul);
00171   
00172   dst->n_hanja = src->n_hanja;
00173 
00174   if (dst->hanja_list)
00175     free (dst->hanja_list);
00176   
00177   dst->hanja_list =
00178     (unsigned char **) calloc (src->n_hanja, sizeof (unsigned char *));
00179   for (n_hanja = 0; n_hanja < dst->n_hanja; n_hanja++){
00180     utf_len = strlen (src->hanja_list[n_hanja]);
00181     dst->hanja_list[n_hanja] =
00182       (unsigned char *) calloc (utf_len +1, sizeof (unsigned char));
00183     strcpy (dst->hanja_list[n_hanja], src->hanja_list[n_hanja]);
00184   }
00185 }
00186 
00187 
00188 int
00189 hhitem_serialize
00190 (HHItem *hhitem, int buffer_size, unsigned char *buffer_return)
00191 {
00192   int i, i_len = 0;
00193   int total = 0;
00194   unsigned char *pchar;
00195   int buffer_overflow = 0;
00196     
00197   assert (buffer_return != NULL);
00198   if (buffer_return == NULL){
00199     fprintf (stderr, "fp can't be null\n");
00200     return 0;
00201   }
00202   pchar = buffer_return;
00203   /* length infomaon */
00204   /* nothing is written at this time */
00205   /* just placeholder will be written to file */
00206   put_int24_to_buffer (i_len, pchar);
00207   pchar += 3;
00208   total += 3;
00209   
00210   /* hangul length */
00211   i_len = strlen (hhitem->hangul);
00212   put_int24_to_buffer (i_len, pchar);
00213   pchar += 3;
00214   total += 3;
00215   
00216   /* hangul string with terminating null */
00217   memcpy (pchar, hhitem->hangul, strlen (hhitem->hangul) + 1);
00218   total += strlen (hhitem->hangul) + 1;
00219   pchar += strlen (hhitem->hangul) + 1;
00220 
00221   /* dump info of 'number of hanjas' */
00222   put_int24_to_buffer (hhitem->n_hanja, pchar);
00223   total += 3;
00224   pchar += 3;
00225 
00226   /* dump hanja strings */
00227   for (i = 0 ; i < hhitem->n_hanja; i++){
00228     i_len = strlen (hhitem->hanja_list[i]);
00229 
00230     /* dump info of 'lenth of hanja' */
00231     put_int24_to_buffer (i_len, pchar);
00232     total += 3;
00233     if (total >= buffer_size){
00234       buffer_overflow = 1;
00235       break;
00236     }
00237       
00238     pchar += 3;
00239 
00240     memcpy (pchar, hhitem->hanja_list[i], i_len + 1);
00241     total += i_len + 1;
00242     pchar += i_len + 1;
00243   }
00244 
00245   if (buffer_overflow)
00246     return 0;
00247   
00248   pchar = buffer_return;
00249   put_int24_to_buffer (total, pchar);
00250 
00251   return total;
00252 }
00253 
00254 void
00255 hhlist_dump_content (HHList *hhlist, FILE *fp)
00256 {
00257   int i;
00258   int i_total;
00259   HHItem *p_cursor;
00260   unsigned char buffer_return[1024];
00261   unsigned char *tmp = NULL;
00262   int dump_return;
00263 
00264   i_total = hhlist->n_count;
00265   /* write number of HHItems in the HHList */
00266   put_int24_to_file (i_total, fp);
00267   
00268   for (i = 0 ; i < hhlist->n_count; i++){
00269     p_cursor = hhlist->list[i];
00270     dump_return = hhitem_serialize ( p_cursor,
00271                                  sizeof (buffer_return), buffer_return);
00272     if (dump_return <= 0){
00273       fprintf (stderr, "hhlist_dump_content error : "
00274               "failed to write hhlist[%d]\n",  i);
00275       continue;
00276     } else if (dump_return > 0 && dump_return < 1024){
00277       fwrite (buffer_return, 1, dump_return, fp);
00278     } else {
00279       /* try with bigger buffer */
00280       fprintf
00281        (stderr, "hhlist_dump_content, buffer was not big enough\n"
00282         "trying with bigger buffer....\n");
00283 
00284       tmp = (unsigned char *) calloc (dump_return, sizeof (unsigned char));
00285       if (tmp){
00286        dump_return = hhitem_serialize (p_cursor, dump_return, tmp);
00287        fwrite (tmp, 1, dump_return, fp);
00288        free (tmp);
00289       } else {
00290        /* failed to write this list, thus skipping... */
00291        fprintf (stderr, "hhlist_dump_content error : "
00292                "failed to write hhlist[%d]\n",  i);
00293        continue;
00294       }
00295     }
00296   }
00297 }
00298 
#if 0
/* Disabled: read a whole dictionary (name, version signature, bucket
   count, then every bucket) from fp.

   On return *size holds the bucket count read from the file and, when
   it is positive, *table points to a newly allocated array of that
   many buckets.  When the array cannot be allocated *table is set to
   NULL and *size to 0.  The process exits with status -1 when the file
   does not start with the IIIM_KO_LE_DIC name.  */
void
hash_table_read_content (FILE *fp, HHList **table, int *size)
{
  int i;
  char dict_name[100];
  int version_signature = 0;
  int table_size = 0;
  size_t name_len = strlen (IIIM_KO_LE_DIC) + 1;

  /* A short read must not be compared as if it were a full name.  */
  if (name_len > sizeof (dict_name)
      || fread (dict_name, name_len, 1, fp) != 1
      || dict_name[name_len - 1] != '\0'
      || strcmp (dict_name, IIIM_KO_LE_DIC))
    {
      /* this is not ko-le dictionary, cancelling... */
      exit (-1);
    }

  /* read version signature from the file.
     but, I don't care about version for now.. */
  get_int24_from_file (&version_signature, fp);

  /* read info for number of tables */
  get_int24_from_file (&table_size, fp);
  *size = table_size;

  if (table_size > 0)
    {
      *table = (HHList *) calloc (table_size, sizeof (HHList));
      if (*table == NULL)
        {
          *size = 0;
          return;
        }
      for (i = 0; i < table_size; i++)
        hhlist_init ((*table) + i);
    }

  for (i = 0; i < table_size; i++)
    {
      hhlist_read_from_file (fp, (*table) + i);
    }
}
#endif
00334 
00335 void
00336 hash_table_dump_content (HHList table[], int size, FILE *fp)
00337 {
00338   int i;
00339   char *dict_name = IIIM_KO_LE_DIC;
00340   int major_version = 1;
00341   int minor_version = 5;
00342   int version_signature;
00343 
00344   int table_size  = size;
00345 
00346   version_signature = 0;
00347   
00348   version_signature = (major_version & 0x0f);
00349   version_signature = (version_signature << 8) | (minor_version & 0x0000000f);
00350 
00351   /* write dictionary name to file*/
00352   fwrite (dict_name, strlen(dict_name) + 1, 1, fp);
00353 
00354   /* write version signature */
00355   put_int24_to_file (version_signature, fp);
00356 
00357   /* write info for number of tables */
00358   put_int24_to_file (table_size, fp);
00359   
00360   for (i = 0  ; i < table_size; i++){
00361     hhlist_dump_content (&table[i], fp);
00362   }
00363 }
00364 
#if 0
/* Disabled: read one bucket from fp into hhlist.
   The bucket starts with a 3-byte count of hangul-hanja items; each
   item is read into a scratch HHItem, copied into the list by
   hhlist_add_hhitem () and then the scratch item is released.  */
void
hhlist_read_from_file (FILE *fp, HHList *hhlist)
{
  int item_count;
  int k;
  HHItem *scratch;

  hhlist_init (hhlist);

  /* number of hangul-hanja pairs in this bucket */
  get_int24_from_file (&item_count, fp);

  k = 0;
  while (k < item_count)
    {
      scratch = hhitem_new ();
      hhitem_read_from_file (fp, scratch);
      hhlist_add_hhitem (hhlist, scratch);
      hhitem_free (scratch);
      k++;
    }
}
#endif
00387 
00388 /* entry_return is a pointer to existing buffer */
00389 void
00390 hhitem_read_from_file (FILE *fp, HHItem *entry_return)
00391 {
00392   int n_hanja;
00393   int total_length;
00394   int hangul_length;
00395   
00396   int i;
00397   int j;
00398 
00399   assert (entry_return != NULL);
00400   /* read length info of this HHItem
00401      this is not used for now
00402   */
00403   get_int24_from_file (&total_length, fp);
00404 
00405   /* read length info of hangul */
00406   get_int24_from_file (&hangul_length, fp);
00407 
00408   /* read hangul */
00409   entry_return->hangul =
00410     (unsigned char *)calloc (hangul_length + 1, sizeof (unsigned char));
00411   fread (entry_return->hangul, sizeof(unsigned char), hangul_length + 1, fp);
00412 
00413   /* read info for number of hanja */
00414   get_int24_from_file (&n_hanja, fp);
00415   entry_return->n_hanja = n_hanja;
00416   
00417   entry_return->hanja_list =
00418     (unsigned char **) calloc (n_hanja, sizeof (unsigned char *));
00419   for (i = 0 ; i < n_hanja; i++){
00420     /* now read length info of each hanja string */
00421     get_int24_from_file (&j, fp);
00422     entry_return->hanja_list[i] =
00423       (unsigned char *) calloc (j + 1, sizeof (unsigned char));
00424     fread (entry_return->hanja_list[i], sizeof (unsigned char), j + 1, fp);
00425   }
00426   return;
00427 }
00428 
#if 0
/* Disabled: print every byte of the NUL-terminated string str to
   stdout as "0x<hex> ".  */
void
print_utfchar_value (unsigned char *str)
{
  unsigned char *p = str;
  while (*p)
    {
      /* The original format lacked the '%' and printed the literal
         text "0xhhx " for every byte.  */
      printf ("0x%hhx ", *p);
      p++;
    }
}
#endif
00440 
00441 HHItem *
00442 hhlist_search_hhitem (HHList *hhlist, HH *hh)
00443 {
00444   int i;
00445   assert (hhlist != NULL);
00446   assert (hh != NULL);
00447 
00448   if (hhlist == NULL || hh == NULL){
00449     return NULL;
00450   }
00451 
00452   for (i = 0 ; i < hhlist->n_count; i++){
00453     if ( strcmp (hhlist->list[i]->hangul, hh->utf_hangul) == 0)
00454       return hhlist->list[i];
00455   }
00456   return NULL;
00457 }
00458 
00459 void
00460 hhlist_add_hhitem (HHList *hhlist, HHItem *hhitem)
00461 {
00462   
00463   int n_count;
00464   HHItem **tmp;
00465   assert (hhlist != NULL);
00466   assert (hhitem != NULL);
00467 
00468   n_count = hhlist->n_count;
00469   tmp = (HHItem **) calloc (n_count + 1, sizeof (HHItem *));
00470   memcpy (tmp, hhlist->list, n_count * sizeof (HHItem *));
00471 
00472   tmp [n_count] =  hhitem_new ();
00473   hhitem_copy (tmp [n_count], hhitem);
00474   hhlist->n_count += 1;
00475   hhlist->list = tmp;
00476 }
00477 
00478 HHItem *
00479 hhlist_add_hh (HHList *hhlist, HH *item)
00480 {
00481   HHItem *hhitem;
00482   HHItem **tmp;
00483   int n_count;
00484 
00485   assert (hhlist != NULL);
00486   assert (item != NULL);
00487 
00488   if (hhlist == NULL || item == NULL)
00489     return NULL;
00490 
00491   n_count = hhlist->n_count;
00492   
00493   hhitem = hhlist_search_hhitem (hhlist, item);
00494 
00495   if (hhitem == NULL){
00496     /* need to add new item */
00497     hhitem = hhitem_new_with_data (item);
00498     tmp = (HHItem **) calloc (n_count + 1, sizeof (HHItem *));
00499     memcpy (tmp, hhlist->list, n_count * sizeof (HHItem *));
00500     tmp[n_count] = hhitem;
00501     free (hhlist->list);
00502     hhlist->list = tmp;
00503     hhlist->n_count = n_count +1;
00504     
00505   } else {
00506     /* need to update existing item */
00507     hhitem_add_hanja (hhitem, item->utf_hanja);
00508   }
00509 
00510   return hhitem;
00511 }
00512 
00513 void
00514 hhlist_init (HHList *hhlist)
00515 {
00516   assert (hhlist != NULL);
00517   hhlist->n_count = 0;
00518   hhlist = NULL;
00519 }
00520 
00521 void
00522 hhlist_print_content (HHList *hhlist, FILE *fp)
00523 {
00524   int n_hangul;
00525   int i, j;
00526   HHItem *p_cursor;
00527   
00528   if (fp == NULL)
00529     fp = stdout;
00530 
00531   n_hangul = hhlist->n_count;
00532   for (i = 0 ; i < n_hangul; i++){
00533     p_cursor = hhlist->list[i];
00534     fprintf (fp, "[ ");    
00535     print_utfchar_hex_value (p_cursor->hangul, fp);
00536     fprintf (fp, " ]");
00537     fprintf (fp, "\n\t");
00538 
00539     for (j = 0 ; j < p_cursor->n_hanja; j++){
00540       fprintf (fp, "[ ");    
00541       print_utfchar_hex_value (p_cursor->hanja_list[j], fp);
00542       fprintf (fp, " ] ");
00543 
00544     }
00545     fprintf (fp, "\n");
00546 
00547   }
00548 }
00549 
00550 void
00551 hhitem_print_string (HHItem *hhitem, FILE *fp)
00552 {
00553   int n;
00554   assert (hhitem != NULL);
00555   if (fp == NULL)
00556     fp = stdout;
00557   
00558   fprintf (fp, "HANGUL[");
00559   fprintf (fp, hhitem->hangul);
00560 #if 0
00561   _utfchar_print (hhitem->hangul);
00562 #endif
00563   fprintf (fp, "]: HANJA[ ");
00564   
00565   for (n = 0 ; n < hhitem->n_hanja; n++){
00566 #if 0
00567     _utfchar_print (hhitem->hanja_list[n]);
00568 #endif
00569     fprintf (fp, hhitem->hanja_list[n]);
00570     fprintf (fp, " ");
00571   }
00572   fprintf (fp, "]");
00573 }
00574 
00575 void
00576 hhlist_print_string (HHList *hhlist, FILE *fp)
00577 {
00578   int n_count;
00579   if (fp == NULL)
00580     fp = stdout;
00581   fprintf (fp, "item count: %d\n", hhlist->n_count);
00582   for (n_count = 0; n_count < hhlist->n_count; n_count++){
00583     hhitem_print_string (hhlist->list[n_count], fp);
00584     fprintf (fp, "\n");
00585   }
00586 }
00587 
00588 
/* Write every byte of the NUL-terminated string str to fp as
   "0x<hex>, " (lower-case hex, no padding).  The terminating NUL is
   not printed.  */
void
print_utfchar_hex_value (unsigned char *str, FILE *fp)
{
  size_t pos;

  for (pos = 0; str[pos] != 0; pos++)
    fprintf (fp, "0x%hhx, ", str[pos]);
}
00599 
00600 
00601 int
00602 hash (UTFCHAR *string)
00603 {
00604   UTFCHAR *p;
00605   int hv = 0;
00606   
00607   for (p = string; *p; p++){
00608     hv += *p;
00609   }
00610   hv = (hv >> 2) & 0x00ff;
00611   return hv;
00612 }
00613 
/* Store the low 24 bits of i into buffer[0..2], most significant byte
   first.  A NULL buffer is reported on stderr and nothing is
   written.  */
void
put_int24_to_buffer (int i, char *buffer)
{
  if (!buffer)
    {
      fprintf (stderr, "put_int24_to_buffer error: buffer is null\n");
      return;
    }

  buffer[0] = (i >> 16) & 0xff;
  buffer[1] = (i >> 8) & 0xff;
  buffer[2] = i & 0xff;
}
00628 
/* Write the low 24 bits of i to fp as three bytes, most significant
   byte first.  A NULL fp is reported on stderr and nothing is
   written.  */
void
put_int24_to_file (int i, FILE *fp)
{
  int a, b, c;

  if (fp == NULL)
    {
      /* The message used to name put_int24_to_buffer.  */
      fprintf (stderr, "put_int24_to_file error: fp is null\n");
      return;
    }

  a = (i >> 16) & 0x000000ff;
  b = (i >> 8) & 0x000000ff;
  c = (i) & 0x000000ff;

  fputc (a, fp);
  fputc (b, fp);
  fputc (c, fp);
}
00647 
/* Decode a 24-bit big-endian integer from buffer[0..2] into *val.
   The result is always in the range 0..0xffffff.  When val or buffer
   is NULL an error is reported on stderr and *val is not touched.  */
void
get_int24_from_buffer (int *val, char *buffer)
{
  /* Read the bytes as unsigned: through a plain (possibly signed)
     char, any byte >= 0x80 would be sign-extended and corrupt the
     higher bits of the result.  */
  const unsigned char *bytes = (const unsigned char *) buffer;

  if ((val == NULL) || (buffer == NULL))
    {
      fprintf (stderr, "get_int24_from_buffer error: val or buffer is null\n");
      return;
    }

  *val = ((int) bytes[0] << 16) | ((int) bytes[1] << 8) | (int) bytes[2];
}
00664 
00665 
/* Read a 24-bit big-endian integer (three bytes) from fp into *val.
   On success *val is in the range 0..0xffffff.  When the stream ends
   or fails before three bytes were read, *val is set to -1.  When val
   or fp is NULL an error is reported on stderr and *val is not
   touched.  */
void
get_int24_from_file (int *val, FILE *fp)
{
  int value = 0;
  int k;

  if ((val == NULL) || (fp == NULL))
    {
      /* The message used to name get_int24_from_buffer.  */
      fprintf (stderr, "get_int24_from_file error: val or fp is null\n");
      return;
    }

  for (k = 0; k < 3; k++)
    {
      int byte = fgetc (fp);

      if (byte == EOF)
        {
          /* Never shift or OR the negative EOF value into the result.  */
          *val = -1;
          return;
        }
      value = (value << 8) | byte;
    }

  *val = value;
}
00681 
/* Read one byte from fp into *val.
   *val receives the byte value (0..255), or EOF when the stream ends
   or fails.  When val or fp is NULL an error is reported on stderr and
   *val is not touched.  */
void
get_int8_from_file (int *val, FILE *fp)
{
  if ((val == NULL) || (fp == NULL))
    {
      /* The message used to name get_int24_from_buffer.  */
      fprintf (stderr, "get_int8_from_file error: val or fp is null\n");
      return;
    }

  *val = fgetc (fp);
}
00694