Back to index

im-sdk  12.3.91
hhentry.c
Go to the documentation of this file.
00001 #include <assert.h>
00002 #include <stdio.h>
00003 #include <string.h>
00004 #include <stdlib.h>
00005 #include "utfchar.h"
00006 #include "hhentry.h"
00007 
00008 /* hh basic functions */
00009 void print_utfchar_hex_value (unsigned char *str, FILE *fp);
00010 void put_int24_to_buffer (int i, char *buffer);
00011 void put_int24_to_file (int i, FILE *fp);
00012 void get_int24_from_buffer (int *val, char *buffer);
00013 void get_int24_from_file (int *val, FILE *fp);
00014 
00015 void
00016 hh_free (HH *hh)
00017 {
00018   assert (hh != NULL);
00019   if (hh == NULL)
00020     return;
00021   free (hh->utf_hangul);
00022   free (hh->utf_hanja);
00023   free (hh);
00024   hh = NULL;
00025 }
00026 
00027 HH *
00028 hh_new ()
00029 {
00030   HH *new_hh;
00031   new_hh = (HH *) calloc (1, sizeof (HH));
00032   return new_hh;
00033 }
00034 
00035 
00036 HH *
00037 hh_new_with_data (unsigned char *utf_hangul, unsigned char *utf_hanja)
00038 {
00039   
00040   HH *new_hh;
00041   
00042   assert (utf_hangul != NULL);
00043   assert (utf_hanja != NULL);
00044 
00045   if (!utf_hangul || !utf_hanja)
00046     return NULL;
00047   
00048 
00049   new_hh = (HH *) calloc (1, sizeof (HH));
00050   new_hh->utf_hangul = (unsigned char *) strdup (utf_hangul);
00051   new_hh->utf_hanja =  (unsigned char *) strdup (utf_hanja);
00052   
00053   return new_hh;
00054 }
00055 
00056 
00057 
00058 /* hhitem basic functions */
00059 HHEntry
00060 hhitem_new ()
00061 {
00062   HHEntry new_entry;
00063   new_entry = (HHEntry) calloc (1, sizeof (HHItem));
00064   new_entry->n_hanja = 0;
00065   new_entry->hanja_list = NULL;
00066   new_entry->hangul = NULL;
00067 
00068   return new_entry;
00069 }
00070 
00071 HHEntry
00072 hhitem_new_with_data (HH *hh)
00073 {
00074   HHEntry new_entry;
00075 
00076   assert (hh != NULL);
00077   assert (hh->utf_hangul != NULL);
00078   assert (hh->utf_hanja != NULL);
00079   
00080   new_entry = (HHEntry) calloc (1, sizeof (HHItem));
00081   new_entry->n_hanja = 1;
00082   new_entry->hangul = (unsigned char *) strdup (hh->utf_hangul);
00083   new_entry->hanja_list =
00084     (unsigned char **) calloc (1, sizeof (unsigned char *));
00085   new_entry->hanja_list[0] =
00086     (unsigned char *) strdup (hh->utf_hanja);;
00087 
00088   return new_entry;
00089 }
00090 
00091 
00092 
00093 void
00094 hhitem_init (HHEntry hhentry)
00095 {
00096   int n;
00097   assert (hhentry != NULL);
00098   
00099   free (hhentry->hangul);
00100   hhentry->hangul = NULL;
00101   
00102   for (n = 0 ; n < hhentry->n_hanja; n++){
00103     free (hhentry->hanja_list[n]);
00104   }
00105   free (hhentry->hanja_list);
00106   hhentry->hanja_list = NULL;
00107   hhentry->n_hanja = 0;
00108 }
00109 
00110 int
00111 hhitem_comp (HHEntry a, HHEntry b)
00112 {
00113   return strcmp (a->hangul ,b->hangul);
00114 
00115 }
00116 
00117 HHEntry
00118 hhitem_add_hanja (HHEntry hhentry, unsigned char *utf_hanja)
00119 {
00120   int n_hanja;
00121   unsigned char **tmp;
00122   
00123   assert (hhentry != NULL);
00124   assert (utf_hanja != NULL);
00125   if (hhentry == NULL || utf_hanja == NULL)
00126     return NULL;
00127   n_hanja = hhentry->n_hanja;
00128 
00129   tmp = (unsigned char **) calloc (n_hanja + 1, sizeof (unsigned char *));
00130   memcpy (tmp, hhentry->hanja_list, n_hanja * sizeof (unsigned char *));
00131   tmp[n_hanja] = (unsigned char *) strdup (utf_hanja);
00132   
00133   free (hhentry->hanja_list);
00134   hhentry->hanja_list = tmp;
00135   hhentry->n_hanja += 1;
00136   return hhentry;
00137 }
00138 
00139 /* src, dst is a pointer to the existing buffer */
00140 void
00141 hhentry_copy (HHEntry dst, HHEntry src)
00142 {
00143   int n_hanja;
00144   int utf_len;
00145   assert (dst != NULL);
00146   assert (src != NULL);
00147 
00148   if (dst == NULL || src == NULL)
00149     return ;
00150   utf_len = strlen (src->hangul);
00151   dst->hangul = (unsigned char *) calloc (utf_len +1, sizeof (unsigned char));
00152   strcpy (dst->hangul, src->hangul);
00153   
00154   dst->n_hanja = src->n_hanja;
00155 
00156   dst->hanja_list =
00157     (unsigned char **) calloc (src->n_hanja, sizeof (unsigned char *));
00158   for (n_hanja = 0; n_hanja < dst->n_hanja; n_hanja++){
00159     utf_len = strlen (src->hanja_list[n_hanja]);
00160     dst->hanja_list[n_hanja] =
00161       (unsigned char *) calloc (utf_len +1, sizeof (unsigned char));
00162     strcpy (dst->hanja_list[n_hanja], src->hanja_list[n_hanja]);
00163   }
00164 }
00165 
00166 
00167 int
00168 hhitem_serialize
00169 (HHEntry hhitem, int buffer_size, unsigned char *buffer_return)
00170 {
00171   int i, i_len = 0;
00172   int total = 0;
00173   unsigned char *pchar;
00174   int buffer_overflow = 0;
00175     
00176   assert (buffer_return != NULL);
00177   if (buffer_return == NULL){
00178     fprintf (stderr, "fp can't be null\n");
00179     return 0;
00180   }
00181   pchar = buffer_return;
00182   /* length infomation */
00183   /* nothing is written at this time */
00184   /* just placeholder will be written to file */
00185   put_int24_to_buffer (i_len, pchar);
00186   pchar += 3;
00187   total += 3;
00188   
00189   /* hangul length */
00190   i_len = strlen (hhitem->hangul);
00191   put_int24_to_buffer (i_len, pchar);
00192   pchar += 3;
00193   total += 3;
00194   
00195   /* hangul string with terminating null */
00196   memcpy (pchar, hhitem->hangul, strlen (hhitem->hangul) + 1);
00197   total += strlen (hhitem->hangul) + 1;
00198   pchar += strlen (hhitem->hangul) + 1;
00199 
00200   /* dump info of 'number of hanjas' */
00201   put_int24_to_buffer (hhitem->n_hanja, pchar);
00202   total += 3;
00203   pchar += 3;
00204 
00205   /* dump hanja strings */
00206   for (i = 0 ; i < hhitem->n_hanja; i++){
00207     i_len = strlen (hhitem->hanja_list[i]);
00208 
00209     /* dump info of 'lenth of hanja' */
00210     put_int24_to_buffer (i_len, pchar);
00211     total += 3;
00212     if (total >= buffer_size){
00213       buffer_overflow = 1;
00214       break;
00215     }
00216       
00217     pchar += 3;
00218 
00219     memcpy (pchar, hhitem->hanja_list[i], i_len + 1);
00220     total += i_len + 1;
00221     pchar += i_len + 1;
00222   }
00223 
00224   if (buffer_overflow)
00225     return 0;
00226   
00227   pchar = buffer_return;
00228   put_int24_to_buffer (total, pchar);
00229 
00230   return total;
00231 }
00232 
00233 void
00234 hhlist_dump_content (HHList *hhlist, FILE *fp)
00235 {
00236   int i;
00237   int i_total;
00238   HHEntry p_cursor;
00239   unsigned char buffer_return[1024];
00240   unsigned char *tmp;
00241   int dump_return;
00242 
00243   i_total = hhlist->n_count;
00244   put_int24_to_file (i_total, fp);
00245   
00246   for (i = 0 ; i < hhlist->n_count; i++){
00247     p_cursor = hhlist->list[i];
00248     dump_return = hhitem_serialize ( p_cursor,
00249                                  sizeof (buffer_return), buffer_return);
00250     if (dump_return){
00251       fwrite (buffer_return, 1, dump_return, fp);
00252     } else {
00253       fprintf
00254        (stderr, "fatal error: hhlist_dump_content, buffer not enough\n");
00255       exit (-1);
00256       tmp = (unsigned char *) calloc (2048, sizeof (unsigned char));
00257       dump_return = hhitem_serialize (p_cursor, 2048, tmp);
00258       fwrite (tmp, 1, dump_return, fp);
00259       free (tmp);
00260     }
00261   }
00262 }
00263 
00264 
00265 void
00266 hash_table_read_content (FILE *fp, HHList **table, int *size) 
00267 {
00268   int i;
00269   char dict_name[100];
00270   int version_signature;
00271   int table_size;
00272 
00273   fread (dict_name, strlen(IIIM_KO_LE_DIC) + 1, 1,  fp);
00274   if (strcmp (dict_name, IIIM_KO_LE_DIC)){
00275     /* this is not ko-le dictionary, cancelling... */
00276     exit (-1);
00277   }
00278   /* read version signature from the file.
00279      but, I don't care about version for now.. */  
00280   get_int24_from_file (&version_signature, fp);
00281 
00282   /* read info for number of tables */
00283   get_int24_from_file (&table_size, fp);
00284   *size = table_size;
00285 
00286   if (table_size > 0){
00287     *table = (HHList *) calloc (table_size, sizeof (HHList));
00288     for (i = 0 ; i < table_size; i++)
00289       hhlist_init ((*table) + i);
00290   }
00291   
00292   for (i = 0 ; i < table_size; i++){
00293 #if 0
00294     fprintf (stdout, "reading table : %d\n", i);
00295 #endif
00296     hhlist_read_from_file (fp, (*table) + i);
00297   }
00298 }
00299 
00300 void
00301 hash_table_dump_content (HHList table[], int size, FILE *fp)
00302 {
00303   int i;
00304   char *dict_name = IIIM_KO_LE_DIC;
00305   int major_version = 1;
00306   int minor_version = 5;
00307   int version_signature;
00308 
00309   int table_size  = size;
00310 
00311   version_signature = 0;
00312   
00313   version_signature = (major_version & 0x0f);
00314   version_signature = (version_signature << 8) | (minor_version & 0x0000000f);
00315 
00316   /* write dictionary name to file*/
00317   fwrite (dict_name, strlen(dict_name) + 1, 1, fp);
00318 
00319   /* write version signature */
00320   put_int24_to_file (version_signature, fp);
00321 
00322   /* write info for number of tables */
00323   put_int24_to_file (table_size, fp);
00324   
00325   for (i = 0  ; i < table_size; i++){
00326     hhlist_dump_content (&table[i], fp);
00327   }
00328 }
00329 
00330 /* read  each bucket */
00331 void
00332 hhlist_read_from_file (FILE *fp, HHList *hhlist)
00333 {
00334   int i_total;
00335   int i;
00336   HHEntry hhentry;
00337   hhlist_init (hhlist);
00338 
00339   /* number of hangul-hanja pairs in this bucket */
00340   get_int24_from_file (&i_total, fp);
00341 
00342   hhentry = hhitem_new ();
00343 
00344   for (i = 0 ; i < i_total; i++){
00345     hhitem_init (hhentry);
00346     hhitem_read_from_file (fp, hhentry);
00347     hhlist_add_hhitem (hhlist, hhentry);
00348   }
00349 }
00350 
00351 /* entry_return is a pointer to existing buffer */
00352 void
00353 hhitem_read_from_file (FILE *fp, HHEntry entry_return)
00354 {
00355   int n_hanja;
00356   int total_length;
00357   int hangul_length;
00358   
00359   int i;
00360   int j;
00361 
00362   assert (entry_return != NULL);
00363   /* read length info of this HHItem
00364      this is not used for now
00365   */
00366   get_int24_from_file (&total_length, fp);
00367 
00368   /* read length info of hangul */
00369   get_int24_from_file (&hangul_length, fp);
00370 
00371   /* read hangul */
00372   entry_return->hangul =
00373     (unsigned char *)calloc (hangul_length + 1, sizeof (unsigned char));
00374   fread (entry_return->hangul, sizeof(unsigned char), hangul_length + 1, fp);
00375 
00376   /* read info for number of hanja */
00377   get_int24_from_file (&n_hanja, fp);
00378   entry_return->n_hanja = n_hanja;
00379   
00380   entry_return->hanja_list =
00381     (unsigned char **) calloc (n_hanja, sizeof (unsigned char *));
00382   for (i = 0 ; i < n_hanja; i++){
00383     /* now read length info of each hanja string */
00384     get_int24_from_file (&j, fp);
00385     entry_return->hanja_list[i] =
00386       (unsigned char *) calloc (j + 1, sizeof (unsigned char));
00387     fread (entry_return->hanja_list[i], sizeof (unsigned char), j + 1, fp);
00388   }
00389   return;
00390 }
00391 
#if 0
/*
 * Disabled debugging helper: print each byte of str on stdout as a
 * hexadecimal value.
 */
void
print_utfchar_value (unsigned char *str)
{
  unsigned char *p = str;
  while(*p){
    /* the conversion needs its '%'; without it the literal text
       "0xhhx " was printed and the byte was ignored */
    printf ("0x%hhx ", *p);
    p++;
  }
}
#endif
00403 
00404 HHEntry
00405 hhlist_search_hhitem (HHList *hhlist, HH *hh)
00406 {
00407   int i;
00408   assert (hhlist != NULL);
00409   assert (hh != NULL);
00410 
00411   if (hhlist == NULL || hh == NULL){
00412     return NULL;
00413   }
00414 
00415   for (i = 0 ; i < hhlist->n_count; i++){
00416     if ( strcmp (hhlist->list[i]->hangul, hh->utf_hangul) == 0)
00417       return hhlist->list[i];
00418   }
00419   return NULL;
00420 }
00421 
00422 void
00423 hhlist_add_hhitem (HHList *hhlist, HHEntry hhitem)
00424 {
00425   
00426   int n_count;
00427   HHEntry *tmp;
00428   assert (hhlist != NULL);
00429   assert (hhitem != NULL);
00430 
00431   n_count = hhlist->n_count;
00432   tmp = (HHEntry *) calloc (n_count + 1, sizeof (HHEntry));
00433   memcpy (tmp, hhlist->list, n_count * sizeof (HHEntry));
00434 
00435   tmp [n_count] =  hhitem_new ();
00436   hhentry_copy (tmp [n_count], hhitem);
00437   hhlist->n_count += 1;
00438   hhlist->list = tmp;
00439 }
00440 
00441 HHEntry
00442 hhlist_add_hh (HHList *hhlist, HH *item)
00443 {
00444   HHEntry hhitem;
00445   HHEntry *tmp;
00446   int n_count;
00447 
00448   assert (hhlist != NULL);
00449   assert (item != NULL);
00450 
00451   if (hhlist == NULL || item == NULL)
00452     return NULL;
00453 
00454   n_count = hhlist->n_count;
00455   
00456   hhitem = hhlist_search_hhitem (hhlist, item);
00457 
00458   if (hhitem == NULL){
00459     /* need to add new item */
00460     hhitem = hhitem_new_with_data (item);
00461     tmp = (HHEntry *) calloc (n_count + 1, sizeof (HHEntry));
00462     memcpy (tmp, hhlist->list, n_count * sizeof (HHEntry ));
00463     tmp[n_count] = hhitem;
00464     free (hhlist->list);
00465     hhlist->list = tmp;
00466     hhlist->n_count = n_count +1;
00467     
00468   } else {
00469     /* need to update existing item */
00470     hhitem_add_hanja (hhitem, item->utf_hanja);
00471   }
00472 
00473   return hhitem;
00474 }
00475 
00476 void
00477 hhlist_init (HHList *hhlist)
00478 {
00479   assert (hhlist != NULL);
00480   hhlist->n_count = 0;
00481   hhlist = NULL;
00482 }
00483 
00484 void
00485 hhlist_print_content (HHList *hhlist, FILE *fp)
00486 {
00487   int n_hangul;
00488   int i, j;
00489   HHEntry p_cursor;
00490   
00491   if (fp == NULL)
00492     fp = stdout;
00493 
00494   n_hangul = hhlist->n_count;
00495   for (i = 0 ; i < n_hangul; i++){
00496     p_cursor = hhlist->list[i];
00497     fprintf (fp, "[ ");    
00498     print_utfchar_hex_value (p_cursor->hangul, fp);
00499     fprintf (fp, " ]");
00500     fprintf (fp, "\n\t");
00501 
00502     for (j = 0 ; j < p_cursor->n_hanja; j++){
00503       fprintf (fp, "[ ");    
00504       print_utfchar_hex_value (p_cursor->hanja_list[j], fp);
00505       fprintf (fp, " ] ");
00506 
00507     }
00508     fprintf (fp, "\n");
00509 
00510   }
00511 }
00512 
00513 void
00514 hhitem_print_string (HHItem *hhitem, FILE *fp)
00515 {
00516   int n;
00517   assert (hhitem != NULL);
00518   if (fp == NULL)
00519     fp = stdout;
00520   
00521   fprintf (fp, "HANGUL[");
00522   fprintf (fp, hhitem->hangul);
00523 #if 0
00524   _utfchar_print (hhitem->hangul);
00525 #endif
00526   fprintf (fp, "]: HANJA[ ");
00527   
00528   for (n = 0 ; n < hhitem->n_hanja; n++){
00529 #if 0
00530     _utfchar_print (hhitem->hanja_list[n]);
00531 #endif
00532     fprintf (fp, hhitem->hanja_list[n]);
00533     fprintf (fp, " ");
00534   }
00535   fprintf (fp, "]");
00536 }
00537 
00538 void
00539 hhlist_print_string (HHList *hhlist, FILE *fp)
00540 {
00541   int n_count;
00542   if (fp == NULL)
00543     fp = stdout;
00544   fprintf (fp, "item count: %d\n", hhlist->n_count);
00545   for (n_count = 0; n_count < hhlist->n_count; n_count++){
00546     hhitem_print_string (hhlist->list[n_count], fp);
00547     fprintf (fp, "\n");
00548   }
00549 }
00550 
00551 
/*
 * Write every byte of the NUL-terminated string str to fp as
 * "0x<hex>, ", using lowercase hex with no zero padding.
 */
void
print_utfchar_hex_value (unsigned char *str, FILE *fp)
{
  const unsigned char *cursor;

  for (cursor = str; *cursor != '\0'; cursor++)
    fprintf (fp, "0x%hhx, ", *cursor);
}
00562 
00563 
00564 int
00565 hash (UTFCHAR *string)
00566 {
00567   UTFCHAR *p;
00568   int hv = 0;
00569   
00570   for (p = string; *p; p++){
00571     hv += *p;
00572   }
00573   hv = (hv >> 2) & 0x00ff;
00574   return hv;
00575 }
00576 
/*
 * Store the low 24 bits of i in buffer[0..2], most significant byte
 * first.  A NULL buffer is reported on stderr and nothing is written.
 */
void
put_int24_to_buffer (int i, char *buffer)
{
  char *out;
  int shift;

  if (!buffer){
    fprintf (stderr, "put_int24_to_buffer error: buffer is null\n");
    return;
  }

  out = buffer;
  for (shift = 16; shift >= 0; shift -= 8)
    *out++ = (i >> shift) & 0xff;
}
00591 
/*
 * Write the low 24 bits of i to fp as three bytes, most significant
 * byte first.  A NULL stream is reported on stderr and nothing is
 * written.
 */
void
put_int24_to_file (int i, FILE *fp)
{
  int a, b, c;
  if (fp == NULL){
    /* name this function, not put_int24_to_buffer, in the diagnostic */
    fprintf (stderr, "put_int24_to_file error: fp is null\n");
    return;
  }
  a  = (i >> 16)  & 0x000000ff;
  b  = (i >> 8)  & 0x000000ff;
  c  = (i)  & 0x000000ff;
  
  fputc (a, fp);
  fputc (b, fp);
  fputc (c, fp);
  
  return;
}
00610 
/*
 * Decode a 3-byte, most-significant-byte-first integer from
 * buffer[0..2] into *val.  The result is always in 0..0xFFFFFF.
 *
 * NULL arguments are reported on stderr and *val is left untouched.
 */
void
get_int24_from_buffer (int *val, char *buffer)
{
  const unsigned char *bytes;

  if ((val == NULL) || (buffer == NULL)){
    fprintf (stderr, "get_int24_from_buffer error: val or buffer is null\n");
    return;
  }

  /* Read the bytes as unsigned.  Where plain char is signed, a byte of
     0x80 or above would sign-extend and overwrite the higher bytes. */
  bytes = (const unsigned char *) buffer;
  *val = ((int) bytes[0] << 16) | ((int) bytes[1] << 8) | (int) bytes[2];
  return;
}
00627 
00628 
/*
 * Read a 3-byte, most-significant-byte-first integer from fp into
 * *val.
 *
 * If the stream ends or fails before three bytes arrive, *val is set
 * to -1, so callers can tell a failed read from any valid value
 * (0..0xFFFFFF).  NULL arguments are reported on stderr and *val is
 * left untouched.
 */
void
get_int24_from_file (int *val, FILE *fp)
{
  int i = 0;
  int k;

  if ((val == NULL) || (fp == NULL)){
    fprintf (stderr, "get_int24_from_file error: val or fp is null\n");
    return;
  }

  for (k = 0; k < 3; k++){
    int byte = fgetc (fp);

    /* EOF is negative: shifting it is undefined and mixing it into
       the value would produce garbage */
    if (byte == EOF){
      *val = -1;
      return;
    }
    i = (i << 8) | byte;
  }

  *val = i;
  return;
}
00644 
/*
 * Read a single byte from fp into *val.  At end of file or on a read
 * error, *val receives the value fgetc returned (EOF).
 *
 * NULL arguments are reported on stderr and *val is left untouched.
 */
void
get_int8_from_file (int *val, FILE *fp)
{
  if ((val == NULL) || (fp == NULL)){
    /* name this function, not get_int24_from_buffer, in the diagnostic */
    fprintf (stderr, "get_int8_from_file error: val or fp is null\n");
    return;
  }
  *val = fgetc (fp);
  return;
}
00657 
#if 0
/*
 * Disabled helper: build a name from the entry's hangul key by
 * writing every code unit as four uppercase hex digits, joined with
 * '_' (for example "AC00_B098").
 *
 * buffer_return receives at most n - 1 characters plus a NUL.
 *
 * NOTE(review): the earlier version never advanced its cursor (an
 * endless loop), overwrote the scratch buffer on each pass, and
 * trimmed two trailing characters.  The format produced here is the
 * presumed intent -- confirm before enabling this code.  The cast
 * below also assumes hangul really holds UTFCHAR units, which the
 * rest of this file does not establish.
 */
void
get_buffer_name (HHEntry hhentry, char *buffer_return, int n)
{
  char buffer[256];
  size_t used = 0;
  UTFCHAR *p;

  if (hhentry == NULL || buffer_return == NULL || n <= 0)
    return;

  buffer[0] = '\0';
  for (p = (UTFCHAR *) hhentry->hangul; p != NULL && *p; p++){
    int written = snprintf (buffer + used, sizeof (buffer) - used,
                            "%s%04X", used ? "_" : "", (unsigned int) *p);

    /* stop cleanly when the scratch buffer is full */
    if (written < 0 || (size_t) written >= sizeof (buffer) - used){
      buffer[used] = '\0';
      break;
    }
    used += (size_t) written;
  }

  /* bounded copy: always NUL-terminated, truncated to n - 1 chars */
  snprintf (buffer_return, (size_t) n, "%s", buffer);
}
#endif