Back to index

im-sdk  12.3.91
hhdict.c
Go to the documentation of this file.
00001 #ifdef HAVE_CONFIG_H
00002 #include <config.h>
00003 #endif
00004 
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <assert.h>
00008 #include <iconv.h>
00009 #include <sys/types.h>
00010 #include <sys/stat.h>
00011 #include <string.h>
00012 #include <errno.h>
00013 #include "utfchar.h"
00014 #include "hhentry.h"
00015 #include "tree.h"
00016 #include "hhdict.h"
00017 
/* Number of trees in _p_trees; assigned from the dictionary header's
   table_size in construct_binary_tree_from_file. */
00018 static int _n_trees = 0;
/* Tree array obtained from tree_n_new in construct_binary_tree_from_file;
   each element is filled from one bucket of the dictionary file. */
00019 static Tree *_p_trees = NULL;
/* Incremented on every call to construct_binary_tree_from_file. */
00020 static int n_dictionary_creation_counter = 0;
00021 
/* Reads the dictionary name and the table count from fp into hdr. */
00022 static Bool dictionary_get_header_info (FILE *fp, DictionaryHeader *hdr);
/* Reads one bucket of entries from fp and inserts them into tree. */
00023 static void dictionary_build_tree_from_file (FILE *fp, Tree *tree);
00024 
/* Not defined in this listing; presumably stores a 24-bit integer read from
   fp into *val -- confirm against its definition. */
00025 extern void get_int24_from_file (int *val, FILE *fp);
00026 
00027 void
00028 get_inmemory_dictionary (int *n_trees, Tree **p_trees)
00029 {
00030   if (!n_trees || !p_trees)
00031     return;
00032   *n_trees = _n_trees;
00033   *p_trees = _p_trees;
00034 }
00035 
00036 Bool
00037 construct_binary_tree_from_file
00038 (char *dic_path, Tree **trees_return, int *n_trees_return )
00039 {
00040   FILE *fp;
00041   struct stat buf;
00042   int ret;
00043   int i;
00044   DictionaryHeader hdr;
00045 
00046   assert (dic_path != NULL);
00047   n_dictionary_creation_counter += 1;
00048   
00049   if (n_dictionary_creation_counter > 1){
00050     *trees_return = _p_trees;
00051     *n_trees_return = _n_trees;
00052   }
00053     
00054   
00055   if (dic_path == NULL){
00056     fprintf (stderr, "NULL path for dictionary was passed\n");
00057     return False;
00058   }
00059   
00060   ret = stat (dic_path, &buf);
00061   
00062   if (ret){
00063     /* in most case, the file isn't likely to exist */
00064     perror ("dictionary_build_btree_from_file error");
00065     return False;
00066   }
00067   
00068   fp = fopen (dic_path, "r");
00069     
00070   /* how many tree do I need to build */
00071   dictionary_get_header_info (fp, &hdr);
00072   
00073   _n_trees = hdr.table_size;
00074   _p_trees = (Tree *) tree_n_new (hdr.table_size);
00075   
00076   for (i = 0; i < _n_trees; i++)
00077     dictionary_build_tree_from_file (fp, &_p_trees[i]);
00078 
00079   fclose (fp);
00080   *n_trees_return = _n_trees;
00081   *trees_return = _p_trees;
00082   
00083   return True;
00084 }
00085 
00086 static void
00087 dictionary_free ()
00088 {
00089   /*
00090   static int _n_trees = 0;
00091   static Tree *_p_trees = NULL;
00092   */
00093   int i;
00094   Tree *p_tree;
00095   for (i = 0 ; i < _n_trees; i++){
00096     p_tree =  &_p_trees[i];
00097     
00098     
00099   }
00100 }
00101 
00102 static Bool
00103 dictionary_get_header_info (FILE *fp, DictionaryHeader *hdr)
00104 {
00105   int version_signature;
00106   
00107   assert (fp != NULL);
00108   assert (hdr != NULL);
00109   
00110   if (!hdr || !fp){
00111     fprintf (stderr, "dictionary_get_header_info error: ptr or hdr is null\n");
00112     return False;
00113   }
00114   
00115   fread (hdr->dict_name, strlen(IIIM_KO_LE_DIC) + 1, 1,  fp);
00116   if (strcmp (hdr->dict_name, IIIM_KO_LE_DIC)){
00117     /* this is not ko-le dictionary, cancelling... */
00118     fprintf (stderr, "dictionary_get_header_info error: dict_name is wrong\n");
00119     return False;
00120   }
00121   /* read version signature from the file.
00122      but, I don't care about version for now.. */  
00123   get_int24_from_file (&version_signature, fp);
00124 
00125   /* read info for number of tables */
00126   get_int24_from_file (&hdr->table_size, fp);
00127 
00128   return True;
00129 }
00130 
00131 /* read  each bucket */
00132 static void
00133 dictionary_build_tree_from_file (FILE *fp, Tree *tree)
00134 {
00135   int i_total;
00136   int i;
00137   HHItem *hhitem;
00138   TreeNode *node;
00139   
00140   /* number of hangul-hanja pairs in this bucket */
00141   get_int24_from_file (&i_total, fp);
00142 
00143   for (i = 0 ; i < i_total; i++){
00144     hhitem = hhitem_new ();
00145     hhitem_read_from_file (fp, hhitem);
00146     
00147     node = (TreeNode *) tree_node_new_with_hhitem (hhitem);
00148     tree_insert (tree, node);
00149     
00150     hhitem_free (hhitem);
00151     hhitem = NULL;
00152   }
00153 }
00154 
00155 
00156 Bool
00157 dictionary_search_hanja_candidates_in_utf8
00158 (char *u8_hangul, int *n_return, unsigned char***u8_hanja_return )
00159 {
00160   UTFCHAR *p16char = NULL;
00161   int hash_val;
00162   int i;
00163   TreeNode *search_result;
00164     
00165   assert (u8_hangul != NULL);
00166   assert (n_return != NULL);
00167   assert (u8_hanja_return != NULL);
00168 
00169   if (u8_hangul == NULL || !strlen (u8_hangul)){
00170     fprintf (stdout,
00171             "dictionary_search_hanja_candidates_in_utf8 error: "
00172             "u8_hangul is null or zero length");
00173     return False;
00174   }
00175   if (n_return == NULL || u8_hanja_return == NULL){
00176     fprintf (stdout,
00177             "dictionary_search_hanja_candidates_in_utf8 error: "
00178             "n_return or u8_hanja_return is NULL");
00179     return False;
00180   }
00181   p16char = _utfchar_convert_u8_to_u16 (u8_hangul);
00182   if (!p16char){
00183     fprintf (stdout, "dictionary_search_hanja_candidates_in_utf8 error: "
00184             "_utfchar_convert_u8_to_u16 failed\n");
00185     return False;
00186   }
00187 
00188   hash_val = hash (p16char);
00189   search_result = tree_search_hangul (_p_trees + hash_val, u8_hangul);
00190   if (!search_result){
00191     fprintf (stdout, "dictionary_search_hanja_candidates_in_utf8 error"
00192             "no candidates found\n");
00193     if (p16char)
00194       free (p16char);
00195 
00196     return False;
00197   }
00198   *n_return = search_result->data->n_hanja;
00199   *u8_hanja_return =
00200     (unsigned char **) calloc (*n_return, sizeof (unsigned char *));
00201 
00202   for (i = 0 ; i < *n_return; i++)
00203     (*u8_hanja_return)[i]= strdup (search_result->data->hanja_list[i]);
00204   if (p16char)
00205     free (p16char);
00206   
00207   return True;
00208 }
00209 
00210 
00211 
00212 Bool
00213 dictionary_search_hanja_candidates_in_utf16
00214 (UTFCHAR *u16_hangul, int *n_return, UTFCHAR ***u16_hanja_return )
00215 {
00216   int hash_val;
00217   int i;
00218   unsigned char *p8hangul = NULL;
00219   UTFCHAR *conv_return;
00220   TreeNode *search_result;
00221     
00222   assert (u16_hangul != NULL);
00223   assert (n_return != NULL);
00224   assert (u16_hanja_return != NULL);
00225 
00226   if (u16_hangul == NULL || !_utfchar_length (u16_hangul)){
00227     fprintf (stdout,
00228             "dictionary_search_hanja_candidates_in_utf16 error: "
00229             "u16_hangul is null or zero length");
00230     return False;
00231   }
00232   if (n_return == NULL || u16_hanja_return == NULL){
00233     fprintf (stdout,
00234             "dictionary_search_hanja_candidates_in_utf16 error: "
00235             "n_return or u16_hanja_return is NULL");
00236     return False;
00237   }
00238 
00239   hash_val = hash (u16_hangul);
00240   p8hangul = (unsigned char *) _utfchar_convert_u16_to_u8 (u16_hangul);
00241   
00242   search_result = tree_search_hangul (_p_trees + hash_val, p8hangul);
00243   if (!search_result){
00244     fprintf (stdout, "dictionary_search_hanja_candidates_in_utf16 error"
00245             "no candidates found\n");
00246     if (p8hangul)
00247       free (p8hangul);
00248 
00249     return False;
00250   }
00251   
00252   *n_return = search_result->data->n_hanja;
00253   *u16_hanja_return =
00254     (UTFCHAR **) calloc (*n_return, sizeof (UTFCHAR *));
00255 
00256   for (i = 0 ; i < *n_return; i++){
00257     conv_return = 
00258       _utfchar_convert_u8_to_u16 (search_result->data->hanja_list[i]);
00259     /*
00260     if (conv_return == NULL)
00261           (*u16_hanja_return)[i] = NULL;
00262     */
00263     (*u16_hanja_return)[i] = conv_return;
00264   }
00265 
00266   if (p8hangul)
00267     free (p8hangul);
00268   return True;
00269 }
00270 
00271 
00272