Back to index

im-sdk  12.3.91
Classes | Defines | Typedefs | Functions
codetable.c File Reference
#include <stdio.h>
#include <locale.h>
#include "ime.h"
#include "codetable.h"

Go to the source code of this file.

Classes

struct  _HZSearchContext

Defines

#define WILD_MATCH   0 /* exact match */
#define WILD_PREFIX   1 /* no match, but maybe if go deeper */
#define WILD_UNMATCH   2 /* complete mismatch */
#define SC_FAIL   0
#define SC_OK   1
#define WILDCHAR_MATCHSINGLE   '?'
#define WILDCHAR_MATCHANY   '*'
#define MAX_CANDIDATES_NUM   16
#define MAX_CANDIDATE_CHAR_NUM   64
#define MAX_INPUT_KEY_NUM   32
#define car(s)   (*(s))
#define cdr(s)   ((s)+1)
#define empty(s)   (!(*(s)))

Typedefs

typedef struct _HZSearchContext HZSearchContext

Functions

int LoadCodeTableHeader (char *file_name, CodeTableStruct *hztbl)
int LoadCodeTable (char *file_name, CodeTableStruct *hztbl)
void UnloadCodeTable (CodeTableStruct *hztbl)
int Is_WildcharMatchAny_Key (CodeTableStruct *hztbl, int key)
int Is_WildcharMatchSingle_Key (CodeTableStruct *hztbl, int key)
int Is_Wildchar_Key (CodeTableStruct *hztbl, int key)
static int WildcharMatch (CodeTableStruct *hztbl, char *string, char *pattern)
static int Get_NextNode (HZSearchContext *pSC)
int is_valid_candidate (unsigned char *hzptr, int hzlen, int dict_encode, int output_encode)
int normal_search (CodeTableStruct *hztbl, HZSearchContext *pSC, unsigned char **outbuf, unsigned char **attrbuf, int pos, int num)
int wildchar_search (CodeTableStruct *hztbl, HZSearchContext *pSC, unsigned char **outbuf, unsigned char **attrbuf, int pos, int num)
int codetable_search (CodeTableStruct *hztbl, char *inbuf, unsigned char **outbuf, unsigned char **attrbuf, int pos, int num)

Class Documentation

struct _HZSearchContext

Definition at line 22 of file codetable.c.

Collaboration diagram for _HZSearchContext:
Class Members
int depth
int kc_repcode
int mo_repcode
char prefix
char repcode
short int tNnumSb
tableNode * tNstack
char wildpattern

Define Documentation

#define car (   s)    (*(s))

Definition at line 252 of file codetable.c.

#define cdr (   s)    ((s)+1)

Definition at line 253 of file codetable.c.

#define empty (   s)    (!(*(s)))

Definition at line 254 of file codetable.c.

#define MAX_CANDIDATE_CHAR_NUM   64

Definition at line 18 of file codetable.c.

#define MAX_CANDIDATES_NUM   16

Definition at line 17 of file codetable.c.

#define MAX_INPUT_KEY_NUM   32

Definition at line 19 of file codetable.c.

#define SC_FAIL   0

Definition at line 11 of file codetable.c.

#define SC_OK   1

Definition at line 12 of file codetable.c.

#define WILD_MATCH   0 /* exact match */

Definition at line 7 of file codetable.c.

#define WILD_PREFIX   1 /* no match, but maybe if go deeper */

Definition at line 8 of file codetable.c.

#define WILD_UNMATCH   2 /* complete mismatch */

Definition at line 9 of file codetable.c.

#define WILDCHAR_MATCHANY   '*'

Definition at line 15 of file codetable.c.

#define WILDCHAR_MATCHSINGLE   '?'

Definition at line 14 of file codetable.c.


Typedef Documentation


Function Documentation

int codetable_search ( CodeTableStruct *  hztbl,
char *  inbuf,
unsigned char **  outbuf,
unsigned char **  attrbuf,
int  pos,
int  num 
)

Definition at line 560 of file codetable.c.

{
       /*
        * Entry point of the candidate search.
        *
        * A lower-cased copy of `inbuf` (at most MAX_INPUT_KEY_NUM keys) is
        * walked down the node tree starting at hztbl->nodeList[0] until either
        * the input is exhausted or the first wildcard key is met.  The node
        * reached becomes the root of the search context; the consumed keys are
        * stored as the context prefix and any remaining keys as the wildcard
        * pattern.  The search itself is then delegated to normal_search()
        * (no wildcard present) or wildchar_search().
        *
        * At most MAX_CANDIDATES_NUM candidates are requested, whatever `num`
        * says.
        *
        * Returns the value produced by the delegated search, or 0 when a
        * non-wildcard key has no matching child node (or when a required
        * pointer is NULL).
        */
       HZSearchContext search_context, *pSC;
       char tmp_inbuf[MAX_INPUT_KEY_NUM+1];

       int matched, i, j, inlen;
       tableNode *tnptr, *tCurTNptr;
       int search_num;

       /* nothing can be searched without a table and an input string */
       if (!hztbl || !inbuf || !hztbl->nodeList)
              return(0);

       /* private, NUL-terminated, lower-cased copy of the input keys */
       memset(tmp_inbuf, 0, sizeof(tmp_inbuf));
       for (i = 0; i < MAX_INPUT_KEY_NUM && inbuf[i] != '\0'; i++) {
              /* <ctype.h> functions are only defined for unsigned char values */
              tmp_inbuf[i] = tolower((unsigned char)inbuf[i]);
       }

       inbuf = tmp_inbuf;
       /* length is loop-invariant from here on: compute it once */
       inlen = (int)strlen(inbuf);

       /* search maximum matched top node that does not include a wildchar */
       tCurTNptr = &(hztbl->nodeList[0]);
       i = 0;
       while (i < inlen)
       {
              if (   (Is_WildcharMatchSingle_Key(hztbl, inbuf[i])) || 
                     (Is_WildcharMatchAny_Key(hztbl, inbuf[i])) )
                     break;

              /* linear scan of the children of the current node */
              matched = 0;
              j = 0;
              tnptr = &(hztbl->nodeList[tCurTNptr->pos_NextKey]);
              while (j < tCurTNptr->num_NextKeys)
              {
                     if (inbuf[i] == tnptr->key) {
                            matched = 1;
                            break;
                     }
                     tnptr ++;
                     j ++;
              }
              if (matched == 0)
                     return(0);

              tCurTNptr = tnptr;
              i++;
       }

       /* set search context */
       pSC = &search_context;
       pSC->depth = 0;
       pSC->tNnumSb[0] = 0;
       pSC->tNstack[0] = tCurTNptr;
       memset(pSC->repcode, 0, MAX_INPUT_KEY_NUM+1);
       memset(pSC->prefix, 0, MAX_INPUT_KEY_NUM+1);
       memset(pSC->wildpattern, 0, MAX_INPUT_KEY_NUM+1);

       /*
        * i <= inlen <= MAX_INPUT_KEY_NUM, so the copy always leaves at least
        * one of the zero bytes written above as terminator.
        */
       if (i > 0)
              strncpy(pSC->prefix, inbuf, i);

       search_num = num > MAX_CANDIDATES_NUM ? MAX_CANDIDATES_NUM : num;
       if (i == inlen) {
              /* no wildchar,  begin normal mode search */
              num = normal_search(hztbl, pSC, outbuf, attrbuf, pos, search_num);
       } else {
              /* have any wildchar, begin wildchar mode search */
              strncpy(pSC->wildpattern, inbuf+i, MAX_INPUT_KEY_NUM+1);
              num = wildchar_search(hztbl, pSC, outbuf, attrbuf, pos, search_num);
       }
       return(num);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int Get_NextNode ( HZSearchContext *  pSC) [static]

Definition at line 295 of file codetable.c.

{
       /*
        * Advance the search context to the next unvisited sibling.
        *
        * Levels whose remaining-sibling counter is zero are popped (clearing
        * the matching repcode slot) until a level with siblings left is found.
        * If level 0 is reached without siblings, tNstack[0] is set to NULL and
        * SC_FAIL is returned.  Otherwise the context is moved onto the next
        * sibling, its key is recorded in repcode, and SC_OK is returned.
        */
       for (;;) {
              if (pSC->tNnumSb[pSC->depth] != 0)
                     break;            /* this level still has siblings */

              if (pSC->depth == 0) {
                     /* topmost level exhausted: the whole subtree was visited */
                     pSC->tNstack[0] = NULL;
                     return(SC_FAIL);
              }

              /* pop one level and drop the key it contributed */
              pSC->depth -= 1;
              pSC->repcode[pSC->depth] = '\0';
       }

       /* step onto the next sibling at the current level */
       pSC->tNnumSb[pSC->depth] -= 1;
       pSC->tNstack[pSC->depth] += 1;
       pSC->repcode[pSC->depth - 1] = pSC->tNstack[pSC->depth]->key;
       return(SC_OK);
}

Here is the caller graph for this function:

int is_valid_candidate ( unsigned char *  hzptr,
int  hzlen,
int  dict_encode,
int  output_encode 
)

Definition at line 318 of file codetable.c.

{
       /*
        * Decide whether a dictionary entry can be offered in the output
        * encoding.
        *
        *   hzptr, hzlen    bytes of the candidate as stored in the dictionary
        *   dict_encode     encoding id of the dictionary
        *   output_encode   encoding id requested for output
        *
        * Returns 1 when the candidate is acceptable, 0 otherwise.
        *
        *   - identical encodings: always acceptable;
        *   - UTF-8 dictionary: the candidate is converted with
        *     Convert_UTF8_To_Native() and the non-empty result must pass
        *     is_valid_encode_string();
        *   - GB18030 -> GB2312/GBK and GBK -> GB2312: the raw bytes must pass
        *     is_valid_encode_string();
        *   - every other combination is accepted unchecked.
        */
       int ret;

       if (dict_encode == output_encode)
              return(1);

       if (dict_encode == ENCODE_UTF8) {
              char *ip, *op, buffer[512];
              int ileft, oleft;
              
              ip = (char *)hzptr;
              ileft = hzlen;
              op = buffer;
              /*
               * Keep the last byte out of the converter's reach so the
               * zero-filled buffer is always NUL-terminated for the strlen()
               * below, even if the conversion uses all the space it is given.
               */
              oleft = (int)(sizeof(buffer) - 1);
              memset(buffer, 0, sizeof(buffer));
              ret = Convert_UTF8_To_Native(output_encode, ip, ileft, &op, &oleft);
              DEBUG_printf("ctim: Convert UTF-8 to native, ret: %d\n", ret);
              if (ret == -1)
                     return(0);

              /* an empty conversion result is not a usable candidate */
              if(buffer[0] && is_valid_encode_string(output_encode, buffer, strlen(buffer)) == 0)
                     return(1);
              return(0);
       }

       if (dict_encode == ENCODE_GB18030) {
              if (output_encode == ENCODE_GB2312 || output_encode == ENCODE_GBK) {
                     /* need check whether valid candidate*/
                     ret = is_valid_encode_string(output_encode, hzptr, hzlen);
                     if (ret == -1) return(0);
              }
              return (1);
       }
              
       if (dict_encode == ENCODE_GBK) {
              if (output_encode == ENCODE_GB2312) {
                     /* need check whether valid candidate*/
                     ret = is_valid_encode_string(output_encode, hzptr, hzlen);
                     if (ret == -1) return(0);
              }
              return (1);
       }
       
       return (1);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int Is_Wildchar_Key ( CodeTableStruct *  hztbl,
int  key 
)

Definition at line 232 of file codetable.c.

{
       /*
        * Non-zero when `key` is either kind of wildcard for this table:
        * the single-key test is tried first, then the match-any test.
        */
       return( Is_WildcharMatchSingle_Key(hztbl, key) || Is_WildcharMatchAny_Key(hztbl, key) );
}

Here is the caller graph for this function:

int Is_WildcharMatchAny_Key ( CodeTableStruct *  hztbl,
int  key 
)

Definition at line 208 of file codetable.c.

{
       /*
        * Returns 1 when `key` is the match-any wildcard WILDCHAR_MATCHANY,
        * 0 otherwise.  If the table lists that character in UsedCodes the
        * result is always 0.
        */
       return( !index(hztbl->UsedCodes, WILDCHAR_MATCHANY) &&
               key == WILDCHAR_MATCHANY );
}

Here is the caller graph for this function:

int Is_WildcharMatchSingle_Key ( CodeTableStruct *  hztbl,
int  key 
)

Definition at line 219 of file codetable.c.

{
       /*
        * Returns 1 when `key` acts as a single-key wildcard, 0 otherwise.
        *
        * A key listed in the table's WildChar string is always a wildcard.
        * Failing that, WILDCHAR_MATCHSINGLE is a wildcard unless the table
        * lists that character in UsedCodes.
        */

       /*
        * index() treats the terminating NUL as part of the string, so a zero
        * key would otherwise be "found" in WildChar and reported as a
        * wildcard.  A NUL key is never a wildcard.
        */
       if ( (char)key == '\0' )
              return(0);

       if ( index(hztbl->WildChar, key) )
              return(1);

       if ( index(hztbl->UsedCodes, WILDCHAR_MATCHSINGLE) )
              return(0);

       return(key == WILDCHAR_MATCHSINGLE);
}

Here is the caller graph for this function:

int LoadCodeTable ( char *  file_name,
CodeTableStruct *  hztbl 
)

Definition at line 95 of file codetable.c.

{
       /*
        * Load a complete code table from `file_name` into `*hztbl`.
        *
        * File layout as read here: CODETABLE_FLAG bytes, an int version that
        * must equal CODETABLE_VERSION, one CodeTableStruct image, the node
        * list (sizeNodeList entries), the HZ list (sizeHZList bytes) and, when
        * the matching bit is set in bSectionsFlag, the key-prompt and
        * function-key sections.
        *
        * nodeList, hzList, keyprompt and functionkey are allocated here; on
        * success they belong to the table and are released by
        * UnloadCodeTable().  On any failure after allocation they are freed
        * and reset to NULL before returning, so nothing leaks.
        *
        * Returns 0 on success, -1 on any error (a message is printed).
        */
       FILE *ifile;
       char ctFlag[256];
       int  ver, i;

       /* the table is a binary image: open it in binary mode */
       ifile = fopen (file_name, "rb");
       if (! ifile) {
              printf("Unable to open the input table file \"%s\"\n",file_name);
              return(-1);
       }

       /* Read CodeTable File Flag */
       if (fread (ctFlag, strlen(CODETABLE_FLAG), 1, ifile) != 1) {
              fprintf (stderr, "Codetable File read Error:%s\n", file_name);
              goto fail_close;
       }

       if (strncmp (ctFlag, CODETABLE_FLAG, strlen(CODETABLE_FLAG)) != 0) {
              fprintf (stderr, "File is not in CodeTable format\n");
              goto fail_close;
       }

       /* Read CodeTable Version Flag */
       if (fread ((char *)(&ver), sizeof (int), 1, ifile) != 1)  {
              fprintf (stderr, "Codetable File read Error:%s\n", file_name);
              goto fail_close;
       }

       if (ver != CODETABLE_VERSION) {
              fprintf (stderr, "File is not in correct Version Number\n");
              goto fail_close;
       }

       if (fread((char *)hztbl, sizeof(CodeTableStruct), 1, ifile) != 1) {
              printf( "Error in loading input table for %s\n", file_name);
              goto fail_close;
       }

       /*
        * malloc memory for codetable information; the pointer members just
        * read from the file image are overwritten here.
        */
       hztbl->nodeList = (tableNode *)calloc(hztbl->sizeNodeList,sizeof(tableNode));
       hztbl->hzList = (unsigned char *)calloc(hztbl->sizeHZList, sizeof(unsigned char));
       hztbl->keyprompt = (keyPrompt *)calloc(MAX_USEDCODES_NUM, sizeof(keyPrompt));
       hztbl->functionkey = (functionKey *)calloc(MAX_FUNCTIONKEY_NUM, sizeof(functionKey));
       if ((! hztbl->hzList) || (! hztbl->nodeList) ||
           (! hztbl->keyprompt) || (! hztbl->functionkey)) {
              printf( "No memory to load input table for %s\n", file_name);
              goto fail_free;
       }

       if ((fread ((char *)(hztbl->nodeList), sizeof(tableNode),
                (int)hztbl->sizeNodeList, ifile) != hztbl->sizeNodeList) ||
           (fread ((char *)hztbl->hzList, sizeof(unsigned char),
                (int)(hztbl->sizeHZList), ifile) != hztbl->sizeHZList))
       {
              printf( "Error in loading input table for %s\n", file_name);
              goto fail_free;
       }
       
       /*
        * Optional sections.  Element size and element count are passed in
        * their natural order (the byte count read is unchanged); the call
        * succeeds only when every element was read.
        */
       if (GETBIT(hztbl->bSectionsFlag, KEYPROMPT_SECTION)) {
              if (fread((char *)(&(hztbl->keyprompt[0])), sizeof(keyPrompt),
                     MAX_USEDCODES_NUM, ifile) != MAX_USEDCODES_NUM)
              {
                     printf( "Error in loading input table for %s\n", file_name);
                     goto fail_free;
              }
       }

       if (GETBIT(hztbl->bSectionsFlag, FUNCTIONKEY_SECTION)) {
              if (fread((char *)(&(hztbl->functionkey[0])), sizeof(functionKey),
                     MAX_FUNCTIONKEY_NUM, ifile) != MAX_FUNCTIONKEY_NUM)
              {
                     printf( "Error in loading input table for %s\n", file_name);
                     goto fail_free;
              }
       }

       /* keys without an explicit prompt use the key code itself as prompt */
       for (i=0; i<MAX_USEDCODES_NUM; i++) {
              if (hztbl->keyprompt[i].prompt[0] == 0) {
                     hztbl->keyprompt[i].prompt[0] = i;
                     hztbl->keyprompt[i].prompt[1] = 0;
              }
       }

       fclose (ifile);
       return(0);

fail_free:
       /* release whatever was allocated and leave no dangling pointers */
       free (hztbl->nodeList);
       hztbl->nodeList = NULL;
       free (hztbl->hzList);
       hztbl->hzList = NULL;
       free (hztbl->keyprompt);
       hztbl->keyprompt = NULL;
       free (hztbl->functionkey);
       hztbl->functionkey = NULL;
fail_close:
       fclose (ifile);
       return(-1);
}

Here is the caller graph for this function:

int LoadCodeTableHeader ( char *  file_name,
CodeTableStruct *  hztbl 
)

Definition at line 39 of file codetable.c.

{
       /*
        * Read only the header of a code table file into `*hztbl`:
        * the CODETABLE_FLAG signature and the version are verified, then one
        * CodeTableStruct image is read.  No table data is allocated or
        * loaded.
        *
        * Returns 0 on success, -1 on any error (a message goes to stderr).
        */
       FILE *ifile;
       char ctFlag[256];
       int  ver;
       int  status = -1;

       DEBUG_printf("LoadCodeTableHeader ====\n");

       /* the table is a binary image: open it in binary mode */
       ifile = fopen (file_name, "rb");
       if (! ifile) {
              fprintf(stderr, "Unable to open the input table file \"%s\"\n",file_name);
              return(-1);
       }

       /* Read CodeTable File Flag */
       if (fread (ctFlag, strlen(CODETABLE_FLAG), 1, ifile) != 1) {
              fprintf (stderr, "Codetable File read Error:%s\n", file_name);
              goto done;
       }

       if (strncmp (ctFlag, CODETABLE_FLAG, strlen(CODETABLE_FLAG)) != 0) {
              fprintf (stderr, "File is not in CodeTable format\n");
              goto done;
       }

       /* Read CodeTable Version Flag */
       if (fread ((char *)(&ver), sizeof (int), 1, ifile) != 1)  {
              fprintf (stderr, "Codetable File read Error:%s\n", file_name);
              goto done;
       }

       if (ver != CODETABLE_VERSION) {
              fprintf (stderr, "File is not in correct Version Number\n");
              goto done;
       }

       if (fread((char *)hztbl, sizeof(CodeTableStruct), 1, ifile) != 1) {
              fprintf(stderr, "Error in loading input table for %s\n", file_name);
              goto done;
       }

       status = 0;

done:
       /* single exit: the file is closed on every path once it was opened */
       fclose (ifile);
       return(status);
}

Here is the caller graph for this function:

int normal_search ( CodeTableStruct *  hztbl,
HZSearchContext *  pSC,
unsigned char **  outbuf,
unsigned char **  attrbuf,
int  pos,
int  num 
)

Definition at line 367 of file codetable.c.

{
       /*
        * Collect candidates without wildcard matching.
        *
        * Starting at pSC->tNstack[pSC->depth], every HZ choice stored on the
        * visited nodes that passes is_valid_candidate() is counted.  The first
        * `pos` such candidates are skipped; the following ones are copied to
        * outbuf[k] and their key sequence (prefix + repcode) to attrbuf[k].
        * Both copies are limited to MAX_CANDIDATE_CHAR_NUM bytes.
        *
        * When hztbl->nKeyByKeyMode is zero only the starting node is examined;
        * otherwise the whole subtree below it is traversed depth-first.
        *
        * Returns `num` as soon as that many candidates were stored, otherwise
        * the number stored when the traversal ends.  Nothing is stored and 0
        * is returned when num <= 0.
        */
       tableNode *tnptr;
       unsigned char *hzptr, tmpbuf[MAX_CANDIDATE_CHAR_NUM];
       int outptr, i, j, hzlen, len;
       int num_matched = 0; /* how many items match the conditions */
       int num_selected = 0;       /* how many items have been selected */
       char dict_encode, output_encode;

       /* no room requested: do not touch the output buffers at all */
       if (num <= 0)
              return (0);

       dict_encode = hztbl->Encode;
       output_encode = hztbl->Output_Encode;

       DEBUG_printf("dict_encode:%d, output_encode:%d\n", dict_encode, output_encode);
       /* traverse all the subnodes of pSC->tNstack[0] */
       while (1) 
       {
              tnptr = pSC->tNstack[pSC->depth];

              /* check if any HZ choices on this node */
              if (tnptr->num_HZchoice > 0) 
              {
                     /* get HZ choices */
                     hzptr = hztbl->hzList + tnptr->pos_HZidx;
                     for (i=0; i<tnptr->num_HZchoice; i++) 
                     {
                            if (*hzptr == HZ_PHRASE_TAG) {
                                   /* phrase entry: tag byte, length byte, then the text */
                                   hzlen = *(hzptr + 1);
                                   hzptr += 2;
                            } else {
                                   hzlen = get_char_len_by_encodeid(hztbl->Encode, hzptr);
                            }

                            if (is_valid_candidate(hzptr, hzlen, dict_encode, output_encode)) {
                                   num_matched ++;

                                   outptr = 0; 
                                   if (num_matched > pos) {
                                          DEBUG_printf("pos:%d, matched:%d\n", pos, num_matched);
                                          /*
                                           * Leave one byte of tmpbuf for the
                                           * terminator: clamping to the full
                                           * buffer size would write the NUL one
                                           * byte past the end of the array.
                                           */
                                          len = hzlen;
                                          if (len > MAX_CANDIDATE_CHAR_NUM - 1) 
                                                 len = MAX_CANDIDATE_CHAR_NUM - 1;
                                          for (j=0; j< len; j++)
                                                 tmpbuf[outptr++] = *(hzptr+j);
                                          tmpbuf[outptr++] = '\0';
                                   }
       
                                   if (outptr > 0) {
                                          strncpy((char *)outbuf[num_selected], (char *)tmpbuf, MAX_CANDIDATE_CHAR_NUM);
                                          snprintf((char *)attrbuf[num_selected], MAX_CANDIDATE_CHAR_NUM, "%s%s", 
                                                 pSC->prefix, pSC->repcode); 
                                          num_selected ++;
                                   }
                                   if (num_selected >= num) return (num);
                            }

                            /* always skip the full entry, even if its copy was truncated */
                            hzptr += hzlen;
                     }
              }

              if (!(hztbl->nKeyByKeyMode))
                     return (num_selected);

              /* if have any addition input key, move down */
              if (tnptr->num_NextKeys > 0) {
                     tableNode *new_tnptr = &(hztbl->nodeList[tnptr->pos_NextKey]);

                     pSC->depth++ ;
                     pSC->tNnumSb[pSC->depth] = tnptr->num_NextKeys - 1;
                     pSC->tNstack[pSC->depth] = new_tnptr;
                     pSC->repcode[pSC->depth-1] = new_tnptr->key;
                     continue;
              }

              /* No more additional key, hence no match for this node. */
              /* Don't go down, move forward */
              if ( Get_NextNode(pSC) == SC_FAIL )
                     return (num_selected);
       }

}

Here is the call graph for this function:

Here is the caller graph for this function:

void UnloadCodeTable ( CodeTableStruct *  hztbl)

Definition at line 197 of file codetable.c.

{
       /*
        * Release the four buffers owned by a loaded code table (nodeList,
        * hzList, keyprompt, functionkey).  The structure itself is not freed.
        * A NULL table is ignored.
        *
        * Each pointer is reset to NULL after being freed so that calling this
        * function again on the same table is harmless instead of a double
        * free.  free(NULL) is a no-op, so no per-pointer test is needed.
        */
       if (! hztbl) return;

       free ((char *)(hztbl->nodeList));
       hztbl->nodeList = NULL;

       free ((char *)(hztbl->hzList));
       hztbl->hzList = NULL;

       free ((char *)(hztbl->keyprompt));
       hztbl->keyprompt = NULL;

       free ((char *)(hztbl->functionkey));
       hztbl->functionkey = NULL;
}

Here is the caller graph for this function:

int wildchar_search ( CodeTableStruct *  hztbl,
HZSearchContext *  pSC,
unsigned char **  outbuf,
unsigned char **  attrbuf,
int  pos,
int  num 
)

Definition at line 459 of file codetable.c.

{
       /*
        * Collect candidates whose key sequence matches pSC->wildpattern.
        *
        * The subtree below pSC->tNstack[pSC->depth] is traversed depth-first.
        * For every node carrying HZ choices, the keys accumulated in
        * pSC->repcode are tested with WildcharMatch():
        *   WILD_MATCH    the node's valid choices are counted/stored, then the
        *                 traversal continues below the node;
        *   WILD_UNMATCH  the node's subtree is skipped;
        *   otherwise     the traversal simply continues below the node.
        * Nodes without HZ choices are never tested and are always descended.
        *
        * The first `pos` valid candidates are skipped; the following ones are
        * copied to outbuf[k] and their key sequence (prefix + repcode) to
        * attrbuf[k], both limited to MAX_CANDIDATE_CHAR_NUM bytes.
        *
        * Returns `num` as soon as that many candidates were stored, otherwise
        * the number stored when the traversal ends.  Nothing is stored and 0
        * is returned when num <= 0.
        */
       tableNode *tnptr;
       unsigned char *hzptr, tmpbuf[MAX_CANDIDATE_CHAR_NUM];
       int outptr, i, j, hzlen, len;
       int num_matched = 0;    /* how many items match the conditions */
       int num_selected = 0;   /* how many items have been selected */
       int bMatched;

       char dict_encode, output_encode;

       /* no room requested: do not touch the output buffers at all */
       if (num <= 0)
              return (0);

       dict_encode = hztbl->Encode;
       output_encode = hztbl->Output_Encode;

       DEBUG_printf("wildpattern:%s\n", pSC->wildpattern);

       /* traverse all the subnodes of pSC->tNstack[0] */
       while (1) 
       {
              tnptr = pSC->tNstack[pSC->depth];

              /* check if any HZ choices on this node */
              bMatched = 0xff;   /* "not tested": neither WILD_MATCH nor WILD_UNMATCH */
              if (tnptr->num_HZchoice > 0) {
                     DEBUG_printf("repcode:%s  ", pSC->repcode);

                     /* if any HZ choices on this node, then check match search conditions */
                     bMatched = WildcharMatch(hztbl, pSC->repcode, pSC->wildpattern);
              }

              if (bMatched == WILD_MATCH) 
              {
                     DEBUG_printf("repcode:%s  \t%d\n", pSC->repcode, tnptr->num_HZchoice);

                     /* get HZ choices */
                     hzptr = hztbl->hzList + tnptr->pos_HZidx;
                     for (i=0; i<tnptr->num_HZchoice; i++) {
                            if (*hzptr == HZ_PHRASE_TAG) {
                                   /* phrase entry: tag byte, length byte, then the text */
                                   hzlen = *(hzptr + 1);
                                   hzptr += 2;
                            } else {
                                   hzlen = get_char_len_by_encodeid(hztbl->Encode, hzptr);
                            }

                            if (is_valid_candidate(hzptr, hzlen, dict_encode, output_encode)) {
                                   num_matched ++;

                                   outptr = 0; 
                                   if (num_matched > pos) {
                                          /*
                                           * Leave one byte of tmpbuf for the
                                           * terminator: clamping to the full
                                           * buffer size would write the NUL one
                                           * byte past the end of the array.
                                           */
                                          len = hzlen;
                                          if (len > MAX_CANDIDATE_CHAR_NUM - 1) 
                                                 len = MAX_CANDIDATE_CHAR_NUM - 1;
                                          for (j=0; j< len; j++)
                                                 tmpbuf[outptr++] = *(hzptr+j);
                                          tmpbuf[outptr++] = '\0';
                                   }
       
                                   if (outptr > 0) {
                                          strncpy((char *)outbuf[num_selected], (char *)tmpbuf, MAX_CANDIDATE_CHAR_NUM);
                                          snprintf((char *)attrbuf[num_selected], MAX_CANDIDATE_CHAR_NUM, "%s%s", 
                                                 pSC->prefix, pSC->repcode); 
                                          num_selected ++;
                                   }
                                   if (num_selected >= num) return (num);
                            }

                            /* always skip the full entry, even if its copy was truncated */
                            hzptr += hzlen;
                     }
              } else if (bMatched == WILD_UNMATCH) {
                     /* need not to go down */
                     if ( Get_NextNode(pSC) == SC_FAIL )
                            return (num_selected);
                     continue;
              }

              /* if have any addition input key, move down */
              if (tnptr->num_NextKeys > 0) {
                     tableNode *new_tnptr = &(hztbl->nodeList[tnptr->pos_NextKey]);

                     pSC->depth++ ;
                     pSC->tNnumSb[pSC->depth] = tnptr->num_NextKeys - 1;
                     pSC->tNstack[pSC->depth] = new_tnptr;
                     pSC->repcode[pSC->depth-1] = new_tnptr->key;
                     continue;
              }

              /* No more additional key, hence no match for this node. */
              /* Don't go down, move forward */
              if ( Get_NextNode(pSC) == SC_FAIL )
                     return (num_selected);
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int WildcharMatch ( CodeTableStruct *  hztbl,
char *  string,
char *  pattern 
) [static]

Definition at line 259 of file codetable.c.

{
       /*
        * Match `string` against `pattern`, which may contain wildcard keys.
        *
        * Returns
        *   WILD_MATCH    string matches the whole pattern;
        *   WILD_PREFIX   string ran out before the pattern did;
        *   WILD_UNMATCH  string cannot match the pattern.
        *
        * The two tail calls of the recursive formulation (advance string
        * only / advance both) are expressed as loop iterations; only the
        * "match-any consumes nothing" attempt still recurses.
        */
       for (;;) {
              /* pattern used up: a match only if the string is used up too */
              if (*pattern == '\0')
                     return (*string == '\0') ? WILD_MATCH : WILD_UNMATCH;

              if (Is_WildcharMatchAny_Key(hztbl, *pattern)) {
                     /* first try letting the match-any key consume nothing */
                     int rest = WildcharMatch(hztbl, string, pattern + 1);

                     if (rest != WILD_UNMATCH)
                            return rest;

                     /* otherwise let it swallow one key and try again */
                     string++;
                     continue;
              }

              /* pattern still expects keys but the string has none left */
              if (*string == '\0')
                     return WILD_PREFIX;

              if (!Is_WildcharMatchSingle_Key(hztbl, *pattern) && *pattern != *string)
                     return WILD_UNMATCH;

              /* current key accepted: move on to the next pair */
              string++;
              pattern++;
       }
}

Here is the call graph for this function:

Here is the caller graph for this function: