Back to index

im-sdk  12.3.91
qjbj_punct.c
Go to the documentation of this file.
00001 #ifdef HAVE_CONFIG_H
00002 #include <config.h>
00003 #endif
00004 #include <stdio.h>
00005 #include <stdlib.h>
00006 #include <ctype.h>
00007 #include <string.h>
00008 #include <strings.h>
00009 
00010 #include "config.h"
00011 #include "encode.h"
00012 #include "kolelog.h"
00013 
/* Override-table file names; both are opened under LE_IME_MODULES_DIR. */
#define QJBJ_FILE_NAME      "QjBj.txt"
#define PUNCT_FILE_NAME     "Punct.txt"

/* Size of the line buffers handed to getline(). */
#define MAX_LINE_LEN        256

/* Entries per full-width table (one per character 0x21..0x7E). */
#define FULLWIDE_CHAR_NUM   94

/* Entries in PunctChars and in each punctuation table. */
#define PUNCT_CHAR_NUM      18
00020 
00021 typedef struct _FullWideChar_Struct {
00022        char bInit;
00023        char *QjStr[FULLWIDE_CHAR_NUM];
00024 }  FullWideChar_Struct;
00025 
00026 typedef struct _PunctChar_Struct {
00027        char bInit;
00028        char *PunctStr[PUNCT_CHAR_NUM];
00029 }  PunctChar_Struct;
00030 
00031 char *FullWideCharListByLang_CN[FULLWIDE_CHAR_NUM] = {
00032        "!",
00033        """,
00034        "#",
00035        "$",
00036        "%",
00037        "&",
00038        "'",
00039        "(",
00040        ")",
00041        "*",
00042        "+",
00043        ",",
00044        "-",
00045        ".",
00046        "/",
00047        "0",
00048        "1",
00049        "2",
00050        "3",
00051        "4",
00052        "5",
00053        "6",
00054        "7",
00055        "8",
00056        "9",
00057        ":",
00058        ";",
00059        "<",
00060        "=",
00061        ">",
00062        "?",
00063        "@",
00064        "A",
00065        "B",
00066        "C",
00067        "D",
00068        "E",
00069        "F",
00070        "G",
00071        "H",
00072        "I",
00073        "J",
00074        "K",
00075        "L",
00076        "M",
00077        "N",
00078        "O",
00079        "P",
00080        "Q",
00081        "R",
00082        "S",
00083        "T",
00084        "U",
00085        "V",
00086        "W",
00087        "X",
00088        "Y",
00089        "Z",
00090        "[",
00091        "\",
00092        "]",
00093        "^",
00094        "_",
00095        "`",
00096        "a",
00097        "b",
00098        "c",
00099        "d",
00100        "e",
00101        "f",
00102        "g",
00103        "h",
00104        "i",
00105        "j",
00106        "k",
00107        "l",
00108        "m",
00109        "n",
00110        "o",
00111        "p",
00112        "q",
00113        "r",
00114        "s",
00115        "t",
00116        "u",
00117        "v",
00118        "w",
00119        "x",
00120        "y",
00121        "z",
00122        "{",
00123        "|",
00124        "}",
00125        " ̄",
00126 };
00127 
00128 char *FullWideCharListByLang_TW[FULLWIDE_CHAR_NUM] = {
00129        "!",
00130        "〃",
00131        "#",
00132        "$",
00133        "%",
00134        "&",
00135        "′",
00136        "(",
00137        ")",
00138        "﹡",
00139        "+",
00140        ",",
00141        "-",
00142        ".",
00143        "/",
00144        "0",
00145        "1",
00146        "2",
00147        "3",
00148        "4",
00149        "5",
00150        "6",
00151        "7",
00152        "8",
00153        "9",
00154        ":",
00155        ";",
00156        "〈",
00157        "=",
00158        "〉",
00159        "?",
00160        "@",
00161        "A",
00162        "B",
00163        "C",
00164        "D",
00165        "E",
00166        "F",
00167        "G",
00168        "H",
00169        "I",
00170        "J",
00171        "K",
00172        "L",
00173        "M",
00174        "N",
00175        "O",
00176        "P",
00177        "Q",
00178        "R",
00179        "S",
00180        "T",
00181        "U",
00182        "V",
00183        "W",
00184        "X",
00185        "Y",
00186        "Z",
00187        "【",
00188        "\",
00189        "】",
00190        "︿",
00191        "_",
00192        "‵",
00193        "a",
00194        "b",
00195        "c",
00196        "d",
00197        "e",
00198        "f",
00199        "g",
00200        "h",
00201        "i",
00202        "j",
00203        "k",
00204        "l",
00205        "m",
00206        "n",
00207        "o",
00208        "p",
00209        "q",
00210        "r",
00211        "s",
00212        "t",
00213        "u",
00214        "v",
00215        "w",
00216        "x",
00217        "y",
00218        "z",
00219        "{",
00220        "|",
00221        "}",
00222        "∼",
00223 };
00224 
00225 char PunctChars[PUNCT_CHAR_NUM] = {
00226        '!',     '"',     '$',     '&',     '\'',
00227        '(',     ')',     ',',     '.',
00228        ':',     ';',     '<',     '>',     '?',
00229        '@',     '\\',    '^',     '_'   
00230 };
00231 
00232 char *PunctCharListByLang_CN[PUNCT_CHAR_NUM] = {
00233        "!",
00234        "“",
00235        "¥",
00236        "─",
00237        "‘",
00238        "(",
00239        ")",
00240        ",",
00241        "。",
00242        ":",
00243        ";",
00244        "》",
00245        "《",
00246        "?",
00247        "@",
00248        "、",
00249        "…",
00250        "__",
00251 };
00252        
00253 char *PunctCharListByLang_TW[PUNCT_CHAR_NUM] = {
00254        "!",
00255        "“",
00256        "NT$",
00257        "─",
00258        "‘",
00259        "(",
00260        ")",
00261        ",",
00262        "。",
00263        ":",
00264        ";",
00265        "《",
00266        "》",
00267        "?",
00268        "@",
00269        "\",
00270        "…",
00271        "__",
00272 };
00273        
00274 FullWideChar_Struct  FullWideCharListByLang[LANGS_NUM]; 
00275 PunctChar_Struct     PunctCharListByLang[LANGS_NUM]; 
00276 
00277 void qjbjpunct_init()
00278 {
00279        int lang_id, char_id;
00280 
00281        for (lang_id=0; lang_id<LANGS_NUM; lang_id++) {
00282               FullWideCharListByLang[lang_id].bInit = 0;
00283               for (char_id=0; char_id<FULLWIDE_CHAR_NUM; char_id++) {
00284                      FullWideCharListByLang[lang_id].QjStr[char_id] = NULL;
00285               }
00286 
00287               PunctCharListByLang[lang_id].bInit = 0;
00288               for (char_id=0; char_id<PUNCT_CHAR_NUM; char_id++) {
00289                      PunctCharListByLang[lang_id].PunctStr[char_id] = NULL;
00290               }
00291        }
00292 }
00293 
00294 void qjbjpunct_done()
00295 {
00296        int lang_id, char_id;
00297        char *str;
00298 
00299        for (lang_id=0; lang_id<LANGS_NUM; lang_id++) {
00300               FullWideCharListByLang[lang_id].bInit = 0;
00301               for (char_id=0; char_id<FULLWIDE_CHAR_NUM; char_id++) {
00302                      str = (char *)FullWideCharListByLang[lang_id].QjStr[char_id];
00303                      if (str != NULL) free(str);
00304               }
00305 
00306               PunctCharListByLang[lang_id].bInit = 0;
00307               for (char_id=0; char_id<PUNCT_CHAR_NUM; char_id++) {
00308                      str = (char *)PunctCharListByLang[lang_id].PunctStr[char_id];
00309                      if (str != NULL) free(str);
00310               }
00311        }
00312 }
00313 
00314 void getline(FILE *fd, char *line)
00315 {
00316        int line_ptr;
00317        char line_buf[256], *ptr;
00318 
00319        line_ptr = 0;
00320        line[0] = '\0';
00321 
00322        /* get line with no space */
00323        while(fgets(line_buf, 255, fd) != NULL) {
00324               ptr = line_buf;
00325 
00326               /* skip space keys */
00327               while(*ptr && isspace(*ptr)) ptr++;
00328 
00329               /* if is space line, get new line */
00330               if (*ptr == '\n' || *ptr == '\0')
00331                      continue;
00332 
00333               while(*ptr != '\n' && *ptr != '\0' && line_ptr < MAX_LINE_LEN) 
00334                      line[line_ptr++] = *ptr++;
00335 
00336               while (isspace(line[line_ptr-1])) line_ptr--;
00337               line[line_ptr] = '\0';
00338 
00339               break;
00340        }
00341 }
00342 
00343 int read_qjbj_config_file(int lang_id)
00344 {
00345        char   file_name[256], line[MAX_LINE_LEN];
00346        char   ch, *kptr, *ptr, *lang_name;
00347        int    ch_pos, len;
00348        FILE   *fd;
00349 
00350        FullWideCharListByLang[lang_id].bInit = 1;
00351 
00352        lang_name = (char *)get_langname_from_langid(lang_id);
00353        KOLE_LOG (LOGDEST_STDOUT, "lang_name:%s\n", lang_name);
00354        sprintf(file_name, "%s/%s", LE_IME_MODULES_DIR, QJBJ_FILE_NAME);
00355        KOLE_LOG (LOGDEST_STDOUT, "file_name:%s\n", file_name);
00356 
00357        fd = fopen(file_name, "r");
00358        if (!fd) {
00359               KOLE_LOG (LOGDEST_STDOUT, "Can not open the file:%s\n", file_name);
00360               return(-1);
00361        }
00362 
00363        do {
00364               getline(fd, line);
00365 
00366               if (line[0] == '\0') break;
00367               if (line[0] == '#') continue;
00368               KOLE_LOG (LOGDEST_STDOUT, "line:%s#\n", line);
00369 
00370               kptr = ptr = line;
00371               while(*ptr && !isspace(*ptr))  ptr++;
00372               if (!(*ptr)) continue;
00373 
00374               *ptr++ = '\0';
00375 
00376               len = strlen(kptr);
00377               if (len > 2) continue;
00378 
00379               if (len == 2) {
00380                      if (*kptr == '\\')
00381                             ch = *(kptr + 1);
00382                      else
00383                             continue;
00384               } else if (len == 1) {
00385                      ch = *kptr;
00386               }
00387 
00388               /* skip space keys */
00389               while(*ptr && isspace(*ptr)) ptr++;
00390               if (!(*ptr)) continue;
00391 
00392               ch_pos = ch - 0x21;
00393               FullWideCharListByLang[lang_id].QjStr[ch_pos] = (char *)strdup(ptr);
00394 
00395        } while (1);
00396 
00397        fclose(fd);
00398        return(0);
00399 }
00400 
00401 int read_punct_config_file(int lang_id)
00402 {
00403        char   file_name[256], line[MAX_LINE_LEN];
00404        char   ch, *kptr, *ptr, *lang_name;
00405        int    i, ch_pos, len;
00406        FILE   *fd;
00407 
00408        PunctCharListByLang[lang_id].bInit = 1;
00409 
00410        lang_name = (char *)get_langname_from_langid(lang_id);
00411        sprintf(file_name, "%s/%s", LE_IME_MODULES_DIR, PUNCT_FILE_NAME);
00412        KOLE_LOG (LOGDEST_STDOUT, "file_name:%s\n", file_name);
00413 
00414        fd = fopen(file_name, "r");
00415        if (!fd) {
00416               KOLE_LOG (LOGDEST_STDOUT, "Can not open the file:%s\n", file_name);
00417               return(-1);
00418        }
00419 
00420        do {
00421               getline(fd, line);
00422 
00423               if (line[0] == '\0') break;
00424               if (line[0] == '#') continue;
00425 
00426               kptr = ptr = line;
00427               while(*ptr && !isspace(*ptr))  ptr++;
00428               if (!(*ptr)) continue;
00429 
00430               *ptr++ = '\0';
00431 
00432               len = strlen(kptr);
00433               if (len > 2) continue;
00434 
00435               if (len == 2) {
00436                      if (*kptr == '\\')
00437                             ch = *(kptr + 1);
00438                      else
00439                             continue;
00440               } else if (len == 1) {
00441                      ch = *kptr;
00442               }
00443 
00444               /* skip space keys */
00445               while(*ptr && isspace(*ptr)) ptr++;
00446               if (!(*ptr)) continue;
00447 
00448               ch_pos = -1;
00449               for (i=0; i<PUNCT_CHAR_NUM; i++) {
00450                      if (ch == PunctChars[i]) {
00451                             ch_pos = i;
00452                             break;
00453                      }
00454               }
00455               if (ch_pos == -1) continue;
00456               PunctCharListByLang[lang_id].PunctStr[ch_pos] = (char *)strdup(ptr);
00457        } while (1);
00458 
00459        fclose(fd);
00460        return(0);
00461 }
00462 
00463 char *get_qj_str(int lang_id, char ch)
00464 {
00465        int ch_pos;
00466        char *str;
00467 
00468        if (lang_id < 0 || lang_id > LANGS_NUM) 
00469               return(NULL);
00470 
00471        if (FullWideCharListByLang[lang_id].bInit == 0)
00472               read_qjbj_config_file(lang_id);
00473 
00474        ch_pos = ch - 0x21;
00475 
00476        if (ch_pos < 0 || ch_pos >= FULLWIDE_CHAR_NUM)
00477               return(NULL);
00478 
00479        str = FullWideCharListByLang[lang_id].QjStr[ch_pos];
00480 
00481        if (str != NULL) return(str);
00482 
00483        if (lang_id == LANG_ZH_CN) 
00484               return(FullWideCharListByLang_CN[ch_pos]);
00485        else if (lang_id == LANG_ZH_TW || lang_id == LANG_ZH_HK)
00486               return(FullWideCharListByLang_TW[ch_pos]);
00487        else 
00488               return(NULL);
00489 }
00490 
00491 char *get_punct_str(int lang_id, char ch)
00492 {
00493        int i, ch_pos;
00494        char *str;
00495 
00496        if (lang_id < 0 || lang_id > LANGS_NUM) 
00497               return(NULL);
00498 
00499        if (PunctCharListByLang[lang_id].bInit == 0)
00500               read_punct_config_file(lang_id);
00501 
00502 
00503        ch_pos = -1;
00504        for (i=0; i<PUNCT_CHAR_NUM; i++) {
00505               if (ch == PunctChars[i]) {
00506                      ch_pos = i;
00507                      break;
00508               }
00509        }
00510        if (ch_pos == -1)
00511               return(NULL);
00512 
00513        str = PunctCharListByLang[lang_id].PunctStr[ch_pos];
00514 
00515        if (str != NULL) return(str);
00516 
00517        if (lang_id == LANG_ZH_CN) 
00518               return(PunctCharListByLang_CN[ch_pos]);
00519        else if (lang_id == LANG_ZH_TW || lang_id == LANG_ZH_HK)
00520               return(PunctCharListByLang_TW[ch_pos]);
00521        else 
00522               return(NULL);
00523 }
00524