Back to index

im-sdk  12.3.91
encode.c
Go to the documentation of this file.
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

#include "ime.h"
00005 
/* 16-bit unit; used below to look at the first unit of UTF-16 output. */
typedef unsigned short CARD16;

/*
 * Description of one language group: which encodings and which locale
 * names belong to it.
 */
typedef struct _LangGroup_Info {
    int  lang_id;             /* LANG_* value of this group */
    char *lang_name;          /* name handed back by the get_langname_* lookups */
    int  *support_encodes;    /* ENCODE_* list, terminated by ENCODE_INVALID */
    char **support_locales;   /* locale names, terminated by NULL */
} LangGroup_Info;
00014 
00015 /* supported encodes for specified language */
00016 int zh_CN_encodes[] = {
00017        ENCODE_GB2312,
00018        ENCODE_GBK,
00019        ENCODE_GB18030,
00020        ENCODE_INVALID,
00021 };
00022 
00023 int zh_TW_encodes[] = {
00024        ENCODE_BIG5,
00025        ENCODE_EUCTW,
00026        ENCODE_INVALID,
00027 };
00028 
00029 int zh_HK_encodes[] = {
00030        ENCODE_BIG5HKSCS,
00031        ENCODE_INVALID,
00032 };
00033 
00034 int th_TH_encodes[] = {
00035        ENCODE_EUCTH,
00036        ENCODE_INVALID,
00037 };
00038 
00039 int ko_KR_encodes[] = {
00040        ENCODE_EUCKR,
00041        ENCODE_INVALID,
00042 };
00043 
/*
 * Locale names accepted for each language group.  The lists are
 * NULL-terminated and are compared case-insensitively by the lookups.
 * The lower-case ".utf8" spellings are the Linux forms.
 */
char *zh_CN_locales[] = {
    "zh_CN", "zh_CN.EUC", "zh_CN.GBK", "zh_CN.GB18030",
    "zh_CN.UTF-8", "zh_CN.utf8",
    "zh.GBK", "zh.UTF-8", "zh",
    NULL
};

char *zh_TW_locales[] = {
    "zh_TW.eucTW", "zh_TW.EUC", "zh_TW.BIG5",
    "zh_TW.UTF-8", "zh_TW.utf8",
    "zh_TW",
    NULL
};

char *zh_HK_locales[] = {
    "zh_HK.big5hkscs", "zh_HK.BIG5HK",
    "zh_HK.UTF-8", "zh_HK.utf8",
    NULL
};

char *th_TH_locales[] = {
    "th_TH.TIS620", "th_TH.ISO8859-11",
    "th_TH.UTF-8", "th_TH.utf8",
    "th_TH", "th",
    NULL
};
00085 
/*
 * Locale names accepted for the Korean language group (NULL-terminated,
 * compared case-insensitively).
 *
 * "ko_KR.UTF-8" is listed in addition to the historical "ko.UTF-8": the
 * per-encoding table EUC_KR_locales accepts "ko_KR.UTF-8", so without it
 * here the language lookups would fall back to the default group for a
 * locale whose encoding lookup succeeds.
 */
char *ko_KR_locales[] = {
    "ko",
    "ko_KR.EUC",
    "ko.UTF-8",
    "ko_KR.UTF-8",
    "ko_KR.utf8",    /* for Linux */
    NULL
};
00093 
/*
 * Language group ids.  They double as indexes into langgroup_info[];
 * LANGS_NUM is the number of groups.
 */
enum { LANG_ZH_CN = 0, LANG_ZH_TW, LANG_ZH_HK, LANG_TH_TH, LANG_KR_KR, LANGS_NUM };
00102 
00103 LangGroup_Info langgroup_info[LANGS_NUM] = {
00104        {
00105               LANG_ZH_CN,
00106               "zh_CN",
00107               zh_CN_encodes,
00108               zh_CN_locales,
00109        },
00110        {
00111               LANG_ZH_TW,
00112               "zh_TW",
00113               zh_TW_encodes,
00114               zh_TW_locales,
00115        },
00116        {
00117               LANG_ZH_HK,
00118               "zh_HK",
00119               zh_HK_encodes,
00120               zh_HK_locales,
00121        },
00122        {
00123               LANG_TH_TH,
00124               "th_TH",
00125               th_TH_encodes,
00126               th_TH_locales,
00127        },
00128        {
00129               LANG_KR_KR,
00130               "ko_KR",
00131               ko_KR_encodes,
00132               ko_KR_locales,
00133        },
00134 };
00135 
/*
 * Description of one encoding plus the iconv descriptors cached for it.
 * The conversion routines open a descriptor on first use: NULL means
 * "not opened yet" and (iconv_t)-1 means "a previous open failed".
 */
typedef struct _Encode_Info {
    int     encode_id;            /* ENCODE_* value */
    char    **called_names;       /* accepted names, NULL-terminated */
    char    **support_locales;    /* locales using this encoding, NULL-terminated */
    char    *iconv_codeset_name;  /* codeset name passed to iconv_open() */
    iconv_t fd_iconv_to_utf8;     /* cached descriptor: this encoding -> UTF-8 */
    iconv_t fd_iconv_from_utf8;   /* cached descriptor: UTF-8 -> this encoding */
} Encode_Info;
00144 
/*
 * Names under which each encoding can be requested.  Lists are
 * NULL-terminated; the first entry is the one reported back by
 * get_name_from_encodeid().
 */
char *GB2312_names[]  = { "GB2312", "GB", NULL };

char *GBK_names[]     = { "GBK", NULL };

char *GB18030_names[] = { "GB18030", NULL };

char *BIG5_names[]    = { "BIG5", NULL };

char *BIG5HK_names[]  = { "BIG5HK", "BIG5HKSCS", NULL };

char *EUC_TW_names[]  = { "EUC_TW", NULL };

char *EUC_KR_names[]  = { "EUC_KR", NULL };

char *EUC_JP_names[]  = { "EUC_JP", NULL };

char *EUC_TH_names[]  = { "EUC_TH", NULL };

char *UTF8_names[]    = { "UTF-8", "UTF_8", "UTF8", NULL };
00199 
/*
 * Locales associated with each encoding (NULL-terminated).  The first
 * entry of a list is what get_default_locale_from_locale() returns for
 * any locale found in that list.  The lower-case ".utf8" spellings are
 * the Linux forms.
 */
char *GB2312_locales[]  = { "zh_CN.EUC", "zh_CN", "zh", NULL };

char *GBK_locales[]     = { "zh_CN.GBK", "zh.GBK", NULL };

char *GB18030_locales[] = { "zh_CN.GB18030", "zh_CN.UTF-8", "zh_CN.utf8", NULL };

char *BIG5_locales[]    = { "zh_TW.BIG5", "zh_TW.UTF-8", "zh_TW.utf8", "zh_TW", NULL };

char *BIG5HK_locales[]  = {
    "zh_HK.big5hkscs", "zh_HK.BIG5HK", "zh_HK.UTF-8", "zh_HK.utf8", NULL
};

char *EUC_TW_locales[]  = { "zh_TW.eucTW", "zh_TW.EUC", "zh_TW", NULL };

char *EUC_KR_locales[]  = { "ko_KR.EUC", "ko_KR.UTF-8", "ko_KR.utf8", NULL };

char *EUC_JP_locales[]  = { "ja_JP", "ja_JP.UTF-8", "ja_JP.eucjp", "ja_JP.utf8", NULL };

char *EUC_TH_locales[]  = { "th_TH", "th_TH.TIS620", "th_TH.UTF-8", "th_TH.utf8", NULL };

char *UTF8_locales[]    = {
    "zh_TW.UTF-8", "zh_TW.utf8", "zh_HK.UTF-8", "zh_HK.utf8", NULL
};
00274 
/* Codeset names handed to iconv_open() for each encoding. */
#define GB2312_CODESET_NAME   "GB2312"
#define GBK_CODESET_NAME      "GBK"
#define GB18030_CODESET_NAME  "GB18030"
#define BIG5_CODESET_NAME     "BIG5"
#define BIG5HK_CODESET_NAME   "BIG5-HKSCS"
#define EUC_TW_CODESET_NAME   "EUC-TW"
#define EUC_KR_CODESET_NAME   "EUC-KR"
#define EUC_JP_CODESET_NAME   "EUC-JP"
#define EUC_TH_CODESET_NAME   "TIS620"
#define UTF8_CODESET_NAME     "UTF-8"

/* Name used for an encoding that is not known. */
#define UNKNOWN_ENCODE        "UNKNOWN"
00289 
00290 Encode_Info  encode_info[ENCODES_NUM + 1] = {
00291        { 
00292               ENCODE_GB2312,
00293               GB2312_names,
00294               GB2312_locales,
00295               GB2312_CODESET_NAME,
00296               NULL,
00297               NULL,
00298        },
00299        { 
00300               ENCODE_GBK,
00301               GBK_names,
00302               GBK_locales,
00303               GBK_CODESET_NAME,
00304               NULL,
00305               NULL,
00306        },
00307        { 
00308               ENCODE_GB18030,
00309               GB18030_names,
00310               GB18030_locales,
00311               GB18030_CODESET_NAME,
00312               NULL,
00313               NULL,
00314        },
00315        { 
00316               ENCODE_BIG5,
00317               BIG5_names,
00318               BIG5_locales,
00319               BIG5_CODESET_NAME,
00320               NULL,
00321               NULL,
00322        },
00323        { 
00324               ENCODE_BIG5HKSCS,
00325               BIG5HK_names,
00326               BIG5HK_locales,
00327               BIG5HK_CODESET_NAME,
00328               NULL,
00329               NULL,
00330        },
00331        { 
00332               ENCODE_EUCTW,
00333               EUC_TW_names,
00334               EUC_TW_locales,
00335               EUC_TW_CODESET_NAME,
00336               NULL,
00337               NULL,
00338        },
00339        {
00340               ENCODE_EUCKR,
00341               EUC_KR_names,
00342               EUC_KR_locales,
00343               EUC_KR_CODESET_NAME,
00344               NULL,
00345               NULL,
00346        },
00347        {
00348               ENCODE_EUCJP,
00349               EUC_JP_names,
00350               EUC_JP_locales,
00351               EUC_JP_CODESET_NAME,
00352               NULL,
00353               NULL,
00354        },
00355        { 
00356               ENCODE_EUCTH,
00357               EUC_TH_names,
00358               EUC_TH_locales,
00359               EUC_TH_CODESET_NAME,
00360               NULL,
00361               NULL,
00362        },
00363        { 
00364               ENCODE_UTF8,
00365               UTF8_names,
00366               UTF8_locales,
00367               UTF8_CODESET_NAME,
00368               NULL,
00369               NULL,
00370        }
00371 };
00372 
/*
 * Shared iconv descriptors for the UTF-8 <-> UTF-16 step of the UTF-16
 * conversion routines.  NULL until first use; (iconv_t)-1 after a failed
 * iconv_open().
 */
iconv_t  fd_iconv_UTF8_to_UTF16 = NULL;   /* UTF-8  -> UTF-16 */
iconv_t  fd_iconv_UTF16_to_UTF8 = NULL;   /* UTF-16 -> UTF-8  */
00375 
00376 char *get_langname_from_langid(int langid)
00377 {
00378        int ret = langid;
00379        
00380        if (langid < 0 || langid >= LANGS_NUM) 
00381               ret = 0;
00382 
00383        return(langgroup_info[ret].lang_name);
00384 }
00385 
00386 char *get_langname_from_locale(char *locale)
00387 {
00388        int lang_id, i, ret;
00389        char *s;
00390 
00391        ret = -1;
00392        for (lang_id = 0; lang_id < LANGS_NUM; lang_id++) {
00393               i = 0;
00394               while (1) {
00395                      s = langgroup_info[lang_id].support_locales[i];
00396                      if (!s || !*s) break;
00397                      if (!strcasecmp(s, locale)) {
00398                             ret = lang_id;
00399                             break;
00400                      }
00401                      i++;
00402               }
00403               if (ret != -1) break;
00404        }
00405 
00406        if (ret == -1) 
00407               ret = 0;
00408 
00409        return(langgroup_info[ret].lang_name);
00410 }
00411 
00412 char *get_langname_from_encodeid(int encodeid)
00413 {
00414        int lang_id, i, ret;
00415        int support_encode;
00416 
00417        ret = -1;
00418        for (lang_id = 0; lang_id < LANGS_NUM; lang_id++) {
00419               i = 0;
00420               while (1) {
00421                      support_encode = langgroup_info[lang_id].support_encodes[i];
00422                      if (support_encode == ENCODE_INVALID) break;
00423                      if (encodeid == support_encode) {
00424                             ret = lang_id;
00425                             break;
00426                      }
00427                      i++;
00428               }
00429               if (ret != -1) break;
00430        }
00431 
00432        if (ret == -1) 
00433               ret = 0;
00434 
00435        return(langgroup_info[ret].lang_name);
00436 }
00437 
00438 int get_langid_from_locale(char *locale)
00439 {
00440        int lang_id, i, ret;
00441        char *s;
00442 
00443        ret = -1;
00444        for (lang_id = 0; lang_id < LANGS_NUM; lang_id++) {
00445               i = 0;
00446               while (1) {
00447                      s = langgroup_info[lang_id].support_locales[i];
00448                      if (!s || !*s) break;
00449                      if (!strcasecmp(s, locale)) {
00450                             ret = lang_id;
00451                             break;
00452                      }
00453                      i++;
00454               }
00455               if (ret != -1) break;
00456        }
00457 
00458        if (ret == -1) 
00459               ret = 0;
00460 
00461        return(ret);
00462 }
00463 int get_langid_from_localeid(int localeid)
00464 {
00465        int lang_id, i, ret;
00466        int support_encode;
00467 
00468        ret = -1;
00469        for (lang_id = 0; lang_id < LANGS_NUM; lang_id++) {
00470               i = 0;
00471               while (1) {
00472                      support_encode = langgroup_info[lang_id].support_encodes[i];
00473                      if (support_encode == ENCODE_INVALID) break;
00474                      if (localeid == support_encode) {
00475                             ret = lang_id;
00476                             break;
00477                      }
00478                      i++;
00479               }
00480               if (ret != -1) break;
00481        }
00482 
00483        if (ret == -1) 
00484               ret = 0;
00485 
00486        return(ret);
00487 }
00488 
00489 int get_encodeid_from_name(char *name)
00490 {
00491        int encode_id, i, ret;
00492        char *s;
00493        
00494        ret = -1;
00495        for (encode_id = 0; encode_id < ENCODES_NUM; encode_id++) {
00496               i = 0;
00497               while (1) {
00498                      s = encode_info[encode_id].called_names[i];
00499                      if (!s || !*s) break;
00500                      if (!strcasecmp(s, name)) {
00501                             ret = encode_id;
00502                             break;
00503                      }
00504                      i++;
00505               }
00506               if (ret != -1) break;
00507        }
00508        if (ret == -1) ret = ENCODE_GB2312; /* return default encode */
00509        return(ret);  
00510 }
00511 
00512 int get_encodeid_from_locale(char *locale)
00513 {
00514        int encode_id, i, ret;
00515        char *s;
00516        
00517        ret = -1;
00518        for (encode_id = 0; encode_id < ENCODES_NUM; encode_id++) {
00519               i = 0;
00520               while (1) {
00521                      s = encode_info[encode_id].support_locales[i];
00522                      if (!s || !*s) break;
00523                      if (!strcasecmp(s, locale)) {
00524                             ret = encode_id;
00525                             break;
00526                      }
00527                      i++;
00528               }
00529               if (ret != -1) break;
00530        }
00531 
00532        if (ret == -1) ret = ENCODE_INVALID; /* return default encode */
00533 
00534        return(ret);  
00535 }
00536 
00537 char *get_name_from_encodeid(int encode_id)
00538 {
00539        if (encode_id >= 0 && encode_id <= ENCODES_NUM)
00540               return(encode_info[encode_id].called_names[0]);
00541        else
00542               return(NULL);
00543 }
00544 
00545 char *get_default_locale_from_locale(char *locale)
00546 {
00547        int encode_id, i, ret;
00548        char *s;
00549 
00550        ret = -1;
00551        for (encode_id = 0; encode_id < ENCODES_NUM; encode_id++) {
00552               i = 0;
00553               while (1) {
00554                      s = encode_info[encode_id].support_locales[i];
00555                      if (!s || !*s) break;
00556                      if (!strcasecmp(s, locale)) {
00557                             ret = encode_id;
00558                             break;
00559                      }
00560                      i++;
00561               }
00562               if (ret != -1) break;
00563        }
00564        
00565        if (ret == -1) return(NULL);
00566 
00567        return(encode_info[ret].support_locales[0]);
00568 }
00569 
00570 int  get_char_len_by_encodeid(int encode_id, unsigned char *ch_ptr)
00571 {
00572        int ret = 2;  /* default character length */
00573        unsigned char code0, code1;
00574 
00575        code0 = ch_ptr[0];
00576        if (code0 < 0x80) return(1);
00577 
00578        if (encode_id == ENCODE_UTF8) {
00579               if (code0 > 0xe0)           /* 3 bytes */
00580                      ret = 3;
00581        } else if (encode_id == ENCODE_GB18030) {
00582               code1 = ch_ptr[1];
00583               if (code0 >=0x81 && code0 <= 0xFE) {
00584                      if (code1 >= 0x30 && code1 <= 0x39)
00585                             ret = 4;
00586               } 
00587        } else if (encode_id == ENCODE_EUCTW) {
00588               if (code0 == 0x8e)          /* 4 bytes */
00589                      ret = 4;
00590        } else if (encode_id == ENCODE_EUCTH) {
00591               ret = 1;
00592        }
00593 
00594        return(ret);
00595 }
00596 
/*
 * Count the characters of the NUL-terminated string ch_ptr, using
 * get_char_len_by_encodeid() to step from one character to the next.
 *
 * The count is stored in *char_count.  Returns 0 on success, or -1 when
 * a character would extend past the end of the string; in that case
 * *char_count holds the number of complete characters seen so far.
 */
int get_char_count_by_encodeid(int encode_id, unsigned char *ch_ptr, int *char_count)
{
    const int total = strlen((char *)ch_ptr);
    int offset;
    int step;

    *char_count = 0;
    for (offset = 0; offset < total; offset += step) {
        step = get_char_len_by_encodeid(encode_id, ch_ptr + offset);
        if (offset + step > total)
            return -1;          /* last character is cut short */
        ++*char_count;
    }

    return 0;
}
00618 
00619 int is_valid_code(int encode_id, unsigned char *int_code, int code_len)
00620 {
00621        unsigned char code0, code1, code2, code3;
00622        
00623        code0 = int_code[0];
00624        code1 = int_code[1];
00625 
00626        switch (encode_id) {
00627               case ENCODE_GB2312:
00628                      if (code0 < 0xA1 || code0 > 0xFE)
00629                             return (-1);
00630                      if (code1 < 0xA1 || code1 > 0xFE)
00631                             return (-1);
00632                      break;
00633                      
00634               case ENCODE_GBK:
00635                      if (code0 < 0x81 || code0 > 0xFE)
00636                             return (-1);
00637                      if (code1 < 0x40 || code1 > 0xFE || code1 == 0x7F)
00638                             return (-1);
00639                      break;
00640 
00641               case ENCODE_GB18030:
00642                      if (code_len == 2) {
00643                             if (code0 < 0x81 || code0 > 0xFE)
00644                                    return (-1);
00645                             if (code1 < 0x40 || code1 > 0xFE || code1 == 0x7F)
00646                                    return (-1);
00647                      } else if (code_len == 4) {
00648                             code2 = int_code[2];
00649                             code3 = int_code[3];
00650                             if (code0 < 0x81 || code0 > 0xFE)
00651                                    return (-1);
00652                             if (code1 < 0x30 || code1 > 0x39)
00653                                    return (-1);
00654                             if (code2 < 0x81 || code2 > 0xFE)
00655                                    return (-1);
00656                             if (code3 < 0x30 || code3 > 0x39)
00657                                    return (-1);
00658                      }
00659                      break;
00660 
00661               case ENCODE_BIG5:
00662 #if 0
00663                      /* define in lcbig5.c */
00664                      if ((code0 >= 0xA1) && code0 <= 0xC5) || (code0 >= 0xC9 && code0 <= 0xF9)) {
00665                             if (code1 < 0x40 || code1 == 0xFF || (code1 >= 0x7F && code1 <= 0xA0))
00666                                    return (-1);
00667                             else
00668                                    return (0);
00669                      } else {
00670                             if (code0 == 0xC6 && (code1 >= 0x40 && code1 <= 0x7E))
00671                                    return (0);
00672                             else
00673                                    return (-1);
00674                      }
00675 #endif
00676                                    
00677                      if (code0 < 0xA1 || code0 > 0xFE)
00678                             return (-1);
00679                      if (code1 < 0x40 || code1 > 0xFE)
00680                             return (-1);
00681                      if (code1 > 0x7E && code1 < 0xA1)
00682                             return (-1);
00683                      break;
00684 
00685               case ENCODE_EUCTW:
00686                      if (code_len == 2) {
00687                             if (code0 < 0x80 || code1 <0x80)
00688                                    return (-1);
00689                      } if (code_len == 4) {
00690                             code2 = int_code[2];
00691                             code3 = int_code[3];
00692                             if (code0 != 0x8E)
00693                                    return(-1);
00694                             if (code1 < 0x80 || code2 < 0x80 || code3 < 0x80)
00695                                    return(-1);
00696                      }
00697                      break;
00698 
00699               case ENCODE_BIG5HKSCS:
00700                      if (code0 < 0x81 || code0 > 0xFE)
00701                             return (-1);
00702                      if (code1 < 0x40 || code1 > 0xFE)
00703                             return (-1);
00704                      break;
00705 
00706               case ENCODE_EUCKR:
00707                      if (code0 < 0xA1 || code0 > 0xFE)
00708                             return (-1);
00709                      if (code1 < 0xA1 || code1 > 0xFE)
00710                             return (-1);
00711                      break;
00712 
00713               case ENCODE_UTF8:
00714                      break;
00715        }
00716        return(0);
00717 }
00718 
/*
 * Validate the first hzlen bytes of hzstr as text in encoding
 * `encode_id`.
 *
 * Bytes below 0x80 are accepted one at a time, except that two
 * consecutive '?' (0x3f) bytes make the string invalid -- presumably
 * the trace of a failed conversion; TODO confirm with the callers.
 * Every other byte starts a character whose length comes from
 * get_char_len_by_encodeid() and whose bytes are checked by
 * is_valid_code().
 *
 * A character that would extend past hzlen is rejected before
 * is_valid_code() is called, so that check never reads beyond the
 * caller's buffer.
 *
 * Returns 0 when the whole range is valid, -1 otherwise.
 */
int is_valid_encode_string(int encode_id, unsigned char *hzstr, int hzlen)
{
       int i, char_len, ret;
       unsigned char *ptr;

       i = 0;
       while (i < hzlen) {
              ptr = hzstr + i;
              if (*ptr < 0x80) {
                     if (*ptr == 0x3f && i < hzlen-1) {
                            if (*(ptr+1) == 0x3f)
                                   return(-1);
                     }

                     i++;
              } else {
                     char_len = get_char_len_by_encodeid(encode_id, ptr);
                     if (char_len > hzlen - i)
                            return(-1);   /* truncated trailing character */
                     ret = is_valid_code(encode_id, ptr, char_len);
                     if (ret == -1)
                            return(-1);
                     i += char_len;
              }
       }

       return (0);
}
00745 
00746 /* iconv functions */
00747 int Convert_Native_To_UTF8(int encode_id, char *from_buf, size_t from_left,
00748                         char **to_buf, size_t * to_left)
00749 {
00750        char   *ip;
00751        char          *op;
00752        size_t        ileft, oleft;
00753        iconv_t              fd_iconv;
00754        char          *codeset;
00755        size_t        ret = 0;
00756        
00757        if (encode_id < 0 || encode_id >= ENCODES_NUM)
00758               return(-1);
00759 
00760        if ( (from_left < 0) || (*to_left < 0) )
00761               return(-1);
00762 
00763        ip = (char *) from_buf;
00764        ileft = from_left;
00765 
00766        op = *((char **) to_buf);
00767        oleft = *to_left;
00768 
00769        if (encode_id == ENCODE_UTF8) {
00770               if (ileft > oleft)
00771                      return(-1);
00772               memcpy(op, ip, ileft);
00773               *to_left = oleft - ileft;
00774               return(0);
00775        }
00776 
00777        fd_iconv = encode_info[encode_id].fd_iconv_to_utf8;
00778        if (fd_iconv == (iconv_t)-1) return(-1);
00779 
00780        if (fd_iconv == NULL) {
00781               codeset = encode_info[encode_id].iconv_codeset_name;
00782               fd_iconv = iconv_open("UTF-8", codeset);
00783               encode_info[encode_id].fd_iconv_to_utf8 = fd_iconv;
00784               if ( fd_iconv == (iconv_t) -1 )
00785                      return(-1);
00786        }
00787 
00788        ret = iconv(fd_iconv, &ip, &ileft, &op, &oleft);
00789        if (ret == -1) {
00790               return(-1);
00791        }
00792        *to_left = oleft;
00793        return(0);
00794 
00795 }
00796 
00797 int Convert_UTF8_To_Native(int encode_id, char *from_buf, size_t from_left,
00798                         char **to_buf, size_t * to_left)
00799 {
00800        char   *ip;
00801        char          *op;
00802        size_t        ileft, oleft;
00803        iconv_t              fd_iconv;
00804        char          *codeset;
00805        size_t        ret = 0;
00806        
00807        if (encode_id < 0 || encode_id >= ENCODES_NUM)
00808               return(-1);
00809 
00810        if ( (from_left < 0) || (*to_left < 0) )
00811               return(-1);
00812 
00813        ip = (char *) from_buf;
00814        ileft = from_left;
00815 
00816        op = *((char **) to_buf);
00817        oleft = *to_left;
00818 
00819        if (encode_id == ENCODE_UTF8) {
00820               if (ileft > oleft)
00821                      return(-1);
00822               memcpy(op, ip, ileft);
00823               *to_left = oleft - ileft;
00824               return(0);
00825        }
00826 
00827        fd_iconv = encode_info[encode_id].fd_iconv_from_utf8;
00828        if (fd_iconv == (iconv_t)-1) return(-1);
00829 
00830        if (fd_iconv == NULL) {
00831               codeset = encode_info[encode_id].iconv_codeset_name;
00832               fd_iconv = iconv_open(codeset, "UTF-8");
00833               encode_info[encode_id].fd_iconv_from_utf8 = fd_iconv;
00834               if ( fd_iconv == (iconv_t) -1 )
00835                      return(-1);
00836        }
00837 
00838        ret = iconv(fd_iconv, &ip, &ileft, &op, &oleft);
00839        if (ret == -1) {
00840               return(-1);
00841        }
00842        *to_left = oleft;
00843        return(0);
00844 }
00845 
00846 #define UTF16_STRLEN    1024
00847        
00848 int Convert_Native_To_UTF16(int encode_id, char *from_buf, size_t from_left,
00849                         char **to_buf, size_t *to_left)
00850 {
00851        char   *ip;
00852        char          *op;
00853        size_t        ileft, oleft;
00854 
00855        char          *codeset;
00856        iconv_t              fd_iconv_native_to_utf8;
00857 
00858        size_t        ret = 0;
00859        int           skip_native_to_utf8_iconv = 0;
00860 
00861        if (encode_id < 0 || encode_id >= ENCODES_NUM)
00862               return(-1);
00863 
00864        if ( (from_left < 0) || (*to_left < 0) )
00865               return(-1);
00866 
00867        /* Initialize the iconv of utf8_to_ucs2 */
00868        if (fd_iconv_UTF8_to_UTF16 == (iconv_t)-1 )
00869                return(-1);
00870 
00871        if (fd_iconv_UTF8_to_UTF16 == NULL) {
00872               fd_iconv_UTF8_to_UTF16 = iconv_open("UTF-16", "UTF-8");
00873               if (fd_iconv_UTF8_to_UTF16 == (iconv_t)-1 )
00874                      return(-1);
00875        }
00876 
00877        if (encode_id == ENCODE_UTF8)
00878               skip_native_to_utf8_iconv = 1;
00879 
00880        ip = (char *) from_buf;
00881        ileft = from_left;
00882 
00883        op = *((char **) to_buf);
00884        oleft = *to_left;
00885 
00886        if (!skip_native_to_utf8_iconv) {
00887               char          buffer[UTF16_STRLEN];   /* Fix me! */
00888               const size_t  buf_len = UTF16_STRLEN;
00889               char          *src, *dst;
00890               size_t        src_len, dst_len;
00891 
00892               /* Initialize the iconv of native_to_utf8 */
00893               fd_iconv_native_to_utf8 = encode_info[encode_id].fd_iconv_to_utf8;
00894               if (fd_iconv_native_to_utf8 == (iconv_t)-1) return(-1);
00895 
00896               if (fd_iconv_native_to_utf8 == NULL) {
00897                      codeset = encode_info[encode_id].iconv_codeset_name;
00898                      fd_iconv_native_to_utf8 = iconv_open("UTF-8", codeset);
00899                      encode_info[encode_id].fd_iconv_to_utf8 = fd_iconv_native_to_utf8;
00900                      if ( fd_iconv_native_to_utf8 == (iconv_t) -1 )
00901                             return(-1);
00902               }
00903 
00904               while ((ileft > 0) && (oleft > 0)) {
00905                      dst = buffer;
00906                      dst_len = buf_len;
00907                      ret = iconv(fd_iconv_native_to_utf8, &ip, &ileft, (char **) &dst, &dst_len);
00908                      if (ret == -1) {
00909                             return(-1);
00910                      }
00911                      src = buffer;
00912                      src_len = buf_len - dst_len;
00913                      ret = iconv(fd_iconv_UTF8_to_UTF16, (char **) &src, &src_len, &op, &oleft);
00914                      if (ret == -1) {
00915                             return(-1);
00916                      }
00917               }
00918 
00919        } else {
00920               ret = iconv(fd_iconv_UTF8_to_UTF16, &ip, &ileft, &op, &oleft);
00921               if (ret == -1) {
00922                       return(-1);
00923               }
00924        }
00925 
00926        if (0xFEFF == **((CARD16 **) to_buf)) {
00927               memmove(*to_buf, *to_buf + 2, *to_left - oleft - 2);
00928               *to_left = (oleft + 2);
00929        } else {
00930               *to_left = oleft;
00931        }
00932 
00933        return(0);
00934 }
00935 
00936 int Convert_UTF16_To_Native(int encode_id, char *from_buf, size_t from_left,
00937                         char **to_buf, size_t * to_left)
00938 {
00939        char   *ip;
00940        char          *op;
00941        size_t        ileft, oleft;
00942        char          *codeset;
00943        iconv_t              fd_iconv_utf8_to_native;
00944 
00945        size_t        ret = 0;
00946        int           skip_utf8_to_native_iconv = 0;
00947 
00948        if (encode_id < 0 || encode_id >= ENCODES_NUM)
00949               return(-1);
00950 
00951        if ( (from_left < 0) || (*to_left < 0) )
00952               return(-1);
00953 
00954        /* Initialize the iconv of utf8_to_ucs2 */
00955        if (fd_iconv_UTF16_to_UTF8 == (iconv_t)-1 )
00956                return(-1);
00957 
00958        if (fd_iconv_UTF16_to_UTF8 == NULL) {
00959               fd_iconv_UTF16_to_UTF8 = iconv_open("UTF-8", "UTF-16");
00960               if (fd_iconv_UTF16_to_UTF8 == (iconv_t)-1 )
00961                      return(-1);
00962        }
00963 
00964        if (encode_id == ENCODE_UTF8)
00965               skip_utf8_to_native_iconv = 1;
00966 
00967        ip = (char *) from_buf;
00968        ileft = from_left;
00969 
00970        op = *((char **) to_buf);
00971        oleft = *to_left;
00972 
00973        if (!skip_utf8_to_native_iconv) {
00974               char          buffer[UTF16_STRLEN];   /* Fix me! */
00975               const size_t  buf_len = UTF16_STRLEN;
00976               char          *src, *dst;
00977               size_t        src_len, dst_len;
00978 
00979               /* Initialize the iconv of native_to_utf8 */
00980               fd_iconv_utf8_to_native = encode_info[encode_id].fd_iconv_from_utf8;
00981               if (fd_iconv_utf8_to_native == (iconv_t)-1) return(-1);
00982 
00983               if (fd_iconv_utf8_to_native == NULL) {
00984                      codeset = encode_info[encode_id].iconv_codeset_name;
00985                      fd_iconv_utf8_to_native = iconv_open(codeset, "UTF-8");
00986                      encode_info[encode_id].fd_iconv_from_utf8 = fd_iconv_utf8_to_native;
00987                      if ( fd_iconv_utf8_to_native == (iconv_t) -1 )
00988                             return(-1);
00989               }
00990 
00991               while ((ileft > 0) && (oleft > 0)) {
00992                      dst = buffer;
00993                      dst_len = buf_len;
00994                      ret = iconv(fd_iconv_UTF16_to_UTF8, &ip, &ileft, (char **) &dst, &dst_len);
00995                      if (ret == -1) {
00996                             return(-1);
00997                      }
00998                      src = buffer;
00999                      src_len = buf_len - dst_len;
01000                      ret = iconv(fd_iconv_utf8_to_native, (char **) &src, &src_len, &op, &oleft);
01001                      if (ret == -1) {
01002                             return(-1);
01003                      }
01004               }
01005 
01006        } else {
01007               ret = iconv(fd_iconv_UTF16_to_UTF8, &ip, &ileft, &op, &oleft);
01008               if (ret == -1) {
01009                       return(-1);
01010               }
01011        }
01012 
01013        *to_left = oleft;
01014 
01015        return(0);
01016 }