Back to index

im-sdk  12.3.91
encode.c
Go to the documentation of this file.
00001 /*
00002   Copyright 2002-2003 Sun Microsystems, Inc. All Rights Reserved.
00003 
00004   Permission is hereby granted, free of charge, to any person obtaining a
00005   copy of this software and associated documentation files (the
00006   "Software"), to deal in the Software without restriction, including
00007   without limitation the rights to use, copy, modify, merge, publish,
00008   distribute, sublicense, and/or sell copies of the Software, and to
00009   permit persons to whom the Software is furnished to do so, subject to
00010   the following conditions: The above copyright notice and this
00011   permission notice shall be included in all copies or substantial
00012   portions of the Software.
00013 
00014 
00015   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00016   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00017   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00018   IN NO EVENT SHALL THE OPEN GROUP OR SUN MICROSYSTEMS, INC. BE LIABLE
00019   FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
00020   CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
00021   THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE EVEN IF
00022   ADVISED IN ADVANCE OF THE POSSIBILITY OF SUCH DAMAGES.
00023 
00024 
00025   Except as contained in this notice, the names of The Open Group and/or
00026   Sun Microsystems, Inc. shall not be used in advertising or otherwise to
00027   promote the sale, use or other dealings in this Software without prior
00028   written authorization from The Open Group and/or Sun Microsystems,
00029   Inc., as applicable.
00030 
00031 
00032   X Window System is a trademark of The Open Group
00033 
00034   OSF/1, OSF/Motif and Motif are registered trademarks, and OSF, the OSF
00035   logo, LBX, X Window System, and Xinerama are trademarks of the Open
00036   Group. All other trademarks and registered trademarks mentioned herein
00037   are the property of their respective owners. No right, title or
00038   interest in or to any trademark, service mark, logo or trade name of
00039   Sun Microsystems, Inc. or its licensors is granted.
00040 
00041 */
00042 #include <stdio.h>
00043 #include <errno.h>
00044 #include <iconv.h>
00045 #include <string.h>
00046 #include <X11/Xmd.h>
00047 #include "encode.h"
00048 
00049 
/*
 * Descriptor for one supported encoding: the names and locales it is
 * known by, plus the iconv descriptors used to convert to and from UTF-8.
 *
 * The field order matters: encode_info[] is initialized positionally.
 */
typedef struct _Encode_Info {
  int       encode_id;            /* ENCODE_* identifier of this entry */
  char    **called_names;         /* NULL-terminated alias list; element 0 is
                                     what get_name_from_encodeid() returns */
  char    **support_locales;      /* NULL-terminated list of locale names */
  char     *iconv_codeset_name;   /* codeset name passed to iconv_open() */
  iconv_t   fd_iconv_to_utf8;     /* native -> UTF-8 descriptor; NULL until
                                     first use, (iconv_t)-1 if open failed */
  iconv_t   fd_iconv_from_utf8;   /* UTF-8 -> native descriptor; NULL until
                                     first use, (iconv_t)-1 if open failed */
} Encode_Info;
00058 
/* Codeset name handed to iconv_open() for UTF-8. */
#define UTF8_CODESET_NAME   "UTF-8"

/* unknown encode name */
#define UNKNOWN_ENCODE      "UNKNOWN"

/*
 * Aliases accepted for the UTF-8 encoding.  The list is NULL-terminated;
 * the first entry is the primary name.
 */
char *UTF8_names[] = {
  "UTF-8",
  "UTF_8",
  "UTF8",
  NULL
};

/* Locales mapped to UTF-8 by locale name: none (only the terminator). */
char *UTF8_locales[] = {
  NULL
};
00074 
00075 Encode_Info  encode_info[ENCODES_NUM + 1] = {
00076   { 
00077     ENCODE_UTF8,
00078     UTF8_names,
00079     UTF8_locales,
00080     UTF8_CODESET_NAME,
00081     NULL,
00082     NULL,
00083   }
00084 };
00085 
00086 iconv_t  fd_iconv_UTF8_to_UTF16 = NULL;
00087 iconv_t  fd_iconv_UTF16_to_UTF8 = NULL;
00088 
00089 int get_encodeid_from_name(char *name)
00090 {
00091   int encode_id, i, ret;
00092   char *s;
00093        
00094   ret = -1;
00095   for (encode_id = 0; encode_id < ENCODES_NUM; encode_id++) {
00096     i = 0;
00097     while (1) {
00098       s = encode_info[encode_id].called_names[i];
00099       if (!s || !*s) break;
00100       if (!strcmp(s, name)) {
00101        ret = encode_id;
00102        break;
00103       }
00104       i++;
00105     }
00106     if (ret != -1) break;
00107   }
00108   return(ret);       
00109 }
00110 
00111 int get_encodeid_from_locale(char *locale)
00112 {
00113   int encode_id, i, ret;
00114   char *s;
00115        
00116   ret = -1;
00117   for (encode_id = 0; encode_id < ENCODES_NUM; encode_id++) {
00118     i = 0;
00119     while (1) {
00120       s = encode_info[encode_id].support_locales[i];
00121       if (!s || !*s) break;
00122       if (!strcmp(s, locale)) {
00123        ret = encode_id;
00124        break;
00125       }
00126       i++;
00127     }
00128     if (ret != -1) break;
00129   }
00130 
00131   if (ret == -1) ret = ENCODE_ERROR; /* return default encode */
00132 
00133   return(ret);       
00134 }
00135 
00136 char *get_name_from_encodeid(int encode_id)
00137 {
00138   if (encode_id >= 0 && encode_id <= ENCODES_NUM)
00139     return(encode_info[encode_id].called_names[0]);
00140   else
00141     return(NULL);
00142 }
00143 
00144 int  get_char_len_by_encodeid(int encode_id, unsigned char *ch_ptr)
00145 {
00146   int ret = 2;  /* default character length */
00147   unsigned char code0;
00148 
00149   code0 = ch_ptr[0];
00150   if (code0 < 0x80) return(1);
00151 
00152   if (encode_id == ENCODE_UTF8) {
00153     if (code0 > 0xe0)              /* 3 bytes */
00154       ret = 3;
00155     else
00156       ret = 3;
00157   } 
00158   return(ret);
00159 }
00160 
00161 int is_valid_code(int encode_id, unsigned char *int_code, int code_len)
00162 {
00163   unsigned char code0, code1;
00164        
00165   code0 = int_code[0];
00166   code1 = int_code[1];
00167 
00168   switch (encode_id) {
00169   case ENCODE_UTF8:
00170     break;
00171   }
00172   return(0);
00173 }
00174 
/*
 * Scan a byte string and decide whether it is acceptable text in the
 * given encoding.
 *
 * encode_id: encoding of the text.
 * hzstr:     start of the text.
 * hzlen:     number of bytes to examine.
 *
 * Returns -1 when the text contains two consecutive '?' (0x3f) bytes or
 * when is_valid_code() rejects a multi-byte character; otherwise 0.
 */
int is_valid_encode_string(int encode_id, unsigned char *hzstr, int hzlen)
{
  int pos = 0;

  while (pos < hzlen) {
    unsigned char *cur = hzstr + pos;
    int width;

    if (*cur >= 0x80) {
      /* multi-byte character: validate it, then step over all its bytes */
      width = get_char_len_by_encodeid(encode_id, cur);
      if (is_valid_code(encode_id, cur, width) == -1)
        return(-1);
      pos += width;
      continue;
    }

    /* NOTE(review): "??" presumably marks characters an earlier
       conversion could not represent -- confirm with the callers. */
    if (*cur == 0x3f && pos < hzlen - 1 && cur[1] == 0x3f)
      return(-1);

    pos++;
  }

  return(0);
}
00200 
00201 int Convert_UTF8_To_Native(int encode_id, char *from_buf, size_t from_left,
00202                         char **to_buf, size_t * to_left)
00203 {
00204   char *ip;
00205   char        *op;
00206   size_t             ileft, oleft;
00207   iconv_t            fd_iconv;
00208   char               *codeset;
00209   size_t             ret = 0;
00210        
00211   if (encode_id < 0 || encode_id >= ENCODES_NUM)
00212     return(-1);
00213 
00214   if ( (from_left < 0) || (*to_left < 0) )
00215     return(-1);
00216 
00217   ip = from_buf;
00218   ileft = from_left;
00219 
00220   op = *((char **) to_buf);
00221   oleft = *to_left;
00222 
00223   if (encode_id == ENCODE_UTF8) {
00224     if (ileft > oleft)
00225       return(-1);
00226     memcpy(op, ip, ileft);
00227     *to_left = oleft - ileft;
00228     return(0);
00229   }
00230 
00231   fd_iconv = encode_info[encode_id].fd_iconv_from_utf8;
00232   if (fd_iconv == (iconv_t)-1) return(-1);
00233 
00234   if (fd_iconv == NULL) {
00235     codeset = encode_info[encode_id].iconv_codeset_name;
00236     fd_iconv = iconv_open(codeset, "UTF-8");
00237     encode_info[encode_id].fd_iconv_from_utf8 = fd_iconv;
00238     if ( fd_iconv == (iconv_t) -1 )
00239       return(-1);
00240   }
00241 
00242   ret = iconv(fd_iconv, &ip, &ileft, &op, &oleft);
00243   if ((ret != 0) && (E2BIG != errno)) {
00244     return(-1);
00245   }
00246   *to_left = oleft;
00247   return(0);
00248 }
00249 
00250 #define UTF16_STRLEN    1024
00251        
00252 int Convert_Native_To_UTF16(int encode_id, char *from_buf, size_t from_left,
00253                          char **to_buf, size_t *to_left)
00254 {
00255   char *ip;
00256   char        *op;
00257   size_t             ileft, oleft;
00258 
00259   char               *codeset;
00260   iconv_t            fd_iconv_native_to_utf8;
00261 
00262   size_t             ret = 0;
00263   int         skip_native_to_utf8_iconv = 0;
00264 
00265   if (encode_id < 0 || encode_id >= ENCODES_NUM)
00266     return(-1);
00267 
00268   if ( (from_left < 0) || (*to_left < 0) )
00269     return(-1);
00270 
00271   /* Initialize the iconv of utf8_to_ucs2 */
00272   if (fd_iconv_UTF8_to_UTF16 == (iconv_t)-1 )
00273     return(-1);
00274 
00275   if (fd_iconv_UTF8_to_UTF16 == NULL) {
00276     fd_iconv_UTF8_to_UTF16 = iconv_open("UCS-2", "UTF-8");
00277     if (fd_iconv_UTF8_to_UTF16 == (iconv_t)-1 )
00278       return(-1);
00279   }
00280 
00281   if (encode_id == ENCODE_UTF8)
00282     skip_native_to_utf8_iconv = 1;
00283 
00284   ip = from_buf;
00285   ileft = from_left;
00286 
00287   op = *((char **) to_buf);
00288   oleft = *to_left;
00289 
00290   if (!skip_native_to_utf8_iconv) {
00291     char             buffer[UTF16_STRLEN];   /* Fix me! */
00292     const size_t     buf_len = UTF16_STRLEN;
00293     char             *src, *dst;
00294     size_t           src_len, dst_len;
00295 
00296     /* Initialize the iconv of native_to_utf8 */
00297     fd_iconv_native_to_utf8 = encode_info[encode_id].fd_iconv_to_utf8;
00298     if (fd_iconv_native_to_utf8 == (iconv_t)-1) return(-1);
00299 
00300     if (fd_iconv_native_to_utf8 == NULL) {
00301       codeset = encode_info[encode_id].iconv_codeset_name;
00302       fd_iconv_native_to_utf8 = iconv_open("UTF-8", codeset);
00303       encode_info[encode_id].fd_iconv_to_utf8 = fd_iconv_native_to_utf8;
00304       if ( fd_iconv_native_to_utf8 == (iconv_t) -1 )
00305        return(-1);
00306     }
00307 
00308     while ((ileft > 0) && (oleft > 0)) {
00309       dst = buffer;
00310       dst_len = buf_len;
00311       ret = iconv(fd_iconv_native_to_utf8, &ip, &ileft, (char **) &dst, &dst_len);
00312       if ((ret != 0) && (E2BIG != errno)) {
00313        return(-1);
00314       }
00315       src = buffer;
00316       src_len = buf_len - dst_len;
00317       ret = iconv(fd_iconv_UTF8_to_UTF16, (const char **) &src, &src_len, &op, &oleft);
00318       if ((ret != 0) && (E2BIG != errno)) {
00319        return(-1);
00320       }
00321     }
00322 
00323   } else {
00324     ret = iconv(fd_iconv_UTF8_to_UTF16, &ip, &ileft, &op, &oleft);
00325     if ((ret != 0) && (E2BIG != errno)) {
00326       return(-1);
00327     }
00328   }
00329 
00330   if (0xFEFF == **((CARD16 **) to_buf)) {
00331     memmove(*to_buf, *to_buf + 2, *to_left - oleft - 2);
00332     *to_left = (oleft + 2);
00333   } else {
00334     *to_left = oleft;
00335   }
00336 
00337   return(0);
00338 }