Back to index

php5  5.3.10
mbfilter_euc_tw.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file: Rui Hirokawa <hirokawa@php.net>
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter_tw.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_euc_tw.h"
00036 
00037 #include "unicode_table_cns11643.h"
00038 
00039 static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter);
00040 
/*
 * Length table for EUC-TW with one entry per byte value (0x00-0xff):
 *   0x8e        -> 4
 *   0xa1 - 0xfe -> 2
 *   all others  -> 1
 * Presumably consulted with the first byte of a character to obtain the
 * length of the whole sequence; the consumers live outside this file.
 */
static const unsigned char mblen_table_euctw[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
00059 
00060 
00061 static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL};
00062 
00063 const mbfl_encoding mbfl_encoding_euc_tw = {
00064        mbfl_no_encoding_euc_tw,
00065        "EUC-TW",
00066        "EUC-TW",
00067        (const char *(*)[])&mbfl_encoding_euc_tw_aliases,
00068        mblen_table_euctw,
00069        MBFL_ENCTYPE_MBCS
00070 };
00071 
00072 const struct mbfl_identify_vtbl vtbl_identify_euctw = {
00073        mbfl_no_encoding_euc_tw,
00074        mbfl_filt_ident_common_ctor,
00075        mbfl_filt_ident_common_dtor,
00076        mbfl_filt_ident_euctw
00077 };
00078 
00079 const struct mbfl_convert_vtbl vtbl_euctw_wchar = {
00080        mbfl_no_encoding_euc_tw,
00081        mbfl_no_encoding_wchar,
00082        mbfl_filt_conv_common_ctor,
00083        mbfl_filt_conv_common_dtor,
00084        mbfl_filt_conv_euctw_wchar,
00085        mbfl_filt_conv_common_flush
00086 };
00087 
00088 const struct mbfl_convert_vtbl vtbl_wchar_euctw = {
00089        mbfl_no_encoding_wchar,
00090        mbfl_no_encoding_euc_tw,
00091        mbfl_filt_conv_common_ctor,
00092        mbfl_filt_conv_common_dtor,
00093        mbfl_filt_conv_wchar_euctw,
00094        mbfl_filt_conv_common_flush
00095 };
00096 
/*
 * Evaluate `statement` exactly once; if its value is negative, return -1
 * from the enclosing function. Only usable inside functions returning int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
00098 
00099 /*
00100  * EUC-TW => wchar
00101  */
00102 int
00103 mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter)
00104 {
00105        int c1, s, w, plane;
00106 
00107        switch (filter->status) {
00108        case 0:
00109               if (c >= 0 && c < 0x80) {   /* latin */
00110                      CK((*filter->output_function)(c, filter->data));
00111               } else if (c > 0xa0 && c < 0xff) { /* dbcs first byte */
00112                      filter->status = 1;
00113                      filter->cache = c;
00114               } else if (c == 0x8e) {     /* mbcs first byte */
00115                      filter->status = 2;
00116                      filter->cache = c;
00117               } else {
00118                      w = c & MBFL_WCSGROUP_MASK;
00119                      w |= MBFL_WCSGROUP_THROUGH;
00120                      CK((*filter->output_function)(w, filter->data));
00121               }
00122               break;
00123 
00124        case 1:              /* mbcs second byte */
00125               filter->status = 0;
00126               c1 = filter->cache;
00127               if (c > 0xa0 && c < 0xff) {
00128                      w = (c1 - 0xa1)*94 + (c - 0xa1);
00129                      if (w >= 0 && w < cns11643_1_ucs_table_size) {
00130                             w = cns11643_1_ucs_table[w];
00131                      } else {
00132                             w = 0;
00133                      }
00134                      if (w <= 0) {
00135                             w = (c1 << 8) | c;
00136                             w &= MBFL_WCSPLANE_MASK;
00137                             w |= MBFL_WCSPLANE_CNS11643;
00138                      }
00139                      CK((*filter->output_function)(w, filter->data));
00140               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00141                      CK((*filter->output_function)(c, filter->data));
00142               } else {
00143                      w = (c1 << 8) | c;
00144                      w &= MBFL_WCSGROUP_MASK;
00145                      w |= MBFL_WCSGROUP_THROUGH;
00146                      CK((*filter->output_function)(w, filter->data));
00147               }
00148               break;
00149 
00150        case 2:       /* got 0x8e,  first char */
00151               c1 = filter->cache;
00152               if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00153                      CK((*filter->output_function)(c, filter->data));
00154                      filter->status = 0;
00155               } else if (c > 0xa0 && c < 0xaf) {
00156                      filter->status = 3;
00157                      filter->cache = c - 0xa1;
00158               } else {
00159                      w = (c1 << 8) | c;
00160                      w &= MBFL_WCSGROUP_MASK;
00161                      w |= MBFL_WCSGROUP_THROUGH;
00162                      CK((*filter->output_function)(w, filter->data));
00163               }
00164               break;
00165 
00166        case 3:       /* got 0x8e,  third char */
00167               filter->status = 0;
00168               c1 = filter->cache;
00169               if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00170                      CK((*filter->output_function)(c, filter->data));
00171                      filter->status = 0;
00172               } else if (c > 0xa0 && c < 0xff) {
00173                      filter->status = 4;
00174                      filter->cache = (c1 << 8) + c - 0xa1;
00175               } else {
00176                      w = (c1 << 8) | c;
00177                      w &= MBFL_WCSGROUP_MASK;
00178                      w |= MBFL_WCSGROUP_THROUGH;
00179                      CK((*filter->output_function)(w, filter->data));
00180               }
00181               break;
00182 
00183        case 4:       /* mbcs fourth char */
00184               filter->status = 0;
00185               c1 = filter->cache;
00186               if (c1 >= 0x100 && c1 <= 0xdff && c > 0xa0 && c < 0xff) {
00187                      plane = (c1 & 0xf00) >> 8; 
00188                      s = (c1 & 0xff)*94 + c - 0xa1;
00189                      w = 0;
00190                      if (s >= 0) {
00191                             if (plane == 1 && s < cns11643_2_ucs_table_size) {
00192                                    w = cns11643_2_ucs_table[s];
00193                             }
00194                             if (plane == 13 && s < cns11643_14_ucs_table_size) {
00195                                    w = cns11643_14_ucs_table[s];
00196                             }
00197                      }
00198                      if (w <= 0) {
00199                             w = ((c1 & 0x7f) << 8) | (c & 0x7f);
00200                             w &= MBFL_WCSPLANE_MASK;
00201                             w |= MBFL_WCSPLANE_CNS11643;
00202                      }
00203                      CK((*filter->output_function)(w, filter->data));
00204               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00205                      CK((*filter->output_function)(c, filter->data));
00206               } else {
00207                      w = (c1 << 8) | c | 0x8e0000;
00208                      w &= MBFL_WCSGROUP_MASK;
00209                      w |= MBFL_WCSGROUP_THROUGH;
00210                      CK((*filter->output_function)(w, filter->data));
00211               }
00212               break;
00213 
00214        default:
00215               filter->status = 0;
00216               break;
00217        }
00218 
00219        return c;
00220 }
00221 
00222 /*
00223  * wchar => EUC-TW
00224  */
00225 int
00226 mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter)
00227 {
00228        int c1, s, plane;
00229 
00230        s = 0;
00231        if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
00232               s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
00233        } else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
00234               s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
00235        } else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
00236               s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
00237        } else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
00238               s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
00239        } else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
00240               s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
00241        }
00242        if (s <= 0) {
00243               c1 = c & ~MBFL_WCSPLANE_MASK;
00244               if (c1 == MBFL_WCSPLANE_CNS11643) {
00245                      s = c & MBFL_WCSPLANE_MASK;
00246               }
00247               if (c == 0) {
00248                      s = 0;
00249               } else if (s <= 0) {
00250                      s = -1;
00251               }
00252        }
00253        if (s >= 0) {
00254               plane = (s & 0x1f0000) >> 16;
00255               if (plane <= 1){
00256                      if (s < 0x80) {      /* latin */
00257                             CK((*filter->output_function)(s, filter->data));
00258                      } else {
00259                             s = (s & 0xffff) | 0x8080;
00260                             CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00261                             CK((*filter->output_function)(s & 0xff, filter->data));
00262                      } 
00263               } else {
00264                      s = (0x8ea00000 + (plane << 16)) | ((s & 0xffff) | 0x8080);
00265                      CK((*filter->output_function)(0x8e , filter->data));
00266                      CK((*filter->output_function)((s >> 16) & 0xff, filter->data));
00267                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00268                      CK((*filter->output_function)(s & 0xff, filter->data));
00269               }
00270        } else {
00271               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00272                      CK(mbfl_filt_conv_illegal_output(c, filter));
00273               }
00274        }
00275        return c;
00276 }
00277 
00278 static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter)
00279 {
00280        switch (filter->status) {
00281        case  0:      /* latin */
00282               if (c >= 0 && c < 0x80) {   /* ok */
00283                      ;
00284               } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
00285                      filter->status = 1;
00286               } else if (c == 0x8e) {     /* DBCS lead byte */
00287                      filter->status = 2;
00288               } else {                                                /* bad */
00289                      filter->flag = 1;
00290               }
00291               break;
00292 
00293        case  1:      /* got lead byte */
00294               if (c < 0xa1 || c > 0xfe) {        /* bad */
00295                      filter->flag = 1;
00296               }
00297               filter->status = 0;
00298               break;
00299 
00300        case  2:      /* got lead byte */
00301               if (c >= 0xa1 && c < 0xaf) {       /* ok */
00302                      filter->status = 3;
00303               } else {
00304                      filter->flag = 1; /* bad */
00305               }
00306               break;
00307 
00308        case  3:      /* got lead byte */
00309               if (c < 0xa1 || c > 0xfe) {        /* bad */
00310                      filter->flag = 1;
00311               }
00312               filter->status = 4;
00313               break;
00314 
00315        case  4:      /* got lead byte */
00316               if (c < 0xa1 || c > 0xfe) {        /* bad */
00317                      filter->flag = 1;
00318               }
00319               filter->status = 0;
00320               break;
00321 
00322        default:
00323               filter->status = 0;
00324               break;
00325        }
00326 
00327        return c;
00328 }
00329