Back to index

php5  5.3.10
mbfilter_euc_cn.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter_cn.c
00026  * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
00027  *
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_euc_cn.h"
00036 
00037 #include "unicode_table_cp936.h"
00038 
00039 static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter);
00040 
/*
 * Per-byte table with 256 entries, indexed by the byte value 0x00-0xFF.
 * Entries 0xA1-0xFE hold 2; every other entry holds 1.
 * (Judging by the name and its use in mbfl_encoding_euc_cn, this is presumably
 * the length in bytes of a character that starts with that byte.)
 */
static const unsigned char mblen_table_euccn[] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
00059 
/* NULL-terminated list of alternative names, referenced by mbfl_encoding_euc_cn. */
static const char *mbfl_encoding_euc_cn_aliases[] = {
    "CN-GB",
    "EUC_CN",
    "eucCN",
    "x-euc-cn",
    "gb2312",
    NULL
};
00061 
00062 const mbfl_encoding mbfl_encoding_euc_cn = {
00063        mbfl_no_encoding_euc_cn,
00064        "EUC-CN",
00065        "CN-GB",
00066        (const char *(*)[])&mbfl_encoding_euc_cn_aliases,
00067        mblen_table_euccn,
00068        MBFL_ENCTYPE_MBCS
00069 };
00070 
00071 const struct mbfl_identify_vtbl vtbl_identify_euccn = {
00072        mbfl_no_encoding_euc_cn,
00073        mbfl_filt_ident_common_ctor,
00074        mbfl_filt_ident_common_dtor,
00075        mbfl_filt_ident_euccn
00076 };
00077 
00078 const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
00079        mbfl_no_encoding_euc_cn,
00080        mbfl_no_encoding_wchar,
00081        mbfl_filt_conv_common_ctor,
00082        mbfl_filt_conv_common_dtor,
00083        mbfl_filt_conv_euccn_wchar,
00084        mbfl_filt_conv_common_flush
00085 };
00086 
00087 const struct mbfl_convert_vtbl vtbl_wchar_euccn = {
00088        mbfl_no_encoding_wchar,
00089        mbfl_no_encoding_euc_cn,
00090        mbfl_filt_conv_common_ctor,
00091        mbfl_filt_conv_common_dtor,
00092        mbfl_filt_conv_wchar_euccn,
00093        mbfl_filt_conv_common_flush
00094 };
00095 
/*
 * CK(statement): evaluate `statement` exactly once; when its value is
 * negative, make the enclosing function return -1 immediately.
 */
#define CK(statement)               \
    do {                            \
        if ((statement) < 0)        \
            return (-1);            \
    } while (0)
00097 
00098 /*
00099  * EUC-CN => wchar
00100  */
00101 int
00102 mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter)
00103 {
00104        int c1, w;
00105 
00106        switch (filter->status) {
00107        case 0:
00108               if (c >= 0 && c < 0x80) {   /* latin */
00109                      CK((*filter->output_function)(c, filter->data));
00110               } else if (c > 0xa0 && c < 0xff) { /* dbcs lead byte */
00111                      filter->status = 1;
00112                      filter->cache = c;
00113               } else {
00114                      w = c & MBFL_WCSGROUP_MASK;
00115                      w |= MBFL_WCSGROUP_THROUGH;
00116                      CK((*filter->output_function)(w, filter->data));
00117               }
00118               break;
00119 
00120        case 1:              /* dbcs second byte */
00121               filter->status = 0;
00122               c1 = filter->cache;
00123               if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) {
00124                      w = (c1 - 0x81)*192 + (c - 0x40);
00125                      if (w >= 0 && w < cp936_ucs_table_size) {
00126                             w = cp936_ucs_table[w];
00127                      } else {
00128                             w = 0;
00129                      }
00130                      if (w <= 0) {
00131                             w = (c1 << 8) | c;
00132                             w &= MBFL_WCSPLANE_MASK;
00133                             w |= MBFL_WCSPLANE_GB2312;
00134                      }
00135                      CK((*filter->output_function)(w, filter->data));
00136               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00137                      CK((*filter->output_function)(c, filter->data));
00138               } else {
00139                      w = (c1 << 8) | c;
00140                      w &= MBFL_WCSGROUP_MASK;
00141                      w |= MBFL_WCSGROUP_THROUGH;
00142                      CK((*filter->output_function)(w, filter->data));
00143               }
00144               break;
00145 
00146        default:
00147               filter->status = 0;
00148               break;
00149        }
00150 
00151        return c;
00152 }
00153 
00154 /*
00155  * wchar => EUC-CN
00156  */
00157 int
00158 mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter)
00159 {
00160        int c1, c2, s;
00161 
00162        s = 0;
00163        if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
00164               s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
00165        } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
00166               s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
00167        } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
00168               s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
00169        } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
00170               s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
00171        } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
00172               s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
00173        }
00174        c1 = (s >> 8) & 0xff;
00175        c2 = s & 0xff;
00176        
00177        if (c1 < 0xa1 || c2 < 0xa1) { /* exclude CP936 extension */
00178               s = c;
00179        }
00180 
00181        if (s <= 0) {
00182               c1 = c & ~MBFL_WCSPLANE_MASK;
00183               if (c1 == MBFL_WCSPLANE_GB2312) {
00184                      s = c & MBFL_WCSPLANE_MASK;
00185               }
00186               if (c == 0) {
00187                      s = 0;
00188               } else if (s <= 0) {
00189                      s = -1;
00190               }
00191        }
00192        if (s >= 0) {
00193               if (s < 0x80) {      /* latin */
00194                      CK((*filter->output_function)(s, filter->data));
00195               } else {
00196                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00197                      CK((*filter->output_function)(s & 0xff, filter->data));
00198               }
00199        } else {
00200               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00201                      CK(mbfl_filt_conv_illegal_output(c, filter));
00202               }
00203        }
00204 
00205        return c;
00206 }
00207 
00208 static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter)
00209 {
00210        switch (filter->status) {
00211        case  0:      /* latin */
00212               if (c >= 0 && c < 0x80) {   /* ok */
00213                      ;
00214               } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
00215                      filter->status = 1;
00216               } else {                                                /* bad */
00217                      filter->flag = 1;
00218               }
00219               break;
00220 
00221        case  1:      /* got lead byte */
00222               if (c < 0xa1 || c > 0xfe) {        /* bad */
00223                      filter->flag = 1;
00224               }
00225               filter->status = 0;
00226               break;
00227 
00228        default:
00229               filter->status = 0;
00230               break;
00231        }
00232 
00233        return c;
00234 }
00235 
00236