Back to index

php5  5.3.10
mbfilter_cp936.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * the source code included in this file was separated from mbfilter_cn.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  *
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_cp936.h"
00036 
00037 #include "unicode_table_cp936.h"
00038 
00039 static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter);
00040 
/*
 * Per-byte length table with one entry for every byte value 0x00-0xFF.
 * Entries 0x81-0xFE hold 2 (the range this file treats as DBCS lead bytes);
 * every other entry, including 0x80 and 0xFF, holds 1.
 */
static const unsigned char mblen_table_cp936[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
00059 
/* Alternative names for this encoding; the list is terminated by NULL. */
static const char *mbfl_encoding_cp936_aliases[] = {
	"CP-936",
	"GBK",
	NULL
};
00061 
00062 const mbfl_encoding mbfl_encoding_cp936 = {
00063        mbfl_no_encoding_cp936,
00064        "CP936",
00065        "CP936",
00066        (const char *(*)[])&mbfl_encoding_cp936_aliases,
00067        mblen_table_cp936,
00068        MBFL_ENCTYPE_MBCS
00069 };
00070 
00071 const struct mbfl_identify_vtbl vtbl_identify_cp936 = {
00072        mbfl_no_encoding_cp936,
00073        mbfl_filt_ident_common_ctor,
00074        mbfl_filt_ident_common_dtor,
00075        mbfl_filt_ident_cp936
00076 };
00077 
00078 const struct mbfl_convert_vtbl vtbl_cp936_wchar = {
00079        mbfl_no_encoding_cp936,
00080        mbfl_no_encoding_wchar,
00081        mbfl_filt_conv_common_ctor,
00082        mbfl_filt_conv_common_dtor,
00083        mbfl_filt_conv_cp936_wchar,
00084        mbfl_filt_conv_common_flush
00085 };
00086 
00087 const struct mbfl_convert_vtbl vtbl_wchar_cp936 = {
00088        mbfl_no_encoding_wchar,
00089        mbfl_no_encoding_cp936,
00090        mbfl_filt_conv_common_ctor,
00091        mbfl_filt_conv_common_dtor,
00092        mbfl_filt_conv_wchar_cp936,
00093        mbfl_filt_conv_common_flush
00094 };
00095 
00096 
/*
 * Evaluate `statement` exactly once; if the result is negative, make the
 * enclosing function return -1 immediately. Only usable inside functions
 * that return int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return -1; \
		} \
	} while (0)
00098 
00099 /*
00100  * CP936 => wchar
00101  */
00102 int
00103 mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter)
00104 {
00105        int c1, w;
00106 
00107        switch (filter->status) {
00108        case 0:
00109               if (c >= 0 && c < 0x80) {   /* latin */
00110                      CK((*filter->output_function)(c, filter->data));
00111               } else if (c == 0x80) {     /* euro sign */
00112                      CK((*filter->output_function)(0x20ac, filter->data));
00113               } else if (c > 0x80 && c < 0xff) { /* dbcs lead byte */
00114                      filter->status = 1;
00115                      filter->cache = c;
00116               } else {
00117                      w = c & MBFL_WCSGROUP_MASK;
00118                      w |= MBFL_WCSGROUP_THROUGH;
00119                      CK((*filter->output_function)(w, filter->data));
00120               }
00121               break;
00122 
00123        case 1:              /* dbcs second byte */
00124               filter->status = 0;
00125               c1 = filter->cache;
00126               if ( c1 < 0xff && c1 > 0x80 && c > 0x39 && c < 0xff && c != 0x7f) {
00127                      w = (c1 - 0x81)*192 + (c - 0x40);
00128                      if (w >= 0 && w < cp936_ucs_table_size) {
00129                             w = cp936_ucs_table[w];
00130                      } else {
00131                             w = 0;
00132                      }
00133                      if (w <= 0) {
00134                             w = (c1 << 8) | c;
00135                             w &= MBFL_WCSPLANE_MASK;
00136                             w |= MBFL_WCSPLANE_WINCP936;
00137                      }
00138                      CK((*filter->output_function)(w, filter->data));
00139               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00140                      CK((*filter->output_function)(c, filter->data));
00141               } else {
00142                      w = (c1 << 8) | c;
00143                      w &= MBFL_WCSGROUP_MASK;
00144                      w |= MBFL_WCSGROUP_THROUGH;
00145                      CK((*filter->output_function)(w, filter->data));
00146               }
00147               break;
00148 
00149        default:
00150               filter->status = 0;
00151               break;
00152        }
00153 
00154        return c;
00155 }
00156 
00157 /*
00158  * wchar => CP936
00159  */
00160 int
00161 mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
00162 {
00163        int c1, s;
00164 
00165        s = 0;
00166        if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
00167               s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
00168        } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
00169               s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
00170        } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
00171               s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
00172        } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
00173               s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
00174        } else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
00175               s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min];
00176        } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
00177               s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
00178        } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
00179               s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
00180        } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
00181               s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
00182        }
00183        if (s <= 0) {
00184               c1 = c & ~MBFL_WCSPLANE_MASK;
00185               if (c1 == MBFL_WCSPLANE_WINCP936) {
00186                      s = c & MBFL_WCSPLANE_MASK;
00187               }
00188               if (c == 0) {
00189                      s = 0;
00190               } else if (s <= 0) {
00191                      s = -1;
00192               }
00193        }
00194        if (s >= 0) {
00195               if (s <= 0x80) {     /* latin */
00196                      CK((*filter->output_function)(s, filter->data));
00197               } else {
00198                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00199                      CK((*filter->output_function)(s & 0xff, filter->data));
00200               }
00201        } else {
00202               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00203                      CK(mbfl_filt_conv_illegal_output(c, filter));
00204               }
00205        }
00206 
00207        return c;
00208 }
00209 
00210 static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter)
00211 {
00212        if (filter->status) {              /* kanji second char */
00213               if (c < 0x40 || c > 0xfe || c == 0x7f) {  /* bad */
00214                   filter->flag = 1;
00215               }
00216               filter->status = 0;
00217        } else if (c >= 0 && c < 0x80) {   /* latin  ok */
00218               ;
00219        } else if (c > 0x80 && c < 0xff) { /* DBCS lead byte */
00220               filter->status = 1;
00221        } else {                                                /* bad */
00222               filter->flag = 1;
00223        }
00224 
00225        return c;
00226 }
00227 
00228