Back to index

php5  5.3.10
mbfilter_big5.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file: Rui Hirokawa <hirokawa@php.net>
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter_tw.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_big5.h"
00036 
00037 #include "unicode_table_big5.h"
00038 
00039 static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter);
00040 
/*
 * Byte-length table for Big5 with one entry per byte value (256 entries).
 * Entries 0x81-0xFE hold 2; every other entry holds 1.
 * NOTE(review): presumably indexed by the first byte of a character to get
 * the character's length in bytes -- confirm against users of mblen_table.
 */
static const unsigned char mblen_table_big5[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
00059 
/* Alternative names for the Big5 encoding; the list is NULL-terminated. */
static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL};

/*
 * Encoding descriptor for Big5.
 *
 * The members are initialized positionally, so their order has to match the
 * declaration of mbfl_encoding (not visible in this file).
 */
const mbfl_encoding mbfl_encoding_big5 = {
       mbfl_no_encoding_big5,
       "BIG-5",             /* presumably the canonical name -- confirm in mbfilter.h */
       "BIG5",              /* presumably the MIME name -- confirm in mbfilter.h */
       (const char *(*)[])&mbfl_encoding_big5_aliases,   /* alias list defined just above */
       mblen_table_big5,
       MBFL_ENCTYPE_MBCS
};
00070 
/*
 * Identify-filter vtable for Big5: the shared constructor and destructor,
 * with mbfl_filt_ident_big5 as the filter function.  Members are positional;
 * their order must match struct mbfl_identify_vtbl (declared elsewhere).
 */
const struct mbfl_identify_vtbl vtbl_identify_big5 = {
       mbfl_no_encoding_big5,
       mbfl_filt_ident_common_ctor,
       mbfl_filt_ident_common_dtor,
       mbfl_filt_ident_big5
};
00077 
/*
 * Convert-filter vtable for Big5 -> wchar.  Construction, destruction and
 * flushing use the shared implementations; mbfl_filt_conv_big5_wchar is the
 * filter function.  Members are positional; their order must match
 * struct mbfl_convert_vtbl (declared elsewhere).
 */
const struct mbfl_convert_vtbl vtbl_big5_wchar = {
       mbfl_no_encoding_big5,         /* source encoding */
       mbfl_no_encoding_wchar,        /* destination encoding */
       mbfl_filt_conv_common_ctor,
       mbfl_filt_conv_common_dtor,
       mbfl_filt_conv_big5_wchar,
       mbfl_filt_conv_common_flush
};
00086 
/*
 * Convert-filter vtable for wchar -> Big5.  Construction, destruction and
 * flushing use the shared implementations; mbfl_filt_conv_wchar_big5 is the
 * filter function.  Members are positional; their order must match
 * struct mbfl_convert_vtbl (declared elsewhere).
 */
const struct mbfl_convert_vtbl vtbl_wchar_big5 = {
       mbfl_no_encoding_wchar,        /* source encoding */
       mbfl_no_encoding_big5,         /* destination encoding */
       mbfl_filt_conv_common_ctor,
       mbfl_filt_conv_common_dtor,
       mbfl_filt_conv_wchar_big5,
       mbfl_filt_conv_common_flush
};
00095 
/*
 * Evaluate `statement`; if its value is negative, make the enclosing
 * function return -1 immediately.  Only usable inside functions that
 * return int.
 */
#define CK(statement)       do { if ((statement) < 0) return (-1); } while (0)
00097 
00098 /*
00099  * Big5 => wchar
00100  */
00101 int
00102 mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
00103 {
00104        int c1, w;
00105 
00106        switch (filter->status) {
00107        case 0:
00108               if (c >= 0 && c < 0x80) {   /* latin */
00109                      CK((*filter->output_function)(c, filter->data));
00110               } else if (c > 0xa0 && c < 0xff) { /* dbcs lead byte */
00111                      filter->status = 1;
00112                      filter->cache = c;
00113               } else {
00114                      w = c & MBFL_WCSGROUP_MASK;
00115                      w |= MBFL_WCSGROUP_THROUGH;
00116                      CK((*filter->output_function)(w, filter->data));
00117               }
00118               break;
00119 
00120        case 1:              /* dbcs second byte */
00121               filter->status = 0;
00122               c1 = filter->cache;
00123               if ((c > 0x39 && c < 0x7f) | (c > 0xa0 && c < 0xff)) {
00124                      if (c < 0x7f){
00125                             w = (c1 - 0xa1)*157 + (c - 0x40);
00126                      } else {
00127                             w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f;
00128                      }
00129                      if (w >= 0 && w < big5_ucs_table_size) {
00130                             w = big5_ucs_table[w];
00131                      } else {
00132                             w = 0;
00133                      }
00134                      if (w <= 0) {
00135                             w = (c1 << 8) | c;
00136                             w &= MBFL_WCSPLANE_MASK;
00137                             w |= MBFL_WCSPLANE_BIG5;
00138                      }
00139                      CK((*filter->output_function)(w, filter->data));
00140               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00141                      CK((*filter->output_function)(c, filter->data));
00142               } else {
00143                      w = (c1 << 8) | c;
00144                      w &= MBFL_WCSGROUP_MASK;
00145                      w |= MBFL_WCSGROUP_THROUGH;
00146                      CK((*filter->output_function)(w, filter->data));
00147               }
00148               break;
00149 
00150        default:
00151               filter->status = 0;
00152               break;
00153        }
00154 
00155        return c;
00156 }
00157 
00158 /*
00159  * wchar => Big5
00160  */
00161 int
00162 mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
00163 {
00164        int c1, s;
00165 
00166        s = 0;
00167        if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
00168               s = ucs_a1_big5_table[c - ucs_a1_big5_table_min];
00169        } else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) {
00170               s = ucs_a2_big5_table[c - ucs_a2_big5_table_min];
00171        } else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) {
00172               s = ucs_a3_big5_table[c - ucs_a3_big5_table_min];
00173        } else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) {
00174               s = ucs_i_big5_table[c - ucs_i_big5_table_min];
00175        } else if (c >= ucs_pua_big5_table_min && c < ucs_pua_big5_table_max) {
00176               s = ucs_pua_big5_table[c - ucs_pua_big5_table_min];
00177        } else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) {
00178               s = ucs_r1_big5_table[c - ucs_r1_big5_table_min];
00179        } else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
00180               s = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
00181        }
00182        if (s <= 0) {
00183               c1 = c & ~MBFL_WCSPLANE_MASK;
00184               if (c1 == MBFL_WCSPLANE_BIG5) {
00185                      s = c & MBFL_WCSPLANE_MASK;
00186               }
00187               if (c == 0) {
00188                      s = 0;
00189               } else if (s <= 0) {
00190                      s = -1;
00191               }
00192        }
00193        if (s >= 0) {
00194               if (s < 0x80) {      /* latin */
00195                      CK((*filter->output_function)(s, filter->data));
00196               } else {
00197                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00198                      CK((*filter->output_function)(s & 0xff, filter->data));
00199               }
00200        } else {
00201               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00202                      CK(mbfl_filt_conv_illegal_output(c, filter));
00203               }
00204        }
00205 
00206        return c;
00207 }
00208 
00209 static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter)
00210 {
00211        if (filter->status) {              /* kanji second char */
00212               if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) {    /* bad */
00213                   filter->flag = 1;
00214               }
00215               filter->status = 0;
00216        } else if (c >= 0 && c < 0x80) {   /* latin  ok */
00217               ;
00218        } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
00219               filter->status = 1;
00220        } else {                                                /* bad */
00221               filter->flag = 1;
00222        }
00223 
00224        return c;
00225 }
00226 
00227