Back to index

php5  5.3.10
mbfilter_euc_kr.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this files was separated from mbfilter_kr.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_euc_kr.h"
00036 #include "unicode_table_uhc.h"
00037 
/* Forward declaration: vtbl_identify_euckr refers to this function before its definition. */
00038 static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter);
00039 
/*
 * 256-entry table indexed by byte value (16 rows of 16 entries).
 * Entries 0xA1..0xFE hold 2, every other entry holds 1; presumably this is
 * the character length in bytes selected by the first byte -- confirm against
 * the users of mbfl_encoding's length table.
 */
00040 static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */
00041   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x0F */
00042   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x1F */
00043   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */
00044   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */
00045   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
00046   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x5F */
00047   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */
00048   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x7F */
00049   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8F */
00050   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9F */
00051   1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xA0-0xAF: 0xA0 itself is 1 */
00052   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xB0-0xBF */
00053   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xC0-0xCF */
00054   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xD0-0xDF */
00055   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
00056   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1  /* 0xF0-0xFF: 0xFF itself is 1 */
00057 };
00058 
/* NULL-terminated list of alternative names for EUC-KR, referenced by mbfl_encoding_euc_kr. */
00059 static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL};
00060 
/*
 * Encoding descriptor for EUC-KR.
 *
 * Positional initializer, in order: the encoding id, two "EUC-KR" strings
 * (presumably the canonical name and the MIME name -- confirm against the
 * mbfl_encoding declaration), the alias list, the per-byte length table and
 * the MBFL_ENCTYPE_MBCS type flag.
 */
00061 const mbfl_encoding mbfl_encoding_euc_kr = {
00062        mbfl_no_encoding_euc_kr,
00063        "EUC-KR",
00064        "EUC-KR",
00065        (const char *(*)[])&mbfl_encoding_euc_kr_aliases,
00066        mblen_table_euckr,
00067        MBFL_ENCTYPE_MBCS
00068 };
00069 
/*
 * Identify-filter vtable for EUC-KR: the encoding id, the common identify
 * constructor and destructor, and the EUC-KR specific per-byte check
 * mbfl_filt_ident_euckr() defined in this file.
 */
00070 const struct mbfl_identify_vtbl vtbl_identify_euckr = {
00071        mbfl_no_encoding_euc_kr,
00072        mbfl_filt_ident_common_ctor,
00073        mbfl_filt_ident_common_dtor,
00074        mbfl_filt_ident_euckr
00075 };
00076 
/*
 * Conversion-filter vtable for EUC-KR => wchar: the two encoding ids
 * (presumably "from" then "to" -- confirm against mbfl_convert_vtbl), the
 * common constructor and destructor, the per-byte conversion function
 * mbfl_filt_conv_euckr_wchar(), and the common flush function.
 */
00077 const struct mbfl_convert_vtbl vtbl_euckr_wchar = {
00078        mbfl_no_encoding_euc_kr,
00079        mbfl_no_encoding_wchar,
00080        mbfl_filt_conv_common_ctor,
00081        mbfl_filt_conv_common_dtor,
00082        mbfl_filt_conv_euckr_wchar,
00083        mbfl_filt_conv_common_flush
00084 };
00085 
/*
 * Conversion-filter vtable for wchar => EUC-KR: the two encoding ids
 * (presumably "from" then "to" -- confirm against mbfl_convert_vtbl), the
 * common constructor and destructor, the per-character conversion function
 * mbfl_filt_conv_wchar_euckr(), and the common flush function.
 */
00086 const struct mbfl_convert_vtbl vtbl_wchar_euckr = {
00087        mbfl_no_encoding_wchar,
00088        mbfl_no_encoding_euc_kr,
00089        mbfl_filt_conv_common_ctor,
00090        mbfl_filt_conv_common_dtor,
00091        mbfl_filt_conv_wchar_euckr,
00092        mbfl_filt_conv_common_flush
00093 };
00094 
00095 
/*
 * CK(statement): evaluate `statement` and, if its result is negative, make the
 * ENCLOSING function return -1. Because it expands to a `return`, it is only
 * usable inside functions that return int.
 */
00096 #define CK(statement)       do { if ((statement) < 0) return (-1); } while (0)
00097 
00098 /*
00099  * EUC-KR => wchar
00100  */
00101 int
00102 mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter)
00103 {
00104        int c1, w, flag;
00105 
00106        switch (filter->status) {
00107        case 0:
00108               if (c >= 0 && c < 0x80) {   /* latin */
00109                      CK((*filter->output_function)(c, filter->data));
00110               } else if (c > 0xa0 && c < 0xff && c != 0xc9) {  /* dbcs lead byte */
00111                      filter->status = 1;
00112                      filter->cache = c;
00113               } else {
00114                      w = c & MBFL_WCSGROUP_MASK;
00115                      w |= MBFL_WCSGROUP_THROUGH;
00116                      CK((*filter->output_function)(w, filter->data));
00117               }
00118               break;
00119 
00120        case 1:              /* dbcs second byte */
00121               filter->status = 0;
00122               c1 = filter->cache;
00123               flag = 0;
00124               if (c1 >= 0xa1 && c1 <= 0xc6) {
00125                      flag = 1;
00126               } else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) {
00127                      flag = 2;
00128               }
00129               if (flag > 0 && c >= 0xa1 && c <= 0xfe) {
00130                      if (flag == 1){ /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */
00131                             w = (c1 - 0xa1)*190 + (c - 0x41);
00132                             if (w >= 0 && w < uhc2_ucs_table_size) {
00133                                    w = uhc2_ucs_table[w];
00134                             } else {
00135                                    w = 0;
00136                             }
00137                      } else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */
00138                             w = (c1 - 0xc7)*94 + (c - 0xa1);
00139                             if (w >= 0 && w < uhc3_ucs_table_size) {
00140                                    w = uhc3_ucs_table[w];
00141                             } else {
00142                                    w = 0;
00143                             }
00144                      }
00145                      
00146                      if (w <= 0) {
00147                             w = (c1 << 8) | c;
00148                             w &= MBFL_WCSPLANE_MASK;
00149                             w |= MBFL_WCSPLANE_KSC5601;
00150                      }
00151                      CK((*filter->output_function)(w, filter->data));
00152               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00153                      CK((*filter->output_function)(c, filter->data));
00154               } else {
00155                      w = (c1 << 8) | c;
00156                      w &= MBFL_WCSGROUP_MASK;
00157                      w |= MBFL_WCSGROUP_THROUGH;
00158                      CK((*filter->output_function)(w, filter->data));
00159               }
00160               break;
00161 
00162        default:
00163               filter->status = 0;
00164               break;
00165        }
00166 
00167        return c;
00168 }
00169 
00170 /*
00171  * wchar => EUC-KR
00172  */
00173 int
00174 mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter)
00175 {
00176        int c1, c2, s;
00177 
00178        s = 0;
00179 
00180        if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
00181               s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
00182        } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
00183               s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
00184        } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
00185               s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
00186        } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
00187               s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
00188        } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
00189               s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
00190        } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
00191               s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
00192        } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
00193               s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
00194        }
00195 
00196        c1 = (s >> 8) & 0xff;
00197        c2 = s & 0xff;
00198        /* exclude UHC extension area */
00199        if (c1 < 0xa1 || c2 < 0xa1){ 
00200               s = c;
00201        }
00202 
00203        if (s <= 0) {
00204               c1 = c & ~MBFL_WCSPLANE_MASK;
00205               if (c1 == MBFL_WCSPLANE_KSC5601) {
00206                      s = c & MBFL_WCSPLANE_MASK;
00207               }
00208               if (c == 0) {
00209                      s = 0;
00210               } else if (s <= 0) {
00211                      s = -1;
00212               }
00213        }
00214        if (s >= 0) {
00215               if (s < 0x80) {      /* latin */
00216                      CK((*filter->output_function)(s, filter->data));
00217               } else {
00218                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00219                      CK((*filter->output_function)(s & 0xff, filter->data));
00220               }
00221        } else {
00222               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00223                      CK(mbfl_filt_conv_illegal_output(c, filter));
00224               }
00225        }
00226 
00227        return c;
00228 }
00229 
00230 static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter)
00231 {
00232        switch (filter->status) {
00233        case  0:      /* latin */
00234               if (c >= 0 && c < 0x80) {   /* ok */
00235                      ;
00236               } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
00237                      filter->status = 1;
00238               } else {                                                /* bad */
00239                      filter->flag = 1;
00240               }
00241               break;
00242 
00243        case  1:      /* got lead byte */
00244               if (c < 0xa1 || c > 0xfe) {        /* bad */
00245                      filter->flag = 1;
00246               }
00247               filter->status = 0;
00248               break;
00249 
00250        default:
00251               filter->status = 0;
00252               break;
00253        }
00254 
00255        return c;
00256 }