Back to index

php5  5.3.10
mbfilter_uhc.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter_kr.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_uhc.h"
00036 #include "unicode_table_uhc.h"
00037 
00038 static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter);
00039 
/*
 * Per-byte length table, 256 entries indexed by byte value 0x00-0xFF.
 * Entries 0x81-0xFE hold 2; every other entry holds 1.
 * (Presumably the length of the sequence started by that byte -- the
 * table is handed to mbfl_encoding_uhc as its length table.)
 */
static const unsigned char mblen_table_uhc[] = {
	/* 0x00-0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20-0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40-0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50-0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60-0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70-0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80-0x8F */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90-0x9F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0-0xAF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0-0xBF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0-0xCF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0-0xDF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0-0xEF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0-0xFF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
00058 
/* NULL-terminated list of alternative names for UHC. */
static const char *mbfl_encoding_uhc_aliases[] = {
	"CP949",
	NULL
};
00060 
00061 const mbfl_encoding mbfl_encoding_uhc = {
00062        mbfl_no_encoding_uhc,
00063        "UHC",
00064        "UHC",
00065        (const char *(*)[])&mbfl_encoding_uhc_aliases,
00066        mblen_table_uhc,
00067        MBFL_ENCTYPE_MBCS
00068 };
00069 
00070 const struct mbfl_identify_vtbl vtbl_identify_uhc = {
00071        mbfl_no_encoding_uhc,
00072        mbfl_filt_ident_common_ctor,
00073        mbfl_filt_ident_common_dtor,
00074        mbfl_filt_ident_uhc
00075 };
00076 
00077 const struct mbfl_convert_vtbl vtbl_uhc_wchar = {
00078        mbfl_no_encoding_uhc,
00079        mbfl_no_encoding_wchar,
00080        mbfl_filt_conv_common_ctor,
00081        mbfl_filt_conv_common_dtor,
00082        mbfl_filt_conv_uhc_wchar,
00083        mbfl_filt_conv_common_flush
00084 };
00085 
00086 const struct mbfl_convert_vtbl vtbl_wchar_uhc = {
00087        mbfl_no_encoding_wchar,
00088        mbfl_no_encoding_uhc,
00089        mbfl_filt_conv_common_ctor,
00090        mbfl_filt_conv_common_dtor,
00091        mbfl_filt_conv_wchar_uhc,
00092        mbfl_filt_conv_common_flush
00093 };
00094 
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function.  Wrapped in do/while(0) so it acts as one statement.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
00096 
00097 /*
00098  * UHC => wchar
00099  */
00100 int
00101 mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter)
00102 {
00103        int c1, w = 0, flag = 0;
00104 
00105        switch (filter->status) {
00106        case 0:
00107               if (c >= 0 && c < 0x80) {   /* latin */
00108                      CK((*filter->output_function)(c, filter->data));
00109               } else if (c > 0x80 && c < 0xff && c != 0xc9) {  /* dbcs lead byte */
00110                      filter->status = 1;
00111                      filter->cache = c;
00112               } else {
00113                      w = c & MBFL_WCSGROUP_MASK;
00114                      w |= MBFL_WCSGROUP_THROUGH;
00115                      CK((*filter->output_function)(w, filter->data));
00116               }
00117               break;
00118 
00119        case 1:              /* dbcs second byte */
00120               filter->status = 0;
00121               c1 = filter->cache;
00122 
00123               if ( c1 >= 0x81 && c1 <= 0xa0){
00124                      w = (c1 - 0x81)*190 + (c - 0x41);
00125                      if (w >= 0 && w < uhc1_ucs_table_size) {
00126                             flag = 1;
00127                             w = uhc1_ucs_table[w];
00128                      } else {
00129                             w = 0;
00130                      }                    
00131               } else if ( c1 >= 0xa1 && c1 <= 0xc6){
00132                      w = (c1 - 0xa1)*190 + (c - 0x41);                
00133                      if (w >= 0 && w < uhc2_ucs_table_size) {
00134                             flag = 2;
00135                             w = uhc2_ucs_table[w];
00136                      } else {
00137                             w = 0;
00138                      }                    
00139               } else if ( c1 >= 0xc7 && c1 <= 0xfe){
00140                      w = (c1 - 0xc7)*94 + (c - 0xa1);          
00141                      if (w >= 0 && w < uhc3_ucs_table_size) {
00142                             flag = 3;
00143                             w = uhc3_ucs_table[w];
00144                      } else {
00145                             w = 0;
00146                      }                    
00147               }
00148               if (flag > 0){
00149                      if (w <= 0) {
00150                             w = (c1 << 8) | c;
00151                             w &= MBFL_WCSPLANE_MASK;
00152                             w |= MBFL_WCSPLANE_UHC;
00153                      }
00154                      CK((*filter->output_function)(w, filter->data));
00155               } else {
00156                      if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00157                             CK((*filter->output_function)(c, filter->data));
00158                      } else {
00159                             w = (c1 << 8) | c;
00160                             w &= MBFL_WCSGROUP_MASK;
00161                             w |= MBFL_WCSGROUP_THROUGH;
00162                             CK((*filter->output_function)(w, filter->data));
00163                      }
00164               }
00165               break;
00166 
00167        default:
00168               filter->status = 0;
00169               break;
00170        }
00171 
00172        return c;
00173 }
00174 
00175 /*
00176  * wchar => UHC
00177  */
00178 int
00179 mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter)
00180 {
00181        int c1, s;
00182 
00183        s = 0;
00184        if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
00185               s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
00186        } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
00187               s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
00188        } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
00189               s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
00190        } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
00191               s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
00192        } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
00193               s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
00194        } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
00195               s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
00196        } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
00197               s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
00198        }
00199        if (s <= 0) {
00200               c1 = c & ~MBFL_WCSPLANE_MASK;
00201               if (c1 == MBFL_WCSPLANE_UHC) {
00202                      s = c & MBFL_WCSPLANE_MASK;
00203               }
00204               if (c == 0) {
00205                      s = 0;
00206               } else if (s <= 0) {
00207                      s = -1;
00208               }
00209        }
00210        if (s >= 0) {
00211               if (s < 0x80) {      /* latin */
00212                      CK((*filter->output_function)(s, filter->data));
00213               } else {
00214                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00215                      CK((*filter->output_function)(s & 0xff, filter->data));
00216               }
00217        } else {
00218               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00219                      CK(mbfl_filt_conv_illegal_output(c, filter));
00220               }
00221        }
00222 
00223        return c;
00224 }
00225 
00226 static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter)
00227 {
00228        switch (filter->status) {
00229        case 0: /* latin */
00230               if (c >= 0 && c < 0x80) { /* ok */
00231                      ;
00232               } else if (c >= 0x81 && c <= 0xa0) {      /* dbcs first char */
00233                   filter->status= 1;
00234               } else if (c >= 0xa1 && c <= 0xc6) {      /* dbcs first char */
00235                   filter->status= 2;
00236               } else if (c >= 0xc7 && c <= 0xfe) {      /* dbcs first char */
00237                   filter->status= 3;
00238               } else { /* bad */
00239                      filter->flag = 1;
00240               }             
00241 
00242        case 1:
00243        case 2:
00244               if (c < 0x41 || (c > 0x5a && c < 0x61)
00245                      || (c > 0x7a && c < 0x81) || c > 0xfe) {  /* bad */
00246                   filter->flag = 1;
00247               }
00248               filter->status = 0;
00249               break;
00250 
00251        case 3:
00252               if (c < 0xa1 || c > 0xfe) { /* bad */
00253                   filter->flag = 1;
00254               }
00255               filter->status = 0;
00256               break;
00257 
00258        default:
00259               filter->status = 0;
00260               break;
00261        }
00262 
00263        return c;
00264 }
00265 
00266