Back to index

php5  5.3.10
mbfilter_iso2022_kr.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter_kr.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_iso2022_kr.h"
00036 #include "unicode_table_uhc.h"
00037 
00038 static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter);
00039 
00040 const mbfl_encoding mbfl_encoding_2022kr = {
00041        mbfl_no_encoding_2022kr,
00042        "ISO-2022-KR",
00043        "ISO-2022-KR",
00044        NULL,
00045        NULL,
00046        MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
00047 };
00048 
00049 const struct mbfl_identify_vtbl vtbl_identify_2022kr = {
00050        mbfl_no_encoding_2022kr,
00051        mbfl_filt_ident_common_ctor,
00052        mbfl_filt_ident_common_dtor,
00053        mbfl_filt_ident_2022kr
00054 };
00055 
00056 const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
00057        mbfl_no_encoding_wchar,
00058        mbfl_no_encoding_2022kr,
00059        mbfl_filt_conv_common_ctor,
00060        mbfl_filt_conv_common_dtor,
00061        mbfl_filt_conv_wchar_2022kr,
00062        mbfl_filt_conv_any_2022kr_flush
00063 };
00064 
00065 const struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
00066        mbfl_no_encoding_2022kr,
00067        mbfl_no_encoding_wchar,
00068        mbfl_filt_conv_common_ctor,
00069        mbfl_filt_conv_common_dtor,
00070        mbfl_filt_conv_2022kr_wchar,
00071        mbfl_filt_conv_common_flush
00072 };
00073 
/*
 * Evaluate `statement` once; when it yields a negative value, make the
 * enclosing function return -1 immediately.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
00075 
00076 /*
00077  * ISO-2022-KR => wchar
00078  */
00079 int
00080 mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
00081 {
00082        int c1, w, flag;
00083 
00084 retry:
00085        switch (filter->status & 0xf) {
00086               /* case 0x00: ASCII */
00087               /* case 0x10: KSC5601 */
00088        case 0:
00089               if (c == 0x1b) { /* ESC */
00090                      filter->status += 2;
00091               } else if (c == 0x0f) { /* SI (ASCII) */
00092                      filter->status &= ~0xff; 
00093               } else if (c == 0x0e) { /* SO (KSC5601) */
00094                      filter->status |= 0x10; 
00095               } else if ((filter->status & 0x10) != 0  && c > 0x20 && c < 0x7f) {
00096                      /* KSC5601 lead byte */
00097                      filter->cache = c;
00098                      filter->status += 1;
00099               } else if ((filter->status & 0x10) == 0 &&  c >= 0 && c < 0x80) {
00100                      /* latin, CTLs */
00101                      CK((*filter->output_function)(c, filter->data));
00102               } else {
00103                      w = c & MBFL_WCSGROUP_MASK;
00104                      w |= MBFL_WCSGROUP_THROUGH;
00105                      CK((*filter->output_function)(w, filter->data));
00106               }
00107               break;
00108 
00109        case 1:              /* dbcs second byte */
00110               filter->status &= ~0xf;
00111               c1 = filter->cache;
00112               flag = 0;
00113               if (c1 > 0x20 && c1 < 0x47) {
00114                      flag = 1;
00115               } else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
00116                      flag = 2;
00117               }
00118               if (flag > 0 && c > 0x20 && c < 0x7f) {
00119                      if (flag == 1){
00120                             w = (c1 - 0x21)*190 + (c - 0x41) + 0x80;
00121                             if (w >= 0 && w < uhc2_ucs_table_size) {
00122                                    w = uhc2_ucs_table[w];
00123                             } else {
00124                                    w = 0;
00125                             }
00126                      } else {
00127                             w = (c1 - 0x47)*94 + (c - 0x21);
00128                             if (w >= 0 && w < uhc3_ucs_table_size) {
00129                                    w = uhc3_ucs_table[w];
00130                             } else {
00131                                    w = 0;
00132                             }
00133                      }
00134                      
00135                      if (w <= 0) {
00136                             w = (c1 << 8) | c;
00137                             w &= MBFL_WCSPLANE_MASK;
00138                             w |= MBFL_WCSPLANE_KSC5601;
00139                      }
00140                      CK((*filter->output_function)(w, filter->data));
00141               } else if (c == 0x1b) {      /* ESC */
00142                      filter->status++;
00143               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00144                      CK((*filter->output_function)(c, filter->data));
00145               } else {
00146                      w = (c1 << 8) | c;
00147                      w &= MBFL_WCSGROUP_MASK;
00148                      w |= MBFL_WCSGROUP_THROUGH;
00149                      CK((*filter->output_function)(w, filter->data));
00150               }
00151               break;
00152 
00153        case 2:              /* ESC */
00154               if (c == 0x24) { /* '$' */
00155                      filter->status++;
00156               } else {
00157                      filter->status &= ~0xf;
00158                      CK((*filter->output_function)(0x1b, filter->data));
00159                      goto retry;
00160               }
00161               break;
00162        case 3:         /* ESC $ */
00163               if (c == 0x29) { /* ')' */
00164                      filter->status++;
00165               } else {
00166                      filter->status &= ~0xf;
00167                      CK((*filter->output_function)(0x1b, filter->data));
00168                      CK((*filter->output_function)(0x24, filter->data));
00169                      goto retry;
00170               }
00171               break;
00172        case 4:         /* ESC $ )  */
00173               if (c == 0x43) { /* 'C' */
00174                      filter->status &= ~0xf;
00175                      filter->status |= 0x100;
00176               } else {
00177                      filter->status &= ~0xf;
00178                      CK((*filter->output_function)(0x1b, filter->data));
00179                      CK((*filter->output_function)(0x24, filter->data));
00180                      CK((*filter->output_function)(0x29, filter->data));
00181                      goto retry;
00182               }
00183               break;
00184        default:
00185               filter->status = 0;
00186               break;
00187        }
00188 
00189        return c;
00190 }
00191 
00192 /*
00193  * wchar => ISO-2022-KR
00194  */
00195 int
00196 mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
00197 {
00198        int c1, c2, s;
00199 
00200        s = 0;
00201 
00202        if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
00203               s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
00204        } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
00205               s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
00206        } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
00207               s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
00208        } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
00209               s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
00210        } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
00211               s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
00212        } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
00213               s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
00214        } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
00215               s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
00216        }
00217 
00218        c1 = (s >> 8) & 0xff;
00219        c2 = s & 0xff;
00220        /* exclude UHC extension area */
00221        if (c1 < 0xa1 || c2 < 0xa1){ 
00222               s = c;
00223        }
00224        if (s & 0x8000) {
00225               s -= 0x8080;
00226        }
00227 
00228        if (s <= 0) {
00229               c1 = c & ~MBFL_WCSPLANE_MASK;
00230               if (c1 == MBFL_WCSPLANE_KSC5601) {
00231                      s = c & MBFL_WCSPLANE_MASK;
00232               }
00233               if (c == 0) {
00234                      s = 0;
00235               } else if (s <= 0) {
00236                      s = -1;
00237               }
00238        } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
00239               s = -1;
00240        }
00241        if (s >= 0) {
00242               if (s < 0x80 && s > 0) {    /* ASCII */
00243                      if ((filter->status & 0x10) != 0) {
00244                             CK((*filter->output_function)(0x0f, filter->data));            /* SI */
00245                             filter->status &= ~0x10;
00246                      }
00247                      CK((*filter->output_function)(s, filter->data));
00248               } else {
00249                      if ( (filter->status & 0x100) == 0) {
00250                             CK((*filter->output_function)(0x1b, filter->data));            /* ESC */
00251                             CK((*filter->output_function)(0x24, filter->data));            /* '$' */
00252                             CK((*filter->output_function)(0x29, filter->data));            /* ')' */
00253                             CK((*filter->output_function)(0x43, filter->data));            /* 'C' */
00254                             filter->status |= 0x100;
00255                      }
00256                      if ((filter->status & 0x10) == 0) {
00257                             CK((*filter->output_function)(0x0e, filter->data));            /* SO */
00258                             filter->status |= 0x10;
00259                      }
00260                      CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
00261                      CK((*filter->output_function)(s & 0xff, filter->data));
00262               }
00263        } else {
00264               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00265                      CK(mbfl_filt_conv_illegal_output(c, filter));
00266               }
00267        }
00268 
00269        return c;
00270 }
00271 
00272 int
00273 mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
00274 {
00275        /* back to ascii */
00276        if ((filter->status & 0xff00) != 0) {
00277               CK((*filter->output_function)(0x0f, filter->data));            /* SI */
00278        }
00279 
00280        filter->status &= 0xff;
00281 
00282        if (filter->flush_function != NULL) {
00283               return (*filter->flush_function)(filter->data);
00284        }
00285 
00286        return 0;
00287 }
00288 
00289 static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter)
00290 {
00291 retry:
00292        switch (filter->status & 0xf) {
00293 /*     case 0x00:     ASCII */
00294 /*     case 0x10:     KSC5601 mode */
00295 /*     case 0x20:     KSC5601 DBCS */
00296 /*     case 0x40:     KSC5601 SBCS */
00297        case 0:
00298               if (!(filter->status & 0x10)) {
00299                      if (c == 0x1b)
00300                             filter->status += 2;
00301               } else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) {          /* kanji first char */
00302                      filter->status += 1;
00303               } else if (c >= 0 && c < 0x80) {          /* latin, CTLs */
00304                      ;
00305               } else {
00306                      filter->flag = 1;    /* bad */
00307               }
00308               break;
00309 
00310 /*     case 0x21:     KSC5601 second char */
00311        case 1:
00312               filter->status &= ~0xf;
00313               if (c < 0x21 || c > 0x7e) {        /* bad */
00314                      filter->flag = 1;
00315               }
00316               break;
00317 
00318        /* ESC */
00319        case 2:
00320               if (c == 0x24) {            /* '$' */
00321                      filter->status++;
00322               } else {
00323                      filter->flag = 1;    /* bad */
00324                      filter->status &= ~0xf;
00325                      goto retry;
00326               }
00327               break;
00328 
00329        /* ESC $ */
00330        case 3:
00331               if (c == 0x29) {            /* ')' */
00332                      filter->status++;
00333               } else {
00334                      filter->flag = 1;    /* bad */
00335                      filter->status &= ~0xf;
00336                      goto retry;
00337               }
00338               break;
00339 
00340        /* ESC $) */
00341        case 5:
00342               if (c == 0x43) {            /* 'C' */
00343                      filter->status = 0x10;
00344               } else {
00345                      filter->flag = 1;    /* bad */
00346                      filter->status &= ~0xf;
00347                      goto retry;
00348               }
00349               break;
00350 
00351        default:
00352               filter->status = 0;
00353               break;
00354        }
00355 
00356        return c;
00357 }
00358 
00359