Back to index

php5  5.3.10
mbfilter_sjis.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter_ja.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_sjis.h"
00036 
00037 #include "unicode_table_cp932_ext.h"
00038 #include "unicode_table_jis.h"
00039 
00040 static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
00041 
/*
 * Per-byte length table for SJIS with one entry for each of the 256 byte
 * values.  Entries at indices 0x80-0x9f and 0xe0-0xff are 2; every other
 * entry is 1.  Presumably the value is the length in bytes of a character
 * that starts with the indexing byte -- confirm against the users of
 * mbfl_encoding.
 */
static const unsigned char mblen_table_sjis[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
00060 
/* NULL-terminated list of alias names registered for the SJIS encoding. */
static const char *mbfl_encoding_sjis_aliases[] = {
	"x-sjis",
	"SHIFT-JIS",
	NULL
};
00062 
00063 const mbfl_encoding mbfl_encoding_sjis = {
00064        mbfl_no_encoding_sjis,
00065        "SJIS",
00066        "Shift_JIS",
00067        (const char *(*)[])&mbfl_encoding_sjis_aliases,
00068        mblen_table_sjis,
00069        MBFL_ENCTYPE_MBCS
00070 };
00071 
00072 const struct mbfl_identify_vtbl vtbl_identify_sjis = {
00073        mbfl_no_encoding_sjis,
00074        mbfl_filt_ident_common_ctor,
00075        mbfl_filt_ident_common_dtor,
00076        mbfl_filt_ident_sjis
00077 };
00078 
00079 const struct mbfl_convert_vtbl vtbl_sjis_wchar = {
00080        mbfl_no_encoding_sjis,
00081        mbfl_no_encoding_wchar,
00082        mbfl_filt_conv_common_ctor,
00083        mbfl_filt_conv_common_dtor,
00084        mbfl_filt_conv_sjis_wchar,
00085        mbfl_filt_conv_common_flush
00086 };
00087 
00088 const struct mbfl_convert_vtbl vtbl_wchar_sjis = {
00089        mbfl_no_encoding_wchar,
00090        mbfl_no_encoding_sjis,
00091        mbfl_filt_conv_common_ctor,
00092        mbfl_filt_conv_common_dtor,
00093        mbfl_filt_conv_wchar_sjis,
00094        mbfl_filt_conv_common_flush
00095 };
00096 
/*
 * Evaluate `statement` once; when its value is negative, return -1 from the
 * function in which the macro is expanded.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
00098 
/*
 * Convert a two-byte JIS code (c1 = first byte, c2 = second byte) into the
 * corresponding Shift_JIS byte pair, stored into the lvalues s1 and s2.
 *
 *  - Two JIS rows share one SJIS lead byte: s1 = ((c1 - 1) >> 1) plus 0x71
 *    when c1 < 0x5f, or plus 0xb1 otherwise.
 *  - For odd c1 the trail byte is c2 + 0x1f (c2 < 0x60) or c2 + 0x20, which
 *    steps over 0x7f; for even c1 it is c2 + 0x7e.
 *
 * Callers presumably pass c1 and c2 in 0x21..0x7e -- the macro does no range
 * checking.  c1 and c2 are evaluated more than once.
 */
#define SJIS_ENCODE(c1,c2,s1,s2) \
		do { \
			s1 = (c1); \
			s1 = (s1 - 1) >> 1; \
			s1 += ((c1) < 0x5f) ? 0x71 : 0xb1; \
			s2 = (c2); \
			if ((c1) & 1) { \
				s2 += ((c2) < 0x60) ? 0x1f : 0x20; \
			} else { \
				s2 += 0x7e; \
			} \
		} while (0)
00119 
/*
 * Convert a Shift_JIS byte pair (c1 = lead byte, c2 = trail byte) into a
 * two-byte JIS code, stored into the lvalues s1 (first byte) and s2 (second
 * byte).
 *
 *  - The lead byte is rebased by 0x81 (c1 < 0xa0) or 0xc1, doubled and
 *    offset by 0x21, giving the odd row of the pair it covers.
 *  - Trail bytes below 0x9f stay in that row: s2 = c2 - 0x1f (c2 < 0x7f) or
 *    c2 - 0x20.  Trail bytes from 0x9f up select the next row (s1 + 1) with
 *    s2 = c2 - 0x7e.
 *
 * No range checking is performed here; the caller validates the bytes.
 * c1 and c2 are each evaluated exactly once.
 */
#define SJIS_DECODE(c1,c2,s1,s2) \
		do { \
			s1 = (c1); \
			s1 -= (s1 < 0xa0) ? 0x81 : 0xc1; \
			s1 = (s1 << 1) + 0x21; \
			s2 = (c2); \
			if (s2 < 0x9f) { \
				s2 -= (s2 < 0x7f) ? 0x1f : 0x20; \
			} else { \
				s1 += 1; \
				s2 -= 0x7e; \
			} \
		} while (0)
00141 
00142 
00143 /*
00144  * SJIS => wchar
00145  */
00146 int
00147 mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter)
00148 {
00149        int c1, s1, s2, w;
00150 
00151        switch (filter->status) {
00152        case 0:
00153               if (c >= 0 && c < 0x80) {   /* latin */
00154                      CK((*filter->output_function)(c, filter->data));
00155               } else if (c > 0xa0 && c < 0xe0) { /* kana */
00156                      CK((*filter->output_function)(0xfec0 + c, filter->data));
00157               } else if (c > 0x80 && c < 0xfd && c != 0xa0) {  /* kanji first char */
00158                      filter->status = 1;
00159                      filter->cache = c;
00160               } else {
00161                      w = c & MBFL_WCSGROUP_MASK;
00162                      w |= MBFL_WCSGROUP_THROUGH;
00163                      CK((*filter->output_function)(w, filter->data));
00164               }
00165               break;
00166 
00167        case 1:              /* kanji second char */
00168               filter->status = 0;
00169               c1 = filter->cache;
00170               if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
00171                      SJIS_DECODE(c1, c, s1, s2);
00172                      w = (s1 - 0x21)*94 + s2 - 0x21;
00173                      if (w >= 0 && w < jisx0208_ucs_table_size) {
00174                             w = jisx0208_ucs_table[w];
00175                      } else {
00176                             w = 0;
00177                      }
00178                      if (w <= 0) {
00179                             if (s1 < 0x7f && s2 < 0x7f) {
00180                                    w = (s1 << 8) | s2;
00181                                    w &= MBFL_WCSPLANE_MASK;
00182                                    w |= MBFL_WCSPLANE_JIS0208;
00183                             } else {
00184                                    w = (c1 << 8) | c;
00185                                    w &= MBFL_WCSGROUP_MASK;
00186                                    w |= MBFL_WCSGROUP_THROUGH;
00187                             }
00188                      }
00189                      CK((*filter->output_function)(w, filter->data));
00190               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
00191                      CK((*filter->output_function)(c, filter->data));
00192               } else {
00193                      w = (c1 << 8) | c;
00194                      w &= MBFL_WCSGROUP_MASK;
00195                      w |= MBFL_WCSGROUP_THROUGH;
00196                      CK((*filter->output_function)(w, filter->data));
00197               }
00198               break;
00199 
00200        default:
00201               filter->status = 0;
00202               break;
00203        }
00204 
00205        return c;
00206 }
00207 
00208 /*
00209  * wchar => SJIS
00210  */
00211 int
00212 mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
00213 {
00214        int c1, c2, s1, s2;
00215 
00216        s1 = 0;
00217        if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
00218               s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
00219        } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
00220               s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
00221        } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
00222               s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
00223        } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
00224               s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
00225        }
00226        if (s1 <= 0) {
00227               c1 = c & ~MBFL_WCSPLANE_MASK;
00228               if (c1 == MBFL_WCSPLANE_JIS0208) {
00229                      s1 = c & MBFL_WCSPLANE_MASK;
00230               } else if (c == 0xa5) {            /* YEN SIGN */
00231                      s1 = 0x216f;  /* FULLWIDTH YEN SIGN */
00232               } else if (c == 0x203e) {   /* OVER LINE */
00233                      s1 = 0x2131;  /* FULLWIDTH MACRON */
00234               } else if (c == 0xff3c) {   /* FULLWIDTH REVERSE SOLIDUS */
00235                      s1 = 0x2140;
00236               } else if (c == 0xff5e) {   /* FULLWIDTH TILDE */
00237                      s1 = 0x2141;
00238               } else if (c == 0x2225) {   /* PARALLEL TO */
00239                      s1 = 0x2142;
00240               } else if (c == 0xff0d) {   /* FULLWIDTH HYPHEN-MINUS */
00241                      s1 = 0x215d;
00242               } else if (c == 0xffe0) {   /* FULLWIDTH CENT SIGN */
00243                      s1 = 0x2171;
00244               } else if (c == 0xffe1) {   /* FULLWIDTH POUND SIGN */
00245                      s1 = 0x2172;
00246               } else if (c == 0xffe2) {   /* FULLWIDTH NOT SIGN */
00247                      s1 = 0x224c;
00248               }
00249               if (c == 0) {
00250                      s1 = 0;
00251               } else if (s1 <= 0) {
00252                      s1 = -1;
00253               }
00254        } else if (s1 >= 0x8080) {
00255               s1 = -1;
00256        }
00257        if (s1 >= 0) {
00258               if (s1 < 0x100) { /* latin or kana */
00259                      CK((*filter->output_function)(s1, filter->data));
00260               } else { /* kanji */
00261                      c1 = (s1 >> 8) & 0xff;
00262                      c2 = s1 & 0xff;
00263                      SJIS_ENCODE(c1, c2, s1, s2);
00264                      CK((*filter->output_function)(s1, filter->data));
00265                      CK((*filter->output_function)(s2, filter->data));
00266               }
00267        } else {
00268               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00269                      CK(mbfl_filt_conv_illegal_output(c, filter));
00270               }
00271        }
00272 
00273        return c;
00274 }
00275 
00276 static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter)
00277 {
00278        if (filter->status) {              /* kanji second char */
00279               if (c < 0x40 || c > 0xfc || c == 0x7f) {  /* bad */
00280                   filter->flag = 1;
00281               }
00282               filter->status = 0;
00283        } else if (c >= 0 && c < 0x80) {   /* latin  ok */
00284               ;
00285        } else if (c > 0xa0 && c < 0xe0) { /* kana  ok */
00286               ;
00287        } else if (c > 0x80 && c < 0xf0 && c != 0xa0) {  /* kanji first char */
00288               filter->status = 1;
00289        } else {                                                /* bad */
00290               filter->flag = 1;
00291        }
00292 
00293        return c;
00294 }
00295