Back to index

php5  5.3.10
mbfilter_tl_jisx0201_jisx0208.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp>
00022  *
00023  */
00024 
00025 #include "mbfl_allocators.h"
00026 #include "mbfilter_tl_jisx0201_jisx0208.h"
00027 #include "translit_kana_jisx0201_jisx0208.h"
00028 
00029 void
00030 mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter *filt)
00031 {
00032        mbfl_filt_conv_common_ctor(filt);
00033 }
00034 
00035 void
00036 mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter *filt)
00037 {
00038 }
00039 
00040 int
00041 mbfl_filt_tl_jisx0201_jisx0208(int c, mbfl_convert_filter *filt)
00042 {
00043        int s, n;
00044        int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
00045 
00046        s = c;
00047 
00048        if ((mode & MBFL_FILT_TL_HAN2ZEN_ALL)
00049                      && c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) {
00050               /* all except <"> <'> <> <~> */
00051               s = c + 0xfee0;
00052        } else if ((mode & MBFL_FILT_TL_HAN2ZEN_ALPHA) &&
00053                      ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) {
00054               /* alpha */
00055               s = c + 0xfee0;
00056        } else if ((mode & MBFL_FILT_TL_HAN2ZEN_NUMERIC) &&
00057                      c >= 0x30 && c <= 0x39) {
00058               /* num */
00059               s = c + 0xfee0;
00060        } else if ((mode & MBFL_FILT_TL_HAN2ZEN_SPACE) && c == 0x20) {
00061               /* space */
00062               s = 0x3000;
00063        }
00064 
00065        if (mode &
00066                      (MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_HIRAGANA)) {
00067               /* hankaku kana to zenkaku kana */
00068               if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
00069                             (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
00070                      /* hankaku kana to zenkaku katakana and glue voiced sound mark */
00071                      if (c >= 0xff61 && c <= 0xff9f) {
00072                             if (filt->status) {
00073                                    n = (filt->cache - 0xff60) & 0x3f;
00074                                    if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
00075                                           filt->status = 0;
00076                                           s = 0x3001 + hankana2zenkana_table[n];
00077                                    } else if (c == 0xff9e && n == 19) {
00078                                           filt->status = 0;
00079                                           s = 0x30f4;
00080                                    } else if (c == 0xff9f && (n >= 42 && n <= 46)) {
00081                                           filt->status = 0;
00082                                           s = 0x3002 + hankana2zenkana_table[n];
00083                                    } else {
00084                                           filt->status = 1;
00085                                           filt->cache = c;
00086                                           s = 0x3000 + hankana2zenkana_table[n];
00087                                    }
00088                             } else {
00089                                    filt->status = 1;
00090                                    filt->cache = c;
00091                                    return c;
00092                             }
00093                      } else {
00094                             if (filt->status) {
00095                                    n = (filt->cache - 0xff60) & 0x3f;
00096                                    filt->status = 0;
00097                                    (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
00098                             }
00099                      }
00100               } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) &&
00101                             (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
00102                      /* hankaku kana to zenkaku hirangana and glue voiced sound mark */
00103                      if (c >= 0xff61 && c <= 0xff9f) {
00104                             if (filt->status) {
00105                                    n = (filt->cache - 0xff60) & 0x3f;
00106                                    if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
00107                                           filt->status = 0;
00108                                           s = 0x3001 + hankana2zenhira_table[n];
00109                                    } else if (c == 0xff9f && (n >= 42 && n <= 46)) {
00110                                           filt->status = 0;
00111                                           s = 0x3002 + hankana2zenhira_table[n];
00112                                    } else {
00113                                           filt->status = 1;
00114                                           filt->cache = c;
00115                                           s = 0x3000 + hankana2zenhira_table[n];
00116                                    }
00117                             } else {
00118                                    filt->status = 1;
00119                                    filt->cache = c;
00120                                    return c;
00121                             }
00122                      } else {
00123                             if (filt->status) {
00124                                    n = (filt->cache - 0xff60) & 0x3f;
00125                                    filt->status = 0;
00126                                    (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
00127                             }
00128                      }
00129               } else if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
00130                             c >= 0xff61 && c <= 0xff9f) {
00131                      /* hankaku kana to zenkaku katakana */
00132                      s = 0x3000 + hankana2zenkana_table[c - 0xff60];
00133               } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA)
00134                             && c >= 0xff61 && c <= 0xff9f) {
00135                      /* hankaku kana to zenkaku hirangana */
00136                      s = 0x3000 + hankana2zenhira_table[c - 0xff60];
00137               }
00138        }
00139 
00140        if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT1) {
00141               /* special ascii to symbol */
00142               if (c == 0x5c) {
00143                      s = 0xffe5;                        /* FULLWIDTH YEN SIGN */
00144               } else if (c == 0xa5) {            /* YEN SIGN */
00145                      s = 0xffe5;                        /* FULLWIDTH YEN SIGN */
00146               } else if (c == 0x7e) {
00147                      s = 0xffe3;                        /* FULLWIDTH MACRON */
00148               } else if (c == 0x203e) {   /* OVERLINE */
00149                      s = 0xffe3;                        /* FULLWIDTH MACRON */
00150               } else if (c == 0x27) {
00151                      s = 0x2019;                        /* RIGHT SINGLE QUOTATION MARK */
00152               } else if (c == 0x22) {
00153                      s = 0x201d;                        /* RIGHT DOUBLE QUOTATION MARK */
00154               }
00155        } else if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT2) {
00156               /* special ascii to symbol */
00157               if (c == 0x5c) {
00158                      s = 0xff3c;                        /* FULLWIDTH REVERSE SOLIDUS */
00159               } else if (c == 0x7e) {
00160                      s = 0xff5e;                        /* FULLWIDTH TILDE */
00161               } else if (c == 0x27) {
00162                      s = 0xff07;                        /* FULLWIDTH APOSTROPHE */
00163               } else if (c == 0x22) {
00164                      s = 0xff02;                        /* FULLWIDTH QUOTATION MARK */
00165               }
00166        }
00167 
00168        if (mode & 0xf0) { /* zenkaku to hankaku */
00169               if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) {    /* all except <"> <'> <> <~> */
00170                      s = c - 0xfee0;
00171               } else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) {     /* alpha */
00172                      s = c - 0xfee0;
00173               } else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) {    /* num */
00174                      s = c - 0xfee0;
00175               } else if ((mode & 0x80) && (c == 0x3000)) {     /* spase */
00176                      s = 0x20;
00177               } else if ((mode & 0x10) && (c == 0x2212)) {     /* MINUS SIGN */
00178                      s = 0x2d;
00179               }
00180        }
00181 
00182        if (mode &
00183                      (MBFL_FILT_TL_ZEN2HAN_KATAKANA | MBFL_FILT_TL_ZEN2HAN_HIRAGANA)) {
00184               /* Zenkaku kana to hankaku kana */
00185               if ((mode & MBFL_FILT_TL_ZEN2HAN_KATAKANA) &&
00186                             c >= 0x30a1 && c <= 0x30f4) {
00187                      /* Zenkaku katakana to hankaku kana */
00188                      n = c - 0x30a1;
00189                      if (zenkana2hankana_table[n][1] != 0) {
00190                             (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
00191                             s = 0xff00 + zenkana2hankana_table[n][1];
00192                      } else {
00193                             s = 0xff00 + zenkana2hankana_table[n][0];
00194                      }
00195               } else if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRAGANA) &&
00196                             c >= 0x3041 && c <= 0x3093) {
00197                      /* Zenkaku hirangana to hankaku kana */
00198                      n = c - 0x3041;
00199                      if (zenkana2hankana_table[n][1] != 0) {
00200                             (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
00201                             s = 0xff00 + zenkana2hankana_table[n][1];
00202                      } else {
00203                             s = 0xff00 + zenkana2hankana_table[n][0];
00204                      }
00205               } else if (c == 0x3001) {
00206                      s = 0xff64;                        /* HALFWIDTH IDEOGRAPHIC COMMA */
00207               } else if (c == 0x3002) {
00208                      s = 0xff61;                        /* HALFWIDTH IDEOGRAPHIC FULL STOP */
00209               } else if (c == 0x300c) {
00210                      s = 0xff62;                        /* HALFWIDTH LEFT CORNER BRACKET */
00211               } else if (c == 0x300d) {
00212                      s = 0xff63;                        /* HALFWIDTH RIGHT CORNER BRACKET */
00213               } else if (c == 0x309b) {
00214                      s = 0xff9e;                        /* HALFWIDTH KATAKANA VOICED SOUND MARK */
00215               } else if (c == 0x309c) {
00216                      s = 0xff9f;                        /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
00217               } else if (c == 0x30fc) {
00218                      s = 0xff70;                        /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
00219               } else if (c == 0x30fb) {
00220                      s = 0xff65;                        /* HALFWIDTH KATAKANA MIDDLE DOT */
00221               }
00222        } else if (mode & (MBFL_FILT_TL_ZEN2HAN_HIRA2KANA
00223                      | MBFL_FILT_TL_ZEN2HAN_KANA2HIRA)) { 
00224               if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRA2KANA) &&
00225                             c >= 0x3041 && c <= 0x3093) {
00226                      /* Zenkaku hirangana to Zenkaku katakana */
00227                      s = c + 0x60;
00228               } else if ((mode & MBFL_FILT_TL_ZEN2HAN_KANA2HIRA) &&
00229                             c >= 0x30a1 && c <= 0x30f3) {
00230                      /* Zenkaku katakana to Zenkaku hirangana */
00231                      s = c - 0x60;
00232               }
00233        }
00234 
00235        if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT1) {       /* special symbol to ascii */
00236               if (c == 0xffe5) {                 /* FULLWIDTH YEN SIGN */
00237                      s = 0x5c;
00238               } else if (c == 0xff3c) {   /* FULLWIDTH REVERSE SOLIDUS */
00239                      s = 0x5c;
00240               } else if (c == 0xffe3) {   /* FULLWIDTH MACRON */
00241                      s = 0x7e;
00242               } else if (c == 0x203e) {   /* OVERLINE */
00243                      s = 0x7e;
00244               } else if (c == 0x2018) {   /* LEFT SINGLE QUOTATION MARK*/
00245                      s = 0x27;
00246               } else if (c == 0x2019) {   /* RIGHT SINGLE QUOTATION MARK */
00247                      s = 0x27;
00248               } else if (c == 0x201c) {   /* LEFT DOUBLE QUOTATION MARK */
00249                      s = 0x22;
00250               } else if (c == 0x201d) {   /* RIGHT DOUBLE QUOTATION MARK */
00251                      s = 0x22;
00252               }
00253        }
00254 
00255        if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT2) {       /* special symbol to ascii */
00256               if (c == 0xff3c) {                 /* FULLWIDTH REVERSE SOLIDUS */
00257                      s = 0x5c;
00258               } else if (c == 0xff5e) {   /* FULLWIDTH TILDE */
00259                      s = 0x7e;
00260               } else if (c == 0xff07) {   /* FULLWIDTH APOSTROPHE */
00261                      s = 0x27;
00262               } else if (c == 0xff02) {   /* FULLWIDTH QUOTATION MARK */
00263                      s = 0x22;
00264               }
00265        }
00266 
00267        return (*filt->output_function)(s, filt->data);
00268 }
00269 
00270 int
00271 mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter *filt)
00272 {
00273        int ret, n;
00274        int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
00275 
00276        ret = 0;
00277        if (filt->status) {
00278               n = (filt->cache - 0xff60) & 0x3f;
00279               if (mode & 0x100) {  /* hankaku kana to zenkaku katakana */
00280                      ret = (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
00281               } else if (mode & 0x200) {  /* hankaku kana to zenkaku hirangana */
00282                      ret = (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
00283               }
00284               filt->status = 0;
00285        }
00286 
00287        if (filt->flush_function != NULL) {
00288               return (*filt->flush_function)(filt->data);
00289        }
00290 
00291        return ret;
00292 }
00293 
/*
 * Converter vtable for the JIS X 0201 <-> JIS X 0208 transliteration filter.
 * Both encoding slots hold mbfl_no_encoding_wchar; the remaining four slots
 * are this file's init, cleanup, per-character filter and flush functions.
 * NOTE(review): slot meanings follow struct mbfl_convert_vtbl, which is
 * declared elsewhere -- confirm the field order against that declaration.
 */
00294 const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208 = {
00295        mbfl_no_encoding_wchar,                     /* presumably the source encoding */
00296        mbfl_no_encoding_wchar,                     /* presumably the destination encoding */
00297        mbfl_filt_tl_jisx0201_jisx0208_init,        /* constructor hook */
00298        mbfl_filt_tl_jisx0201_jisx0208_cleanup,     /* destructor hook */
00299        mbfl_filt_tl_jisx0201_jisx0208,             /* per-character filter function */
00300        mbfl_filt_tl_jisx0201_jisx0208_flush        /* end-of-input flush function */
00301 };
00302