Back to index

php5  5.3.10
mbfilter_utf32.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 20 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_utf32.h"
00036 
/* NULL-terminated list of alternative names for UTF-32; referenced only by
 * the mbfl_encoding_utf32 descriptor in this file. */
00037 static const char *mbfl_encoding_utf32_aliases[] = {"utf32", NULL};
00038 
/*
 * Descriptor for the "UTF-32" encoding (no explicit byte order in the name).
 * It carries the same MBFL_ENCTYPE_WCS4BE flag as the UTF-32BE descriptor and
 * is the only one of the three descriptors in this file that supplies an
 * alias list.
 * NOTE(review): the initializer is positional; the per-field notes are
 * inferred from the values -- confirm against the mbfl_encoding declaration.
 */
00039 const mbfl_encoding mbfl_encoding_utf32 = {
00040        mbfl_no_encoding_utf32,
00041        "UTF-32",
00042        "UTF-32",
00043        (const char *(*)[])&mbfl_encoding_utf32_aliases, /* alias list defined above */
00044        NULL,
00045        MBFL_ENCTYPE_WCS4BE
00046 };
00047 
/*
 * Descriptor for the "UTF-32BE" encoding. No alias list is supplied (NULL)
 * and the type flag is MBFL_ENCTYPE_WCS4BE.
 * NOTE(review): positional initializer -- confirm field order against the
 * mbfl_encoding declaration.
 */
00048 const mbfl_encoding mbfl_encoding_utf32be = {
00049        mbfl_no_encoding_utf32be,
00050        "UTF-32BE",
00051        "UTF-32BE",
00052        NULL,
00053        NULL,
00054        MBFL_ENCTYPE_WCS4BE
00055 };
00056 
/*
 * Descriptor for the "UTF-32LE" encoding. No alias list is supplied (NULL)
 * and the type flag is MBFL_ENCTYPE_WCS4LE.
 * NOTE(review): positional initializer -- confirm field order against the
 * mbfl_encoding declaration.
 */
00057 const mbfl_encoding mbfl_encoding_utf32le = {
00058        mbfl_no_encoding_utf32le,
00059        "UTF-32LE",
00060        "UTF-32LE",
00061        NULL,
00062        NULL,
00063        MBFL_ENCTYPE_WCS4LE
00064 };
00065 
/*
 * Conversion table UTF-32 => wchar. Lists the two encoding ids, the common
 * constructor/destructor, mbfl_filt_conv_utf32_wchar as the conversion
 * function, and the common flush handler.
 * NOTE(review): slot roles are inferred from the identifier names -- confirm
 * against struct mbfl_convert_vtbl.
 */
00066 const struct mbfl_convert_vtbl vtbl_utf32_wchar = {
00067        mbfl_no_encoding_utf32,
00068        mbfl_no_encoding_wchar,
00069        mbfl_filt_conv_common_ctor,
00070        mbfl_filt_conv_common_dtor,
00071        mbfl_filt_conv_utf32_wchar,
00072        mbfl_filt_conv_common_flush
00073 };
00074 
/*
 * Conversion table wchar => UTF-32. Note that the conversion function is
 * mbfl_filt_conv_wchar_utf32be: output for plain "UTF-32" is produced by the
 * big-endian encoder, which writes four bytes per character and nothing else.
 * NOTE(review): slot roles are inferred from the identifier names -- confirm
 * against struct mbfl_convert_vtbl.
 */
00075 const struct mbfl_convert_vtbl vtbl_wchar_utf32 = {
00076        mbfl_no_encoding_wchar,
00077        mbfl_no_encoding_utf32,
00078        mbfl_filt_conv_common_ctor,
00079        mbfl_filt_conv_common_dtor,
00080        mbfl_filt_conv_wchar_utf32be,
00081        mbfl_filt_conv_common_flush
00082 };
00083 
/*
 * Conversion table UTF-32BE => wchar, using mbfl_filt_conv_utf32be_wchar as
 * the conversion function together with the common ctor/dtor/flush handlers.
 * NOTE(review): slot roles are inferred from the identifier names -- confirm
 * against struct mbfl_convert_vtbl.
 */
00084 const struct mbfl_convert_vtbl vtbl_utf32be_wchar = {
00085        mbfl_no_encoding_utf32be,
00086        mbfl_no_encoding_wchar,
00087        mbfl_filt_conv_common_ctor,
00088        mbfl_filt_conv_common_dtor,
00089        mbfl_filt_conv_utf32be_wchar,
00090        mbfl_filt_conv_common_flush
00091 };
00092 
/*
 * Conversion table wchar => UTF-32BE, using mbfl_filt_conv_wchar_utf32be as
 * the conversion function together with the common ctor/dtor/flush handlers.
 * NOTE(review): slot roles are inferred from the identifier names -- confirm
 * against struct mbfl_convert_vtbl.
 */
00093 const struct mbfl_convert_vtbl vtbl_wchar_utf32be = {
00094        mbfl_no_encoding_wchar,
00095        mbfl_no_encoding_utf32be,
00096        mbfl_filt_conv_common_ctor,
00097        mbfl_filt_conv_common_dtor,
00098        mbfl_filt_conv_wchar_utf32be,
00099        mbfl_filt_conv_common_flush
00100 };
00101 
/*
 * Conversion table UTF-32LE => wchar, using mbfl_filt_conv_utf32le_wchar as
 * the conversion function together with the common ctor/dtor/flush handlers.
 * NOTE(review): slot roles are inferred from the identifier names -- confirm
 * against struct mbfl_convert_vtbl.
 */
00102 const struct mbfl_convert_vtbl vtbl_utf32le_wchar = {
00103        mbfl_no_encoding_utf32le,
00104        mbfl_no_encoding_wchar,
00105        mbfl_filt_conv_common_ctor,
00106        mbfl_filt_conv_common_dtor,
00107        mbfl_filt_conv_utf32le_wchar,
00108        mbfl_filt_conv_common_flush
00109 };
00110 
/*
 * Conversion table wchar => UTF-32LE, using mbfl_filt_conv_wchar_utf32le as
 * the conversion function together with the common ctor/dtor/flush handlers.
 * NOTE(review): slot roles are inferred from the identifier names -- confirm
 * against struct mbfl_convert_vtbl.
 */
00111 const struct mbfl_convert_vtbl vtbl_wchar_utf32le = {
00112        mbfl_no_encoding_wchar,
00113        mbfl_no_encoding_utf32le,
00114        mbfl_filt_conv_common_ctor,
00115        mbfl_filt_conv_common_dtor,
00116        mbfl_filt_conv_wchar_utf32le,
00117        mbfl_filt_conv_common_flush
00118 };
00119 
/*
 * CK(statement): evaluate `statement` once; if its value is negative, make
 * the *enclosing function* return -1 immediately. Contains a hidden `return`,
 * so it may only be used inside functions returning int. The do/while(0)
 * wrapper lets it be used as a single statement followed by a semicolon.
 */
00120 #define CK(statement)       do { if ((statement) < 0) return (-1); } while (0)
00121 
00122 /*
00123  * UTF-32 => wchar
00124  */
00125 int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter)
00126 {
00127        int n, endian;
00128 
00129        endian = filter->status & 0xff00;
00130        switch (filter->status & 0xff) {
00131        case 0:
00132               if (endian) {
00133                      n = c & 0xff;
00134               } else {
00135                      n = (c & 0xff) << 24;
00136               }
00137               filter->cache = n;
00138               filter->status++;
00139               break;
00140        case 1:
00141               if (endian) {
00142                      n = (c & 0xff) << 8;
00143               } else {
00144                      n = (c & 0xff) << 16;
00145               }
00146               filter->cache |= n;
00147               filter->status++;
00148               break;
00149        case 2:
00150               if (endian) {
00151                      n = (c & 0xff) << 16;
00152               } else {
00153                      n = (c & 0xff) << 8;
00154               }
00155               filter->cache |= n;
00156               filter->status++;
00157               break;
00158        default:
00159               if (endian) {
00160                      n = (c & 0xff) << 24;
00161               } else {
00162                      n = c & 0xff;
00163               }
00164               n |= filter->cache;
00165               if ((n & 0xffff) == 0 && ((n >> 16) & 0xffff) == 0xfffe) {
00166                      if (endian) {
00167                             filter->status = 0;         /* big-endian */
00168                      } else {
00169                             filter->status = 0x100;            /* little-endian */
00170                      }
00171                      CK((*filter->output_function)(0xfeff, filter->data));
00172               } else {
00173                      filter->status &= ~0xff;
00174                      if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) {
00175                             CK((*filter->output_function)(n, filter->data));
00176                      }
00177               }
00178               break;
00179        }
00180 
00181        return c;
00182 }
00183 
00184 /*
00185  * UTF-32BE => wchar
00186  */
00187 int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
00188 {
00189        int n;
00190 
00191        if (filter->status == 0) {
00192               filter->status = 1;
00193               n = (c & 0xff) << 24;
00194               filter->cache = n;
00195        } else if (filter->status == 1) {
00196               filter->status = 2;
00197               n = (c & 0xff) << 16;
00198               filter->cache |= n;
00199        } else if (filter->status == 2) {
00200               filter->status = 3;
00201               n = (c & 0xff) << 8;
00202               filter->cache |= n;
00203        } else {
00204               filter->status = 0;
00205               n = (c & 0xff) | filter->cache;
00206               if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) {
00207                      CK((*filter->output_function)(n, filter->data));
00208               }
00209        }
00210        return c;
00211 }
00212 
00213 /*
00214  * wchar => UTF-32BE
00215  */
00216 int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter)
00217 {
00218        if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) {
00219               CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
00220               CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
00221               CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
00222               CK((*filter->output_function)(c & 0xff, filter->data));
00223        } else {
00224               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00225                      CK(mbfl_filt_conv_illegal_output(c, filter));
00226               }
00227        }
00228 
00229        return c;
00230 }
00231 
00232 /*
00233  * UTF-32LE => wchar
00234  */
00235 int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
00236 {
00237        int n;
00238 
00239        if (filter->status == 0) {
00240               filter->status = 1;
00241               n = (c & 0xff);
00242               filter->cache = n;
00243        } else if (filter->status == 1) {
00244               filter->status = 2;
00245               n = (c & 0xff) << 8;
00246               filter->cache |= n;
00247        } else if (filter->status == 2) {
00248               filter->status = 3;
00249               n = (c & 0xff) << 16;
00250               filter->cache |= n;
00251        } else {
00252               filter->status = 0;
00253               n = ((c & 0xff) << 24) | filter->cache;
00254               if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) {
00255                      CK((*filter->output_function)(n, filter->data));
00256               }
00257        }
00258        return c;
00259 }
00260 
00261 /*
00262  * wchar => UTF-32LE
00263  */
00264 int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter)
00265 {
00266        if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) {
00267               CK((*filter->output_function)(c & 0xff, filter->data));
00268               CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
00269               CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
00270               CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
00271        } else {
00272               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00273                      CK(mbfl_filt_conv_illegal_output(c, filter));
00274               }
00275        }
00276 
00277        return c;
00278 }