Back to index

php5  5.3.10
mbfilter_utf16.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_utf16.h"
00036 
/* Alternative names for the UTF-16 encoding; the list is NULL-terminated. */
00037 static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
00038 
/*
 * Descriptor for the generic "UTF-16" encoding.
 * NOTE(review): the per-field remarks are inferred from the values only;
 * confirm them against the mbfl_encoding declaration in mbfilter.h.
 */
00039 const mbfl_encoding mbfl_encoding_utf16 = {
00040        mbfl_no_encoding_utf16,	/* encoding identifier */
00041        "UTF-16",	/* presumably the canonical name - TODO confirm */
00042        "UTF-16",	/* presumably the MIME name - TODO confirm */
00043        (const char *(*)[])&mbfl_encoding_utf16_aliases,	/* alias list defined in this file */
00044        NULL,
00045        MBFL_ENCTYPE_MWC2BE	/* same type flag as the UTF-16BE descriptor */
00046 };
00047 
/*
 * Descriptor for the "UTF-16BE" encoding.  No alias list is provided.
 * NOTE(review): the per-field remarks are inferred from the values only;
 * confirm them against the mbfl_encoding declaration in mbfilter.h.
 */
00048 const mbfl_encoding mbfl_encoding_utf16be = {
00049        mbfl_no_encoding_utf16be,	/* encoding identifier */
00050        "UTF-16BE",	/* presumably the canonical name - TODO confirm */
00051        "UTF-16BE",	/* presumably the MIME name - TODO confirm */
00052        NULL,	/* no aliases */
00053        NULL,
00054        MBFL_ENCTYPE_MWC2BE
00055 };
00056 
/*
 * Descriptor for the "UTF-16LE" encoding.  No alias list is provided.
 * NOTE(review): the per-field remarks are inferred from the values only;
 * confirm them against the mbfl_encoding declaration in mbfilter.h.
 */
00057 const mbfl_encoding mbfl_encoding_utf16le = {
00058        mbfl_no_encoding_utf16le,	/* encoding identifier */
00059        "UTF-16LE",	/* presumably the canonical name - TODO confirm */
00060        "UTF-16LE",	/* presumably the MIME name - TODO confirm */
00061        NULL,	/* no aliases */
00062        NULL,
00063        MBFL_ENCTYPE_MWC2LE
00064 };
00065 
/*
 * Conversion table entry: UTF-16 => wchar.
 * Uses the decoder in this file that honours a leading byte order mark.
 */
00066 const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
00067        mbfl_no_encoding_utf16,	/* source encoding */
00068        mbfl_no_encoding_wchar,	/* destination encoding */
00069        mbfl_filt_conv_common_ctor,
00070        mbfl_filt_conv_common_dtor,
00071        mbfl_filt_conv_utf16_wchar,	/* per-byte conversion function */
00072        mbfl_filt_conv_common_flush
00073 };
00074 
/*
 * Conversion table entry: wchar => UTF-16.
 * Generic UTF-16 output is produced by the big-endian encoder; that function
 * writes no byte order mark.
 */
00075 const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
00076        mbfl_no_encoding_wchar,	/* source encoding */
00077        mbfl_no_encoding_utf16,	/* destination encoding */
00078        mbfl_filt_conv_common_ctor,
00079        mbfl_filt_conv_common_dtor,
00080        mbfl_filt_conv_wchar_utf16be,	/* shared with vtbl_wchar_utf16be */
00081        mbfl_filt_conv_common_flush
00082 };
00083 
/* Conversion table entry: UTF-16BE => wchar. */
00084 const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
00085        mbfl_no_encoding_utf16be,	/* source encoding */
00086        mbfl_no_encoding_wchar,	/* destination encoding */
00087        mbfl_filt_conv_common_ctor,
00088        mbfl_filt_conv_common_dtor,
00089        mbfl_filt_conv_utf16be_wchar,	/* per-byte conversion function */
00090        mbfl_filt_conv_common_flush
00091 };
00092 
/* Conversion table entry: wchar => UTF-16BE. */
00093 const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
00094        mbfl_no_encoding_wchar,	/* source encoding */
00095        mbfl_no_encoding_utf16be,	/* destination encoding */
00096        mbfl_filt_conv_common_ctor,
00097        mbfl_filt_conv_common_dtor,
00098        mbfl_filt_conv_wchar_utf16be,	/* per-character conversion function */
00099        mbfl_filt_conv_common_flush
00100 };
00101 
/* Conversion table entry: UTF-16LE => wchar. */
00102 const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
00103        mbfl_no_encoding_utf16le,	/* source encoding */
00104        mbfl_no_encoding_wchar,	/* destination encoding */
00105        mbfl_filt_conv_common_ctor,
00106        mbfl_filt_conv_common_dtor,
00107        mbfl_filt_conv_utf16le_wchar,	/* per-byte conversion function */
00108        mbfl_filt_conv_common_flush
00109 };
00110 
/* Conversion table entry: wchar => UTF-16LE. */
00111 const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
00112        mbfl_no_encoding_wchar,	/* source encoding */
00113        mbfl_no_encoding_utf16le,	/* destination encoding */
00114        mbfl_filt_conv_common_ctor,
00115        mbfl_filt_conv_common_dtor,
00116        mbfl_filt_conv_wchar_utf16le,	/* per-character conversion function */
00117        mbfl_filt_conv_common_flush
00118 };
00119 
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function immediately.  Used around every output call so that a
 * failure reported downstream stops the conversion.
 */
00120 #define CK(statement)       do { if ((statement) < 0) return (-1); } while (0)
00121 
00122 /*
00123  * UTF-16 => wchar
00124  */
00125 int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
00126 {
00127        int n, endian;
00128 
00129        endian = filter->status & 0xff00;
00130        switch (filter->status & 0x0f) {
00131        case 0:
00132               if (endian) {
00133                      n = c & 0xff;
00134               } else {
00135                      n = (c & 0xff) << 8;
00136               }
00137               filter->cache |= n;
00138               filter->status++;
00139               break;
00140        default:
00141               if (endian) {
00142                      n = (c & 0xff) << 8;
00143               } else {
00144                      n = c & 0xff;
00145               }
00146               n |= filter->cache & 0xffff;
00147               filter->status &= ~0x0f;
00148               if (n >= 0xd800 && n < 0xdc00) {
00149                      filter->cache = ((n & 0x3ff) << 16) + 0x400000;
00150               } else if (n >= 0xdc00 && n < 0xe000) {
00151                      n &= 0x3ff;
00152                      n |= (filter->cache & 0xfff0000) >> 6;
00153                      filter->cache = 0;
00154                      if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
00155                             CK((*filter->output_function)(n, filter->data));
00156                      } else {             /* illegal character */
00157                             n &= MBFL_WCSGROUP_MASK;
00158                             n |= MBFL_WCSGROUP_THROUGH;
00159                             CK((*filter->output_function)(n, filter->data));
00160                      }
00161               } else {
00162                      int is_first = filter->status & 0x10;
00163                      filter->cache = 0;
00164                      filter->status |= 0x10;
00165                      if (!is_first) {
00166                             if (n == 0xfffe) {
00167                                    if (endian) {
00168                                           filter->status &= ~0x100;          /* big-endian */
00169                                    } else {
00170                                           filter->status |= 0x100;           /* little-endian */
00171                                    }
00172                                    break;
00173                             } else if (n == 0xfeff) {
00174                                    break;
00175                             }
00176                      }
00177                      CK((*filter->output_function)(n, filter->data));
00178               }
00179               break;
00180        }
00181 
00182        return c;
00183 }
00184 
00185 /*
00186  * UTF-16BE => wchar
00187  */
00188 int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
00189 {
00190        int n;
00191 
00192        switch (filter->status) {
00193        case 0:
00194               filter->status = 1;
00195               n = (c & 0xff) << 8;
00196               filter->cache |= n;
00197               break;
00198        default:
00199               filter->status = 0;
00200               n = (filter->cache & 0xff00) | (c & 0xff);
00201               if (n >= 0xd800 && n < 0xdc00) {
00202                      filter->cache = ((n & 0x3ff) << 16) + 0x400000;
00203               } else if (n >= 0xdc00 && n < 0xe000) {
00204                      n &= 0x3ff;
00205                      n |= (filter->cache & 0xfff0000) >> 6;
00206                      filter->cache = 0;
00207                      if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
00208                             CK((*filter->output_function)(n, filter->data));
00209                      } else {             /* illegal character */
00210                             n &= MBFL_WCSGROUP_MASK;
00211                             n |= MBFL_WCSGROUP_THROUGH;
00212                             CK((*filter->output_function)(n, filter->data));
00213                      }
00214               } else {
00215                      filter->cache = 0;
00216                      CK((*filter->output_function)(n, filter->data));
00217               }
00218               break;
00219        }
00220 
00221        return c;
00222 }
00223 
00224 /*
00225  * wchar => UTF-16BE
00226  */
00227 int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
00228 {
00229        int n;
00230 
00231        if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
00232               CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
00233               CK((*filter->output_function)(c & 0xff, filter->data));
00234        } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
00235               n = ((c >> 10) - 0x40) | 0xd800;
00236               CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
00237               CK((*filter->output_function)(n & 0xff, filter->data));
00238               n = (c & 0x3ff) | 0xdc00;
00239               CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
00240               CK((*filter->output_function)(n & 0xff, filter->data));
00241        } else {
00242               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00243                      CK(mbfl_filt_conv_illegal_output(c, filter));
00244               }
00245        }
00246 
00247        return c;
00248 }
00249 
00250 /*
00251  * UTF-16LE => wchar
00252  */
00253 int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
00254 {
00255        int n;
00256 
00257        switch (filter->status) {
00258        case 0:
00259               filter->status = 1;
00260               n = c & 0xff;
00261               filter->cache |= n;
00262               break;
00263        default:
00264               filter->status = 0;
00265               n = (filter->cache & 0xff) | ((c & 0xff) << 8);
00266               if (n >= 0xd800 && n < 0xdc00) {
00267                      filter->cache = ((n & 0x3ff) << 16) + 0x400000;
00268               } else if (n >= 0xdc00 && n < 0xe000) {
00269                      n &= 0x3ff;
00270                      n |= (filter->cache & 0xfff0000) >> 6;
00271                      filter->cache = 0;
00272                      if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
00273                             CK((*filter->output_function)(n, filter->data));
00274                      } else {             /* illegal character */
00275                             n &= MBFL_WCSGROUP_MASK;
00276                             n |= MBFL_WCSGROUP_THROUGH;
00277                             CK((*filter->output_function)(n, filter->data));
00278                      }
00279               } else {
00280                      filter->cache = 0;
00281                      CK((*filter->output_function)(n, filter->data));
00282               }
00283               break;
00284        }
00285 
00286        return c;
00287 }
00288 
00289 /*
00290  * wchar => UTF-16LE
00291  */
00292 int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
00293 {
00294        int n;
00295 
00296        if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
00297               CK((*filter->output_function)(c & 0xff, filter->data));
00298               CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
00299        } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
00300               n = ((c >> 10) - 0x40) | 0xd800;
00301               CK((*filter->output_function)(n & 0xff, filter->data));
00302               CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
00303               n = (c & 0x3ff) | 0xdc00;
00304               CK((*filter->output_function)(n & 0xff, filter->data));
00305               CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
00306        } else {
00307               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00308                      CK(mbfl_filt_conv_illegal_output(c, filter));
00309               }
00310        }
00311 
00312        return c;
00313 }
00314 
00315 
00316