Back to index

php5  5.3.10
mbfilter_utf7imap.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter.c
00026  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
00027  * 
00028  */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "config.h"
00032 #endif
00033 
00034 #include "mbfilter.h"
00035 #include "mbfilter_utf7imap.h"
00036 
00037 const mbfl_encoding mbfl_encoding_utf7imap = {
00038        mbfl_no_encoding_utf7imap,
00039        "UTF7-IMAP",
00040        NULL,
00041        NULL,
00042        NULL,
00043        MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
00044 };
00045 
00046 const struct mbfl_convert_vtbl vtbl_utf7imap_wchar = {
00047        mbfl_no_encoding_utf7imap,
00048        mbfl_no_encoding_wchar,
00049        mbfl_filt_conv_common_ctor,
00050        mbfl_filt_conv_common_dtor,
00051        mbfl_filt_conv_utf7imap_wchar,
00052        mbfl_filt_conv_common_flush };
00053 
00054 const struct mbfl_convert_vtbl vtbl_wchar_utf7imap = {
00055        mbfl_no_encoding_wchar,
00056        mbfl_no_encoding_utf7imap,
00057        mbfl_filt_conv_common_ctor,
00058        mbfl_filt_conv_common_dtor,
00059        mbfl_filt_conv_wchar_utf7imap,
00060        mbfl_filt_conv_wchar_utf7imap_flush };
00061 
/*
 * CK(statement): evaluate `statement` exactly once and, when its value is
 * negative, make the *enclosing function* return -1.  Non-negative values
 * fall through.  Wrapped in do { } while (0) so that it behaves as a single
 * statement after an unbraced if/else.
 */
#define CK(statement)				\
	do {					\
		if ((statement) < 0)		\
			return (-1);		\
	} while (0)
00063 
00064 /*
00065  * UTF7-IMAP => wchar
00066  */
00067 int mbfl_filt_conv_utf7imap_wchar(int c, mbfl_convert_filter *filter)
00068 {
00069        int s, n;
00070 
00071        n = -1;
00072        if (filter->status != 0) {         /* Modified Base64 */
00073               if (c >= 0x41 && c <= 0x5a) {             /* A - Z */
00074                      n = c - 65;
00075               } else if (c >= 0x61 && c <= 0x7a) {      /* a - z */
00076                      n = c - 71;
00077               } else if (c >= 0x30 && c <= 0x39) {      /* 0 - 9 */
00078                      n = c + 4;
00079               } else if (c == 0x2b) {                   /* '+' */
00080                      n = 62;
00081               } else if (c == 0x2c) {                   /* ',' */
00082                      n = 63;
00083               }
00084               if (n < 0 || n > 63) {
00085                      if (c == 0x2d) {
00086                             if (filter->status == 1) {         /* "&-" -> "&" */
00087                                    CK((*filter->output_function)(0x26, filter->data));
00088                             }
00089                      } else if (c >= 0 && c < 0x80) {   /* ASCII exclude '-' */
00090                             CK((*filter->output_function)(c, filter->data));
00091                      } else {             /* illegal character */
00092                             s = c & MBFL_WCSGROUP_MASK;
00093                             s |= MBFL_WCSGROUP_THROUGH;
00094                             CK((*filter->output_function)(s, filter->data));
00095                      }
00096                      filter->cache = 0;
00097                      filter->status = 0;
00098                      return c;
00099               }
00100        }
00101 
00102        switch (filter->status) {
00103        /* directly encoded characters */
00104        case 0:
00105               if (c == 0x26) {     /* '&'  shift character */
00106                      filter->status++;
00107               } else if (c >= 0 && c < 0x80) {   /* ASCII */
00108                      CK((*filter->output_function)(c, filter->data));
00109               } else {             /* illegal character */
00110                      s = c & MBFL_WCSGROUP_MASK;
00111                      s |= MBFL_WCSGROUP_THROUGH;
00112                      CK((*filter->output_function)(s, filter->data));
00113               }
00114               break;
00115 
00116        /* decode Modified Base64 */
00117        case 1:
00118        case 2:
00119               filter->cache |= n << 10;
00120               filter->status = 3;
00121               break;
00122        case 3:
00123               filter->cache |= n << 4;
00124               filter->status = 4;
00125               break;
00126        case 4:
00127               s = ((n >> 2) & 0xf) | (filter->cache & 0xffff);
00128               n = (n & 0x3) << 14;
00129               filter->status = 5;
00130               if (s >= 0xd800 && s < 0xdc00) {
00131                      s = (((s & 0x3ff) << 16) + 0x400000) | n;
00132                      filter->cache = s;
00133               } else if (s >= 0xdc00 && s < 0xe000) {
00134                      s &= 0x3ff;
00135                      s |= (filter->cache & 0xfff0000) >> 6;
00136                      filter->cache = n;
00137                      if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
00138                             CK((*filter->output_function)(s, filter->data));
00139                      } else {             /* illegal character */
00140                             s &= MBFL_WCSGROUP_MASK;
00141                             s |= MBFL_WCSGROUP_THROUGH;
00142                             CK((*filter->output_function)(s, filter->data));
00143                      }
00144               } else {
00145                      filter->cache = n;
00146                      CK((*filter->output_function)(s, filter->data));
00147               }
00148               break;
00149 
00150        case 5:
00151               filter->cache |= n << 8;
00152               filter->status = 6;
00153               break;
00154        case 6:
00155               filter->cache |= n << 2;
00156               filter->status = 7;
00157               break;
00158        case 7:
00159               s = ((n >> 4) & 0x3) | (filter->cache & 0xffff);
00160               n = (n & 0xf) << 12;
00161               filter->status = 8;
00162               if (s >= 0xd800 && s < 0xdc00) {
00163                      s = (((s & 0x3ff) << 16) + 0x400000) | n;
00164                      filter->cache = s;
00165               } else if (s >= 0xdc00 && s < 0xe000) {
00166                      s &= 0x3ff;
00167                      s |= (filter->cache & 0xfff0000) >> 6;
00168                      filter->cache = n;
00169                      if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
00170                             CK((*filter->output_function)(s, filter->data));
00171                      } else {             /* illegal character */
00172                             s &= MBFL_WCSGROUP_MASK;
00173                             s |= MBFL_WCSGROUP_THROUGH;
00174                             CK((*filter->output_function)(s, filter->data));
00175                      }
00176               } else {
00177                      filter->cache = n;
00178                      CK((*filter->output_function)(s, filter->data));
00179               }
00180               break;
00181 
00182        case 8:
00183               filter->cache |= n << 6;
00184               filter->status = 9;
00185               break;
00186        case 9:
00187               s = n | (filter->cache & 0xffff);
00188               filter->status = 2;
00189               if (s >= 0xd800 && s < 0xdc00) {
00190                      s = (((s & 0x3ff) << 16) + 0x400000);
00191                      filter->cache = s;
00192               } else if (s >= 0xdc00 && s < 0xe000) {
00193                      s &= 0x3ff;
00194                      s |= (filter->cache & 0xfff0000) >> 6;
00195                      filter->cache = 0;
00196                      if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
00197                             CK((*filter->output_function)(s, filter->data));
00198                      } else {             /* illegal character */
00199                             s &= MBFL_WCSGROUP_MASK;
00200                             s |= MBFL_WCSGROUP_THROUGH;
00201                             CK((*filter->output_function)(s, filter->data));
00202                      }
00203               } else {
00204                      filter->cache = 0;
00205                      CK((*filter->output_function)(s, filter->data));
00206               }
00207               break;
00208 
00209        default:
00210               filter->status = 0;
00211               break;
00212        }
00213 
00214        return c;
00215 }
00216 
/*
 * Modified Base64 alphabet used by the encoder: index = 6-bit value,
 * element = output character code.  Differs from plain Base64 in using ','
 * (0x2c) for value 63.  The table holds 65 bytes; the last one is 0x00.
 */
static const unsigned char mbfl_utf7imap_base64_table[] =
{
	/* values 0..25: 'A' - 'Z' */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
	0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
	0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
	/* values 26..51: 'a' - 'z' */
	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a,
	0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
	0x75, 0x76, 0x77, 0x78, 0x79, 0x7a,
	/* values 52..61: '0' - '9' */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
	/* value 62: '+', value 63: ',' */
	0x2b, 0x2c,
	/* trailing '\0' */
	0x00
};
00230 
00231 /*
00232  * wchar => UTF7-IMAP
00233  */
00234 int mbfl_filt_conv_wchar_utf7imap(int c, mbfl_convert_filter *filter)
00235 {
00236        int n, s;
00237 
00238        n = 0;
00239        if (c == 0x26) {
00240               n = 1;
00241        } else if ((c >= 0x20 && c <= 0x7e) || c == 0) {
00242               n = 2;
00243        } else if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
00244               ;
00245        } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
00246               s = ((c >> 10) - 0x40) | 0xd800;
00247               CK((*filter->filter_function)(s, filter));
00248               s = (c & 0x3ff) | 0xdc00;
00249               CK((*filter->filter_function)(s, filter));
00250               return c;
00251        } else {
00252               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
00253                      CK(mbfl_filt_conv_illegal_output(c, filter));
00254               }
00255               return c;
00256        }
00257 
00258        switch (filter->status) {
00259        case 0:
00260               if (n != 0) { /* directly encode characters */
00261                      CK((*filter->output_function)(c, filter->data));
00262                      if (n == 1) {
00263                             CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00264                      }
00265               } else {      /* Modified Base64 */
00266                      CK((*filter->output_function)(0x26, filter->data));            /* '&' */
00267                      filter->status = 1;
00268                      filter->cache = c;
00269               }
00270               break;
00271 
00272        /* encode Modified Base64 */
00273        case 1:
00274               s = filter->cache;
00275               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 10) & 0x3f], filter->data));
00276               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 4) & 0x3f], filter->data));
00277               if (n != 0) {
00278                      CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s << 2) & 0x3c], filter->data));
00279                      CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00280                      CK((*filter->output_function)(c, filter->data));
00281                      if (n == 1) {
00282                             CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00283                      }
00284                      filter->status = 0;
00285               } else {
00286                      filter->status = 2;
00287                      filter->cache = ((s & 0xf) << 16) | c;
00288               }
00289               break;
00290 
00291        case 2:
00292               s = filter->cache;
00293               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 14) & 0x3f], filter->data));
00294               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 8) & 0x3f], filter->data));
00295               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 2) & 0x3f], filter->data));
00296               if (n != 0) {
00297                      CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s << 4) & 0x30], filter->data));
00298                      CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00299                      CK((*filter->output_function)(c, filter->data));
00300                      if (n == 1) {
00301                             CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00302                      }
00303                      filter->status = 0;
00304               } else {
00305                      filter->status = 3;
00306                      filter->cache = ((s & 0x3) << 16) | c;
00307               }
00308               break;
00309 
00310        case 3:
00311               s = filter->cache;
00312               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 12) & 0x3f], filter->data));
00313               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(s >> 6) & 0x3f], filter->data));
00314               CK((*filter->output_function)(mbfl_utf7imap_base64_table[s & 0x3f], filter->data));
00315               if (n != 0) {
00316                      CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00317                      CK((*filter->output_function)(c, filter->data));
00318                      if (n == 1) {
00319                             CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00320                      }
00321                      filter->status = 0;
00322               } else {
00323                      filter->status = 1;
00324                      filter->cache = c;
00325               }
00326               break;
00327 
00328        default:
00329               filter->status = 0;
00330               break;
00331        }
00332 
00333        return c;
00334 
00335 }
00336 
00337 int mbfl_filt_conv_wchar_utf7imap_flush(mbfl_convert_filter *filter)
00338 {
00339        int status, cache;
00340 
00341        status = filter->status;
00342        cache = filter->cache;
00343        filter->status = 0;
00344        filter->cache = 0;
00345        /* flush fragments */
00346        switch (status) {
00347        case 1:
00348               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 10) & 0x3f], filter->data));
00349               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 4) & 0x3f], filter->data));
00350               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache << 2) & 0x3c], filter->data));
00351               CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00352               break;
00353 
00354        case 2:
00355               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 14) & 0x3f], filter->data));
00356               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 8) & 0x3f], filter->data));
00357               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 2) & 0x3f], filter->data));
00358               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache << 4) & 0x30], filter->data));
00359               CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00360               break;
00361 
00362        case 3:
00363               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 12) & 0x3f], filter->data));
00364               CK((*filter->output_function)(mbfl_utf7imap_base64_table[(cache >> 6) & 0x3f], filter->data));
00365               CK((*filter->output_function)(mbfl_utf7imap_base64_table[cache & 0x3f], filter->data));
00366               CK((*filter->output_function)(0x2d, filter->data));            /* '-' */
00367               break;
00368        }
00369        return 0;
00370 }
00371 
00372