Back to index

php5  5.3.10
mbfl_encoding.c
Go to the documentation of this file.
00001 /*
00002  * "streamable kanji code filter and converter"
00003  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
00004  *
00005  * LICENSE NOTICES
00006  *
00007  * This file is part of "streamable kanji code filter and converter",
00008  * which is distributed under the terms of GNU Lesser General Public 
00009  * License (version 2) as published by the Free Software Foundation.
00010  *
00011  * This software is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with "streamable kanji code filter and converter";
00018  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
00019  * Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * The author of this file:
00022  *
00023  */
00024 /*
00025  * The source code included in this file was separated from mbfilter.c
00026  * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
00027  * mbfilter.c is included in this package.
00028  *
00029  */
00030 
00031 #ifdef HAVE_CONFIG_H
00032 #include "config.h"
00033 #endif
00034 
00035 #ifdef HAVE_STDDEF_H
00036 #include <stddef.h>
00037 #endif
00038 
00039 #ifdef HAVE_STRING_H
00040 #include <string.h>
00041 #endif
00042 
00043 #ifdef HAVE_STRINGS_H
00044 #include <strings.h>
00045 #endif
00046 
00047 #include "mbfl_encoding.h"
00048 #include "mbfilter_pass.h"
00049 #include "mbfilter_8bit.h"
00050 #include "mbfilter_wchar.h"
00051 
00052 #include "filters/mbfilter_euc_cn.h"
00053 #include "filters/mbfilter_hz.h"
00054 #include "filters/mbfilter_euc_tw.h"
00055 #include "filters/mbfilter_big5.h"
00056 #include "filters/mbfilter_uhc.h"
00057 #include "filters/mbfilter_euc_kr.h"
00058 #include "filters/mbfilter_iso2022_kr.h"
00059 #include "filters/mbfilter_sjis.h"
00060 #include "filters/mbfilter_sjis_open.h"
00061 #include "filters/mbfilter_cp51932.h"
00062 #include "filters/mbfilter_jis.h"
00063 #include "filters/mbfilter_iso2022_jp_ms.h"
00064 #include "filters/mbfilter_euc_jp.h"
00065 #include "filters/mbfilter_euc_jp_win.h"
00066 #include "filters/mbfilter_ascii.h"
00067 #include "filters/mbfilter_koi8r.h"
00068 #include "filters/mbfilter_koi8u.h"
00069 #include "filters/mbfilter_cp866.h"
00070 #include "filters/mbfilter_cp932.h"
00071 #include "filters/mbfilter_cp936.h"
00072 #include "filters/mbfilter_cp1251.h"
00073 #include "filters/mbfilter_cp1252.h"
00074 #include "filters/mbfilter_cp1254.h"
00075 #include "filters/mbfilter_cp5022x.h"
00076 #include "filters/mbfilter_iso8859_1.h"
00077 #include "filters/mbfilter_iso8859_2.h"
00078 #include "filters/mbfilter_iso8859_3.h"
00079 #include "filters/mbfilter_iso8859_4.h"
00080 #include "filters/mbfilter_iso8859_5.h"
00081 #include "filters/mbfilter_iso8859_6.h"
00082 #include "filters/mbfilter_iso8859_7.h"
00083 #include "filters/mbfilter_iso8859_8.h"
00084 #include "filters/mbfilter_iso8859_9.h"
00085 #include "filters/mbfilter_iso8859_10.h"
00086 #include "filters/mbfilter_iso8859_13.h"
00087 #include "filters/mbfilter_iso8859_14.h"
00088 #include "filters/mbfilter_iso8859_15.h"
00089 #include "filters/mbfilter_iso8859_16.h"
00090 #include "filters/mbfilter_base64.h"
00091 #include "filters/mbfilter_qprint.h"
00092 #include "filters/mbfilter_uuencode.h"
00093 #include "filters/mbfilter_7bit.h"
00094 #include "filters/mbfilter_utf7.h"
00095 #include "filters/mbfilter_utf7imap.h"
00096 #include "filters/mbfilter_utf8.h"
00097 #include "filters/mbfilter_utf16.h"
00098 #include "filters/mbfilter_utf32.h"
00099 #include "filters/mbfilter_byte2.h"
00100 #include "filters/mbfilter_byte4.h"
00101 #include "filters/mbfilter_ucs4.h"
00102 #include "filters/mbfilter_ucs2.h"
00103 #include "filters/mbfilter_htmlent.h"
00104 #include "filters/mbfilter_armscii8.h"
00105 #include "filters/mbfilter_cp850.h"
00106 
00107 #ifndef HAVE_STRCASECMP
00108 #ifdef HAVE_STRICMP
00109 #define strcasecmp stricmp
00110 #endif
00111 #endif 
00112 
00113 
/* NULL-terminated alias list for the "auto" entry; its only alias is "unknown". */
static const char *mbfl_encoding_auto_aliases[] = {"unknown", NULL};

/*
 * Descriptor for the "auto" entry of the encoding table.
 *
 * The initializer is positional. NOTE(review): the field order is declared
 * in mbfl_encoding.h, which is not visible here; the values below appear to
 * be the encoding number, the name, a MIME name (none) and the alias list,
 * followed by two further fields left NULL / 0 -- confirm against the header.
 */
static const mbfl_encoding mbfl_encoding_auto = {
	mbfl_no_encoding_auto,
	"auto",
	NULL,
	/* cast the sized array's address to the pointer-to-unsized-array type */
	(const char *(*)[])&mbfl_encoding_auto_aliases,
	NULL,
	0
};
00124 
/*
 * NULL-terminated table of the encoding descriptors known to this file.
 *
 * The lookup functions in this file walk the table from the first entry
 * and return the first match, so the order of the entries decides which
 * descriptor wins when more than one would match.
 * mbfl_get_supported_encodings() hands this array out directly.
 */
static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
	&mbfl_encoding_pass,
	&mbfl_encoding_auto,
	&mbfl_encoding_wchar,
	&mbfl_encoding_byte2be,
	&mbfl_encoding_byte2le,
	&mbfl_encoding_byte4be,
	&mbfl_encoding_byte4le,
	&mbfl_encoding_base64,
	&mbfl_encoding_uuencode,
	&mbfl_encoding_html_ent,
	&mbfl_encoding_qprint,
	&mbfl_encoding_7bit,
	&mbfl_encoding_8bit,
	&mbfl_encoding_ucs4,
	&mbfl_encoding_ucs4be,
	&mbfl_encoding_ucs4le,
	&mbfl_encoding_ucs2,
	&mbfl_encoding_ucs2be,
	&mbfl_encoding_ucs2le,
	&mbfl_encoding_utf32,
	&mbfl_encoding_utf32be,
	&mbfl_encoding_utf32le,
	&mbfl_encoding_utf16,
	&mbfl_encoding_utf16be,
	&mbfl_encoding_utf16le,
	&mbfl_encoding_utf8,
	&mbfl_encoding_utf7,
	&mbfl_encoding_utf7imap,
	&mbfl_encoding_ascii,
	&mbfl_encoding_euc_jp,
	&mbfl_encoding_sjis,
	&mbfl_encoding_eucjp_win,
	&mbfl_encoding_sjis_open,
	&mbfl_encoding_cp932,
	&mbfl_encoding_cp51932,
	&mbfl_encoding_jis,
	&mbfl_encoding_2022jp,
	&mbfl_encoding_2022jpms,
	&mbfl_encoding_cp1252,
	&mbfl_encoding_cp1254,
	&mbfl_encoding_8859_1,
	&mbfl_encoding_8859_2,
	&mbfl_encoding_8859_3,
	&mbfl_encoding_8859_4,
	&mbfl_encoding_8859_5,
	&mbfl_encoding_8859_6,
	&mbfl_encoding_8859_7,
	&mbfl_encoding_8859_8,
	&mbfl_encoding_8859_9,
	&mbfl_encoding_8859_10,
	&mbfl_encoding_8859_13,
	&mbfl_encoding_8859_14,
	&mbfl_encoding_8859_15,
	&mbfl_encoding_8859_16,
	&mbfl_encoding_euc_cn,
	&mbfl_encoding_cp936,
	&mbfl_encoding_hz,
	&mbfl_encoding_euc_tw,
	&mbfl_encoding_big5,
	&mbfl_encoding_euc_kr,
	&mbfl_encoding_uhc,
	&mbfl_encoding_2022kr,
	&mbfl_encoding_cp1251,
	&mbfl_encoding_cp866,
	&mbfl_encoding_koi8r,
	&mbfl_encoding_koi8u,
	&mbfl_encoding_armscii8,
	&mbfl_encoding_cp850,
	&mbfl_encoding_jis_ms,
	&mbfl_encoding_cp50220,
	&mbfl_encoding_cp50220raw,
	&mbfl_encoding_cp50221,
	&mbfl_encoding_cp50222,
	NULL	/* end-of-table sentinel relied on by every scan loop */
};
00201 
00202 /* encoding resolver */
00203 const mbfl_encoding *
00204 mbfl_name2encoding(const char *name)
00205 {
00206        const mbfl_encoding *encoding;
00207        int i, j;
00208 
00209        if (name == NULL) {
00210               return NULL;
00211        }
00212 
00213        i = 0;
00214        while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
00215               if (strcasecmp(encoding->name, name) == 0) {
00216                      return encoding;
00217               }
00218        }
00219 
00220        /* serch MIME charset name */
00221        i = 0;
00222        while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
00223               if (encoding->mime_name != NULL) {
00224                      if (strcasecmp(encoding->mime_name, name) == 0) {
00225                             return encoding;
00226                      }
00227               }
00228        }
00229 
00230        /* serch aliases */
00231        i = 0;
00232        while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
00233               if (encoding->aliases != NULL) {
00234                      j = 0;
00235                      while ((*encoding->aliases)[j] != NULL) {
00236                             if (strcasecmp((*encoding->aliases)[j], name) == 0) {
00237                                    return encoding;
00238                             }
00239                             j++;
00240                      }
00241               }
00242        }
00243 
00244        return NULL;
00245 }
00246 
00247 const mbfl_encoding *
00248 mbfl_no2encoding(enum mbfl_no_encoding no_encoding)
00249 {
00250        const mbfl_encoding *encoding;
00251        int i;
00252 
00253        i = 0;
00254        while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
00255               if (encoding->no_encoding == no_encoding) {
00256                      return encoding;
00257               }
00258        }
00259 
00260        return NULL;
00261 }
00262 
00263 enum mbfl_no_encoding
00264 mbfl_name2no_encoding(const char *name)
00265 {
00266        const mbfl_encoding *encoding;
00267 
00268        encoding = mbfl_name2encoding(name);
00269        if (encoding == NULL) {
00270               return mbfl_no_encoding_invalid;
00271        } else {
00272               return encoding->no_encoding;
00273        }
00274 }
00275 
00276 const char *
00277 mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding)
00278 {
00279        const mbfl_encoding *encoding;
00280 
00281        encoding = mbfl_no2encoding(no_encoding);
00282        if (encoding == NULL) {
00283               return "";
00284        } else {
00285               return encoding->name;
00286        }
00287 }
00288 
00289 const mbfl_encoding **
00290 mbfl_get_supported_encodings(void)
00291 {
00292        return mbfl_encoding_ptr_list;
00293 }
00294 
00295 const char *
00296 mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding)
00297 {
00298        const mbfl_encoding *encoding;
00299 
00300        encoding = mbfl_no2encoding(no_encoding);
00301        if (encoding != NULL && encoding->mime_name != NULL && encoding->mime_name[0] != '\0') {
00302               return encoding->mime_name;
00303        } else {
00304               return NULL;
00305        }
00306 }
00307 
/*
 * Report whether a name resolves to a known encoding.
 *
 * Returns 1 when mbfl_name2encoding() finds a descriptor for name,
 * 0 otherwise.
 */
int
mbfl_is_support_encoding(const char *name)
{
	return mbfl_name2encoding(name) != NULL;
}