Back to index

php5  5.3.10
mysqlnd_charset.c
Go to the documentation of this file.
00001 /*
00002   +----------------------------------------------------------------------+
00003   | PHP Version 5                                                        |
00004   +----------------------------------------------------------------------+
00005   | Copyright (c) 2006-2012 The PHP Group                                |
00006   +----------------------------------------------------------------------+
00007   | This source file is subject to version 3.01 of the PHP license,      |
00008   | that is bundled with this package in the file LICENSE, and is        |
00009   | available through the world-wide-web at the following url:           |
00010   | http://www.php.net/license/3_01.txt                                  |
00011   | If you did not receive a copy of the PHP license and are unable to   |
00012   | obtain it through the world-wide-web, please send a note to          |
00013   | license@php.net so we can mail you a copy immediately.               |
00014   +----------------------------------------------------------------------+
00015   | Authors: Georg Richter <georg@mysql.com>                             |
00016   |          Andrey Hristov <andrey@mysql.com>                           |
00017   |          Ulf Wendel <uwendel@mysql.com>                              |
00018   +----------------------------------------------------------------------+
00019 */
00020 #include "php.h"
00021 #include "php_globals.h"
00022 #include "mysqlnd.h"
00023 #include "mysqlnd_priv.h"
00024 #include "mysqlnd_debug.h"
00025 
00026 /* {{{ utf8 functions */
/*
 * Examine the UTF-8 sequence that begins at `start` and must finish before `end`.
 * Only sequences of up to three bytes are accepted.
 *
 * Returns the sequence length in bytes (1, 2 or 3), or 0 when the range is empty,
 * the sequence is cut short by `end`, or the bytes do not form an accepted sequence
 * (this includes every lead byte >= 0xF0).
 */
static unsigned int check_mb_utf8mb3_sequence(const char *start, const char *end)
{
	const unsigned char *bytes = (const unsigned char *) start;
	unsigned char lead;

	if (start >= end) {
		return 0;
	}
	lead = bytes[0];

	if (lead < 0x80) {
		return 1;		/* plain single byte */
	}
	if (lead < 0xC2) {
		return 0;		/* continuation byte or overlong two-byte lead */
	}

	if (lead < 0xE0) {
		/* two-byte form: one continuation byte (0x80..0xBF) must follow */
		if (end - start < 2) {
			return 0;
		}
		return ((bytes[1] ^ 0x80) < 0x40) ? 2 : 0;
	}

	if (lead < 0xF0) {
		/* three-byte form: two continuation bytes must follow */
		if (end - start < 3) {
			return 0;
		}
		if ((bytes[1] ^ 0x80) >= 0x40) {
			return 0;
		}
		if ((bytes[2] ^ 0x80) >= 0x40) {
			return 0;
		}
		/* a 0xE0 lead needs a second byte >= 0xA0, otherwise the form is overlong */
		if (lead < 0xE1 && bytes[1] < 0xA0) {
			return 0;
		}
		return 3;
	}
	return 0;
}
00064 
00065 
/*
 * Examine the UTF-8 sequence that begins at `start` and must finish before `end`.
 * Sequences of one to four bytes are accepted.
 *
 * Returns the sequence length in bytes (1..4), or 0 when the range is empty,
 * the sequence is cut short by `end`, or the bytes do not form an accepted sequence.
 */
static unsigned int check_mb_utf8_sequence(const char *start, const char *end)
{
	const unsigned char *bytes = (const unsigned char *) start;
	unsigned char lead;

	if (start >= end) {
		return 0;
	}
	lead = bytes[0];

	if (lead < 0x80) {
		return 1;		/* plain single byte */
	}
	if (lead < 0xC2) {
		return 0;		/* continuation byte or overlong two-byte lead */
	}

	if (lead < 0xE0) {
		/* two-byte form */
		if (end - start < 2) {
			return 0;
		}
		return ((bytes[1] ^ 0x80) < 0x40) ? 2 : 0;
	}

	if (lead < 0xF0) {
		/* three-byte form */
		if (end - start < 3) {
			return 0;
		}
		if ((bytes[1] ^ 0x80) >= 0x40 || (bytes[2] ^ 0x80) >= 0x40) {
			return 0;
		}
		/* a 0xE0 lead needs a second byte >= 0xA0, otherwise the form is overlong */
		if (lead < 0xE1 && bytes[1] < 0xA0) {
			return 0;
		}
		return 3;
	}

	if (lead < 0xF5) {
		/*
		  Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

		  The bit layout could carry U+00010000..U+001FFFFF, but Unicode stops at
		  U+0010FFFF, so only these byte ranges are accepted:

		  [F0][90..BF][80..BF][80..BF]   (F0.90.80.80 == U+00010000, the minimum)
		  [F1][80..BF][80..BF][80..BF]
		  [F2][80..BF][80..BF][80..BF]
		  [F3][80..BF][80..BF][80..BF]
		  [F4][80..8F][80..BF][80..BF]   (F4.8F.BF.BF == U+0010FFFF, the maximum)
		*/
		if (end - start < 4) {
			return 0;
		}
		if ((bytes[1] ^ 0x80) >= 0x40 ||
			(bytes[2] ^ 0x80) >= 0x40 ||
			(bytes[3] ^ 0x80) >= 0x40) {
			return 0;
		}
		/* lower bound: an F0 lead must be followed by at least 0x90 */
		if (lead < 0xF1 && bytes[1] < 0x90) {
			return 0;
		}
		/* upper bound: an F4 lead must be followed by at most 0x8F */
		if (lead > 0xF3 && bytes[1] > 0x8F) {
			return 0;
		}
		return 4;
	}
	return 0;
}
00137 
/*
 * Multi-byte-only wrapper around check_mb_utf8mb3_sequence(): the length is
 * passed through only when it is greater than one byte; single-byte characters
 * and invalid input both yield 0.
 */
static unsigned int check_mb_utf8mb3_valid(const char *start, const char *end)
{
	const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
00143 
/*
 * Multi-byte-only wrapper around check_mb_utf8_sequence(): the length is
 * passed through only when it is greater than one byte; single-byte characters
 * and invalid input both yield 0.
 */
static unsigned int check_mb_utf8_valid(const char *start, const char *end)
{
	const unsigned int seq_len = check_mb_utf8_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
00149 
00150 
/*
 * Character length implied by a UTF-8 lead byte value, limited to three-byte
 * sequences: 1 below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF, and 0 for
 * anything that is not an accepted lead byte.
 */
static unsigned int mysqlnd_mbcharlen_utf8mb3(unsigned int utf8)
{
	if (utf8 >= 0xC2 && utf8 < 0xE0) {
		return 2;		/* double byte character */
	}
	if (utf8 >= 0xE0 && utf8 < 0xF0) {
		return 3;		/* triple byte character */
	}
	/* what is left is either a single byte or an invalid multibyte header */
	return (utf8 < 0x80) ? 1 : 0;
}
00167 
00168 
/*
 * Character length implied by a UTF-8 lead byte value, including four-byte
 * sequences: 1 below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF, 4 for
 * 0xF0..0xF7, and 0 for anything else.
 */
static unsigned int mysqlnd_mbcharlen_utf8(unsigned int utf8)
{
	if (utf8 >= 0xC2 && utf8 < 0xE0) {
		return 2;		/* double byte character */
	}
	if (utf8 >= 0xE0 && utf8 < 0xF0) {
		return 3;		/* triple byte character */
	}
	if (utf8 >= 0xF0 && utf8 < 0xF8) {
		return 4;		/* four byte character */
	}
	/* what is left is either a single byte or an invalid multibyte header */
	return (utf8 < 0x80) ? 1 : 0;
}
00188 /* }}} */
00189 
00190 
00191 /* {{{ big5 functions */
/*
  Range tests for big5 lead and trail bytes. The argument is converted with a
  plain (unsigned int) cast, so it must already be a non-negative byte value:
  a negative `char` would sign-extend and fall outside every range.
*/
#define valid_big5head(c)	(0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xF9)
#define valid_big5tail(c)	((0x40 <= (unsigned int)(c) && (unsigned int)(c) <= 0x7E) || \
							(0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xFE))

/* NOTE(review): isbig5head/isbig5tail are not defined in the visible part of this file and
   this macro is not expanded there either - confirm whether it can be removed. */
#define isbig5code(c,d) (isbig5head(c) && isbig5tail(d))

/*
 * Check for a two-byte big5 character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are available and they form a valid
 * lead/trail pair, otherwise 0.
 *
 * The bytes are read as unsigned char so that values >= 0x80 compare correctly
 * where plain `char` is signed, and the length is checked before any byte is read.
 */
static unsigned int check_mb_big5(const char *start, const char *end)
{
	const unsigned char *bytes = (const unsigned char *) start;

	if ((end - start) < 2) {
		return 0;
	}
	return (valid_big5head(bytes[0]) && valid_big5tail(bytes[1])) ? 2 : 0;
}
00202 
00203 
/*
 * Character length implied by a big5 lead value: 2 when the value lies in
 * 0xA1..0xF9, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_big5(unsigned int big5)
{
	if (big5 >= 0xA1 && big5 <= 0xF9) {
		return 2;
	}
	return 1;
}
00208 /* }}} */
00209 
00210 
00211 /* {{{ cp932 functions */
/* Range tests for cp932 lead and trail bytes; every use of the argument is parenthesized. */
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
 * Check for a two-byte cp932 character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are available and they form a valid
 * lead/trail pair, otherwise 0. The length is checked before any byte is read.
 */
static unsigned int check_mb_cp932(const char *start, const char *end)
{
	const unsigned char *bytes = (const unsigned char *) start;

	if ((end - start) < 2) {
		return 0;
	}
	return (valid_cp932head(bytes[0]) && valid_cp932tail(bytes[1])) ? 2 : 0;
}
00221 
00222 
/*
 * Character length implied by a cp932 lead value. Only the low byte of the
 * argument is looked at: 2 when it lies in 0x81..0x9F or 0xE0..0xFC, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_cp932(unsigned int cp932)
{
	const unsigned char lead = (unsigned char) cp932;

	if (lead >= 0x81 && lead <= 0x9F) {
		return 2;
	}
	if (lead >= 0xE0 && lead <= 0xFC) {
		return 2;
	}
	return 1;
}
00227 /* }}} */
00228 
00229 
00230 /* {{{ euckr functions */
/* True when the low byte of `c` lies in 0xA1..0xFE. */
#define valid_euckr(c)	((0xA1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFE))

/*
 * Check for a two-byte euckr character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are available, the first byte has its high
 * bit set (>= 0x80) and the second byte satisfies valid_euckr(); otherwise 0.
 */
static unsigned int check_mb_euckr(const char *start, const char *end)
{
	const unsigned char *bytes = (const unsigned char *) start;

	if (end - start > 1 && bytes[0] >= 0x80 && valid_euckr(bytes[1])) {
		return 2;
	}
	/* too short, not a multi-byte lead, or invalid trail byte */
	return 0;
}
00246 
00247 
/*
 * Character length implied by an euckr lead value. Only the low byte of the
 * argument is looked at: 2 when it lies in 0xA1..0xFE, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_euckr(unsigned int kr)
{
	const unsigned char lead = (unsigned char) kr;

	if (lead < 0xA1 || lead > 0xFE) {
		return 1;
	}
	return 2;
}
00252 /* }}} */
00253 
00254 
00255 /* {{{ eucjpms functions */
/* Byte-class tests for eucjpms; each one masks its argument down to the low byte. */
#define valid_eucjpms(c)		(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)
#define valid_eucjpms_kata(c)	(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)
#define valid_eucjpms_ss2(c)	(((c) & 0xFF) == 0x8E)
#define valid_eucjpms_ss3(c)	(((c) & 0xFF) == 0x8F)

/*
 * Check for a multi-byte eucjpms character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns
 *   2  for a pair of bytes that both lie in 0xA1..0xFE,
 *   2  for 0x8E followed by a byte in 0xA1..0xDF,
 *   3  for 0x8F followed by two bytes in 0xA1..0xFE,
 *   0  for an empty range, a first byte below 0x80, a truncated sequence or
 *      any other byte combination.
 *
 * The three-byte form reports 3, in line with mysqlnd_mbcharlen_eucjpms() and
 * check_mb_ujis(), so that callers consume the whole character.
 */
static unsigned int check_mb_eucjpms(const char *start, const char *end)
{
	if (start >= end) {
		return 0;	/* nothing to look at */
	}
	if (*((const unsigned char *) start) < 0x80) {
		return 0;	/* invalid eucjpms character */
	}
	if (valid_eucjpms(start[0]) && (end - start) > 1 && valid_eucjpms(start[1])) {
		return 2;
	}
	if (valid_eucjpms_ss2(start[0]) && (end - start) > 1 && valid_eucjpms_kata(start[1])) {
		return 2;
	}
	if (valid_eucjpms_ss3(start[0]) && (end - start) > 2 && valid_eucjpms(start[1]) &&
		valid_eucjpms(start[2])) {
		return 3;
	}
	return 0;
}
00278 
00279 
/*
 * Character length implied by an eucjpms lead value. Only the low byte of the
 * argument is looked at: 2 for 0xA1..0xFE and for 0x8E, 3 for 0x8F, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_eucjpms(unsigned int jpms)
{
	const unsigned int lead = jpms & 0xFF;

	if (lead == 0x8F) {
		return 3;
	}
	if (lead == 0x8E || (lead >= 0xA1 && lead <= 0xFE)) {
		return 2;
	}
	return 1;
}
00290 /* }}} */
00291 
00292 
00293 /* {{{ gb2312 functions */
/* Range tests for gb2312 lead and trail bytes; the argument is reduced to its low byte. */
#define valid_gb2312_head(c)	(0xA1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xF7)
#define valid_gb2312_tail(c)	(0xA1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFE)


/*
 * Check for a two-byte gb2312 character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when the first byte is a valid lead byte, at least two bytes are
 * available and the second byte is a valid trail byte; otherwise 0.
 */
static unsigned int check_mb_gb2312(const char *start, const char *end)
{
	if (!valid_gb2312_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0;
	}
	return valid_gb2312_tail(start[1]) ? 2 : 0;
}
00303 
00304 
/*
 * Character length implied by a gb2312 lead value. Only the low byte of the
 * argument is looked at: 2 when it lies in 0xA1..0xF7, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_gb2312(unsigned int gb)
{
	const unsigned char lead = (unsigned char) gb;

	if (lead < 0xA1 || lead > 0xF7) {
		return 1;
	}
	return 2;
}
00309 /* }}} */
00310 
00311 
00312 /* {{{ gbk functions */
/* Range tests for gbk lead and trail bytes; the argument is reduced to its low byte. */
#define valid_gbk_head(c)	(0x81 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFE)
#define valid_gbk_tail(c)	((0x40 <= (unsigned char)(c) && (unsigned char)(c) <= 0x7E) || \
							(0x80 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFE))

/*
 * Check for a two-byte gbk character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when the first byte is a valid lead byte, at least two bytes are
 * available and the second byte is a valid trail byte; otherwise 0.
 */
static unsigned int check_mb_gbk(const char *start, const char *end)
{
	if (valid_gbk_head(start[0]) && (end - start) > 1) {
		if (valid_gbk_tail(start[1])) {
			return 2;
		}
	}
	return 0;
}
00320 
/*
 * Character length implied by a gbk lead value. Only the low byte of the
 * argument is looked at: 2 when it lies in 0x81..0xFE, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_gbk(unsigned int gbk)
{
	const unsigned char lead = (unsigned char) gbk;

	if (lead < 0x81 || lead > 0xFE) {
		return 1;
	}
	return 2;
}
00325 /* }}} */
00326 
00327 
00328 /* {{{ sjis functions */
/* Range tests for sjis lead and trail bytes; the argument must already be an unsigned byte value. */
#define valid_sjis_head(c)	((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_sjis_tail(c)	((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
 * Check for a two-byte sjis character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when the first byte is a valid lead byte, at least two bytes are
 * available and the second byte is a valid trail byte; otherwise 0.
 */
static unsigned int check_mb_sjis(const char *start, const char *end)
{
	const unsigned char *bytes = (const unsigned char *) start;
	unsigned int length = 0;

	if (valid_sjis_head(bytes[0]) && (end - start) > 1 && valid_sjis_tail(bytes[1])) {
		length = 2;
	}
	return length;
}
00337 
00338 
/*
 * Character length implied by an sjis lead value. Only the low byte of the
 * argument is looked at: 2 when it lies in 0x81..0x9F or 0xE0..0xFC, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_sjis(unsigned int sjis)
{
	const unsigned char lead = (unsigned char) sjis;

	if (lead >= 0x81 && lead <= 0x9F) {
		return 2;
	}
	if (lead >= 0xE0 && lead <= 0xFC) {
		return 2;
	}
	return 1;
}
00343 /* }}} */
00344 
00345 
00346 /* {{{ ucs2 functions */
/*
 * Report the length of the ucs2 character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns 2 when at least two bytes are available, and 0 otherwise. Reporting
 * 2 for a shorter range would make a caller that copies the reported number
 * of bytes read past `end`.
 */
static unsigned int check_mb_ucs2(const char *start, const char *end)
{
	if ((end - start) < 2) {
		return 0;	/* trailing odd byte or empty range */
	}
	return 2;
}
00351 
/*
 * Character length for ucs2: the argument is ignored and the result is always 2.
 */
static unsigned int mysqlnd_mbcharlen_ucs2(unsigned int ucs2)
{
	(void) ucs2;	/* intentionally unused */

	return 2;
}
00356 /* }}} */
00357 
00358 
00359 /* {{{ ujis functions */
/* Byte-class tests for ujis; each one masks its argument down to the low byte. */
#define valid_ujis(c)		((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))
#define valid_ujis_kata(c)	((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))
#define valid_ujis_ss2(c)	(((c)&0xFF) == 0x8E)
#define valid_ujis_ss3(c)	(((c)&0xFF) == 0x8F)

/*
 * Check for a multi-byte ujis character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns
 *   2  for a pair of bytes that both lie in 0xA1..0xFE,
 *   2  for 0x8E followed by a byte in 0xA1..0xDF,
 *   3  for 0x8F followed by two bytes in 0xA1..0xFE,
 *   0  for an empty range, a first byte below 0x80, a truncated sequence or
 *      any other byte combination.
 *
 * No byte at or beyond `end` is read: the second byte is looked at only when
 * two bytes are available, the third only when three are.
 */
static unsigned int check_mb_ujis(const char *start, const char *end)
{
	if (start >= end) {
		return 0;	/* nothing to look at */
	}
	if (*(const unsigned char *) start < 0x80) {
		return 0;	/* invalid ujis character */
	}
	if ((end - start) > 1) {
		if (valid_ujis(start[0]) && valid_ujis(start[1])) {
			return 2;
		}
		if (valid_ujis_ss2(start[0]) && valid_ujis_kata(start[1])) {
			return 2;
		}
	}
	if (valid_ujis_ss3(start[0]) && (end - start) > 2 && valid_ujis(start[1]) && valid_ujis(start[2])) {
		return 3;
	}
	return 0;
}
00381 
00382 
/*
 * Character length implied by a ujis lead value. Only the low byte of the
 * argument is looked at: 2 for 0xA1..0xFE and for 0x8E, 3 for 0x8F, otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_ujis(unsigned int ujis)
{
	const unsigned int lead = ujis & 0xFF;

	if (lead >= 0xA1 && lead <= 0xFE) {
		return 2;
	}
	if (lead == 0x8E) {
		return 2;
	}
	if (lead == 0x8F) {
		return 3;
	}
	return 1;
}
00387 /* }}} */
00388 
00389 
00390 
00391 /* {{{ utf16 functions */
/* Tests on the low byte of `x`: top six bits 110110 (0xD8..0xDB) resp. 110111 (0xDC..0xDF). */
#define UTF16_HIGH_HEAD(x)	((((unsigned char) (x)) & 0xFC) == 0xD8)
#define UTF16_LOW_HEAD(x)	((((unsigned char) (x)) & 0xFC) == 0xDC)

/*
 * Check for a utf16 character at `start`; `end` points one past the last
 * readable byte. Only the first byte of each two-byte unit is inspected.
 *
 * Returns
 *   4  when the first unit starts with a high-surrogate byte, four bytes are
 *      available and the second unit starts with a low-surrogate byte,
 *   2  when the first unit starts with neither kind of surrogate byte,
 *   0  when fewer than two bytes are available, the pair is incomplete, or
 *      the first unit starts with a low-surrogate byte.
 */
static unsigned int check_mb_utf16(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;
	}

	if (UTF16_HIGH_HEAD(start[0])) {
		if (end - start >= 4 && UTF16_LOW_HEAD(start[2])) {
			return 4;
		}
		return 0;
	}

	/* a low surrogate must not open a character */
	return UTF16_LOW_HEAD(start[0]) ? 0 : 2;
}
00410 
00411 
/*
 * Character length implied by the first byte of a utf16 unit. Only the low
 * byte of the argument is looked at: 4 when its top six bits are 110110
 * (0xD8..0xDB, a high-surrogate lead), otherwise 2.
 *
 * The return type is spelled `unsigned int`, like the other mbcharlen
 * helpers in this file, instead of the non-standard `uint` alias.
 */
static unsigned int mysqlnd_mbcharlen_utf16(unsigned int utf16)
{
	const unsigned char first = (unsigned char) utf16;

	return ((first & 0xFC) == 0xD8) ? 4 : 2;
}
00416 /* }}} */
00417 
00418 
00419 /* {{{ utf32 functions */
/*
 * Report the length of the utf32 character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns 4 when at least four bytes are available, and 0 otherwise. Reporting
 * 4 for a shorter range would make a caller that copies the reported number
 * of bytes read past `end`.
 *
 * The return type is spelled `unsigned int`, like the other validators in
 * this file, instead of the non-standard `uint` alias.
 */
static unsigned int
check_mb_utf32(const char *start, const char *end)
{
	if ((end - start) < 4) {
		return 0;	/* truncated unit or empty range */
	}
	return 4;
}
00425 
00426 
/*
 * Character length for utf32: the argument is ignored and the result is always 4.
 *
 * The return type is spelled `unsigned int`, like the other mbcharlen
 * helpers in this file, instead of the non-standard `uint` alias.
 */
static unsigned int
mysqlnd_mbcharlen_utf32(unsigned int utf32)
{
	(void) utf32;	/* intentionally unused */

	return 4;
}
00432 /* }}} */
00433 
00434 /*
00435   The server sometimes compiles the full UTF-8 (the mb4 variant) as utf8mb4 and the old one as utf8,
00436   for BC reasons. Sometimes utf8mb4 is just utf8, and the old charsets are utf8mb3.
00437   With these macros the names can be changed easily, and could be made compilation-dependent.
00438 */
00439 
00440 #define UTF8_MB4 "utf8mb4"
00441 #define UTF8_MB3 "utf8"
00442 
00443 /* {{{ mysqlnd_charsets */
00444 const MYSQLND_CHARSET mysqlnd_charsets[] =
00445 {
00446        {   1, "big5","big5_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
00447        {   3, "dec8", "dec8_swedisch_ci", 1, 1, "", NULL, NULL},
00448        {   4, "cp850", "cp850_general_ci", 1, 1, "", NULL, NULL},
00449        {   6, "hp8", "hp8_english_ci", 1, 1, "", NULL, NULL},
00450        {   7, "koi8r", "koi8r_general_ci", 1, 1, "", NULL, NULL},
00451        {   8, "latin1", "latin1_swedish_ci", 1, 1, "", NULL, NULL},
00452        {   9, "latin2", "latin2_general_ci", 1, 1, "", NULL, NULL},
00453        {  10, "swe7", "swe7_swedish_ci", 1, 1, "", NULL, NULL},
00454        {  11, "ascii", "ascii_general_ci", 1, 1, "", NULL, NULL},
00455        {  12, "ujis", "ujis_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
00456        {  13, "sjis", "sjis_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
00457        {  16, "hebrew", "hebrew_general_ci", 1, 1, "", NULL, NULL},
00458        {  18, "tis620", "tis620_thai_ci", 1, 1, "", NULL, NULL},
00459        {  19, "euckr", "euckr_korean_ci", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
00460        {  22, "koi8u", "koi8u_general_ci", 1, 1, "", NULL, NULL},
00461        {  24, "gb2312", "gb2312_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
00462        {  25, "greek", "greek_general_ci", 1, 1, "", NULL, NULL},
00463        {  26, "cp1250", "cp1250_general_ci", 1, 1, "", NULL, NULL},
00464        {  28, "gbk", "gbk_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
00465        {  30, "latin5", "latin5_turkish_ci", 1, 1, "", NULL, NULL},
00466        {  32, "armscii8", "armscii8_general_ci", 1, 1, "", NULL, NULL},
00467        {  33, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
00468        {  35, "ucs2", "ucs2_general_ci", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00469        {  36, "cp866", "cp866_general_ci", 1, 1, "", NULL, NULL},
00470        {  37, "keybcs2", "keybcs2_general_ci", 1, 1, "", NULL, NULL},
00471        {  38, "macce", "macce_general_ci", 1, 1, "", NULL, NULL},
00472        {  39, "macroman", "macroman_general_ci", 1, 1, "", NULL, NULL},
00473        {  40, "cp852", "cp852_general_ci", 1, 1, "", NULL, NULL},
00474        {  41, "latin7", "latin7_general_ci", 1, 1, "", NULL, NULL},
00475        {  51, "cp1251", "cp1251_general_ci", 1, 1, "", NULL, NULL},
00476        {  57, "cp1256", "cp1256_general_ci", 1, 1, "", NULL, NULL},
00477        {  59, "cp1257", "cp1257_general_ci", 1, 1, "", NULL, NULL},
00478        {  63, "binary", "binary", 1, 1, "", NULL, NULL},
00479        {  92, "geostd8", "geostd8_general_ci", 1, 1, "", NULL, NULL},
00480        {  95, "cp932", "cp932_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
00481        {  97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
00482        {   2, "latin2", "latin2_czech_cs", 1, 1, "", NULL, NULL},
00483        {   5, "latin1", "latin1_german_ci", 1, 1, "", NULL, NULL},
00484        {  14, "cp1251", "cp1251_bulgarian_ci", 1, 1, "", NULL, NULL},
00485        {  15, "latin1", "latin1_danish_ci", 1, 1, "", NULL, NULL},
00486        {  17, "filename", "filename", 1, 5, "", NULL, NULL},
00487        {  20, "latin7", "latin7_estonian_cs", 1, 1, "", NULL, NULL},
00488        {  21, "latin2", "latin2_hungarian_ci", 1, 1, "", NULL, NULL},
00489        {  23, "cp1251", "cp1251_ukrainian_ci", 1, 1, "", NULL, NULL},
00490        {  27, "latin2", "latin2_croatian_ci", 1, 1, "", NULL, NULL},
00491        {  29, "cp1257", "cp1257_lithunian_ci", 1, 1, "", NULL, NULL},
00492        {  31, "latin1", "latin1_german2_ci", 1, 1, "", NULL, NULL},
00493        {  34, "cp1250", "cp1250_czech_cs", 1, 1, "", NULL, NULL},
00494        {  42, "latin7", "latin7_general_cs", 1, 1, "", NULL, NULL},
00495        {  43, "macce", "macce_bin", 1, 1, "", NULL, NULL},
00496        {  44, "cp1250", "cp1250_croatian_ci", 1, 1, "", NULL, NULL},
00497        {  45, UTF8_MB4, UTF8_MB4"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8,  check_mb_utf8_valid},
00498        {  46, UTF8_MB4, UTF8_MB4"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8,  check_mb_utf8_valid},
00499        {  47, "latin1", "latin1_bin", 1, 1, "", NULL, NULL},
00500        {  48, "latin1", "latin1_general_ci", 1, 1, "", NULL, NULL},
00501        {  49, "latin1", "latin1_general_cs", 1, 1, "", NULL, NULL},
00502        {  50, "cp1251", "cp1251_bin", 1, 1, "", NULL, NULL},
00503        {  52, "cp1251", "cp1251_general_cs", 1, 1, "", NULL, NULL},
00504        {  53, "macroman", "macroman_bin", 1, 1, "", NULL, NULL},
00505        {  54, "utf16", "utf16_general_ci", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
00506        {  55, "utf16", "utf16_bin", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
00507        {  58, "cp1257", "cp1257_bin", 1, 1, "", NULL, NULL},
00508 #ifdef USED_TO_BE_SO_BEFORE_MYSQL_5_5
00509        {  60, "armascii8", "armascii8_bin", 1, 1, "", NULL, NULL},
00510 #endif
00511        {  60, "utf32", "utf32_general_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
00512        {  61, "utf32", "utf32_bin", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
00513        {  65, "ascii", "ascii_bin", 1, 1, "", NULL, NULL},
00514        {  66, "cp1250", "cp1250_bin", 1, 1, "", NULL, NULL},
00515        {  67, "cp1256", "cp1256_bin", 1, 1, "", NULL, NULL},
00516        {  68, "cp866", "cp866_bin", 1, 1, "", NULL, NULL},
00517        {  69, "dec8", "dec8_bin", 1, 1, "", NULL, NULL},
00518        {  70, "greek", "greek_bin", 1, 1, "", NULL, NULL},
00519        {  71, "hebew", "hebrew_bin", 1, 1, "", NULL, NULL},
00520        {  72, "hp8", "hp8_bin", 1, 1, "", NULL, NULL},
00521        {  73, "keybcs2", "keybcs2_bin", 1, 1, "", NULL, NULL},
00522        {  74, "koi8r", "koi8r_bin", 1, 1, "", NULL, NULL},
00523        {  75, "koi8u", "koi8u_bin", 1, 1, "", NULL, NULL},
00524        {  77, "latin2", "latin2_bin", 1, 1, "", NULL, NULL},
00525        {  78, "latin5", "latin5_bin", 1, 1, "", NULL, NULL},
00526        {  79, "latin7", "latin7_bin", 1, 1, "", NULL, NULL},
00527        {  80, "cp850", "cp850_bin", 1, 1, "", NULL, NULL},
00528        {  81, "cp852", "cp852_bin", 1, 1, "", NULL, NULL},
00529        {  82, "swe7", "swe7_bin", 1, 1, "", NULL, NULL},
00530        {  93, "geostd8", "geostd8_bin", 1, 1, "", NULL, NULL},
00531        {  83, UTF8_MB3, UTF8_MB3"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
00532        {  84, "big5", "big5_bin", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
00533        {  85, "euckr", "euckr_bin", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
00534        {  86, "gb2312", "gb2312_bin", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
00535        {  87, "gbk", "gbk_bin", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
00536        {  88, "sjis", "sjis_bin", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
00537        {  89, "tis620", "tis620_bin", 1, 1, "", NULL, NULL},
00538        {  90, "ucs2", "ucs2_bin", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00539        {  91, "ujis", "ujis_bin", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
00540        {  94, "latin1", "latin1_spanish_ci", 1, 1, "", NULL, NULL},
00541        {  96, "cp932", "cp932_bin", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
00542        {  99, "cp1250", "cp1250_polish_ci", 1, 1, "", NULL, NULL},
00543        {  98, "eucjpms", "eucjpms_bin", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
00544        { 128, "ucs2", "ucs2_unicode_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00545        { 129, "ucs2", "ucs2_icelandic_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00546        { 130, "ucs2", "ucs2_latvian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00547        { 131, "ucs2", "ucs2_romanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00548        { 132, "ucs2", "ucs2_slovenian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00549        { 133, "ucs2", "ucs2_polish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00550        { 134, "ucs2", "ucs2_estonian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00551        { 135, "ucs2", "ucs2_spanish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00552        { 136, "ucs2", "ucs2_swedish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00553        { 137, "ucs2", "ucs2_turkish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00554        { 138, "ucs2", "ucs2_czech_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00555        { 139, "ucs2", "ucs2_danish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00556        { 140, "ucs2", "ucs2_lithunian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00557        { 141, "ucs2", "ucs2_slovak_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00558        { 142, "ucs2", "ucs2_spanish2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00559        { 143, "ucs2", "ucs2_roman_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00560        { 144, "ucs2", "ucs2_persian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00561        { 145, "ucs2", "ucs2_esperanto_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00562        { 146, "ucs2", "ucs2_hungarian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00563        { 147, "ucs2", "ucs2_sinhala_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
00564        { 149, "ucs2", "ucs2_croatian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
00565 
00566        { 192, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00567        { 193, UTF8_MB3, UTF8_MB3"_icelandic_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00568        { 194, UTF8_MB3, UTF8_MB3"_latvian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
00569        { 195, UTF8_MB3, UTF8_MB3"_romanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00570        { 196, UTF8_MB3, UTF8_MB3"_slovenian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00571        { 197, UTF8_MB3, UTF8_MB3"_polish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00572        { 198, UTF8_MB3, UTF8_MB3"_estonian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00573        { 119, UTF8_MB3, UTF8_MB3"_spanish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00574        { 200, UTF8_MB3, UTF8_MB3"_swedish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00575        { 201, UTF8_MB3, UTF8_MB3"_turkish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00576        { 202, UTF8_MB3, UTF8_MB3"_czech_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00577        { 203, UTF8_MB3, UTF8_MB3"_danish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
00578        { 204, UTF8_MB3, UTF8_MB3"_lithunian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
00579        { 205, UTF8_MB3, UTF8_MB3"_slovak_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00580        { 206, UTF8_MB3, UTF8_MB3"_spanish2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00581        { 207, UTF8_MB3, UTF8_MB3"_roman_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00582        { 208, UTF8_MB3, UTF8_MB3"_persian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00583        { 209, UTF8_MB3, UTF8_MB3"_esperanto_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00584        { 210, UTF8_MB3, UTF8_MB3"_hungarian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00585        { 211, UTF8_MB3, UTF8_MB3"_sinhala_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
00586        { 213, UTF8_MB3, UTF8_MB3"_croatian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
00587 
00588        { 224, UTF8_MB4, UTF8_MB4"_unicode_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00589        { 225, UTF8_MB4, UTF8_MB4"_icelandic_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00590        { 226, UTF8_MB4, UTF8_MB4"_latvian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00591        { 227, UTF8_MB4, UTF8_MB4"_romanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00592        { 228, UTF8_MB4, UTF8_MB4"_slovenian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00593        { 229, UTF8_MB4, UTF8_MB4"_polish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00594        { 230, UTF8_MB4, UTF8_MB4"_estonian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00595        { 231, UTF8_MB4, UTF8_MB4"_spanish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00596        { 232, UTF8_MB4, UTF8_MB4"_swedish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00597        { 233, UTF8_MB4, UTF8_MB4"_turkish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00598        { 234, UTF8_MB4, UTF8_MB4"_czech_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00599        { 235, UTF8_MB4, UTF8_MB4"_danish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00600        { 236, UTF8_MB4, UTF8_MB4"_lithuanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00601        { 237, UTF8_MB4, UTF8_MB4"_slovak_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00602        { 238, UTF8_MB4, UTF8_MB4"_spanish2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00603        { 239, UTF8_MB4, UTF8_MB4"_roman_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00604        { 240, UTF8_MB4, UTF8_MB4"_persian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00605        { 241, UTF8_MB4, UTF8_MB4"_esperanto_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00606        { 242, UTF8_MB4, UTF8_MB4"_hungarian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00607        { 243, UTF8_MB4, UTF8_MB4"_sinhala_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00608 
00609        { 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
00610        {   0, NULL, NULL, 0, 0, NULL, NULL, NULL}
00611 };
00612 /* }}} */
00613 
00614 
00615 /* {{{ mysqlnd_find_charset_nr */
00616 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_nr(unsigned int charsetnr)
00617 {
00618        const MYSQLND_CHARSET * c = mysqlnd_charsets;
00619 
00620        do {
00621               if (c->nr == charsetnr) {
00622                      return c;
00623               }
00624               ++c;
00625        } while (c[0].nr != 0);
00626        return NULL;
00627 }
00628 /* }}} */
00629 
00630 
00631 /* {{{ mysqlnd_find_charset_name */
00632 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_name(const char * const name)
00633 {
00634        const MYSQLND_CHARSET *c = mysqlnd_charsets;
00635 
00636        do {
00637               if (!strcasecmp(c->name, name)) {
00638                      return c;
00639               }
00640               ++c;
00641        } while (c[0].nr != 0);
00642        return NULL;
00643 }
00644 /* }}} */
00645 
00646 
00647 /* {{{ mysqlnd_cset_escape_quotes */
00648 PHPAPI ulong mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset, char *newstr,
00649                                                                       const char * escapestr, size_t escapestr_len TSRMLS_DC)
00650 {
00651        const char    *newstr_s = newstr;
00652        const char    *newstr_e = newstr + 2 * escapestr_len;
00653        const char    *end = escapestr + escapestr_len;
00654        zend_bool     escape_overflow = FALSE;
00655 
00656        DBG_ENTER("mysqlnd_cset_escape_quotes");
00657 
00658        for (;escapestr < end; escapestr++) {
00659               unsigned int len = 0;
00660               /* check unicode characters */
00661 
00662               if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
00663 
00664                      /* check possible overflow */
00665                      if ((newstr + len) > newstr_e) {
00666                             escape_overflow = TRUE;
00667                             break;
00668                      }
00669                      /* copy mb char without escaping it */
00670                      while (len--) {
00671                             *newstr++ = *escapestr++;
00672                      }
00673                      escapestr--;
00674                      continue;
00675               }
00676               if (*escapestr == '\'') {
00677                      if (newstr + 2 > newstr_e) {
00678                             escape_overflow = TRUE;
00679                             break;
00680                      }
00681                      *newstr++ = '\'';
00682                      *newstr++ = '\'';
00683               } else {
00684                      if (newstr + 1 > newstr_e) {
00685                             escape_overflow = TRUE;
00686                             break;
00687                      }
00688                      *newstr++ = *escapestr;
00689               }
00690        }
00691        *newstr = '\0';
00692 
00693        if (escape_overflow) {
00694               DBG_RETURN((ulong)~0);
00695        }
00696        DBG_RETURN((ulong)(newstr - newstr_s));
00697 }
00698 /* }}} */
00699 
00700 
00701 /* {{{ mysqlnd_cset_escape_slashes */
00702 PHPAPI ulong mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset, char *newstr,
00703                                                                        const char * escapestr, size_t escapestr_len TSRMLS_DC)
00704 {
00705        const char    *newstr_s = newstr;
00706        const char    *newstr_e = newstr + 2 * escapestr_len;
00707        const char    *end = escapestr + escapestr_len;
00708        zend_bool     escape_overflow = FALSE;
00709 
00710        DBG_ENTER("mysqlnd_cset_escape_slashes");
00711        DBG_INF_FMT("charset=%s", cset->name);
00712 
00713        for (;escapestr < end; escapestr++) {
00714               char esc = '\0';
00715               unsigned int len = 0;
00716 
00717               /* check unicode characters */
00718               if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
00719                      /* check possible overflow */
00720                      if ((newstr + len) > newstr_e) {
00721                             escape_overflow = TRUE;
00722                             break;
00723                      }
00724                      /* copy mb char without escaping it */
00725                      while (len--) {
00726                             *newstr++ = *escapestr++;
00727                      }
00728                      escapestr--;
00729                      continue;
00730               }
00731               if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
00732                      esc = *escapestr;
00733               } else {
00734                      switch (*escapestr) {
00735                             case 0:
00736                                    esc = '0';
00737                                    break;
00738                             case '\n':
00739                                    esc = 'n';
00740                                    break;
00741                             case '\r':
00742                                    esc = 'r';
00743                                    break;
00744                             case '\\':
00745                             case '\'':
00746                             case '"':
00747                                    esc = *escapestr;
00748                                    break;
00749                             case '\032':
00750                                    esc = 'Z';
00751                                    break;
00752                      }
00753               }
00754               if (esc) {
00755                      if (newstr + 2 > newstr_e) {
00756                             escape_overflow = TRUE;
00757                             break;
00758                      }
00759                      /* copy escaped character */
00760                      *newstr++ = '\\';
00761                      *newstr++ = esc;
00762               } else {
00763                      if (newstr + 1 > newstr_e) {
00764                             escape_overflow = TRUE;
00765                             break;
00766                      }
00767                      /* copy non escaped character */
00768                      *newstr++ = *escapestr;
00769               }
00770        }
00771        *newstr = '\0';
00772 
00773        if (escape_overflow) {
00774               DBG_RETURN((ulong)~0);
00775        }
00776        DBG_RETURN((ulong)(newstr - newstr_s));
00777 }
00778 /* }}} */
00779 
00780 /*
00781  * Local variables:
00782  * tab-width: 4
00783  * c-basic-offset: 4
00784  * End:
00785  * vim600: noet sw=4 ts=4 fdm=marker
00786  * vim<600: noet sw=4 ts=4
00787  */