Back to index

php5  5.3.10
php_mbregex.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
00016    +----------------------------------------------------------------------+
00017  */
00018 
00019 /* $Id: php_mbregex.c 321634 2012-01-01 13:15:04Z felipe $ */
00020 
00021 
00022 #ifdef HAVE_CONFIG_H
00023 #include "config.h"
00024 #endif
00025 
00026 #include "php.h"
00027 #include "php_ini.h"
00028 
00029 #if HAVE_MBREGEX
00030 
00031 #include "ext/standard/php_smart_str.h"
00032 #include "ext/standard/info.h"
00033 #include "php_mbregex.h"
00034 #include "mbstring.h"
00035  
00036 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
00037 #include <oniguruma.h>
00038 #undef UChar
00039 
00040 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
00041 
/* State shared by the multibyte regex functions; fields are reached through MBREX(). */
00042 struct _zend_mb_regex_globals {
00043        OnigEncoding default_mbctype; /* set to EUC-JP by the constructor; current_mbctype is reset to this at request shutdown */
00044        OnigEncoding current_mbctype; /* encoding passed to the pattern compiler */
00045        HashTable ht_rc; /* cache of compiled regexes, looked up by pattern text */
00046        zval *search_str; /* released and set to NULL at request shutdown */
00047        zval *search_str_val; /* not referenced anywhere in the code visible here */
00048        unsigned int search_pos; /* reset to 0 at request shutdown */
00049        php_mb_regex_t *search_re; /* set to NULL by the constructor */
00050        OnigRegion *search_regs; /* freed with onig_region_free() at request shutdown */
00051        OnigOptionType regex_default_options; /* options used when the caller supplies no option string */
00052        OnigSyntaxType *regex_default_syntax; /* syntax used when the caller supplies no option string */
00053 };
00054 
/* Shorthand for field g of the regex globals structure that hangs off the mbstring module globals. */
00055 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
00056 
00057 /* {{{ static void php_mb_regex_free_cache() */
00058 static void php_mb_regex_free_cache(php_mb_regex_t **pre) 
00059 {
00060        onig_free(*pre);
00061 }
00062 /* }}} */
00063 
00064 /* {{{ _php_mb_regex_globals_ctor */
00065 static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
00066 {
00067        pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
00068        pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
00069        zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
00070        pglobals->search_str = (zval*) NULL;
00071        pglobals->search_re = (php_mb_regex_t*)NULL;
00072        pglobals->search_pos = 0;
00073        pglobals->search_regs = (OnigRegion*)NULL;
00074        pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
00075        pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
00076        return SUCCESS;
00077 }
00078 /* }}} */
00079 
00080 /* {{{ _php_mb_regex_globals_dtor */
00081 static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC) 
00082 {
00083        zend_hash_destroy(&pglobals->ht_rc);
00084 }
00085 /* }}} */
00086 
00087 /* {{{ php_mb_regex_globals_alloc */
00088 zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
00089 {
00090        zend_mb_regex_globals *pglobals = pemalloc(
00091                      sizeof(zend_mb_regex_globals), 1);
00092        if (!pglobals) {
00093               return NULL;
00094        }
00095        if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
00096               pefree(pglobals, 1);
00097               return NULL;
00098        }
00099        return pglobals;
00100 }
00101 /* }}} */
00102 
00103 /* {{{ php_mb_regex_globals_free */
00104 void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
00105 {
00106        if (!pglobals) {
00107               return;
00108        }
00109        _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
00110        pefree(pglobals, 1);
00111 }
00112 /* }}} */
00113 
00114 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
00115 PHP_MINIT_FUNCTION(mb_regex)
00116 {
00117        onig_init();
00118        return SUCCESS;
00119 }
00120 /* }}} */
00121 
00122 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
00123 PHP_MSHUTDOWN_FUNCTION(mb_regex)
00124 {
00125        onig_end();
00126        return SUCCESS;
00127 }
00128 /* }}} */
00129 
00130 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
00131 PHP_RINIT_FUNCTION(mb_regex)
00132 {
00133        return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
00134 }
00135 /* }}} */
00136 
00137 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
00138 PHP_RSHUTDOWN_FUNCTION(mb_regex)
00139 {
00140        MBREX(current_mbctype) = MBREX(default_mbctype);
00141 
00142        if (MBREX(search_str) != NULL) {
00143               zval_ptr_dtor(&MBREX(search_str));
00144               MBREX(search_str) = (zval *)NULL;
00145        }
00146        MBREX(search_pos) = 0;
00147 
00148        if (MBREX(search_regs) != NULL) {
00149               onig_region_free(MBREX(search_regs), 1);
00150               MBREX(search_regs) = (OnigRegion *)NULL;
00151        }
00152        zend_hash_clean(&MBREX(ht_rc));
00153 
00154        return SUCCESS;
00155 }
00156 /* }}} */
00157 
00158 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
00159 PHP_MINFO_FUNCTION(mb_regex)
00160 {
00161        char buf[32];
00162        php_info_print_table_start();
00163        php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
00164        snprintf(buf, sizeof(buf), "%d.%d.%d",
00165                      ONIGURUMA_VERSION_MAJOR,
00166                      ONIGURUMA_VERSION_MINOR,
00167                      ONIGURUMA_VERSION_TEENY);
00168 #ifdef PHP_ONIG_BUNDLED
00169 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00170        php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
00171 #else  /* USE_COMBINATION_EXPLOSION_CHECK */
00172        php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
00173 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
00174 #endif /* PHP_BUNDLED_ONIG */
00175        php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
00176        php_info_print_table_end();
00177 }
00178 /* }}} */
00179 
00180 /*
00181  * encoding name resolver
00182  */
00183 
00184 /* {{{ encoding name map */
/* One row of the encoding table: an Oniguruma encoding and the names accepted for it. */
00185 typedef struct _php_mb_regex_enc_name_map_t {
00186        const char *names; /* NUL-separated names, ended by an empty string; read as a C string it yields the first name */
00187        OnigEncoding code; /* encoding the names resolve to */
00188 } php_mb_regex_enc_name_map_t;
00189 
00190 php_mb_regex_enc_name_map_t enc_name_map[] = {
00191 #ifdef ONIG_ENCODING_EUC_JP
00192        {
00193               "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
00194               ONIG_ENCODING_EUC_JP
00195        },
00196 #endif
00197 #ifdef ONIG_ENCODING_UTF8
00198        {
00199               "UTF-8\0UTF8\0",
00200               ONIG_ENCODING_UTF8
00201        },
00202 #endif
00203 #ifdef ONIG_ENCODING_UTF16_BE
00204        {
00205               "UTF-16\0UTF-16BE\0",
00206               ONIG_ENCODING_UTF16_BE
00207        },
00208 #endif
00209 #ifdef ONIG_ENCODING_UTF16_LE
00210        {
00211               "UTF-16LE\0",
00212               ONIG_ENCODING_UTF16_LE
00213        },
00214 #endif
00215 #ifdef ONIG_ENCODING_UTF32_BE
00216        {
00217               "UCS-4\0UTF-32\0UTF-32BE\0",
00218               ONIG_ENCODING_UTF32_BE
00219        },
00220 #endif
00221 #ifdef ONIG_ENCODING_UTF32_LE
00222        {
00223               "UCS-4LE\0UTF-32LE\0",
00224               ONIG_ENCODING_UTF32_LE
00225        },
00226 #endif
00227 #ifdef ONIG_ENCODING_SJIS
00228        {
00229               "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
00230               ONIG_ENCODING_SJIS
00231        },
00232 #endif
00233 #ifdef ONIG_ENCODING_BIG5
00234        {
00235               "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
00236               ONIG_ENCODING_BIG5
00237        },
00238 #endif
00239 #ifdef ONIG_ENCODING_EUC_CN
00240        {
00241               "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
00242               ONIG_ENCODING_EUC_CN
00243        },
00244 #endif
00245 #ifdef ONIG_ENCODING_EUC_TW
00246        {
00247               "EUC-TW\0EUCTW\0EUC_TW\0",
00248               ONIG_ENCODING_EUC_TW
00249        },
00250 #endif
00251 #ifdef ONIG_ENCODING_EUC_KR
00252        {
00253               "EUC-KR\0EUCKR\0EUC_KR\0",
00254               ONIG_ENCODING_EUC_KR
00255        },
00256 #endif
00257 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
00258        {
00259               "KOI8\0KOI-8\0",
00260               ONIG_ENCODING_KOI8
00261        },
00262 #endif
00263 #ifdef ONIG_ENCODING_KOI8_R
00264        {
00265               "KOI8R\0KOI8-R\0KOI-8R\0",
00266               ONIG_ENCODING_KOI8_R
00267        },
00268 #endif
00269 #ifdef ONIG_ENCODING_ISO_8859_1
00270        {
00271               "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
00272               ONIG_ENCODING_ISO_8859_1
00273        },
00274 #endif
00275 #ifdef ONIG_ENCODING_ISO_8859_2
00276        {
00277               "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
00278               ONIG_ENCODING_ISO_8859_2
00279        },
00280 #endif
00281 #ifdef ONIG_ENCODING_ISO_8859_3
00282        {
00283               "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
00284               ONIG_ENCODING_ISO_8859_3
00285        },
00286 #endif
00287 #ifdef ONIG_ENCODING_ISO_8859_4
00288        {
00289               "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
00290               ONIG_ENCODING_ISO_8859_4
00291        },
00292 #endif
00293 #ifdef ONIG_ENCODING_ISO_8859_5
00294        {
00295               "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
00296               ONIG_ENCODING_ISO_8859_5
00297        },
00298 #endif
00299 #ifdef ONIG_ENCODING_ISO_8859_6
00300        {
00301               "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
00302               ONIG_ENCODING_ISO_8859_6
00303        },
00304 #endif
00305 #ifdef ONIG_ENCODING_ISO_8859_7
00306        {
00307               "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
00308               ONIG_ENCODING_ISO_8859_7
00309        },
00310 #endif
00311 #ifdef ONIG_ENCODING_ISO_8859_8
00312        {
00313               "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
00314               ONIG_ENCODING_ISO_8859_8
00315        },
00316 #endif
00317 #ifdef ONIG_ENCODING_ISO_8859_9
00318        {
00319               "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
00320               ONIG_ENCODING_ISO_8859_9
00321        },
00322 #endif
00323 #ifdef ONIG_ENCODING_ISO_8859_10
00324        {
00325               "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
00326               ONIG_ENCODING_ISO_8859_10
00327        },
00328 #endif
00329 #ifdef ONIG_ENCODING_ISO_8859_11
00330        {
00331               "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
00332               ONIG_ENCODING_ISO_8859_11
00333        },
00334 #endif
00335 #ifdef ONIG_ENCODING_ISO_8859_13
00336        {
00337               "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
00338               ONIG_ENCODING_ISO_8859_13
00339        },
00340 #endif
00341 #ifdef ONIG_ENCODING_ISO_8859_14
00342        {
00343               "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
00344               ONIG_ENCODING_ISO_8859_14
00345        },
00346 #endif
00347 #ifdef ONIG_ENCODING_ISO_8859_15
00348        {
00349               "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
00350               ONIG_ENCODING_ISO_8859_15
00351        },
00352 #endif
00353 #ifdef ONIG_ENCODING_ISO_8859_16
00354        {
00355               "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
00356               ONIG_ENCODING_ISO_8859_16
00357        },
00358 #endif
00359 #ifdef ONIG_ENCODING_ASCII
00360        {
00361               "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
00362               ONIG_ENCODING_ASCII
00363        },
00364 #endif
00365        { NULL, ONIG_ENCODING_UNDEF }
00366 };
00367 /* }}} */
00368 
00369 /* {{{ php_mb_regex_name2mbctype */
00370 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
00371 {
00372        const char *p;
00373        php_mb_regex_enc_name_map_t *mapping;
00374 
00375        if (pname == NULL) {
00376               return ONIG_ENCODING_UNDEF;
00377        }
00378 
00379        for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
00380               for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
00381                      if (strcasecmp(p, pname) == 0) {
00382                             return mapping->code;
00383                      }
00384               }
00385        }
00386 
00387        return ONIG_ENCODING_UNDEF;
00388 }
00389 /* }}} */
00390 
00391 /* {{{ php_mb_regex_mbctype2name */
00392 static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
00393 {
00394        php_mb_regex_enc_name_map_t *mapping;
00395 
00396        for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
00397               if (mapping->code == mbctype) {
00398                      return mapping->names;
00399               }
00400        }
00401 
00402        return NULL;
00403 }
00404 /* }}} */
00405 
00406 /* {{{ php_mb_regex_set_mbctype */
00407 int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
00408 {
00409        OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
00410        if (mbctype == ONIG_ENCODING_UNDEF) {
00411               return FAILURE;
00412        }
00413        MBREX(current_mbctype) = mbctype;
00414        return SUCCESS;
00415 }
00416 /* }}} */
00417 
00418 /* {{{ php_mb_regex_set_default_mbctype */
00419 int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
00420 {
00421        OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
00422        if (mbctype == ONIG_ENCODING_UNDEF) {
00423               return FAILURE;
00424        }
00425        MBREX(default_mbctype) = mbctype;
00426        return SUCCESS;
00427 }
00428 /* }}} */
00429 
00430 /* {{{ php_mb_regex_get_mbctype */
00431 const char *php_mb_regex_get_mbctype(TSRMLS_D)
00432 {
00433        return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
00434 }
00435 /* }}} */
00436 
00437 /* {{{ php_mb_regex_get_default_mbctype */
00438 const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
00439 {
00440        return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
00441 }
00442 /* }}} */
00443 
00444 /*
00445  * regex cache
00446  */
00447 /* {{{ php_mbregex_compile_pattern */
00448 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
00449 {
00450        int err_code = 0;
00451        int found = 0;
00452        php_mb_regex_t *retval = NULL, **rc = NULL;
00453        OnigErrorInfo err_info;
00454        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
00455 
00456        found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
00457        if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
00458               if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
00459                      onig_error_code_to_str(err_str, err_code, err_info);
00460                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
00461                      retval = NULL;
00462                      goto out;
00463               }
00464               zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
00465        } else if (found == SUCCESS) {
00466               retval = *rc;
00467        }
00468 out:
00469        return retval; 
00470 }
00471 /* }}} */
00472 
00473 /* {{{ _php_mb_regex_get_option_string */
00474 static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
00475 {
00476        size_t len_left = len;
00477        size_t len_req = 0;
00478        char *p = str;
00479        char c;
00480 
00481        if ((option & ONIG_OPTION_IGNORECASE) != 0) {
00482               if (len_left > 0) {
00483                      --len_left;
00484                      *(p++) = 'i';
00485               }
00486               ++len_req;    
00487        }
00488 
00489        if ((option & ONIG_OPTION_EXTEND) != 0) {
00490               if (len_left > 0) {
00491                      --len_left;
00492                      *(p++) = 'x';
00493               }
00494               ++len_req;    
00495        }
00496 
00497        if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
00498                      (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
00499               if (len_left > 0) {
00500                      --len_left;
00501                      *(p++) = 'p';
00502               }
00503               ++len_req;    
00504        } else {
00505               if ((option & ONIG_OPTION_MULTILINE) != 0) {
00506                      if (len_left > 0) {
00507                             --len_left;
00508                             *(p++) = 'm';
00509                      }
00510                      ++len_req;    
00511               }
00512 
00513               if ((option & ONIG_OPTION_SINGLELINE) != 0) {
00514                      if (len_left > 0) {
00515                             --len_left;
00516                             *(p++) = 's';
00517                      }
00518                      ++len_req;    
00519               }
00520        }      
00521        if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
00522               if (len_left > 0) {
00523                      --len_left;
00524                      *(p++) = 'l';
00525               }
00526               ++len_req;    
00527        }
00528        if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
00529               if (len_left > 0) {
00530                      --len_left;
00531                      *(p++) = 'n';
00532               }
00533               ++len_req;    
00534        }
00535 
00536        c = 0;
00537 
00538        if (syntax == ONIG_SYNTAX_JAVA) {
00539               c = 'j';
00540        } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
00541               c = 'u';
00542        } else if (syntax == ONIG_SYNTAX_GREP) {
00543               c = 'g';
00544        } else if (syntax == ONIG_SYNTAX_EMACS) {
00545               c = 'c';
00546        } else if (syntax == ONIG_SYNTAX_RUBY) {
00547               c = 'r';
00548        } else if (syntax == ONIG_SYNTAX_PERL) {
00549               c = 'z';
00550        } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
00551               c = 'b';
00552        } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
00553               c = 'd';
00554        }
00555 
00556        if (c != 0) {
00557               if (len_left > 0) {
00558                      --len_left;
00559                      *(p++) = c;
00560               }
00561               ++len_req;
00562        }
00563 
00564 
00565        if (len_left > 0) {
00566               --len_left;
00567               *(p++) = '\0';
00568        }
00569        ++len_req;    
00570        if (len < len_req) {
00571               return len_req;
00572        }
00573 
00574        return 0;
00575 }
00576 /* }}} */
00577 
00578 /* {{{ _php_mb_regex_init_options */
00579 static void
00580 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) 
00581 {
00582        int n;
00583        char c;
00584        int optm = 0; 
00585 
00586        *syntax = ONIG_SYNTAX_RUBY;
00587 
00588        if (parg != NULL) {
00589               n = 0;
00590               while(n < narg) {
00591                      c = parg[n++];
00592                      switch (c) {
00593                             case 'i':
00594                                    optm |= ONIG_OPTION_IGNORECASE;
00595                                    break;
00596                             case 'x':
00597                                    optm |= ONIG_OPTION_EXTEND;
00598                                    break;
00599                             case 'm':
00600                                    optm |= ONIG_OPTION_MULTILINE;
00601                                    break;
00602                             case 's':
00603                                    optm |= ONIG_OPTION_SINGLELINE;
00604                                    break;
00605                             case 'p':
00606                                    optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
00607                                    break;
00608                             case 'l':
00609                                    optm |= ONIG_OPTION_FIND_LONGEST;
00610                                    break;
00611                             case 'n':
00612                                    optm |= ONIG_OPTION_FIND_NOT_EMPTY;
00613                                    break;
00614                             case 'j':
00615                                    *syntax = ONIG_SYNTAX_JAVA;
00616                                    break;
00617                             case 'u':
00618                                    *syntax = ONIG_SYNTAX_GNU_REGEX;
00619                                    break;
00620                             case 'g':
00621                                    *syntax = ONIG_SYNTAX_GREP;
00622                                    break;
00623                             case 'c':
00624                                    *syntax = ONIG_SYNTAX_EMACS;
00625                                    break;
00626                             case 'r':
00627                                    *syntax = ONIG_SYNTAX_RUBY;
00628                                    break;
00629                             case 'z':
00630                                    *syntax = ONIG_SYNTAX_PERL;
00631                                    break;
00632                             case 'b':
00633                                    *syntax = ONIG_SYNTAX_POSIX_BASIC;
00634                                    break;
00635                             case 'd':
00636                                    *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
00637                                    break;
00638                             case 'e':
00639                                    if (eval != NULL) *eval = 1; 
00640                                    break;
00641                             default:
00642                                    break;
00643                      }
00644               }
00645               if (option != NULL) *option|=optm; 
00646        }
00647 }
00648 /* }}} */
00649 
00650 /*
00651  * php functions
00652  */
00653 
00654 /* {{{ proto string mb_regex_encoding([string encoding])
00655    Returns the current encoding for regex as a string. */
00656 PHP_FUNCTION(mb_regex_encoding)
00657 {
00658        size_t argc = ZEND_NUM_ARGS();
00659        char *encoding;
00660        int encoding_len;
00661        OnigEncoding mbctype;
00662 
00663        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
00664               return;
00665        }
00666 
00667        if (argc == 0) {
00668               const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
00669 
00670               if (retval == NULL) {
00671                      RETURN_FALSE;
00672               }
00673 
00674               RETURN_STRING((char *)retval, 1);
00675        } else if (argc == 1) {
00676               mbctype = _php_mb_regex_name2mbctype(encoding);
00677 
00678               if (mbctype == ONIG_ENCODING_UNDEF) {
00679                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
00680                      RETURN_FALSE;
00681               }
00682 
00683               MBREX(current_mbctype) = mbctype;
00684               RETURN_TRUE;
00685        }
00686 }
00687 /* }}} */
00688 
00689 /* {{{ _php_mb_regex_ereg_exec */
00690 static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
00691 {
00692        zval **arg_pattern, *array;
00693        char *string;
00694        int string_len;
00695        php_mb_regex_t *re;
00696        OnigRegion *regs = NULL;
00697        int i, match_len, beg, end;
00698        OnigOptionType options;
00699        char *str;
00700 
00701        array = NULL;
00702 
00703        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
00704               RETURN_FALSE;
00705        }
00706 
00707        options = MBREX(regex_default_options);
00708        if (icase) {
00709               options |= ONIG_OPTION_IGNORECASE;
00710        }
00711 
00712        /* compile the regular expression from the supplied regex */
00713        if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
00714               /* we convert numbers to integers and treat them as a string */
00715               if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
00716                      convert_to_long_ex(arg_pattern);   /* get rid of decimal places */
00717               }
00718               convert_to_string_ex(arg_pattern);
00719               /* don't bother doing an extended regex with just a number */
00720        }
00721 
00722        if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
00723               php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
00724               RETVAL_FALSE;
00725               goto out;
00726        }
00727 
00728        re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
00729        if (re == NULL) {
00730               RETVAL_FALSE;
00731               goto out;
00732        }
00733 
00734        regs = onig_region_new();
00735 
00736        /* actually execute the regular expression */
00737        if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
00738               RETVAL_FALSE;
00739               goto out;
00740        }
00741 
00742        match_len = 1;
00743        str = string;
00744        if (array != NULL) {
00745               match_len = regs->end[0] - regs->beg[0];
00746               zval_dtor(array);
00747               array_init(array);
00748               for (i = 0; i < regs->num_regs; i++) {
00749                      beg = regs->beg[i];
00750                      end = regs->end[i];
00751                      if (beg >= 0 && beg < end && end <= string_len) {
00752                             add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
00753                      } else {
00754                             add_index_bool(array, i, 0);
00755                      }
00756               }
00757        }
00758 
00759        if (match_len == 0) {
00760               match_len = 1;
00761        }
00762        RETVAL_LONG(match_len);
00763 out:
00764        if (regs != NULL) {
00765               onig_region_free(regs, 1);
00766        }
00767 }
00768 /* }}} */
00769 
00770 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
00771    Regular expression match for multibyte string */
00772 PHP_FUNCTION(mb_ereg)
00773 {
00774        _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
00775 }
00776 /* }}} */
00777 
00778 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
00779    Case-insensitive regular expression match for multibyte string */
00780 PHP_FUNCTION(mb_eregi)
00781 {
00782        _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
00783 }
00784 /* }}} */
00785 
00786 /* {{{ _php_mb_regex_ereg_replace_exec */
00787 static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
00788 {
00789        zval **arg_pattern_zval;
00790 
00791        char *arg_pattern;
00792        int arg_pattern_len;
00793 
00794        char *replace;
00795        int replace_len;
00796 
00797        char *string;
00798        int string_len;
00799 
00800        char *p;
00801        php_mb_regex_t *re;
00802        OnigSyntaxType *syntax;
00803        OnigRegion *regs = NULL;
00804        smart_str out_buf = { 0 };
00805        smart_str eval_buf = { 0 };
00806        smart_str *pbuf;
00807        int i, err, eval, n;
00808        OnigUChar *pos;
00809        OnigUChar *string_lim;
00810        char *description = NULL;
00811        char pat_buf[2];
00812 
00813        const mbfl_encoding *enc;
00814 
00815        {
00816               const char *current_enc_name;
00817               current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
00818               if (current_enc_name == NULL ||
00819                      (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
00820                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
00821                      RETURN_FALSE;
00822               }
00823        }
00824        eval = 0;
00825        {
00826               char *option_str = NULL;
00827               int option_str_len = 0;
00828 
00829               if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
00830                                                                &arg_pattern_zval,
00831                                                                &replace, &replace_len,
00832                                                                &string, &string_len,
00833                                                                &option_str, &option_str_len) == FAILURE) {
00834                      RETURN_FALSE;
00835               }
00836 
00837               if (option_str != NULL) {
00838                      _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
00839               } else {
00840                      options |= MBREX(regex_default_options);
00841                      syntax = MBREX(regex_default_syntax);
00842               }
00843        }
00844        if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
00845               arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
00846               arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
00847        } else {
00848               /* FIXME: this code is not multibyte aware! */
00849               convert_to_long_ex(arg_pattern_zval);
00850               pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);  
00851               pat_buf[1] = '\0';
00852 
00853               arg_pattern = pat_buf;
00854               arg_pattern_len = 1; 
00855        }
00856        /* create regex pattern buffer */
00857        re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
00858        if (re == NULL) {
00859               RETURN_FALSE;
00860        }
00861 
00862        if (eval) {
00863               pbuf = &eval_buf;
00864               description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
00865        } else {
00866               pbuf = &out_buf;
00867               description = NULL;
00868        }
00869 
00870        /* do the actual work */
00871        err = 0;
00872        pos = (OnigUChar *)string;
00873        string_lim = (OnigUChar*)(string + string_len);
00874        regs = onig_region_new();
00875        while (err >= 0) {
00876               err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
00877               if (err <= -2) {
00878                      OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
00879                      onig_error_code_to_str(err_str, err);
00880                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
00881                      break;
00882               }
00883               if (err >= 0) {
00884 #if moriyoshi_0
00885                      if (regs->beg[0] == regs->end[0]) {
00886                             php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
00887                             break;
00888                      }
00889 #endif
00890                      /* copy the part of the string before the match */
00891                      smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
00892                      /* copy replacement and backrefs */
00893                      i = 0;
00894                      p = replace;
00895                      while (i < replace_len) {
00896                             int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
00897                             n = -1;
00898                             if ((replace_len - i) >= 2 && fwd == 1 &&
00899                                    p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
00900                                    n = p[1] - '0';
00901                             }
00902                             if (n >= 0 && n < regs->num_regs) {
00903                                    if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
00904                                           smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
00905                                    }
00906                                    p += 2;
00907                                    i += 2;
00908                             } else {
00909                                    smart_str_appendl(pbuf, p, fwd);
00910                                    p += fwd;
00911                                    i += fwd;
00912                             }
00913                      }
00914                      if (eval) {
00915                             zval v;
00916                             /* null terminate buffer */
00917                             smart_str_0(&eval_buf);
00918                             /* do eval */
00919                             if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
00920                                    efree(description);
00921                                    php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
00922                                    /* zend_error() does not return in this case */
00923                             }
00924 
00925                             /* result of eval */
00926                             convert_to_string(&v);
00927                             smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
00928                             /* Clean up */
00929                             eval_buf.len = 0;
00930                             zval_dtor(&v);
00931                      }
00932                      n = regs->end[0];
00933                      if ((pos - (OnigUChar *)string) < n) {
00934                             pos = (OnigUChar *)string + n;
00935                      } else {
00936                             if (pos < string_lim) {
00937                                    smart_str_appendl(&out_buf, pos, 1); 
00938                             }
00939                             pos++;
00940                      }
00941               } else { /* nomatch */
00942                      /* stick that last bit of string on our output */
00943                      if (string_lim - pos > 0) {
00944                             smart_str_appendl(&out_buf, pos, string_lim - pos);
00945                      }
00946               }
00947               onig_region_free(regs, 0);
00948        }
00949 
00950        if (description) {
00951               efree(description);
00952        }
00953        if (regs != NULL) {
00954               onig_region_free(regs, 1);
00955        }
00956        smart_str_free(&eval_buf);
00957 
00958        if (err <= -2) {
00959               smart_str_free(&out_buf);   
00960               RETVAL_FALSE;
00961        } else {
00962               smart_str_appendc(&out_buf, '\0');
00963               RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
00964        }
00965 }
00966 /* }}} */
00967 
00968 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
00969    Replace regular expression for multibyte string */
00970 PHP_FUNCTION(mb_ereg_replace)
00971 {
00972        _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
00973 }
00974 /* }}} */
00975 
00976 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
00977    Case insensitive replace regular expression for multibyte string */
00978 PHP_FUNCTION(mb_eregi_replace)
00979 {
00980        _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
00981 }
00982 /* }}} */
00983 
00984 /* {{{ proto array mb_split(string pattern, string string [, int limit])
00985    split multibyte string into array by regular expression */
00986 PHP_FUNCTION(mb_split)
00987 {
00988        char *arg_pattern;
00989        int arg_pattern_len;
00990        php_mb_regex_t *re;
00991        OnigRegion *regs = NULL;
00992        char *string;
00993        OnigUChar *pos;
00994        int string_len;
00995 
00996        int n, err;
00997        long count = -1;
00998 
00999        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
01000               RETURN_FALSE;
01001        } 
01002 
01003        if (count == 0) {
01004               count = 1;
01005        }
01006 
01007        /* create regex pattern buffer */
01008        if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
01009               RETURN_FALSE;
01010        }
01011 
01012        array_init(return_value);
01013 
01014        pos = (OnigUChar *)string;
01015        err = 0;
01016        regs = onig_region_new();
01017        /* churn through str, generating array entries as we go */
01018        while ((--count != 0) &&
01019                  (err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
01020               if (regs->beg[0] == regs->end[0]) {
01021                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
01022                      break;
01023               }
01024 
01025               /* add it to the array */
01026               if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
01027                      add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
01028               } else {
01029                      err = -2;
01030                      break;
01031               }
01032               /* point at our new starting point */
01033               n = regs->end[0];
01034               if ((pos - (OnigUChar *)string) < n) {
01035                      pos = (OnigUChar *)string + n;
01036               }
01037               if (count < 0) {
01038                      count = 0;
01039               }
01040               onig_region_free(regs, 0);
01041        }
01042 
01043        onig_region_free(regs, 1);
01044 
01045        /* see if we encountered an error */
01046        if (err <= -2) {
01047               OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
01048               onig_error_code_to_str(err_str, err);
01049               php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
01050               zval_dtor(return_value);
01051               RETURN_FALSE;
01052        }
01053 
01054        /* otherwise we just have one last element to add to the array */
01055        n = ((OnigUChar *)(string + string_len) - pos);
01056        if (n > 0) {
01057               add_next_index_stringl(return_value, (char *)pos, n, 1);
01058        } else {
01059               add_next_index_stringl(return_value, "", 0, 1);
01060        }
01061 }
01062 /* }}} */
01063 
01064 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
01065    Regular expression match for multibyte string */
01066 PHP_FUNCTION(mb_ereg_match)
01067 {
01068        char *arg_pattern;
01069        int arg_pattern_len;
01070 
01071        char *string;
01072        int string_len;
01073 
01074        php_mb_regex_t *re;
01075        OnigSyntaxType *syntax;
01076        OnigOptionType option = 0;
01077        int err;
01078 
01079        {
01080               char *option_str = NULL;
01081               int option_str_len = 0;
01082 
01083               if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
01084                                         &arg_pattern, &arg_pattern_len, &string, &string_len,
01085                                         &option_str, &option_str_len)==FAILURE) {
01086                      RETURN_FALSE;
01087               }
01088 
01089               if (option_str != NULL) {
01090                      _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
01091               } else {
01092                      option |= MBREX(regex_default_options);
01093                      syntax = MBREX(regex_default_syntax);
01094               }
01095        }
01096 
01097        if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
01098               RETURN_FALSE;
01099        }
01100 
01101        /* match */
01102        err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
01103        if (err >= 0) {
01104               RETVAL_TRUE;
01105        } else {
01106               RETVAL_FALSE;
01107        }
01108 }
01109 /* }}} */
01110 
01111 /* regex search */
01112 /* {{{ _php_mb_regex_ereg_search_exec */
01113 static void
01114 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
01115 {
01116        size_t argc = ZEND_NUM_ARGS();
01117        char *arg_pattern, *arg_options;
01118        int arg_pattern_len, arg_options_len;
01119        int n, i, err, pos, len, beg, end;
01120        OnigOptionType option;
01121        OnigUChar *str;
01122        OnigSyntaxType *syntax;
01123 
01124        if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
01125               return;
01126        }
01127 
01128        option = MBREX(regex_default_options);
01129 
01130        if (argc == 2) {
01131               option = 0;
01132               _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
01133        }
01134 
01135        if (argc > 0) {
01136               /* create regex pattern buffer */
01137               if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
01138                      RETURN_FALSE;
01139               }
01140        }
01141 
01142        pos = MBREX(search_pos);
01143        str = NULL;
01144        len = 0;
01145        if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
01146               str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
01147               len = Z_STRLEN_P(MBREX(search_str));
01148        }
01149 
01150        if (MBREX(search_re) == NULL) {
01151               php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
01152               RETURN_FALSE;
01153        }
01154 
01155        if (str == NULL) {
01156               php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
01157               RETURN_FALSE;
01158        }
01159 
01160        if (MBREX(search_regs)) {
01161               onig_region_free(MBREX(search_regs), 1);
01162        }
01163        MBREX(search_regs) = onig_region_new();
01164 
01165        err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
01166        if (err == ONIG_MISMATCH) {
01167               MBREX(search_pos) = len;
01168               RETVAL_FALSE;
01169        } else if (err <= -2) {
01170               OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
01171               onig_error_code_to_str(err_str, err);
01172               php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
01173               RETVAL_FALSE;
01174        } else {
01175               if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
01176                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
01177               }
01178               switch (mode) {
01179               case 1:
01180                      array_init(return_value);
01181                      beg = MBREX(search_regs)->beg[0];
01182                      end = MBREX(search_regs)->end[0];
01183                      add_next_index_long(return_value, beg);
01184                      add_next_index_long(return_value, end - beg);
01185                      break;
01186               case 2:
01187                      array_init(return_value);
01188                      n = MBREX(search_regs)->num_regs;
01189                      for (i = 0; i < n; i++) {
01190                             beg = MBREX(search_regs)->beg[i];
01191                             end = MBREX(search_regs)->end[i];
01192                             if (beg >= 0 && beg <= end && end <= len) {
01193                                    add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
01194                             } else {
01195                                    add_index_bool(return_value, i, 0);
01196                             }
01197                      }
01198                      break;
01199               default:
01200                      RETVAL_TRUE;
01201                      break;
01202               }
01203               end = MBREX(search_regs)->end[0];
01204               if (pos < end) {
01205                      MBREX(search_pos) = end;
01206               } else {
01207                      MBREX(search_pos) = pos + 1;
01208               }
01209        }
01210 
01211        if (err < 0) {
01212               onig_region_free(MBREX(search_regs), 1);
01213               MBREX(search_regs) = (OnigRegion *)NULL;
01214        }
01215 }
01216 /* }}} */
01217 
01218 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
01219    Regular expression search for multibyte string */
01220 PHP_FUNCTION(mb_ereg_search)
01221 {
01222        _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
01223 }
01224 /* }}} */
01225 
01226 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
01227    Regular expression search for multibyte string */
01228 PHP_FUNCTION(mb_ereg_search_pos)
01229 {
01230        _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
01231 }
01232 /* }}} */
01233 
01234 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
01235    Regular expression search for multibyte string */
01236 PHP_FUNCTION(mb_ereg_search_regs)
01237 {
01238        _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
01239 }
01240 /* }}} */
01241 
01242 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
01243    Initialize string and regular expression for search. */
01244 PHP_FUNCTION(mb_ereg_search_init)
01245 {
01246        size_t argc = ZEND_NUM_ARGS();
01247        zval *arg_str;
01248        char *arg_pattern = NULL, *arg_options = NULL;
01249        int arg_pattern_len = 0, arg_options_len = 0;
01250        OnigSyntaxType *syntax = NULL;
01251        OnigOptionType option;
01252 
01253        if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
01254               return;
01255        }
01256        
01257        if (argc > 1 && arg_pattern_len == 0) {
01258               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
01259               RETURN_FALSE;
01260        }
01261 
01262        option = MBREX(regex_default_options);
01263        syntax = MBREX(regex_default_syntax);
01264 
01265        if (argc == 3) {
01266               option = 0;
01267               _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
01268        }
01269 
01270        if (argc > 1) {
01271               /* create regex pattern buffer */
01272               if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
01273                      RETURN_FALSE;
01274               }
01275        }
01276 
01277        if (MBREX(search_str) != NULL) {
01278               zval_ptr_dtor(&MBREX(search_str));
01279               MBREX(search_str) = (zval *)NULL;
01280        }
01281 
01282        MBREX(search_str) = arg_str;
01283        Z_ADDREF_P(MBREX(search_str));
01284        SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
01285 
01286        MBREX(search_pos) = 0;
01287 
01288        if (MBREX(search_regs) != NULL) {
01289               onig_region_free(MBREX(search_regs), 1);
01290               MBREX(search_regs) = (OnigRegion *) NULL;
01291        }
01292 
01293        RETURN_TRUE;
01294 }
01295 /* }}} */
01296 
01297 /* {{{ proto array mb_ereg_search_getregs(void)
01298    Get matched substring of the last time */
01299 PHP_FUNCTION(mb_ereg_search_getregs)
01300 {
01301        int n, i, len, beg, end;
01302        OnigUChar *str;
01303 
01304        if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
01305               array_init(return_value);
01306 
01307               str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
01308               len = Z_STRLEN_P(MBREX(search_str));
01309               n = MBREX(search_regs)->num_regs;
01310               for (i = 0; i < n; i++) {
01311                      beg = MBREX(search_regs)->beg[i];
01312                      end = MBREX(search_regs)->end[i];
01313                      if (beg >= 0 && beg <= end && end <= len) {
01314                             add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
01315                      } else {
01316                             add_index_bool(return_value, i, 0);
01317                      }
01318               }
01319        } else {
01320               RETVAL_FALSE;
01321        }
01322 }
01323 /* }}} */
01324 
01325 /* {{{ proto int mb_ereg_search_getpos(void)
01326    Get search start position */
01327 PHP_FUNCTION(mb_ereg_search_getpos)
01328 {
01329        RETVAL_LONG(MBREX(search_pos));
01330 }
01331 /* }}} */
01332 
01333 /* {{{ proto bool mb_ereg_search_setpos(int position)
01334    Set search start position */
01335 PHP_FUNCTION(mb_ereg_search_setpos)
01336 {
01337        long position;
01338 
01339        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
01340               return;
01341        }
01342 
01343        if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
01344               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
01345               MBREX(search_pos) = 0;
01346               RETURN_FALSE;
01347        }
01348 
01349        MBREX(search_pos) = position;
01350        RETURN_TRUE;
01351 }
01352 /* }}} */
01353 
01354 /* {{{ php_mb_regex_set_options */
01355 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC) 
01356 {
01357        if (prev_options != NULL) {
01358               *prev_options = MBREX(regex_default_options);
01359        }
01360        if (prev_syntax != NULL) {
01361               *prev_syntax = MBREX(regex_default_syntax);
01362        }
01363        MBREX(regex_default_options) = options;
01364        MBREX(regex_default_syntax) = syntax;
01365 }
01366 /* }}} */
01367 
01368 /* {{{ proto string mb_regex_set_options([string options])
01369    Set or get the default options for mbregex functions */
01370 PHP_FUNCTION(mb_regex_set_options)
01371 {
01372        OnigOptionType opt;
01373        OnigSyntaxType *syntax;
01374        char *string = NULL;
01375        int string_len;
01376        char buf[16];
01377 
01378        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
01379                                  &string, &string_len) == FAILURE) {
01380               RETURN_FALSE;
01381        }
01382        if (string != NULL) {
01383               opt = 0;
01384               syntax = NULL;
01385               _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
01386               _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
01387        } else {
01388               opt = MBREX(regex_default_options);
01389               syntax = MBREX(regex_default_syntax);
01390        }
01391        _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
01392 
01393        RETVAL_STRING(buf, 1);
01394 }
01395 /* }}} */
01396 
01397 #endif /* HAVE_MBREGEX */
01398 
01399 /*
01400  * Local variables:
01401  * tab-width: 4
01402  * c-basic-offset: 4
01403  * End:
01404  * vim600: fdm=marker
01405  * vim: noet sw=4 ts=4
01406  */