Back to index

php5  5.3.10
php_pcre.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Andrei Zmievski <andrei@php.net>                             |
00016    +----------------------------------------------------------------------+
00017  */
00018 
00019 /* $Id: php_pcre.c 321634 2012-01-01 13:15:04Z felipe $ */
00020 
00021 #include "php.h"
00022 #include "php_ini.h"
00023 #include "php_globals.h"
00024 #include "php_pcre.h"
00025 #include "ext/standard/info.h"
00026 #include "ext/standard/php_smart_str.h"
00027 
00028 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
00029 
00030 #include "ext/standard/php_string.h"
00031 
/* Result-ordering modes; the ordering value is taken from the low byte of
   the caller's flags (flags & 0xff). */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
/* Bit flag located above the ordering byte; when set, each match is stored
   together with its offset. */
#define PREG_OFFSET_CAPTURE			(1<<8)

/* Bit flags for splitting (presumably consumed by preg_split(), which is not
   visible in this part of the file -- confirm against its implementation). */
#define	PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Stored in a cache entry's preg_options when the 'e' modifier is present. */
#define PREG_REPLACE_EVAL			(1<<0)

/* Presumably consumed by preg_grep(), which is not visible in this part of
   the file -- confirm against its implementation. */
#define PREG_GREP_INVERT			(1<<0)

/* Number of compiled patterns the cache may hold; when the cache reaches
   this size, PCRE_CACHE_SIZE / 8 entries are evicted before inserting. */
#define PCRE_CACHE_SIZE 4096
00045 
/* Error codes kept in PCRE_G(error_code); the same numeric values are
   registered as the PREG_*_ERROR constants at module startup. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
00054 
00055 
00056 ZEND_DECLARE_MODULE_GLOBALS(pcre)
00057 
00058 
00059 static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
00060 {
00061        int preg_code = 0;
00062 
00063        switch (pcre_code) {
00064               case PCRE_ERROR_MATCHLIMIT:
00065                      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
00066                      break;
00067 
00068               case PCRE_ERROR_RECURSIONLIMIT:
00069                      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
00070                      break;
00071 
00072               case PCRE_ERROR_BADUTF8:
00073                      preg_code = PHP_PCRE_BAD_UTF8_ERROR;
00074                      break;
00075 
00076               case PCRE_ERROR_BADUTF8_OFFSET:
00077                      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
00078                      break;
00079 
00080               default:
00081                      preg_code = PHP_PCRE_INTERNAL_ERROR;
00082                      break;
00083        }
00084 
00085        PCRE_G(error_code) = preg_code;
00086 }
00087 /* }}} */
00088 
00089 static void php_free_pcre_cache(void *data) /* {{{ */
00090 {
00091        pcre_cache_entry *pce = (pcre_cache_entry *) data;
00092        if (!pce) return;
00093        pefree(pce->re, 1);
00094        if (pce->extra) pefree(pce->extra, 1);
00095 #if HAVE_SETLOCALE
00096        if ((void*)pce->tables) pefree((void*)pce->tables, 1);
00097        pefree(pce->locale, 1);
00098 #endif
00099 }
00100 /* }}} */
00101 
00102 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
00103 {
00104        zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
00105        pcre_globals->backtrack_limit = 0;
00106        pcre_globals->recursion_limit = 0;
00107        pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
00108 }
00109 /* }}} */
00110 
/* Globals destructor: tears down the compiled-pattern cache that the globals
   constructor created. */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
	zend_hash_destroy(&pcre_globals->pcre_cache);
}
00115 /* }}} */
00116 
/* INI settings bound to the backtrack_limit and recursion_limit globals;
   php_pcre_match_impl() copies them into the pcre_extra match_limit and
   match_limit_recursion fields before running a match. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
PHP_INI_END()
00121 
00122 
00123 /* {{{ PHP_MINFO_FUNCTION(pcre) */
/* Emits the extension's info table (support status and the version string
   reported by the PCRE library) followed by its INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *lib_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", lib_version);
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
00133 /* }}} */
00134 
00135 /* {{{ PHP_MINIT_FUNCTION(pcre) */
00136 static PHP_MINIT_FUNCTION(pcre)
00137 {
00138        REGISTER_INI_ENTRIES();
00139        
00140        REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
00141        REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
00142        REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
00143        REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
00144        REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
00145        REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
00146        REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
00147 
00148        REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
00149        REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
00150        REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
00151        REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
00152        REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
00153        REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
00154        REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
00155 
00156        return SUCCESS;
00157 }
00158 /* }}} */
00159 
00160 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: removes the INI entries registered at module startup and
   reports SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
00167 /* }}} */
00168 
00169 /* {{{ static pcre_clean_cache */
00170 static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
00171 {
00172        int *num_clean = (int *)arg;
00173 
00174        if (*num_clean > 0) {
00175               (*num_clean)--;
00176               return 1;
00177        } else {
00178               return 0;
00179        }
00180 }
00181 /* }}} */
00182 
00183 /* {{{ static make_subpats_table */
00184 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
00185 {
00186        pcre_extra *extra = pce->extra;
00187        int name_cnt = 0, name_size, ni = 0;
00188        int rc;
00189        char *name_table;
00190        unsigned short name_idx;
00191        char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
00192 
00193        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
00194        if (rc < 0) {
00195               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
00196               efree(subpat_names);
00197               return NULL;
00198        }
00199        if (name_cnt > 0) {
00200               int rc1, rc2;
00201 
00202               rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
00203               rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
00204               rc = rc2 ? rc2 : rc1;
00205               if (rc < 0) {
00206                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
00207                      efree(subpat_names);
00208                      return NULL;
00209               }
00210 
00211               while (ni++ < name_cnt) {
00212                      name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
00213                      subpat_names[name_idx] = name_table + 2;
00214                      if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
00215                             php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
00216                             efree(subpat_names);
00217                             return NULL;
00218                      }
00219                      name_table += name_size;
00220               }
00221        }
00222 
00223        return subpat_names;
00224 }
00225 /* }}} */
00226 
00227 /* {{{ pcre_get_compiled_regex_cache
00228  */
00229 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
00230 {
00231        pcre                        *re = NULL;
00232        pcre_extra                  *extra;
00233        int                                 coptions = 0;
00234        int                                 soptions = 0;
00235        const char                  *error;
00236        int                                 erroffset;
00237        char                         delimiter;
00238        char                         start_delimiter;
00239        char                         end_delimiter;
00240        char                        *p, *pp;
00241        char                        *pattern;
00242        int                                 do_study = 0;
00243        int                                 poptions = 0;
00244        unsigned const char *tables = NULL;
00245 #if HAVE_SETLOCALE
00246        char                        *locale = setlocale(LC_CTYPE, NULL);
00247 #endif
00248        pcre_cache_entry     *pce;
00249        pcre_cache_entry      new_entry;
00250 
00251        /* Try to lookup the cached regex entry, and if successful, just pass
00252           back the compiled pattern, otherwise go on and compile it. */
00253        if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
00254               /*
00255                * We use a quick pcre_info() check to see whether cache is corrupted, and if it
00256                * is, we flush it and compile the pattern from scratch.
00257                */
00258               if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
00259                      zend_hash_clean(&PCRE_G(pcre_cache));
00260               } else {
00261 #if HAVE_SETLOCALE
00262                      if (!strcmp(pce->locale, locale)) {
00263 #endif
00264                             return pce;
00265 #if HAVE_SETLOCALE
00266                      }
00267 #endif
00268               }
00269        }
00270        
00271        p = regex;
00272        
00273        /* Parse through the leading whitespace, and display a warning if we
00274           get to the end without encountering a delimiter. */
00275        while (isspace((int)*(unsigned char *)p)) p++;
00276        if (*p == 0) {
00277               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
00278               return NULL;
00279        }
00280        
00281        /* Get the delimiter and display a warning if it is alphanumeric
00282           or a backslash. */
00283        delimiter = *p++;
00284        if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
00285               php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
00286               return NULL;
00287        }
00288 
00289        start_delimiter = delimiter;
00290        if ((pp = strchr("([{< )]}> )]}>", delimiter)))
00291               delimiter = pp[5];
00292        end_delimiter = delimiter;
00293 
00294        if (start_delimiter == end_delimiter) {
00295               /* We need to iterate through the pattern, searching for the ending delimiter,
00296                  but skipping the backslashed delimiters.  If the ending delimiter is not
00297                  found, display a warning. */
00298               pp = p;
00299               while (*pp != 0) {
00300                      if (*pp == '\\' && pp[1] != 0) pp++;
00301                      else if (*pp == delimiter)
00302                             break;
00303                      pp++;
00304               }
00305               if (*pp == 0) {
00306                      php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
00307                      return NULL;
00308               }
00309        } else {
00310               /* We iterate through the pattern, searching for the matching ending
00311                * delimiter. For each matching starting delimiter, we increment nesting
00312                * level, and decrement it for each matching ending delimiter. If we
00313                * reach the end of the pattern without matching, display a warning.
00314                */
00315               int brackets = 1;    /* brackets nesting level */
00316               pp = p;
00317               while (*pp != 0) {
00318                      if (*pp == '\\' && pp[1] != 0) pp++;
00319                      else if (*pp == end_delimiter && --brackets <= 0)
00320                             break;
00321                      else if (*pp == start_delimiter)
00322                             brackets++;
00323                      pp++;
00324               }
00325               if (*pp == 0) {
00326                      php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
00327                      return NULL;
00328               }
00329        }
00330        
00331        /* Make a copy of the actual pattern. */
00332        pattern = estrndup(p, pp-p);
00333 
00334        /* Move on to the options */
00335        pp++;
00336 
00337        /* Parse through the options, setting appropriate flags.  Display
00338           a warning if we encounter an unknown modifier. */    
00339        while (*pp != 0) {
00340               switch (*pp++) {
00341                      /* Perl compatible options */
00342                      case 'i':     coptions |= PCRE_CASELESS;         break;
00343                      case 'm':     coptions |= PCRE_MULTILINE;        break;
00344                      case 's':     coptions |= PCRE_DOTALL;           break;
00345                      case 'x':     coptions |= PCRE_EXTENDED;         break;
00346                      
00347                      /* PCRE specific options */
00348                      case 'A':     coptions |= PCRE_ANCHORED;         break;
00349                      case 'D':     coptions |= PCRE_DOLLAR_ENDONLY;break;
00350                      case 'S':     do_study  = 1;                                   break;
00351                      case 'U':     coptions |= PCRE_UNGREEDY;         break;
00352                      case 'X':     coptions |= PCRE_EXTRA;                   break;
00353                      case 'u':     coptions |= PCRE_UTF8;
00354        /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
00355        characters, even in UTF-8 mode. However, this can be changed by setting
00356        the PCRE_UCP option. */
00357 #ifdef PCRE_UCP
00358                                           coptions |= PCRE_UCP;
00359 #endif               
00360                             break;
00361 
00362                      /* Custom preg options */
00363                      case 'e':     poptions |= PREG_REPLACE_EVAL;     break;
00364                      
00365                      case ' ':
00366                      case '\n':
00367                             break;
00368 
00369                      default:
00370                             php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
00371                             efree(pattern);
00372                             return NULL;
00373               }
00374        }
00375 
00376 #if HAVE_SETLOCALE
00377        if (strcmp(locale, "C"))
00378               tables = pcre_maketables();
00379 #endif
00380 
00381        /* Compile pattern and display a warning if compilation failed. */
00382        re = pcre_compile(pattern,
00383                                      coptions,
00384                                      &error,
00385                                      &erroffset,
00386                                      tables);
00387 
00388        if (re == NULL) {
00389               php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
00390               efree(pattern);
00391               if (tables) {
00392                      pefree((void*)tables, 1);
00393               }
00394               return NULL;
00395        }
00396 
00397        /* If study option was specified, study the pattern and
00398           store the result in extra for passing to pcre_exec. */
00399        if (do_study) {
00400               extra = pcre_study(re, soptions, &error);
00401               if (extra) {
00402                      extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
00403               }
00404               if (error != NULL) {
00405                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
00406               }
00407        } else {
00408               extra = NULL;
00409        }
00410 
00411        efree(pattern);
00412 
00413        /*
00414         * If we reached cache limit, clean out the items from the head of the list;
00415         * these are supposedly the oldest ones (but not necessarily the least used
00416         * ones).
00417         */
00418        if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
00419               int num_clean = PCRE_CACHE_SIZE / 8;
00420               zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
00421        }
00422 
00423        /* Store the compiled pattern and extra info in the cache. */
00424        new_entry.re = re;
00425        new_entry.extra = extra;
00426        new_entry.preg_options = poptions;
00427        new_entry.compile_options = coptions;
00428 #if HAVE_SETLOCALE
00429        new_entry.locale = pestrdup(locale, 1);
00430        new_entry.tables = tables;
00431 #endif
00432        zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
00433                                           sizeof(pcre_cache_entry), (void**)&pce);
00434 
00435        return pce;
00436 }
00437 /* }}} */
00438 
00439 /* {{{ pcre_get_compiled_regex
00440  */
00441 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
00442 {
00443        pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
00444 
00445        if (extra) {
00446               *extra = pce ? pce->extra : NULL;
00447        }
00448        if (preg_options) {
00449               *preg_options = pce ? pce->preg_options : 0;
00450        }
00451        
00452        return pce ? pce->re : NULL;
00453 }
00454 /* }}} */
00455 
00456 /* {{{ pcre_get_compiled_regex_ex
00457  */
00458 PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
00459 {
00460        pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
00461        
00462        if (extra) {
00463               *extra = pce ? pce->extra : NULL;
00464        }
00465        if (preg_options) {
00466               *preg_options = pce ? pce->preg_options : 0;
00467        }
00468        if (compile_options) {
00469               *compile_options = pce ? pce->compile_options : 0;
00470        }
00471        
00472        return pce ? pce->re : NULL;
00473 }
00474 /* }}} */
00475 
00476 /* {{{ add_offset_pair */
00477 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
00478 {
00479        zval *match_pair;
00480 
00481        ALLOC_ZVAL(match_pair);
00482        array_init(match_pair);
00483        INIT_PZVAL(match_pair);
00484 
00485        /* Add (match, offset) to the return value */
00486        add_next_index_stringl(match_pair, str, len, 1);
00487        add_next_index_long(match_pair, offset);
00488        
00489        if (name) {
00490               zval_add_ref(&match_pair);
00491               zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
00492        }
00493        zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
00494 }
00495 /* }}} */
00496 
00497 static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
00498 {
00499        /* parameters */
00500        char                  *regex;                    /* Regular expression */
00501        char                  *subject;                  /* String to match against */
00502        int                           regex_len;
00503        int                           subject_len;
00504        pcre_cache_entry *pce;                           /* Compiled regular expression */
00505        zval                  *subpats = NULL;    /* Array for subpatterns */
00506        long                   flags = 0;         /* Match control flags */
00507        long                   start_offset = 0;  /* Where the new search starts */
00508 
00509        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), &regex, &regex_len,
00510                                                    &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
00511               RETURN_FALSE;
00512        }
00513        
00514        /* Compile regex or get it from cache. */
00515        if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
00516               RETURN_FALSE;
00517        }
00518 
00519        php_pcre_match_impl(pce, subject, subject_len, return_value, subpats, 
00520               global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
00521 }
00522 /* }}} */
00523 
00524 /* {{{ php_pcre_match_impl() */
00525 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
00526        zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
00527 {
00528        zval                 *result_set,         /* Holds a set of subpatterns after
00529                                                                          a global match */
00530                                **match_sets = NULL;     /* An array of sets of matches for each
00531                                                                          subpattern after a global match */
00532        pcre_extra           *extra = pce->extra;/* Holds results of studying */
00533        pcre_extra            extra_data;         /* Used locally for exec options */
00534        int                          exoptions = 0;             /* Execution options */
00535        int                          count = 0;                 /* Count of matched subpatterns */
00536        int                         *offsets;                   /* Array of subpattern offsets */
00537        int                          num_subpats;        /* Number of captured subpatterns */
00538        int                          size_offsets;              /* Size of the offsets array */
00539        int                          matched;                   /* Has anything matched */
00540        int                          g_notempty = 0;     /* If the match should not be empty */
00541        const char       **stringlist;            /* Holds list of subpatterns */
00542        char             **subpat_names;          /* Array for named subpatterns */
00543        int                          i, rc;
00544        int                          subpats_order;             /* Order of subpattern matches */
00545        int                          offset_capture;    /* Capture match offsets: yes/no */
00546 
00547        /* Overwrite the passed-in value for subpatterns with an empty array. */
00548        if (subpats != NULL) {
00549               zval_dtor(subpats);
00550               array_init(subpats);
00551        }
00552 
00553        subpats_order = global ? PREG_PATTERN_ORDER : 0;
00554 
00555        if (use_flags) {
00556               offset_capture = flags & PREG_OFFSET_CAPTURE;
00557 
00558               /*
00559                * subpats_order is pre-set to pattern mode so we change it only if
00560                * necessary.
00561                */
00562               if (flags & 0xff) {
00563                      subpats_order = flags & 0xff;
00564               }
00565               if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
00566                      (!global && subpats_order != 0)) {
00567                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
00568                      return;
00569               }
00570        } else {
00571               offset_capture = 0;
00572        }
00573 
00574        /* Negative offset counts from the end of the string. */
00575        if (start_offset < 0) {
00576               start_offset = subject_len + start_offset;
00577               if (start_offset < 0) {
00578                      start_offset = 0;
00579               }
00580        }
00581 
00582        if (extra == NULL) {
00583               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
00584               extra = &extra_data;
00585        }
00586        extra->match_limit = PCRE_G(backtrack_limit);
00587        extra->match_limit_recursion = PCRE_G(recursion_limit);
00588 
00589        /* Calculate the size of the offsets array, and allocate memory for it. */
00590        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
00591        if (rc < 0) {
00592               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
00593               RETURN_FALSE;
00594        }
00595        num_subpats++;
00596        size_offsets = num_subpats * 3;
00597 
00598        /*
00599         * Build a mapping from subpattern numbers to their names. We will always
00600         * allocate the table, even though there may be no named subpatterns. This
00601         * avoids somewhat more complicated logic in the inner loops.
00602         */
00603        subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
00604        if (!subpat_names) {
00605               RETURN_FALSE;
00606        }
00607 
00608        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
00609 
00610        /* Allocate match sets array and initialize the values. */
00611        if (global && subpats_order == PREG_PATTERN_ORDER) {
00612               match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
00613               for (i=0; i<num_subpats; i++) {
00614                      ALLOC_ZVAL(match_sets[i]);
00615                      array_init(match_sets[i]);
00616                      INIT_PZVAL(match_sets[i]);
00617               }
00618        }
00619 
00620        matched = 0;
00621        PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
00622        
00623        do {
00624               /* Execute the regular expression. */
00625               count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
00626                                             exoptions|g_notempty, offsets, size_offsets);
00627 
00628               /* the string was already proved to be valid UTF-8 */
00629               exoptions |= PCRE_NO_UTF8_CHECK;
00630 
00631               /* Check for too many substrings condition. */
00632               if (count == 0) {
00633                      php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
00634                      count = size_offsets/3;
00635               }
00636 
00637               /* If something has matched */
00638               if (count > 0) {
00639                      matched++;
00640 
00641                      /* If subpatterns array has been passed, fill it in with values. */
00642                      if (subpats != NULL) {
00643                             /* Try to get the list of substrings and display a warning if failed. */
00644                             if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
00645                                    efree(subpat_names);
00646                                    efree(offsets);
00647                                    if (match_sets) efree(match_sets);
00648                                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
00649                                    RETURN_FALSE;
00650                             }
00651 
00652                             if (global) { /* global pattern matching */
00653                                    if (subpats_order == PREG_PATTERN_ORDER) {
00654                                           /* For each subpattern, insert it into the appropriate array. */
00655                                           for (i = 0; i < count; i++) {
00656                                                  if (offset_capture) {
00657                                                         add_offset_pair(match_sets[i], (char *)stringlist[i],
00658                                                                                     offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
00659                                                  } else {
00660                                                         add_next_index_stringl(match_sets[i], (char *)stringlist[i],
00661                                                                                               offsets[(i<<1)+1] - offsets[i<<1], 1);
00662                                                  }
00663                                           }
00664                                           /*
00665                                            * If the number of captured subpatterns on this run is
00666                                            * less than the total possible number, pad the result
00667                                            * arrays with empty strings.
00668                                            */
00669                                           if (count < num_subpats) {
00670                                                  for (; i < num_subpats; i++) {
00671                                                         add_next_index_string(match_sets[i], "", 1);
00672                                                  }
00673                                           }
00674                                    } else {
00675                                           /* Allocate the result set array */
00676                                           ALLOC_ZVAL(result_set);
00677                                           array_init(result_set);
00678                                           INIT_PZVAL(result_set);
00679                                           
00680                                           /* Add all the subpatterns to it */
00681                                           for (i = 0; i < count; i++) {
00682                                                  if (offset_capture) {
00683                                                         add_offset_pair(result_set, (char *)stringlist[i],
00684                                                                                     offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
00685                                                  } else {
00686                                                         if (subpat_names[i]) {
00687                                                                add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
00688                                                                                                      offsets[(i<<1)+1] - offsets[i<<1], 1);
00689                                                         }
00690                                                         add_next_index_stringl(result_set, (char *)stringlist[i],
00691                                                                                               offsets[(i<<1)+1] - offsets[i<<1], 1);
00692                                                  }
00693                                           }
00694                                           /* And add it to the output array */
00695                                           zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
00696                                    }
00697                             } else {                    /* single pattern matching */
00698                                    /* For each subpattern, insert it into the subpatterns array. */
00699                                    for (i = 0; i < count; i++) {
00700                                           if (offset_capture) {
00701                                                  add_offset_pair(subpats, (char *)stringlist[i],
00702                                                                              offsets[(i<<1)+1] - offsets[i<<1],
00703                                                                              offsets[i<<1], subpat_names[i]);
00704                                           } else {
00705                                                  if (subpat_names[i]) {
00706                                                         add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
00707                                                                                       offsets[(i<<1)+1] - offsets[i<<1], 1);
00708                                                  }
00709                                                  add_next_index_stringl(subpats, (char *)stringlist[i],
00710                                                                                        offsets[(i<<1)+1] - offsets[i<<1], 1);
00711                                           }
00712                                    }
00713                             }
00714 
00715                             pcre_free((void *) stringlist);
00716                      }
00717               } else if (count == PCRE_ERROR_NOMATCH) {
00718                      /* If we previously set PCRE_NOTEMPTY after a null match,
00719                         this is not necessarily the end. We need to advance
00720                         the start offset, and continue. Fudge the offset values
00721                         to achieve this, unless we're already at the end of the string. */
00722                      if (g_notempty != 0 && start_offset < subject_len) {
00723                             offsets[0] = start_offset;
00724                             offsets[1] = start_offset + 1;
00725                      } else
00726                             break;
00727               } else {
00728                      pcre_handle_exec_error(count TSRMLS_CC);
00729                      break;
00730               }
00731               
00732               /* If we have matched an empty string, mimic what Perl's /g options does.
00733                  This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
00734                  the match again at the same point. If this fails (picked up above) we
00735                  advance to the next character. */
00736               g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
00737               
00738               /* Advance to the position right after the last full match */
00739               start_offset = offsets[1];
00740        } while (global);
00741 
00742        /* Add the match sets to the output array and clean up */
00743        if (global && subpats_order == PREG_PATTERN_ORDER) {
00744               for (i = 0; i < num_subpats; i++) {
00745                      if (subpat_names[i]) {
00746                             zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
00747                                                          strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
00748                             Z_ADDREF_P(match_sets[i]);
00749                      }
00750                      zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
00751               }
00752               efree(match_sets);
00753        }
00754        
00755        efree(offsets);
00756        efree(subpat_names);
00757 
00758        /* Did we encounter an error? */
00759        if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
00760               RETVAL_LONG(matched);
00761        } else {
00762               RETVAL_FALSE;
00763        }
00764 }
00765 /* }}} */
00766 
00767 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
00768    Perform a Perl-style regular expression match */
00769 static PHP_FUNCTION(preg_match)
00770 {
00771        php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
00772 }
00773 /* }}} */
00774 
00775 /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]])
00776    Perform a Perl-style global regular expression match */
00777 static PHP_FUNCTION(preg_match_all)
00778 {
00779        php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
00780 }
00781 /* }}} */
00782 
/* {{{ preg_get_backref
 * Try to read a numeric back-reference starting at *str.
 *
 * *str must point at the introducing character (the callers in this file
 * only invoke it on '\\' or '$').  Accepted shapes are <c>N, <c>NN and
 * ${N} / ${NN}, where N is a decimal digit; at most two digits are consumed.
 *
 * On success returns 1, stores the number in *backref and moves *str just
 * past the reference.  On failure returns 0 and leaves *str alone; note
 * that *backref may already have been written when the digits parsed fine
 * but the closing '}' of the ${...} form was missing.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int   braced;
	int   number;

	/* The introducer is the last character: nothing can follow it. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only the '$' introducer supports the ${N} spelling. */
	braced = (cursor[0] == '$' && cursor[1] == '{') ? 1 : 0;
	cursor += 1 + braced;

	/* A reference needs at least one digit... */
	if (!(*cursor >= '0' && *cursor <= '9')) {
		return 0;
	}
	number = *cursor - '0';
	cursor++;

	/* ...and may carry a second one. */
	if (*cursor >= '0' && *cursor <= '9') {
		number = number * 10 + *cursor - '0';
		cursor++;
	}

	/* Published before the brace check, so a failing ${N form still updates it. */
	*backref = number;

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
00821 
00822 /* {{{ preg_do_repl_func
00823  */
00824 static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
00825 {
00826        zval          *retval_ptr;         /* Function return value */
00827        zval      **args[1];               /* Argument to pass to function */
00828        zval          *subpats;                   /* Captured subpatterns */ 
00829        int                   result_len;         /* Return value length */
00830        int                   i;
00831 
00832        MAKE_STD_ZVAL(subpats);
00833        array_init(subpats);
00834        for (i = 0; i < count; i++) {
00835               if (subpat_names[i]) {
00836                      add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
00837               }
00838               add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
00839        }
00840        args[0] = &subpats;
00841 
00842        if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
00843               convert_to_string_ex(&retval_ptr);
00844               *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
00845               result_len = Z_STRLEN_P(retval_ptr);
00846               zval_ptr_dtor(&retval_ptr);
00847        } else {
00848               if (!EG(exception)) {
00849                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
00850               }
00851               result_len = offsets[1] - offsets[0];
00852               *result = estrndup(&subject[offsets[0]], result_len);
00853        }
00854 
00855        zval_ptr_dtor(&subpats);
00856 
00857        return result_len;
00858 }
00859 /* }}} */
00860 
00861 /* {{{ preg_do_eval
00862  */
00863 static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
00864                                           int *offsets, int count, char **result TSRMLS_DC)
00865 {
00866        zval           retval;                    /* Return value from evaluation */
00867        char          *eval_str_end,              /* End of eval string */
00868                             *match,                            /* Current match for a backref */
00869                             *esc_match,                 /* Quote-escaped match */
00870                             *walk,                      /* Used to walk the code string */
00871                             *segment,                   /* Start of segment to append while walking */
00872                              walk_last;                 /* Last walked character */
00873        int                   match_len;                 /* Length of the match */
00874        int                   esc_match_len;             /* Length of the quote-escaped match */
00875        int                   result_len;         /* Length of the result of the evaluation */
00876        int                   backref;                   /* Current backref */
00877        char        *compiled_string_description;
00878        smart_str    code = {0};
00879        
00880        eval_str_end = eval_str + eval_str_len;
00881        walk = segment = eval_str;
00882        walk_last = 0;
00883        
00884        while (walk < eval_str_end) {
00885               /* If found a backreference.. */
00886               if ('\\' == *walk || '$' == *walk) {
00887                      smart_str_appendl(&code, segment, walk - segment);
00888                      if (walk_last == '\\') {
00889                             code.c[code.len-1] = *walk++;
00890                             segment = walk;
00891                             walk_last = 0;
00892                             continue;
00893                      }
00894                      segment = walk;
00895                      if (preg_get_backref(&walk, &backref)) {
00896                             if (backref < count) {
00897                                    /* Find the corresponding string match and substitute it
00898                                       in instead of the backref */
00899                                    match = subject + offsets[backref<<1];
00900                                    match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
00901                                    if (match_len) {
00902                                           esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
00903                                    } else {
00904                                           esc_match = match;
00905                                           esc_match_len = 0;
00906                                    }
00907                             } else {
00908                                    esc_match = "";
00909                                    esc_match_len = 0;
00910                             }
00911                             smart_str_appendl(&code, esc_match, esc_match_len);
00912 
00913                             segment = walk;
00914 
00915                             /* Clean up and reassign */
00916                             if (esc_match_len)
00917                                    efree(esc_match);
00918                             continue;
00919                      }
00920               }
00921               walk++;
00922               walk_last = walk[-1];
00923        }
00924        smart_str_appendl(&code, segment, walk - segment);
00925        smart_str_0(&code);
00926 
00927        compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
00928        /* Run the code */
00929        if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
00930               efree(compiled_string_description);
00931               php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
00932               /* zend_error() does not return in this case */
00933        }
00934        efree(compiled_string_description);
00935        convert_to_string(&retval);
00936        
00937        /* Save the return value and its length */
00938        *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
00939        result_len = Z_STRLEN(retval);
00940        
00941        /* Clean up */
00942        zval_dtor(&retval);
00943        smart_str_free(&code);
00944        
00945        return result_len;
00946 }
00947 /* }}} */
00948 
00949 /* {{{ php_pcre_replace
00950  */
00951 PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
00952                                                    char *subject, int subject_len,
00953                                                    zval *replace_val, int is_callable_replace,
00954                                                    int *result_len, int limit, int *replace_count TSRMLS_DC)
00955 {
00956        pcre_cache_entry     *pce;                    /* Compiled regular expression */
00957 
00958        /* Compile regex or get it from cache. */
00959        if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
00960               return NULL;
00961        }
00962 
00963        return php_pcre_replace_impl(pce, subject, subject_len, replace_val, 
00964               is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
00965 }
00966 /* }}} */
00967 
00968 /* {{{ php_pcre_replace_impl()
       *
       * Replace matches of the compiled pattern `pce` inside `subject` and
       * return the rewritten string in a freshly allocated buffer.
       *
       * pce                  compiled pattern; pce->re, pce->extra and
       *                      pce->preg_options are read here.
       * subject, subject_len text to search.
       * replace_val          if is_callable_replace is zero, its string value is
       *                      the replacement template (back-references are
       *                      parsed with preg_get_backref); otherwise it is the
       *                      callback handed to preg_do_repl_func.
       * result_len           out: length of the returned string.
       * limit                maximum number of replacements; -1 means no limit.
       * replace_count        optional (may be NULL); incremented once for every
       *                      replacement performed.
       *
       * Returns the result buffer (allocated with emalloc/safe_emalloc), or NULL
       * when: the /e modifier is combined with a callback, pcre_fullinfo()
       * fails, make_subpats_table() returns NULL, or pcre_exec() reports an
       * error other than "no match".
       *
       * PCRE_G(error_code) is reset to PHP_PCRE_NO_ERROR before matching starts.
       */
00969 PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, 
00970        int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
00971 {
00972        pcre_extra           *extra = pce->extra;/* Holds results of studying */
00973        pcre_extra            extra_data;         /* Used locally for exec options */
00974        int                          exoptions = 0;             /* Execution options */
00975        int                          count = 0;                 /* Count of matched subpatterns */
00976        int                         *offsets;                   /* Array of subpattern offsets */
00977        char                 **subpat_names;             /* Array for named subpatterns */
00978        int                          num_subpats;        /* Number of captured subpatterns */
00979        int                          size_offsets;              /* Size of the offsets array */
00980        int                          new_len;                   /* Length of needed storage */
00981        int                          alloc_len;                 /* Actual allocated length */
00982        int                          eval_result_len=0;  /* Length of the eval'ed or
00983                                                                          function-returned string */
00984        int                          match_len;                 /* Length of the current match */
00985        int                          backref;                   /* Backreference number */
00986        int                          eval;                      /* If the replacement string should be eval'ed */
00987        int                          start_offset;              /* Where the new search starts */
00988        int                          g_notempty=0;              /* If the match should not be empty */
00989        int                          replace_len=0;             /* Length of replacement string */
00990        char                 *result,                    /* Result of replacement */
00991                                    *replace=NULL,              /* Replacement string */
00992                                    *new_buf,                   /* Temporary buffer for re-allocation */
00993                                    *walkbuf,                   /* Location of current replacement in the result */
00994                                    *walk,                      /* Used to walk the replacement string */
00995                                    *match,                            /* The current match */
00996                                    *piece,                            /* The current piece of subject */
00997                                    *replace_end=NULL,   /* End of replacement string */
00998                                    *eval_result,        /* Result of eval or custom function */
00999                                     walk_last;                 /* Last walked character */
01000        int                          rc;
01001 
             /* Without study data, fall back to a stack-local pcre_extra so the
              * match/recursion limits below can still be handed to pcre_exec(). */
01002        if (extra == NULL) {
01003               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
01004               extra = &extra_data;
01005        }
01006        extra->match_limit = PCRE_G(backtrack_limit);
01007        extra->match_limit_recursion = PCRE_G(recursion_limit);
01008 
             /* The /e modifier and a callback are mutually exclusive; for a plain
              * replacement, remember the template and where it ends. */
01009        eval = pce->preg_options & PREG_REPLACE_EVAL;
01010        if (is_callable_replace) {
01011               if (eval) {
01012                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
01013                      return NULL;
01014               }
01015        } else {
01016               replace = Z_STRVAL_P(replace_val);
01017               replace_len = Z_STRLEN_P(replace_val);
01018               replace_end = replace + replace_len;
01019        }
01020 
01021        /* Calculate the size of the offsets array, and allocate memory for it. */
01022        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
01023        if (rc < 0) {
01024               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
01025               return NULL;
01026        }
             /* One more than the capture count (presumably for the whole-pattern
              * match at index 0), and three ints per entry for pcre_exec(). */
01027        num_subpats++;
01028        size_offsets = num_subpats * 3;
01029 
01030        /*
01031         * Build a mapping from subpattern numbers to their names. We will always
01032         * allocate the table, even though there may be no named subpatterns. This
01033         * avoids somewhat more complicated logic in the inner loops.
01034         */
01035        subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
01036        if (!subpat_names) {
01037               return NULL;
01038        }
01039 
01040        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
01041        
             /* Initial capacity guess; the buffer is grown on demand in the loop.
              * NOTE(review): alloc_len/new_len are plain ints and none of the size
              * arithmetic in this function is checked for overflow - confirm this
              * cannot be reached with very large subjects/replacements. */
01042        alloc_len = 2 * subject_len + 1;
01043        result = safe_emalloc(alloc_len, sizeof(char), 0);
01044 
01045        /* Initialize */
01046        match = NULL;
01047        *result_len = 0;
01048        start_offset = 0;
01049        PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
01050        
             /* One iteration per match attempt; left via `break` once the tail of
              * the subject has been copied or pcre_exec() reported an error. */
01051        while (1) {
01052               /* Execute the regular expression. */
01053               count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
01054                                             exoptions|g_notempty, offsets, size_offsets);
01055 
01056               /* the string was already proved to be valid UTF-8 */
01057               exoptions |= PCRE_NO_UTF8_CHECK;
01058 
01059               /* Check for too many substrings condition. */
01060               if (count == 0) {
01061                      php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
01062                      count = size_offsets/3;
01063               }
01064 
                    /* Unprocessed part of the subject starts here */
01065               piece = subject + start_offset;
01066 
                    /* A match, and the replacement budget is not used up yet */
01067               if (count > 0 && (limit == -1 || limit > 0)) {
01068                      if (replace_count) {
01069                             ++*replace_count;
01070                      }
01071                      /* Set the match location in subject */
01072                      match = subject + offsets[0];
01073 
01074                      new_len = *result_len + offsets[0] - start_offset; /* part before the match */
01075                      
01076                      /* If evaluating, do it and add the return string's length */
01077                      if (eval) {
01078                             eval_result_len = preg_do_eval(replace, replace_len, subject,
01079                                                                                 offsets, count, &eval_result TSRMLS_CC);
01080                             new_len += eval_result_len;
01081                      } else if (is_callable_replace) {
01082                             /* Use custom function to get replacement string and its length. */
01083                             eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
01084                             new_len += eval_result_len;
01085                      } else { /* do regular substitution */
                                  /* First pass over the template: only measure how long
                                   * the expanded replacement will be. */
01086                             walk = replace;
01087                             walk_last = 0;
01088                             while (walk < replace_end) {
01089                                    if ('\\' == *walk || '$' == *walk) {
                                                /* Escaped '\\' or '$': in the copy pass it overwrites
                                                 * the preceding backslash, so it adds no length. */
01090                                           if (walk_last == '\\') {
01091                                                  walk++;
01092                                                  walk_last = 0;
01093                                                  continue;
01094                                           }
                                                /* References to groups >= count expand to nothing */
01095                                           if (preg_get_backref(&walk, &backref)) {
01096                                                  if (backref < count)
01097                                                         new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
01098                                                  continue;
01099                                           }
01100                                    }
01101                                    new_len++;
01102                                    walk++;
01103                                    walk_last = walk[-1];
01104                             }
01105                      }
01106 
                           /* Grow the buffer when the result plus terminator would not
                            * fit; existing output is carried over. */
01107                      if (new_len + 1 > alloc_len) {
01108                             alloc_len = 1 + alloc_len + 2 * new_len;
01109                             new_buf = emalloc(alloc_len);
01110                             memcpy(new_buf, result, *result_len);
01111                             efree(result);
01112                             result = new_buf;
01113                      }
01114                      /* copy the part of the string before the match */
01115                      memcpy(&result[*result_len], piece, match-piece);
01116                      *result_len += match-piece;
01117 
01118                      /* copy replacement and backrefs */
01119                      walkbuf = result + *result_len;
01120                      
01121                      /* If evaluating or using custom function, copy result to the buffer
01122                       * and clean up. */
01123                      if (eval || is_callable_replace) {
01124                             memcpy(walkbuf, eval_result, eval_result_len);
01125                             *result_len += eval_result_len;
01126                             STR_FREE(eval_result);
01127                      } else { /* do regular backreference copying */
                                  /* Second pass over the template: same walk as the
                                   * measuring pass above, this time writing the output. */
01128                             walk = replace;
01129                             walk_last = 0;
01130                             while (walk < replace_end) {
01131                                    if ('\\' == *walk || '$' == *walk) {
01132                                           if (walk_last == '\\') {
                                                       /* Replace the backslash already written */
01133                                                  *(walkbuf-1) = *walk++;
01134                                                  walk_last = 0;
01135                                                  continue;
01136                                           }
01137                                           if (preg_get_backref(&walk, &backref)) {
01138                                                  if (backref < count) {
01139                                                         match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
01140                                                         memcpy(walkbuf, subject + offsets[backref<<1], match_len);
01141                                                         walkbuf += match_len;
01142                                                  }
01143                                                  continue;
01144                                           }
01145                                    }
01146                                    *walkbuf++ = *walk++;
01147                                    walk_last = walk[-1];
01148                             }
01149                             *walkbuf = '\0';
01150                             /* increment the result length by how much we've added to the string */
01151                             *result_len += walkbuf - (result + *result_len);
01152                      }
01153 
                           /* Consume one unit of a finite limit */
01154                      if (limit != -1)
01155                             limit--;
01156 
                    /* Nothing (more) to replace: either no match, or the limit is spent */
01157               } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
01158                      /* If we previously set PCRE_NOTEMPTY after a null match,
01159                         this is not necessarily the end. We need to advance
01160                         the start offset, and continue. Fudge the offset values
01161                         to achieve this, unless we're already at the end of the string. */
01162                      if (g_notempty != 0 && start_offset < subject_len) {
                                  /* NOTE(review): advances and copies exactly one byte; if the
                                   * pattern runs in UTF-8 mode this looks like it could split a
                                   * multi-byte character - confirm. */
01163                             offsets[0] = start_offset;
01164                             offsets[1] = start_offset + 1;
01165                             memcpy(&result[*result_len], piece, 1);
01166                             (*result_len)++;
01167                      } else {
                                  /* Final step: append the untouched remainder of the subject,
                                   * NUL-terminate, and leave the loop. */
01168                             new_len = *result_len + subject_len - start_offset;
01169                             if (new_len + 1 > alloc_len) {
01170                                    alloc_len = new_len + 1; /* now we know exactly how long it is */
01171                                    new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
01172                                    memcpy(new_buf, result, *result_len);
01173                                    efree(result);
01174                                    result = new_buf;
01175                             }
01176                             /* stick that last bit of string on our output */
01177                             memcpy(&result[*result_len], piece, subject_len - start_offset);
01178                             *result_len += subject_len - start_offset;
01179                             result[*result_len] = '\0';
01180                             break;
01181                      }
01182               } else {
                           /* Any other pcre_exec() failure: record it, drop the partial
                            * output and make the function return NULL. */
01183                      pcre_handle_exec_error(count TSRMLS_CC);
01184                      efree(result);
01185                      result = NULL;
01186                      break;
01187               }
01188                      
01189               /* If we have matched an empty string, mimic what Perl's /g options does.
01190                  This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
01191                  the match again at the same point. If this fails (picked up above) we
01192                  advance to the next character. */
01193               g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
01194               
01195               /* Advance to the next piece. */
01196               start_offset = offsets[1];
01197        }
01198 
             /* Scratch tables are released on both the success and the error path */
01199        efree(offsets);
01200        efree(subpat_names);
01201 
01202        return result;
01203 }
01204 /* }}} */
01205 
01206 /* {{{ php_replace_in_subject
01207  */
01208 static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
01209 {
01210        zval          **regex_entry,
01211                             **replace_entry = NULL,
01212                              *replace_value,
01213                               empty_replace;
01214        char          *subject_value,
01215                             *result;
01216        int                   subject_len;
01217 
01218        /* Make sure we're dealing with strings. */      
01219        convert_to_string_ex(subject);
01220        /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
01221        ZVAL_STRINGL(&empty_replace, "", 0, 0);
01222        
01223        /* If regex is an array */
01224        if (Z_TYPE_P(regex) == IS_ARRAY) {
01225               /* Duplicate subject string for repeated replacement */
01226               subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
01227               subject_len = Z_STRLEN_PP(subject);
01228               *result_len = subject_len;
01229               
01230               zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
01231 
01232               replace_value = replace;
01233               if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
01234                      zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
01235 
01236               /* For each entry in the regex array, get the entry */
01237               while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) {
01238                      /* Make sure we're dealing with strings. */      
01239                      convert_to_string_ex(regex_entry);
01240               
01241                      /* If replace is an array and not a callable construct */
01242                      if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
01243                             /* Get current entry */
01244                             if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
01245                                    if (!is_callable_replace) {
01246                                           convert_to_string_ex(replace_entry);
01247                                    }
01248                                    replace_value = *replace_entry;
01249                                    zend_hash_move_forward(Z_ARRVAL_P(replace));
01250                             } else {
01251                                    /* We've run out of replacement strings, so use an empty one */
01252                                    replace_value = &empty_replace;
01253                             }
01254                      }
01255                      
01256                      /* Do the actual replacement and put the result back into subject_value
01257                         for further replacements. */
01258                      if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
01259                                                                          Z_STRLEN_PP(regex_entry),
01260                                                                          subject_value,
01261                                                                          subject_len,
01262                                                                          replace_value,
01263                                                                          is_callable_replace,
01264                                                                          result_len,
01265                                                                          limit,
01266                                                                          replace_count TSRMLS_CC)) != NULL) {
01267                             efree(subject_value);
01268                             subject_value = result;
01269                             subject_len = *result_len;
01270                      } else {
01271                             efree(subject_value);
01272                             return NULL;
01273                      }
01274 
01275                      zend_hash_move_forward(Z_ARRVAL_P(regex));
01276               }
01277 
01278               return subject_value;
01279        } else {
01280               result = php_pcre_replace(Z_STRVAL_P(regex),
01281                                                           Z_STRLEN_P(regex),
01282                                                           Z_STRVAL_PP(subject),
01283                                                           Z_STRLEN_PP(subject),
01284                                                           replace,
01285                                                           is_callable_replace,
01286                                                           result_len,
01287                                                           limit,
01288                                                           replace_count TSRMLS_CC);
01289               return result;
01290        }
01291 }
01292 /* }}} */
01293 
01294 /* {{{ preg_replace_impl
01295  */
01296 static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
01297 {
01298        zval             **regex,
01299                                **replace,
01300                                **subject,
01301                                **subject_entry,
01302                                **zcount = NULL;
01303        char                 *result;
01304        int                          result_len;
01305        int                          limit_val = -1;
01306        long                 limit = -1;
01307        char                 *string_key;
01308        ulong                 num_key;
01309        char                 *callback_name;
01310        int                          replace_count=0, old_replace_count;
01311        
01312        /* Get function parameters and do error-checking. */
01313        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
01314               return;
01315        }
01316        
01317        if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
01318               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
01319               RETURN_FALSE;
01320        }
01321 
01322        SEPARATE_ZVAL(replace);
01323        if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
01324               convert_to_string_ex(replace);
01325        }
01326        if (is_callable_replace) {
01327               if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
01328                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
01329                      efree(callback_name);
01330                      MAKE_COPY_ZVAL(subject, return_value);
01331                      return;
01332               }
01333               efree(callback_name);
01334        }
01335 
01336        SEPARATE_ZVAL(regex);
01337        SEPARATE_ZVAL(subject);
01338 
01339        if (ZEND_NUM_ARGS() > 3) {
01340               limit_val = limit;
01341        }
01342               
01343        if (Z_TYPE_PP(regex) != IS_ARRAY)
01344               convert_to_string_ex(regex);
01345        
01346        /* if subject is an array */
01347        if (Z_TYPE_PP(subject) == IS_ARRAY) {
01348               array_init(return_value);
01349               zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
01350 
01351               /* For each subject entry, convert it to string, then perform replacement
01352                  and add the result to the return_value array. */
01353               while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
01354                      SEPARATE_ZVAL(subject_entry);
01355                      old_replace_count = replace_count;
01356                      if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
01357                             if (!is_filter || replace_count > old_replace_count) {
01358                                    /* Add to return array */
01359                                    switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
01360                                    {
01361                                    case HASH_KEY_IS_STRING:
01362                                           add_assoc_stringl(return_value, string_key, result, result_len, 0);
01363                                           break;
01364 
01365                                    case HASH_KEY_IS_LONG:
01366                                           add_index_stringl(return_value, num_key, result, result_len, 0);
01367                                           break;
01368                                    }
01369                             } else {
01370                                    efree(result);
01371                             }
01372                      }
01373               
01374                      zend_hash_move_forward(Z_ARRVAL_PP(subject));
01375               }
01376        } else {      /* if subject is not an array */
01377               old_replace_count = replace_count;
01378               if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
01379                      if (!is_filter || replace_count > old_replace_count) {
01380                             RETVAL_STRINGL(result, result_len, 0);
01381                      } else {
01382                             efree(result);
01383                      }
01384               }
01385        }
01386        if (ZEND_NUM_ARGS() > 4) {
01387               zval_dtor(*zcount);
01388               ZVAL_LONG(*zcount, replace_count);
01389        }
01390        
01391 }
01392 /* }}} */
01393 
01394 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
01395    Perform Perl-style regular expression replacement. */
01396 static PHP_FUNCTION(preg_replace)
01397 {
01398        preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
01399 }
01400 /* }}} */
01401 
01402 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
01403    Perform Perl-style regular expression replacement using replacement callback. */
01404 static PHP_FUNCTION(preg_replace_callback)
01405 {
01406        preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
01407 }
01408 /* }}} */
01409 
01410 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
01411    Perform Perl-style regular expression replacement and only return matches. */
01412 static PHP_FUNCTION(preg_filter)
01413 {
01414        preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
01415 }
01416 /* }}} */
01417 
01418 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) 
01419    Split string into an array using a perl-style regular expression as a delimiter */
01420 static PHP_FUNCTION(preg_split)
01421 {
01422        char                        *regex;                     /* Regular expression */
01423        char                        *subject;            /* String to match against */
01424        int                                 regex_len;
01425        int                                 subject_len;
01426        long                         limit_val = -1;/* Integer value of limit */
01427        long                         flags = 0;          /* Match control flags */
01428        pcre_cache_entry     *pce;                /* Compiled regular expression */
01429 
01430        /* Get function parameters and do error checking */     
01431        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", &regex, &regex_len,
01432                                                    &subject, &subject_len, &limit_val, &flags) == FAILURE) {
01433               RETURN_FALSE;
01434        }
01435        
01436        /* Compile regex or get it from cache. */
01437        if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
01438               RETURN_FALSE;
01439        }
01440 
01441        php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
01442 }
01443 /* }}} */
01444 
01445 /* {{{ php_pcre_split
01446  */
01447 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
01448        long limit_val, long flags TSRMLS_DC)
01449 {
01450        pcre_extra           *extra = NULL;              /* Holds results of studying */
01451        pcre                 *re_bump = NULL;     /* Regex instance for empty matches */
01452        pcre_extra           *extra_bump = NULL;  /* Almost dummy */
01453        pcre_extra            extra_data;         /* Used locally for exec options */
01454        int                         *offsets;                   /* Array of subpattern offsets */
01455        int                          size_offsets;              /* Size of the offsets array */
01456        int                          exoptions = 0;             /* Execution options */
01457        int                          count = 0;                 /* Count of matched subpatterns */
01458        int                          start_offset;              /* Where the new search starts */
01459        int                          next_offset;        /* End of the last delimiter match + 1 */
01460        int                          g_notempty = 0;     /* If the match should not be empty */
01461        char                 *last_match;         /* Location of last match */
01462        int                          rc;
01463        int                          no_empty;                  /* If NO_EMPTY flag is set */
01464        int                          delim_capture;      /* If delimiters should be captured */
01465        int                          offset_capture;     /* If offsets should be captured */
01466 
01467        no_empty = flags & PREG_SPLIT_NO_EMPTY;
01468        delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
01469        offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
01470        
01471        if (limit_val == 0) {
01472               limit_val = -1;
01473        }
01474 
01475        if (extra == NULL) {
01476               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
01477               extra = &extra_data;
01478        }
01479        extra->match_limit = PCRE_G(backtrack_limit);
01480        extra->match_limit_recursion = PCRE_G(recursion_limit);
01481        
01482        /* Initialize return value */
01483        array_init(return_value);
01484 
01485        /* Calculate the size of the offsets array, and allocate memory for it. */
01486        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
01487        if (rc < 0) {
01488               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
01489               RETURN_FALSE;
01490        }
01491        size_offsets = (size_offsets + 1) * 3;
01492        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
01493        
01494        /* Start at the beginning of the string */
01495        start_offset = 0;
01496        next_offset = 0;
01497        last_match = subject;
01498        PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
01499        
01500        /* Get next piece if no limit or limit not yet reached and something matched*/
01501        while ((limit_val == -1 || limit_val > 1)) {
01502               count = pcre_exec(pce->re, extra, subject,
01503                                             subject_len, start_offset,
01504                                             exoptions|g_notempty, offsets, size_offsets);
01505 
01506               /* the string was already proved to be valid UTF-8 */
01507               exoptions |= PCRE_NO_UTF8_CHECK;
01508 
01509               /* Check for too many substrings condition. */
01510               if (count == 0) {
01511                      php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
01512                      count = size_offsets/3;
01513               }
01514                             
01515               /* If something matched */
01516               if (count > 0) {
01517                      if (!no_empty || &subject[offsets[0]] != last_match) {
01518 
01519                             if (offset_capture) {
01520                                    /* Add (match, offset) pair to the return value */
01521                                    add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
01522                             } else {
01523                                    /* Add the piece to the return value */
01524                                    add_next_index_stringl(return_value, last_match,
01525                                                                   &subject[offsets[0]]-last_match, 1);
01526                             }
01527 
01528                             /* One less left to do */
01529                             if (limit_val != -1)
01530                                    limit_val--;
01531                      }
01532                      
01533                      last_match = &subject[offsets[1]];
01534                      next_offset = offsets[1];
01535 
01536                      if (delim_capture) {
01537                             int i, match_len;
01538                             for (i = 1; i < count; i++) {
01539                                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
01540                                    /* If we have matched a delimiter */
01541                                    if (!no_empty || match_len > 0) {
01542                                           if (offset_capture) {
01543                                                  add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
01544                                           } else {
01545                                                  add_next_index_stringl(return_value,
01546                                                                                        &subject[offsets[i<<1]],
01547                                                                                        match_len, 1);
01548                                           }
01549                                    }
01550                             }
01551                      }
01552               } else if (count == PCRE_ERROR_NOMATCH) {
01553                      /* If we previously set PCRE_NOTEMPTY after a null match,
01554                         this is not necessarily the end. We need to advance
01555                         the start offset, and continue. Fudge the offset values
01556                         to achieve this, unless we're already at the end of the string. */
01557                      if (g_notempty != 0 && start_offset < subject_len) {
01558                             if (pce->compile_options & PCRE_UTF8) {
01559                                    if (re_bump == NULL) {
01560                                           int dummy;
01561 
01562                                           if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
01563                                                  RETURN_FALSE;
01564                                           }
01565                                    }
01566                                    count = pcre_exec(re_bump, extra_bump, subject,
01567                                                    subject_len, start_offset,
01568                                                    exoptions, offsets, size_offsets);
01569                                    if (count < 1) {
01570                                           php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
01571                                           RETURN_FALSE;
01572                                    }
01573                             } else {
01574                                    offsets[0] = start_offset;
01575                                    offsets[1] = start_offset + 1;
01576                             }
01577                      } else
01578                             break;
01579               } else {
01580                      pcre_handle_exec_error(count TSRMLS_CC);
01581                      break;
01582               }
01583 
01584               /* If we have matched an empty string, mimic what Perl's /g options does.
01585                  This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
01586                  the match again at the same point. If this fails (picked up above) we
01587                  advance to the next character. */
01588               g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
01589               
01590               /* Advance to the position right after the last full match */
01591               start_offset = offsets[1];
01592        }
01593 
01594 
01595        start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
01596 
01597        if (!no_empty || start_offset < subject_len)
01598        {
01599               if (offset_capture) {
01600                      /* Add the last (match, offset) pair to the return value */
01601                      add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
01602               } else {
01603                      /* Add the last piece to the return value */
01604                      add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
01605               }
01606        }
01607 
01608        
01609        /* Clean up */
01610        efree(offsets);
01611 }
01612 /* }}} */
01613 
01614 /* {{{ proto string preg_quote(string str [, string delim_char])
01615    Quote regular expression characters plus an optional character */
01616 static PHP_FUNCTION(preg_quote)
01617 {
01618        int            in_str_len;
01619        char   *in_str;             /* Input string argument */
01620        char   *in_str_end;    /* End of the input string */
01621        int            delim_len = 0;
01622        char   *delim = NULL;       /* Additional delimiter argument */
01623        char   *out_str,            /* Output string with quoted characters */
01624                      *p,                         /* Iterator for input string */
01625                      *q,                         /* Iterator for output string */
01626                       delim_char=0,       /* Delimiter character to be quoted */
01627                       c;                         /* Current character */
01628        zend_bool quote_delim = 0; /* Whether to quote additional delim char */
01629        
01630        /* Get the arguments and check for errors */
01631        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
01632                                                    &delim, &delim_len) == FAILURE) {
01633               return;
01634        }
01635        
01636        in_str_end = in_str + in_str_len;
01637 
01638        /* Nothing to do if we got an empty string */
01639        if (in_str == in_str_end) {
01640               RETURN_EMPTY_STRING();
01641        }
01642 
01643        if (delim && *delim) {
01644               delim_char = delim[0];
01645               quote_delim = 1;
01646        }
01647        
01648        /* Allocate enough memory so that even if each character
01649           is quoted, we won't run out of room */
01650        out_str = safe_emalloc(4, in_str_len, 1);
01651        
01652        /* Go through the string and quote necessary characters */
01653        for(p = in_str, q = out_str; p != in_str_end; p++) {
01654               c = *p;
01655               switch(c) {
01656                      case '.':
01657                      case '\\':
01658                      case '+':
01659                      case '*':
01660                      case '?':
01661                      case '[':
01662                      case '^':
01663                      case ']':
01664                      case '$':
01665                      case '(':
01666                      case ')':
01667                      case '{':
01668                      case '}':
01669                      case '=':
01670                      case '!':
01671                      case '>':
01672                      case '<':
01673                      case '|':
01674                      case ':':
01675                      case '-':
01676                             *q++ = '\\';
01677                             *q++ = c;
01678                             break;
01679 
01680                      case '\0':
01681                             *q++ = '\\';
01682                             *q++ = '0';
01683                             *q++ = '0';
01684                             *q++ = '0';
01685                             break;
01686 
01687                      default:
01688                             if (quote_delim && c == delim_char)
01689                                    *q++ = '\\';
01690                             *q++ = c;
01691                             break;
01692               }
01693        }
01694        *q = '\0';
01695        
01696        /* Reallocate string and return it */
01697        RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
01698 }
01699 /* }}} */
01700 
01701 /* {{{ proto array preg_grep(string regex, array input [, int flags])
01702    Searches array and returns entries which match regex */
01703 static PHP_FUNCTION(preg_grep)
01704 {
01705        char                        *regex;                     /* Regular expression */
01706        int                                 regex_len;
01707        zval                        *input;                     /* Input array */
01708        long                         flags = 0;          /* Match control flags */
01709        pcre_cache_entry     *pce;                /* Compiled regular expression */
01710 
01711        /* Get arguments and do error checking */
01712        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len,
01713                                                    &input, &flags) == FAILURE) {
01714               return;
01715        }
01716        
01717        /* Compile regex or get it from cache. */
01718        if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
01719               RETURN_FALSE;
01720        }
01721        
01722        php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
01723 }
01724 /* }}} */
01725 
01726 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
01727 {
01728        zval             **entry;                        /* An entry in the input array */
01729        pcre_extra           *extra = pce->extra;/* Holds results of studying */
01730        pcre_extra            extra_data;         /* Used locally for exec options */
01731        int                         *offsets;                   /* Array of subpattern offsets */
01732        int                          size_offsets;              /* Size of the offsets array */
01733        int                          count = 0;                 /* Count of matched subpatterns */
01734        char                 *string_key;
01735        ulong                 num_key;
01736        zend_bool             invert;                    /* Whether to return non-matching
01737                                                                          entries */
01738        int                          rc;
01739        
01740        invert = flags & PREG_GREP_INVERT ? 1 : 0;
01741        
01742        if (extra == NULL) {
01743               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
01744               extra = &extra_data;
01745        }
01746        extra->match_limit = PCRE_G(backtrack_limit);
01747        extra->match_limit_recursion = PCRE_G(recursion_limit);
01748 
01749        /* Calculate the size of the offsets array, and allocate memory for it. */
01750        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
01751        if (rc < 0) {
01752               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
01753               RETURN_FALSE;
01754        }
01755        size_offsets = (size_offsets + 1) * 3;
01756        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
01757        
01758        /* Initialize return array */
01759        array_init(return_value);
01760 
01761        PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
01762 
01763        /* Go through the input array */
01764        zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
01765        while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
01766               zval subject = **entry;
01767 
01768               if (Z_TYPE_PP(entry) != IS_STRING) {
01769                      zval_copy_ctor(&subject);
01770                      convert_to_string(&subject);
01771               }
01772 
01773               /* Perform the match */
01774               count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
01775                                             Z_STRLEN(subject), 0,
01776                                             0, offsets, size_offsets);
01777 
01778               /* Check for too many substrings condition. */
01779               if (count == 0) {
01780                      php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
01781                      count = size_offsets/3;
01782               } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
01783                      pcre_handle_exec_error(count TSRMLS_CC);
01784                      break;
01785               }
01786 
01787               /* If the entry fits our requirements */
01788               if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
01789 
01790                      Z_ADDREF_PP(entry);
01791 
01792                      /* Add to return array */
01793                      switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
01794                      {
01795                             case HASH_KEY_IS_STRING:
01796                                    zend_hash_update(Z_ARRVAL_P(return_value), string_key,
01797                                                                 strlen(string_key)+1, entry, sizeof(zval *), NULL);
01798                                    break;
01799 
01800                             case HASH_KEY_IS_LONG:
01801                                    zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
01802                                                                          sizeof(zval *), NULL);
01803                                    break;
01804                      }
01805               }
01806 
01807               if (Z_TYPE_PP(entry) != IS_STRING) {
01808                      zval_dtor(&subject);
01809               }
01810 
01811               zend_hash_move_forward(Z_ARRVAL_P(input));
01812        }
01813        zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
01814        /* Clean up */
01815        efree(offsets);
01816 }
01817 /* }}} */
01818 
01819 /* {{{ proto int preg_last_error()
01820    Returns the error code of the last regexp execution. */
01821 static PHP_FUNCTION(preg_last_error)
01822 {
01823        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
01824               return;
01825        }
01826 
01827        RETURN_LONG(PCRE_G(error_code));
01828 }
01829 /* }}} */
01830 
01831 /* {{{ module definition structures */
01832 
01833 /* {{{ arginfo */
01834 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
01835     ZEND_ARG_INFO(0, pattern)
01836     ZEND_ARG_INFO(0, subject)
01837     ZEND_ARG_INFO(1, subpatterns) /* array */
01838     ZEND_ARG_INFO(0, flags)
01839     ZEND_ARG_INFO(0, offset)
01840 ZEND_END_ARG_INFO()
01841 
01842 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3)
01843     ZEND_ARG_INFO(0, pattern)
01844     ZEND_ARG_INFO(0, subject)
01845     ZEND_ARG_INFO(1, subpatterns) /* array */
01846     ZEND_ARG_INFO(0, flags)
01847     ZEND_ARG_INFO(0, offset)
01848 ZEND_END_ARG_INFO()
01849 
01850 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
01851     ZEND_ARG_INFO(0, regex)
01852     ZEND_ARG_INFO(0, replace)
01853     ZEND_ARG_INFO(0, subject)
01854     ZEND_ARG_INFO(0, limit)
01855     ZEND_ARG_INFO(1, count)
01856 ZEND_END_ARG_INFO()
01857 
01858 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
01859     ZEND_ARG_INFO(0, regex)
01860     ZEND_ARG_INFO(0, callback)
01861     ZEND_ARG_INFO(0, subject)
01862     ZEND_ARG_INFO(0, limit)
01863     ZEND_ARG_INFO(1, count)
01864 ZEND_END_ARG_INFO()
01865 
01866 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
01867     ZEND_ARG_INFO(0, pattern)
01868     ZEND_ARG_INFO(0, subject)
01869     ZEND_ARG_INFO(0, limit)
01870     ZEND_ARG_INFO(0, flags) 
01871 ZEND_END_ARG_INFO()
01872 
01873 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
01874     ZEND_ARG_INFO(0, str)
01875     ZEND_ARG_INFO(0, delim_char)
01876 ZEND_END_ARG_INFO()
01877 
01878 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
01879     ZEND_ARG_INFO(0, regex)
01880     ZEND_ARG_INFO(0, input) /* array */
01881     ZEND_ARG_INFO(0, flags)
01882 ZEND_END_ARG_INFO()
01883 
01884 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
01885 ZEND_END_ARG_INFO()
01886 /* }}} */
01887 
01888 static const zend_function_entry pcre_functions[] = {
01889        PHP_FE(preg_match,                        arginfo_preg_match)
01890        PHP_FE(preg_match_all,                    arginfo_preg_match_all)
01891        PHP_FE(preg_replace,               arginfo_preg_replace)
01892        PHP_FE(preg_replace_callback,      arginfo_preg_replace_callback)
01893        PHP_FE(preg_filter,                       arginfo_preg_replace)
01894        PHP_FE(preg_split,                        arginfo_preg_split)
01895        PHP_FE(preg_quote,                        arginfo_preg_quote)
01896        PHP_FE(preg_grep,                         arginfo_preg_grep)
01897        PHP_FE(preg_last_error,                   arginfo_preg_last_error)
01898        PHP_FE_END
01899 };
01900 
01901 zend_module_entry pcre_module_entry = {
01902        STANDARD_MODULE_HEADER,
01903    "pcre",
01904        pcre_functions,
01905        PHP_MINIT(pcre),
01906        PHP_MSHUTDOWN(pcre),
01907        NULL,
01908        NULL,
01909        PHP_MINFO(pcre),
01910        NO_VERSION_YET,
01911        PHP_MODULE_GLOBALS(pcre),
01912        PHP_GINIT(pcre),
01913        PHP_GSHUTDOWN(pcre),
01914        NULL,
01915        STANDARD_MODULE_PROPERTIES_EX
01916 };
01917 
/* Emitted only when COMPILE_DL_PCRE is defined */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif /* COMPILE_DL_PCRE */
01921 
01922 /* }}} */
01923 
01924 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
01925 
01926 /*
01927  * Local variables:
01928  * tab-width: 4
01929  * c-basic-offset: 4
01930  * End:
01931  * vim600: sw=4 ts=4 fdm=marker
01932  * vim<600: sw=4 ts=4
01933  */