Back to index

php5  5.3.10
Classes | Defines | Functions | Variables
ereg.c File Reference
#include <stdio.h>
#include <ctype.h>
#include "php.h"
#include "ext/standard/php_string.h"
#include "php_ereg.h"
#include "ext/standard/info.h"

Go to the source code of this file.

Classes

struct  reg_cache

Defines

#define EREG_CACHE_SIZE   4096
#define regfree(a)   ;
#define regcomp(a, b, c)   _php_regcomp(a, b, c)

Functions

 ZEND_ARG_INFO (1, registers) const
static int ereg_lru_cmp (const void *a, const void *b TSRMLS_DC)
static int ereg_clean_cache (void *data, void *arg TSRMLS_DC)
static int _php_regcomp (regex_t *preg, const char *pattern, int cflags)
static void _free_ereg_cache (reg_cache *rc)
static void php_ereg_init_globals (zend_ereg_globals *ereg_globals TSRMLS_DC)
static void php_ereg_destroy_globals (zend_ereg_globals *ereg_globals TSRMLS_DC)
 PHP_MINIT_FUNCTION (ereg)
 PHP_MSHUTDOWN_FUNCTION (ereg)
 PHP_MINFO_FUNCTION (ereg)
static void php_ereg_eprint (int err, regex_t *re)
static void php_ereg (INTERNAL_FUNCTION_PARAMETERS, int icase)
 PHP_FUNCTION (ereg)
 PHP_FUNCTION (eregi)
PHPAPI char * php_ereg_replace (const char *pattern, const char *replace, const char *string, int icase, int extended)
static void php_do_ereg_replace (INTERNAL_FUNCTION_PARAMETERS, int icase)
 PHP_FUNCTION (ereg_replace)
 PHP_FUNCTION (eregi_replace)
static void php_split (INTERNAL_FUNCTION_PARAMETERS, int icase)
 PHP_FUNCTION (split)
 PHP_FUNCTION (spliti)
PHPAPI PHP_FUNCTION (sql_regcase)

Variables

static int reg_magic = 0
zend_module_entry ereg_module_entry

Class Documentation

struct reg_cache

Definition at line 67 of file ereg.c.

Collaboration diagram for reg_cache:
Class Members
int cflags
unsigned long lastuse
regex_t preg

Define Documentation

#define EREG_CACHE_SIZE   4096

Definition at line 73 of file ereg.c.

#define regcomp (   a,
  b,
  c 
)    _php_regcomp(a, b, c)

Definition at line 204 of file ereg.c.

#define regfree (   a)    ;

Definition at line 202 of file ereg.c.


Function Documentation

static void _free_ereg_cache ( reg_cache *  rc) [static]

Definition at line 196 of file ereg.c.

{
       /*
        * Callback passed to zend_hash_init() for the regex cache: releases the
        * compiled expression stored in one cache entry.  This body sits at
        * line 196 of ereg.c, before the no-op regfree() macro defined at
        * line 202, so the call below is not replaced by that macro.
        */
       regex_t *compiled = &rc->preg;

       regfree(compiled);
}

Here is the caller graph for this function:

static int _php_regcomp ( regex_t *  preg,
const char *  pattern,
int  cflags 
) [static]

Definition at line 127 of file ereg.c.

{
       /*
        * Caching front end for pattern compilation.
        *
        * preg    - receives the compiled expression (a byte copy of the cached
        *           regex_t on a cache hit).
        * pattern - NUL-terminated pattern; it is also the cache key (the key
        *           length includes the terminating NUL).
        * cflags  - compile flags; a cached entry compiled with different flags
        *           is treated as a miss and overwritten.
        *
        * Returns 0 on a cache hit or successful compile, otherwise the error
        * code returned by the compile call.
        *
        * This body sits at line 127 of ereg.c, before the regcomp() macro
        * defined at line 204, so the regcomp() calls below are not redirected
        * back into this function.
        *
        * NOTE(review): a cache hit does not refresh rc->lastuse, so eviction
        * order follows compile time rather than most recent use - confirm
        * whether that is intended.
        */
       int r = 0;
       int patlen = strlen(pattern);
       reg_cache *rc = NULL;
       TSRMLS_FETCH();

       if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) {
              /* easier than dealing with overflow as it happens */
              /* 1U keeps the shift unsigned: 1 << 31 on a 32-bit int is undefined behaviour */
              if (EREG(lru_counter) >= (1U << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) {
                     /* counter about to wrap, or sort failed: drop everything */
                     zend_hash_clean(&EREG(ht_rc));
                     EREG(lru_counter) = 0;
              } else {
                     /* cache is now ordered by lastuse; remove the first quarter */
                     int num_clean = EREG_CACHE_SIZE / 4;
                     zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC);
              }
       }

       if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
          && rc->cflags == cflags) {
#ifdef HAVE_REGEX_T_RE_MAGIC
              /*
               * We use a saved magic number to see whether cache is corrupted, and if it
               * is, we flush it and compile the pattern from scratch.
               */
              if (rc->preg.re_magic != reg_magic) {
                     zend_hash_clean(&EREG(ht_rc));
                     EREG(lru_counter) = 0;
              } else {
                     memcpy(preg, &rc->preg, sizeof(*preg));
                     return r;
              }
       }

       /* miss, flag mismatch, or flushed cache: compile and remember the result */
       r = regcomp(preg, pattern, cflags);
       if(!r) {
              reg_cache rcp;

              rcp.cflags = cflags;
              rcp.lastuse = ++(EREG(lru_counter));
              memcpy(&rcp.preg, preg, sizeof(*preg));
              /*
               * Since we don't have access to the actual MAGIC1 definition in the private
               * header file, we save the magic value immediately after compilation. Hopefully,
               * it's good.
               */
              if (!reg_magic) reg_magic = preg->re_magic;
              zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
                                           (void *) &rcp, sizeof(rcp), NULL);
       }
#else
              /* cache hit: hand the caller a copy of the stored regex_t */
              memcpy(preg, &rc->preg, sizeof(*preg));
       } else {
              r = regcomp(preg, pattern, cflags);
              if(!r) {
                     reg_cache rcp;

                     rcp.cflags = cflags;
                     rcp.lastuse = ++(EREG(lru_counter));
                     memcpy(&rcp.preg, preg, sizeof(*preg));
                     zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
                                                  (void *) &rcp, sizeof(rcp), NULL);
              }
       }
#endif
       return r;
}

Here is the call graph for this function:

static int ereg_clean_cache ( void *  data,
void *arg  TSRMLS_DC 
) [static]

Definition at line 112 of file ereg.c.

{
       /*
        * Apply-callback for the cache: `arg` points at an int holding how many
        * entries still have to be removed.  While that count is positive the
        * visited entry is removed and the count decremented; afterwards the
        * traversal is stopped.  `data` is not inspected.
        */
       int *remaining = (int *) arg;

       if (*remaining <= 0) {
              return ZEND_HASH_APPLY_STOP;
       }

       --(*remaining);
       return ZEND_HASH_APPLY_REMOVE;
}

Here is the caller graph for this function:

static int ereg_lru_cmp ( const void *  a,
const void *b  TSRMLS_DC 
) [static]

Definition at line 94 of file ereg.c.

{
       /*
        * Sort comparator for cache buckets: orders two entries by ascending
        * reg_cache.lastuse.  `a` and `b` each point at a Bucket pointer.
        * Returns -1, 0 or 1.
        */
       Bucket *first = *((Bucket **) a);
       Bucket *second = *((Bucket **) b);
       unsigned long first_use = ((reg_cache *) first->pData)->lastuse;
       unsigned long second_use = ((reg_cache *) second->pData)->lastuse;

       if (first_use == second_use) {
              return 0;
       }
       return (first_use < second_use) ? -1 : 1;
}

Here is the caller graph for this function:

static void php_do_ereg_replace ( INTERNAL_FUNCTION_PARAMETERS  ,
int  icase 
) [static]

Definition at line 550 of file ereg.c.

{
       /*
        * Shared implementation behind the replace-style userland functions.
        * Parses (pattern, replacement, string), makes private copies of all
        * three, runs php_ereg_replace() with extended syntax enabled, and sets
        * the return value to the resulting string, or FALSE when
        * php_ereg_replace() reports failure via (char *) -1.
        *
        * A non-string pattern or replacement is converted to a long and used
        * as a single character with that value.
        */
       zval **arg_pattern, **arg_replace;
       char *arg_string;
       int arg_string_len;
       char *pattern, *replace, *string;
       char *ret;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZs", &arg_pattern, &arg_replace, &arg_string, &arg_string_len) == FAILURE) {
              return;
       }

       /* private copy of the pattern */
       if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
              convert_to_long_ex(arg_pattern);
              pattern = emalloc(2);
              pattern[0] = (char) Z_LVAL_PP(arg_pattern);
              pattern[1] = '\0';
       } else if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) {
              pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
       } else {
              pattern = STR_EMPTY_ALLOC();
       }

       /* private copy of the replacement, same rules as the pattern */
       if (Z_TYPE_PP(arg_replace) != IS_STRING) {
              convert_to_long_ex(arg_replace);
              replace = emalloc(2);
              replace[0] = (char) Z_LVAL_PP(arg_replace);
              replace[1] = '\0';
       } else if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) {
              replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
       } else {
              replace = STR_EMPTY_ALLOC();
       }

       /* private copy of the subject string */
       if (arg_string && arg_string_len) {
              string = estrndup(arg_string, arg_string_len);
       } else {
              string = STR_EMPTY_ALLOC();
       }

       /* run the replacement; the result is duplicated into the return value */
       ret = php_ereg_replace(pattern, replace, string, icase, 1);
       if (ret != (char *) -1) {
              RETVAL_STRING(ret, 1);
              STR_FREE(ret);
       } else {
              RETVAL_FALSE;
       }

       STR_FREE(pattern);
       STR_FREE(replace);
       STR_FREE(string);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void php_ereg ( INTERNAL_FUNCTION_PARAMETERS  ,
int  icase 
) [static]

Definition at line 289 of file ereg.c.

{
       /*
        * Shared implementation behind ereg()/eregi().
        *
        * Parses (regex, string [, registers]).  The regex is compiled with
        * REG_EXTENDED when it is a string; any other type is converted to a
        * string (doubles are truncated to long first) and compiled without
        * REG_EXTENDED.  `icase` adds REG_ICASE; with exactly two arguments
        * REG_NOSUB is added as well.
        *
        * Return value: FALSE on compile error, execution error or no match;
        * otherwise the length of the whole match when a registers argument
        * was passed (reported as 1 if that length is 0), or 1 when it was not.
        * When a registers argument is passed and the pattern matched, it is
        * reset to an array holding each (sub-)match, or FALSE for groups
        * that did not capture a non-empty span.
        */
       zval **regex,               /* Regular expression */
              **array = NULL;             /* Optional register array */
       char *findin;        /* String to apply expression to */
       int findin_len;
       regex_t re;
       regmatch_t *subs;
       int err, match_len, string_len;
       uint i;
       int copts = 0;
       off_t start, end;
       char *string = NULL;
       int   argc = ZEND_NUM_ARGS();

       if (zend_parse_parameters(argc TSRMLS_CC, "Zs|Z", &regex, &findin, &findin_len, &array) == FAILURE) {
              return;
       }

       if (icase) {
              copts |= REG_ICASE;
       }
       
       if (argc == 2) {
              copts |= REG_NOSUB;
       }

       /* compile the regular expression from the supplied regex */
       if (Z_TYPE_PP(regex) == IS_STRING) {
              err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
       } else {
              /* we convert numbers to integers and treat them as a string */
              if (Z_TYPE_PP(regex) == IS_DOUBLE) {
                     convert_to_long_ex(regex);  /* get rid of decimal places */
              }
              convert_to_string_ex(regex);
              /* don't bother doing an extended regex with just a number */
              err = regcomp(&re, Z_STRVAL_PP(regex), copts);
       }

       if (err) {
              php_ereg_eprint(err, &re);
              RETURN_FALSE;
       }

       /* make a copy of the string we're looking in */
       string = estrndup(findin, findin_len);

       /* allocate storage for (sub-)expression-matches */
       subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
       
       /* actually execute the regular expression */
       err = regexec(&re, string, re.re_nsub+1, subs, 0);
       if (err && err != REG_NOMATCH) {
              php_ereg_eprint(err, &re);
              regfree(&re);
              efree(subs);
              efree(string);       /* the copy made above must not leak on this path */
              RETURN_FALSE;
       }
       match_len = 1;

       if (array && err != REG_NOMATCH) {
              match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
              string_len = findin_len + 1;

              zval_dtor(*array);   /* start with clean array */
              array_init(*array);

              for (i = 0; i <= re.re_nsub; i++) {
                     start = subs[i].rm_so;
                     end = subs[i].rm_eo;
                     /* only non-empty spans that lie inside the subject are stored */
                     if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
                            add_index_stringl(*array, i, string+start, end-start, 1);
                     } else {
                            add_index_bool(*array, i, 0);
                     }
              }
       }

       efree(subs);
       efree(string);
       if (err == REG_NOMATCH) {
              RETVAL_FALSE;
       } else {
              /* a zero-length match is reported as 1 so the result is never 0 */
              if (match_len == 0)
                     match_len = 1;
              RETVAL_LONG(match_len);
       }
       regfree(&re);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void php_ereg_destroy_globals ( zend_ereg_globals *ereg_globals  TSRMLS_DC) [static]

Definition at line 212 of file ereg.c.

{
       /* Destroy the hash table that backs the compiled-regex cache. */
       HashTable *cache = &ereg_globals->ht_rc;

       zend_hash_destroy(cache);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void php_ereg_eprint ( int  err,
regex_t *  re 
) [static]

Definition at line 247 of file ereg.c.

                                                  {
       /*
        * Raise an E_WARNING describing regex error `err` for expression `re`.
        *
        * When the regex library provides REG_ITOA, the warning text is
        * "<symbolic name>: <message>"; otherwise it is just the message
        * produced by regerror().
        */
       char *buf = NULL, *message = NULL;
       size_t len;
       size_t buf_len;

#ifdef REG_ITOA
       /* get the length of the message */
       buf_len = regerror(REG_ITOA | err, re, NULL, 0);
       if (buf_len) {
              buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
              if (!buf) return; /* fail silently */
              /* finally, get the error message */
              regerror(REG_ITOA | err, re, buf, buf_len);
       }
#else
       buf_len = 0;
#endif
       len = regerror(err, re, NULL, 0);
       if (len) {
              TSRMLS_FETCH();

              /* room for the name (buf_len incl. NUL), ": " and the message (len incl. NUL) */
              message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
              if (!message) {
                     STR_FREE(buf); /* do not leak the name buffer */
                     return; /* fail silently */
              }
              if (buf_len) {
                     /*
                      * buf_len counts the name's terminating NUL, so the name
                      * itself is buf_len - 1 characters.  The size limit must
                      * allow for the two separator characters plus the NUL;
                      * a limit of buf_len would cut off ": " and leave a NUL
                      * in front of the message appended below.
                      */
                     snprintf(message, buf_len + 2, "%s: ", buf);
                     buf_len += 1; /* now the offset of the NUL written above */
              }
              /* drop the message into place */
              regerror(err, re, message + buf_len, len);

              php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
       }

       STR_FREE(buf);
       STR_FREE(message);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void php_ereg_init_globals ( zend_ereg_globals *ereg_globals  TSRMLS_DC) [static]

Definition at line 206 of file ereg.c.

{
       /*
        * Set up the per-module regex cache: zero the use counter and create
        * the hash table with _free_ereg_cache as its element callback.
        * NOTE(review): the trailing 1 is presumably Zend's "persistent" flag -
        * confirm against the zend_hash_init() declaration.
        */
       ereg_globals->lru_counter = 0;
       zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHPAPI char* php_ereg_replace ( const char *  pattern,
const char *  replace,
const char *  string,
int  icase,
int  extended 
)

Definition at line 403 of file ereg.c.

{
       /*
        * Replace every match of `pattern` in `string` with `replace`.
        *
        * pattern  - regular expression source.
        * replace  - replacement text; "\N" (N a single digit no larger than the
        *            number of sub-expressions) is substituted with what group N
        *            matched, "\0" being the whole match.
        * string   - NUL-terminated subject.
        * icase    - non-zero adds REG_ICASE.
        * extended - non-zero adds REG_EXTENDED.
        *
        * Returns a newly allocated NUL-terminated buffer with the result, or
        * (char *) -1 after a compile or execution error (a warning has been
        * raised through php_ereg_eprint() in that case).
        */
       regex_t re;
       regmatch_t *subs;

       char *buf,    /* buf is where we build the replaced string */
               *nbuf,       /* nbuf is used when we grow the buffer */
               *walkbuf; /* used to walk buf when replacing backrefs */
       const char *walk; /* used to walk replacement string for backrefs */
       int buf_len;
       int pos, tmp, string_len, new_l;
       int err, copts = 0;

       string_len = strlen(string);

       if (icase) {
              copts = REG_ICASE;
       }
       if (extended) {
              copts |= REG_EXTENDED;
       }

       err = regcomp(&re, pattern, copts);
       if (err) {
              php_ereg_eprint(err, &re);
              return ((char *) -1);
       }


       /* allocate storage for (sub-)expression-matches */
       subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);

       /* start with a buffer that is twice the size of the string
          we're doing replacements in */
       buf_len = 2 * string_len + 1;
       buf = safe_emalloc(buf_len, sizeof(char), 0);

       err = pos = 0;
       buf[0] = '\0';
       while (!err) {
              /* after the first round we are no longer at the beginning of the line */
              err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));

              if (err && err != REG_NOMATCH) {
                     php_ereg_eprint(err, &re);
                     efree(subs);
                     efree(buf);
                     regfree(&re);
                     return ((char *) -1);
              }

              if (!err) {
                     /* backref replacement is done in two passes:
                        1) find out how long the string will be, and allocate buf
                        2) copy the part before match, replacement and backrefs to buf

                        Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi>
                        */

                     new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
                     walk = replace;
                     while (*walk) {
                            if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
                                   /*
                                    * Same guard as the copy pass below: an inverted span
                                    * (rm_so > rm_eo) copies nothing, so it must not be
                                    * allowed to shrink the size estimate either.
                                    */
                                   if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
                                          && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
                                          new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
                                   }
                                   walk += 2;
                            } else {
                                   new_l++;
                                   walk++;
                            }
                     }
                     if (new_l + 1 > buf_len) {
                            buf_len = 1 + buf_len + 2 * new_l;
                            nbuf = emalloc(buf_len);
                            strncpy(nbuf, buf, buf_len-1);
                            nbuf[buf_len - 1] = '\0';
                            efree(buf);
                            buf = nbuf;
                     }
                     tmp = strlen(buf);
                     /* copy the part of the string before the match */
                     strncat(buf, &string[pos], subs[0].rm_so);

                     /* copy replacement and backrefs */
                     walkbuf = &buf[tmp + subs[0].rm_so];
                     walk = replace;
                     while (*walk) {
                            if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
                                   if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
                                          /* this next case shouldn't happen. it does. */
                                          && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
                                          
                                          tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
                                          memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
                                          walkbuf += tmp;
                                   }
                                   walk += 2;
                            } else {
                                   *walkbuf++ = *walk++;
                            }
                     }
                     *walkbuf = '\0';

                     /* and get ready to keep looking for replacements */
                     if (subs[0].rm_so == subs[0].rm_eo) {
                            /* empty match: copy one subject character and step past it
                               so the loop makes progress */
                            if (subs[0].rm_so + pos >= string_len) {
                                   break;
                            }
                            new_l = strlen (buf) + 1;
                            if (new_l + 1 > buf_len) {
                                   buf_len = 1 + buf_len + 2 * new_l;
                                   nbuf = safe_emalloc(buf_len, sizeof(char), 0);
                                   strncpy(nbuf, buf, buf_len-1);
                                   efree(buf);
                                   buf = nbuf;
                            }
                            pos += subs[0].rm_eo + 1;
                            buf [new_l-1] = string [pos-1];
                            buf [new_l] = '\0';
                     } else {
                            pos += subs[0].rm_eo;
                     }
              } else { /* REG_NOMATCH */
                     new_l = strlen(buf) + strlen(&string[pos]);
                     if (new_l + 1 > buf_len) {
                            buf_len = new_l + 1; /* now we know exactly how long it is */
                            nbuf = safe_emalloc(buf_len, sizeof(char), 0);
                            strncpy(nbuf, buf, buf_len-1);
                            efree(buf);
                            buf = nbuf;
                     }
                     /* stick that last bit of string on our output */
                     strlcat(buf, &string[pos], buf_len);
              }
       }

       /* don't want to leak memory .. */
       efree(subs);
       regfree(&re);

       /* whew. */
       return (buf);
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHP_FUNCTION ( ereg  )

Definition at line 387 of file ereg.c.

Here is the call graph for this function:

PHP_FUNCTION ( eregi  )

Definition at line 395 of file ereg.c.

Here is the call graph for this function:

PHP_FUNCTION ( ereg_replace  )

Definition at line 613 of file ereg.c.

Here is the call graph for this function:

PHP_FUNCTION ( eregi_replace  )

Definition at line 621 of file ereg.c.

Here is the call graph for this function:

PHP_FUNCTION ( split  )

Definition at line 713 of file ereg.c.

Here is the call graph for this function:

PHP_FUNCTION ( spliti  )

Definition at line 722 of file ereg.c.

Here is the call graph for this function:

PHPAPI PHP_FUNCTION ( sql_regcase  )

Definition at line 731 of file ereg.c.

{
       /*
        * Build a case-insensitive bracket expression from the argument: every
        * alphabetic byte X becomes "[Xx]" (upper-case form first), every other
        * byte is copied unchanged.  The result is returned as a string.
        */
       char *string, *tmp, *out;
       int string_len;
       int idx, out_len;
       unsigned char c;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) {
              return;
       }
       
       /* worst case is four output bytes per input byte, plus the terminator */
       tmp = safe_emalloc(string_len, 4, 1);
       out = tmp;
       
       for (idx = 0; idx < string_len; idx++) {
              c = (unsigned char) string[idx];
              if (!isalpha(c)) {
                     *out++ = c;
                     continue;
              }
              *out++ = '[';
              *out++ = toupper(c);
              *out++ = tolower(c);
              *out++ = ']';
       }
       *out = 0;
       out_len = (int) (out - tmp);

       RETVAL_STRINGL(tmp, out_len, 1);
       efree(tmp);
}

Here is the call graph for this function:

PHP_MINFO_FUNCTION ( ereg  )

Definition at line 232 of file ereg.c.

{
       /*
        * Emit a one-row info table naming the regex library in use; the text
        * depends on the HSREGEX build setting.
        */
#if HSREGEX
       const char *library_status = "Bundled library enabled";
#else
       const char *library_status = "System library enabled";
#endif

       php_info_print_table_start();
       php_info_print_table_row(2, "Regex Library", library_status);
       php_info_print_table_end();
}

Here is the call graph for this function:

PHP_MINIT_FUNCTION ( ereg  )

Definition at line 217 of file ereg.c.

Here is the call graph for this function:

PHP_MSHUTDOWN_FUNCTION ( ereg  )

Definition at line 223 of file ereg.c.

{
       /*
        * Destroys the ereg globals directly when built without ZTS, then
        * reports SUCCESS.  NOTE(review): ZTS builds skip the direct call,
        * presumably because the globals are torn down elsewhere - confirm.
        */
#if !defined(ZTS)
       php_ereg_destroy_globals(&ereg_globals TSRMLS_CC);
#endif

       return SUCCESS;
}

Here is the call graph for this function:

static void php_split ( INTERNAL_FUNCTION_PARAMETERS  ,
int  icase 
) [static]

Definition at line 629 of file ereg.c.

{
       /*
        * Shared implementation behind split()/spliti().
        *
        * Parses (pattern, string [, limit]) and fills the return value with
        * the pieces of `string` found between successive matches of the
        * pattern (compiled with REG_EXTENDED, plus REG_ICASE when `icase` is
        * set).  With a limit other than -1, matching stops once the limit has
        * counted down to 1; the rest of the string always becomes the final
        * element.
        *
        * Returns FALSE on a compile error, on an execution error, or when the
        * pattern produces an empty match at the current position.
        */
       long count = -1;
       regex_t re;
       regmatch_t subs[1];
       char *spliton, *str, *strp, *endp;
       int spliton_len, str_len;
       int err, size, copts = 0;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) {
              return;
       }

       if (icase) {
              copts = REG_ICASE;
       }

       strp = str;
       endp = strp + str_len;

       err = regcomp(&re, spliton, REG_EXTENDED | copts);
       if (err) {
              php_ereg_eprint(err, &re);
              RETURN_FALSE;
       }

       array_init(return_value);

       /* walk through str, emitting one array entry per match */
       for (;;) {
              /* limit exhausted: leave the remainder for the final element */
              if (count != -1 && count <= 1) {
                     break;
              }

              err = regexec(&re, strp, 1, subs, 0);
              if (err) {
                     break;
              }

              if (subs[0].rm_so != 0) {
                     /* match further into the string: emit the text before it */
                     size = subs[0].rm_so;
                     add_next_index_stringl(return_value, strp, size, 1);

                     /* continue right after the match */
                     strp = strp + subs[0].rm_eo;
              } else if (subs[0].rm_eo == 0) {
                     /* empty match at the current position: give up */
                     regfree(&re);
                     
                     php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression");
                     
                     zend_hash_destroy(Z_ARRVAL_P(return_value));
                     efree(Z_ARRVAL_P(return_value));
                     RETURN_FALSE;
              } else {
                     /* non-empty match at the very start: emit an empty string */
                     add_next_index_stringl(return_value, "", 0, 1);
                     /* skip ahead the length of the regex match */
                     strp += subs[0].rm_eo;
              }

              /* count down only when a limit was supplied */
              if (count != -1) {
                     count--;
              }
       }

       /* anything other than "no match" is a real failure */
       if (err && err != REG_NOMATCH) {
              php_ereg_eprint(err, &re);
              regfree(&re);
              zend_hash_destroy(Z_ARRVAL_P(return_value));
              efree(Z_ARRVAL_P(return_value));
              RETURN_FALSE;
       }

       /* whatever is left of the string is the last element */
       size = endp - strp;
       
       add_next_index_stringl(return_value, strp, size, 1);

       regfree(&re);
}

Here is the call graph for this function:

Here is the caller graph for this function:

ZEND_ARG_INFO ( 1 ,
registers   
) const

Definition at line 33 of file ereg.c.

                                             {
       /*
        * Function table initializer: one entry per userland function, each
        * paired with its argument-info descriptor, closed by PHP_FE_END.
        * NOTE(review): PHP_DEP_FE presumably registers the function as
        * deprecated - confirm against the Zend API headers.
        */
       PHP_DEP_FE(ereg,                   arginfo_ereg)
       PHP_DEP_FE(ereg_replace,    arginfo_ereg_replace)
       PHP_DEP_FE(eregi,                  arginfo_ereg)
       PHP_DEP_FE(eregi_replace,   arginfo_ereg_replace)
       PHP_DEP_FE(split,                  arginfo_split)
       PHP_DEP_FE(spliti,                 arginfo_split)
       PHP_DEP_FE(sql_regcase,            arginfo_sql_regcase)
       PHP_FE_END
};

Variable Documentation

zend_module_entry ereg_module_entry
Initial value:

Definition at line 79 of file ereg.c.

int reg_magic = 0 [static]

Definition at line 72 of file ereg.c.