Back to index

php5  5.3.10
regexec.c
Go to the documentation of this file.
00001 /*
00002  * the outer shell of regexec()
00003  *
00004  * This file includes engine.c *twice*, after muchos fiddling with the
00005  * macros that code uses.  This lets the same code operate on two different
00006  * representations for state sets.
00007  */
00008 #include <sys/types.h>
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include <string.h>
00012 #include <limits.h>
00013 #include <ctype.h>
00014 
00015 #include "regex.h"
00016 #include "utils.h"
00017 #include "regex2.h"
00018 
/*
 * Constant-false value for use in asserts; shuts lint up.
 * The expansion must not carry a trailing semicolon, otherwise it cannot
 * appear inside an expression such as assert(PHP_REGEX_NOPE).
 */
#define PHP_REGEX_NOPE 0
00020 
/*
 * macros for manipulating states, small version
 *
 * A whole state set lives in a single unsigned word, one bit per state.
 */
#define	states		unsigned
#define	states1		unsigned	/* for later use in regexec() decision */

/* whole-set operations */
#define	CLEAR(set)		((set) = 0)
#define	ASSIGN(to, from)	((to) = (from))
#define	EQ(x, y)		((x) == (y))

/* single-bit operations: bit number `bit` within the word */
#define	SET0(set, bit)		((set) &= ~(1U << (bit)))
#define	SET1(set, bit)		((set) |= (1U << (bit)))
#define	ISSET(set, bit)		((set) & (1U << (bit)))

/* per-matcher storage: nothing is needed for the small version */
#define	STATEVARS		int dummy	/* dummy version */
#define	STATESETUP(mp, cnt)	/* nothing */
#define	STATETEARDOWN(mp)	/* nothing */
#define	SETUP(set)		((set) = 0)

/* a "onestate" is a word with exactly one bit set */
#define	onestate		unsigned
#define	INIT(one, bit)		((one) = (1U << (bit)))
#define	INC(one)		((one) <<= 1)
#define	ISSTATEIN(set, one)	((set) & (one))

/*
 * some abbreviations; note that some of these know variable names!
 * (they read a variable called `here` from the scope they expand in)
 * do "if I'm here, I can also be there" etc without branches
 */
#define	FWD(to, from, dist)	((to) |= (((unsigned)(from) & (here)) << (dist)))
#define	BACK(to, from, dist)	((to) |= (((unsigned)(from) & (here)) >> (dist)))
#define	ISSETBACK(set, dist)	((set) & ((unsigned)here >> (dist)))

/* function names */
#define	SNAMES			/* engine.c looks after details */
00045 
00046 #include "engine.c"
00047 
/*
 * now undo things: drop the small-version definitions so the same names
 * can be given new meanings.  states1 is not undone here; its definition
 * is marked for later use in regexec().
 */

/* types and per-matcher storage */
#undef	states
#undef	onestate
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP

/* whole-set and single-state operations */
#undef	CLEAR
#undef	ASSIGN
#undef	EQ
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	INIT
#undef	INC
#undef	ISSTATEIN

/* abbreviations that know about `here` */
#undef	FWD
#undef	BACK
#undef	ISSETBACK

/* function-name selector */
#undef	SNAMES
00068 
/*
 * macros for manipulating states, large version
 *
 * A state set is an array of nstates bytes, one byte per state.  Several
 * of these macros read a variable called `m` from the scope they expand
 * in (for m->g->nstates, m->space and m->vn).
 */
#define	states			unsigned char *

/* whole-set operations, all sized by m->g->nstates */
#define	CLEAR(set)		memset(set, 0, m->g->nstates)
#define	ASSIGN(to, from)	memcpy(to, from, m->g->nstates)
#define	EQ(x, y)		(memcmp(x, y, m->g->nstates) == 0)

/* single-state operations: index `idx` within the byte array */
#define	SET0(set, idx)		((set)[idx] = 0)
#define	SET1(set, idx)		((set)[idx] = 1)
#define	ISSET(set, idx)		((set)[idx])

/*
 * per-matcher storage: one malloc'ed area holding `cnt` sets, handed out
 * one set at a time by SETUP.  STATESETUP returns REG_ESPACE from the
 * enclosing function when the allocation fails.
 */
#define	STATEVARS		int vn; unsigned char *space
#define	STATESETUP(mp, cnt)	{ \
		(mp)->space = malloc((cnt)*(mp)->g->nstates); \
		if ((mp)->space == NULL) \
			return(REG_ESPACE); \
		(mp)->vn = 0; \
	}
#define	STATETEARDOWN(mp)	{ free((mp)->space); }
#define	SETUP(set)		((set) = &m->space[m->vn++ * m->g->nstates])

/* a "onestate" is simply an index into the byte array */
#define	onestate		int
#define	INIT(one, idx)		((one) = (idx))
#define	INC(one)		((one)++)
#define	ISSTATEIN(set, one)	((set)[one])

/*
 * some abbreviations; note that some of these know variable names!
 * (they read a variable called `here` from the scope they expand in)
 * do "if I'm here, I can also be there" etc without branches
 */
#define	FWD(to, from, dist)	((to)[here+(dist)] |= (from)[here])
#define	BACK(to, from, dist)	((to)[here-(dist)] |= (from)[here])
#define	ISSETBACK(set, dist)	((set)[here - (dist)])

/* function names */
#define	LNAMES			/* flag */
00094 
00095 #include "engine.c"
00096 
00097 /*
00098  - regexec - interface for matching
00099  = API_EXPORT(int) regexec(const regex_t *, const char *, size_t, \
00100  =                                 regmatch_t [], int);
00101  = #define    REG_NOTBOL    00001
00102  = #define    REG_NOTEOL    00002
00103  = #define    REG_STARTEND  00004
00104  = #define    REG_TRACE     00400  // tracing of execution
00105  = #define    REG_LARGE     01000  // force large representation
00106  = #define    REG_BACKR     02000  // force use of backref code
00107  *
00108  * We put this here so we can exploit knowledge of the state representation
00109  * when choosing which matcher to call.  Also, by this point the matchers
00110  * have been prototyped.
00111  */
00112 API_EXPORT(int)                           /* 0 success, REG_NOMATCH failure */
00113 regexec(preg, string, nmatch, pmatch, eflags)
00114 const regex_t *preg;
00115 const char *string;
00116 size_t nmatch;
00117 regmatch_t pmatch[];
00118 int eflags;
00119 {
00120        register struct re_guts *g = preg->re_g;
00121 #ifdef REDEBUG
00122 #      define GOODFLAGS(f)  (f)
00123 #else
00124 #      define GOODFLAGS(f)  ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
00125 #endif
00126 
00127        if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
00128               return(REG_BADPAT);
00129        assert(!(g->iflags&BAD));
00130        if (g->iflags&BAD)          /* backstop for no-debug case */
00131               return(REG_BADPAT);
00132        eflags = GOODFLAGS(eflags);
00133 
00134        if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
00135               return(smatcher(g, (unsigned char *)string, nmatch, pmatch, eflags));
00136        else
00137               return(lmatcher(g, (unsigned char *)string, nmatch, pmatch, eflags));
00138 }