Back to index

php5  5.3.10
engine.c
Go to the documentation of this file.
00001 /*
00002  * The matching engine and friends.  This file is #included by regexec.c
00003  * after suitable #defines of a variety of macros used herein, so that
00004  * different state representations can be used without duplicating masses
00005  * of code.
00006  */
00007 
00008 #ifdef SNAMES
00009 #define       matcher       smatcher
00010 #define       fast   sfast
00011 #define       slow   sslow
00012 #define       dissect       sdissect
00013 #define       backref       sbackref
00014 #define       step   sstep
00015 #define       print  sprint
00016 #define       at     sat
00017 #define       match  smat
00018 #endif
00019 #ifdef LNAMES
00020 #define       matcher       lmatcher
00021 #define       fast   lfast
00022 #define       slow   lslow
00023 #define       dissect       ldissect
00024 #define       backref       lbackref
00025 #define       step   lstep
00026 #define       print  lprint
00027 #define       at     lat
00028 #define       match  lmat
00029 #endif
00030 
00031 /* another structure passed up and down to avoid zillions of parameters */
00032 struct match {
00033        struct re_guts *g;
00034        int eflags;
00035        regmatch_t *pmatch;  /* [nsub+1] (0 element unused) */
00036        unsigned char *offp;        /* offsets work from here */
00037        unsigned char *beginp;             /* start of string -- virtual NUL precedes */
00038        unsigned char *endp;        /* end of string -- virtual NUL here */
00039        unsigned char *coldp;              /* can be no match starting before here */
00040        unsigned char **lastpos;           /* [nplus+1] */
00041        STATEVARS;
00042        states st;           /* current states */
00043        states fresh;        /* states for a fresh start */
00044        states tmp;          /* temporary */
00045        states empty;        /* empty set of states */
00046 };
00047 
00048 #include "engine.ih"
00049 
00050 #ifdef REDEBUG
00051 #define       SP(t, s, c)   print(m, t, s, c, stdout)
00052 #define       AT(t, p1, p2, s1, s2)       at(m, t, p1, p2, s1, s2)
00053 #define       NOTE(str)     { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
00054 #else
00055 #define       SP(t, s, c)   /* nothing */
00056 #define       AT(t, p1, p2, s1, s2)       /* nothing */
00057 #define       NOTE(s)       /* nothing */
00058 #endif
00059 
00060 /*
00061  - matcher - the actual matching engine
00062  == static int matcher(register struct re_guts *g, char *string, \
00063  ==    size_t nmatch, regmatch_t pmatch[], int eflags);
00064  */
00065 static int                  /* 0 success, REG_NOMATCH failure */
00066 matcher(g, string, nmatch, pmatch, eflags)
00067 register struct re_guts *g;
00068 unsigned char *string;
00069 size_t nmatch;
00070 regmatch_t pmatch[];
00071 int eflags;
00072 {
00073        register unsigned char *endp;
00074        register size_t i;
00075        struct match mv;
00076        register struct match *m = &mv;
00077        register unsigned char *dp;
00078        const register sopno gf = g->firststate+1;       /* +1 for OEND */
00079        const register sopno gl = g->laststate;
00080        unsigned char *start;
00081        unsigned char *stop;
00082 
00083        /* simplify the situation where possible */
00084        if (g->cflags&REG_NOSUB)
00085               nmatch = 0;
00086        if (eflags&REG_STARTEND) {
00087               start = string + pmatch[0].rm_so;
00088               stop = string + pmatch[0].rm_eo;
00089        } else {
00090               start = string;
00091               stop = start + strlen(start);
00092        }
00093        if (stop < start)
00094               return(REG_INVARG);
00095 
00096        /* prescreening; this does wonders for this rather slow code */
00097        if (g->must != NULL) {
00098               for (dp = start; dp < stop; dp++)
00099                      if (*dp == g->must[0] && stop - dp >= g->mlen &&
00100                             memcmp(dp, g->must, (size_t)g->mlen) == 0)
00101                             break;
00102               if (dp == stop)             /* we didn't find g->must */
00103                      return(REG_NOMATCH);
00104        }
00105 
00106        /* match struct setup */
00107        m->g = g;
00108        m->eflags = eflags;
00109        m->pmatch = NULL;
00110        m->lastpos = NULL;
00111        m->offp = string;
00112        m->beginp = start;
00113        m->endp = stop;
00114        STATESETUP(m, 4);
00115        SETUP(m->st);
00116        SETUP(m->fresh);
00117        SETUP(m->tmp);
00118        SETUP(m->empty);
00119        CLEAR(m->empty);
00120 
00121        /* this loop does only one repetition except for backrefs */
00122        for (;;) {
00123               endp = fast(m, start, stop, gf, gl);
00124               if (endp == NULL) {         /* a miss */
00125                      STATETEARDOWN(m);
00126                      return(REG_NOMATCH);
00127               }
00128               if (nmatch == 0 && !g->backrefs)
00129                      break;        /* no further info needed */
00130 
00131               /* where? */
00132               assert(m->coldp != NULL);
00133               for (;;) {
00134                      NOTE("finding start");
00135                      endp = slow(m, m->coldp, stop, gf, gl);
00136                      if (endp != NULL)
00137                             break;
00138                      assert(m->coldp < m->endp);
00139                      m->coldp++;
00140               }
00141               if (nmatch == 1 && !g->backrefs)
00142                      break;        /* no further info needed */
00143 
00144               /* oh my, he wants the subexpressions... */
00145               if (m->pmatch == NULL)
00146                      m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
00147                                                  sizeof(regmatch_t));
00148               if (m->pmatch == NULL) {
00149                      STATETEARDOWN(m);
00150                      return(REG_ESPACE);
00151               }
00152               for (i = 1; i <= m->g->nsub; i++)
00153                      m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
00154               if (!g->backrefs && !(m->eflags&REG_BACKR)) {
00155                      NOTE("dissecting");
00156                      dp = dissect(m, m->coldp, endp, gf, gl);
00157               } else {
00158                      if (g->nplus > 0 && m->lastpos == NULL)
00159                             m->lastpos = (unsigned char **)malloc((g->nplus+1) *
00160                                                  sizeof(unsigned char *));
00161                      if (g->nplus > 0 && m->lastpos == NULL) {
00162                             free((char *)m->pmatch);
00163                             STATETEARDOWN(m);
00164                             return(REG_ESPACE);
00165                      }
00166                      NOTE("backref dissect");
00167                      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
00168               }
00169               if (dp != NULL)
00170                      break;
00171 
00172               /* uh-oh... we couldn't find a subexpression-level match */
00173               assert(g->backrefs); /* must be back references doing it */
00174               assert(g->nplus == 0 || m->lastpos != NULL);
00175               for (;;) {
00176                      if (dp != NULL || endp <= m->coldp)
00177                             break;        /* defeat */
00178                      NOTE("backoff");
00179                      endp = slow(m, m->coldp, endp-1, gf, gl);
00180                      if (endp == NULL)
00181                             break;        /* defeat */
00182                      /* try it on a shorter possibility */
00183 #ifndef NDEBUG
00184                      for (i = 1; i <= m->g->nsub; i++) {
00185                             assert(m->pmatch[i].rm_so == -1);
00186                             assert(m->pmatch[i].rm_eo == -1);
00187                      }
00188 #endif
00189                      NOTE("backoff dissect");
00190                      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
00191               }
00192               assert(dp == NULL || dp == endp);
00193               if (dp != NULL)             /* found a shorter one */
00194                      break;
00195 
00196               /* despite initial appearances, there is no match here */
00197               NOTE("false alarm");
00198               start = m->coldp + 1;       /* recycle starting later */
00199               assert(start <= stop);
00200        }
00201 
00202        /* fill in the details if requested */
00203        if (nmatch > 0) {
00204               pmatch[0].rm_so = m->coldp - m->offp;
00205               pmatch[0].rm_eo = endp - m->offp;
00206        }
00207        if (nmatch > 1) {
00208               assert(m->pmatch != NULL);
00209               for (i = 1; i < nmatch; i++)
00210                      if (i <= m->g->nsub)
00211                             pmatch[i] = m->pmatch[i];
00212                      else {
00213                             pmatch[i].rm_so = -1;
00214                             pmatch[i].rm_eo = -1;
00215                      }
00216        }
00217 
00218        if (m->pmatch != NULL)
00219               free((char *)m->pmatch);
00220        if (m->lastpos != NULL)
00221               free((char *)m->lastpos);
00222        STATETEARDOWN(m);
00223        return(0);
00224 }
00225 
00226 /*
00227  - dissect - figure out what matched what, no back references
00228  == static unsigned char *dissect(register struct match *m, unsigned char *start, \
00229  ==    unsigned char *stop, sopno startst, sopno stopst);
00230  */
00231 static unsigned char *                    /* == stop (success) always */
00232 dissect(m, start, stop, startst, stopst)
00233 register struct match *m;
00234 unsigned char *start;
00235 unsigned char *stop;
00236 sopno startst;
00237 sopno stopst;
00238 {
00239        register int i;
00240        register sopno ss;   /* start sop of current subRE */
00241        register sopno es;   /* end sop of current subRE */
00242        register unsigned char *sp; /* start of string matched by it */
00243        register unsigned char *stp;       /* string matched by it cannot pass here */
00244        register unsigned char *rest;      /* start of rest of string */
00245        register unsigned char *tail;      /* string unmatched by rest of RE */
00246        register sopno ssub; /* start sop of subsubRE */
00247        register sopno esub; /* end sop of subsubRE */
00248        register unsigned char *ssp;       /* start of string matched by subsubRE */
00249        register unsigned char *sep;       /* end of string matched by subsubRE */
00250        register unsigned char *oldssp;    /* previous ssp */
00251        register unsigned char *dp;
00252 
00253        AT("diss", start, stop, startst, stopst);
00254        sp = start;
00255        for (ss = startst; ss < stopst; ss = es) {
00256               /* identify end of subRE */
00257               es = ss;
00258               switch (OP(m->g->strip[es])) {
00259               case OPLUS_:
00260               case OQUEST_:
00261                      es += OPND(m->g->strip[es]);
00262                      break;
00263               case OCH_:
00264                      while (OP(m->g->strip[es]) != O_CH)
00265                             es += OPND(m->g->strip[es]);
00266                      break;
00267               }
00268               es++;
00269 
00270               /* figure out what it matched */
00271               switch (OP(m->g->strip[ss])) {
00272               case OEND:
00273                      assert(PHP_REGEX_NOPE);
00274                      break;
00275               case OCHAR:
00276                      sp++;
00277                      break;
00278               case OBOL:
00279               case OEOL:
00280               case OBOW:
00281               case OEOW:
00282                      break;
00283               case OANY:
00284               case OANYOF:
00285                      sp++;
00286                      break;
00287               case OBACK_:
00288               case O_BACK:
00289                      assert(PHP_REGEX_NOPE);
00290                      break;
00291               /* cases where length of match is hard to find */
00292               case OQUEST_:
00293                      stp = stop;
00294                      for (;;) {
00295                             /* how long could this one be? */
00296                             rest = slow(m, sp, stp, ss, es);
00297                             assert(rest != NULL);       /* it did match */
00298                             /* could the rest match the rest? */
00299                             tail = slow(m, rest, stop, es, stopst);
00300                             if (tail == stop)
00301                                    break;        /* yes! */
00302                             /* no -- try a shorter match for this one */
00303                             stp = rest - 1;
00304                             assert(stp >= sp);   /* it did work */
00305                      }
00306                      ssub = ss + 1;
00307                      esub = es - 1;
00308                      /* did innards match? */
00309                      if (slow(m, sp, rest, ssub, esub) != NULL) {
00310                             dp = dissect(m, sp, rest, ssub, esub);
00311                             assert(dp == rest);
00312                      } else        /* no */
00313                             assert(sp == rest);
00314                      sp = rest;
00315                      break;
00316               case OPLUS_:
00317                      stp = stop;
00318                      for (;;) {
00319                             /* how long could this one be? */
00320                             rest = slow(m, sp, stp, ss, es);
00321                             assert(rest != NULL);       /* it did match */
00322                             /* could the rest match the rest? */
00323                             tail = slow(m, rest, stop, es, stopst);
00324                             if (tail == stop)
00325                                    break;        /* yes! */
00326                             /* no -- try a shorter match for this one */
00327                             stp = rest - 1;
00328                             assert(stp >= sp);   /* it did work */
00329                      }
00330                      ssub = ss + 1;
00331                      esub = es - 1;
00332                      ssp = sp;
00333                      oldssp = ssp;
00334                      for (;;) {    /* find last match of innards */
00335                             sep = slow(m, ssp, rest, ssub, esub);
00336                             if (sep == NULL || sep == ssp)
00337                                    break; /* failed or matched null */
00338                             oldssp = ssp; /* on to next try */
00339                             ssp = sep;
00340                      }
00341                      if (sep == NULL) {
00342                             /* last successful match */
00343                             sep = ssp;
00344                             ssp = oldssp;
00345                      }
00346                      assert(sep == rest); /* must exhaust substring */
00347                      assert(slow(m, ssp, sep, ssub, esub) == rest);
00348                      dp = dissect(m, ssp, sep, ssub, esub);
00349                      assert(dp == sep);
00350                      sp = rest;
00351                      break;
00352               case OCH_:
00353                      stp = stop;
00354                      for (;;) {
00355                             /* how long could this one be? */
00356                             rest = slow(m, sp, stp, ss, es);
00357                             assert(rest != NULL);       /* it did match */
00358                             /* could the rest match the rest? */
00359                             tail = slow(m, rest, stop, es, stopst);
00360                             if (tail == stop)
00361                                    break;        /* yes! */
00362                             /* no -- try a shorter match for this one */
00363                             stp = rest - 1;
00364                             assert(stp >= sp);   /* it did work */
00365                      }
00366                      ssub = ss + 1;
00367                      esub = ss + OPND(m->g->strip[ss]) - 1;
00368                      assert(OP(m->g->strip[esub]) == OOR1);
00369                      for (;;) {    /* find first matching branch */
00370                             if (slow(m, sp, rest, ssub, esub) == rest)
00371                                    break; /* it matched all of it */
00372                             /* that one missed, try next one */
00373                             assert(OP(m->g->strip[esub]) == OOR1);
00374                             esub++;
00375                             assert(OP(m->g->strip[esub]) == OOR2);
00376                             ssub = esub + 1;
00377                             esub += OPND(m->g->strip[esub]);
00378                             if (OP(m->g->strip[esub]) == OOR2)
00379                                    esub--;
00380                             else
00381                                    assert(OP(m->g->strip[esub]) == O_CH);
00382                      }
00383                      dp = dissect(m, sp, rest, ssub, esub);
00384                      assert(dp == rest);
00385                      sp = rest;
00386                      break;
00387               case O_PLUS:
00388               case O_QUEST:
00389               case OOR1:
00390               case OOR2:
00391               case O_CH:
00392                      assert(PHP_REGEX_NOPE);
00393                      break;
00394               case OLPAREN:
00395                      i = OPND(m->g->strip[ss]);
00396                      assert(0 < i && i <= m->g->nsub);
00397                      m->pmatch[i].rm_so = sp - m->offp;
00398                      break;
00399               case ORPAREN:
00400                      i = OPND(m->g->strip[ss]);
00401                      assert(0 < i && i <= m->g->nsub);
00402                      m->pmatch[i].rm_eo = sp - m->offp;
00403                      break;
00404               default:             /* uh oh */
00405                      assert(PHP_REGEX_NOPE);
00406                      break;
00407               }
00408        }
00409 
00410        assert(sp == stop);
00411        return(sp);
00412 }
00413 
00414 /*
00415  - backref - figure out what matched what, figuring in back references
00416  == static unsigned char *backref(register struct match *m, unsigned char *start, \
00417  ==    unsigned char *stop, sopno startst, sopno stopst, sopno lev);
00418  */
00419 static unsigned char *                    /* == stop (success) or NULL (failure) */
00420 backref(m, start, stop, startst, stopst, lev)
00421 register struct match *m;
00422 unsigned char *start;
00423 unsigned char *stop;
00424 sopno startst;
00425 sopno stopst;
00426 sopno lev;                  /* PLUS nesting level */
00427 {
00428        register int i;
00429        register sopno ss;   /* start sop of current subRE */
00430        register unsigned char *sp; /* start of string matched by it */
00431        register sopno ssub; /* start sop of subsubRE */
00432        register sopno esub; /* end sop of subsubRE */
00433        register unsigned char *ssp;       /* start of string matched by subsubRE */
00434        register unsigned char *dp;
00435        register size_t len;
00436        register int hard;
00437        register sop s;
00438        register regoff_t offsave;
00439        register cset *cs;
00440 
00441        AT("back", start, stop, startst, stopst);
00442        sp = start;
00443 
00444        /* get as far as we can with easy stuff */
00445        hard = 0;
00446        for (ss = startst; !hard && ss < stopst; ss++)
00447               switch (OP(s = m->g->strip[ss])) {
00448               case OCHAR:
00449                      if (sp == stop || *sp++ != (unsigned char)OPND(s))
00450                             return(NULL);
00451                      break;
00452               case OANY:
00453                      if (sp == stop)
00454                             return(NULL);
00455                      sp++;
00456                      break;
00457               case OANYOF:
00458                      cs = &m->g->sets[OPND(s)];
00459                      if (sp == stop || !CHIN(cs, *sp++))
00460                             return(NULL);
00461                      break;
00462               case OBOL:
00463                      if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
00464                                    (sp < m->endp && *(sp-1) == '\n' &&
00465                                           (m->g->cflags&REG_NEWLINE)) )
00466                             { /* yes */ }
00467                      else
00468                             return(NULL);
00469                      break;
00470               case OEOL:
00471                      if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
00472                                    (sp < m->endp && *sp == '\n' &&
00473                                           (m->g->cflags&REG_NEWLINE)) )
00474                             { /* yes */ }
00475                      else
00476                             return(NULL);
00477                      break;
00478               case OBOW:
00479                      if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
00480                                    (sp < m->endp && *(sp-1) == '\n' &&
00481                                           (m->g->cflags&REG_NEWLINE)) ||
00482                                    (sp > m->beginp &&
00483                                                  !ISWORD(*(sp-1))) ) &&
00484                                    (sp < m->endp && ISWORD(*sp)) )
00485                             { /* yes */ }
00486                      else
00487                             return(NULL);
00488                      break;
00489               case OEOW:
00490                      if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
00491                                    (sp < m->endp && *sp == '\n' &&
00492                                           (m->g->cflags&REG_NEWLINE)) ||
00493                                    (sp < m->endp && !ISWORD(*sp)) ) &&
00494                                    (sp > m->beginp && ISWORD(*(sp-1))) )
00495                             { /* yes */ }
00496                      else
00497                             return(NULL);
00498                      break;
00499               case O_QUEST:
00500                      break;
00501               case OOR1:    /* matches null but needs to skip */
00502                      ss++;
00503                      s = m->g->strip[ss];
00504                      do {
00505                             assert(OP(s) == OOR2);
00506                             ss += OPND(s);
00507                      } while (OP(s = m->g->strip[ss]) != O_CH);
00508                      /* note that the ss++ gets us past the O_CH */
00509                      break;
00510               default:      /* have to make a choice */
00511                      hard = 1;
00512                      break;
00513               }
00514        if (!hard) {         /* that was it! */
00515               if (sp != stop)
00516                      return(NULL);
00517               return(sp);
00518        }
00519        ss--;                /* adjust for the for's final increment */
00520 
00521        /* the hard stuff */
00522        AT("hard", sp, stop, ss, stopst);
00523        s = m->g->strip[ss];
00524        switch (OP(s)) {
00525        case OBACK_:         /* the vilest depths */
00526               i = OPND(s);
00527               assert(0 < i && i <= m->g->nsub);
00528               if (m->pmatch[i].rm_eo == -1)
00529                      return(NULL);
00530               assert(m->pmatch[i].rm_so != -1);
00531               len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
00532               assert(stop - m->beginp >= len);
00533               if (sp > stop - len)
00534                      return(NULL); /* not enough left to match */
00535               ssp = m->offp + m->pmatch[i].rm_so;
00536               if (memcmp(sp, ssp, len) != 0)
00537                      return(NULL);
00538               while (m->g->strip[ss] != SOP(O_BACK, i))
00539                      ss++;
00540               return(backref(m, sp+len, stop, ss+1, stopst, lev));
00541               break;
00542        case OQUEST_:        /* to null or not */
00543               dp = backref(m, sp, stop, ss+1, stopst, lev);
00544               if (dp != NULL)
00545                      return(dp);   /* not */
00546               return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
00547               break;
00548        case OPLUS_:
00549               assert(m->lastpos != NULL);
00550               assert(lev+1 <= m->g->nplus);
00551               m->lastpos[lev+1] = sp;
00552               return(backref(m, sp, stop, ss+1, stopst, lev+1));
00553               break;
00554        case O_PLUS:
00555               if (sp == m->lastpos[lev])  /* last pass matched null */
00556                      return(backref(m, sp, stop, ss+1, stopst, lev-1));
00557               /* try another pass */
00558               m->lastpos[lev] = sp;
00559               dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
00560               if (dp == NULL)
00561                      return(backref(m, sp, stop, ss+1, stopst, lev-1));
00562               else
00563                      return(dp);
00564               break;
00565        case OCH_:           /* find the right one, if any */
00566               ssub = ss + 1;
00567               esub = ss + OPND(s) - 1;
00568               assert(OP(m->g->strip[esub]) == OOR1);
00569               for (;;) {    /* find first matching branch */
00570                      dp = backref(m, sp, stop, ssub, esub, lev);
00571                      if (dp != NULL)
00572                             return(dp);
00573                      /* that one missed, try next one */
00574                      if (OP(m->g->strip[esub]) == O_CH)
00575                             return(NULL); /* there is none */
00576                      esub++;
00577                      assert(OP(m->g->strip[esub]) == OOR2);
00578                      ssub = esub + 1;
00579                      esub += OPND(m->g->strip[esub]);
00580                      if (OP(m->g->strip[esub]) == OOR2)
00581                             esub--;
00582                      else
00583                             assert(OP(m->g->strip[esub]) == O_CH);
00584               }
00585               break;
00586        case OLPAREN:        /* must undo assignment if rest fails */
00587               i = OPND(s);
00588               assert(0 < i && i <= m->g->nsub);
00589               offsave = m->pmatch[i].rm_so;
00590               m->pmatch[i].rm_so = sp - m->offp;
00591               dp = backref(m, sp, stop, ss+1, stopst, lev);
00592               if (dp != NULL)
00593                      return(dp);
00594               m->pmatch[i].rm_so = offsave;
00595               return(NULL);
00596               break;
00597        case ORPAREN:        /* must undo assignment if rest fails */
00598               i = OPND(s);
00599               assert(0 < i && i <= m->g->nsub);
00600               offsave = m->pmatch[i].rm_eo;
00601               m->pmatch[i].rm_eo = sp - m->offp;
00602               dp = backref(m, sp, stop, ss+1, stopst, lev);
00603               if (dp != NULL)
00604                      return(dp);
00605               m->pmatch[i].rm_eo = offsave;
00606               return(NULL);
00607               break;
00608        default:             /* uh oh */
00609               assert(PHP_REGEX_NOPE);
00610               break;
00611        }
00612 
00613        /* "can't happen" */
00614        assert(PHP_REGEX_NOPE);
00615        /* NOTREACHED */
00616        return((unsigned char *)NULL);     /* dummy */
00617 }
00618 
00619 /*
00620  - fast - step through the string at top speed
00621  == static unsigned char *fast(register struct match *m, unsigned char *start, \
00622  ==    unsigned char *stop, sopno startst, sopno stopst);
00623  */
00624 static unsigned char *                    /* where tentative match ended, or NULL */
00625 fast(m, start, stop, startst, stopst)
00626 register struct match *m;
00627 unsigned char *start;
00628 unsigned char *stop;
00629 sopno startst;
00630 sopno stopst;
00631 {
00632        register states st = m->st;
00633        register states fresh = m->fresh;
00634        register states tmp = m->tmp;
00635        register unsigned char *p = start;
00636        register int c = (start == m->beginp) ? OUT : *(start-1);
00637        register int lastc;  /* previous c */
00638        register int flagch;
00639        register int i;
00640        register unsigned char *coldp;     /* last p after which no match was underway */
00641 
00642        CLEAR(st);
00643        SET1(st, startst);
00644        st = step(m->g, startst, stopst, st, NOTHING, st);
00645        ASSIGN(fresh, st);
00646        SP("start", st, *p);
00647        coldp = NULL;
00648        for (;;) {
00649               /* next character */
00650               lastc = c;
00651               c = (p == m->endp) ? OUT : *p;
00652               if (EQ(st, fresh))
00653                      coldp = p;
00654 
00655               /* is there an EOL and/or BOL between lastc and c? */
00656               flagch = '\0';
00657               i = 0;
00658               if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
00659                             (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
00660                      flagch = BOL;
00661                      i = m->g->nbol;
00662               }
00663               if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
00664                             (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
00665                      flagch = (flagch == BOL) ? BOLEOL : EOL;
00666                      i += m->g->neol;
00667               }
00668               if (i != 0) {
00669                      for (; i > 0; i--)
00670                             st = step(m->g, startst, stopst, st, flagch, st);
00671                      SP("boleol", st, c);
00672               }
00673 
00674               /* how about a word boundary? */
00675               if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
00676                                    (c != OUT && ISWORD(c)) ) {
00677                      flagch = BOW;
00678               }
00679               if ( (lastc != OUT && ISWORD(lastc)) &&
00680                             (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
00681                      flagch = EOW;
00682               }
00683               if (flagch == BOW || flagch == EOW) {
00684                      st = step(m->g, startst, stopst, st, flagch, st);
00685                      SP("boweow", st, c);
00686               }
00687 
00688               /* are we done? */
00689               if (ISSET(st, stopst) || p == stop)
00690                      break;        /* NOTE BREAK OUT */
00691 
00692               /* no, we must deal with this character */
00693               ASSIGN(tmp, st);
00694               ASSIGN(st, fresh);
00695               assert(c != OUT);
00696               st = step(m->g, startst, stopst, tmp, c, st);
00697               SP("aft", st, c);
00698               assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
00699               p++;
00700        }
00701 
00702        assert(coldp != NULL);
00703        m->coldp = coldp;
00704        if (ISSET(st, stopst))
00705               return(p+1);
00706        else
00707               return(NULL);
00708 }
00709 
00710 /*
00711  - slow - step through the string more deliberately
00712  == static unsigned char *slow(register struct match *m, unsigned char *start, \
00713  ==    unsigned char *stop, sopno startst, sopno stopst);
00714  */
00715 static unsigned char *                    /* where it ended */
00716 slow(m, start, stop, startst, stopst)
00717 register struct match *m;
00718 unsigned char *start;
00719 unsigned char *stop;
00720 sopno startst;
00721 sopno stopst;
00722 {
00723        register states st = m->st;
00724        register states empty = m->empty;
00725        register states tmp = m->tmp;
00726        register unsigned char *p = start;
00727        register int c = (start == m->beginp) ? OUT : *(start-1);
00728        register int lastc;  /* previous c */
00729        register int flagch;
00730        register int i;
00731        register unsigned char *matchp;    /* last p at which a match ended */
00732 
00733        AT("slow", start, stop, startst, stopst);
00734        CLEAR(st);
00735        SET1(st, startst);
00736        SP("sstart", st, *p);
00737        st = step(m->g, startst, stopst, st, NOTHING, st);
00738        matchp = NULL;
00739        for (;;) {
00740               /* next character */
00741               lastc = c;
00742               c = (p == m->endp) ? OUT : *p;
00743 
00744               /* is there an EOL and/or BOL between lastc and c? */
00745               flagch = '\0';
00746               i = 0;
00747               if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
00748                             (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
00749                      flagch = BOL;
00750                      i = m->g->nbol;
00751               }
00752               if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
00753                             (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
00754                      flagch = (flagch == BOL) ? BOLEOL : EOL;
00755                      i += m->g->neol;
00756               }
00757               if (i != 0) {
00758                      for (; i > 0; i--)
00759                             st = step(m->g, startst, stopst, st, flagch, st);
00760                      SP("sboleol", st, c);
00761               }
00762 
00763               /* how about a word boundary? */
00764               if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
00765                                    (c != OUT && ISWORD(c)) ) {
00766                      flagch = BOW;
00767               }
00768               if ( (lastc != OUT && ISWORD(lastc)) &&
00769                             (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
00770                      flagch = EOW;
00771               }
00772               if (flagch == BOW || flagch == EOW) {
00773                      st = step(m->g, startst, stopst, st, flagch, st);
00774                      SP("sboweow", st, c);
00775               }
00776 
00777               /* are we done? */
00778               if (ISSET(st, stopst))
00779                      matchp = p;
00780               if (EQ(st, empty) || p == stop)
00781                      break;        /* NOTE BREAK OUT */
00782 
00783               /* no, we must deal with this character */
00784               ASSIGN(tmp, st);
00785               ASSIGN(st, empty);
00786               assert(c != OUT);
00787               st = step(m->g, startst, stopst, tmp, c, st);
00788               SP("saft", st, c);
00789               assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
00790               p++;
00791        }
00792 
00793        return(matchp);
00794 }
00795 
00796 
00797 /*
00798  - step - map set of states reachable before char to set reachable after
00799  == static states step(register struct re_guts *g, sopno start, sopno stop, \
00800  ==    register states bef, int ch, register states aft);
00801  == #define   BOL    (OUT+1)
00802  == #define   EOL    (BOL+1)
00803  == #define   BOLEOL (BOL+2)
00804  == #define   NOTHING       (BOL+3)
00805  == #define   BOW    (BOL+4)
00806  == #define   EOW    (BOL+5)
00807  == #define   CODEMAX       (BOL+5)              // highest code used
00808  == #define   NONCHAR(c)    ((c) > UCHAR_MAX)
00809  == #define   NNONCHAR      (CODEMAX-UCHAR_MAX)
00810  */
00811 static states
00812 step(g, start, stop, bef, ch, aft)
00813 register struct re_guts *g;
00814 sopno start;                /* start state within strip */
00815 sopno stop;                 /* state after stop state within strip */
00816 register states bef;        /* states reachable before */
00817 int ch;                            /* character or NONCHAR code */
00818 register states aft;        /* states already known reachable after */
00819 {
00820        register cset *cs;
00821        register sop s;
00822        register sopno pc;
00823        register onestate here;            /* note, macros know this name */
00824        register sopno look;
00825        register long i;
00826 
00827        for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
00828               s = g->strip[pc];
00829               switch (OP(s)) {
00830               case OEND:
00831                      assert(pc == stop-1);
00832                      break;
00833               case OCHAR:
00834                      /* only characters can match */
00835                      assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s));
00836                      if (ch == (unsigned char)OPND(s))
00837                             FWD(aft, bef, 1);
00838                      break;
00839               case OBOL:
00840                      if (ch == BOL || ch == BOLEOL)
00841                             FWD(aft, bef, 1);
00842                      break;
00843               case OEOL:
00844                      if (ch == EOL || ch == BOLEOL)
00845                             FWD(aft, bef, 1);
00846                      break;
00847               case OBOW:
00848                      if (ch == BOW)
00849                             FWD(aft, bef, 1);
00850                      break;
00851               case OEOW:
00852                      if (ch == EOW)
00853                             FWD(aft, bef, 1);
00854                      break;
00855               case OANY:
00856                      if (!NONCHAR(ch))
00857                             FWD(aft, bef, 1);
00858                      break;
00859               case OANYOF:
00860                      cs = &g->sets[OPND(s)];
00861                      if (!NONCHAR(ch) && CHIN(cs, ch))
00862                             FWD(aft, bef, 1);
00863                      break;
00864               case OBACK_:         /* ignored here */
00865               case O_BACK:
00866                      FWD(aft, aft, 1);
00867                      break;
00868               case OPLUS_:         /* forward, this is just an empty */
00869                      FWD(aft, aft, 1);
00870                      break;
00871               case O_PLUS:         /* both forward and back */
00872                      FWD(aft, aft, 1);
00873                      i = ISSETBACK(aft, OPND(s));
00874                      BACK(aft, aft, OPND(s));
00875                      if (!i && ISSETBACK(aft, OPND(s))) {
00876                             /* oho, must reconsider loop body */
00877                             pc -= OPND(s) + 1;
00878                             INIT(here, pc);
00879                      }
00880                      break;
00881               case OQUEST_:        /* two branches, both forward */
00882                      FWD(aft, aft, 1);
00883                      FWD(aft, aft, OPND(s));
00884                      break;
00885               case O_QUEST:        /* just an empty */
00886                      FWD(aft, aft, 1);
00887                      break;
00888               case OLPAREN:        /* not significant here */
00889               case ORPAREN:
00890                      FWD(aft, aft, 1);
00891                      break;
00892               case OCH_:           /* mark the first two branches */
00893                      FWD(aft, aft, 1);
00894                      assert(OP(g->strip[pc+OPND(s)]) == OOR2);
00895                      FWD(aft, aft, OPND(s));
00896                      break;
00897               case OOR1:           /* done a branch, find the O_CH */
00898                      if (ISSTATEIN(aft, here)) {
00899                             for (look = 1;
00900                                           OP(s = g->strip[pc+look]) != O_CH;
00901                                           look += OPND(s))
00902                                    assert(OP(s) == OOR2);
00903                             FWD(aft, aft, look);
00904                      }
00905                      break;
00906               case OOR2:           /* propagate OCH_'s marking */
00907                      FWD(aft, aft, 1);
00908                      if (OP(g->strip[pc+OPND(s)]) != O_CH) {
00909                             assert(OP(g->strip[pc+OPND(s)]) == OOR2);
00910                             FWD(aft, aft, OPND(s));
00911                      }
00912                      break;
00913               case O_CH:           /* just empty */
00914                      FWD(aft, aft, 1);
00915                      break;
00916               default:             /* ooooops... */
00917                      assert(PHP_REGEX_NOPE);
00918                      break;
00919               }
00920        }
00921 
00922        return(aft);
00923 }
00924 
00925 #ifdef REDEBUG
00926 /*
00927  - print - print a set of states
00928  == #ifdef REDEBUG
00929  == static void print(struct match *m, unsigned char *caption, states st, \
00930  ==    int ch, FILE *d);
00931  == #endif
00932  */
00933 static void
00934 print(m, caption, st, ch, d)
00935 struct match *m;
00936 unsigned char *caption;
00937 states st;
00938 int ch;
00939 FILE *d;
00940 {
00941        register struct re_guts *g = m->g;
00942        register int i;
00943        register int first = 1;
00944 
00945        if (!(m->eflags&REG_TRACE))
00946               return;
00947 
00948        fprintf(d, "%s", caption);
00949        if (ch != '\0')
00950               fprintf(d, " %s", pchar(ch));
00951        for (i = 0; i < g->nstates; i++)
00952               if (ISSET(st, i)) {
00953                      fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
00954                      first = 0;
00955               }
00956        fprintf(d, "\n");
00957 }
00958 
00959 /* 
00960  - at - print current situation
00961  == #ifdef REDEBUG
00962  == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \
00963  ==                                       sopno startst, sopno stopst);
00964  == #endif
00965  */
00966 static void
00967 at(m, title, start, stop, startst, stopst)
00968 struct match *m;
00969 unsigned char *title;
00970 unsigned char *start;
00971 unsigned char *stop;
00972 sopno startst;
00973 sopno stopst;
00974 {
00975        if (!(m->eflags&REG_TRACE))
00976               return;
00977 
00978        printf("%s %s-", title, pchar(*start));
00979        printf("%s ", pchar(*stop));
00980        printf("%ld-%ld\n", (long)startst, (long)stopst);
00981 }
00982 
00983 #ifndef PCHARDONE
00984 #define       PCHARDONE     /* never again */
00985 /*
00986  - pchar - make a character printable
00987  == #ifdef REDEBUG
00988  == static unsigned char *pchar(int ch);
00989  == #endif
00990  *
00991  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
00992  * duplicate here avoids having a debugging-capable regexec.o tied to
00993  * a matching debug.o, and this is convenient.  It all disappears in
00994  * the non-debug compilation anyway, so it doesn't matter much.
00995  */
00996 static unsigned char *                    /* -> representation */
00997 pchar(ch)
00998 int ch;
00999 {
01000        static unsigned char pbuf[10];
01001 
01002        if (isprint(ch) || ch == ' ')
01003               sprintf(pbuf, "%c", ch);
01004        else
01005               sprintf(pbuf, "\\%o", ch);
01006        return(pbuf);
01007 }
01008 #endif
01009 #endif
01010 
01011 #undef matcher
01012 #undef fast
01013 #undef slow
01014 #undef dissect
01015 #undef backref
01016 #undef step
01017 #undef print
01018 #undef at
01019 #undef match