Back to index

php5  5.3.10
pcre_exec.c
Go to the documentation of this file.
00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 /* PCRE is a library of functions to support regular expressions whose syntax
00006 and semantics are as close as possible to those of the Perl 5 language.
00007 
00008                        Written by Philip Hazel
00009            Copyright (c) 1997-2010 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 
00041 /* This module contains pcre_exec(), the externally visible function that does
00042 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
00043 possible. There are also some static supporting functions. */
00044 
00045 #include "config.h"
00046 
00047 #define NLBLOCK md             /* Block containing newline information */
00048 #define PSSTART start_subject  /* Field containing processed string start */
00049 #define PSEND   end_subject    /* Field containing processed string end */
00050 
00051 #include "pcre_internal.h"
00052 
00053 /* Undefine some potentially clashing cpp symbols */
00054 
00055 #undef min
00056 #undef max
00057 
00058 /* Flag bits for the match() function */
00059 
00060 #define match_condassert     0x01  /* Called to check a condition assertion */
00061 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
00062 
00063 /* Non-error returns from the match() function. Error returns are externally
00064 defined PCRE_ERROR_xxx codes, which are all negative. */
00065 
00066 #define MATCH_MATCH        1
00067 #define MATCH_NOMATCH      0
00068 
00069 /* Special internal returns from the match() function. Make them sufficiently
00070 negative to avoid the external error codes. */
00071 
00072 #define MATCH_ACCEPT       (-999)
00073 #define MATCH_COMMIT       (-998)  /* Passed back by OP_COMMIT */
00074 #define MATCH_PRUNE        (-997)  /* Passed back by OP_PRUNE, OP_PRUNE_ARG */
00075 #define MATCH_SKIP         (-996)  /* Passed back by OP_SKIP, or a matched MARK */
00076 #define MATCH_SKIP_ARG     (-995)  /* Passed back by OP_SKIP_ARG */
00077 #define MATCH_THEN         (-994)  /* Passed back by OP_THEN, OP_THEN_ARG */
00078 
00079 /* Convenience macro, used many times: set md->mark = markptr, then RRETURN. */
00080 
00081 #define MRRETURN(ra) \
00082   { \
00083   md->mark = markptr; \
00084   RRETURN(ra); \
00085   }
00086 
00087 /* Maximum number of ints of offset to save on the stack for recursive calls.
00088 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
00089 because the offset vector is always a multiple of 3 long. */
00090 
00091 #define REC_STACK_SAVE_MAX 30
00092 
00093 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
00094 
00095 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* NOTE(review): order is presumably   */
00096 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* star, plus, query pairs - TODO confirm */
00097 
00098 
00099 
00100 #ifdef PCRE_DEBUG
00101 /*************************************************
00102 *        Debugging function to print chars       *
00103 *************************************************/
00104 
00105 /* Print a sequence of chars in printable format, stopping at the end of the
00106 subject if requested. Non-printing chars are output as \xhh escapes.
00107 
00108 Arguments:
00109   p           points to characters
00110   length      number to print
00111   is_subject  TRUE to stop at md->end_subject (p is within the subject)
00112   md          pointer to matching data block; read only if is_subject is TRUE
00113 
00114 Returns:     nothing (the output is written with printf)
00115 */
00116 
00117 static void
00118 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
00119 {
00120 unsigned int c;
00121 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
00122 while (length-- > 0)
00123   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
00124 }
00125 #endif
00126 
00127 
00128 
00129 /*************************************************
00130 *          Match a back-reference                *
00131 *************************************************/
00132 
00133 /* Compare a captured substring with the subject at eptr. An unset reference
00134 is passed with a length exceeding what is left of the subject, so it fails.
00135 
00136 Arguments:
00137   offset      index into the offset vector of the captured start offset
00138   eptr        points into the subject where the comparison starts
00139   length      number of bytes to be matched
00140   md          points to match data block
00141   ims         the ims flags; only PCRE_CASELESS is tested here
00142 
00143 Returns:      TRUE if matched, FALSE otherwise
00144 */
00145 
00146 static BOOL
00147 match_ref(int offset, register USPTR eptr, int length, match_data *md,
00148   unsigned long int ims)
00149 {
00150 USPTR p = md->start_subject + md->offset_vector[offset];  /* Captured text */
00151 
00152 #ifdef PCRE_DEBUG
00153 if (eptr >= md->end_subject)
00154   printf("matching subject <null>");
00155 else
00156   {
00157   printf("matching subject ");
00158   pchars(eptr, length, TRUE, md);
00159   }
00160 printf(" against backref ");
00161 pchars(p, length, FALSE, md);
00162 printf("\n");
00163 #endif
00164 
00165 /* Always fail if not enough characters left */
00166 
00167 if (length > md->end_subject - eptr) return FALSE;
00168 
00169 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
00170 properly if Unicode properties are supported. Otherwise, we can check only
00171 ASCII characters. */
00172 
00173 if ((ims & PCRE_CASELESS) != 0)
00174   {
00175 #ifdef SUPPORT_UTF8
00176 #ifdef SUPPORT_UCP
00177   if (md->utf8)
00178     {
00179     USPTR endptr = eptr + length;   /* length counts bytes, not characters */
00180     while (eptr < endptr)
00181       {
00182       int c, d;
00183       GETCHARINC(c, eptr);
00184       GETCHARINC(d, p);
00185       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
00186       }
00187     }
00188   else
00189 #endif
00190 #endif
00191 
00192   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
00193   is no UCP support. Bytes are compared after mapping through md->lcc. */
00194 
00195   while (length-- > 0)
00196     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
00197   }
00198 
00199 /* In the caseful case, we can just compare the bytes, whether or not we
00200 are in UTF-8 mode. */
00201 
00202 else
00203   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
00204 
00205 return TRUE;
00206 }
00207 
00208 
00209 
00210 /***************************************************************************
00211 ****************************************************************************
00212                    RECURSION IN THE match() FUNCTION
00213 
00214 The match() function is highly recursive, though not every recursive call
00215 increases the recursive depth. Nevertheless, some regular expressions can cause
00216 it to recurse to a great depth. I was writing for Unix, so I just let it call
00217 itself recursively. This uses the stack for saving everything that has to be
00218 saved for a recursive call. On Unix, the stack can be large, and this works
00219 fine.
00220 
00221 It turns out that on some non-Unix-like systems there are problems with
00222 programs that use a lot of stack. (This despite the fact that every last chip
00223 has oodles of memory these days, and techniques for extending the stack have
00224 been known for decades.) So....
00225 
00226 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
00227 calls by keeping local variables that need to be preserved in blocks of memory
00228 obtained from malloc() instead of on the stack. Macros are used to
00229 achieve this so that the actual code doesn't look very different to what it
00230 always used to.
00231 
00232 The original heap-recursive code used longjmp(). However, it seems that this
00233 can be very slow on some operating systems. Following a suggestion from Stan
00234 Switzer, the use of longjmp() has been abolished, at the cost of having to
00235 provide a unique number for each call to RMATCH. There is no way of generating
00236 a sequence of numbers at compile time in C. I have given them names, to make
00237 them stand out more clearly.
00238 
00239 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
00240 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
00241 tests. Furthermore, not using longjmp() means that local dynamic variables
00242 don't have indeterminate values; this has meant that the frame size can be
00243 reduced because the result can be "passed back" by straight setting of the
00244 variable instead of being passed in the frame.
00245 ****************************************************************************
00246 ***************************************************************************/
00247 
00248 /* Numbers for RMATCH calls (its "rw" argument). When this list is changed, the
00249 code at HEAP_RETURN below must be updated in sync.  */
00250 
00251 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
00252        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
00253        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
00254        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
00255        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
00256        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
00257        RM61,  RM62 };
00258 
00259 /* These versions of the macros use the stack, as normal, in debugging and
00260 production variants. RMATCH leaves the result of match() in rrc and takes
00261 mstart, markptr and rdepth from the caller's scope; "rw" is unused here. */
00262 
00263 #ifndef NO_RECURSE
00264 #define REGISTER register
00265 
00266 #ifdef PCRE_DEBUG
00267 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
00268   { \
00269   printf("match() called in line %d\n", __LINE__); \
00270   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
00271   printf("to line %d\n", __LINE__); \
00272   }
00273 #define RRETURN(ra) \
00274   { \
00275   printf("match() returned %d from line %d ", ra, __LINE__); \
00276   return ra; \
00277   }
00278 #else
00279 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
00280   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
00281 #define RRETURN(ra) return ra
00282 #endif
00283 
00284 #else
00285 
00286 
00287 /* These versions of the macros manage a private stack on the heap. RMATCH
00288 pushes a new heapframe, records its "rw" label number in the current frame and
00289 jumps to HEAP_RECURSE. Its "rd" argument (md, which never changes) is unused. */
00290 
00291 #define REGISTER
00292 
00293 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
00294   {\
00295   heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
00296   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
00297   frame->Xwhere = rw; \
00298   newframe->Xeptr = ra;\
00299   newframe->Xecode = rb;\
00300   newframe->Xmstart = mstart;\
00301   newframe->Xmarkptr = markptr;\
00302   newframe->Xoffset_top = rc;\
00303   newframe->Xims = re;\
00304   newframe->Xeptrb = rf;\
00305   newframe->Xflags = rg;\
00306   newframe->Xrdepth = frame->Xrdepth + 1;\
00307   newframe->Xprevframe = frame;\
00308   frame = newframe;\
00309   DPRINTF(("restarting from line %d\n", __LINE__));\
00310   goto HEAP_RECURSE;\
00311   L_##rw:\
00312   DPRINTF(("jumped back to line %d\n", __LINE__));\
00313   }
00314 /* RRETURN frees the current frame, then resumes the parent or really returns. */
00315 #define RRETURN(ra)\
00316   {\
00317   heapframe *oldframe = frame;\
00318   frame = oldframe->Xprevframe;\
00319   (pcre_stack_free)(oldframe);\
00320   if (frame != NULL)\
00321     {\
00322     rrc = ra;\
00323     goto HEAP_RETURN;\
00324     }\
00325   return ra;\
00326   }
00327 
00328 
00329 /* Structure for remembering the local variables in a private frame */
00330 
00331 typedef struct heapframe {   /* One saved match() activation (NO_RECURSE) */
00332   struct heapframe *Xprevframe;   /* Calling frame; NULL at the top level */
00333 
00334   /* Function arguments that may change */
00335 
00336   USPTR Xeptr;
00337   const uschar *Xecode;
00338   USPTR Xmstart;
00339   USPTR Xmarkptr;
00340   int Xoffset_top;
00341   unsigned long int Xims;         /* Same type as the ims argument of match() */
00342   eptrblock *Xeptrb;
00343   int Xflags;
00344   unsigned int Xrdepth;
00345 
00346   /* Function local variables */
00347 
00348   USPTR Xcallpat;
00349 #ifdef SUPPORT_UTF8
00350   USPTR Xcharptr;
00351 #endif
00352   USPTR Xdata;
00353   USPTR Xnext;
00354   USPTR Xpp;
00355   USPTR Xprev;
00356   USPTR Xsaved_eptr;
00357 
00358   recursion_info Xnew_recursive;
00359 
00360   BOOL Xcur_is_word;
00361   BOOL Xcondition;
00362   BOOL Xprev_is_word;
00363 
00364   unsigned long int Xoriginal_ims;
00365 
00366 #ifdef SUPPORT_UCP
00367   int Xprop_type;
00368   int Xprop_value;
00369   int Xprop_fail_result;
00370   int Xprop_category;
00371   int Xprop_chartype;
00372   int Xprop_script;
00373   int Xoclength;
00374   uschar Xocchars[8];
00375 #endif
00376 
00377   int Xcodelink;
00378   int Xctype;
00379   unsigned int Xfc;
00380   int Xfi;
00381   int Xlength;
00382   int Xmax;
00383   int Xmin;
00384   int Xnumber;
00385   int Xoffset;
00386   int Xop;
00387   int Xsave_capture_last;
00388   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
00389   int Xstacksave[REC_STACK_SAVE_MAX];
00390 
00391   eptrblock Xnewptrb;
00392 
00393   /* Where to jump back to */
00394 
00395   int Xwhere;                     /* RMn number given to RMATCH as "rw" */
00396 
00397 } heapframe;
00398 
00399 #endif
00400 
00401 
00402 /***************************************************************************
00403 ***************************************************************************/
00404 
00405 
00406 
00407 /*************************************************
00408 *         Match from current position            *
00409 *************************************************/
00410 
00411 /* This function is called recursively in many circumstances. Whenever it
00412 returns a negative (error) response, the outer incarnation must also return the
00413 same response. */
00414 
00415 /* These macros pack up tests that are used for partial matching, and which
00416 appear several times in the code. We set the "hit end" flag if the pointer is
00417 at the end of the subject and also past md->start_used_ptr (i.e. something
00418 has been matched). For hard partial matching (md->partial > 1), we then return
00419 immediately. The second one is used when we already know we are past the end of
00420 the subject. */
00421 
00422 #define CHECK_PARTIAL()\
00423   if (md->partial != 0 && eptr >= md->end_subject && \
00424       eptr > md->start_used_ptr) \
00425     { \
00426     md->hitend = TRUE; \
00427     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
00428     }
00429 
00430 #define SCHECK_PARTIAL()\
00431   if (md->partial != 0 && eptr > md->start_used_ptr) \
00432     { \
00433     md->hitend = TRUE; \
00434     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
00435     }
00436 
00437 
00438 /* Performance note: It might be tempting to extract commonly used fields from
00439 the md structure (e.g. utf8, end_subject) into individual variables to improve
00440 performance. Tests using gcc on a SPARC disproved this; in the first case, it
00441 made performance worse.
00442 
00443 Arguments:
00444    eptr        pointer to current character in subject
00445    ecode       pointer to current position in compiled code
00446    mstart      pointer to the current match start position (can be modified
00447                  by encountering \K)
00448    markptr     pointer to the most recent MARK name, or NULL
00449    offset_top  current top pointer
00450    md          pointer to "static" info for the match
00451    ims         current /i, /m, and /s options
00452    eptrb       pointer to chain of blocks containing eptr at start of
00453                  brackets - for testing for empty matches
00454    flags       can contain
00455                  match_condassert - this is an assertion condition
00456                  match_cbegroup - this is the start of an unlimited repeat
00457                    group that can match an empty string
00458    rdepth      the recursion depth
00459 
00460 Returns:       MATCH_MATCH if matched            )  these values are >= 0
00461                MATCH_NOMATCH if failed to match  )
00462                a negative MATCH_xxx value for PRUNE, SKIP, etc
00463                a negative PCRE_ERROR_xxx value if aborted by an error condition
00464                  (e.g. stopped by repeated call or recursion limit)
00465 */
00466 
00467 static int
00468 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
00469   const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
00470   eptrblock *eptrb, int flags, unsigned int rdepth)
00471 {
00472 /* These variables do not need to be preserved over recursion in this function,
00473 so they can be ordinary variables in all cases. Mark some of them with
00474 "register" because they are used a lot in loops. */
00475 
00476 register int  rrc;         /* Returns from recursive calls */
00477 register int  i;           /* Used for loops not involving calls to RMATCH() */
00478 register unsigned int c;   /* Character values not kept over RMATCH() calls */
00479 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
00480 
00481 BOOL minimize, possessive; /* Quantifier options */
00482 int condcode;
00483 
00484 /* When recursion is not being used, all "local" variables that have to be
00485 preserved over calls to RMATCH() are part of a "frame" which is obtained from
00486 heap storage. Set up the top-level frame here; others are obtained from the
00487 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
00488 
00489 #ifdef NO_RECURSE
00490 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
00491 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
00492 frame->Xprevframe = NULL;            /* Marks the top level */
00493 
00494 /* Copy in the original argument variables */
00495 
00496 frame->Xeptr = eptr;
00497 frame->Xecode = ecode;
00498 frame->Xmstart = mstart;
00499 frame->Xmarkptr = markptr;
00500 frame->Xoffset_top = offset_top;
00501 frame->Xims = ims;
00502 frame->Xeptrb = eptrb;
00503 frame->Xflags = flags;
00504 frame->Xrdepth = rdepth;
00505 
00506 /* This is where control jumps back to to effect "recursion" */
00507 
00508 HEAP_RECURSE:
00509 
00510 /* Macros make the argument variables come from the current frame */
00511 
00512 #define eptr               frame->Xeptr
00513 #define ecode              frame->Xecode
00514 #define mstart             frame->Xmstart
00515 #define markptr            frame->Xmarkptr
00516 #define offset_top         frame->Xoffset_top
00517 #define ims                frame->Xims
00518 #define eptrb              frame->Xeptrb
00519 #define flags              frame->Xflags
00520 #define rdepth             frame->Xrdepth
00521 
00522 /* Ditto for the local variables */
00523 
00524 #ifdef SUPPORT_UTF8
00525 #define charptr            frame->Xcharptr
00526 #endif
00527 #define callpat            frame->Xcallpat
00528 #define codelink           frame->Xcodelink
00529 #define data               frame->Xdata
00530 #define next               frame->Xnext
00531 #define pp                 frame->Xpp
00532 #define prev               frame->Xprev
00533 #define saved_eptr         frame->Xsaved_eptr
00534 
00535 #define new_recursive      frame->Xnew_recursive
00536 
00537 #define cur_is_word        frame->Xcur_is_word
00538 #define condition          frame->Xcondition
00539 #define prev_is_word       frame->Xprev_is_word
00540 
00541 #define original_ims       frame->Xoriginal_ims
00542 
00543 #ifdef SUPPORT_UCP
00544 #define prop_type          frame->Xprop_type
00545 #define prop_value         frame->Xprop_value
00546 #define prop_fail_result   frame->Xprop_fail_result
00547 #define prop_category      frame->Xprop_category
00548 #define prop_chartype      frame->Xprop_chartype
00549 #define prop_script        frame->Xprop_script
00550 #define oclength           frame->Xoclength
00551 #define occhars            frame->Xocchars
00552 #endif
00553 
00554 #define ctype              frame->Xctype
00555 #define fc                 frame->Xfc
00556 #define fi                 frame->Xfi
00557 #define length             frame->Xlength
00558 #define max                frame->Xmax
00559 #define min                frame->Xmin
00560 #define number             frame->Xnumber
00561 #define offset             frame->Xoffset
00562 #define op                 frame->Xop
00563 #define save_capture_last  frame->Xsave_capture_last
00564 #define save_offset1       frame->Xsave_offset1
00565 #define save_offset2       frame->Xsave_offset2
00566 #define save_offset3       frame->Xsave_offset3
00567 #define stacksave          frame->Xstacksave
00568 
00569 #define newptrb            frame->Xnewptrb
00570 
00571 /* When recursion is being used, local variables are allocated on the stack and
00572 get preserved during recursion in the normal way. In this environment, fi and
00573 i, and fc and c, can be the same variables. */
00574 
00575 #else         /* NO_RECURSE not defined */
00576 #define fi i
00577 #define fc c
00578 
00579 
00580 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
00581 const uschar *charptr;             /* in small blocks of the code. My normal */
00582 #endif                             /* style of coding would have declared    */
00583 const uschar *callpat;             /* them within each of those blocks.      */
00584 const uschar *data;                /* However, in order to accommodate the   */
00585 const uschar *next;                /* version of this code that uses an      */
00586 USPTR         pp;                  /* external "stack" implemented on the    */
00587 const uschar *prev;                /* heap, it is easier to declare them all */
00588 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
00589                                    /* out in a block. The only declarations  */
00590 recursion_info new_recursive;      /* within blocks below are for variables  */
00591                                    /* that do not have to be preserved over  */
00592 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
00593 BOOL condition;
00594 BOOL prev_is_word;
00595 
00596 unsigned long int original_ims;
00597 
00598 #ifdef SUPPORT_UCP
00599 int prop_type;
00600 int prop_value;
00601 int prop_fail_result;
00602 int prop_category;
00603 int prop_chartype;
00604 int prop_script;
00605 int oclength;
00606 uschar occhars[8];
00607 #endif
00608 
00609 int codelink;
00610 int ctype;
00611 int length;
00612 int max;
00613 int min;
00614 int number;
00615 int offset;
00616 int op;
00617 int save_capture_last;
00618 int save_offset1, save_offset2, save_offset3;
00619 int stacksave[REC_STACK_SAVE_MAX];
00620 
00621 eptrblock newptrb;
00622 #endif     /* NO_RECURSE */
00623 
00624 /* These statements are here to stop the compiler complaining about uninitialized
00625 variables. */
00626 
00627 #ifdef SUPPORT_UCP
00628 prop_value = 0;
00629 prop_fail_result = 0;
00630 #endif
00631 
00632 
00633 /* This label is used for tail recursion, which is used in a few cases even
00634 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
00635 used. Thanks to Ian Taylor for noticing this possibility and sending the
00636 original patch. */
00637 
00638 TAIL_RECURSE:
00639 
00640 /* OK, now we can get on with the real code of the function. Recursive calls
00641 are specified by the macro RMATCH and RRETURN is used to return. When
00642 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
00643 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
00644 defined). However, RMATCH isn't like a function call because it's quite a
00645 complicated macro. It has to be used in one particular way. This shouldn't,
00646 however, impact performance when true recursion is being used. */
00647 
00648 #ifdef SUPPORT_UTF8
00649 utf8 = md->utf8;       /* Local copy of the flag */
00650 #else
00651 utf8 = FALSE;
00652 #endif
00653 
00654 /* First check that we haven't called match() too many times, or that we
00655 haven't exceeded the recursive call limit. */
00656 
00657 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
00658 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
00659 
00660 original_ims = ims;    /* Save for resetting on ')' */
00661 
00662 /* At the start of a group with an unlimited repeat that may match an empty
00663 string, the match_cbegroup flag is set. When this is the case, add the current
00664 subject pointer to the chain of such remembered pointers, to be checked when we
00665 hit the closing ket, in order to break infinite loops that match no characters.
00666 When match() is called in other circumstances, don't add to the chain. The
00667 match_cbegroup flag must NOT be used with tail recursion, because the memory
00668 block that is used is on the stack, so a new one may be required for each
00669 match(). */
00670 
00671 if ((flags & match_cbegroup) != 0)
00672   {
00673   newptrb.epb_saved_eptr = eptr;
00674   newptrb.epb_prev = eptrb;
00675   eptrb = &newptrb;
00676   }
00677 
00678 /* Now start processing the opcodes. */
00679 
00680 for (;;)
00681   {
00682   minimize = possessive = FALSE;
00683   op = *ecode;
00684 
00685   switch(op)
00686     {
00687     case OP_MARK:
00688     markptr = ecode + 2;
00689     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
00690       ims, eptrb, flags, RM55);
00691 
00692     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
00693     argument, and we must check whether that argument matches this MARK's
00694     argument. It is passed back in md->start_match_ptr (an overloading of that
00695     variable). If it does match, we reset that variable to the current subject
00696     position and return MATCH_SKIP. Otherwise, pass back the return code
00697     unaltered. */
00698 
00699     if (rrc == MATCH_SKIP_ARG &&
00700         strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
00701       {
00702       md->start_match_ptr = eptr;
00703       RRETURN(MATCH_SKIP);
00704       }
00705 
00706     if (md->mark == NULL) md->mark = markptr;
00707     RRETURN(rrc);
00708 
00709     case OP_FAIL:
00710     MRRETURN(MATCH_NOMATCH);
00711 
00712     /* COMMIT overrides PRUNE, SKIP, and THEN */
00713 
00714     case OP_COMMIT:
00715     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00716       ims, eptrb, flags, RM52);
00717     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
00718         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
00719         rrc != MATCH_THEN)
00720       RRETURN(rrc);
00721     MRRETURN(MATCH_COMMIT);
00722 
00723     /* PRUNE overrides THEN */
00724 
00725     case OP_PRUNE:
00726     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00727       ims, eptrb, flags, RM51);
00728     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00729     MRRETURN(MATCH_PRUNE);
00730 
00731     case OP_PRUNE_ARG:
00732     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
00733       ims, eptrb, flags, RM56);
00734     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00735     md->mark = ecode + 2;
00736     RRETURN(MATCH_PRUNE);
00737 
00738     /* SKIP overrides PRUNE and THEN */
00739 
00740     case OP_SKIP:
00741     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00742       ims, eptrb, flags, RM53);
00743     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
00744       RRETURN(rrc);
00745     md->start_match_ptr = eptr;   /* Pass back current position */
00746     MRRETURN(MATCH_SKIP);
00747 
00748     case OP_SKIP_ARG:
00749     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
00750       ims, eptrb, flags, RM57);
00751     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
00752       RRETURN(rrc);
00753 
00754     /* Pass back the current skip name by overloading md->start_match_ptr and
00755     returning the special MATCH_SKIP_ARG return code. This will either be
00756     caught by a matching MARK, or get to the top, where it is treated the same
00757     as PRUNE. */
00758 
00759     md->start_match_ptr = ecode + 2;
00760     RRETURN(MATCH_SKIP_ARG);
00761 
00762     /* For THEN (and THEN_ARG) we pass back the address of the bracket or
00763     the alt that is at the start of the current branch. This makes it possible
00764     to skip back past alternatives that precede the THEN within the current
00765     branch. */
00766 
00767     case OP_THEN:
00768     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00769       ims, eptrb, flags, RM54);
00770     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00771     md->start_match_ptr = ecode - GET(ecode, 1);
00772     MRRETURN(MATCH_THEN);
00773 
00774     case OP_THEN_ARG:
00775     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
00776       offset_top, md, ims, eptrb, flags, RM58);
00777     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00778     md->start_match_ptr = ecode - GET(ecode, 1);
00779     md->mark = ecode + LINK_SIZE + 2;
00780     RRETURN(MATCH_THEN);
00781 
00782     /* Handle a capturing bracket. If there is space in the offset vector, save
00783     the current subject position in the working slot at the top of the vector.
00784     We mustn't change the current values of the data slot, because they may be
00785     set from a previous iteration of this group, and be referred to by a
00786     reference inside the group.
00787 
00788     If the bracket fails to match, we need to restore this value and also the
00789     values of the final offsets, in case they were set by a previous iteration
00790     of the same bracket.
00791 
00792     If there isn't enough space in the offset vector, treat this as if it were
00793     a non-capturing bracket. Don't worry about setting the flag for the error
00794     case here; that is handled in the code for KET. */
00795 
00796     case OP_CBRA:
00797     case OP_SCBRA:
00798     number = GET2(ecode, 1+LINK_SIZE);
00799     offset = number << 1;
00800 
00801 #ifdef PCRE_DEBUG
00802     printf("start bracket %d\n", number);
00803     printf("subject=");
00804     pchars(eptr, 16, TRUE, md);
00805     printf("\n");
00806 #endif
00807 
00808     if (offset < md->offset_max)
00809       {
00810       save_offset1 = md->offset_vector[offset];
00811       save_offset2 = md->offset_vector[offset+1];
00812       save_offset3 = md->offset_vector[md->offset_end - number];
00813       save_capture_last = md->capture_last;
00814 
00815       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
00816       md->offset_vector[md->offset_end - number] =
00817         (int)(eptr - md->start_subject);
00818 
00819       flags = (op == OP_SCBRA)? match_cbegroup : 0;
00820       do
00821         {
00822         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00823           ims, eptrb, flags, RM1);
00824         if (rrc != MATCH_NOMATCH &&
00825             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
00826           RRETURN(rrc);
00827         md->capture_last = save_capture_last;
00828         ecode += GET(ecode, 1);
00829         }
00830       while (*ecode == OP_ALT);
00831 
00832       DPRINTF(("bracket %d failed\n", number));
00833 
00834       md->offset_vector[offset] = save_offset1;
00835       md->offset_vector[offset+1] = save_offset2;
00836       md->offset_vector[md->offset_end - number] = save_offset3;
00837 
00838       if (rrc != MATCH_THEN) md->mark = markptr;
00839       RRETURN(MATCH_NOMATCH);
00840       }
00841 
00842     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
00843     as a non-capturing bracket. */
00844 
00845     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00846     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00847 
00848     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
00849 
00850     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00851     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00852 
00853     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
00854     final alternative within the brackets, we would return the result of a
00855     recursive call to match() whatever happened. We can reduce stack usage by
00856     turning this into a tail recursion, except in the case when match_cbegroup
00857     is set.*/
00858 
00859     case OP_BRA:
00860     case OP_SBRA:
00861     DPRINTF(("start non-capturing bracket\n"));
00862     flags = (op >= OP_SBRA)? match_cbegroup : 0;
00863     for (;;)
00864       {
00865       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
00866         {
00867         if (flags == 0)    /* Not a possibly empty group */
00868           {
00869           ecode += _pcre_OP_lengths[*ecode];
00870           DPRINTF(("bracket 0 tail recursion\n"));
00871           goto TAIL_RECURSE;
00872           }
00873 
00874         /* Possibly empty group; can't use tail recursion. */
00875 
00876         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00877           eptrb, flags, RM48);
00878         if (rrc == MATCH_NOMATCH) md->mark = markptr;
00879         RRETURN(rrc);
00880         }
00881 
00882       /* For non-final alternatives, continue the loop for a NOMATCH result;
00883       otherwise return. */
00884 
00885       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00886         eptrb, flags, RM2);
00887       if (rrc != MATCH_NOMATCH &&
00888           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
00889         RRETURN(rrc);
00890       ecode += GET(ecode, 1);
00891       }
00892     /* Control never reaches here. */
00893 
00894     /* Conditional group: compilation checked that there are no more than
00895     two branches. If the condition is false, skipping the first branch takes us
00896     past the end if there is only one branch, but that's OK because that is
00897     exactly what going to the ket would do. As there is only one branch to be
00898     obeyed, we can use tail recursion to avoid using another stack frame. */
00899 
00900     case OP_COND:
00901     case OP_SCOND:
00902     codelink= GET(ecode, 1);
00903 
00904     /* Because of the way auto-callout works during compile, a callout item is
00905     inserted between OP_COND and an assertion condition. */
00906 
00907     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
00908       {
00909       if (pcre_callout != NULL)
00910         {
00911         pcre_callout_block cb;
00912         cb.version          = 1;   /* Version 1 of the callout block */
00913         cb.callout_number   = ecode[LINK_SIZE+2];
00914         cb.offset_vector    = md->offset_vector;
00915         cb.subject          = (PCRE_SPTR)md->start_subject;
00916         cb.subject_length   = (int)(md->end_subject - md->start_subject);
00917         cb.start_match      = (int)(mstart - md->start_subject);
00918         cb.current_position = (int)(eptr - md->start_subject);
00919         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
00920         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
00921         cb.capture_top      = offset_top/2;
00922         cb.capture_last     = md->capture_last;
00923         cb.callout_data     = md->callout_data;
00924         if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
00925         if (rrc < 0) RRETURN(rrc);
00926         }
00927       ecode += _pcre_OP_lengths[OP_CALLOUT];
00928       }
00929 
00930     condcode = ecode[LINK_SIZE+1];
00931 
00932     /* Now see what the actual condition is */
00933 
00934     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
00935       {
00936       if (md->recursive == NULL)                /* Not recursing => FALSE */
00937         {
00938         condition = FALSE;
00939         ecode += GET(ecode, 1);
00940         }
00941       else
00942         {
00943         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
00944         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
00945 
00946         /* If the test is for recursion into a specific subpattern, and it is
00947         false, but the test was set up by name, scan the table to see if the
00948         name refers to any other numbers, and test them. The condition is true
00949         if any one is set. */
00950 
00951         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
00952           {
00953           uschar *slotA = md->name_table;
00954           for (i = 0; i < md->name_count; i++)
00955             {
00956             if (GET2(slotA, 0) == recno) break;
00957             slotA += md->name_entry_size;
00958             }
00959 
00960           /* Found a name for the number - there can be only one; duplicate
00961           names for different numbers are allowed, but not vice versa. First
00962           scan down for duplicates. */
00963 
00964           if (i < md->name_count)
00965             {
00966             uschar *slotB = slotA;
00967             while (slotB > md->name_table)
00968               {
00969               slotB -= md->name_entry_size;
00970               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
00971                 {
00972                 condition = GET2(slotB, 0) == md->recursive->group_num;
00973                 if (condition) break;
00974                 }
00975               else break;
00976               }
00977 
00978             /* Scan up for duplicates */
00979 
00980             if (!condition)
00981               {
00982               slotB = slotA;
00983               for (i++; i < md->name_count; i++)
00984                 {
00985                 slotB += md->name_entry_size;
00986                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
00987                   {
00988                   condition = GET2(slotB, 0) == md->recursive->group_num;
00989                   if (condition) break;
00990                   }
00991                 else break;
00992                 }
00993               }
00994             }
00995           }
00996 
00997         /* Choose branch according to the condition */
00998 
00999         ecode += condition? 3 : GET(ecode, 1);
01000         }
01001       }
01002 
01003     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
01004       {
01005       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
01006       condition = offset < offset_top && md->offset_vector[offset] >= 0;
01007 
01008       /* If the numbered capture is unset, but the reference was by name,
01009       scan the table to see if the name refers to any other numbers, and test
01010       them. The condition is true if any one is set. This is tediously similar
01011       to the code above, but not close enough to try to amalgamate. */
01012 
01013       if (!condition && condcode == OP_NCREF)
01014         {
01015         int refno = offset >> 1;
01016         uschar *slotA = md->name_table;
01017 
01018         for (i = 0; i < md->name_count; i++)
01019           {
01020           if (GET2(slotA, 0) == refno) break;
01021           slotA += md->name_entry_size;
01022           }
01023 
01024         /* Found a name for the number - there can be only one; duplicate names
01025         for different numbers are allowed, but not vice versa. First scan down
01026         for duplicates. */
01027 
01028         if (i < md->name_count)
01029           {
01030           uschar *slotB = slotA;
01031           while (slotB > md->name_table)
01032             {
01033             slotB -= md->name_entry_size;
01034             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
01035               {
01036               offset = GET2(slotB, 0) << 1;
01037               condition = offset < offset_top &&
01038                 md->offset_vector[offset] >= 0;
01039               if (condition) break;
01040               }
01041             else break;
01042             }
01043 
01044           /* Scan up for duplicates */
01045 
01046           if (!condition)
01047             {
01048             slotB = slotA;
01049             for (i++; i < md->name_count; i++)
01050               {
01051               slotB += md->name_entry_size;
01052               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
01053                 {
01054                 offset = GET2(slotB, 0) << 1;
01055                 condition = offset < offset_top &&
01056                   md->offset_vector[offset] >= 0;
01057                 if (condition) break;
01058                 }
01059               else break;
01060               }
01061             }
01062           }
01063         }
01064 
01065       /* Choose branch according to the condition */
01066 
01067       ecode += condition? 3 : GET(ecode, 1);
01068       }
01069 
01070     else if (condcode == OP_DEF)     /* DEFINE - always false */
01071       {
01072       condition = FALSE;
01073       ecode += GET(ecode, 1);
01074       }
01075 
01076     /* The condition is an assertion. Call match() to evaluate it - setting
01077     the final argument match_condassert causes it to stop at the end of an
01078     assertion. */
01079 
01080     else
01081       {
01082       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
01083           match_condassert, RM3);
01084       if (rrc == MATCH_MATCH)
01085         {
01086         condition = TRUE;
01087         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
01088         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
01089         }
01090       else if (rrc != MATCH_NOMATCH &&
01091               (rrc != MATCH_THEN || md->start_match_ptr != ecode))
01092         {
01093         RRETURN(rrc);         /* Need braces because of following else */
01094         }
01095       else
01096         {
01097         condition = FALSE;
01098         ecode += codelink;
01099         }
01100       }
01101 
01102     /* We are now at the branch that is to be obeyed. As there is only one,
01103     we can use tail recursion to avoid using another stack frame, except when
01104     match_cbegroup is required for an unlimited repeat of a possibly empty
01105     group. If the second alternative doesn't exist, we can just plough on. */
01106 
01107     if (condition || *ecode == OP_ALT)
01108       {
01109       ecode += 1 + LINK_SIZE;
01110       if (op == OP_SCOND)        /* Possibly empty group */
01111         {
01112         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
01113         RRETURN(rrc);
01114         }
01115       else                       /* Group must match something */
01116         {
01117         flags = 0;
01118         goto TAIL_RECURSE;
01119         }
01120       }
01121     else                         /* Condition false & no alternative */
01122       {
01123       ecode += 1 + LINK_SIZE;
01124       }
01125     break;
01126 
01127 
01128     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
01129     to close any currently open capturing brackets. */
01130 
01131     case OP_CLOSE:
01132     number = GET2(ecode, 1);
01133     offset = number << 1;
01134 
01135 #ifdef PCRE_DEBUG
01136       printf("end bracket %d at *ACCEPT", number);
01137       printf("\n");
01138 #endif
01139 
01140     md->capture_last = number;
01141     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
01142       {
01143       md->offset_vector[offset] =
01144         md->offset_vector[md->offset_end - number];
01145       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
01146       if (offset_top <= offset) offset_top = offset + 2;
01147       }
01148     ecode += 3;
01149     break;
01150 
01151 
01152     /* End of the pattern, either real or forced. If we are in a top-level
01153     recursion, we should restore the offsets appropriately and continue from
01154     after the call. */
01155 
01156     case OP_ACCEPT:
01157     case OP_END:
01158     if (md->recursive != NULL && md->recursive->group_num == 0)
01159       {
01160       recursion_info *rec = md->recursive;
01161       DPRINTF(("End of pattern in a (?0) recursion\n"));
01162       md->recursive = rec->prevrec;
01163       memmove(md->offset_vector, rec->offset_save,
01164         rec->saved_max * sizeof(int));
01165       offset_top = rec->save_offset_top;
01166       ims = original_ims;
01167       ecode = rec->after_call;
01168       break;
01169       }
01170 
01171     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
01172     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
01173     the subject. In both cases, backtracking will then try other alternatives,
01174     if any. */
01175 
01176     if (eptr == mstart &&
01177         (md->notempty ||
01178           (md->notempty_atstart &&
01179             mstart == md->start_subject + md->start_offset)))
01180       MRRETURN(MATCH_NOMATCH);
01181 
01182     /* Otherwise, we have a match. */
01183 
01184     md->end_match_ptr = eptr;           /* Record where we ended */
01185     md->end_offset_top = offset_top;    /* and how many extracts were taken */
01186     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
01187 
01188     /* For some reason, the macros don't work properly if an expression is
01189     given as the argument to MRRETURN when the heap is in use. */
01190 
01191     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
01192     MRRETURN(rrc);
01193 
01194     /* Change option settings */
01195 
01196     case OP_OPT:
01197     ims = ecode[1];
01198     ecode += 2;
01199     DPRINTF(("ims set to %02lx\n", ims));
01200     break;
01201 
01202     /* Assertion brackets. Check the alternative branches in turn - the
01203     matching won't pass the KET for an assertion. If any one branch matches,
01204     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
01205     start of each branch to move the current point backwards, so the code at
01206     this level is identical to the lookahead case. */
01207 
01208     case OP_ASSERT:
01209     case OP_ASSERTBACK:
01210     do
01211       {
01212       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
01213         RM4);
01214       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
01215         {
01216         mstart = md->start_match_ptr;   /* In case \K reset it */
01217         break;
01218         }
01219       if (rrc != MATCH_NOMATCH &&
01220           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
01221         RRETURN(rrc);
01222       ecode += GET(ecode, 1);
01223       }
01224     while (*ecode == OP_ALT);
01225     if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
01226 
01227     /* If checking an assertion for a condition, return MATCH_MATCH. */
01228 
01229     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
01230 
01231     /* Continue from after the assertion, updating the offsets high water
01232     mark, since extracts may have been taken during the assertion. */
01233 
01234     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01235     ecode += 1 + LINK_SIZE;
01236     offset_top = md->end_offset_top;
01237     continue;
01238 
01239     /* Negative assertion: all branches must fail to match. Encountering SKIP,
01240     PRUNE, or COMMIT means we must assume failure without checking subsequent
01241     branches. */
01242 
01243     case OP_ASSERT_NOT:
01244     case OP_ASSERTBACK_NOT:
01245     do
01246       {
01247       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
01248         RM5);
01249       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
01250       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
01251         {
01252         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01253         break;
01254         }
01255       if (rrc != MATCH_NOMATCH &&
01256           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
01257         RRETURN(rrc);
01258       ecode += GET(ecode,1);
01259       }
01260     while (*ecode == OP_ALT);
01261 
01262     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
01263 
01264     ecode += 1 + LINK_SIZE;
01265     continue;
01266 
01267     /* Move the subject pointer back. This occurs only at the start of
01268     each branch of a lookbehind assertion. If we are too close to the start to
01269     move back, this match function fails. When working with UTF-8 we move
01270     back a number of characters, not bytes. */
01271 
01272     case OP_REVERSE:
01273 #ifdef SUPPORT_UTF8
01274     if (utf8)
01275       {
01276       i = GET(ecode, 1);
01277       while (i-- > 0)
01278         {
01279         eptr--;
01280         if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
01281         BACKCHAR(eptr);
01282         }
01283       }
01284     else
01285 #endif
01286 
01287     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
01288 
01289       {
01290       eptr -= GET(ecode, 1);
01291       if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
01292       }
01293 
01294     /* Save the earliest consulted character, then skip to next op code */
01295 
01296     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
01297     ecode += 1 + LINK_SIZE;
01298     break;
01299 
01300     /* The callout item calls an external function, if one is provided, passing
01301     details of the match so far. This is mainly for debugging, though the
01302     function is able to force a failure. */
01303 
01304     case OP_CALLOUT:
01305     if (pcre_callout != NULL)
01306       {
01307       pcre_callout_block cb;
01308       cb.version          = 1;   /* Version 1 of the callout block */
01309       cb.callout_number   = ecode[1];
01310       cb.offset_vector    = md->offset_vector;
01311       cb.subject          = (PCRE_SPTR)md->start_subject;
01312       cb.subject_length   = (int)(md->end_subject - md->start_subject);
01313       cb.start_match      = (int)(mstart - md->start_subject);
01314       cb.current_position = (int)(eptr - md->start_subject);
01315       cb.pattern_position = GET(ecode, 2);
01316       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
01317       cb.capture_top      = offset_top/2;
01318       cb.capture_last     = md->capture_last;
01319       cb.callout_data     = md->callout_data;
01320       if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
01321       if (rrc < 0) RRETURN(rrc);
01322       }
01323     ecode += 2 + 2*LINK_SIZE;
01324     break;
01325 
01326     /* Recursion either matches the current regex, or some subexpression. The
01327     offset data is the offset to the starting bracket from the start of the
01328     whole pattern. (This is so that it works from duplicated subpatterns.)
01329 
01330     If there are any capturing brackets started but not finished, we have to
01331     save their starting points and reinstate them after the recursion. However,
01332     we don't know how many such there are (offset_top records the completed
01333     total) so we just have to save all the potential data. There may be up to
01334     65535 such values, which is too large to put on the stack, but using malloc
01335     for small numbers seems expensive. As a compromise, the stack is used when
01336     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
01337     is used. If the malloc fails, the values cannot be saved, so the match is
01338     abandoned and PCRE_ERROR_NOMEMORY is passed back up through the normal
01339     error-return path.
01340 
01341     There are also other values that have to be saved. We use a chained
01342     sequence of blocks that actually live on the stack. Thanks to Robin Houston
01343     for the original version of this logic. */
01344 
01345     case OP_RECURSE:
01346       {
01347       callpat = md->start_code + GET(ecode, 1);
01348       new_recursive.group_num = (callpat == md->start_code)? 0 :
01349         GET2(callpat, 1 + LINK_SIZE);
01350 
01351       /* Add to "recursing stack" */
01352 
01353       new_recursive.prevrec = md->recursive;
01354       md->recursive = &new_recursive;
01355 
01356       /* Find where to continue from afterwards */
01357 
01358       ecode += 1 + LINK_SIZE;
01359       new_recursive.after_call = ecode;
01360 
01361       /* Now save the offset data. */
01362 
01363       new_recursive.saved_max = md->offset_end;
01364       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
01365         new_recursive.offset_save = stacksave;
01366       else
01367         {
01368         new_recursive.offset_save =
01369           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
01370         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
01371         }
01372 
01373       memcpy(new_recursive.offset_save, md->offset_vector,
01374             new_recursive.saved_max * sizeof(int));
01375       new_recursive.save_offset_top = offset_top;
01376 
01377       /* OK, now we can do the recursion. For each top-level alternative we
01378       restore the offset and recursion data. */
01379 
01380       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
01381       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
01382       do
01383         {
01384         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
01385           md, ims, eptrb, flags, RM6);
01386         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
01387           {
01388           DPRINTF(("Recursion matched\n"));
01389           md->recursive = new_recursive.prevrec;
01390           if (new_recursive.offset_save != stacksave)
01391             (pcre_free)(new_recursive.offset_save);
01392           MRRETURN(MATCH_MATCH);
01393           }
01394         else if (rrc != MATCH_NOMATCH &&
01395                 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
01396           {
01397           DPRINTF(("Recursion gave error %d\n", rrc));
01398           if (new_recursive.offset_save != stacksave)
01399             (pcre_free)(new_recursive.offset_save);
01400           RRETURN(rrc);
01401           }
01402 
01403         md->recursive = &new_recursive;
01404         memcpy(md->offset_vector, new_recursive.offset_save,
01405             new_recursive.saved_max * sizeof(int));
01406         callpat += GET(callpat, 1);
01407         }
01408       while (*callpat == OP_ALT);
01409 
01410       DPRINTF(("Recursion didn't match\n"));
01411       md->recursive = new_recursive.prevrec;
01412       if (new_recursive.offset_save != stacksave)
01413         (pcre_free)(new_recursive.offset_save);
01414       MRRETURN(MATCH_NOMATCH);
01415       }
01416     /* Control never reaches here */
01417 
01418     /* "Once" brackets are like assertion brackets except that after a match,
01419     the point in the subject string is not moved back. Thus there can never be
01420     a move back into the brackets. Friedl calls these "atomic" subpatterns.
01421     Check the alternative branches in turn - the matching won't pass the KET
01422     for this kind of subpattern. If any one branch matches, we carry on as at
01423     the end of a normal bracket, leaving the subject pointer, but resetting
01424     the start-of-match value in case it was changed by \K. */
01425 
01426     case OP_ONCE:
01427     prev = ecode;
01428     saved_eptr = eptr;
01429 
01430     do
01431       {
01432       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
01433       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
01434         {
01435         mstart = md->start_match_ptr;
01436         break;
01437         }
01438       if (rrc != MATCH_NOMATCH &&
01439           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
01440         RRETURN(rrc);
01441       ecode += GET(ecode,1);
01442       }
01443     while (*ecode == OP_ALT);
01444 
01445     /* If we hit the end of the group (which could be repeated), fail */
01446 
01447     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
01448 
01449     /* Continue as from after the assertion, updating the offsets high water
01450     mark, since extracts may have been taken. */
01451 
01452     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
01453 
01454     offset_top = md->end_offset_top;
01455     eptr = md->end_match_ptr;
01456 
01457     /* For a non-repeating ket, just continue at this level. This also
01458     happens for a repeating ket if no characters were matched in the group.
01459     This is the forcible breaking of infinite loops as implemented in Perl
01460     5.005. If there is an options reset, it will get obeyed in the normal
01461     course of events. */
01462 
01463     if (*ecode == OP_KET || eptr == saved_eptr)
01464       {
01465       ecode += 1+LINK_SIZE;
01466       break;
01467       }
01468 
01469     /* The repeating kets try the rest of the pattern or restart from the
01470     preceding bracket, in the appropriate order. The second "call" of match()
01471     uses tail recursion, to avoid using another stack frame. We need to reset
01472     any options that changed within the bracket before re-running it, so
01473     check the next opcode. */
01474 
01475     if (ecode[1+LINK_SIZE] == OP_OPT)
01476       {
01477       ims = (ims & ~PCRE_IMS) | ecode[4];
01478       DPRINTF(("ims set to %02lx at group repeat\n", ims));
01479       }
01480 
01481     if (*ecode == OP_KETRMIN)
01482       {
01483       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
01484       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01485       ecode = prev;
01486       flags = 0;
01487       goto TAIL_RECURSE;
01488       }
01489     else  /* OP_KETRMAX */
01490       {
01491       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
01492       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01493       ecode += 1 + LINK_SIZE;
01494       flags = 0;
01495       goto TAIL_RECURSE;
01496       }
01497     /* Control never gets here */
01498 
01499     /* An alternation is the end of a branch; scan along to find the end of the
01500     bracketed group and go to there. */
01501 
01502     case OP_ALT:
01503     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01504     break;
01505 
01506     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
01507     indicating that it may occur zero times. It may repeat infinitely, or not
01508     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
01509     with fixed upper repeat limits are compiled as a number of copies, with the
01510     optional ones preceded by BRAZERO or BRAMINZERO. */
01511 
01512     case OP_BRAZERO:
01513       {
01514       next = ecode+1;
01515       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
01516       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01517       do next += GET(next,1); while (*next == OP_ALT);
01518       ecode = next + 1 + LINK_SIZE;
01519       }
01520     break;
01521 
01522     case OP_BRAMINZERO:
01523       {
01524       next = ecode+1;
01525       do next += GET(next, 1); while (*next == OP_ALT);
01526       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
01527       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01528       ecode++;
01529       }
01530     break;
01531 
01532     case OP_SKIPZERO:
01533       {
01534       next = ecode+1;
01535       do next += GET(next,1); while (*next == OP_ALT);
01536       ecode = next + 1 + LINK_SIZE;
01537       }
01538     break;
01539 
01540     /* End of a group, repeated or non-repeating. */
01541 
01542     case OP_KET:
01543     case OP_KETRMIN:
01544     case OP_KETRMAX:
01545     prev = ecode - GET(ecode, 1);
01546 
01547     /* If this was a group that remembered the subject start, in order to break
01548     infinite repeats of empty string matches, retrieve the subject start from
01549     the chain. Otherwise, set it NULL. */
01550 
01551     if (*prev >= OP_SBRA)
01552       {
01553       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
01554       eptrb = eptrb->epb_prev;              /* Backup to previous group */
01555       }
01556     else saved_eptr = NULL;
01557 
01558     /* If we are at the end of an assertion group or an atomic group, stop
01559     matching and return MATCH_MATCH, but record the current high water mark for
01560     use by positive assertions. We also need to record the match start in case
01561     it was changed by \K. */
01562 
01563     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
01564         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
01565         *prev == OP_ONCE)
01566       {
01567       md->end_match_ptr = eptr;      /* For ONCE */
01568       md->end_offset_top = offset_top;
01569       md->start_match_ptr = mstart;
01570       MRRETURN(MATCH_MATCH);
01571       }
01572 
01573     /* For capturing groups we have to check the group number back at the start
01574     and if necessary complete handling an extraction by setting the offsets and
01575     bumping the high water mark. Note that whole-pattern recursion is coded as
01576     a recurse into group 0, so it won't be picked up here. Instead, we catch it
01577     when the OP_END is reached. Other recursion is handled here. */
01578 
01579     if (*prev == OP_CBRA || *prev == OP_SCBRA)
01580       {
01581       number = GET2(prev, 1+LINK_SIZE);
01582       offset = number << 1;
01583 
01584 #ifdef PCRE_DEBUG
01585       printf("end bracket %d", number);
01586       printf("\n");
01587 #endif
01588 
01589       md->capture_last = number;
01590       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
01591         {
01592         md->offset_vector[offset] =
01593           md->offset_vector[md->offset_end - number];
01594         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
01595         if (offset_top <= offset) offset_top = offset + 2;
01596         }
01597 
01598       /* Handle a recursively called group. Restore the offsets
01599       appropriately and continue from after the call. */
01600 
01601       if (md->recursive != NULL && md->recursive->group_num == number)
01602         {
01603         recursion_info *rec = md->recursive;
01604         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
01605         md->recursive = rec->prevrec;
01606         memcpy(md->offset_vector, rec->offset_save,
01607           rec->saved_max * sizeof(int));
01608         offset_top = rec->save_offset_top;
01609         ecode = rec->after_call;
01610         ims = original_ims;
01611         break;
01612         }
01613       }
01614 
01615     /* For both capturing and non-capturing groups, reset the value of the ims
01616     flags, in case they got changed during the group. */
01617 
01618     ims = original_ims;
01619     DPRINTF(("ims reset to %02lx\n", ims));
01620 
01621     /* For a non-repeating ket, just continue at this level. This also
01622     happens for a repeating ket if no characters were matched in the group.
01623     This is the forcible breaking of infinite loops as implemented in Perl
01624     5.005. If there is an options reset, it will get obeyed in the normal
01625     course of events. */
01626 
01627     if (*ecode == OP_KET || eptr == saved_eptr)
01628       {
01629       ecode += 1 + LINK_SIZE;
01630       break;
01631       }
01632 
01633     /* The repeating kets try the rest of the pattern or restart from the
01634     preceding bracket, in the appropriate order. In the second case, we can use
01635     tail recursion to avoid using another stack frame, unless we have an
01636     unlimited repeat of a group that can match an empty string. */
01637 
01638     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
01639 
01640     if (*ecode == OP_KETRMIN)
01641       {
01642       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
01643       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01644       if (flags != 0)    /* Could match an empty string */
01645         {
01646         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
01647         RRETURN(rrc);
01648         }
01649       ecode = prev;
01650       goto TAIL_RECURSE;
01651       }
01652     else  /* OP_KETRMAX */
01653       {
01654       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
01655       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01656       ecode += 1 + LINK_SIZE;
01657       flags = 0;
01658       goto TAIL_RECURSE;
01659       }
01660     /* Control never gets here */
01661 
01662     /* Start of subject unless notbol, or after internal newline if multiline */
01663 
01664     case OP_CIRC:
01665     if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
01666     if ((ims & PCRE_MULTILINE) != 0)
01667       {
01668       if (eptr != md->start_subject &&
01669           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
01670         MRRETURN(MATCH_NOMATCH);
01671       ecode++;
01672       break;
01673       }
01674     /* ... else fall through */
01675 
01676     /* Start of subject assertion */
01677 
01678     case OP_SOD:
01679     if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
01680     ecode++;
01681     break;
01682 
01683     /* Start of match assertion */
01684 
01685     case OP_SOM:
01686     if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
01687     ecode++;
01688     break;
01689 
01690     /* Reset the start of match point */
01691 
01692     case OP_SET_SOM:
01693     mstart = eptr;
01694     ecode++;
01695     break;
01696 
01697     /* Assert before internal newline if multiline, or before a terminating
01698     newline unless endonly is set, else end of subject unless noteol is set. */
01699 
01700     case OP_DOLL:
01701     if ((ims & PCRE_MULTILINE) != 0)
01702       {
01703       if (eptr < md->end_subject)
01704         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
01705       else
01706         {
01707         if (md->noteol) MRRETURN(MATCH_NOMATCH);
01708         SCHECK_PARTIAL();
01709         }
01710       ecode++;
01711       break;
01712       }
01713     else  /* Not multiline */
01714       {
01715       if (md->noteol) MRRETURN(MATCH_NOMATCH);
01716       if (!md->endonly) goto ASSERT_NL_OR_EOS;
01717       }
01718 
01719     /* ... else fall through for endonly */
01720 
01721     /* End of subject assertion (\z) */
01722 
01723     case OP_EOD:
01724     if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
01725     SCHECK_PARTIAL();
01726     ecode++;
01727     break;
01728 
01729     /* End of subject or ending \n assertion (\Z) */
01730 
01731     case OP_EODN:
01732     ASSERT_NL_OR_EOS:
01733     if (eptr < md->end_subject &&
01734         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01735       MRRETURN(MATCH_NOMATCH);
01736 
01737     /* Either at end of string or \n before end. */
01738 
01739     SCHECK_PARTIAL();
01740     ecode++;
01741     break;
01742 
01743     /* Word boundary assertions */
01744 
01745     case OP_NOT_WORD_BOUNDARY:
01746     case OP_WORD_BOUNDARY:
01747       {
01748 
01749       /* Find out if the previous and current characters are "word" characters.
01750       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
01751       be "non-word" characters. Remember the earliest consulted character for
01752       partial matching. */
01753 
01754 #ifdef SUPPORT_UTF8
01755       if (utf8)
01756         {
01757         /* Get status of previous character */
01758 
01759         if (eptr == md->start_subject) prev_is_word = FALSE; else
01760           {
01761           USPTR lastptr = eptr - 1;
01762           while((*lastptr & 0xc0) == 0x80) lastptr--;
01763           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
01764           GETCHAR(c, lastptr);
01765 #ifdef SUPPORT_UCP
01766           if (md->use_ucp)
01767             {
01768             if (c == '_') prev_is_word = TRUE; else
01769               {
01770               int cat = UCD_CATEGORY(c);
01771               prev_is_word = (cat == ucp_L || cat == ucp_N);
01772               }
01773             }
01774           else
01775 #endif
01776           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01777           }
01778 
01779         /* Get status of next character */
01780 
01781         if (eptr >= md->end_subject)
01782           {
01783           SCHECK_PARTIAL();
01784           cur_is_word = FALSE;
01785           }
01786         else
01787           {
01788           GETCHAR(c, eptr);
01789 #ifdef SUPPORT_UCP
01790           if (md->use_ucp)
01791             {
01792             if (c == '_') cur_is_word = TRUE; else
01793               {
01794               int cat = UCD_CATEGORY(c);
01795               cur_is_word = (cat == ucp_L || cat == ucp_N);
01796               }
01797             }
01798           else
01799 #endif
01800           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01801           }
01802         }
01803       else
01804 #endif
01805 
01806       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
01807       consistency with the behaviour of \w we do use it in this case. */
01808 
01809         {
01810         /* Get status of previous character */
01811 
01812         if (eptr == md->start_subject) prev_is_word = FALSE; else
01813           {
01814           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
01815 #ifdef SUPPORT_UCP
01816           if (md->use_ucp)
01817             {
01818             c = eptr[-1];
01819             if (c == '_') prev_is_word = TRUE; else
01820               {
01821               int cat = UCD_CATEGORY(c);
01822               prev_is_word = (cat == ucp_L || cat == ucp_N);
01823               }
01824             }
01825           else
01826 #endif
01827           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
01828           }
01829 
01830         /* Get status of next character */
01831 
01832         if (eptr >= md->end_subject)
01833           {
01834           SCHECK_PARTIAL();
01835           cur_is_word = FALSE;
01836           }
01837         else
01838 #ifdef SUPPORT_UCP
01839         if (md->use_ucp)
01840           {
01841           c = *eptr;
01842           if (c == '_') cur_is_word = TRUE; else
01843             {
01844             int cat = UCD_CATEGORY(c);
01845             cur_is_word = (cat == ucp_L || cat == ucp_N);
01846             }
01847           }
01848         else
01849 #endif
01850         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
01851         }
01852 
01853       /* Now see if the situation is what we want */
01854 
01855       if ((*ecode++ == OP_WORD_BOUNDARY)?
01856            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
01857         MRRETURN(MATCH_NOMATCH);
01858       }
01859     break;
01860 
01861     /* Match a single character type; inline for speed */
01862 
01863     case OP_ANY:
01864     if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
01865     /* Fall through */
01866 
01867     case OP_ALLANY:
01868     if (eptr++ >= md->end_subject)
01869       {
01870       SCHECK_PARTIAL();
01871       MRRETURN(MATCH_NOMATCH);
01872       }
01873     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
01874     ecode++;
01875     break;
01876 
01877     /* Match a single byte, even in UTF-8 mode. This opcode really does match
01878     any byte, even newline, independent of the setting of PCRE_DOTALL. */
01879 
01880     case OP_ANYBYTE:
01881     if (eptr++ >= md->end_subject)
01882       {
01883       SCHECK_PARTIAL();
01884       MRRETURN(MATCH_NOMATCH);
01885       }
01886     ecode++;
01887     break;
01888 
01889     case OP_NOT_DIGIT:
01890     if (eptr >= md->end_subject)
01891       {
01892       SCHECK_PARTIAL();
01893       MRRETURN(MATCH_NOMATCH);
01894       }
01895     GETCHARINCTEST(c, eptr);
01896     if (
01897 #ifdef SUPPORT_UTF8
01898        c < 256 &&
01899 #endif
01900        (md->ctypes[c] & ctype_digit) != 0
01901        )
01902       MRRETURN(MATCH_NOMATCH);
01903     ecode++;
01904     break;
01905 
01906     case OP_DIGIT:
01907     if (eptr >= md->end_subject)
01908       {
01909       SCHECK_PARTIAL();
01910       MRRETURN(MATCH_NOMATCH);
01911       }
01912     GETCHARINCTEST(c, eptr);
01913     if (
01914 #ifdef SUPPORT_UTF8
01915        c >= 256 ||
01916 #endif
01917        (md->ctypes[c] & ctype_digit) == 0
01918        )
01919       MRRETURN(MATCH_NOMATCH);
01920     ecode++;
01921     break;
01922 
01923     case OP_NOT_WHITESPACE:
01924     if (eptr >= md->end_subject)
01925       {
01926       SCHECK_PARTIAL();
01927       MRRETURN(MATCH_NOMATCH);
01928       }
01929     GETCHARINCTEST(c, eptr);
01930     if (
01931 #ifdef SUPPORT_UTF8
01932        c < 256 &&
01933 #endif
01934        (md->ctypes[c] & ctype_space) != 0
01935        )
01936       MRRETURN(MATCH_NOMATCH);
01937     ecode++;
01938     break;
01939 
01940     case OP_WHITESPACE:
01941     if (eptr >= md->end_subject)
01942       {
01943       SCHECK_PARTIAL();
01944       MRRETURN(MATCH_NOMATCH);
01945       }
01946     GETCHARINCTEST(c, eptr);
01947     if (
01948 #ifdef SUPPORT_UTF8
01949        c >= 256 ||
01950 #endif
01951        (md->ctypes[c] & ctype_space) == 0
01952        )
01953       MRRETURN(MATCH_NOMATCH);
01954     ecode++;
01955     break;
01956 
01957     case OP_NOT_WORDCHAR:
01958     if (eptr >= md->end_subject)
01959       {
01960       SCHECK_PARTIAL();
01961       MRRETURN(MATCH_NOMATCH);
01962       }
01963     GETCHARINCTEST(c, eptr);
01964     if (
01965 #ifdef SUPPORT_UTF8
01966        c < 256 &&
01967 #endif
01968        (md->ctypes[c] & ctype_word) != 0
01969        )
01970       MRRETURN(MATCH_NOMATCH);
01971     ecode++;
01972     break;
01973 
01974     case OP_WORDCHAR:
01975     if (eptr >= md->end_subject)
01976       {
01977       SCHECK_PARTIAL();
01978       MRRETURN(MATCH_NOMATCH);
01979       }
01980     GETCHARINCTEST(c, eptr);
01981     if (
01982 #ifdef SUPPORT_UTF8
01983        c >= 256 ||
01984 #endif
01985        (md->ctypes[c] & ctype_word) == 0
01986        )
01987       MRRETURN(MATCH_NOMATCH);
01988     ecode++;
01989     break;
01990 
01991     case OP_ANYNL:
01992     if (eptr >= md->end_subject)
01993       {
01994       SCHECK_PARTIAL();
01995       MRRETURN(MATCH_NOMATCH);
01996       }
01997     GETCHARINCTEST(c, eptr);
01998     switch(c)
01999       {
02000       default: MRRETURN(MATCH_NOMATCH);
02001       case 0x000d:
02002       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
02003       break;
02004 
02005       case 0x000a:
02006       break;
02007 
02008       case 0x000b:
02009       case 0x000c:
02010       case 0x0085:
02011       case 0x2028:
02012       case 0x2029:
02013       if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
02014       break;
02015       }
02016     ecode++;
02017     break;
02018 
02019     case OP_NOT_HSPACE:
02020     if (eptr >= md->end_subject)
02021       {
02022       SCHECK_PARTIAL();
02023       MRRETURN(MATCH_NOMATCH);
02024       }
02025     GETCHARINCTEST(c, eptr);
02026     switch(c)
02027       {
02028       default: break;
02029       case 0x09:      /* HT */
02030       case 0x20:      /* SPACE */
02031       case 0xa0:      /* NBSP */
02032       case 0x1680:    /* OGHAM SPACE MARK */
02033       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
02034       case 0x2000:    /* EN QUAD */
02035       case 0x2001:    /* EM QUAD */
02036       case 0x2002:    /* EN SPACE */
02037       case 0x2003:    /* EM SPACE */
02038       case 0x2004:    /* THREE-PER-EM SPACE */
02039       case 0x2005:    /* FOUR-PER-EM SPACE */
02040       case 0x2006:    /* SIX-PER-EM SPACE */
02041       case 0x2007:    /* FIGURE SPACE */
02042       case 0x2008:    /* PUNCTUATION SPACE */
02043       case 0x2009:    /* THIN SPACE */
02044       case 0x200A:    /* HAIR SPACE */
02045       case 0x202f:    /* NARROW NO-BREAK SPACE */
02046       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
02047       case 0x3000:    /* IDEOGRAPHIC SPACE */
02048       MRRETURN(MATCH_NOMATCH);
02049       }
02050     ecode++;
02051     break;
02052 
02053     case OP_HSPACE:
02054     if (eptr >= md->end_subject)
02055       {
02056       SCHECK_PARTIAL();
02057       MRRETURN(MATCH_NOMATCH);
02058       }
02059     GETCHARINCTEST(c, eptr);
02060     switch(c)
02061       {
02062       default: MRRETURN(MATCH_NOMATCH);
02063       case 0x09:      /* HT */
02064       case 0x20:      /* SPACE */
02065       case 0xa0:      /* NBSP */
02066       case 0x1680:    /* OGHAM SPACE MARK */
02067       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
02068       case 0x2000:    /* EN QUAD */
02069       case 0x2001:    /* EM QUAD */
02070       case 0x2002:    /* EN SPACE */
02071       case 0x2003:    /* EM SPACE */
02072       case 0x2004:    /* THREE-PER-EM SPACE */
02073       case 0x2005:    /* FOUR-PER-EM SPACE */
02074       case 0x2006:    /* SIX-PER-EM SPACE */
02075       case 0x2007:    /* FIGURE SPACE */
02076       case 0x2008:    /* PUNCTUATION SPACE */
02077       case 0x2009:    /* THIN SPACE */
02078       case 0x200A:    /* HAIR SPACE */
02079       case 0x202f:    /* NARROW NO-BREAK SPACE */
02080       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
02081       case 0x3000:    /* IDEOGRAPHIC SPACE */
02082       break;
02083       }
02084     ecode++;
02085     break;
02086 
02087     case OP_NOT_VSPACE:
02088     if (eptr >= md->end_subject)
02089       {
02090       SCHECK_PARTIAL();
02091       MRRETURN(MATCH_NOMATCH);
02092       }
02093     GETCHARINCTEST(c, eptr);
02094     switch(c)
02095       {
02096       default: break;
02097       case 0x0a:      /* LF */
02098       case 0x0b:      /* VT */
02099       case 0x0c:      /* FF */
02100       case 0x0d:      /* CR */
02101       case 0x85:      /* NEL */
02102       case 0x2028:    /* LINE SEPARATOR */
02103       case 0x2029:    /* PARAGRAPH SEPARATOR */
02104       MRRETURN(MATCH_NOMATCH);
02105       }
02106     ecode++;
02107     break;
02108 
02109     case OP_VSPACE:
02110     if (eptr >= md->end_subject)
02111       {
02112       SCHECK_PARTIAL();
02113       MRRETURN(MATCH_NOMATCH);
02114       }
02115     GETCHARINCTEST(c, eptr);
02116     switch(c)
02117       {
02118       default: MRRETURN(MATCH_NOMATCH);
02119       case 0x0a:      /* LF */
02120       case 0x0b:      /* VT */
02121       case 0x0c:      /* FF */
02122       case 0x0d:      /* CR */
02123       case 0x85:      /* NEL */
02124       case 0x2028:    /* LINE SEPARATOR */
02125       case 0x2029:    /* PARAGRAPH SEPARATOR */
02126       break;
02127       }
02128     ecode++;
02129     break;
02130 
02131 #ifdef SUPPORT_UCP
02132     /* Check the next character by Unicode property. We will get here only
02133     if the support is in the binary; otherwise a compile-time error occurs. */
02134 
02135     case OP_PROP:
02136     case OP_NOTPROP:
02137     if (eptr >= md->end_subject)
02138       {
02139       SCHECK_PARTIAL();
02140       MRRETURN(MATCH_NOMATCH);
02141       }
02142     GETCHARINCTEST(c, eptr);
02143       {
02144       const ucd_record *prop = GET_UCD(c);
02145 
02146       switch(ecode[1])
02147         {
02148         case PT_ANY:
02149         if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
02150         break;
02151 
02152         case PT_LAMP:
02153         if ((prop->chartype == ucp_Lu ||
02154              prop->chartype == ucp_Ll ||
02155              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
02156           MRRETURN(MATCH_NOMATCH);
02157         break;
02158 
02159         case PT_GC:
02160         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
02161           MRRETURN(MATCH_NOMATCH);
02162         break;
02163 
02164         case PT_PC:
02165         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
02166           MRRETURN(MATCH_NOMATCH);
02167         break;
02168 
02169         case PT_SC:
02170         if ((ecode[2] != prop->script) == (op == OP_PROP))
02171           MRRETURN(MATCH_NOMATCH);
02172         break;
02173 
02174         /* These are specials */
02175 
02176         case PT_ALNUM:
02177         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
02178              _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
02179           MRRETURN(MATCH_NOMATCH);
02180         break;
02181 
02182         case PT_SPACE:    /* Perl space */
02183         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
02184              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
02185                == (op == OP_NOTPROP))
02186           MRRETURN(MATCH_NOMATCH);
02187         break;
02188 
02189         case PT_PXSPACE:  /* POSIX space */
02190         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
02191              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
02192              c == CHAR_FF || c == CHAR_CR)
02193                == (op == OP_NOTPROP))
02194           MRRETURN(MATCH_NOMATCH);
02195         break;
02196 
02197         case PT_WORD:
02198         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
02199              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
02200              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
02201           MRRETURN(MATCH_NOMATCH);
02202         break;
02203 
02204         /* This should never occur */
02205 
02206         default:
02207         RRETURN(PCRE_ERROR_INTERNAL);
02208         }
02209 
02210       ecode += 3;
02211       }
02212     break;
02213 
02214     /* Match an extended Unicode sequence. We will get here only if the support
02215     is in the binary; otherwise a compile-time error occurs. */
02216 
02217     case OP_EXTUNI:
02218     if (eptr >= md->end_subject)
02219       {
02220       SCHECK_PARTIAL();
02221       MRRETURN(MATCH_NOMATCH);
02222       }
02223     GETCHARINCTEST(c, eptr);
02224       {
02225       int category = UCD_CATEGORY(c);
02226       if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
02227       while (eptr < md->end_subject)
02228         {
02229         int len = 1;
02230         if (!utf8) c = *eptr; else
02231           {
02232           GETCHARLEN(c, eptr, len);
02233           }
02234         category = UCD_CATEGORY(c);
02235         if (category != ucp_M) break;
02236         eptr += len;
02237         }
02238       }
02239     ecode++;
02240     break;
02241 #endif
02242 
02243 
02244     /* Match a back reference, possibly repeatedly. Look past the end of the
02245     item to see if there is repeat information following. The code is similar
02246     to that for character classes, but repeated for efficiency. Then obey
02247     similar code to character type repeats - written out again for speed.
02248     However, if the referenced string is the empty string, always treat
02249     it as matched, any number of times (otherwise there could be infinite
02250     loops). */
02251 
02252     case OP_REF:
02253       {
02254       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
02255       ecode += 3;
02256 
02257       /* If the reference is unset, there are two possibilities:
02258 
02259       (a) In the default, Perl-compatible state, set the length to be longer
02260       than the amount of subject left; this ensures that every attempt at a
02261       match fails. We can't just fail here, because of the possibility of
02262       quantifiers with zero minima.
02263 
02264       (b) If the JavaScript compatibility flag is set, set the length to zero
02265       so that the back reference matches an empty string.
02266 
02267       Otherwise, set the length to the length of what was matched by the
02268       referenced subpattern. */
02269 
02270       if (offset >= offset_top || md->offset_vector[offset] < 0)
02271         length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
02272       else
02273         length = md->offset_vector[offset+1] - md->offset_vector[offset];
02274 
02275       /* Set up for repetition, or handle the non-repeated case */
02276 
02277       switch (*ecode)
02278         {
02279         case OP_CRSTAR:
02280         case OP_CRMINSTAR:
02281         case OP_CRPLUS:
02282         case OP_CRMINPLUS:
02283         case OP_CRQUERY:
02284         case OP_CRMINQUERY:
02285         c = *ecode++ - OP_CRSTAR;
02286         minimize = (c & 1) != 0;
02287         min = rep_min[c];                 /* Pick up values from tables; */
02288         max = rep_max[c];                 /* zero for max => infinity */
02289         if (max == 0) max = INT_MAX;
02290         break;
02291 
02292         case OP_CRRANGE:
02293         case OP_CRMINRANGE:
02294         minimize = (*ecode == OP_CRMINRANGE);
02295         min = GET2(ecode, 1);
02296         max = GET2(ecode, 3);
02297         if (max == 0) max = INT_MAX;
02298         ecode += 5;
02299         break;
02300 
02301         default:               /* No repeat follows */
02302         if (!match_ref(offset, eptr, length, md, ims))
02303           {
02304           CHECK_PARTIAL();
02305           MRRETURN(MATCH_NOMATCH);
02306           }
02307         eptr += length;
02308         continue;              /* With the main loop */
02309         }
02310 
02311       /* If the length of the reference is zero, just continue with the
02312       main loop. */
02313 
02314       if (length == 0) continue;
02315 
02316       /* First, ensure the minimum number of matches are present. We get back
02317       the length of the reference string explicitly rather than passing the
02318       address of eptr, so that eptr can be a register variable. */
02319 
02320       for (i = 1; i <= min; i++)
02321         {
02322         if (!match_ref(offset, eptr, length, md, ims))
02323           {
02324           CHECK_PARTIAL();
02325           MRRETURN(MATCH_NOMATCH);
02326           }
02327         eptr += length;
02328         }
02329 
02330       /* If min = max, continue at the same level without recursion.
02331       They are not both allowed to be zero. */
02332 
02333       if (min == max) continue;
02334 
02335       /* If minimizing, keep trying and advancing the pointer */
02336 
02337       if (minimize)
02338         {
02339         for (fi = min;; fi++)
02340           {
02341           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
02342           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02343           if (fi >= max) MRRETURN(MATCH_NOMATCH);
02344           if (!match_ref(offset, eptr, length, md, ims))
02345             {
02346             CHECK_PARTIAL();
02347             MRRETURN(MATCH_NOMATCH);
02348             }
02349           eptr += length;
02350           }
02351         /* Control never gets here */
02352         }
02353 
02354       /* If maximizing, find the longest string and work backwards */
02355 
02356       else
02357         {
02358         pp = eptr;
02359         for (i = min; i < max; i++)
02360           {
02361           if (!match_ref(offset, eptr, length, md, ims))
02362             {
02363             CHECK_PARTIAL();
02364             break;
02365             }
02366           eptr += length;
02367           }
02368         while (eptr >= pp)
02369           {
02370           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
02371           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02372           eptr -= length;
02373           }
02374         MRRETURN(MATCH_NOMATCH);
02375         }
02376       }
02377     /* Control never gets here */
02378 
02379     /* Match a bit-mapped character class, possibly repeatedly. This op code is
02380     used when all the characters in the class have values in the range 0-255,
02381     and either the matching is caseful, or the characters are in the range
02382     0-127 when UTF-8 processing is enabled. The only difference between
02383     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
02384     encountered.
02385 
02386     First, look past the end of the item to see if there is repeat information
02387     following. Then obey similar code to character type repeats - written out
02388     again for speed. */
02389 
02390     case OP_NCLASS:
02391     case OP_CLASS:
02392       {
02393       data = ecode + 1;                /* Save for matching */
02394       ecode += 33;                     /* Advance past the item */
02395 
02396       switch (*ecode)
02397         {
02398         case OP_CRSTAR:
02399         case OP_CRMINSTAR:
02400         case OP_CRPLUS:
02401         case OP_CRMINPLUS:
02402         case OP_CRQUERY:
02403         case OP_CRMINQUERY:
02404         c = *ecode++ - OP_CRSTAR;
02405         minimize = (c & 1) != 0;
02406         min = rep_min[c];                 /* Pick up values from tables; */
02407         max = rep_max[c];                 /* zero for max => infinity */
02408         if (max == 0) max = INT_MAX;
02409         break;
02410 
02411         case OP_CRRANGE:
02412         case OP_CRMINRANGE:
02413         minimize = (*ecode == OP_CRMINRANGE);
02414         min = GET2(ecode, 1);
02415         max = GET2(ecode, 3);
02416         if (max == 0) max = INT_MAX;
02417         ecode += 5;
02418         break;
02419 
02420         default:               /* No repeat follows */
02421         min = max = 1;
02422         break;
02423         }
02424 
02425       /* First, ensure the minimum number of matches are present. */
02426 
02427 #ifdef SUPPORT_UTF8
02428       /* UTF-8 mode */
02429       if (utf8)
02430         {
02431         for (i = 1; i <= min; i++)
02432           {
02433           if (eptr >= md->end_subject)
02434             {
02435             SCHECK_PARTIAL();
02436             MRRETURN(MATCH_NOMATCH);
02437             }
02438           GETCHARINC(c, eptr);
02439           if (c > 255)
02440             {
02441             if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
02442             }
02443           else
02444             {
02445             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
02446             }
02447           }
02448         }
02449       else
02450 #endif
02451       /* Not UTF-8 mode */
02452         {
02453         for (i = 1; i <= min; i++)
02454           {
02455           if (eptr >= md->end_subject)
02456             {
02457             SCHECK_PARTIAL();
02458             MRRETURN(MATCH_NOMATCH);
02459             }
02460           c = *eptr++;
02461           if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
02462           }
02463         }
02464 
02465       /* If max == min we can continue with the main loop without the
02466       need to recurse. */
02467 
02468       if (min == max) continue;
02469 
02470       /* If minimizing, keep testing the rest of the expression and advancing
02471       the pointer while it matches the class. */
02472 
02473       if (minimize)
02474         {
02475 #ifdef SUPPORT_UTF8
02476         /* UTF-8 mode */
02477         if (utf8)
02478           {
02479           for (fi = min;; fi++)
02480             {
02481             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
02482             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02483             if (fi >= max) MRRETURN(MATCH_NOMATCH);
02484             if (eptr >= md->end_subject)
02485               {
02486               SCHECK_PARTIAL();
02487               MRRETURN(MATCH_NOMATCH);
02488               }
02489             GETCHARINC(c, eptr);
02490             if (c > 255)
02491               {
02492               if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
02493               }
02494             else
02495               {
02496               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
02497               }
02498             }
02499           }
02500         else
02501 #endif
02502         /* Not UTF-8 mode */
02503           {
02504           for (fi = min;; fi++)
02505             {
02506             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
02507             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02508             if (fi >= max) MRRETURN(MATCH_NOMATCH);
02509             if (eptr >= md->end_subject)
02510               {
02511               SCHECK_PARTIAL();
02512               MRRETURN(MATCH_NOMATCH);
02513               }
02514             c = *eptr++;
02515             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
02516             }
02517           }
02518         /* Control never gets here */
02519         }
02520 
02521       /* If maximizing, find the longest possible run, then work backwards. */
02522 
02523       else
02524         {
02525         pp = eptr;
02526 
02527 #ifdef SUPPORT_UTF8
02528         /* UTF-8 mode */
02529         if (utf8)
02530           {
02531           for (i = min; i < max; i++)
02532             {
02533             int len = 1;
02534             if (eptr >= md->end_subject)
02535               {
02536               SCHECK_PARTIAL();
02537               break;
02538               }
02539             GETCHARLEN(c, eptr, len);
02540             if (c > 255)
02541               {
02542               if (op == OP_CLASS) break;
02543               }
02544             else
02545               {
02546               if ((data[c/8] & (1 << (c&7))) == 0) break;
02547               }
02548             eptr += len;
02549             }
02550           for (;;)
02551             {
02552             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
02553             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02554             if (eptr-- == pp) break;        /* Stop if tried at original pos */
02555             BACKCHAR(eptr);
02556             }
02557           }
02558         else
02559 #endif
02560           /* Not UTF-8 mode */
02561           {
02562           for (i = min; i < max; i++)
02563             {
02564             if (eptr >= md->end_subject)
02565               {
02566               SCHECK_PARTIAL();
02567               break;
02568               }
02569             c = *eptr;
02570             if ((data[c/8] & (1 << (c&7))) == 0) break;
02571             eptr++;
02572             }
02573           while (eptr >= pp)
02574             {
02575             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
02576             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02577             eptr--;
02578             }
02579           }
02580 
02581         MRRETURN(MATCH_NOMATCH);
02582         }
02583       }
02584     /* Control never gets here */
02585 
02586 
02587     /* Match an extended character class. This opcode is encountered only
02588     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
02589     mode, because Unicode properties are supported in non-UTF-8 mode. */
02590 
02591 #ifdef SUPPORT_UTF8
02592     case OP_XCLASS:
02593       {
02594       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
02595       ecode += GET(ecode, 1);                      /* Advance past the item */
02596 
02597       switch (*ecode)
02598         {
02599         case OP_CRSTAR:
02600         case OP_CRMINSTAR:
02601         case OP_CRPLUS:
02602         case OP_CRMINPLUS:
02603         case OP_CRQUERY:
02604         case OP_CRMINQUERY:
02605         c = *ecode++ - OP_CRSTAR;
02606         minimize = (c & 1) != 0;
02607         min = rep_min[c];                 /* Pick up values from tables; */
02608         max = rep_max[c];                 /* zero for max => infinity */
02609         if (max == 0) max = INT_MAX;
02610         break;
02611 
02612         case OP_CRRANGE:
02613         case OP_CRMINRANGE:
02614         minimize = (*ecode == OP_CRMINRANGE);
02615         min = GET2(ecode, 1);
02616         max = GET2(ecode, 3);
02617         if (max == 0) max = INT_MAX;
02618         ecode += 5;
02619         break;
02620 
02621         default:               /* No repeat follows */
02622         min = max = 1;
02623         break;
02624         }
02625 
02626       /* First, ensure the minimum number of matches are present. */
02627 
02628       for (i = 1; i <= min; i++)
02629         {
02630         if (eptr >= md->end_subject)
02631           {
02632           SCHECK_PARTIAL();
02633           MRRETURN(MATCH_NOMATCH);
02634           }
02635         GETCHARINCTEST(c, eptr);
02636         if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
02637         }
02638 
02639       /* If max == min we can continue with the main loop without the
02640       need to recurse. */
02641 
02642       if (min == max) continue;
02643 
02644       /* If minimizing, keep testing the rest of the expression and advancing
02645       the pointer while it matches the class. */
02646 
02647       if (minimize)
02648         {
02649         for (fi = min;; fi++)
02650           {
02651           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
02652           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02653           if (fi >= max) MRRETURN(MATCH_NOMATCH);
02654           if (eptr >= md->end_subject)
02655             {
02656             SCHECK_PARTIAL();
02657             MRRETURN(MATCH_NOMATCH);
02658             }
02659           GETCHARINCTEST(c, eptr);
02660           if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
02661           }
02662         /* Control never gets here */
02663         }
02664 
02665       /* If maximizing, find the longest possible run, then work backwards. */
02666 
02667       else
02668         {
02669         pp = eptr;
02670         for (i = min; i < max; i++)
02671           {
02672           int len = 1;
02673           if (eptr >= md->end_subject)
02674             {
02675             SCHECK_PARTIAL();
02676             break;
02677             }
02678           GETCHARLENTEST(c, eptr, len);
02679           if (!_pcre_xclass(c, data)) break;
02680           eptr += len;
02681           }
02682         for(;;)
02683           {
02684           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
02685           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02686           if (eptr-- == pp) break;        /* Stop if tried at original pos */
02687           if (utf8) BACKCHAR(eptr);
02688           }
02689         MRRETURN(MATCH_NOMATCH);
02690         }
02691 
02692       /* Control never gets here */
02693       }
02694 #endif    /* End of XCLASS */
02695 
02696     /* Match a single character, casefully */
02697 
02698     case OP_CHAR:
02699 #ifdef SUPPORT_UTF8
02700     if (utf8)
02701       {
02702       length = 1;
02703       ecode++;
02704       GETCHARLEN(fc, ecode, length);
02705       if (length > md->end_subject - eptr)
02706         {
02707         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
02708         MRRETURN(MATCH_NOMATCH);
02709         }
02710       while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
02711       }
02712     else
02713 #endif
02714 
02715     /* Non-UTF-8 mode */
02716       {
02717       if (md->end_subject - eptr < 1)
02718         {
02719         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
02720         MRRETURN(MATCH_NOMATCH);
02721         }
02722       if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
02723       ecode += 2;
02724       }
02725     break;
02726 
02727     /* Match a single character, caselessly */
02728 
02729     case OP_CHARNC:
02730 #ifdef SUPPORT_UTF8
02731     if (utf8)
02732       {
02733       length = 1;
02734       ecode++;
02735       GETCHARLEN(fc, ecode, length);
02736 
02737       if (length > md->end_subject - eptr)
02738         {
02739         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
02740         MRRETURN(MATCH_NOMATCH);
02741         }
02742 
02743       /* If the pattern character's value is < 128, we have only one byte, and
02744       can use the fast lookup table. */
02745 
02746       if (fc < 128)
02747         {
02748         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
02749         }
02750 
02751       /* Otherwise we must pick up the subject character */
02752 
02753       else
02754         {
02755         unsigned int dc;
02756         GETCHARINC(dc, eptr);
02757         ecode += length;
02758 
02759         /* If we have Unicode property support, we can use it to test the other
02760         case of the character, if there is one. */
02761 
02762         if (fc != dc)
02763           {
02764 #ifdef SUPPORT_UCP
02765           if (dc != UCD_OTHERCASE(fc))
02766 #endif
02767             MRRETURN(MATCH_NOMATCH);
02768           }
02769         }
02770       }
02771     else
02772 #endif   /* SUPPORT_UTF8 */
02773 
02774     /* Non-UTF-8 mode */
02775       {
02776       if (md->end_subject - eptr < 1)
02777         {
02778         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
02779         MRRETURN(MATCH_NOMATCH);
02780         }
02781       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
02782       ecode += 2;
02783       }
02784     break;
02785 
02786     /* Match a single character repeatedly. */
02787 
02788     case OP_EXACT:
02789     min = max = GET2(ecode, 1);
02790     ecode += 3;
02791     goto REPEATCHAR;
02792 
02793     case OP_POSUPTO:
02794     possessive = TRUE;
02795     /* Fall through */
02796 
02797     case OP_UPTO:
02798     case OP_MINUPTO:
02799     min = 0;
02800     max = GET2(ecode, 1);
02801     minimize = *ecode == OP_MINUPTO;
02802     ecode += 3;
02803     goto REPEATCHAR;
02804 
02805     case OP_POSSTAR:
02806     possessive = TRUE;
02807     min = 0;
02808     max = INT_MAX;
02809     ecode++;
02810     goto REPEATCHAR;
02811 
02812     case OP_POSPLUS:
02813     possessive = TRUE;
02814     min = 1;
02815     max = INT_MAX;
02816     ecode++;
02817     goto REPEATCHAR;
02818 
02819     case OP_POSQUERY:
02820     possessive = TRUE;
02821     min = 0;
02822     max = 1;
02823     ecode++;
02824     goto REPEATCHAR;
02825 
02826     case OP_STAR:
02827     case OP_MINSTAR:
02828     case OP_PLUS:
02829     case OP_MINPLUS:
02830     case OP_QUERY:
02831     case OP_MINQUERY:
02832     c = *ecode++ - OP_STAR;
02833     minimize = (c & 1) != 0;
02834 
02835     min = rep_min[c];                 /* Pick up values from tables; */
02836     max = rep_max[c];                 /* zero for max => infinity */
02837     if (max == 0) max = INT_MAX;
02838 
02839     /* Common code for all repeated single-character matches. */
02840 
02841     REPEATCHAR:
02842 #ifdef SUPPORT_UTF8
02843     if (utf8)
02844       {
02845       length = 1;
02846       charptr = ecode;
02847       GETCHARLEN(fc, ecode, length);
02848       ecode += length;
02849 
02850       /* Handle multibyte character matching specially here. There is
02851       support for caseless matching if UCP support is present. */
02852 
02853       if (length > 1)
02854         {
02855 #ifdef SUPPORT_UCP
02856         unsigned int othercase;
02857         if ((ims & PCRE_CASELESS) != 0 &&
02858             (othercase = UCD_OTHERCASE(fc)) != fc)
02859           oclength = _pcre_ord2utf8(othercase, occhars);
02860         else oclength = 0;
02861 #endif  /* SUPPORT_UCP */
02862 
02863         for (i = 1; i <= min; i++)
02864           {
02865           if (eptr <= md->end_subject - length &&
02866             memcmp(eptr, charptr, length) == 0) eptr += length;
02867 #ifdef SUPPORT_UCP
02868           else if (oclength > 0 &&
02869                    eptr <= md->end_subject - oclength &&
02870                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
02871 #endif  /* SUPPORT_UCP */
02872           else
02873             {
02874             CHECK_PARTIAL();
02875             MRRETURN(MATCH_NOMATCH);
02876             }
02877           }
02878 
02879         if (min == max) continue;
02880 
02881         if (minimize)
02882           {
02883           for (fi = min;; fi++)
02884             {
02885             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
02886             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02887             if (fi >= max) MRRETURN(MATCH_NOMATCH);
02888             if (eptr <= md->end_subject - length &&
02889               memcmp(eptr, charptr, length) == 0) eptr += length;
02890 #ifdef SUPPORT_UCP
02891             else if (oclength > 0 &&
02892                      eptr <= md->end_subject - oclength &&
02893                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
02894 #endif  /* SUPPORT_UCP */
02895             else
02896               {
02897               CHECK_PARTIAL();
02898               MRRETURN(MATCH_NOMATCH);
02899               }
02900             }
02901           /* Control never gets here */
02902           }
02903 
02904         else  /* Maximize */
02905           {
02906           pp = eptr;
02907           for (i = min; i < max; i++)
02908             {
02909             if (eptr <= md->end_subject - length &&
02910                 memcmp(eptr, charptr, length) == 0) eptr += length;
02911 #ifdef SUPPORT_UCP
02912             else if (oclength > 0 &&
02913                      eptr <= md->end_subject - oclength &&
02914                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
02915 #endif  /* SUPPORT_UCP */
02916             else
02917               {
02918               CHECK_PARTIAL();
02919               break;
02920               }
02921             }
02922 
02923           if (possessive) continue;
02924 
02925           for(;;)
02926             {
02927             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
02928             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02929             if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
02930 #ifdef SUPPORT_UCP
02931             eptr--;
02932             BACKCHAR(eptr);
02933 #else   /* without SUPPORT_UCP */
02934             eptr -= length;
02935 #endif  /* SUPPORT_UCP */
02936             }
02937           }
02938         /* Control never gets here */
02939         }
02940 
02941       /* If the length of a UTF-8 character is 1, we fall through here, and
02942       obey the code as for non-UTF-8 characters below, though in this case the
02943       value of fc will always be < 128. */
02944       }
02945     else
02946 #endif  /* SUPPORT_UTF8 */
02947 
02948     /* When not in UTF-8 mode, load a single-byte character. */
02949 
02950     fc = *ecode++;
02951 
02952     /* The value of fc at this point is always less than 256, though we may or
02953     may not be in UTF-8 mode. The code is duplicated for the caseless and
02954     caseful cases, for speed, since matching characters is likely to be quite
02955     common. First, ensure the minimum number of matches are present. If min =
02956     max, continue at the same level without recursing. Otherwise, if
02957     minimizing, keep trying the rest of the expression and advancing one
02958     matching character if failing, up to the maximum. Alternatively, if
02959     maximizing, find the maximum number of characters and work backwards. */
02960 
02961     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02962       max, eptr));
02963 
02964     if ((ims & PCRE_CASELESS) != 0)
02965       {
02966       fc = md->lcc[fc];
02967       for (i = 1; i <= min; i++)
02968         {
02969         if (eptr >= md->end_subject)
02970           {
02971           SCHECK_PARTIAL();
02972           MRRETURN(MATCH_NOMATCH);
02973           }
02974         if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
02975         }
02976       if (min == max) continue;
02977       if (minimize)
02978         {
02979         for (fi = min;; fi++)
02980           {
02981           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
02982           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02983           if (fi >= max) MRRETURN(MATCH_NOMATCH);
02984           if (eptr >= md->end_subject)
02985             {
02986             SCHECK_PARTIAL();
02987             MRRETURN(MATCH_NOMATCH);
02988             }
02989           if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
02990           }
02991         /* Control never gets here */
02992         }
02993       else  /* Maximize */
02994         {
02995         pp = eptr;
02996         for (i = min; i < max; i++)
02997           {
02998           if (eptr >= md->end_subject)
02999             {
03000             SCHECK_PARTIAL();
03001             break;
03002             }
03003           if (fc != md->lcc[*eptr]) break;
03004           eptr++;
03005           }
03006 
03007         if (possessive) continue;
03008 
03009         while (eptr >= pp)
03010           {
03011           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
03012           eptr--;
03013           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03014           }
03015         MRRETURN(MATCH_NOMATCH);
03016         }
03017       /* Control never gets here */
03018       }
03019 
03020     /* Caseful comparisons (includes all multi-byte characters) */
03021 
03022     else
03023       {
03024       for (i = 1; i <= min; i++)
03025         {
03026         if (eptr >= md->end_subject)
03027           {
03028           SCHECK_PARTIAL();
03029           MRRETURN(MATCH_NOMATCH);
03030           }
03031         if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
03032         }
03033 
03034       if (min == max) continue;
03035 
03036       if (minimize)
03037         {
03038         for (fi = min;; fi++)
03039           {
03040           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
03041           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03042           if (fi >= max) MRRETURN(MATCH_NOMATCH);
03043           if (eptr >= md->end_subject)
03044             {
03045             SCHECK_PARTIAL();
03046             MRRETURN(MATCH_NOMATCH);
03047             }
03048           if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
03049           }
03050         /* Control never gets here */
03051         }
03052       else  /* Maximize */
03053         {
03054         pp = eptr;
03055         for (i = min; i < max; i++)
03056           {
03057           if (eptr >= md->end_subject)
03058             {
03059             SCHECK_PARTIAL();
03060             break;
03061             }
03062           if (fc != *eptr) break;
03063           eptr++;
03064           }
03065         if (possessive) continue;
03066 
03067         while (eptr >= pp)
03068           {
03069           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
03070           eptr--;
03071           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03072           }
03073         MRRETURN(MATCH_NOMATCH);
03074         }
03075       }
03076     /* Control never gets here */
03077 
03078     /* Match a negated single one-byte character. The character we are
03079     checking can be multibyte. */
03080 
03081     case OP_NOT:
03082     if (eptr >= md->end_subject)
03083       {
03084       SCHECK_PARTIAL();
03085       MRRETURN(MATCH_NOMATCH);
03086       }
03087     ecode++;
03088     GETCHARINCTEST(c, eptr);
03089     if ((ims & PCRE_CASELESS) != 0)
03090       {
03091 #ifdef SUPPORT_UTF8
03092       if (c < 256)
03093 #endif
03094       c = md->lcc[c];
03095       if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
03096       }
03097     else
03098       {
03099       if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
03100       }
03101     break;
03102 
03103     /* Match a negated single one-byte character repeatedly. This is almost a
03104     repeat of the code for a repeated single character, but I haven't found a
03105     nice way of commoning these up that doesn't require a test of the
03106     positive/negative option for each character match. Maybe that wouldn't add
03107     very much to the time taken, but character matching *is* what this is all
03108     about... */
03109 
03110     case OP_NOTEXACT:
03111     min = max = GET2(ecode, 1);
03112     ecode += 3;
03113     goto REPEATNOTCHAR;
03114 
03115     case OP_NOTUPTO:
03116     case OP_NOTMINUPTO:
03117     min = 0;
03118     max = GET2(ecode, 1);
03119     minimize = *ecode == OP_NOTMINUPTO;
03120     ecode += 3;
03121     goto REPEATNOTCHAR;
03122 
03123     case OP_NOTPOSSTAR:
03124     possessive = TRUE;
03125     min = 0;
03126     max = INT_MAX;
03127     ecode++;
03128     goto REPEATNOTCHAR;
03129 
03130     case OP_NOTPOSPLUS:
03131     possessive = TRUE;
03132     min = 1;
03133     max = INT_MAX;
03134     ecode++;
03135     goto REPEATNOTCHAR;
03136 
03137     case OP_NOTPOSQUERY:
03138     possessive = TRUE;
03139     min = 0;
03140     max = 1;
03141     ecode++;
03142     goto REPEATNOTCHAR;
03143 
03144     case OP_NOTPOSUPTO:
03145     possessive = TRUE;
03146     min = 0;
03147     max = GET2(ecode, 1);
03148     ecode += 3;
03149     goto REPEATNOTCHAR;
03150 
03151     case OP_NOTSTAR:
03152     case OP_NOTMINSTAR:
03153     case OP_NOTPLUS:
03154     case OP_NOTMINPLUS:
03155     case OP_NOTQUERY:
03156     case OP_NOTMINQUERY:
03157     c = *ecode++ - OP_NOTSTAR;
03158     minimize = (c & 1) != 0;
03159     min = rep_min[c];                 /* Pick up values from tables; */
03160     max = rep_max[c];                 /* zero for max => infinity */
03161     if (max == 0) max = INT_MAX;
03162 
03163     /* Common code for all repeated single-byte matches. */
03164 
03165     REPEATNOTCHAR:
03166     fc = *ecode++;
03167 
03168     /* The code is duplicated for the caseless and caseful cases, for speed,
03169     since matching characters is likely to be quite common. First, ensure the
03170     minimum number of matches are present. If min = max, continue at the same
03171     level without recursing. Otherwise, if minimizing, keep trying the rest of
03172     the expression and advancing one matching character if failing, up to the
03173     maximum. Alternatively, if maximizing, find the maximum number of
03174     characters and work backwards. */
03175 
03176     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
03177       max, eptr));
03178 
03179     if ((ims & PCRE_CASELESS) != 0)
03180       {
03181       fc = md->lcc[fc];
03182 
03183 #ifdef SUPPORT_UTF8
03184       /* UTF-8 mode */
03185       if (utf8)
03186         {
03187         register unsigned int d;
03188         for (i = 1; i <= min; i++)
03189           {
03190           if (eptr >= md->end_subject)
03191             {
03192             SCHECK_PARTIAL();
03193             MRRETURN(MATCH_NOMATCH);
03194             }
03195           GETCHARINC(d, eptr);
03196           if (d < 256) d = md->lcc[d];
03197           if (fc == d) MRRETURN(MATCH_NOMATCH);
03198           }
03199         }
03200       else
03201 #endif
03202 
03203       /* Not UTF-8 mode */
03204         {
03205         for (i = 1; i <= min; i++)
03206           {
03207           if (eptr >= md->end_subject)
03208             {
03209             SCHECK_PARTIAL();
03210             MRRETURN(MATCH_NOMATCH);
03211             }
03212           if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
03213           }
03214         }
03215 
03216       if (min == max) continue;
03217 
03218       if (minimize)
03219         {
03220 #ifdef SUPPORT_UTF8
03221         /* UTF-8 mode */
03222         if (utf8)
03223           {
03224           register unsigned int d;
03225           for (fi = min;; fi++)
03226             {
03227             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
03228             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03229             if (fi >= max) MRRETURN(MATCH_NOMATCH);
03230             if (eptr >= md->end_subject)
03231               {
03232               SCHECK_PARTIAL();
03233               MRRETURN(MATCH_NOMATCH);
03234               }
03235             GETCHARINC(d, eptr);
03236             if (d < 256) d = md->lcc[d];
03237             if (fc == d) MRRETURN(MATCH_NOMATCH);
03238             }
03239           }
03240         else
03241 #endif
03242         /* Not UTF-8 mode */
03243           {
03244           for (fi = min;; fi++)
03245             {
03246             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
03247             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03248             if (fi >= max) MRRETURN(MATCH_NOMATCH);
03249             if (eptr >= md->end_subject)
03250               {
03251               SCHECK_PARTIAL();
03252               MRRETURN(MATCH_NOMATCH);
03253               }
03254             if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
03255             }
03256           }
03257         /* Control never gets here */
03258         }
03259 
03260       /* Maximize case */
03261 
03262       else
03263         {
03264         pp = eptr;
03265 
03266 #ifdef SUPPORT_UTF8
03267         /* UTF-8 mode */
03268         if (utf8)
03269           {
03270           register unsigned int d;
03271           for (i = min; i < max; i++)
03272             {
03273             int len = 1;
03274             if (eptr >= md->end_subject)
03275               {
03276               SCHECK_PARTIAL();
03277               break;
03278               }
03279             GETCHARLEN(d, eptr, len);
03280             if (d < 256) d = md->lcc[d];
03281             if (fc == d) break;
03282             eptr += len;
03283             }
03284         if (possessive) continue;
03285         for(;;)
03286             {
03287             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
03288             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03289             if (eptr-- == pp) break;        /* Stop if tried at original pos */
03290             BACKCHAR(eptr);
03291             }
03292           }
03293         else
03294 #endif
03295         /* Not UTF-8 mode */
03296           {
03297           for (i = min; i < max; i++)
03298             {
03299             if (eptr >= md->end_subject)
03300               {
03301               SCHECK_PARTIAL();
03302               break;
03303               }
03304             if (fc == md->lcc[*eptr]) break;
03305             eptr++;
03306             }
03307           if (possessive) continue;
03308           while (eptr >= pp)
03309             {
03310             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
03311             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03312             eptr--;
03313             }
03314           }
03315 
03316         MRRETURN(MATCH_NOMATCH);
03317         }
03318       /* Control never gets here */
03319       }
03320 
03321     /* Caseful comparisons */
03322 
03323     else
03324       {
03325 #ifdef SUPPORT_UTF8
03326       /* UTF-8 mode */
03327       if (utf8)
03328         {
03329         register unsigned int d;
03330         for (i = 1; i <= min; i++)
03331           {
03332           if (eptr >= md->end_subject)
03333             {
03334             SCHECK_PARTIAL();
03335             MRRETURN(MATCH_NOMATCH);
03336             }
03337           GETCHARINC(d, eptr);
03338           if (fc == d) MRRETURN(MATCH_NOMATCH);
03339           }
03340         }
03341       else
03342 #endif
03343       /* Not UTF-8 mode */
03344         {
03345         for (i = 1; i <= min; i++)
03346           {
03347           if (eptr >= md->end_subject)
03348             {
03349             SCHECK_PARTIAL();
03350             MRRETURN(MATCH_NOMATCH);
03351             }
03352           if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
03353           }
03354         }
03355 
03356       if (min == max) continue;
03357 
03358       if (minimize)
03359         {
03360 #ifdef SUPPORT_UTF8
03361         /* UTF-8 mode */
03362         if (utf8)
03363           {
03364           register unsigned int d;
03365           for (fi = min;; fi++)
03366             {
03367             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
03368             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03369             if (fi >= max) MRRETURN(MATCH_NOMATCH);
03370             if (eptr >= md->end_subject)
03371               {
03372               SCHECK_PARTIAL();
03373               MRRETURN(MATCH_NOMATCH);
03374               }
03375             GETCHARINC(d, eptr);
03376             if (fc == d) MRRETURN(MATCH_NOMATCH);
03377             }
03378           }
03379         else
03380 #endif
03381         /* Not UTF-8 mode */
03382           {
03383           for (fi = min;; fi++)
03384             {
03385             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
03386             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03387             if (fi >= max) MRRETURN(MATCH_NOMATCH);
03388             if (eptr >= md->end_subject)
03389               {
03390               SCHECK_PARTIAL();
03391               MRRETURN(MATCH_NOMATCH);
03392               }
03393             if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
03394             }
03395           }
03396         /* Control never gets here */
03397         }
03398 
03399       /* Maximize case */
03400 
03401       else
03402         {
03403         pp = eptr;
03404 
03405 #ifdef SUPPORT_UTF8
03406         /* UTF-8 mode */
03407         if (utf8)
03408           {
03409           register unsigned int d;
03410           for (i = min; i < max; i++)
03411             {
03412             int len = 1;
03413             if (eptr >= md->end_subject)
03414               {
03415               SCHECK_PARTIAL();
03416               break;
03417               }
03418             GETCHARLEN(d, eptr, len);
03419             if (fc == d) break;
03420             eptr += len;
03421             }
03422           if (possessive) continue;
03423           for(;;)
03424             {
03425             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
03426             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03427             if (eptr-- == pp) break;        /* Stop if tried at original pos */
03428             BACKCHAR(eptr);
03429             }
03430           }
03431         else
03432 #endif
03433         /* Not UTF-8 mode */
03434           {
03435           for (i = min; i < max; i++)
03436             {
03437             if (eptr >= md->end_subject)
03438               {
03439               SCHECK_PARTIAL();
03440               break;
03441               }
03442             if (fc == *eptr) break;
03443             eptr++;
03444             }
03445           if (possessive) continue;
03446           while (eptr >= pp)
03447             {
03448             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
03449             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03450             eptr--;
03451             }
03452           }
03453 
03454         MRRETURN(MATCH_NOMATCH);
03455         }
03456       }
03457     /* Control never gets here */
03458 
03459     /* Match a single character type repeatedly; several different opcodes
03460     share code. This is very similar to the code for single characters, but we
03461     repeat it in the interests of efficiency. */
03462 
03463     case OP_TYPEEXACT:
03464     min = max = GET2(ecode, 1);
03465     minimize = TRUE;
03466     ecode += 3;
03467     goto REPEATTYPE;
03468 
03469     case OP_TYPEUPTO:
03470     case OP_TYPEMINUPTO:
03471     min = 0;
03472     max = GET2(ecode, 1);
03473     minimize = *ecode == OP_TYPEMINUPTO;
03474     ecode += 3;
03475     goto REPEATTYPE;
03476 
03477     case OP_TYPEPOSSTAR:
03478     possessive = TRUE;
03479     min = 0;
03480     max = INT_MAX;
03481     ecode++;
03482     goto REPEATTYPE;
03483 
03484     case OP_TYPEPOSPLUS:
03485     possessive = TRUE;
03486     min = 1;
03487     max = INT_MAX;
03488     ecode++;
03489     goto REPEATTYPE;
03490 
03491     case OP_TYPEPOSQUERY:
03492     possessive = TRUE;
03493     min = 0;
03494     max = 1;
03495     ecode++;
03496     goto REPEATTYPE;
03497 
03498     case OP_TYPEPOSUPTO:
03499     possessive = TRUE;
03500     min = 0;
03501     max = GET2(ecode, 1);
03502     ecode += 3;
03503     goto REPEATTYPE;
03504 
03505     case OP_TYPESTAR:
03506     case OP_TYPEMINSTAR:
03507     case OP_TYPEPLUS:
03508     case OP_TYPEMINPLUS:
03509     case OP_TYPEQUERY:
03510     case OP_TYPEMINQUERY:
03511     c = *ecode++ - OP_TYPESTAR;
03512     minimize = (c & 1) != 0;
03513     min = rep_min[c];                 /* Pick up values from tables; */
03514     max = rep_max[c];                 /* zero for max => infinity */
03515     if (max == 0) max = INT_MAX;
03516 
03517     /* Common code for all repeated single character type matches. Note that
03518     in UTF-8 mode, '.' matches a character of any length, but for the other
03519     character types, the valid characters are all one-byte long. */
03520 
03521     REPEATTYPE:
03522     ctype = *ecode++;      /* Code for the character type */
03523 
03524 #ifdef SUPPORT_UCP
03525     if (ctype == OP_PROP || ctype == OP_NOTPROP)
03526       {
03527       prop_fail_result = ctype == OP_NOTPROP;
03528       prop_type = *ecode++;
03529       prop_value = *ecode++;
03530       }
03531     else prop_type = -1;
03532 #endif
03533 
03534     /* First, ensure the minimum number of matches are present. Use inline
03535     code for maximizing the speed, and do the type test once at the start
03536     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
03537     is tidier. Also separate the UCP code, which can be the same for both UTF-8
03538     and single-bytes. */
03539 
03540     if (min > 0)
03541       {
03542 #ifdef SUPPORT_UCP
03543       if (prop_type >= 0)
03544         {
03545         switch(prop_type)
03546           {
03547           case PT_ANY:
03548           if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
03549           for (i = 1; i <= min; i++)
03550             {
03551             if (eptr >= md->end_subject)
03552               {
03553               SCHECK_PARTIAL();
03554               MRRETURN(MATCH_NOMATCH);
03555               }
03556             GETCHARINCTEST(c, eptr);
03557             }
03558           break;
03559 
03560           case PT_LAMP:
03561           for (i = 1; i <= min; i++)
03562             {
03563             if (eptr >= md->end_subject)
03564               {
03565               SCHECK_PARTIAL();
03566               MRRETURN(MATCH_NOMATCH);
03567               }
03568             GETCHARINCTEST(c, eptr);
03569             prop_chartype = UCD_CHARTYPE(c);
03570             if ((prop_chartype == ucp_Lu ||
03571                  prop_chartype == ucp_Ll ||
03572                  prop_chartype == ucp_Lt) == prop_fail_result)
03573               MRRETURN(MATCH_NOMATCH);
03574             }
03575           break;
03576 
03577           case PT_GC:
03578           for (i = 1; i <= min; i++)
03579             {
03580             if (eptr >= md->end_subject)
03581               {
03582               SCHECK_PARTIAL();
03583               MRRETURN(MATCH_NOMATCH);
03584               }
03585             GETCHARINCTEST(c, eptr);
03586             prop_category = UCD_CATEGORY(c);
03587             if ((prop_category == prop_value) == prop_fail_result)
03588               MRRETURN(MATCH_NOMATCH);
03589             }
03590           break;
03591 
03592           case PT_PC:
03593           for (i = 1; i <= min; i++)
03594             {
03595             if (eptr >= md->end_subject)
03596               {
03597               SCHECK_PARTIAL();
03598               MRRETURN(MATCH_NOMATCH);
03599               }
03600             GETCHARINCTEST(c, eptr);
03601             prop_chartype = UCD_CHARTYPE(c);
03602             if ((prop_chartype == prop_value) == prop_fail_result)
03603               MRRETURN(MATCH_NOMATCH);
03604             }
03605           break;
03606 
03607           case PT_SC:
03608           for (i = 1; i <= min; i++)
03609             {
03610             if (eptr >= md->end_subject)
03611               {
03612               SCHECK_PARTIAL();
03613               MRRETURN(MATCH_NOMATCH);
03614               }
03615             GETCHARINCTEST(c, eptr);
03616             prop_script = UCD_SCRIPT(c);
03617             if ((prop_script == prop_value) == prop_fail_result)
03618               MRRETURN(MATCH_NOMATCH);
03619             }
03620           break;
03621 
03622           case PT_ALNUM:
03623           for (i = 1; i <= min; i++)
03624             {
03625             if (eptr >= md->end_subject)
03626               {
03627               SCHECK_PARTIAL();
03628               MRRETURN(MATCH_NOMATCH);
03629               }
03630             GETCHARINCTEST(c, eptr);
03631             prop_category = UCD_CATEGORY(c);
03632             if ((prop_category == ucp_L || prop_category == ucp_N)
03633                    == prop_fail_result)
03634               MRRETURN(MATCH_NOMATCH);
03635             }
03636           break;
03637 
03638           case PT_SPACE:    /* Perl space */
03639           for (i = 1; i <= min; i++)
03640             {
03641             if (eptr >= md->end_subject)
03642               {
03643               SCHECK_PARTIAL();
03644               MRRETURN(MATCH_NOMATCH);
03645               }
03646             GETCHARINCTEST(c, eptr);
03647             prop_category = UCD_CATEGORY(c);
03648             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
03649                  c == CHAR_FF || c == CHAR_CR)
03650                    == prop_fail_result)
03651               MRRETURN(MATCH_NOMATCH);
03652             }
03653           break;
03654 
03655           case PT_PXSPACE:  /* POSIX space */
03656           for (i = 1; i <= min; i++)
03657             {
03658             if (eptr >= md->end_subject)
03659               {
03660               SCHECK_PARTIAL();
03661               MRRETURN(MATCH_NOMATCH);
03662               }
03663             GETCHARINCTEST(c, eptr);
03664             prop_category = UCD_CATEGORY(c);
03665             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
03666                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
03667                    == prop_fail_result)
03668               MRRETURN(MATCH_NOMATCH);
03669             }
03670           break;
03671 
03672           case PT_WORD:
03673           for (i = 1; i <= min; i++)
03674             {
03675             if (eptr >= md->end_subject)
03676               {
03677               SCHECK_PARTIAL();
03678               MRRETURN(MATCH_NOMATCH);
03679               }
03680             GETCHARINCTEST(c, eptr);
03681             prop_category = UCD_CATEGORY(c);
03682             if ((prop_category == ucp_L || prop_category == ucp_N ||
03683                  c == CHAR_UNDERSCORE)
03684                    == prop_fail_result)
03685               MRRETURN(MATCH_NOMATCH);
03686             }
03687           break;
03688 
03689           /* This should not occur */
03690 
03691           default:
03692           RRETURN(PCRE_ERROR_INTERNAL);
03693           }
03694         }
03695 
03696       /* Match extended Unicode sequences. We will get here only if the
03697       support is in the binary; otherwise a compile-time error occurs. */
03698 
03699       else if (ctype == OP_EXTUNI)
03700         {
03701         for (i = 1; i <= min; i++)
03702           {
03703           if (eptr >= md->end_subject)
03704             {
03705             SCHECK_PARTIAL();
03706             MRRETURN(MATCH_NOMATCH);
03707             }
03708           GETCHARINCTEST(c, eptr);
03709           prop_category = UCD_CATEGORY(c);
03710           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
03711           while (eptr < md->end_subject)
03712             {
03713             int len = 1;
03714             if (!utf8) c = *eptr;
03715               else { GETCHARLEN(c, eptr, len); }
03716             prop_category = UCD_CATEGORY(c);
03717             if (prop_category != ucp_M) break;
03718             eptr += len;
03719             }
03720           }
03721         }
03722 
03723       else
03724 #endif     /* SUPPORT_UCP */
03725 
03726 /* Handle all other cases when the coding is UTF-8 */
03727 
03728 #ifdef SUPPORT_UTF8
03729       if (utf8) switch(ctype)
03730         {
03731         case OP_ANY:
03732         for (i = 1; i <= min; i++)
03733           {
03734           if (eptr >= md->end_subject)
03735             {
03736             SCHECK_PARTIAL();
03737             MRRETURN(MATCH_NOMATCH);
03738             }
03739           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
03740           eptr++;
03741           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03742           }
03743         break;
03744 
03745         case OP_ALLANY:
03746         for (i = 1; i <= min; i++)
03747           {
03748           if (eptr >= md->end_subject)
03749             {
03750             SCHECK_PARTIAL();
03751             MRRETURN(MATCH_NOMATCH);
03752             }
03753           eptr++;
03754           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03755           }
03756         break;
03757 
03758         case OP_ANYBYTE:
03759         if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
03760         eptr += min;
03761         break;
03762 
03763         case OP_ANYNL:
03764         for (i = 1; i <= min; i++)
03765           {
03766           if (eptr >= md->end_subject)
03767             {
03768             SCHECK_PARTIAL();
03769             MRRETURN(MATCH_NOMATCH);
03770             }
03771           GETCHARINC(c, eptr);
03772           switch(c)
03773             {
03774             default: MRRETURN(MATCH_NOMATCH);
03775             case 0x000d:
03776             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03777             break;
03778 
03779             case 0x000a:
03780             break;
03781 
03782             case 0x000b:
03783             case 0x000c:
03784             case 0x0085:
03785             case 0x2028:
03786             case 0x2029:
03787             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
03788             break;
03789             }
03790           }
03791         break;
03792 
03793         case OP_NOT_HSPACE:
03794         for (i = 1; i <= min; i++)
03795           {
03796           if (eptr >= md->end_subject)
03797             {
03798             SCHECK_PARTIAL();
03799             MRRETURN(MATCH_NOMATCH);
03800             }
03801           GETCHARINC(c, eptr);
03802           switch(c)
03803             {
03804             default: break;
03805             case 0x09:      /* HT */
03806             case 0x20:      /* SPACE */
03807             case 0xa0:      /* NBSP */
03808             case 0x1680:    /* OGHAM SPACE MARK */
03809             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03810             case 0x2000:    /* EN QUAD */
03811             case 0x2001:    /* EM QUAD */
03812             case 0x2002:    /* EN SPACE */
03813             case 0x2003:    /* EM SPACE */
03814             case 0x2004:    /* THREE-PER-EM SPACE */
03815             case 0x2005:    /* FOUR-PER-EM SPACE */
03816             case 0x2006:    /* SIX-PER-EM SPACE */
03817             case 0x2007:    /* FIGURE SPACE */
03818             case 0x2008:    /* PUNCTUATION SPACE */
03819             case 0x2009:    /* THIN SPACE */
03820             case 0x200A:    /* HAIR SPACE */
03821             case 0x202f:    /* NARROW NO-BREAK SPACE */
03822             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03823             case 0x3000:    /* IDEOGRAPHIC SPACE */
03824             MRRETURN(MATCH_NOMATCH);
03825             }
03826           }
03827         break;
03828 
03829         case OP_HSPACE:
03830         for (i = 1; i <= min; i++)
03831           {
03832           if (eptr >= md->end_subject)
03833             {
03834             SCHECK_PARTIAL();
03835             MRRETURN(MATCH_NOMATCH);
03836             }
03837           GETCHARINC(c, eptr);
03838           switch(c)
03839             {
03840             default: MRRETURN(MATCH_NOMATCH);
03841             case 0x09:      /* HT */
03842             case 0x20:      /* SPACE */
03843             case 0xa0:      /* NBSP */
03844             case 0x1680:    /* OGHAM SPACE MARK */
03845             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03846             case 0x2000:    /* EN QUAD */
03847             case 0x2001:    /* EM QUAD */
03848             case 0x2002:    /* EN SPACE */
03849             case 0x2003:    /* EM SPACE */
03850             case 0x2004:    /* THREE-PER-EM SPACE */
03851             case 0x2005:    /* FOUR-PER-EM SPACE */
03852             case 0x2006:    /* SIX-PER-EM SPACE */
03853             case 0x2007:    /* FIGURE SPACE */
03854             case 0x2008:    /* PUNCTUATION SPACE */
03855             case 0x2009:    /* THIN SPACE */
03856             case 0x200A:    /* HAIR SPACE */
03857             case 0x202f:    /* NARROW NO-BREAK SPACE */
03858             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03859             case 0x3000:    /* IDEOGRAPHIC SPACE */
03860             break;
03861             }
03862           }
03863         break;
03864 
03865         case OP_NOT_VSPACE:
03866         for (i = 1; i <= min; i++)
03867           {
03868           if (eptr >= md->end_subject)
03869             {
03870             SCHECK_PARTIAL();
03871             MRRETURN(MATCH_NOMATCH);
03872             }
03873           GETCHARINC(c, eptr);
03874           switch(c)
03875             {
03876             default: break;
03877             case 0x0a:      /* LF */
03878             case 0x0b:      /* VT */
03879             case 0x0c:      /* FF */
03880             case 0x0d:      /* CR */
03881             case 0x85:      /* NEL */
03882             case 0x2028:    /* LINE SEPARATOR */
03883             case 0x2029:    /* PARAGRAPH SEPARATOR */
03884             MRRETURN(MATCH_NOMATCH);
03885             }
03886           }
03887         break;
03888 
03889         case OP_VSPACE:
03890         for (i = 1; i <= min; i++)
03891           {
03892           if (eptr >= md->end_subject)
03893             {
03894             SCHECK_PARTIAL();
03895             MRRETURN(MATCH_NOMATCH);
03896             }
03897           GETCHARINC(c, eptr);
03898           switch(c)
03899             {
03900             default: MRRETURN(MATCH_NOMATCH);
03901             case 0x0a:      /* LF */
03902             case 0x0b:      /* VT */
03903             case 0x0c:      /* FF */
03904             case 0x0d:      /* CR */
03905             case 0x85:      /* NEL */
03906             case 0x2028:    /* LINE SEPARATOR */
03907             case 0x2029:    /* PARAGRAPH SEPARATOR */
03908             break;
03909             }
03910           }
03911         break;
03912 
03913         case OP_NOT_DIGIT:
03914         for (i = 1; i <= min; i++)
03915           {
03916           if (eptr >= md->end_subject)
03917             {
03918             SCHECK_PARTIAL();
03919             MRRETURN(MATCH_NOMATCH);
03920             }
03921           GETCHARINC(c, eptr);
03922           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
03923             MRRETURN(MATCH_NOMATCH);
03924           }
03925         break;
03926 
03927         case OP_DIGIT:
03928         for (i = 1; i <= min; i++)
03929           {
03930           if (eptr >= md->end_subject)
03931             {
03932             SCHECK_PARTIAL();
03933             MRRETURN(MATCH_NOMATCH);
03934             }
03935           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
03936             MRRETURN(MATCH_NOMATCH);
03937           /* No need to skip more bytes - we know it's a 1-byte character */
03938           }
03939         break;
03940 
03941         case OP_NOT_WHITESPACE:
03942         for (i = 1; i <= min; i++)
03943           {
03944           if (eptr >= md->end_subject)
03945             {
03946             SCHECK_PARTIAL();
03947             MRRETURN(MATCH_NOMATCH);
03948             }
03949           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
03950             MRRETURN(MATCH_NOMATCH);
03951           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03952           }
03953         break;
03954 
03955         case OP_WHITESPACE:
03956         for (i = 1; i <= min; i++)
03957           {
03958           if (eptr >= md->end_subject)
03959             {
03960             SCHECK_PARTIAL();
03961             MRRETURN(MATCH_NOMATCH);
03962             }
03963           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
03964             MRRETURN(MATCH_NOMATCH);
03965           /* No need to skip more bytes - we know it's a 1-byte character */
03966           }
03967         break;
03968 
03969         case OP_NOT_WORDCHAR:
03970         for (i = 1; i <= min; i++)
03971           {
03972           if (eptr >= md->end_subject)
03973             {
03974             SCHECK_PARTIAL();
03975             MRRETURN(MATCH_NOMATCH);
03976             }
03977           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
03978             MRRETURN(MATCH_NOMATCH);
03979           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03980           }
03981         break;
03982 
03983         case OP_WORDCHAR:
03984         for (i = 1; i <= min; i++)
03985           {
03986           if (eptr >= md->end_subject)
03987             {
03988             SCHECK_PARTIAL();
03989             MRRETURN(MATCH_NOMATCH);
03990             }
03991           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
03992             MRRETURN(MATCH_NOMATCH);
03993           /* No need to skip more bytes - we know it's a 1-byte character */
03994           }
03995         break;
03996 
03997         default:
03998         RRETURN(PCRE_ERROR_INTERNAL);
03999         }  /* End switch(ctype) */
04000 
04001       else
04002 #endif     /* SUPPORT_UTF8 */
04003 
04004       /* Code for the non-UTF-8 case for minimum matching of operators other
04005       than OP_PROP and OP_NOTPROP. */
04006 
04007       switch(ctype)
04008         {
04009         case OP_ANY:
04010         for (i = 1; i <= min; i++)
04011           {
04012           if (eptr >= md->end_subject)
04013             {
04014             SCHECK_PARTIAL();
04015             MRRETURN(MATCH_NOMATCH);
04016             }
04017           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
04018           eptr++;
04019           }
04020         break;
04021 
04022         case OP_ALLANY:
04023         if (eptr > md->end_subject - min)
04024           {
04025           SCHECK_PARTIAL();
04026           MRRETURN(MATCH_NOMATCH);
04027           }
04028         eptr += min;
04029         break;
04030 
04031         case OP_ANYBYTE:
04032         if (eptr > md->end_subject - min)
04033           {
04034           SCHECK_PARTIAL();
04035           MRRETURN(MATCH_NOMATCH);
04036           }
04037         eptr += min;
04038         break;
04039 
04040         case OP_ANYNL:
04041         for (i = 1; i <= min; i++)
04042           {
04043           if (eptr >= md->end_subject)
04044             {
04045             SCHECK_PARTIAL();
04046             MRRETURN(MATCH_NOMATCH);
04047             }
04048           switch(*eptr++)
04049             {
04050             default: MRRETURN(MATCH_NOMATCH);
04051             case 0x000d:
04052             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
04053             break;
04054             case 0x000a:
04055             break;
04056 
04057             case 0x000b:
04058             case 0x000c:
04059             case 0x0085:
04060             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
04061             break;
04062             }
04063           }
04064         break;
04065 
04066         case OP_NOT_HSPACE:
04067         for (i = 1; i <= min; i++)
04068           {
04069           if (eptr >= md->end_subject)
04070             {
04071             SCHECK_PARTIAL();
04072             MRRETURN(MATCH_NOMATCH);
04073             }
04074           switch(*eptr++)
04075             {
04076             default: break;
04077             case 0x09:      /* HT */
04078             case 0x20:      /* SPACE */
04079             case 0xa0:      /* NBSP */
04080             MRRETURN(MATCH_NOMATCH);
04081             }
04082           }
04083         break;
04084 
04085         case OP_HSPACE:
04086         for (i = 1; i <= min; i++)
04087           {
04088           if (eptr >= md->end_subject)
04089             {
04090             SCHECK_PARTIAL();
04091             MRRETURN(MATCH_NOMATCH);
04092             }
04093           switch(*eptr++)
04094             {
04095             default: MRRETURN(MATCH_NOMATCH);
04096             case 0x09:      /* HT */
04097             case 0x20:      /* SPACE */
04098             case 0xa0:      /* NBSP */
04099             break;
04100             }
04101           }
04102         break;
04103 
04104         case OP_NOT_VSPACE:
04105         for (i = 1; i <= min; i++)
04106           {
04107           if (eptr >= md->end_subject)
04108             {
04109             SCHECK_PARTIAL();
04110             MRRETURN(MATCH_NOMATCH);
04111             }
04112           switch(*eptr++)
04113             {
04114             default: break;
04115             case 0x0a:      /* LF */
04116             case 0x0b:      /* VT */
04117             case 0x0c:      /* FF */
04118             case 0x0d:      /* CR */
04119             case 0x85:      /* NEL */
04120             MRRETURN(MATCH_NOMATCH);
04121             }
04122           }
04123         break;
04124 
04125         case OP_VSPACE:
04126         for (i = 1; i <= min; i++)
04127           {
04128           if (eptr >= md->end_subject)
04129             {
04130             SCHECK_PARTIAL();
04131             MRRETURN(MATCH_NOMATCH);
04132             }
04133           switch(*eptr++)
04134             {
04135             default: MRRETURN(MATCH_NOMATCH);
04136             case 0x0a:      /* LF */
04137             case 0x0b:      /* VT */
04138             case 0x0c:      /* FF */
04139             case 0x0d:      /* CR */
04140             case 0x85:      /* NEL */
04141             break;
04142             }
04143           }
04144         break;
04145 
04146         case OP_NOT_DIGIT:
04147         for (i = 1; i <= min; i++)
04148           {
04149           if (eptr >= md->end_subject)
04150             {
04151             SCHECK_PARTIAL();
04152             MRRETURN(MATCH_NOMATCH);
04153             }
04154           if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
04155           }
04156         break;
04157 
04158         case OP_DIGIT:
04159         for (i = 1; i <= min; i++)
04160           {
04161           if (eptr >= md->end_subject)
04162             {
04163             SCHECK_PARTIAL();
04164             MRRETURN(MATCH_NOMATCH);
04165             }
04166           if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
04167           }
04168         break;
04169 
04170         case OP_NOT_WHITESPACE:
04171         for (i = 1; i <= min; i++)
04172           {
04173           if (eptr >= md->end_subject)
04174             {
04175             SCHECK_PARTIAL();
04176             MRRETURN(MATCH_NOMATCH);
04177             }
04178           if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
04179           }
04180         break;
04181 
04182         case OP_WHITESPACE:
04183         for (i = 1; i <= min; i++)
04184           {
04185           if (eptr >= md->end_subject)
04186             {
04187             SCHECK_PARTIAL();
04188             MRRETURN(MATCH_NOMATCH);
04189             }
04190           if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
04191           }
04192         break;
04193 
04194         case OP_NOT_WORDCHAR:
04195         for (i = 1; i <= min; i++)
04196           {
04197           if (eptr >= md->end_subject)
04198             {
04199             SCHECK_PARTIAL();
04200             MRRETURN(MATCH_NOMATCH);
04201             }
04202           if ((md->ctypes[*eptr++] & ctype_word) != 0)
04203             MRRETURN(MATCH_NOMATCH);
04204           }
04205         break;
04206 
04207         case OP_WORDCHAR:
04208         for (i = 1; i <= min; i++)
04209           {
04210           if (eptr >= md->end_subject)
04211             {
04212             SCHECK_PARTIAL();
04213             MRRETURN(MATCH_NOMATCH);
04214             }
04215           if ((md->ctypes[*eptr++] & ctype_word) == 0)
04216             MRRETURN(MATCH_NOMATCH);
04217           }
04218         break;
04219 
04220         default:
04221         RRETURN(PCRE_ERROR_INTERNAL);
04222         }
04223       }
04224 
04225     /* If min = max, continue at the same level without recursing */
04226 
04227     if (min == max) continue;
04228 
04229     /* If minimizing, we have to test the rest of the pattern before each
04230     subsequent match. Again, separate the UTF-8 case for speed, and also
04231     separate the UCP cases. */
04232 
04233     if (minimize)
04234       {
04235 #ifdef SUPPORT_UCP
04236       if (prop_type >= 0)
04237         {
04238         switch(prop_type)
04239           {
04240           case PT_ANY:
04241           for (fi = min;; fi++)
04242             {
04243             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
04244             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04245             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04246             if (eptr >= md->end_subject)
04247               {
04248               SCHECK_PARTIAL();
04249               MRRETURN(MATCH_NOMATCH);
04250               }
04251             GETCHARINCTEST(c, eptr);
04252             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
04253             }
04254           /* Control never gets here */
04255 
04256           case PT_LAMP:
04257           for (fi = min;; fi++)
04258             {
04259             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
04260             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04261             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04262             if (eptr >= md->end_subject)
04263               {
04264               SCHECK_PARTIAL();
04265               MRRETURN(MATCH_NOMATCH);
04266               }
04267             GETCHARINCTEST(c, eptr);
04268             prop_chartype = UCD_CHARTYPE(c);
04269             if ((prop_chartype == ucp_Lu ||
04270                  prop_chartype == ucp_Ll ||
04271                  prop_chartype == ucp_Lt) == prop_fail_result)
04272               MRRETURN(MATCH_NOMATCH);
04273             }
04274           /* Control never gets here */
04275 
04276           case PT_GC:
04277           for (fi = min;; fi++)
04278             {
04279             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
04280             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04281             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04282             if (eptr >= md->end_subject)
04283               {
04284               SCHECK_PARTIAL();
04285               MRRETURN(MATCH_NOMATCH);
04286               }
04287             GETCHARINCTEST(c, eptr);
04288             prop_category = UCD_CATEGORY(c);
04289             if ((prop_category == prop_value) == prop_fail_result)
04290               MRRETURN(MATCH_NOMATCH);
04291             }
04292           /* Control never gets here */
04293 
04294           case PT_PC:
04295           for (fi = min;; fi++)
04296             {
04297             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
04298             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04299             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04300             if (eptr >= md->end_subject)
04301               {
04302               SCHECK_PARTIAL();
04303               MRRETURN(MATCH_NOMATCH);
04304               }
04305             GETCHARINCTEST(c, eptr);
04306             prop_chartype = UCD_CHARTYPE(c);
04307             if ((prop_chartype == prop_value) == prop_fail_result)
04308               MRRETURN(MATCH_NOMATCH);
04309             }
04310           /* Control never gets here */
04311 
04312           case PT_SC:
04313           for (fi = min;; fi++)
04314             {
04315             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
04316             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04317             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04318             if (eptr >= md->end_subject)
04319               {
04320               SCHECK_PARTIAL();
04321               MRRETURN(MATCH_NOMATCH);
04322               }
04323             GETCHARINCTEST(c, eptr);
04324             prop_script = UCD_SCRIPT(c);
04325             if ((prop_script == prop_value) == prop_fail_result)
04326               MRRETURN(MATCH_NOMATCH);
04327             }
04328           /* Control never gets here */
04329 
04330           case PT_ALNUM:
04331           for (fi = min;; fi++)
04332             {
04333             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
04334             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04335             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04336             if (eptr >= md->end_subject)
04337               {
04338               SCHECK_PARTIAL();
04339               MRRETURN(MATCH_NOMATCH);
04340               }
04341             GETCHARINCTEST(c, eptr);
04342             prop_category = UCD_CATEGORY(c);
04343             if ((prop_category == ucp_L || prop_category == ucp_N)
04344                    == prop_fail_result)
04345               MRRETURN(MATCH_NOMATCH);
04346             }
04347           /* Control never gets here */
04348 
04349           case PT_SPACE:    /* Perl space */
04350           for (fi = min;; fi++)
04351             {
04352             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
04353             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04354             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04355             if (eptr >= md->end_subject)
04356               {
04357               SCHECK_PARTIAL();
04358               MRRETURN(MATCH_NOMATCH);
04359               }
04360             GETCHARINCTEST(c, eptr);
04361             prop_category = UCD_CATEGORY(c);
04362             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
04363                  c == CHAR_FF || c == CHAR_CR)
04364                    == prop_fail_result)
04365               MRRETURN(MATCH_NOMATCH);
04366             }
04367           /* Control never gets here */
04368 
04369           case PT_PXSPACE:  /* POSIX space */
04370           for (fi = min;; fi++)
04371             {
04372             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
04373             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04374             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04375             if (eptr >= md->end_subject)
04376               {
04377               SCHECK_PARTIAL();
04378               MRRETURN(MATCH_NOMATCH);
04379               }
04380             GETCHARINCTEST(c, eptr);
04381             prop_category = UCD_CATEGORY(c);
04382             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
04383                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
04384                    == prop_fail_result)
04385               MRRETURN(MATCH_NOMATCH);
04386             }
04387           /* Control never gets here */
04388 
04389           case PT_WORD:
04390           for (fi = min;; fi++)
04391             {
04392             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
04393             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04394             if (fi >= max) MRRETURN(MATCH_NOMATCH);
04395             if (eptr >= md->end_subject)
04396               {
04397               SCHECK_PARTIAL();
04398               MRRETURN(MATCH_NOMATCH);
04399               }
04400             GETCHARINCTEST(c, eptr);
04401             prop_category = UCD_CATEGORY(c);
04402             if ((prop_category == ucp_L ||
04403                  prop_category == ucp_N ||
04404                  c == CHAR_UNDERSCORE)
04405                    == prop_fail_result)
04406               MRRETURN(MATCH_NOMATCH);
04407             }
04408           /* Control never gets here */
04409 
04410           /* This should never occur */
04411 
04412           default:
04413           RRETURN(PCRE_ERROR_INTERNAL);
04414           }
04415         }
04416 
04417       /* Match extended Unicode sequences. We will get here only if the
04418       support is in the binary; otherwise a compile-time error occurs. */
04419 
04420       else if (ctype == OP_EXTUNI)
04421         {
04422         for (fi = min;; fi++)
04423           {
04424           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
04425           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04426           if (fi >= max) MRRETURN(MATCH_NOMATCH);
04427           if (eptr >= md->end_subject)
04428             {
04429             SCHECK_PARTIAL();
04430             MRRETURN(MATCH_NOMATCH);
04431             }
04432           GETCHARINCTEST(c, eptr);
04433           prop_category = UCD_CATEGORY(c);
04434           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
04435           while (eptr < md->end_subject)
04436             {
04437             int len = 1;
04438             if (!utf8) c = *eptr;
04439               else { GETCHARLEN(c, eptr, len); }
04440             prop_category = UCD_CATEGORY(c);
04441             if (prop_category != ucp_M) break;
04442             eptr += len;
04443             }
04444           }
04445         }
04446 
04447       else
04448 #endif     /* SUPPORT_UCP */
04449 
04450 #ifdef SUPPORT_UTF8
04451       /* UTF-8 mode */
04452       if (utf8)
04453         {
04454         for (fi = min;; fi++)
04455           {
04456           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
04457           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04458           if (fi >= max) MRRETURN(MATCH_NOMATCH);
04459           if (eptr >= md->end_subject)
04460             {
04461             SCHECK_PARTIAL();
04462             MRRETURN(MATCH_NOMATCH);
04463             }
04464           if (ctype == OP_ANY && IS_NEWLINE(eptr))
04465             MRRETURN(MATCH_NOMATCH);
04466           GETCHARINC(c, eptr);
04467           switch(ctype)
04468             {
04469             case OP_ANY:        /* This is the non-NL case */
04470             case OP_ALLANY:
04471             case OP_ANYBYTE:
04472             break;
04473 
04474             case OP_ANYNL:
04475             switch(c)
04476               {
04477               default: MRRETURN(MATCH_NOMATCH);
04478               case 0x000d:
04479               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
04480               break;
04481               case 0x000a:
04482               break;
04483 
04484               case 0x000b:
04485               case 0x000c:
04486               case 0x0085:
04487               case 0x2028:
04488               case 0x2029:
04489               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
04490               break;
04491               }
04492             break;
04493 
04494             case OP_NOT_HSPACE:
04495             switch(c)
04496               {
04497               default: break;
04498               case 0x09:      /* HT */
04499               case 0x20:      /* SPACE */
04500               case 0xa0:      /* NBSP */
04501               case 0x1680:    /* OGHAM SPACE MARK */
04502               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
04503               case 0x2000:    /* EN QUAD */
04504               case 0x2001:    /* EM QUAD */
04505               case 0x2002:    /* EN SPACE */
04506               case 0x2003:    /* EM SPACE */
04507               case 0x2004:    /* THREE-PER-EM SPACE */
04508               case 0x2005:    /* FOUR-PER-EM SPACE */
04509               case 0x2006:    /* SIX-PER-EM SPACE */
04510               case 0x2007:    /* FIGURE SPACE */
04511               case 0x2008:    /* PUNCTUATION SPACE */
04512               case 0x2009:    /* THIN SPACE */
04513               case 0x200A:    /* HAIR SPACE */
04514               case 0x202f:    /* NARROW NO-BREAK SPACE */
04515               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
04516               case 0x3000:    /* IDEOGRAPHIC SPACE */
04517               MRRETURN(MATCH_NOMATCH);
04518               }
04519             break;
04520 
04521             case OP_HSPACE:
04522             switch(c)
04523               {
04524               default: MRRETURN(MATCH_NOMATCH);
04525               case 0x09:      /* HT */
04526               case 0x20:      /* SPACE */
04527               case 0xa0:      /* NBSP */
04528               case 0x1680:    /* OGHAM SPACE MARK */
04529               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
04530               case 0x2000:    /* EN QUAD */
04531               case 0x2001:    /* EM QUAD */
04532               case 0x2002:    /* EN SPACE */
04533               case 0x2003:    /* EM SPACE */
04534               case 0x2004:    /* THREE-PER-EM SPACE */
04535               case 0x2005:    /* FOUR-PER-EM SPACE */
04536               case 0x2006:    /* SIX-PER-EM SPACE */
04537               case 0x2007:    /* FIGURE SPACE */
04538               case 0x2008:    /* PUNCTUATION SPACE */
04539               case 0x2009:    /* THIN SPACE */
04540               case 0x200A:    /* HAIR SPACE */
04541               case 0x202f:    /* NARROW NO-BREAK SPACE */
04542               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
04543               case 0x3000:    /* IDEOGRAPHIC SPACE */
04544               break;
04545               }
04546             break;
04547 
04548             case OP_NOT_VSPACE:
04549             switch(c)
04550               {
04551               default: break;
04552               case 0x0a:      /* LF */
04553               case 0x0b:      /* VT */
04554               case 0x0c:      /* FF */
04555               case 0x0d:      /* CR */
04556               case 0x85:      /* NEL */
04557               case 0x2028:    /* LINE SEPARATOR */
04558               case 0x2029:    /* PARAGRAPH SEPARATOR */
04559               MRRETURN(MATCH_NOMATCH);
04560               }
04561             break;
04562 
04563             case OP_VSPACE:
04564             switch(c)
04565               {
04566               default: MRRETURN(MATCH_NOMATCH);
04567               case 0x0a:      /* LF */
04568               case 0x0b:      /* VT */
04569               case 0x0c:      /* FF */
04570               case 0x0d:      /* CR */
04571               case 0x85:      /* NEL */
04572               case 0x2028:    /* LINE SEPARATOR */
04573               case 0x2029:    /* PARAGRAPH SEPARATOR */
04574               break;
04575               }
04576             break;
04577 
04578             case OP_NOT_DIGIT:
04579             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
04580               MRRETURN(MATCH_NOMATCH);
04581             break;
04582 
04583             case OP_DIGIT:
04584             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
04585               MRRETURN(MATCH_NOMATCH);
04586             break;
04587 
04588             case OP_NOT_WHITESPACE:
04589             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
04590               MRRETURN(MATCH_NOMATCH);
04591             break;
04592 
04593             case OP_WHITESPACE:
04594             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
04595               MRRETURN(MATCH_NOMATCH);
04596             break;
04597 
04598             case OP_NOT_WORDCHAR:
04599             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
04600               MRRETURN(MATCH_NOMATCH);
04601             break;
04602 
04603             case OP_WORDCHAR:
04604             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
04605               MRRETURN(MATCH_NOMATCH);
04606             break;
04607 
04608             default:
04609             RRETURN(PCRE_ERROR_INTERNAL);
04610             }
04611           }
04612         }
04613       else
04614 #endif
04615       /* Not UTF-8 mode */
04616         {
04617         for (fi = min;; fi++)
04618           {
04619           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
04620           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04621           if (fi >= max) MRRETURN(MATCH_NOMATCH);
04622           if (eptr >= md->end_subject)
04623             {
04624             SCHECK_PARTIAL();
04625             MRRETURN(MATCH_NOMATCH);
04626             }
04627           if (ctype == OP_ANY && IS_NEWLINE(eptr))
04628             MRRETURN(MATCH_NOMATCH);
04629           c = *eptr++;
04630           switch(ctype)
04631             {
04632             case OP_ANY:     /* This is the non-NL case */
04633             case OP_ALLANY:
04634             case OP_ANYBYTE:
04635             break;
04636 
04637             case OP_ANYNL:
04638             switch(c)
04639               {
04640               default: MRRETURN(MATCH_NOMATCH);
04641               case 0x000d:
04642               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
04643               break;
04644 
04645               case 0x000a:
04646               break;
04647 
04648               case 0x000b:
04649               case 0x000c:
04650               case 0x0085:
04651               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
04652               break;
04653               }
04654             break;
04655 
04656             case OP_NOT_HSPACE:
04657             switch(c)
04658               {
04659               default: break;
04660               case 0x09:      /* HT */
04661               case 0x20:      /* SPACE */
04662               case 0xa0:      /* NBSP */
04663               MRRETURN(MATCH_NOMATCH);
04664               }
04665             break;
04666 
04667             case OP_HSPACE:
04668             switch(c)
04669               {
04670               default: MRRETURN(MATCH_NOMATCH);
04671               case 0x09:      /* HT */
04672               case 0x20:      /* SPACE */
04673               case 0xa0:      /* NBSP */
04674               break;
04675               }
04676             break;
04677 
04678             case OP_NOT_VSPACE:
04679             switch(c)
04680               {
04681               default: break;
04682               case 0x0a:      /* LF */
04683               case 0x0b:      /* VT */
04684               case 0x0c:      /* FF */
04685               case 0x0d:      /* CR */
04686               case 0x85:      /* NEL */
04687               MRRETURN(MATCH_NOMATCH);
04688               }
04689             break;
04690 
04691             case OP_VSPACE:
04692             switch(c)
04693               {
04694               default: MRRETURN(MATCH_NOMATCH);
04695               case 0x0a:      /* LF */
04696               case 0x0b:      /* VT */
04697               case 0x0c:      /* FF */
04698               case 0x0d:      /* CR */
04699               case 0x85:      /* NEL */
04700               break;
04701               }
04702             break;
04703 
04704             case OP_NOT_DIGIT:
04705             if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
04706             break;
04707 
04708             case OP_DIGIT:
04709             if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
04710             break;
04711 
04712             case OP_NOT_WHITESPACE:
04713             if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
04714             break;
04715 
04716             case OP_WHITESPACE:
04717             if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
04718             break;
04719 
04720             case OP_NOT_WORDCHAR:
04721             if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
04722             break;
04723 
04724             case OP_WORDCHAR:
04725             if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
04726             break;
04727 
04728             default:
04729             RRETURN(PCRE_ERROR_INTERNAL);
04730             }
04731           }
04732         }
04733       /* Control never gets here */
04734       }
04735 
04736     /* If maximizing, it is worth using inline code for speed, doing the type
04737     test once at the start (i.e. keep it out of the loop). Again, keep the
04738     UTF-8 and UCP stuff separate. */
04739 
04740     else
04741       {
04742       pp = eptr;  /* Remember where we started */
04743 
04744 #ifdef SUPPORT_UCP
04745       if (prop_type >= 0)
04746         {
04747         switch(prop_type)
04748           {
04749           case PT_ANY:
04750           for (i = min; i < max; i++)
04751             {
04752             int len = 1;
04753             if (eptr >= md->end_subject)
04754               {
04755               SCHECK_PARTIAL();
04756               break;
04757               }
04758             GETCHARLENTEST(c, eptr, len);
04759             if (prop_fail_result) break;
04760             eptr+= len;
04761             }
04762           break;
04763 
04764           case PT_LAMP:
04765           for (i = min; i < max; i++)
04766             {
04767             int len = 1;
04768             if (eptr >= md->end_subject)
04769               {
04770               SCHECK_PARTIAL();
04771               break;
04772               }
04773             GETCHARLENTEST(c, eptr, len);
04774             prop_chartype = UCD_CHARTYPE(c);
04775             if ((prop_chartype == ucp_Lu ||
04776                  prop_chartype == ucp_Ll ||
04777                  prop_chartype == ucp_Lt) == prop_fail_result)
04778               break;
04779             eptr+= len;
04780             }
04781           break;
04782 
04783           case PT_GC:
04784           for (i = min; i < max; i++)
04785             {
04786             int len = 1;
04787             if (eptr >= md->end_subject)
04788               {
04789               SCHECK_PARTIAL();
04790               break;
04791               }
04792             GETCHARLENTEST(c, eptr, len);
04793             prop_category = UCD_CATEGORY(c);
04794             if ((prop_category == prop_value) == prop_fail_result)
04795               break;
04796             eptr+= len;
04797             }
04798           break;
04799 
04800           case PT_PC:
04801           for (i = min; i < max; i++)
04802             {
04803             int len = 1;
04804             if (eptr >= md->end_subject)
04805               {
04806               SCHECK_PARTIAL();
04807               break;
04808               }
04809             GETCHARLENTEST(c, eptr, len);
04810             prop_chartype = UCD_CHARTYPE(c);
04811             if ((prop_chartype == prop_value) == prop_fail_result)
04812               break;
04813             eptr+= len;
04814             }
04815           break;
04816 
04817           case PT_SC:
04818           for (i = min; i < max; i++)
04819             {
04820             int len = 1;
04821             if (eptr >= md->end_subject)
04822               {
04823               SCHECK_PARTIAL();
04824               break;
04825               }
04826             GETCHARLENTEST(c, eptr, len);
04827             prop_script = UCD_SCRIPT(c);
04828             if ((prop_script == prop_value) == prop_fail_result)
04829               break;
04830             eptr+= len;
04831             }
04832           break;
04833 
04834           case PT_ALNUM:
04835           for (i = min; i < max; i++)
04836             {
04837             int len = 1;
04838             if (eptr >= md->end_subject)
04839               {
04840               SCHECK_PARTIAL();
04841               break;
04842               }
04843             GETCHARLENTEST(c, eptr, len);
04844             prop_category = UCD_CATEGORY(c);
04845             if ((prop_category == ucp_L || prop_category == ucp_N)
04846                  == prop_fail_result)
04847               break;
04848             eptr+= len;
04849             }
04850           break;
04851 
04852           case PT_SPACE:    /* Perl space */
04853           for (i = min; i < max; i++)
04854             {
04855             int len = 1;
04856             if (eptr >= md->end_subject)
04857               {
04858               SCHECK_PARTIAL();
04859               break;
04860               }
04861             GETCHARLENTEST(c, eptr, len);
04862             prop_category = UCD_CATEGORY(c);
04863             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
04864                  c == CHAR_FF || c == CHAR_CR)
04865                  == prop_fail_result)
04866               break;
04867             eptr+= len;
04868             }
04869           break;
04870 
04871           case PT_PXSPACE:  /* POSIX space */
04872           for (i = min; i < max; i++)
04873             {
04874             int len = 1;
04875             if (eptr >= md->end_subject)
04876               {
04877               SCHECK_PARTIAL();
04878               break;
04879               }
04880             GETCHARLENTEST(c, eptr, len);
04881             prop_category = UCD_CATEGORY(c);
04882             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
04883                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
04884                  == prop_fail_result)
04885               break;
04886             eptr+= len;
04887             }
04888           break;
04889 
04890           case PT_WORD:
04891           for (i = min; i < max; i++)
04892             {
04893             int len = 1;
04894             if (eptr >= md->end_subject)
04895               {
04896               SCHECK_PARTIAL();
04897               break;
04898               }
04899             GETCHARLENTEST(c, eptr, len);
04900             prop_category = UCD_CATEGORY(c);
04901             if ((prop_category == ucp_L || prop_category == ucp_N ||
04902                  c == CHAR_UNDERSCORE) == prop_fail_result)
04903               break;
04904             eptr+= len;
04905             }
04906           break;
04907 
04908           default:
04909           RRETURN(PCRE_ERROR_INTERNAL);
04910           }
04911 
04912         /* eptr is now past the end of the maximum run */
04913 
04914         if (possessive) continue;
04915         for(;;)
04916           {
04917           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
04918           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04919           if (eptr-- == pp) break;        /* Stop if tried at original pos */
04920           if (utf8) BACKCHAR(eptr);
04921           }
04922         }
04923 
04924       /* Match extended Unicode sequences. We will get here only if the
04925       support is in the binary; otherwise a compile-time error occurs. */
04926 
04927       else if (ctype == OP_EXTUNI)
04928         {
04929         for (i = min; i < max; i++)
04930           {
04931           if (eptr >= md->end_subject)
04932             {
04933             SCHECK_PARTIAL();
04934             break;
04935             }
04936           GETCHARINCTEST(c, eptr);
04937           prop_category = UCD_CATEGORY(c);
04938           if (prop_category == ucp_M) break;
04939           while (eptr < md->end_subject)
04940             {
04941             int len = 1;
04942             if (!utf8) c = *eptr; else
04943               {
04944               GETCHARLEN(c, eptr, len);
04945               }
04946             prop_category = UCD_CATEGORY(c);
04947             if (prop_category != ucp_M) break;
04948             eptr += len;
04949             }
04950           }
04951 
04952         /* eptr is now past the end of the maximum run */
04953 
04954         if (possessive) continue;
04955 
04956         for(;;)
04957           {
04958           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
04959           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04960           if (eptr-- == pp) break;        /* Stop if tried at original pos */
04961           for (;;)                        /* Move back over one extended */
04962             {
04963             int len = 1;
04964             if (!utf8) c = *eptr; else
04965               {
04966               BACKCHAR(eptr);
04967               GETCHARLEN(c, eptr, len);
04968               }
04969             prop_category = UCD_CATEGORY(c);
04970             if (prop_category != ucp_M) break;
04971             eptr--;
04972             }
04973           }
04974         }
04975 
04976       else
04977 #endif   /* SUPPORT_UCP */
04978 
04979 #ifdef SUPPORT_UTF8
04980       /* UTF-8 mode */
04981 
04982       if (utf8)
04983         {
04984         switch(ctype)
04985           {
04986           case OP_ANY:
04987           if (max < INT_MAX)
04988             {
04989             for (i = min; i < max; i++)
04990               {
04991               if (eptr >= md->end_subject)
04992                 {
04993                 SCHECK_PARTIAL();
04994                 break;
04995                 }
04996               if (IS_NEWLINE(eptr)) break;
04997               eptr++;
04998               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
04999               }
05000             }
05001 
05002           /* Handle unlimited UTF-8 repeat */
05003 
05004           else
05005             {
05006             for (i = min; i < max; i++)
05007               {
05008               if (eptr >= md->end_subject)
05009                 {
05010                 SCHECK_PARTIAL();
05011                 break;
05012                 }
05013               if (IS_NEWLINE(eptr)) break;
05014               eptr++;
05015               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
05016               }
05017             }
05018           break;
05019 
05020           case OP_ALLANY:
05021           if (max < INT_MAX)
05022             {
05023             for (i = min; i < max; i++)
05024               {
05025               if (eptr >= md->end_subject)
05026                 {
05027                 SCHECK_PARTIAL();
05028                 break;
05029                 }
05030               eptr++;
05031               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
05032               }
05033             }
05034           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
05035           break;
05036 
05037           /* The byte case is the same as non-UTF8 */
05038 
05039           case OP_ANYBYTE:
05040           c = max - min;
05041           if (c > (unsigned int)(md->end_subject - eptr))
05042             {
05043             eptr = md->end_subject;
05044             SCHECK_PARTIAL();
05045             }
05046           else eptr += c;
05047           break;
05048 
05049           case OP_ANYNL:
05050           for (i = min; i < max; i++)
05051             {
05052             int len = 1;
05053             if (eptr >= md->end_subject)
05054               {
05055               SCHECK_PARTIAL();
05056               break;
05057               }
05058             GETCHARLEN(c, eptr, len);
05059             if (c == 0x000d)
05060               {
05061               if (++eptr >= md->end_subject) break;
05062               if (*eptr == 0x000a) eptr++;
05063               }
05064             else
05065               {
05066               if (c != 0x000a &&
05067                   (md->bsr_anycrlf ||
05068                    (c != 0x000b && c != 0x000c &&
05069                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
05070                 break;
05071               eptr += len;
05072               }
05073             }
05074           break;
05075 
05076           case OP_NOT_HSPACE:
05077           case OP_HSPACE:
05078           for (i = min; i < max; i++)
05079             {
05080             BOOL gotspace;
05081             int len = 1;
05082             if (eptr >= md->end_subject)
05083               {
05084               SCHECK_PARTIAL();
05085               break;
05086               }
05087             GETCHARLEN(c, eptr, len);
05088             switch(c)
05089               {
05090               default: gotspace = FALSE; break;
05091               case 0x09:      /* HT */
05092               case 0x20:      /* SPACE */
05093               case 0xa0:      /* NBSP */
05094               case 0x1680:    /* OGHAM SPACE MARK */
05095               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
05096               case 0x2000:    /* EN QUAD */
05097               case 0x2001:    /* EM QUAD */
05098               case 0x2002:    /* EN SPACE */
05099               case 0x2003:    /* EM SPACE */
05100               case 0x2004:    /* THREE-PER-EM SPACE */
05101               case 0x2005:    /* FOUR-PER-EM SPACE */
05102               case 0x2006:    /* SIX-PER-EM SPACE */
05103               case 0x2007:    /* FIGURE SPACE */
05104               case 0x2008:    /* PUNCTUATION SPACE */
05105               case 0x2009:    /* THIN SPACE */
05106               case 0x200A:    /* HAIR SPACE */
05107               case 0x202f:    /* NARROW NO-BREAK SPACE */
05108               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
05109               case 0x3000:    /* IDEOGRAPHIC SPACE */
05110               gotspace = TRUE;
05111               break;
05112               }
05113             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
05114             eptr += len;
05115             }
05116           break;
05117 
05118           case OP_NOT_VSPACE:
05119           case OP_VSPACE:
05120           for (i = min; i < max; i++)
05121             {
05122             BOOL gotspace;
05123             int len = 1;
05124             if (eptr >= md->end_subject)
05125               {
05126               SCHECK_PARTIAL();
05127               break;
05128               }
05129             GETCHARLEN(c, eptr, len);
05130             switch(c)
05131               {
05132               default: gotspace = FALSE; break;
05133               case 0x0a:      /* LF */
05134               case 0x0b:      /* VT */
05135               case 0x0c:      /* FF */
05136               case 0x0d:      /* CR */
05137               case 0x85:      /* NEL */
05138               case 0x2028:    /* LINE SEPARATOR */
05139               case 0x2029:    /* PARAGRAPH SEPARATOR */
05140               gotspace = TRUE;
05141               break;
05142               }
05143             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
05144             eptr += len;
05145             }
05146           break;
05147 
05148           case OP_NOT_DIGIT:
05149           for (i = min; i < max; i++)
05150             {
05151             int len = 1;
05152             if (eptr >= md->end_subject)
05153               {
05154               SCHECK_PARTIAL();
05155               break;
05156               }
05157             GETCHARLEN(c, eptr, len);
05158             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
05159             eptr+= len;
05160             }
05161           break;
05162 
05163           case OP_DIGIT:
05164           for (i = min; i < max; i++)
05165             {
05166             int len = 1;
05167             if (eptr >= md->end_subject)
05168               {
05169               SCHECK_PARTIAL();
05170               break;
05171               }
05172             GETCHARLEN(c, eptr, len);
05173             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
05174             eptr+= len;
05175             }
05176           break;
05177 
05178           case OP_NOT_WHITESPACE:
05179           for (i = min; i < max; i++)
05180             {
05181             int len = 1;
05182             if (eptr >= md->end_subject)
05183               {
05184               SCHECK_PARTIAL();
05185               break;
05186               }
05187             GETCHARLEN(c, eptr, len);
05188             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
05189             eptr+= len;
05190             }
05191           break;
05192 
05193           case OP_WHITESPACE:
05194           for (i = min; i < max; i++)
05195             {
05196             int len = 1;
05197             if (eptr >= md->end_subject)
05198               {
05199               SCHECK_PARTIAL();
05200               break;
05201               }
05202             GETCHARLEN(c, eptr, len);
05203             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
05204             eptr+= len;
05205             }
05206           break;
05207 
05208           case OP_NOT_WORDCHAR:
05209           for (i = min; i < max; i++)
05210             {
05211             int len = 1;
05212             if (eptr >= md->end_subject)
05213               {
05214               SCHECK_PARTIAL();
05215               break;
05216               }
05217             GETCHARLEN(c, eptr, len);
05218             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
05219             eptr+= len;
05220             }
05221           break;
05222 
05223           case OP_WORDCHAR:
05224           for (i = min; i < max; i++)
05225             {
05226             int len = 1;
05227             if (eptr >= md->end_subject)
05228               {
05229               SCHECK_PARTIAL();
05230               break;
05231               }
05232             GETCHARLEN(c, eptr, len);
05233             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
05234             eptr+= len;
05235             }
05236           break;
05237 
05238           default:
05239           RRETURN(PCRE_ERROR_INTERNAL);
05240           }
05241 
05242         /* eptr is now past the end of the maximum run */
05243 
05244         if (possessive) continue;
05245         for(;;)
05246           {
05247           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
05248           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05249           if (eptr-- == pp) break;        /* Stop if tried at original pos */
05250           BACKCHAR(eptr);
05251           }
05252         }
05253       else
05254 #endif  /* SUPPORT_UTF8 */
05255 
05256       /* Not UTF-8 mode */
05257         {
05258         switch(ctype)
05259           {
05260           case OP_ANY:
05261           for (i = min; i < max; i++)
05262             {
05263             if (eptr >= md->end_subject)
05264               {
05265               SCHECK_PARTIAL();
05266               break;
05267               }
05268             if (IS_NEWLINE(eptr)) break;
05269             eptr++;
05270             }
05271           break;
05272 
05273           case OP_ALLANY:
05274           case OP_ANYBYTE:
05275           c = max - min;
05276           if (c > (unsigned int)(md->end_subject - eptr))
05277             {
05278             eptr = md->end_subject;
05279             SCHECK_PARTIAL();
05280             }
05281           else eptr += c;
05282           break;
05283 
05284           case OP_ANYNL:
05285           for (i = min; i < max; i++)
05286             {
05287             if (eptr >= md->end_subject)
05288               {
05289               SCHECK_PARTIAL();
05290               break;
05291               }
05292             c = *eptr;
05293             if (c == 0x000d)
05294               {
05295               if (++eptr >= md->end_subject) break;
05296               if (*eptr == 0x000a) eptr++;
05297               }
05298             else
05299               {
05300               if (c != 0x000a &&
05301                   (md->bsr_anycrlf ||
05302                     (c != 0x000b && c != 0x000c && c != 0x0085)))
05303                 break;
05304               eptr++;
05305               }
05306             }
05307           break;
05308 
05309           case OP_NOT_HSPACE:
05310           for (i = min; i < max; i++)
05311             {
05312             if (eptr >= md->end_subject)
05313               {
05314               SCHECK_PARTIAL();
05315               break;
05316               }
05317             c = *eptr;
05318             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
05319             eptr++;
05320             }
05321           break;
05322 
05323           case OP_HSPACE:
05324           for (i = min; i < max; i++)
05325             {
05326             if (eptr >= md->end_subject)
05327               {
05328               SCHECK_PARTIAL();
05329               break;
05330               }
05331             c = *eptr;
05332             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
05333             eptr++;
05334             }
05335           break;
05336 
05337           case OP_NOT_VSPACE:
05338           for (i = min; i < max; i++)
05339             {
05340             if (eptr >= md->end_subject)
05341               {
05342               SCHECK_PARTIAL();
05343               break;
05344               }
05345             c = *eptr;
05346             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
05347               break;
05348             eptr++;
05349             }
05350           break;
05351 
05352           case OP_VSPACE:
05353           for (i = min; i < max; i++)
05354             {
05355             if (eptr >= md->end_subject)
05356               {
05357               SCHECK_PARTIAL();
05358               break;
05359               }
05360             c = *eptr;
05361             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
05362               break;
05363             eptr++;
05364             }
05365           break;
05366 
05367           case OP_NOT_DIGIT:
05368           for (i = min; i < max; i++)
05369             {
05370             if (eptr >= md->end_subject)
05371               {
05372               SCHECK_PARTIAL();
05373               break;
05374               }
05375             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
05376             eptr++;
05377             }
05378           break;
05379 
05380           case OP_DIGIT:
05381           for (i = min; i < max; i++)
05382             {
05383             if (eptr >= md->end_subject)
05384               {
05385               SCHECK_PARTIAL();
05386               break;
05387               }
05388             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
05389             eptr++;
05390             }
05391           break;
05392 
05393           case OP_NOT_WHITESPACE:
05394           for (i = min; i < max; i++)
05395             {
05396             if (eptr >= md->end_subject)
05397               {
05398               SCHECK_PARTIAL();
05399               break;
05400               }
05401             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
05402             eptr++;
05403             }
05404           break;
05405 
05406           case OP_WHITESPACE:
05407           for (i = min; i < max; i++)
05408             {
05409             if (eptr >= md->end_subject)
05410               {
05411               SCHECK_PARTIAL();
05412               break;
05413               }
05414             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
05415             eptr++;
05416             }
05417           break;
05418 
05419           case OP_NOT_WORDCHAR:
05420           for (i = min; i < max; i++)
05421             {
05422             if (eptr >= md->end_subject)
05423               {
05424               SCHECK_PARTIAL();
05425               break;
05426               }
05427             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
05428             eptr++;
05429             }
05430           break;
05431 
05432           case OP_WORDCHAR:
05433           for (i = min; i < max; i++)
05434             {
05435             if (eptr >= md->end_subject)
05436               {
05437               SCHECK_PARTIAL();
05438               break;
05439               }
05440             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
05441             eptr++;
05442             }
05443           break;
05444 
05445           default:
05446           RRETURN(PCRE_ERROR_INTERNAL);
05447           }
05448 
05449         /* eptr is now past the end of the maximum run */
05450 
05451         if (possessive) continue;
05452         while (eptr >= pp)
05453           {
05454           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
05455           eptr--;
05456           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05457           }
05458         }
05459 
05460       /* Get here if we can't make it match with any permitted repetitions */
05461 
05462       MRRETURN(MATCH_NOMATCH);
05463       }
05464     /* Control never gets here */
05465 
05466     /* There's been some horrible disaster. Arrival here can only mean there is
05467     something seriously wrong in the code above or the OP_xxx definitions. */
05468 
05469     default:
05470     DPRINTF(("Unknown opcode %d\n", *ecode));
05471     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
05472     }
05473 
05474   /* Do not stick any code in here without much thought; it is assumed
05475   that "continue" in the code above comes out to here to repeat the main
05476   loop. */
05477 
05478   }             /* End of main loop */
05479 /* Control never reaches here */
05480 
05481 
05482 /* When compiling to use the heap rather than the stack for recursive calls to
05483 match(), the RRETURN() macro jumps here. The number that is saved in
05484 frame->Xwhere indicates which label we actually want to return to. */
05485 
05486 #ifdef NO_RECURSE
05487 #define LBL(val) case val: goto L_RM##val;
05488 HEAP_RETURN:
05489 switch (frame->Xwhere)
05490   {
05491   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
05492   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
05493   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
05494   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
05495   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
05496 #ifdef SUPPORT_UTF8
05497   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
05498   LBL(32) LBL(34) LBL(42) LBL(46)
05499 #ifdef SUPPORT_UCP
05500   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
05501   LBL(59) LBL(60) LBL(61) LBL(62)
05502 #endif  /* SUPPORT_UCP */
05503 #endif  /* SUPPORT_UTF8 */
05504   default:
05505   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
05506   return PCRE_ERROR_INTERNAL;
05507   }
05508 #undef LBL
05509 #endif  /* NO_RECURSE */
05510 }
05511 
05512 
05513 /***************************************************************************
05514 ****************************************************************************
05515                    RECURSION IN THE match() FUNCTION
05516 
05517 Undefine all the macros that were defined above to handle this. The names are released before the code that follows; pcre_exec(), for example, declares its own ims, length, flags and pp. */
05518 
05519 #ifdef NO_RECURSE
05520 #undef eptr
05521 #undef ecode
05522 #undef mstart
05523 #undef offset_top
05524 #undef ims
05525 #undef eptrb
05526 #undef flags
05527 
05528 #undef callpat
05529 #undef charptr
05530 #undef data
05531 #undef next
05532 #undef pp
05533 #undef prev
05534 #undef saved_eptr
05535 
05536 #undef new_recursive
05537 
05538 #undef cur_is_word
05539 #undef condition
05540 #undef prev_is_word
05541 
05542 #undef original_ims
05543 
05544 #undef ctype
05545 #undef length
05546 #undef max
05547 #undef min
05548 #undef number
05549 #undef offset
05550 #undef op
05551 #undef save_capture_last
05552 #undef save_offset1
05553 #undef save_offset2
05554 #undef save_offset3
05555 #undef stacksave
05556 
05557 #undef newptrb
05558 
05559 #endif  /* NO_RECURSE */
05560 
05561 /* fc and fi are defined as macros in both cases (with or without NO_RECURSE), so they are removed unconditionally */
05562 
05563 #undef fc
05564 #undef fi
05565 
05566 /***************************************************************************
05567 ***************************************************************************/
05568 
05569 
05570 
05571 /*************************************************
05572 *         Execute a Regular Expression           *
05573 *************************************************/
05574 
05575 /* This function applies a compiled re to a subject string and picks out
05576 portions of the string if it matches. Two elements in the vector are set for
05577 each substring: the offsets to the start and end of the substring.
05578 The match is tried at start_offset and, unless anchored, at each later position in turn.
05579 Arguments:
05580   argument_re     points to the compiled expression
05581   extra_data      points to extra data (study data, limits, callout data, tables, mark) or is NULL
05582   subject         points to the subject string
05583   length          length of subject string (may contain binary zeros)
05584   start_offset    where to start in the subject string (0 <= start_offset <= length)
05585   options         option bits
05586   offsets         points to a vector of ints to be filled in with offsets (may be NULL only if offsetcount is 0)
05587   offsetcount     the number of elements in the vector
05588 
05589 Returns:          > 0 => success; value is the number of elements filled in
05590                   = 0 => success, but offsets is not big enough
05591                    -1 => failed to match
05592                  < -1 => some kind of unexpected problem (a PCRE_ERROR_xxx code)
05593 If PCRE_EXTRA_MARK is set in extra_data->flags, *extra_data->mark is updated on match, no-match and partial-match returns, but not on other error returns. */
05594 
05595 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
05596 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
05597   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
05598   int offsetcount)
05599 {
05600 int rc, resetcount, ocount;
05601 int first_byte = -1;
05602 int req_byte = -1;
05603 int req_byte2 = -1;
05604 int newline;
05605 unsigned long int ims;
05606 BOOL using_temporary_offsets = FALSE;
05607 BOOL anchored;
05608 BOOL startline;
05609 BOOL firstline;
05610 BOOL first_byte_caseless = FALSE;
05611 BOOL req_byte_caseless = FALSE;
05612 BOOL utf8;
05613 match_data match_block;
05614 match_data *md = &match_block;
05615 const uschar *tables;
05616 const uschar *start_bits = NULL;
05617 USPTR start_match = (USPTR)subject + start_offset;
05618 USPTR end_subject;
05619 USPTR start_partial = NULL;
05620 USPTR req_byte_ptr = start_match - 1;
05621 
05622 pcre_study_data internal_study;
05623 const pcre_study_data *study;
05624 
05625 real_pcre internal_re;
05626 const real_pcre *external_re = (const real_pcre *)argument_re;
05627 const real_pcre *re = external_re;
05628 
05629 /* Plausibility checks on the arguments; each kind of failure has its own code */
05630 
05631 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
05632 if (re == NULL || subject == NULL ||
05633    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
05634 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
05635 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
05636 
05637 /* This information is for finding all the numbers associated with a given
05638 name, for condition testing. */
05639 /* NOTE(review): these fields are read from re before the byte-order check further down, so they look wrong for a pattern compiled on a host of opposite endianness - confirm. */
05640 md->name_table = (uschar *)re + re->name_table_offset;
05641 md->name_count = re->name_count;
05642 md->name_entry_size = re->name_entry_size;
05643 
05644 /* Fish out the optional data from the extra_data structure, first setting
05645 the default values. */
05646 
05647 study = NULL;
05648 md->match_limit = MATCH_LIMIT;
05649 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
05650 md->callout_data = NULL;
05651 
05652 /* The table pointer is always in native byte order. */
05653 
05654 tables = external_re->tables;
05655 
05656 if (extra_data != NULL)
05657   {
05658   register unsigned int flags = extra_data->flags;
05659   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
05660     study = (const pcre_study_data *)extra_data->study_data;
05661   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
05662     md->match_limit = extra_data->match_limit;
05663   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
05664     md->match_limit_recursion = extra_data->match_limit_recursion;
05665   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
05666     md->callout_data = extra_data->callout_data;
05667   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
05668   }
05669 
05670 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
05671 is a feature that makes it possible to save compiled regex and re-use them
05672 in other programs later. */
05673 
05674 if (tables == NULL) tables = _pcre_default_tables;
05675 
05676 /* Check that the first field in the block is the magic number. If it is not,
05677 test for a regex that was compiled on a host of opposite endianness. If this is
05678 the case, flipped values are put in internal_re and internal_study if there was
05679 study data too. */
05680 
05681 if (re->magic_number != MAGIC_NUMBER)
05682   {
05683   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
05684   if (re == NULL) return PCRE_ERROR_BADMAGIC;
05685   if (study != NULL) study = &internal_study;
05686   }
05687 
05688 /* Set up other data */
05689 
05690 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
05691 startline = (re->flags & PCRE_STARTLINE) != 0;
05692 firstline = (re->options & PCRE_FIRSTLINE) != 0;
05693 
05694 /* The code starts after the real_pcre block and the capture name table. */
05695 
05696 md->start_code = (const uschar *)external_re + re->name_table_offset +
05697   re->name_count * re->name_entry_size;
05698 
05699 md->start_subject = (USPTR)subject;
05700 md->start_offset = start_offset;
05701 md->end_subject = md->start_subject + length;
05702 end_subject = md->end_subject;
05703 
05704 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
05705 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
05706 md->use_ucp = (re->options & PCRE_UCP) != 0;
05707 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
05708 
05709 md->notbol = (options & PCRE_NOTBOL) != 0;
05710 md->noteol = (options & PCRE_NOTEOL) != 0;
05711 md->notempty = (options & PCRE_NOTEMPTY) != 0;
05712 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
05713 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
05714               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
05715 md->hitend = FALSE;
05716 md->mark = NULL;                        /* In case never set */
05717 
05718 md->recursive = NULL;                   /* No recursion at top level */
05719 
05720 md->lcc = tables + lcc_offset;
05721 md->ctypes = tables + ctypes_offset;
05722 
05723 /* Handle different \R options. */
05724 
05725 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
05726   {
05727   case 0:
05728   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
05729     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
05730   else
05731 #ifdef BSR_ANYCRLF
05732   md->bsr_anycrlf = TRUE;
05733 #else
05734   md->bsr_anycrlf = FALSE;
05735 #endif
05736   break;
05737 
05738   case PCRE_BSR_ANYCRLF:
05739   md->bsr_anycrlf = TRUE;
05740   break;
05741 
05742   case PCRE_BSR_UNICODE:
05743   md->bsr_anycrlf = FALSE;
05744   break;
05745 
05746   default: return PCRE_ERROR_BADNEWLINE;
05747   }
05748 
05749 /* Handle different types of newline. The three bits give eight cases. If
05750 nothing is set at run time, whatever was used at compile time applies. */
05751 
05752 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
05753         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
05754   {
05755   case 0: newline = NEWLINE; break;   /* Compile-time default */
05756   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
05757   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
05758   case PCRE_NEWLINE_CR+
05759        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
05760   case PCRE_NEWLINE_ANY: newline = -1; break;
05761   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
05762   default: return PCRE_ERROR_BADNEWLINE;
05763   }
05764 
05765 if (newline == -2)
05766   {
05767   md->nltype = NLTYPE_ANYCRLF;
05768   }
05769 else if (newline < 0)
05770   {
05771   md->nltype = NLTYPE_ANY;
05772   }
05773 else
05774   {
05775   md->nltype = NLTYPE_FIXED;
05776   if (newline > 255)
05777     {
05778     md->nllen = 2;
05779     md->nl[0] = (newline >> 8) & 255;
05780     md->nl[1] = newline & 255;
05781     }
05782   else
05783     {
05784     md->nllen = 1;
05785     md->nl[0] = newline;
05786     }
05787   }
05788 
05789 /* Partial matching was originally supported only for a restricted set of
05790 regexes; from release 8.00 there are no restrictions, but the bits are still
05791 defined (though never set). So there's no harm in leaving this code. */
05792 
05793 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
05794   return PCRE_ERROR_BADPARTIAL;
05795 
05796 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
05797 back the character offset. */
05798 
05799 #ifdef SUPPORT_UTF8
05800 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
05801   {
05802   int tb;
05803   if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
05804     return (tb == length && md->partial > 1)?
05805       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
05806   if (start_offset > 0 && start_offset < length)
05807     {
05808     tb = ((USPTR)subject)[start_offset] & 0xc0;
05809     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
05810     }
05811   }
05812 #endif
05813 
05814 /* The ims options can vary during the matching as a result of the presence
05815 of (?ims) items in the pattern. They are kept in a local variable so that
05816 restoring at the exit of a group is easy. */
05817 
05818 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
05819 
05820 /* If the expression has got more back references than the offsets supplied can
05821 hold, we get a temporary chunk of working store to use during the matching.
05822 Otherwise, we can use the vector supplied, rounding down its size to a multiple
05823 of 3. */
05824 
05825 ocount = offsetcount - (offsetcount % 3);
05826 
05827 if (re->top_backref > 0 && re->top_backref >= ocount/3)
05828   {
05829   ocount = re->top_backref * 3 + 3;
05830   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
05831   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
05832   using_temporary_offsets = TRUE;
05833   DPRINTF(("Got memory to hold back references\n"));
05834   }
05835 else md->offset_vector = offsets;
05836 
05837 md->offset_end = ocount;
05838 md->offset_max = (2*ocount)/3;
05839 md->offset_overflow = FALSE;
05840 md->capture_last = -1;
05841 
05842 /* Compute the minimum number of offsets that we need to reset each time. Doing
05843 this makes a huge difference to execution time when there aren't many brackets
05844 in the pattern. */
05845 
05846 resetcount = 2 + re->top_bracket * 2;
05847 if (resetcount > offsetcount) resetcount = ocount;
05848 
05849 /* Reset the working variable associated with each extraction. These should
05850 never be used unless previously set, but they get saved and restored, and so we
05851 initialize them to avoid reading uninitialized locations. */
05852 
05853 if (md->offset_vector != NULL)
05854   {
05855   register int *iptr = md->offset_vector + ocount;
05856   register int *iend = iptr - resetcount/2 + 1;
05857   while (--iptr >= iend) *iptr = -1;
05858   }
05859 
05860 /* Set up the first character to match, if available. The first_byte value is
05861 never set for an anchored regular expression, but the anchoring may be forced
05862 at run time, so we have to test for anchoring. The first char may be unset for
05863 an unanchored pattern, of course. If there's no first char and the pattern was
05864 studied, there may be a bitmap of possible first characters. */
05865 
05866 if (!anchored)
05867   {
05868   if ((re->flags & PCRE_FIRSTSET) != 0)
05869     {
05870     first_byte = re->first_byte & 255;
05871     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
05872       first_byte = md->lcc[first_byte];
05873     }
05874   else
05875     if (!startline && study != NULL &&
05876       (study->flags & PCRE_STUDY_MAPPED) != 0)
05877         start_bits = study->start_bits;
05878   }
05879 
05880 /* For anchored or unanchored matches, there may be a "last known required
05881 character" set. */
05882 
05883 if ((re->flags & PCRE_REQCHSET) != 0)
05884   {
05885   req_byte = re->req_byte & 255;
05886   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
05887   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
05888   }
05889 
05890 
05891 /* ==========================================================================*/
05892 
05893 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
05894 the loop runs just once. */
05895 
05896 for(;;)
05897   {
05898   USPTR save_end_subject = end_subject;
05899   USPTR new_start_match;
05900 
05901   /* Reset the maximum number of extractions we might see. */
05902 
05903   if (md->offset_vector != NULL)
05904     {
05905     register int *iptr = md->offset_vector;
05906     register int *iend = iptr + resetcount;
05907     while (iptr < iend) *iptr++ = -1;
05908     }
05909 
05910   /* If firstline is TRUE, the start of the match is constrained to the first
05911   line of a multiline string. That is, the match must be before or at the first
05912   newline. Implement this by temporarily adjusting end_subject so that we stop
05913   scanning at a newline. If the match fails at the newline, later code breaks
05914   this loop. */
05915 
05916   if (firstline)
05917     {
05918     USPTR t = start_match;
05919 #ifdef SUPPORT_UTF8
05920     if (utf8)
05921       {
05922       while (t < md->end_subject && !IS_NEWLINE(t))
05923         {
05924         t++;
05925         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
05926         }
05927       }
05928     else
05929 #endif
05930     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
05931     end_subject = t;
05932     }
05933 
05934   /* There are some optimizations that avoid running the match if a known
05935   starting point is not found, or if a known later character is not present.
05936   However, there is an option that disables these, for testing and for ensuring
05937   that all callouts do actually occur. The option can be set in the regex by
05938   (*NO_START_OPT) or passed in match-time options. */
05939 
05940   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
05941     {
05942     /* Advance to a unique first byte if there is one. */
05943 
05944     if (first_byte >= 0)
05945       {
05946       if (first_byte_caseless)
05947         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
05948           start_match++;
05949       else
05950         while (start_match < end_subject && *start_match != first_byte)
05951           start_match++;
05952       }
05953 
05954     /* Or to just after a linebreak for a multiline match */
05955 
05956     else if (startline)
05957       {
05958       if (start_match > md->start_subject + start_offset)
05959         {
05960 #ifdef SUPPORT_UTF8
05961         if (utf8)
05962           {
05963           while (start_match < end_subject && !WAS_NEWLINE(start_match))
05964             {
05965             start_match++;
05966             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
05967               start_match++;
05968             }
05969           }
05970         else
05971 #endif
05972         while (start_match < end_subject && !WAS_NEWLINE(start_match))
05973           start_match++;
05974 
05975         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
05976         and we are now at a LF, advance the match position by one more character.
05977         */
05978 
05979         if (start_match[-1] == CHAR_CR &&
05980              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
05981              start_match < end_subject &&
05982              *start_match == CHAR_NL)
05983           start_match++;
05984         }
05985       }
05986 
05987     /* Or to a non-unique first byte after study */
05988 
05989     else if (start_bits != NULL)
05990       {
05991       while (start_match < end_subject)
05992         {
05993         register unsigned int c = *start_match;
05994         if ((start_bits[c/8] & (1 << (c&7))) == 0)
05995           {
05996           start_match++;
05997 #ifdef SUPPORT_UTF8
05998           if (utf8)
05999             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
06000               start_match++;
06001 #endif
06002           }
06003         else break;
06004         }
06005       }
06006     }   /* Starting optimizations */
06007 
06008   /* Restore fudged end_subject */
06009 
06010   end_subject = save_end_subject;
06011 
06012   /* The following two optimizations are disabled for partial matching or if
06013   disabling is explicitly requested, at match time or by the pattern itself. */
06014 
06015   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
06016     {
06017     /* If the pattern was studied, a minimum subject length may be set. This is
06018     a lower bound; no actual string of that length may actually match the
06019     pattern. Although the value is, strictly, in characters, we treat it as
06020     bytes to avoid spending too much time in this optimization. */
06021 
06022     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
06023         (pcre_uint32)(end_subject - start_match) < study->minlength)
06024       {
06025       rc = MATCH_NOMATCH;
06026       break;
06027       }
06028 
06029     /* If req_byte is set, we know that that character must appear in the
06030     subject for the match to succeed. If the first character is set, req_byte
06031     must be later in the subject; otherwise the test starts at the match point.
06032     This optimization can save a huge amount of backtracking in patterns with
06033     nested unlimited repeats that aren't going to match. Writing separate code
06034     for cased/caseless versions makes it go faster, as does using an
06035     autoincrement and backing off on a match.
06036 
06037     HOWEVER: when the subject string is very, very long, searching to its end
06038     can take a long time, and give bad performance on quite ordinary patterns.
06039     This showed up when somebody was matching something like /^\d+C/ on a
06040     32-megabyte string... so we don't do this when the string is sufficiently
06041     long. */
06042 
06043     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
06044       {
06045       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
06046 
06047       /* We don't need to repeat the search if we haven't yet reached the
06048       place we found it at last time. */
06049 
06050       if (p > req_byte_ptr)
06051         {
06052         if (req_byte_caseless)
06053           {
06054           while (p < end_subject)
06055             {
06056             register int pp = *p++;
06057             if (pp == req_byte || pp == req_byte2) { p--; break; }
06058             }
06059           }
06060         else
06061           {
06062           while (p < end_subject)
06063             {
06064             if (*p++ == req_byte) { p--; break; }
06065             }
06066           }
06067 
06068         /* If we can't find the required character, break the matching loop,
06069         forcing a match failure. */
06070 
06071         if (p >= end_subject)
06072           {
06073           rc = MATCH_NOMATCH;
06074           break;
06075           }
06076 
06077         /* If we have found the required character, save the point where we
06078         found it, so that we don't search again next time round the loop if
06079         the start hasn't passed this character yet. */
06080 
06081         req_byte_ptr = p;
06082         }
06083       }
06084     }
06085 
06086 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
06087   printf(">>>> Match against: ");
06088   pchars(start_match, end_subject - start_match, TRUE, md);
06089   printf("\n");
06090 #endif
06091 
06092   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
06093   first starting point for which a partial match was found. */
06094 
06095   md->start_match_ptr = start_match;
06096   md->start_used_ptr = start_match;
06097   md->match_call_count = 0;
06098   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
06099     0, 0);
06100   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
06101 
06102   switch(rc)
06103     {
06104     /* SKIP passes back the next starting point explicitly, but if it is the
06105     same as the match we have just done, treat it as NOMATCH. */
06106 
06107     case MATCH_SKIP:
06108     if (md->start_match_ptr != start_match)
06109       {
06110       new_start_match = md->start_match_ptr;
06111       break;
06112       }
06113     /* Fall through */
06114 
06115     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
06116     the SKIP's arg was not found. We also treat this as NOMATCH. */
06117 
06118     case MATCH_SKIP_ARG:
06119     /* Fall through */
06120 
06121     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
06122     exactly like PRUNE. */
06123 
06124     case MATCH_NOMATCH:
06125     case MATCH_PRUNE:
06126     case MATCH_THEN:
06127     new_start_match = start_match + 1;
06128 #ifdef SUPPORT_UTF8
06129     if (utf8)
06130       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
06131         new_start_match++;
06132 #endif
06133     break;
06134 
06135     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
06136 
06137     case MATCH_COMMIT:
06138     rc = MATCH_NOMATCH;
06139     goto ENDLOOP;
06140 
06141     /* Any other return is either a match, or some kind of error. */
06142 
06143     default:
06144     goto ENDLOOP;
06145     }
06146 
06147   /* Control reaches here for the various types of "no match at this point"
06148   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
06149 
06150   rc = MATCH_NOMATCH;
06151 
06152   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
06153   newline in the subject (though it may continue over the newline). Therefore,
06154   if we have just failed to match, starting at a newline, do not continue. */
06155 
06156   if (firstline && IS_NEWLINE(start_match)) break;
06157 
06158   /* Advance to new matching position */
06159 
06160   start_match = new_start_match;
06161 
06162   /* Break the loop if the pattern is anchored or if we have passed the end of
06163   the subject. */
06164 
06165   if (anchored || start_match > end_subject) break;
06166 
06167   /* If we have just passed a CR and we are now at a LF, and the pattern does
06168   not contain any explicit matches for \r or \n, and the newline option is CRLF
06169   or ANY or ANYCRLF, advance the match position by one more character. */
06170 
06171   if (start_match[-1] == CHAR_CR &&
06172       start_match < end_subject &&
06173       *start_match == CHAR_NL &&
06174       (re->flags & PCRE_HASCRORLF) == 0 &&
06175         (md->nltype == NLTYPE_ANY ||
06176          md->nltype == NLTYPE_ANYCRLF ||
06177          md->nllen == 2))
06178     start_match++;
06179 
06180   md->mark = NULL;   /* Reset for start of next match attempt */
06181   }                  /* End of for(;;) "bumpalong" loop */
06182 
06183 /* ==========================================================================*/
06184 
06185 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
06186 conditions is true:
06187 
06188 (1) The pattern is anchored or the match was failed by (*COMMIT);
06189 
06190 (2) We are past the end of the subject;
06191 
06192 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
06193     this option requests that a match occur at or before the first newline in
06194     the subject.
06195 
06196 When we have a match and the offset vector is big enough to deal with any
06197 backreferences, captured substring offsets will already be set up. In the case
06198 where we had to get some local store to hold offsets for backreference
06199 processing, copy those that we can. In this case there need not be overflow if
06200 certain parts of the pattern were not used, even though there are more
06201 capturing parentheses than vector slots. */
06202 
06203 ENDLOOP:
06204 
06205 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
06206   {
06207   if (using_temporary_offsets)
06208     {
06209     if (offsetcount >= 4)
06210       {
06211       memcpy(offsets + 2, md->offset_vector + 2,
06212         (offsetcount - 2) * sizeof(int));
06213       DPRINTF(("Copied offsets from temporary memory\n"));
06214       }
06215     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
06216     DPRINTF(("Freeing temporary memory\n"));
06217     (pcre_free)(md->offset_vector);
06218     }
06219 
06220   /* Set the return code to the number of captured strings, or 0 if there are
06221   too many to fit into the vector. */
06222 
06223   rc = md->offset_overflow? 0 : md->end_offset_top/2;
06224 
06225   /* If there is space, set up the whole thing as substring 0. The value of
06226   md->start_match_ptr might be modified if \K was encountered on the successful
06227   matching path. */
06228 
06229   if (offsetcount < 2) rc = 0; else
06230     {
06231     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
06232     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
06233     }
06234 
06235   DPRINTF((">>>> returning %d\n", rc));
06236   goto RETURN_MARK;
06237   }
06238 
06239 /* Control gets here if there has been an error, or if the overall match
06240 attempt has failed at all permitted starting positions. */
06241 
06242 if (using_temporary_offsets)
06243   {
06244   DPRINTF(("Freeing temporary memory\n"));
06245   (pcre_free)(md->offset_vector);
06246   }
06247 
06248 /* For anything other than nomatch or partial match, just return the code. */
06249 
06250 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
06251   {
06252   DPRINTF((">>>> error: returning %d\n", rc));
06253   return rc;
06254   }
06255 
06256 /* Handle partial matches - disable any mark data */
06257 
06258 if (start_partial != NULL)
06259   {
06260   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
06261   md->mark = NULL;
06262   if (offsetcount > 1)
06263     {
06264     offsets[0] = (int)(start_partial - (USPTR)subject);
06265     offsets[1] = (int)(end_subject - (USPTR)subject);
06266     }
06267   rc = PCRE_ERROR_PARTIAL;
06268   }
06269 
06270 /* This is the classic nomatch case */
06271 
06272 else
06273   {
06274   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
06275   rc = PCRE_ERROR_NOMATCH;
06276   }
06277 
06278 /* Return the MARK data if it has been requested. */
06279 
06280 RETURN_MARK:
06281 
06282 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
06283   *(extra_data->mark) = (unsigned char *)(md->mark);
06284 return rc;
06285 }
06286 
06287 /* End of pcre_exec.c */