Back to index

lightning-sunbird  0.9+nobinonly
jsregexp.c
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
00002  * vim: set sw=4 ts=8 et tw=78:
00003  *
00004  * ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is Mozilla Communicator client code, released
00018  * March 31, 1998.
00019  *
00020  * The Initial Developer of the Original Code is
00021  * Netscape Communications Corporation.
00022  * Portions created by the Initial Developer are Copyright (C) 1998
00023  * the Initial Developer. All Rights Reserved.
00024  *
00025  * Contributor(s):
00026  *
00027  * Alternatively, the contents of this file may be used under the terms of
00028  * either of the GNU General Public License Version 2 or later (the "GPL"),
00029  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00030  * in which case the provisions of the GPL or the LGPL are applicable instead
00031  * of those above. If you wish to allow use of your version of this file only
00032  * under the terms of either the GPL or the LGPL, and not to allow others to
00033  * use your version of this file under the terms of the MPL, indicate your
00034  * decision by deleting the provisions above and replace them with the notice
00035  * and other provisions required by the GPL or the LGPL. If you do not delete
00036  * the provisions above, a recipient may use your version of this file under
00037  * the terms of any one of the MPL, the GPL or the LGPL.
00038  *
00039  * ***** END LICENSE BLOCK ***** */
00040 
00041 /*
00042  * JS regular expressions, after Perl.
00043  */
00044 #include "jsstddef.h"
00045 #include <stdlib.h>
00046 #include <string.h>
00047 #include "jstypes.h"
00048 #include "jsarena.h" /* Added by JSIFY */
00049 #include "jsutil.h" /* Added by JSIFY */
00050 #include "jsapi.h"
00051 #include "jsarray.h"
00052 #include "jsatom.h"
00053 #include "jscntxt.h"
00054 #include "jsconfig.h"
00055 #include "jsfun.h"
00056 #include "jsgc.h"
00057 #include "jsinterp.h"
00058 #include "jslock.h"
00059 #include "jsnum.h"
00060 #include "jsobj.h"
00061 #include "jsopcode.h"
00062 #include "jsregexp.h"
00063 #include "jsscan.h"
00064 #include "jsstr.h"
00065 
00066 /* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */
00067 typedef enum REOp {
00068     REOP_EMPTY         = 0,  /* match rest of input against rest of r.e. */
00069     REOP_ALT           = 1,  /* alternative subexpressions in kid and next */
00070     REOP_SIMPLE_START  = 2,  /* start of 'simple opcodes' */
00071     REOP_BOL           = 2,  /* beginning of input (or line if multiline) */
00072     REOP_EOL           = 3,  /* end of input (or line if multiline) */
00073     REOP_WBDRY         = 4,  /* match "" at word boundary */
00074     REOP_WNONBDRY      = 5,  /* match "" at word non-boundary */
00075     REOP_DOT           = 6,  /* stands for any character */
00076     REOP_DIGIT         = 7,  /* match a digit char: [0-9] */
00077     REOP_NONDIGIT      = 8,  /* match a non-digit char: [^0-9] */
00078     REOP_ALNUM         = 9,  /* match an alphanumeric char: [0-9a-z_A-Z] */
00079     REOP_NONALNUM      = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
00080     REOP_SPACE         = 11, /* match a whitespace char */
00081     REOP_NONSPACE      = 12, /* match a non-whitespace char */
00082     REOP_BACKREF       = 13, /* back-reference (e.g., \1) to a parenthetical */
00083     REOP_FLAT          = 14, /* match a flat string */
00084     REOP_FLAT1         = 15, /* match a single char */
00085     REOP_FLATi         = 16, /* case-independent REOP_FLAT */
00086     REOP_FLAT1i        = 17, /* case-independent REOP_FLAT1 */
00087     REOP_UCFLAT1       = 18, /* single Unicode char */
00088     REOP_UCFLAT1i      = 19, /* case-independent REOP_UCFLAT1 */
00089     REOP_UCFLAT        = 20, /* flat Unicode string; len immediate counts chars */
00090     REOP_UCFLATi       = 21, /* case-independent REOP_UCFLAT */
00091     REOP_CLASS         = 22, /* character class with index */
00092     REOP_NCLASS        = 23, /* negated character class with index */
00093     REOP_SIMPLE_END    = 23, /* end of 'simple opcodes' */
00094     REOP_QUANT         = 25, /* quantified atom: atom{1,2} */
00095     REOP_STAR          = 26, /* zero or more occurrences of kid */
00096     REOP_PLUS          = 27, /* one or more occurrences of kid */
00097     REOP_OPT           = 28, /* optional subexpression in kid */
00098     REOP_LPAREN        = 29, /* left paren bytecode: kid is u.num'th sub-regexp */
00099     REOP_RPAREN        = 30, /* right paren bytecode */
00100     REOP_JUMP          = 31, /* for deoptimized closure loops */
00101     REOP_DOTSTAR       = 32, /* optimize .* to use a single opcode */
00102     REOP_ANCHOR        = 33, /* like .* but skips left context to unanchored r.e. */
00103     REOP_EOLONLY       = 34, /* $ not preceded by any pattern */
00104     REOP_BACKREFi      = 37, /* case-independent REOP_BACKREF */
00105     REOP_LPARENNON     = 41, /* non-capturing version of REOP_LPAREN */
00106     REOP_ASSERT        = 43, /* zero width positive lookahead assertion */
00107     REOP_ASSERT_NOT    = 44, /* zero width negative lookahead assertion */
00108     REOP_ASSERTTEST    = 45, /* sentinel at end of assertion child */
00109     REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */
00110     REOP_MINIMALSTAR   = 47, /* non-greedy version of * */
00111     REOP_MINIMALPLUS   = 48, /* non-greedy version of + */
00112     REOP_MINIMALOPT    = 49, /* non-greedy version of ? */
00113     REOP_MINIMALQUANT  = 50, /* non-greedy version of {} */
00114     REOP_ENDCHILD      = 51, /* sentinel at end of quantifier child */
00115     REOP_REPEAT        = 52, /* directs execution of greedy quantifier */
00116     REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */
00117     REOP_ALTPREREQ     = 54, /* prerequisite for ALT, either of two chars */
00118     REOP_ALTPREREQ2    = 55, /* prerequisite for ALT, a char or a class */
00119     REOP_ENDALT        = 56, /* end of final alternate */
00120     REOP_CONCAT        = 57, /* concatenation of terms (parse time only) */
00121 
00122     REOP_END
00123 } REOp;
00124 
00125 #define REOP_IS_SIMPLE(op)  ((unsigned)((op) - REOP_SIMPLE_START) <           \
00126                              (unsigned)REOP_SIMPLE_END)
00127 
00128 struct RENode {
00129     REOp            op;         /* r.e. op bytecode */
00130     RENode          *next;      /* next in concatenation order */
00131     void            *kid;       /* first operand */
00132     union {
00133         void        *kid2;      /* second operand */
00134         jsint       num;        /* could be a number */
00135         size_t      parenIndex; /* or a parenthesis index */
00136         struct {                /* or a quantifier range */
00137             uintN  min;
00138             uintN  max;
00139             JSPackedBool greedy;
00140         } range;
00141         struct {                /* or a character class */
00142             size_t  startIndex;
00143             size_t  kidlen;     /* length of string at kid, in jschars */
00144             size_t  index;      /* index into class list */
00145             uint16  bmsize;     /* bitmap size, based on max char code */
00146             JSPackedBool sense;
00147         } ucclass;
00148         struct {                /* or a literal sequence */
00149             jschar  chr;        /* of one character */
00150             size_t  length;     /* or many (via the kid) */
00151         } flat;
00152         struct {
00153             RENode  *kid2;      /* second operand from ALT */
00154             jschar  ch1;        /* match char for ALTPREREQ */
00155             jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */
00156         } altprereq;
00157     } u;
00158 };
00159 
00160 #define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \
00161                              ((c >= 'a') && (c <= 'z')) )
00162 #define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \
00163                              (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
00164 
00165 #define CLASS_CACHE_SIZE    4
00166 
00167 typedef struct CompilerState {
00168     JSContext       *context;
00169     JSTokenStream   *tokenStream; /* For reporting errors */
00170     const jschar    *cpbegin;
00171     const jschar    *cpend;
00172     const jschar    *cp;
00173     size_t          parenCount;
00174     size_t          classCount;   /* number of [] encountered */
00175     size_t          treeDepth;    /* maximum depth of parse tree */
00176     size_t          progLength;   /* estimated bytecode length */
00177     RENode          *result;
00178     size_t          classBitmapsMem; /* memory to hold all class bitmaps */
00179     struct {
00180         const jschar *start;        /* small cache of class strings */
00181         size_t length;              /* since they're often the same */
00182         size_t index;
00183     } classCache[CLASS_CACHE_SIZE];
00184     uint16          flags;
00185 } CompilerState;
00186 
00187 typedef struct EmitStateStackEntry {
00188     jsbytecode      *altHead;       /* start of REOP_ALT* opcode */
00189     jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */
00190     jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
00191     jsbytecode      *endTermFixup;  /* fixup ptr. to REOPT_ALTPREREQ* offset */
00192     RENode          *continueNode;  /* original REOP_ALT* node being stacked */
00193     jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */
00194     JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to
00195                                        avoid 16-bit unsigned offset overflow */
00196 } EmitStateStackEntry;
00197 
00198 /*
00199  * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h,
00200  * the getters and setters take the pc of the offset, not of the opcode before
00201  * the offset.
00202  */
00203 #define ARG_LEN             2
00204 #define GET_ARG(pc)         ((uint16)(((pc)[0] << 8) | (pc)[1]))
00205 #define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \
00206                              (pc)[1] = (jsbytecode) (arg))
00207 
00208 #define OFFSET_LEN          ARG_LEN
00209 #define OFFSET_MAX          (JS_BIT(ARG_LEN * 8) - 1)
00210 #define GET_OFFSET(pc)      GET_ARG(pc)
00211 
00212 /*
00213  * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
00214  * For sanity, we limit it to 2^24 bytes.
00215  */
00216 #define TREE_DEPTH_MAX  (JS_BIT(24) / sizeof(EmitStateStackEntry))
00217 
00218 /*
00219  * The maximum memory that can be allocated for class bitmaps.
00220  * For sanity, we limit it to 2^24 bytes.
00221  */
00222 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
00223 
00224 /*
00225  * Functions to get size and write/read bytecode that represent small indexes
00226  * compactly.
00227  * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
00228  * indicates that the following byte brings more bits to the index. Otherwise
00229  * this is the last byte in the index bytecode representing highest index bits.
00230  */
00231 static size_t
00232 GetCompactIndexWidth(size_t index)
00233 {
00234     size_t width;
00235 
00236     for (width = 1; (index >>= 7) != 0; ++width) { }
00237     return width;
00238 }
00239 
00240 static jsbytecode *
00241 WriteCompactIndex(jsbytecode *pc, size_t index)
00242 {
00243     size_t next;
00244 
00245     while ((next = index >> 7) != 0) {
00246         *pc++ = (jsbytecode)(index | 0x80);
00247         index = next;
00248     }
00249     *pc++ = (jsbytecode)index;
00250     return pc;
00251 }
00252 
00253 static jsbytecode *
00254 ReadCompactIndex(jsbytecode *pc, size_t *result)
00255 {
00256     size_t nextByte;
00257 
00258     nextByte = *pc++;
00259     if ((nextByte & 0x80) == 0) {
00260         /*
00261          * Short-circuit the most common case when compact index <= 127.
00262          */
00263         *result = nextByte;
00264     } else {
00265         size_t shift = 7;
00266         *result = 0x7F & nextByte;
00267         do {
00268             nextByte = *pc++;
00269             *result |= (nextByte & 0x7F) << shift;
00270             shift += 7;
00271         } while ((nextByte & 0x80) != 0);
00272     }
00273     return pc;
00274 }
00275 
00276 typedef struct RECapture {
00277     ptrdiff_t index;           /* start of contents, -1 for empty  */
00278     size_t length;             /* length of capture */
00279 } RECapture;
00280 
00281 typedef struct REMatchState {
00282     const jschar *cp;
00283     RECapture parens[1];      /* first of 're->parenCount' captures,
00284                                  allocated at end of this struct */
00285 } REMatchState;
00286 
00287 struct REBackTrackData;
00288 
00289 typedef struct REProgState {
00290     jsbytecode *continue_pc;        /* current continuation data */
00291     jsbytecode continue_op;
00292     ptrdiff_t index;                /* progress in text */
00293     size_t parenSoFar;              /* highest indexed paren started */
00294     union {
00295         struct {
00296             uintN min;             /* current quantifier limits */
00297             uintN max;
00298         } quantifier;
00299         struct {
00300             size_t top;             /* backtrack stack state */
00301             size_t sz;
00302         } assertion;
00303     } u;
00304 } REProgState;
00305 
00306 typedef struct REBackTrackData {
00307     size_t sz;                      /* size of previous stack entry */
00308     jsbytecode *backtrack_pc;       /* where to backtrack to */
00309     jsbytecode backtrack_op;
00310     const jschar *cp;               /* index in text of match at backtrack */
00311     size_t parenIndex;              /* start index of saved paren contents */
00312     size_t parenCount;              /* # of saved paren contents */
00313     size_t saveStateStackTop;       /* number of parent states */
00314     /* saved parent states follow */
00315     /* saved paren contents follow */
00316 } REBackTrackData;
00317 
00318 #define INITIAL_STATESTACK  100
00319 #define INITIAL_BACKTRACK   8000
00320 
00321 typedef struct REGlobalData {
00322     JSContext *cx;
00323     JSRegExp *regexp;               /* the RE in execution */
00324     JSBool ok;                      /* runtime error (out_of_memory only?) */
00325     size_t start;                   /* offset to start at */
00326     ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
00327     const jschar    *cpbegin;       /* text base address */
00328     const jschar    *cpend;         /* text limit address */
00329 
00330     REProgState *stateStack;        /* stack of state of current parents */
00331     size_t stateStackTop;
00332     size_t stateStackLimit;
00333 
00334     REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
00335     REBackTrackData *backTrackSP;
00336     size_t backTrackStackSize;
00337     size_t cursz;                   /* size of current stack entry */
00338 
00339     JSArenaPool     pool;           /* It's faster to use one malloc'd pool
00340                                        than to malloc/free the three items
00341                                        that are allocated from this pool */
00342 } REGlobalData;
00343 
00344 /*
00345  * 1. If IgnoreCase is false, return ch.
00346  * 2. Let u be ch converted to upper case as if by calling
00347  *    String.prototype.toUpperCase on the one-character string ch.
00348  * 3. If u does not consist of a single character, return ch.
00349  * 4. Let cu be u's character.
00350  * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
00351  *    code point value is less than decimal 128, then return ch.
00352  * 6. Return cu.
00353  */
00354 static jschar
00355 upcase(jschar ch)
00356 {
00357     jschar cu = JS_TOUPPER(ch);
00358     if (ch >= 128 && cu < 128)
00359         return ch;
00360     return cu;
00361 }
00362 
00363 static jschar
00364 downcase(jschar ch)
00365 {
00366     jschar cl = JS_TOLOWER(ch);
00367     if (cl >= 128 && ch < 128)
00368         return ch;
00369     return cl;
00370 }
00371 
00372 /* Construct and initialize an RENode, returning NULL for out-of-memory */
00373 static RENode *
00374 NewRENode(CompilerState *state, REOp op)
00375 {
00376     JSContext *cx;
00377     RENode *ren;
00378 
00379     cx = state->context;
00380     JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
00381     if (!ren) {
00382         JS_ReportOutOfMemory(cx);
00383         return NULL;
00384     }
00385     ren->op = op;
00386     ren->next = NULL;
00387     ren->kid = NULL;
00388     return ren;
00389 }
00390 
00391 /*
00392  * Validates and converts hex ascii value.
00393  */
00394 static JSBool
00395 isASCIIHexDigit(jschar c, uintN *digit)
00396 {
00397     uintN cv = c;
00398 
00399     if (cv < '0')
00400         return JS_FALSE;
00401     if (cv <= '9') {
00402         *digit = cv - '0';
00403         return JS_TRUE;
00404     }
00405     cv |= 0x20;
00406     if (cv >= 'a' && cv <= 'f') {
00407         *digit = cv - 'a' + 10;
00408         return JS_TRUE;
00409     }
00410     return JS_FALSE;
00411 }
00412 
00413 
00414 typedef struct {
00415     REOp op;
00416     const jschar *errPos;
00417     size_t parenIndex;
00418 } REOpData;
00419 
00420 
00421 /*
00422  * Process the op against the two top operands, reducing them to a single
00423  * operand in the penultimate slot. Update progLength and treeDepth.
00424  */
00425 static JSBool
00426 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
00427           intN operandSP)
00428 {
00429     RENode *result;
00430 
00431     switch (opData->op) {
00432     case REOP_ALT:
00433         result = NewRENode(state, REOP_ALT);
00434         if (!result)
00435             return JS_FALSE;
00436         result->kid = operandStack[operandSP - 2];
00437         result->u.kid2 = operandStack[operandSP - 1];
00438         operandStack[operandSP - 2] = result;
00439 
00440         if (state->treeDepth == TREE_DEPTH_MAX) {
00441             js_ReportCompileErrorNumber(state->context, state->tokenStream,
00442                                         JSREPORT_TS | JSREPORT_ERROR,
00443                                         JSMSG_REGEXP_TOO_COMPLEX);
00444             return JS_FALSE;
00445         }
00446         ++state->treeDepth;
00447 
00448         /*
00449          * Look at both alternates to see if there's a FLAT or a CLASS at
00450          * the start of each. If so, use a prerequisite match.
00451          */
00452         if (((RENode *) result->kid)->op == REOP_FLAT &&
00453             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
00454             (state->flags & JSREG_FOLD) == 0) {
00455             result->op = REOP_ALTPREREQ;
00456             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
00457             result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
00458             /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
00459                                             JUMP, <end> ... ENDALT */
00460             state->progLength += 13;
00461         }
00462         else
00463         if (((RENode *) result->kid)->op == REOP_CLASS &&
00464             ((RENode *) result->kid)->u.ucclass.index < 256 &&
00465             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
00466             (state->flags & JSREG_FOLD) == 0) {
00467             result->op = REOP_ALTPREREQ2;
00468             result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
00469             result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
00470             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
00471                                             JUMP, <end> ... ENDALT */
00472             state->progLength += 13;
00473         }
00474         else
00475         if (((RENode *) result->kid)->op == REOP_FLAT &&
00476             ((RENode *) result->u.kid2)->op == REOP_CLASS &&
00477             ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
00478             (state->flags & JSREG_FOLD) == 0) {
00479             result->op = REOP_ALTPREREQ2;
00480             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
00481             result->u.altprereq.ch2 =
00482                 ((RENode *) result->u.kid2)->u.ucclass.index;
00483             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
00484                                           JUMP, <end> ... ENDALT */
00485             state->progLength += 13;
00486         }
00487         else {
00488             /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
00489             state->progLength += 7;
00490         }
00491         break;
00492 
00493     case REOP_CONCAT:
00494         result = operandStack[operandSP - 2];
00495         while (result->next)
00496             result = result->next;
00497         result->next = operandStack[operandSP - 1];
00498         break;
00499 
00500     case REOP_ASSERT:
00501     case REOP_ASSERT_NOT:
00502     case REOP_LPARENNON:
00503     case REOP_LPAREN:
00504         /* These should have been processed by a close paren. */
00505         js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
00506                                       JSREPORT_TS | JSREPORT_ERROR,
00507                                       JSMSG_MISSING_PAREN, opData->errPos);
00508         return JS_FALSE;
00509 
00510     default:;
00511     }
00512     return JS_TRUE;
00513 }
00514 
00515 /*
00516  * Parser forward declarations.
00517  */
00518 static JSBool ParseTerm(CompilerState *state);
00519 static JSBool ParseQuantifier(CompilerState *state);
00520 static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
00521 
00522 /*
00523  * Top-down regular expression grammar, based closely on Perl4.
00524  *
00525  *  regexp:     altern                  A regular expression is one or more
00526  *              altern '|' regexp       alternatives separated by vertical bar.
00527  */
00528 #define INITIAL_STACK_SIZE  128
00529 
00530 static JSBool
00531 ParseRegExp(CompilerState *state)
00532 {
00533     size_t parenIndex;
00534     RENode *operand;
00535     REOpData *operatorStack;
00536     RENode **operandStack;
00537     REOp op;
00538     intN i;
00539     JSBool result = JS_FALSE;
00540 
00541     intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
00542     intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
00543 
00544     /* Watch out for empty regexp */
00545     if (state->cp == state->cpend) {
00546         state->result = NewRENode(state, REOP_EMPTY);
00547         return (state->result != NULL);
00548     }
00549 
00550     operatorStack = (REOpData *)
00551         JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
00552     if (!operatorStack)
00553         return JS_FALSE;
00554 
00555     operandStack = (RENode **)
00556         JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
00557     if (!operandStack)
00558         goto out;
00559 
00560     for (;;) {
00561         parenIndex = state->parenCount;
00562         if (state->cp == state->cpend) {
00563             /*
00564              * If we are at the end of the regexp and we're short one or more
00565              * operands, the regexp must have the form /x|/ or some such, with
00566              * left parentheses making us short more than one operand.
00567              */
00568             if (operatorSP >= operandSP) {
00569                 operand = NewRENode(state, REOP_EMPTY);
00570                 if (!operand)
00571                     goto out;
00572                 goto pushOperand;
00573             }
00574         } else {
00575             switch (*state->cp) {
00576             case '(':
00577                 ++state->cp;
00578                 if (state->cp + 1 < state->cpend &&
00579                     *state->cp == '?' &&
00580                     (state->cp[1] == '=' ||
00581                      state->cp[1] == '!' ||
00582                      state->cp[1] == ':')) {
00583                     switch (state->cp[1]) {
00584                     case '=':
00585                         op = REOP_ASSERT;
00586                         /* ASSERT, <next>, ... ASSERTTEST */
00587                         state->progLength += 4;
00588                         break;
00589                     case '!':
00590                         op = REOP_ASSERT_NOT;
00591                         /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
00592                         state->progLength += 4;
00593                         break;
00594                     default:
00595                         op = REOP_LPARENNON;
00596                         break;
00597                     }
00598                     state->cp += 2;
00599                 } else {
00600                     op = REOP_LPAREN;
00601                     /* LPAREN, <index>, ... RPAREN, <index> */
00602                     state->progLength
00603                         += 2 * (1 + GetCompactIndexWidth(parenIndex));
00604                     state->parenCount++;
00605                     if (state->parenCount == 65535) {
00606                         js_ReportCompileErrorNumber(state->context,
00607                                                     state->tokenStream,
00608                                                     JSREPORT_TS |
00609                                                     JSREPORT_ERROR,
00610                                                     JSMSG_TOO_MANY_PARENS);
00611                         goto out;
00612                     }
00613                 }
00614                 goto pushOperator;
00615 
00616             case ')':
00617                 /*
00618                  * If there's no stacked open parenthesis, throw syntax error.
00619                  */
00620                 for (i = operatorSP - 1; ; i--) {
00621                     if (i < 0) {
00622                         js_ReportCompileErrorNumber(state->context,
00623                                                     state->tokenStream,
00624                                                     JSREPORT_TS |
00625                                                     JSREPORT_ERROR,
00626                                                     JSMSG_UNMATCHED_RIGHT_PAREN);
00627                         goto out;
00628                     }
00629                     if (operatorStack[i].op == REOP_ASSERT ||
00630                         operatorStack[i].op == REOP_ASSERT_NOT ||
00631                         operatorStack[i].op == REOP_LPARENNON ||
00632                         operatorStack[i].op == REOP_LPAREN) {
00633                         break;
00634                     }
00635                 }
00636                 /* FALL THROUGH */
00637 
00638             case '|':
00639                 /* Expected an operand before these, so make an empty one */
00640                 operand = NewRENode(state, REOP_EMPTY);
00641                 if (!operand)
00642                     goto out;
00643                 goto pushOperand;
00644 
00645             default:
00646                 if (!ParseTerm(state))
00647                     goto out;
00648                 operand = state->result;
00649 pushOperand:
00650                 if (operandSP == operandStackSize) {
00651                     operandStackSize += operandStackSize;
00652                     operandStack = (RENode **)
00653                         JS_realloc(state->context, operandStack,
00654                                    sizeof(RENode *) * operandStackSize);
00655                     if (!operandStack)
00656                         goto out;
00657                 }
00658                 operandStack[operandSP++] = operand;
00659                 break;
00660             }
00661         }
00662 
00663         /* At the end; process remaining operators. */
00664 restartOperator:
00665         if (state->cp == state->cpend) {
00666             while (operatorSP) {
00667                 --operatorSP;
00668                 if (!ProcessOp(state, &operatorStack[operatorSP],
00669                                operandStack, operandSP))
00670                     goto out;
00671                 --operandSP;
00672             }
00673             JS_ASSERT(operandSP == 1);
00674             state->result = operandStack[0];
00675             result = JS_TRUE;
00676             goto out;
00677         }
00678 
00679         switch (*state->cp) {
00680         case '|':
00681             /* Process any stacked 'concat' operators */
00682             ++state->cp;
00683             while (operatorSP &&
00684                    operatorStack[operatorSP - 1].op == REOP_CONCAT) {
00685                 --operatorSP;
00686                 if (!ProcessOp(state, &operatorStack[operatorSP],
00687                                operandStack, operandSP)) {
00688                     goto out;
00689                 }
00690                 --operandSP;
00691             }
00692             op = REOP_ALT;
00693             goto pushOperator;
00694 
00695         case ')':
00696             /*
00697              * If there's no stacked open parenthesis, throw syntax error.
00698              */
00699             for (i = operatorSP - 1; ; i--) {
00700                 if (i < 0) {
00701                     js_ReportCompileErrorNumber(state->context,
00702                                                 state->tokenStream,
00703                                                 JSREPORT_TS | JSREPORT_ERROR,
00704                                                 JSMSG_UNMATCHED_RIGHT_PAREN);
00705                     goto out;
00706                 }
00707                 if (operatorStack[i].op == REOP_ASSERT ||
00708                     operatorStack[i].op == REOP_ASSERT_NOT ||
00709                     operatorStack[i].op == REOP_LPARENNON ||
00710                     operatorStack[i].op == REOP_LPAREN) {
00711                     break;
00712                 }
00713             }
00714             ++state->cp;
00715 
00716             /* Process everything on the stack until the open parenthesis. */
00717             for (;;) {
00718                 JS_ASSERT(operatorSP);
00719                 --operatorSP;
00720                 switch (operatorStack[operatorSP].op) {
00721                 case REOP_ASSERT:
00722                 case REOP_ASSERT_NOT:
00723                 case REOP_LPAREN:
00724                     operand = NewRENode(state, operatorStack[operatorSP].op);
00725                     if (!operand)
00726                         goto out;
00727                     operand->u.parenIndex =
00728                         operatorStack[operatorSP].parenIndex;
00729                     JS_ASSERT(operandSP);
00730                     operand->kid = operandStack[operandSP - 1];
00731                     operandStack[operandSP - 1] = operand;
00732                     if (state->treeDepth == TREE_DEPTH_MAX) {
00733                         js_ReportCompileErrorNumber(state->context,
00734                                                     state->tokenStream,
00735                                                     JSREPORT_TS |
00736                                                     JSREPORT_ERROR,
00737                                                     JSMSG_REGEXP_TOO_COMPLEX);
00738                         goto out;
00739                     }
00740                     ++state->treeDepth;
00741                     /* FALL THROUGH */
00742 
00743                 case REOP_LPARENNON:
00744                     state->result = operandStack[operandSP - 1];
00745                     if (!ParseQuantifier(state))
00746                         goto out;
00747                     operandStack[operandSP - 1] = state->result;
00748                     goto restartOperator;
00749                 default:
00750                     if (!ProcessOp(state, &operatorStack[operatorSP],
00751                                    operandStack, operandSP))
00752                         goto out;
00753                     --operandSP;
00754                     break;
00755                 }
00756             }
00757             break;
00758 
00759         case '{':
00760         {
00761             const jschar *errp = state->cp;
00762 
00763             if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
00764                 /*
00765                  * This didn't even scan correctly as a quantifier, so we should
00766                  * treat it as flat.
00767                  */
00768                 op = REOP_CONCAT;
00769                 goto pushOperator;
00770             }
00771 
00772             state->cp = errp;
00773             /* FALL THROUGH */
00774         }
00775 
00776         case '+':
00777         case '*':
00778         case '?':
00779             js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
00780                                           JSREPORT_TS | JSREPORT_ERROR,
00781                                           JSMSG_BAD_QUANTIFIER, state->cp);
00782             result = JS_FALSE;
00783             goto out;
00784 
00785         default:
00786             /* Anything else is the start of the next term. */
00787             op = REOP_CONCAT;
00788 pushOperator:
00789             if (operatorSP == operatorStackSize) {
00790                 operatorStackSize += operatorStackSize;
00791                 operatorStack = (REOpData *)
00792                     JS_realloc(state->context, operatorStack,
00793                                sizeof(REOpData) * operatorStackSize);
00794                 if (!operatorStack)
00795                     goto out;
00796             }
00797             operatorStack[operatorSP].op = op;
00798             operatorStack[operatorSP].errPos = state->cp;
00799             operatorStack[operatorSP++].parenIndex = parenIndex;
00800             break;
00801         }
00802     }
00803 out:
00804     if (operatorStack)
00805         JS_free(state->context, operatorStack);
00806     if (operandStack)
00807         JS_free(state->context, operandStack);
00808     return result;
00809 }
00810 
00811 /*
00812  * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
00813  * its being on the stack, and to propagate errors to its callers.
00814  */
00815 #define JSREG_FIND_PAREN_COUNT  0x8000
00816 #define JSREG_FIND_PAREN_ERROR  0x4000
00817 
00818 /*
00819  * Magic return value from FindParenCount and GetDecimalValue, to indicate
00820  * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
00821  * its findMax parameter is non-null.
00822  */
00823 #define OVERFLOW_VALUE          ((uintN)-1)
00824 
00825 static uintN
00826 FindParenCount(CompilerState *state)
00827 {
00828     CompilerState temp;
00829     int i;
00830 
00831     if (state->flags & JSREG_FIND_PAREN_COUNT)
00832         return OVERFLOW_VALUE;
00833 
00834     /*
00835      * Copy state into temp, flag it so we never report an invalid backref,
00836      * and reset its members to parse the entire regexp.  This is obviously
00837      * suboptimal, but GetDecimalValue calls us only if a backref appears to
00838      * refer to a forward parenthetical, which is rare.
00839      */
00840     temp = *state;
00841     temp.flags |= JSREG_FIND_PAREN_COUNT;
00842     temp.cp = temp.cpbegin;
00843     temp.parenCount = 0;
00844     temp.classCount = 0;
00845     temp.progLength = 0;
00846     temp.treeDepth = 0;
00847     temp.classBitmapsMem = 0;
00848     for (i = 0; i < CLASS_CACHE_SIZE; i++)
00849         temp.classCache[i].start = NULL;
00850 
00851     if (!ParseRegExp(&temp)) {
00852         state->flags |= JSREG_FIND_PAREN_ERROR;
00853         return OVERFLOW_VALUE;
00854     }
00855     return temp.parenCount;
00856 }
00857 
00858 /*
00859  * Extract and return a decimal value at state->cp.  The initial character c
00860  * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
00861  * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
00862  * state->flags to discover whether an error occurred under findMax.
00863  */
00864 static uintN
00865 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
00866                 CompilerState *state)
00867 {
00868     uintN value = JS7_UNDEC(c);
00869     JSBool overflow = (value > max && (!findMax || value > findMax(state)));
00870 
00871     /* The following restriction allows simpler overflow checks. */
00872     JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
00873     while (state->cp < state->cpend) {
00874         c = *state->cp;
00875         if (!JS7_ISDEC(c))
00876             break;
00877         value = 10 * value + JS7_UNDEC(c);
00878         if (!overflow && value > max && (!findMax || value > findMax(state)))
00879             overflow = JS_TRUE;
00880         ++state->cp;
00881     }
00882     return overflow ? OVERFLOW_VALUE : value;
00883 }
00884 
00885 /*
00886  * Calculate the total size of the bitmap required for a class expression.
00887  */
00888 static JSBool
00889 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
00890                     const jschar *end)
00891 {
00892     uintN max = 0;
00893     JSBool inRange = JS_FALSE;
00894     jschar c, rangeStart = 0;
00895     uintN n, digit, nDigits, i;
00896 
00897     target->u.ucclass.bmsize = 0;
00898     target->u.ucclass.sense = JS_TRUE;
00899 
00900     if (src == end)
00901         return JS_TRUE;
00902 
00903     if (*src == '^') {
00904         ++src;
00905         target->u.ucclass.sense = JS_FALSE;
00906     }
00907 
00908     while (src != end) {
00909         uintN localMax = 0;
00910         switch (*src) {
00911         case '\\':
00912             ++src;
00913             c = *src++;
00914             switch (c) {
00915             case 'b':
00916                 localMax = 0x8;
00917                 break;
00918             case 'f':
00919                 localMax = 0xC;
00920                 break;
00921             case 'n':
00922                 localMax = 0xA;
00923                 break;
00924             case 'r':
00925                 localMax = 0xD;
00926                 break;
00927             case 't':
00928                 localMax = 0x9;
00929                 break;
00930             case 'v':
00931                 localMax = 0xB;
00932                 break;
00933             case 'c':
00934                 if (src < end && RE_IS_LETTER(*src)) {
00935                     localMax = (jschar) (*src++ & 0x1F);
00936                 } else {
00937                     --src;
00938                     localMax = '\\';
00939                 }
00940                 break;
00941             case 'x':
00942                 nDigits = 2;
00943                 goto lexHex;
00944             case 'u':
00945                 nDigits = 4;
00946 lexHex:
00947                 n = 0;
00948                 for (i = 0; (i < nDigits) && (src < end); i++) {
00949                     c = *src++;
00950                     if (!isASCIIHexDigit(c, &digit)) {
00951                         /*
00952                          * Back off to accepting the original
00953                          *'\' as a literal.
00954                          */
00955                         src -= i + 1;
00956                         n = '\\';
00957                         break;
00958                     }
00959                     n = (n << 4) | digit;
00960                 }
00961                 localMax = n;
00962                 break;
00963             case 'd':
00964                 if (inRange) {
00965                     JS_ReportErrorNumber(state->context,
00966                                          js_GetErrorMessage, NULL,
00967                                          JSMSG_BAD_CLASS_RANGE);
00968                     return JS_FALSE;
00969                 }
00970                 localMax = '9';
00971                 break;
00972             case 'D':
00973             case 's':
00974             case 'S':
00975             case 'w':
00976             case 'W':
00977                 if (inRange) {
00978                     JS_ReportErrorNumber(state->context,
00979                                          js_GetErrorMessage, NULL,
00980                                          JSMSG_BAD_CLASS_RANGE);
00981                     return JS_FALSE;
00982                 }
00983                 target->u.ucclass.bmsize = 65535;
00984                 return JS_TRUE;
00985             case '0':
00986             case '1':
00987             case '2':
00988             case '3':
00989             case '4':
00990             case '5':
00991             case '6':
00992             case '7':
00993                 /*
00994                  *  This is a non-ECMA extension - decimal escapes (in this
00995                  *  case, octal!) are supposed to be an error inside class
00996                  *  ranges, but supported here for backwards compatibility.
00997                  *
00998                  */
00999                 n = JS7_UNDEC(c);
01000                 c = *src;
01001                 if ('0' <= c && c <= '7') {
01002                     src++;
01003                     n = 8 * n + JS7_UNDEC(c);
01004                     c = *src;
01005                     if ('0' <= c && c <= '7') {
01006                         src++;
01007                         i = 8 * n + JS7_UNDEC(c);
01008                         if (i <= 0377)
01009                             n = i;
01010                         else
01011                             src--;
01012                     }
01013                 }
01014                 localMax = n;
01015                 break;
01016 
01017             default:
01018                 localMax = c;
01019                 break;
01020             }
01021             break;
01022         default:
01023             localMax = *src++;
01024             break;
01025         }
01026         if (state->flags & JSREG_FOLD) {
01027             c = JS_MAX(upcase((jschar) localMax), downcase((jschar) localMax));
01028             if (c > localMax)
01029                 localMax = c;
01030         }
01031         if (inRange) {
01032             if (rangeStart > localMax) {
01033                 JS_ReportErrorNumber(state->context,
01034                                      js_GetErrorMessage, NULL,
01035                                      JSMSG_BAD_CLASS_RANGE);
01036                 return JS_FALSE;
01037             }
01038             inRange = JS_FALSE;
01039         } else {
01040             if (src < end - 1) {
01041                 if (*src == '-') {
01042                     ++src;
01043                     inRange = JS_TRUE;
01044                     rangeStart = (jschar)localMax;
01045                     continue;
01046                 }
01047             }
01048         }
01049         if (localMax > max)
01050             max = localMax;
01051     }
01052     target->u.ucclass.bmsize = max;
01053     return JS_TRUE;
01054 }
01055 
01056 /*
01057  *  item:       assertion               An item is either an assertion or
01058  *              quantatom               a quantified atom.
01059  *
01060  *  assertion:  '^'                     Assertions match beginning of string
01061  *                                      (or line if the class static property
01062  *                                      RegExp.multiline is true).
01063  *              '$'                     End of string (or line if the class
01064  *                                      static property RegExp.multiline is
01065  *                                      true).
01066  *              '\b'                    Word boundary (between \w and \W).
01067  *              '\B'                    Word non-boundary.
01068  *
01069  *  quantatom:  atom                    An unquantified atom.
01070  *              quantatom '{' n ',' m '}'
01071  *                                      Atom must occur between n and m times.
01072  *              quantatom '{' n ',' '}' Atom must occur at least n times.
01073  *              quantatom '{' n '}'     Atom must occur exactly n times.
01074  *              quantatom '*'           Zero or more times (same as {0,}).
01075  *              quantatom '+'           One or more times (same as {1,}).
01076  *              quantatom '?'           Zero or one time (same as {0,1}).
01077  *
01078  *              any of which can be optionally followed by '?' for ungreedy
01079  *
01080  *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
01081  *                                      can be addressed using a backreference,
01082  *                                      see '\' n below).
01083  *              '.'                     Matches any char except '\n'.
01084  *              '[' classlist ']'       A character class.
01085  *              '[' '^' classlist ']'   A negated character class.
01086  *              '\f'                    Form Feed.
01087  *              '\n'                    Newline (Line Feed).
01088  *              '\r'                    Carriage Return.
01089  *              '\t'                    Horizontal Tab.
01090  *              '\v'                    Vertical Tab.
01091  *              '\d'                    A digit (same as [0-9]).
01092  *              '\D'                    A non-digit.
01093  *              '\w'                    A word character, [0-9a-z_A-Z].
01094  *              '\W'                    A non-word character.
01095  *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
01096  *              '\S'                    A non-whitespace character.
01097  *              '\' n                   A backreference to the nth (n decimal
01098  *                                      and positive) parenthesized expression.
01099  *              '\' octal               An octal escape sequence (octal must be
01100  *                                      two or three digits long, unless it is
01101  *                                      0 for the null character).
01102  *              '\x' hex                A hex escape (hex must be two digits).
01103  *              '\u' unicode            A unicode escape (must be four digits).
01104  *              '\c' ctrl               A control character, ctrl is a letter.
01105  *              '\' literalatomchar     Any character except one of the above
01106  *                                      that follow '\' in an atom.
01107  *              otheratomchar           Any character not first among the other
01108  *                                      atom right-hand sides.
01109  */
01110 static JSBool
01111 ParseTerm(CompilerState *state)
01112 {
01113     jschar c = *state->cp++;
01114     uintN nDigits;
01115     uintN num, tmp, n, i;
01116     const jschar *termStart;
01117 
01118     switch (c) {
01119     /* assertions and atoms */
01120     case '^':
01121         state->result = NewRENode(state, REOP_BOL);
01122         if (!state->result)
01123             return JS_FALSE;
01124         state->progLength++;
01125         return JS_TRUE;
01126     case '$':
01127         state->result = NewRENode(state, REOP_EOL);
01128         if (!state->result)
01129             return JS_FALSE;
01130         state->progLength++;
01131         return JS_TRUE;
01132     case '\\':
01133         if (state->cp >= state->cpend) {
01134             /* a trailing '\' is an error */
01135             js_ReportCompileErrorNumber(state->context, state->tokenStream,
01136                                         JSREPORT_TS | JSREPORT_ERROR,
01137                                         JSMSG_TRAILING_SLASH);
01138             return JS_FALSE;
01139         }
01140         c = *state->cp++;
01141         switch (c) {
01142         /* assertion escapes */
01143         case 'b' :
01144             state->result = NewRENode(state, REOP_WBDRY);
01145             if (!state->result)
01146                 return JS_FALSE;
01147             state->progLength++;
01148             return JS_TRUE;
01149         case 'B':
01150             state->result = NewRENode(state, REOP_WNONBDRY);
01151             if (!state->result)
01152                 return JS_FALSE;
01153             state->progLength++;
01154             return JS_TRUE;
01155         /* Decimal escape */
01156         case '0':
01157             /* Give a strict warning. See also the note below. */
01158             if (!js_ReportCompileErrorNumber(state->context,
01159                                              state->tokenStream,
01160                                              JSREPORT_TS |
01161                                              JSREPORT_WARNING |
01162                                              JSREPORT_STRICT,
01163                                              JSMSG_INVALID_BACKREF)) {
01164                 return JS_FALSE;
01165             }
01166      doOctal:
01167             num = 0;
01168             while (state->cp < state->cpend) {
01169                 c = *state->cp;
01170                 if (c < '0' || '7' < c)
01171                     break;
01172                 state->cp++;
01173                 tmp = 8 * num + (uintN)JS7_UNDEC(c);
01174                 if (tmp > 0377)
01175                     break;
01176                 num = tmp;
01177             }
01178             c = (jschar)num;
01179     doFlat:
01180             state->result = NewRENode(state, REOP_FLAT);
01181             if (!state->result)
01182                 return JS_FALSE;
01183             state->result->u.flat.chr = c;
01184             state->result->u.flat.length = 1;
01185             state->progLength += 3;
01186             break;
01187         case '1':
01188         case '2':
01189         case '3':
01190         case '4':
01191         case '5':
01192         case '6':
01193         case '7':
01194         case '8':
01195         case '9':
01196             termStart = state->cp - 1;
01197             num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
01198             if (state->flags & JSREG_FIND_PAREN_ERROR)
01199                 return JS_FALSE;
01200             if (num == OVERFLOW_VALUE) {
01201                 /* Give a strict mode warning. */
01202                 if (!js_ReportCompileErrorNumber(state->context,
01203                                                  state->tokenStream,
01204                                                  JSREPORT_TS |
01205                                                  JSREPORT_WARNING |
01206                                                  JSREPORT_STRICT,
01207                                                  (c >= '8')
01208                                                  ? JSMSG_INVALID_BACKREF
01209                                                  : JSMSG_BAD_BACKREF)) {
01210                     return JS_FALSE;
01211                 }
01212 
01213                 /*
01214                  * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
01215                  * error here. However, for compatibility with IE, we treat the
01216                  * whole backref as flat if the first character in it is not a
01217                  * valid octal character, and as an octal escape otherwise.
01218                  */
01219                 state->cp = termStart;
01220                 if (c >= '8') {
01221                     /* Treat this as flat. termStart - 1 is the \. */
01222                     c = '\\';
01223                     goto asFlat;
01224                 }
01225 
01226                 /* Treat this as an octal escape. */
01227                 goto doOctal;
01228             }
01229             JS_ASSERT(1 <= num && num <= 0x10000);
01230             state->result = NewRENode(state, REOP_BACKREF);
01231             if (!state->result)
01232                 return JS_FALSE;
01233             state->result->u.parenIndex = num - 1;
01234             state->progLength
01235                 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
01236             break;
01237         /* Control escape */
01238         case 'f':
01239             c = 0xC;
01240             goto doFlat;
01241         case 'n':
01242             c = 0xA;
01243             goto doFlat;
01244         case 'r':
01245             c = 0xD;
01246             goto doFlat;
01247         case 't':
01248             c = 0x9;
01249             goto doFlat;
01250         case 'v':
01251             c = 0xB;
01252             goto doFlat;
01253         /* Control letter */
01254         case 'c':
01255             if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
01256                 c = (jschar) (*state->cp++ & 0x1F);
01257             } else {
01258                 /* back off to accepting the original '\' as a literal */
01259                 --state->cp;
01260                 c = '\\';
01261             }
01262             goto doFlat;
01263         /* HexEscapeSequence */
01264         case 'x':
01265             nDigits = 2;
01266             goto lexHex;
01267         /* UnicodeEscapeSequence */
01268         case 'u':
01269             nDigits = 4;
01270 lexHex:
01271             n = 0;
01272             for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
01273                 uintN digit;
01274                 c = *state->cp++;
01275                 if (!isASCIIHexDigit(c, &digit)) {
01276                     /*
01277                      * Back off to accepting the original 'u' or 'x' as a
01278                      * literal.
01279                      */
01280                     state->cp -= i + 2;
01281                     n = *state->cp++;
01282                     break;
01283                 }
01284                 n = (n << 4) | digit;
01285             }
01286             c = (jschar) n;
01287             goto doFlat;
01288         /* Character class escapes */
01289         case 'd':
01290             state->result = NewRENode(state, REOP_DIGIT);
01291 doSimple:
01292             if (!state->result)
01293                 return JS_FALSE;
01294             state->progLength++;
01295             break;
01296         case 'D':
01297             state->result = NewRENode(state, REOP_NONDIGIT);
01298             goto doSimple;
01299         case 's':
01300             state->result = NewRENode(state, REOP_SPACE);
01301             goto doSimple;
01302         case 'S':
01303             state->result = NewRENode(state, REOP_NONSPACE);
01304             goto doSimple;
01305         case 'w':
01306             state->result = NewRENode(state, REOP_ALNUM);
01307             goto doSimple;
01308         case 'W':
01309             state->result = NewRENode(state, REOP_NONALNUM);
01310             goto doSimple;
01311         /* IdentityEscape */
01312         default:
01313             state->result = NewRENode(state, REOP_FLAT);
01314             if (!state->result)
01315                 return JS_FALSE;
01316             state->result->u.flat.chr = c;
01317             state->result->u.flat.length = 1;
01318             state->result->kid = (void *) (state->cp - 1);
01319             state->progLength += 3;
01320             break;
01321         }
01322         break;
01323     case '[':
01324         state->result = NewRENode(state, REOP_CLASS);
01325         if (!state->result)
01326             return JS_FALSE;
01327         termStart = state->cp;
01328         state->result->u.ucclass.startIndex = termStart - state->cpbegin;
01329         for (;;) {
01330             if (state->cp == state->cpend) {
01331                 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
01332                                               JSREPORT_TS | JSREPORT_ERROR,
01333                                               JSMSG_UNTERM_CLASS, termStart);
01334 
01335                 return JS_FALSE;
01336             }
01337             if (*state->cp == '\\') {
01338                 state->cp++;
01339                 if (state->cp != state->cpend)
01340                     state->cp++;
01341                 continue;
01342             }
01343             if (*state->cp == ']') {
01344                 state->result->u.ucclass.kidlen = state->cp - termStart;
01345                 break;
01346             }
01347             state->cp++;
01348         }
01349         for (i = 0; i < CLASS_CACHE_SIZE; i++) {
01350             if (!state->classCache[i].start) {
01351                 state->classCache[i].start = termStart;
01352                 state->classCache[i].length = state->result->u.ucclass.kidlen;
01353                 state->classCache[i].index = state->classCount;
01354                 break;
01355             }
01356             if (state->classCache[i].length ==
01357                 state->result->u.ucclass.kidlen) {
01358                 for (n = 0; ; n++) {
01359                     if (n == state->classCache[i].length) {
01360                         state->result->u.ucclass.index
01361                             = state->classCache[i].index;
01362                         goto claim;
01363                     }
01364                     if (state->classCache[i].start[n] != termStart[n])
01365                         break;
01366                 }
01367             }
01368         }
01369         state->result->u.ucclass.index = state->classCount++;
01370 
01371     claim:
01372         /*
01373          * Call CalculateBitmapSize now as we want any errors it finds
01374          * to be reported during the parse phase, not at execution.
01375          */
01376         if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
01377             return JS_FALSE;
01378         /*
01379          * Update classBitmapsMem with number of bytes to hold bmsize bits,
01380          * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
01381          * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
01382          */
01383         n = (state->result->u.ucclass.bmsize >> 3) + 1;
01384         if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
01385             js_ReportCompileErrorNumber(state->context, state->tokenStream,
01386                                         JSREPORT_TS | JSREPORT_ERROR,
01387                                         JSMSG_REGEXP_TOO_COMPLEX);
01388             return JS_FALSE;
01389         }
01390         state->classBitmapsMem += n;
01391         /* CLASS, <index> */
01392         state->progLength
01393             += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
01394         break;
01395 
01396     case '.':
01397         state->result = NewRENode(state, REOP_DOT);
01398         goto doSimple;
01399 
01400     case '{':
01401     {
01402         const jschar *errp = state->cp--;
01403         intN err;
01404 
01405         err = ParseMinMaxQuantifier(state, JS_TRUE);
01406         state->cp = errp;
01407 
01408         if (err < 0)
01409             goto asFlat;
01410 
01411         /* FALL THROUGH */
01412     }
01413     case '*':
01414     case '+':
01415     case '?':
01416         js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
01417                                       JSREPORT_TS | JSREPORT_ERROR,
01418                                       JSMSG_BAD_QUANTIFIER, state->cp - 1);
01419         return JS_FALSE;
01420     default:
01421 asFlat:
01422         state->result = NewRENode(state, REOP_FLAT);
01423         if (!state->result)
01424             return JS_FALSE;
01425         state->result->u.flat.chr = c;
01426         state->result->u.flat.length = 1;
01427         state->result->kid = (void *) (state->cp - 1);
01428         state->progLength += 3;
01429         break;
01430     }
01431     return ParseQuantifier(state);
01432 }
01433 
01434 static JSBool
01435 ParseQuantifier(CompilerState *state)
01436 {
01437     RENode *term;
01438     term = state->result;
01439     if (state->cp < state->cpend) {
01440         switch (*state->cp) {
01441         case '+':
01442             state->result = NewRENode(state, REOP_QUANT);
01443             if (!state->result)
01444                 return JS_FALSE;
01445             state->result->u.range.min = 1;
01446             state->result->u.range.max = (uintN)-1;
01447             /* <PLUS>, <next> ... <ENDCHILD> */
01448             state->progLength += 4;
01449             goto quantifier;
01450         case '*':
01451             state->result = NewRENode(state, REOP_QUANT);
01452             if (!state->result)
01453                 return JS_FALSE;
01454             state->result->u.range.min = 0;
01455             state->result->u.range.max = (uintN)-1;
01456             /* <STAR>, <next> ... <ENDCHILD> */
01457             state->progLength += 4;
01458             goto quantifier;
01459         case '?':
01460             state->result = NewRENode(state, REOP_QUANT);
01461             if (!state->result)
01462                 return JS_FALSE;
01463             state->result->u.range.min = 0;
01464             state->result->u.range.max = 1;
01465             /* <OPT>, <next> ... <ENDCHILD> */
01466             state->progLength += 4;
01467             goto quantifier;
01468         case '{':       /* balance '}' */
01469         {
01470             intN err;
01471             const jschar *errp = state->cp;
01472 
01473             err = ParseMinMaxQuantifier(state, JS_FALSE);
01474             if (err == 0)
01475                 goto quantifier;
01476             if (err == -1)
01477                 return JS_TRUE;
01478 
01479             js_ReportCompileErrorNumberUC(state->context,
01480                                           state->tokenStream,
01481                                           JSREPORT_TS | JSREPORT_ERROR,
01482                                           err, errp);
01483             return JS_FALSE;
01484         }
01485         default:;
01486         }
01487     }
01488     return JS_TRUE;
01489 
01490 quantifier:
01491     if (state->treeDepth == TREE_DEPTH_MAX) {
01492         js_ReportCompileErrorNumber(state->context, state->tokenStream,
01493                                     JSREPORT_TS | JSREPORT_ERROR,
01494                                     JSMSG_REGEXP_TOO_COMPLEX);
01495         return JS_FALSE;
01496     }
01497 
01498     ++state->treeDepth;
01499     ++state->cp;
01500     state->result->kid = term;
01501     if (state->cp < state->cpend && *state->cp == '?') {
01502         ++state->cp;
01503         state->result->u.range.greedy = JS_FALSE;
01504     } else {
01505         state->result->u.range.greedy = JS_TRUE;
01506     }
01507     return JS_TRUE;
01508 }
01509 
01510 static intN
01511 ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
01512 {
01513     uintN min, max;
01514     jschar c;
01515     const jschar *errp = state->cp++;
01516 
01517     c = *state->cp;
01518     if (JS7_ISDEC(c)) {
01519         ++state->cp;
01520         min = GetDecimalValue(c, 0xFFFF, NULL, state);
01521         c = *state->cp;
01522 
01523         if (!ignoreValues && min == OVERFLOW_VALUE)
01524             return JSMSG_MIN_TOO_BIG;
01525 
01526         if (c == ',') {
01527             c = *++state->cp;
01528             if (JS7_ISDEC(c)) {
01529                 ++state->cp;
01530                 max = GetDecimalValue(c, 0xFFFF, NULL, state);
01531                 c = *state->cp;
01532                 if (!ignoreValues && max == OVERFLOW_VALUE)
01533                     return JSMSG_MAX_TOO_BIG;
01534                 if (!ignoreValues && min > max)
01535                     return JSMSG_OUT_OF_ORDER;
01536             } else {
01537                 max = (uintN)-1;
01538             }
01539         } else {
01540             max = min;
01541         }
01542         if (c == '}') {
01543             state->result = NewRENode(state, REOP_QUANT);
01544             if (!state->result)
01545                 return JS_FALSE;
01546             state->result->u.range.min = min;
01547             state->result->u.range.max = max;
01548             /*
01549              * QUANT, <min>, <max>, <next> ... <ENDCHILD>
01550              * where <max> is written as compact(max+1) to make
01551              * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
01552              */
01553             state->progLength += (1 + GetCompactIndexWidth(min)
01554                                   + GetCompactIndexWidth(max + 1)
01555                                   +3);
01556             return 0;
01557         }
01558     }
01559 
01560     state->cp = errp;
01561     return -1;
01562 }
01563 
01564 static JSBool
01565 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
01566 {
01567     ptrdiff_t offset = target - jump;
01568 
01569     /* Check that target really points forward. */
01570     JS_ASSERT(offset >= 2);
01571     if ((size_t)offset > OFFSET_MAX)
01572         return JS_FALSE;
01573 
01574     jump[0] = JUMP_OFFSET_HI(offset);
01575     jump[1] = JUMP_OFFSET_LO(offset);
01576     return JS_TRUE;
01577 }
01578 
01579 /*
01580  * Generate bytecode for the tree rooted at t using an explicit stack instead
01581  * of recursion.
01582  */
01583 static jsbytecode *
01584 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
01585                jsbytecode *pc, RENode *t)
01586 {
01587     EmitStateStackEntry *emitStateSP, *emitStateStack;
01588     RECharSet *charSet;
01589     REOp op;
01590 
01591     if (treeDepth == 0) {
01592         emitStateStack = NULL;
01593     } else {
01594         emitStateStack =
01595             (EmitStateStackEntry *)JS_malloc(state->context,
01596                                              sizeof(EmitStateStackEntry) *
01597                                              treeDepth);
01598         if (!emitStateStack)
01599             return NULL;
01600     }
01601     emitStateSP = emitStateStack;
01602     op = t->op;
01603 
01604     for (;;) {
01605         *pc++ = op;
01606         switch (op) {
01607         case REOP_EMPTY:
01608             --pc;
01609             break;
01610 
01611         case REOP_ALTPREREQ2:
01612         case REOP_ALTPREREQ:
01613             JS_ASSERT(emitStateSP);
01614             emitStateSP->altHead = pc - 1;
01615             emitStateSP->endTermFixup = pc;
01616             pc += OFFSET_LEN;
01617             SET_ARG(pc, t->u.altprereq.ch1);
01618             pc += ARG_LEN;
01619             SET_ARG(pc, t->u.altprereq.ch2);
01620             pc += ARG_LEN;
01621 
01622             emitStateSP->nextAltFixup = pc;    /* offset to next alternate */
01623             pc += OFFSET_LEN;
01624 
01625             emitStateSP->continueNode = t;
01626             emitStateSP->continueOp = REOP_JUMP;
01627             emitStateSP->jumpToJumpFlag = JS_FALSE;
01628             ++emitStateSP;
01629             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01630             t = (RENode *) t->kid;
01631             op = t->op;
01632             continue;
01633 
01634         case REOP_JUMP:
01635             emitStateSP->nextTermFixup = pc;    /* offset to following term */
01636             pc += OFFSET_LEN;
01637             if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
01638                 goto jump_too_big;
01639             emitStateSP->continueOp = REOP_ENDALT;
01640             ++emitStateSP;
01641             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01642             t = t->u.kid2;
01643             op = t->op;
01644             continue;
01645 
01646         case REOP_ENDALT:
01647             /*
01648              * If we already patched emitStateSP->nextTermFixup to jump to
01649              * a nearer jump, to avoid 16-bit immediate offset overflow, we
01650              * are done here.
01651              */
01652             if (emitStateSP->jumpToJumpFlag)
01653                 break;
01654 
01655             /*
01656              * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
01657              * REOP_ENDALT is executed only on successful match of the last
01658              * alternate in a group.
01659              */
01660             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
01661                 goto jump_too_big;
01662             if (t->op != REOP_ALT) {
01663                 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
01664                     goto jump_too_big;
01665             }
01666 
01667             /*
01668              * If the program is bigger than the REOP_JUMP offset range, then
01669              * we must check for alternates before this one that are part of
01670              * the same group, and fix up their jump offsets to target jumps
01671              * close enough to fit in a 16-bit unsigned offset immediate.
01672              */
01673             if ((size_t)(pc - re->program) > OFFSET_MAX &&
01674                 emitStateSP > emitStateStack) {
01675                 EmitStateStackEntry *esp, *esp2;
01676                 jsbytecode *alt, *jump;
01677                 ptrdiff_t span, header;
01678 
01679                 esp2 = emitStateSP;
01680                 alt = esp2->altHead;
01681                 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
01682                     if (esp->continueOp == REOP_ENDALT &&
01683                         !esp->jumpToJumpFlag &&
01684                         esp->nextTermFixup + OFFSET_LEN == alt &&
01685                         (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
01686                                        ? esp->endTermFixup
01687                                        : esp->nextTermFixup)) > OFFSET_MAX) {
01688                         alt = esp->altHead;
01689                         jump = esp->nextTermFixup;
01690 
01691                         /*
01692                          * The span must be 1 less than the distance from
01693                          * jump offset to jump offset, so we actually jump
01694                          * to a REOP_JUMP bytecode, not to its offset!
01695                          */
01696                         for (;;) {
01697                             JS_ASSERT(jump < esp2->nextTermFixup);
01698                             span = esp2->nextTermFixup - jump - 1;
01699                             if ((size_t)span <= OFFSET_MAX)
01700                                 break;
01701                             do {
01702                                 if (--esp2 == esp)
01703                                     goto jump_too_big;
01704                             } while (esp2->continueOp != REOP_ENDALT);
01705                         }
01706 
01707                         jump[0] = JUMP_OFFSET_HI(span);
01708                         jump[1] = JUMP_OFFSET_LO(span);
01709 
01710                         if (esp->continueNode->op != REOP_ALT) {
01711                             /*
01712                              * We must patch the offset at esp->endTermFixup
01713                              * as well, for the REOP_ALTPREREQ{,2} opcodes.
01714                              * If we're unlucky and endTermFixup is more than
01715                              * OFFSET_MAX bytes from its target, we cheat by
01716                              * jumping 6 bytes to the jump whose offset is at
01717                              * esp->nextTermFixup, which has the same target.
01718                              */
01719                             jump = esp->endTermFixup;
01720                             header = esp->nextTermFixup - jump;
01721                             span += header;
01722                             if ((size_t)span > OFFSET_MAX)
01723                                 span = header;
01724 
01725                             jump[0] = JUMP_OFFSET_HI(span);
01726                             jump[1] = JUMP_OFFSET_LO(span);
01727                         }
01728 
01729                         esp->jumpToJumpFlag = JS_TRUE;
01730                     }
01731                 }
01732             }
01733             break;
01734 
01735         case REOP_ALT:
01736             JS_ASSERT(emitStateSP);
01737             emitStateSP->altHead = pc - 1;
01738             emitStateSP->nextAltFixup = pc;     /* offset to next alternate */
01739             pc += OFFSET_LEN;
01740             emitStateSP->continueNode = t;
01741             emitStateSP->continueOp = REOP_JUMP;
01742             emitStateSP->jumpToJumpFlag = JS_FALSE;
01743             ++emitStateSP;
01744             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01745             t = t->kid;
01746             op = t->op;
01747             continue;
01748 
01749         case REOP_FLAT:
01750             /*
01751              * Coalesce FLATs if possible and if it would not increase bytecode
01752              * beyond preallocated limit. The latter happens only when bytecode
01753              * size for coalesced string with offset p and length 2 exceeds 6
01754              * bytes preallocated for 2 single char nodes, i.e. when
01755              * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
01756              * GetCompactIndexWidth(p) > 4.
01757              * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
01758              * nodes strictly decreases bytecode size, the check has to be
01759              * done only for the first coalescing.
01760              */
01761             if (t->kid &&
01762                 GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
01763             {
01764                 while (t->next &&
01765                        t->next->op == REOP_FLAT &&
01766                        (jschar*)t->kid + t->u.flat.length ==
01767                        (jschar*)t->next->kid) {
01768                     t->u.flat.length += t->next->u.flat.length;
01769                     t->next = t->next->next;
01770                 }
01771             }
01772             if (t->kid && t->u.flat.length > 1) {
01773                 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
01774                 pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
01775                 pc = WriteCompactIndex(pc, t->u.flat.length);
01776             } else if (t->u.flat.chr < 256) {
01777                 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
01778                 *pc++ = (jsbytecode) t->u.flat.chr;
01779             } else {
01780                 pc[-1] = (state->flags & JSREG_FOLD)
01781                          ? REOP_UCFLAT1i
01782                          : REOP_UCFLAT1;
01783                 SET_ARG(pc, t->u.flat.chr);
01784                 pc += ARG_LEN;
01785             }
01786             break;
01787 
01788         case REOP_LPAREN:
01789             JS_ASSERT(emitStateSP);
01790             pc = WriteCompactIndex(pc, t->u.parenIndex);
01791             emitStateSP->continueNode = t;
01792             emitStateSP->continueOp = REOP_RPAREN;
01793             ++emitStateSP;
01794             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01795             t = (RENode *) t->kid;
01796             op = t->op;
01797             continue;
01798 
01799         case REOP_RPAREN:
01800             pc = WriteCompactIndex(pc, t->u.parenIndex);
01801             break;
01802 
01803         case REOP_BACKREF:
01804             pc = WriteCompactIndex(pc, t->u.parenIndex);
01805             break;
01806 
01807         case REOP_ASSERT:
01808             JS_ASSERT(emitStateSP);
01809             emitStateSP->nextTermFixup = pc;
01810             pc += OFFSET_LEN;
01811             emitStateSP->continueNode = t;
01812             emitStateSP->continueOp = REOP_ASSERTTEST;
01813             ++emitStateSP;
01814             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01815             t = (RENode *) t->kid;
01816             op = t->op;
01817             continue;
01818 
01819         case REOP_ASSERTTEST:
01820         case REOP_ASSERTNOTTEST:
01821             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
01822                 goto jump_too_big;
01823             break;
01824 
01825         case REOP_ASSERT_NOT:
01826             JS_ASSERT(emitStateSP);
01827             emitStateSP->nextTermFixup = pc;
01828             pc += OFFSET_LEN;
01829             emitStateSP->continueNode = t;
01830             emitStateSP->continueOp = REOP_ASSERTNOTTEST;
01831             ++emitStateSP;
01832             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01833             t = (RENode *) t->kid;
01834             op = t->op;
01835             continue;
01836 
01837         case REOP_QUANT:
01838             JS_ASSERT(emitStateSP);
01839             if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
01840                 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
01841             } else if (t->u.range.min == 0 && t->u.range.max == 1) {
01842                 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
01843             } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
01844                 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
01845             } else {
01846                 if (!t->u.range.greedy)
01847                     pc[-1] = REOP_MINIMALQUANT;
01848                 pc = WriteCompactIndex(pc, t->u.range.min);
01849                 /*
01850                  * Write max + 1 to avoid using size_t(max) + 1 bytes
01851                  * for (uintN)-1 sentinel.
01852                  */
01853                 pc = WriteCompactIndex(pc, t->u.range.max + 1);
01854             }
01855             emitStateSP->nextTermFixup = pc;
01856             pc += OFFSET_LEN;
01857             emitStateSP->continueNode = t;
01858             emitStateSP->continueOp = REOP_ENDCHILD;
01859             ++emitStateSP;
01860             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
01861             t = (RENode *) t->kid;
01862             op = t->op;
01863             continue;
01864 
01865         case REOP_ENDCHILD:
01866             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
01867                 goto jump_too_big;
01868             break;
01869 
01870         case REOP_CLASS:
01871             if (!t->u.ucclass.sense)
01872                 pc[-1] = REOP_NCLASS;
01873             pc = WriteCompactIndex(pc, t->u.ucclass.index);
01874             charSet = &re->classList[t->u.ucclass.index];
01875             charSet->converted = JS_FALSE;
01876             charSet->length = t->u.ucclass.bmsize;
01877             charSet->u.src.startIndex = t->u.ucclass.startIndex;
01878             charSet->u.src.length = t->u.ucclass.kidlen;
01879             charSet->sense = t->u.ucclass.sense;
01880             break;
01881 
01882         default:
01883             break;
01884         }
01885 
01886         t = t->next;
01887         if (t) {
01888             op = t->op;
01889         } else {
01890             if (emitStateSP == emitStateStack)
01891                 break;
01892             --emitStateSP;
01893             t = emitStateSP->continueNode;
01894             op = emitStateSP->continueOp;
01895         }
01896     }
01897 
01898   cleanup:
01899     if (emitStateStack)
01900         JS_free(state->context, emitStateStack);
01901     return pc;
01902 
01903   jump_too_big:
01904     js_ReportCompileErrorNumber(state->context, state->tokenStream,
01905                                 JSREPORT_TS | JSREPORT_ERROR,
01906                                 JSMSG_REGEXP_TOO_COMPLEX);
01907     pc = NULL;
01908     goto cleanup;
01909 }
01910 
01911 
01912 JSRegExp *
01913 js_NewRegExp(JSContext *cx, JSTokenStream *ts,
01914              JSString *str, uintN flags, JSBool flat)
01915 {
01916     JSRegExp *re;
01917     void *mark;
01918     CompilerState state;
01919     size_t resize;
01920     jsbytecode *endPC;
01921     uintN i;
01922     size_t len;
01923 
01924     re = NULL;
01925     mark = JS_ARENA_MARK(&cx->tempPool);
01926     len = JSSTRING_LENGTH(str);
01927 
01928     state.context = cx;
01929     state.tokenStream = ts;
01930     state.cp = js_UndependString(cx, str);
01931     if (!state.cp)
01932         goto out;
01933     state.cpbegin = state.cp;
01934     state.cpend = state.cp + len;
01935     state.flags = flags;
01936     state.parenCount = 0;
01937     state.classCount = 0;
01938     state.progLength = 0;
01939     state.treeDepth = 0;
01940     state.classBitmapsMem = 0;
01941     for (i = 0; i < CLASS_CACHE_SIZE; i++)
01942         state.classCache[i].start = NULL;
01943 
01944     if (len != 0 && flat) {
01945         state.result = NewRENode(&state, REOP_FLAT);
01946         state.result->u.flat.chr = *state.cpbegin;
01947         state.result->u.flat.length = len;
01948         state.result->kid = (void *) state.cpbegin;
01949         /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
01950         state.progLength += 1 + GetCompactIndexWidth(0)
01951                           + GetCompactIndexWidth(len);
01952     } else {
01953         if (!ParseRegExp(&state))
01954             goto out;
01955     }
01956     resize = offsetof(JSRegExp, program) + state.progLength + 1;
01957     re = (JSRegExp *) JS_malloc(cx, resize);
01958     if (!re)
01959         goto out;
01960 
01961     re->nrefs = 1;
01962     JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
01963     re->classCount = state.classCount;
01964     if (re->classCount) {
01965         re->classList = (RECharSet *)
01966             JS_malloc(cx, re->classCount * sizeof(RECharSet));
01967         if (!re->classList) {
01968             js_DestroyRegExp(cx, re);
01969             re = NULL;
01970             goto out;
01971         }
01972         for (i = 0; i < re->classCount; i++)
01973             re->classList[i].converted = JS_FALSE;
01974     } else {
01975         re->classList = NULL;
01976     }
01977     endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
01978     if (!endPC) {
01979         js_DestroyRegExp(cx, re);
01980         re = NULL;
01981         goto out;
01982     }
01983     *endPC++ = REOP_END;
01984     /*
01985      * Check whether size was overestimated and shrink using realloc.
01986      * This is safe since no pointers to newly parsed regexp or its parts
01987      * besides re exist here.
01988      */
01989     if ((size_t)(endPC - re->program) != state.progLength + 1) {
01990         JSRegExp *tmp;
01991         JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
01992         resize = offsetof(JSRegExp, program) + (endPC - re->program);
01993         tmp = (JSRegExp *) JS_realloc(cx, re, resize);
01994         if (tmp)
01995             re = tmp;
01996     }
01997 
01998     re->flags = flags;
01999     re->cloneIndex = 0;
02000     re->parenCount = state.parenCount;
02001     re->source = str;
02002 
02003 out:
02004     JS_ARENA_RELEASE(&cx->tempPool, mark);
02005     return re;
02006 }
02007 
02008 JSRegExp *
02009 js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts,
02010                 JSString *str, JSString *opt, JSBool flat)
02011 {
02012     uintN flags;
02013     jschar *s;
02014     size_t i, n;
02015     char charBuf[2];
02016 
02017     flags = 0;
02018     if (opt) {
02019         s = JSSTRING_CHARS(opt);
02020         for (i = 0, n = JSSTRING_LENGTH(opt); i < n; i++) {
02021             switch (s[i]) {
02022             case 'g':
02023                 flags |= JSREG_GLOB;
02024                 break;
02025             case 'i':
02026                 flags |= JSREG_FOLD;
02027                 break;
02028             case 'm':
02029                 flags |= JSREG_MULTILINE;
02030                 break;
02031             default:
02032                 charBuf[0] = (char)s[i];
02033                 charBuf[1] = '\0';
02034                 js_ReportCompileErrorNumber(cx, ts,
02035                                             JSREPORT_TS | JSREPORT_ERROR,
02036                                             JSMSG_BAD_FLAG, charBuf);
02037                 return NULL;
02038             }
02039         }
02040     }
02041     return js_NewRegExp(cx, ts, str, flags, flat);
02042 }
02043 
02044 /*
02045  * Save the current state of the match - the position in the input
02046  * text as well as the position in the bytecode. The state of any
02047  * parent expressions is also saved (preceding state).
02048  * Contents of parenCount parentheses from parenIndex are also saved.
02049  */
02050 static REBackTrackData *
02051 PushBackTrackState(REGlobalData *gData, REOp op,
02052                    jsbytecode *target, REMatchState *x, const jschar *cp,
02053                    size_t parenIndex, size_t parenCount)
02054 {
02055     size_t i;
02056     REBackTrackData *result =
02057         (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
02058 
02059     size_t sz = sizeof(REBackTrackData) +
02060                 gData->stateStackTop * sizeof(REProgState) +
02061                 parenCount * sizeof(RECapture);
02062 
02063     ptrdiff_t btsize = gData->backTrackStackSize;
02064     ptrdiff_t btincr = ((char *)result + sz) -
02065                        ((char *)gData->backTrackStack + btsize);
02066 
02067     if (btincr > 0) {
02068         ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
02069 
02070         btincr = JS_ROUNDUP(btincr, btsize);
02071         JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
02072                            &gData->pool, btsize, btincr);
02073         if (!gData->backTrackStack) {
02074             JS_ReportOutOfMemory(gData->cx);
02075             gData->ok = JS_FALSE;
02076             return NULL;
02077         }
02078         gData->backTrackStackSize = btsize + btincr;
02079         result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
02080     }
02081     gData->backTrackSP = result;
02082     result->sz = gData->cursz;
02083     gData->cursz = sz;
02084 
02085     result->backtrack_op = op;
02086     result->backtrack_pc = target;
02087     result->cp = cp;
02088     result->parenCount = parenCount;
02089 
02090     result->saveStateStackTop = gData->stateStackTop;
02091     JS_ASSERT(gData->stateStackTop);
02092     memcpy(result + 1, gData->stateStack,
02093            sizeof(REProgState) * result->saveStateStackTop);
02094 
02095     if (parenCount != 0) {
02096         result->parenIndex = parenIndex;
02097         memcpy((char *)(result + 1) +
02098                sizeof(REProgState) * result->saveStateStackTop,
02099                &x->parens[parenIndex],
02100                sizeof(RECapture) * parenCount);
02101         for (i = 0; i != parenCount; i++)
02102             x->parens[parenIndex + i].index = -1;
02103     }
02104 
02105     return result;
02106 }
02107 
02108 
02109 /*
02110  *   Consecutive literal characters.
02111  */
02112 #if 0
02113 static REMatchState *
02114 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
02115              size_t length)
02116 {
02117     size_t i;
02118     if (length > gData->cpend - x->cp)
02119         return NULL;
02120     for (i = 0; i != length; i++) {
02121         if (matchChars[i] != x->cp[i])
02122             return NULL;
02123     }
02124     x->cp += length;
02125     return x;
02126 }
02127 #endif
02128 
02129 static REMatchState *
02130 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
02131               size_t length)
02132 {
02133     size_t i;
02134     JS_ASSERT(gData->cpend >= x->cp);
02135     if (length > (size_t)(gData->cpend - x->cp))
02136         return NULL;
02137     for (i = 0; i != length; i++) {
02138         if (upcase(matchChars[i]) != upcase(x->cp[i]))
02139             return NULL;
02140     }
02141     x->cp += length;
02142     return x;
02143 }
02144 
02145 /*
02146  * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
02147  * 2. If E is not a character then go to step 6.
02148  * 3. Let ch be E's character.
02149  * 4. Let A be a one-element RECharSet containing the character ch.
02150  * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
02151  * 6. E must be an integer. Let n be that integer.
02152  * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
02153  * 8. Return an internal Matcher closure that takes two arguments, a State x
02154  *    and a Continuation c, and performs the following:
02155  *     1. Let cap be x's captures internal array.
02156  *     2. Let s be cap[n].
02157  *     3. If s is undefined, then call c(x) and return its result.
02158  *     4. Let e be x's endIndex.
02159  *     5. Let len be s's length.
02160  *     6. Let f be e+len.
02161  *     7. If f>InputLength, return failure.
02162  *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
02163  *        such that Canonicalize(s[i]) is not the same character as
02164  *        Canonicalize(Input [e+i]), then return failure.
02165  *     9. Let y be the State (f, cap).
02166  *     10. Call c(y) and return its result.
02167  */
02168 static REMatchState *
02169 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
02170 {
02171     size_t len, i;
02172     const jschar *parenContent;
02173     RECapture *cap = &x->parens[parenIndex];
02174 
02175     if (cap->index == -1)
02176         return x;
02177 
02178     len = cap->length;
02179     if (x->cp + len > gData->cpend)
02180         return NULL;
02181 
02182     parenContent = &gData->cpbegin[cap->index];
02183     if (gData->regexp->flags & JSREG_FOLD) {
02184         for (i = 0; i < len; i++) {
02185             if (upcase(parenContent[i]) != upcase(x->cp[i]))
02186                 return NULL;
02187         }
02188     } else {
02189         for (i = 0; i < len; i++) {
02190             if (parenContent[i] != x->cp[i])
02191                 return NULL;
02192         }
02193     }
02194     x->cp += len;
02195     return x;
02196 }
02197 
02198 
02199 /* Add a single character to the RECharSet */
02200 static void
02201 AddCharacterToCharSet(RECharSet *cs, jschar c)
02202 {
02203     uintN byteIndex = (uintN)(c >> 3);
02204     JS_ASSERT(c <= cs->length);
02205     cs->u.bits[byteIndex] |= 1 << (c & 0x7);
02206 }
02207 
02208 
02209 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
02210 static void
02211 AddCharacterRangeToCharSet(RECharSet *cs, jschar c1, jschar c2)
02212 {
02213     uintN i;
02214 
02215     uintN byteIndex1 = (uintN)(c1 >> 3);
02216     uintN byteIndex2 = (uintN)(c2 >> 3);
02217 
02218     JS_ASSERT((c2 <= cs->length) && (c1 <= c2));
02219 
02220     c1 &= 0x7;
02221     c2 &= 0x7;
02222 
02223     if (byteIndex1 == byteIndex2) {
02224         cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
02225     } else {
02226         cs->u.bits[byteIndex1] |= 0xFF << c1;
02227         for (i = byteIndex1 + 1; i < byteIndex2; i++)
02228             cs->u.bits[i] = 0xFF;
02229         cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
02230     }
02231 }
02232 
02233 /* Compile the source of the class into a RECharSet */
02234 static JSBool
02235 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
02236 {
02237     const jschar *src, *end;
02238     JSBool inRange = JS_FALSE;
02239     jschar rangeStart = 0;
02240     uintN byteLength, n;
02241     jschar c, thisCh;
02242     intN nDigits, i;
02243 
02244     JS_ASSERT(!charSet->converted);
02245     /*
02246      * Assert that startIndex and length points to chars inside [] inside
02247      * source string.
02248      */
02249     JS_ASSERT(1 <= charSet->u.src.startIndex);
02250     JS_ASSERT(charSet->u.src.startIndex
02251               < JSSTRING_LENGTH(gData->regexp->source));
02252     JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
02253                                        - 1 - charSet->u.src.startIndex);
02254 
02255     charSet->converted = JS_TRUE;
02256     src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
02257     end = src + charSet->u.src.length;
02258     JS_ASSERT(src[-1] == '[');
02259     JS_ASSERT(end[0] == ']');
02260 
02261     byteLength = (charSet->length >> 3) + 1;
02262     charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
02263     if (!charSet->u.bits) {
02264         JS_ReportOutOfMemory(gData->cx);
02265         gData->ok = JS_FALSE;
02266         return JS_FALSE;
02267     }
02268     memset(charSet->u.bits, 0, byteLength);
02269 
02270     if (src == end)
02271         return JS_TRUE;
02272 
02273     if (*src == '^') {
02274         JS_ASSERT(charSet->sense == JS_FALSE);
02275         ++src;
02276     } else {
02277         JS_ASSERT(charSet->sense == JS_TRUE);
02278     }
02279 
02280     while (src != end) {
02281         switch (*src) {
02282         case '\\':
02283             ++src;
02284             c = *src++;
02285             switch (c) {
02286             case 'b':
02287                 thisCh = 0x8;
02288                 break;
02289             case 'f':
02290                 thisCh = 0xC;
02291                 break;
02292             case 'n':
02293                 thisCh = 0xA;
02294                 break;
02295             case 'r':
02296                 thisCh = 0xD;
02297                 break;
02298             case 't':
02299                 thisCh = 0x9;
02300                 break;
02301             case 'v':
02302                 thisCh = 0xB;
02303                 break;
02304             case 'c':
02305                 if (src < end && JS_ISWORD(*src)) {
02306                     thisCh = (jschar)(*src++ & 0x1F);
02307                 } else {
02308                     --src;
02309                     thisCh = '\\';
02310                 }
02311                 break;
02312             case 'x':
02313                 nDigits = 2;
02314                 goto lexHex;
02315             case 'u':
02316                 nDigits = 4;
02317             lexHex:
02318                 n = 0;
02319                 for (i = 0; (i < nDigits) && (src < end); i++) {
02320                     uintN digit;
02321                     c = *src++;
02322                     if (!isASCIIHexDigit(c, &digit)) {
02323                         /*
02324                          * Back off to accepting the original '\'
02325                          * as a literal
02326                          */
02327                         src -= i + 1;
02328                         n = '\\';
02329                         break;
02330                     }
02331                     n = (n << 4) | digit;
02332                 }
02333                 thisCh = (jschar)n;
02334                 break;
02335             case '0':
02336             case '1':
02337             case '2':
02338             case '3':
02339             case '4':
02340             case '5':
02341             case '6':
02342             case '7':
02343                 /*
02344                  *  This is a non-ECMA extension - decimal escapes (in this
02345                  *  case, octal!) are supposed to be an error inside class
02346                  *  ranges, but supported here for backwards compatibility.
02347                  */
02348                 n = JS7_UNDEC(c);
02349                 c = *src;
02350                 if ('0' <= c && c <= '7') {
02351                     src++;
02352                     n = 8 * n + JS7_UNDEC(c);
02353                     c = *src;
02354                     if ('0' <= c && c <= '7') {
02355                         src++;
02356                         i = 8 * n + JS7_UNDEC(c);
02357                         if (i <= 0377)
02358                             n = i;
02359                         else
02360                             src--;
02361                     }
02362                 }
02363                 thisCh = (jschar)n;
02364                 break;
02365 
02366             case 'd':
02367                 AddCharacterRangeToCharSet(charSet, '0', '9');
02368                 continue;   /* don't need range processing */
02369             case 'D':
02370                 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
02371                 AddCharacterRangeToCharSet(charSet,
02372                                            (jschar)('9' + 1),
02373                                            (jschar)charSet->length);
02374                 continue;
02375             case 's':
02376                 for (i = (intN)charSet->length; i >= 0; i--)
02377                     if (JS_ISSPACE(i))
02378                         AddCharacterToCharSet(charSet, (jschar)i);
02379                 continue;
02380             case 'S':
02381                 for (i = (intN)charSet->length; i >= 0; i--)
02382                     if (!JS_ISSPACE(i))
02383                         AddCharacterToCharSet(charSet, (jschar)i);
02384                 continue;
02385             case 'w':
02386                 for (i = (intN)charSet->length; i >= 0; i--)
02387                     if (JS_ISWORD(i))
02388                         AddCharacterToCharSet(charSet, (jschar)i);
02389                 continue;
02390             case 'W':
02391                 for (i = (intN)charSet->length; i >= 0; i--)
02392                     if (!JS_ISWORD(i))
02393                         AddCharacterToCharSet(charSet, (jschar)i);
02394                 continue;
02395             default:
02396                 thisCh = c;
02397                 break;
02398 
02399             }
02400             break;
02401 
02402         default:
02403             thisCh = *src++;
02404             break;
02405 
02406         }
02407         if (inRange) {
02408             if (gData->regexp->flags & JSREG_FOLD) {
02409                 AddCharacterRangeToCharSet(charSet, upcase(rangeStart),
02410                                                     upcase(thisCh));
02411                 AddCharacterRangeToCharSet(charSet, downcase(rangeStart),
02412                                                     downcase(thisCh));
02413             } else {
02414                 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
02415             }
02416             inRange = JS_FALSE;
02417         } else {
02418             if (gData->regexp->flags & JSREG_FOLD) {
02419                 AddCharacterToCharSet(charSet, upcase(thisCh));
02420                 AddCharacterToCharSet(charSet, downcase(thisCh));
02421             } else {
02422                 AddCharacterToCharSet(charSet, thisCh);
02423             }
02424             if (src < end - 1) {
02425                 if (*src == '-') {
02426                     ++src;
02427                     inRange = JS_TRUE;
02428                     rangeStart = thisCh;
02429                 }
02430             }
02431         }
02432     }
02433     return JS_TRUE;
02434 }
02435 
02436 void
02437 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
02438 {
02439     if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
02440         if (re->classList) {
02441             uintN i;
02442             for (i = 0; i < re->classCount; i++) {
02443                 if (re->classList[i].converted)
02444                     JS_free(cx, re->classList[i].u.bits);
02445                 re->classList[i].u.bits = NULL;
02446             }
02447             JS_free(cx, re->classList);
02448         }
02449         JS_free(cx, re);
02450     }
02451 }
02452 
02453 static JSBool
02454 ReallocStateStack(REGlobalData *gData)
02455 {
02456     size_t limit = gData->stateStackLimit;
02457     size_t sz = sizeof(REProgState) * limit;
02458 
02459     JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
02460     if (!gData->stateStack) {
02461         gData->ok = JS_FALSE;
02462         return JS_FALSE;
02463     }
02464     gData->stateStackLimit = limit + limit;
02465     return JS_TRUE;
02466 }
02467 
02468 #define PUSH_STATE_STACK(data)                                                \
02469     JS_BEGIN_MACRO                                                            \
02470         ++(data)->stateStackTop;                                              \
02471         if ((data)->stateStackTop == (data)->stateStackLimit &&               \
02472             !ReallocStateStack((data))) {                                     \
02473             return NULL;                                                      \
02474         }                                                                     \
02475     JS_END_MACRO
02476 
02477 /*
02478  * Apply the current op against the given input to see if it's going to match
02479  * or fail. Return false if we don't get a match, true if we do. If updatecp is
02480  * true, then update the current state's cp. Always update startpc to the next
02481  * op.
02482  */
02483 static REMatchState *
02484 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
02485             jsbytecode **startpc, JSBool updatecp)
02486 {
02487     REMatchState *result = NULL;
02488     jschar matchCh;
02489     size_t parenIndex;
02490     size_t offset, length, index;
02491     jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
02492     jschar *source;
02493     const jschar *startcp = x->cp;
02494     jschar ch;
02495     RECharSet *charSet;
02496 
02497     switch (op) {
02498     case REOP_BOL:
02499         if (x->cp != gData->cpbegin) {
02500             if (!gData->cx->regExpStatics.multiline &&
02501                 !(gData->regexp->flags & JSREG_MULTILINE)) {
02502                 break;
02503             }
02504             if (!RE_IS_LINE_TERM(x->cp[-1]))
02505                 break;
02506         }
02507         result = x;
02508         break;
02509     case REOP_EOL:
02510         if (x->cp != gData->cpend) {
02511             if (!gData->cx->regExpStatics.multiline &&
02512                 !(gData->regexp->flags & JSREG_MULTILINE)) {
02513                 break;
02514             }
02515             if (!RE_IS_LINE_TERM(*x->cp))
02516                 break;
02517         }
02518         result = x;
02519         break;
02520     case REOP_WBDRY:
02521         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
02522             !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
02523             result = x;
02524         }
02525         break;
02526     case REOP_WNONBDRY:
02527         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
02528             (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
02529             result = x;
02530         }
02531         break;
02532     case REOP_DOT:
02533         if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
02534             result = x;
02535             result->cp++;
02536         }
02537         break;
02538     case REOP_DIGIT:
02539         if (x->cp != gData->cpend && JS_ISDIGIT(*x->cp)) {
02540             result = x;
02541             result->cp++;
02542         }
02543         break;
02544     case REOP_NONDIGIT:
02545         if (x->cp != gData->cpend && !JS_ISDIGIT(*x->cp)) {
02546             result = x;
02547             result->cp++;
02548         }
02549         break;
02550     case REOP_ALNUM:
02551         if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
02552             result = x;
02553             result->cp++;
02554         }
02555         break;
02556     case REOP_NONALNUM:
02557         if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
02558             result = x;
02559             result->cp++;
02560         }
02561         break;
02562     case REOP_SPACE:
02563         if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
02564             result = x;
02565             result->cp++;
02566         }
02567         break;
02568     case REOP_NONSPACE:
02569         if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
02570             result = x;
02571             result->cp++;
02572         }
02573         break;
02574     case REOP_BACKREF:
02575         pc = ReadCompactIndex(pc, &parenIndex);
02576         JS_ASSERT(parenIndex < gData->regexp->parenCount);
02577         result = BackrefMatcher(gData, x, parenIndex);
02578         break;
02579     case REOP_FLAT:
02580         pc = ReadCompactIndex(pc, &offset);
02581         JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
02582         pc = ReadCompactIndex(pc, &length);
02583         JS_ASSERT(1 <= length);
02584         JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
02585         if (length <= (size_t)(gData->cpend - x->cp)) {
02586             source = JSSTRING_CHARS(gData->regexp->source) + offset;
02587             for (index = 0; index != length; index++) {
02588                 if (source[index] != x->cp[index])
02589                     return NULL;
02590             }
02591             x->cp += length;
02592             result = x;
02593         }
02594         break;
02595     case REOP_FLAT1:
02596         matchCh = *pc++;
02597         if (x->cp != gData->cpend && *x->cp == matchCh) {
02598             result = x;
02599             result->cp++;
02600         }
02601         break;
02602     case REOP_FLATi:
02603         pc = ReadCompactIndex(pc, &offset);
02604         JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
02605         pc = ReadCompactIndex(pc, &length);
02606         JS_ASSERT(1 <= length);
02607         JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
02608         source = JSSTRING_CHARS(gData->regexp->source);
02609         result = FlatNIMatcher(gData, x, source + offset, length);
02610         break;
02611     case REOP_FLAT1i:
02612         matchCh = *pc++;
02613         if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
02614             result = x;
02615             result->cp++;
02616         }
02617         break;
02618     case REOP_UCFLAT1:
02619         matchCh = GET_ARG(pc);
02620         pc += ARG_LEN;
02621         if (x->cp != gData->cpend && *x->cp == matchCh) {
02622             result = x;
02623             result->cp++;
02624         }
02625         break;
02626     case REOP_UCFLAT1i:
02627         matchCh = GET_ARG(pc);
02628         pc += ARG_LEN;
02629         if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
02630             result = x;
02631             result->cp++;
02632         }
02633         break;
02634     case REOP_CLASS:
02635         pc = ReadCompactIndex(pc, &index);
02636         JS_ASSERT(index < gData->regexp->classCount);
02637         if (x->cp != gData->cpend) {
02638             charSet = &gData->regexp->classList[index];
02639             JS_ASSERT(charSet->converted);
02640             ch = *x->cp;
02641             index = ch >> 3;
02642             if (charSet->length != 0 &&
02643                 ch <= charSet->length &&
02644                 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
02645                 result = x;
02646                 result->cp++;
02647             }
02648         }
02649         break;
02650     case REOP_NCLASS:
02651         pc = ReadCompactIndex(pc, &index);
02652         JS_ASSERT(index < gData->regexp->classCount);
02653         if (x->cp != gData->cpend) {
02654             charSet = &gData->regexp->classList[index];
02655             JS_ASSERT(charSet->converted);
02656             ch = *x->cp;
02657             index = ch >> 3;
02658             if (charSet->length == 0 ||
02659                 ch > charSet->length ||
02660                 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
02661                 result = x;
02662                 result->cp++;
02663             }
02664         }
02665         break;
02666     default:
02667         JS_ASSERT(JS_FALSE);
02668     }
02669     if (result) {
02670         if (!updatecp)
02671             x->cp = startcp;
02672         *startpc = pc;
02673         return result;
02674     }
02675     x->cp = startcp;
02676     return NULL;
02677 }
02678 
02679 static REMatchState *
02680 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
02681 {
02682     REMatchState *result = NULL;
02683     REBackTrackData *backTrackData;
02684     jsbytecode *nextpc, *testpc;
02685     REOp nextop;
02686     RECapture *cap;
02687     REProgState *curState;
02688     const jschar *startcp;
02689     size_t parenIndex, k;
02690     size_t parenSoFar = 0;
02691 
02692     jschar matchCh1, matchCh2;
02693     RECharSet *charSet;
02694 
02695     JSBranchCallback onbranch = gData->cx->branchCallback;
02696     uintN onbranchCalls = 0;
02697 #define ONBRANCH_CALLS_MASK             127
02698 #define CHECK_BRANCH()                                                         \
02699     JS_BEGIN_MACRO                                                             \
02700         if (onbranch &&                                                        \
02701             (++onbranchCalls & ONBRANCH_CALLS_MASK) == 0 &&                    \
02702             !(*onbranch)(gData->cx, NULL)) {                                   \
02703             gData->ok = JS_FALSE;                                              \
02704             return NULL;                                                       \
02705         }                                                                      \
02706     JS_END_MACRO
02707 
02708     JSBool anchor;
02709     jsbytecode *pc = gData->regexp->program;
02710     REOp op = (REOp) *pc++;
02711 
02712     /*
02713      * If the first node is a simple match, step the index into the string
02714      * until that match is made, or fail if it can't be found at all.
02715      */
02716     if (REOP_IS_SIMPLE(op)) {
02717         anchor = JS_FALSE;
02718         while (x->cp <= gData->cpend) {
02719             nextpc = pc;    /* reset back to start each time */
02720             result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
02721             if (result) {
02722                 anchor = JS_TRUE;
02723                 x = result;
02724                 pc = nextpc;    /* accept skip to next opcode */
02725                 op = (REOp) *pc++;
02726                 break;
02727             }
02728             gData->skipped++;
02729             x->cp++;
02730         }
02731         if (!anchor)
02732             return NULL;
02733     }
02734 
02735     for (;;) {
02736         if (REOP_IS_SIMPLE(op)) {
02737             result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
02738         } else {
02739             curState = &gData->stateStack[gData->stateStackTop];
02740             switch (op) {
02741             case REOP_EMPTY:
02742                 result = x;
02743                 break;
02744 
02745             case REOP_ALTPREREQ2:
02746                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
02747                 pc += ARG_LEN;
02748                 matchCh2 = GET_ARG(pc);
02749                 pc += ARG_LEN;
02750                 k = GET_ARG(pc);
02751                 pc += ARG_LEN;
02752 
02753                 if (x->cp != gData->cpend) {
02754                     if (*x->cp == matchCh2)
02755                         goto doAlt;
02756 
02757                     charSet = &gData->regexp->classList[k];
02758                     if (!charSet->converted && !ProcessCharSet(gData, charSet))
02759                         return NULL;
02760                     matchCh1 = *x->cp;
02761                     k = matchCh1 >> 3;
02762                     if ((charSet->length == 0 ||
02763                          matchCh1 > charSet->length ||
02764                          !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
02765                         charSet->sense) {
02766                         goto doAlt;
02767                     }
02768                 }
02769                 result = NULL;
02770                 break;
02771 
02772             case REOP_ALTPREREQ:
02773                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
02774                 pc += ARG_LEN;
02775                 matchCh1 = GET_ARG(pc);
02776                 pc += ARG_LEN;
02777                 matchCh2 = GET_ARG(pc);
02778                 pc += ARG_LEN;
02779                 if (x->cp == gData->cpend ||
02780                     (*x->cp != matchCh1 && *x->cp != matchCh2)) {
02781                     result = NULL;
02782                     break;
02783                 }
02784                 /* else false thru... */
02785 
02786             case REOP_ALT:
02787             doAlt:
02788                 nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
02789                 pc += ARG_LEN;                  /* start of this alternate */
02790                 curState->parenSoFar = parenSoFar;
02791                 PUSH_STATE_STACK(gData);
02792                 op = (REOp) *pc++;
02793                 startcp = x->cp;
02794                 if (REOP_IS_SIMPLE(op)) {
02795                     if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
02796                         op = (REOp) *nextpc++;
02797                         pc = nextpc;
02798                         continue;
02799                     }
02800                     result = x;
02801                     op = (REOp) *pc++;
02802                 }
02803                 nextop = (REOp) *nextpc++;
02804                 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
02805                     return NULL;
02806                 continue;
02807 
02808             /*
02809              * Occurs at (successful) end of REOP_ALT,
02810              */
02811             case REOP_JUMP:
02812                 --gData->stateStackTop;
02813                 pc += GET_OFFSET(pc);
02814                 op = (REOp) *pc++;
02815                 continue;
02816 
02817             /*
02818              * Occurs at last (successful) end of REOP_ALT,
02819              */
02820             case REOP_ENDALT:
02821                 --gData->stateStackTop;
02822                 op = (REOp) *pc++;
02823                 continue;
02824 
02825             case REOP_LPAREN:
02826                 pc = ReadCompactIndex(pc, &parenIndex);
02827                 JS_ASSERT(parenIndex < gData->regexp->parenCount);
02828                 if (parenIndex + 1 > parenSoFar)
02829                     parenSoFar = parenIndex + 1;
02830                 x->parens[parenIndex].index = x->cp - gData->cpbegin;
02831                 x->parens[parenIndex].length = 0;
02832                 op = (REOp) *pc++;
02833                 continue;
02834 
02835             case REOP_RPAREN:
02836                 pc = ReadCompactIndex(pc, &parenIndex);
02837                 JS_ASSERT(parenIndex < gData->regexp->parenCount);
02838                 cap = &x->parens[parenIndex];
02839 
02840                 /*
02841                  * FIXME: https://bugzilla.mozilla.org/show_bug.cgi?id=346090
02842                  * This wallpaper prevents a case where we somehow took a step
02843                  * backward in input while minimally-matching an empty string.
02844                  */
02845                 if (x->cp < gData->cpbegin + cap->index)
02846                     cap->index = -1;
02847                 cap->length = x->cp - (gData->cpbegin + cap->index);
02848                 op = (REOp) *pc++;
02849                 continue;
02850 
02851             case REOP_ASSERT:
02852                 nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
02853                 pc += ARG_LEN;                 /* start of ASSERT child */
02854                 op = (REOp) *pc++;
02855                 testpc = pc;
02856                 if (REOP_IS_SIMPLE(op) &&
02857                     !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
02858                     result = NULL;
02859                     break;
02860                 }
02861                 curState->u.assertion.top =
02862                     (char *)gData->backTrackSP - (char *)gData->backTrackStack;
02863                 curState->u.assertion.sz = gData->cursz;
02864                 curState->index = x->cp - gData->cpbegin;
02865                 curState->parenSoFar = parenSoFar;
02866                 PUSH_STATE_STACK(gData);
02867                 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
02868                                         nextpc, x, x->cp, 0, 0)) {
02869                     return NULL;
02870                 }
02871                 continue;
02872 
02873             case REOP_ASSERT_NOT:
02874                 nextpc = pc + GET_OFFSET(pc);
02875                 pc += ARG_LEN;
02876                 op = (REOp) *pc++;
02877                 testpc = pc;
02878                 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
02879                     SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
02880                     *testpc == REOP_ASSERTNOTTEST) {
02881                     result = NULL;
02882                     break;
02883                 }
02884                 curState->u.assertion.top
02885                     = (char *)gData->backTrackSP -
02886                       (char *)gData->backTrackStack;
02887                 curState->u.assertion.sz = gData->cursz;
02888                 curState->index = x->cp - gData->cpbegin;
02889                 curState->parenSoFar = parenSoFar;
02890                 PUSH_STATE_STACK(gData);
02891                 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
02892                                         nextpc, x, x->cp, 0, 0)) {
02893                     return NULL;
02894                 }
02895                 continue;
02896 
02897             case REOP_ASSERTTEST:
02898                 --gData->stateStackTop;
02899                 --curState;
02900                 x->cp = gData->cpbegin + curState->index;
02901                 gData->backTrackSP =
02902                     (REBackTrackData *) ((char *)gData->backTrackStack +
02903                                          curState->u.assertion.top);
02904                 gData->cursz = curState->u.assertion.sz;
02905                 if (result)
02906                     result = x;
02907                 break;
02908 
02909             case REOP_ASSERTNOTTEST:
02910                 --gData->stateStackTop;
02911                 --curState;
02912                 x->cp = gData->cpbegin + curState->index;
02913                 gData->backTrackSP =
02914                     (REBackTrackData *) ((char *)gData->backTrackStack +
02915                                          curState->u.assertion.top);
02916                 gData->cursz = curState->u.assertion.sz;
02917                 result = (!result) ? x : NULL;
02918                 break;
02919 
02920             case REOP_END:
02921                 if (x)
02922                     return x;
02923                 break;
02924 
02925             case REOP_STAR:
02926                 curState->u.quantifier.min = 0;
02927                 curState->u.quantifier.max = (uintN)-1;
02928                 goto quantcommon;
02929             case REOP_PLUS:
02930                 curState->u.quantifier.min = 1;
02931                 curState->u.quantifier.max = (uintN)-1;
02932                 goto quantcommon;
02933             case REOP_OPT:
02934                 curState->u.quantifier.min = 0;
02935                 curState->u.quantifier.max = 1;
02936                 goto quantcommon;
02937             case REOP_QUANT:
02938                 pc = ReadCompactIndex(pc, &k);
02939                 curState->u.quantifier.min = k;
02940                 pc = ReadCompactIndex(pc, &k);
02941                 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
02942                 curState->u.quantifier.max = k - 1;
02943                 JS_ASSERT(curState->u.quantifier.min
02944                           <= curState->u.quantifier.max);
02945             quantcommon:
02946                 if (curState->u.quantifier.max == 0) {
02947                     pc = pc + GET_OFFSET(pc);
02948                     op = (REOp) *pc++;
02949                     result = x;
02950                     continue;
02951                 }
02952                 /* Step over <next> */
02953                 nextpc = pc + ARG_LEN;
02954                 op = (REOp) *nextpc++;
02955                 startcp = x->cp;
02956                 if (REOP_IS_SIMPLE(op)) {
02957                     if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
02958                         if (curState->u.quantifier.min == 0)
02959                             result = x;
02960                         else
02961                             result = NULL;
02962                         pc = pc + GET_OFFSET(pc);
02963                         break;
02964                     }
02965                     op = (REOp) *nextpc++;
02966                     result = x;
02967                 }
02968                 curState->index = startcp - gData->cpbegin;
02969                 curState->continue_op = REOP_REPEAT;
02970                 curState->continue_pc = pc;
02971                 curState->parenSoFar = parenSoFar;
02972                 PUSH_STATE_STACK(gData);
02973                 if (curState->u.quantifier.min == 0 &&
02974                     !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
02975                                         0, 0)) {
02976                     return NULL;
02977                 }
02978                 pc = nextpc;
02979                 continue;
02980 
02981             case REOP_ENDCHILD: /* marks the end of a quantifier child */
02982                 pc = curState[-1].continue_pc;
02983                 op = curState[-1].continue_op;
02984                 continue;
02985 
02986             case REOP_REPEAT:
02987                 CHECK_BRANCH();
02988                 --curState;
02989                 do {
02990                     --gData->stateStackTop;
02991                     if (!result) {
02992                         /* Failed, see if we have enough children. */
02993                         if (curState->u.quantifier.min == 0)
02994                             goto repeatDone;
02995                         goto break_switch;
02996                     }
02997                     if (curState->u.quantifier.min == 0 &&
02998                         x->cp == gData->cpbegin + curState->index) {
02999                         /* matched an empty string, that'll get us nowhere */
03000                         result = NULL;
03001                         goto break_switch;
03002                     }
03003                     if (curState->u.quantifier.min != 0)
03004                         curState->u.quantifier.min--;
03005                     if (curState->u.quantifier.max != (uintN) -1)
03006                         curState->u.quantifier.max--;
03007                     if (curState->u.quantifier.max == 0)
03008                         goto repeatDone;
03009                     nextpc = pc + ARG_LEN;
03010                     nextop = (REOp) *nextpc;
03011                     startcp = x->cp;
03012                     if (REOP_IS_SIMPLE(nextop)) {
03013                         nextpc++;
03014                         if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
03015                             if (curState->u.quantifier.min == 0)
03016                                 goto repeatDone;
03017                             result = NULL;
03018                             goto break_switch;
03019                         }
03020                         result = x;
03021                     }
03022                     curState->index = startcp - gData->cpbegin;
03023                     PUSH_STATE_STACK(gData);
03024                     if (curState->u.quantifier.min == 0 &&
03025                         !PushBackTrackState(gData, REOP_REPEAT,
03026                                             pc, x, startcp,
03027                                             curState->parenSoFar,
03028                                             parenSoFar -
03029                                             curState->parenSoFar)) {
03030                         return NULL;
03031                     }
03032                 } while (*nextpc == REOP_ENDCHILD);
03033                 pc = nextpc;
03034                 op = (REOp) *pc++;
03035                 parenSoFar = curState->parenSoFar;
03036                 continue;
03037 
03038             repeatDone:
03039                 result = x;
03040                 pc += GET_OFFSET(pc);
03041                 goto break_switch;
03042 
03043             case REOP_MINIMALSTAR:
03044                 curState->u.quantifier.min = 0;
03045                 curState->u.quantifier.max = (uintN)-1;
03046                 goto minimalquantcommon;
03047             case REOP_MINIMALPLUS:
03048                 curState->u.quantifier.min = 1;
03049                 curState->u.quantifier.max = (uintN)-1;
03050                 goto minimalquantcommon;
03051             case REOP_MINIMALOPT:
03052                 curState->u.quantifier.min = 0;
03053                 curState->u.quantifier.max = 1;
03054                 goto minimalquantcommon;
03055             case REOP_MINIMALQUANT:
03056                 pc = ReadCompactIndex(pc, &k);
03057                 curState->u.quantifier.min = k;
03058                 pc = ReadCompactIndex(pc, &k);
03059                 /* See REOP_QUANT comments about k - 1. */
03060                 curState->u.quantifier.max = k - 1;
03061                 JS_ASSERT(curState->u.quantifier.min
03062                           <= curState->u.quantifier.max);
03063             minimalquantcommon:
03064                 curState->index = x->cp - gData->cpbegin;
03065                 curState->parenSoFar = parenSoFar;
03066                 PUSH_STATE_STACK(gData);
03067                 if (curState->u.quantifier.min != 0) {
03068                     curState->continue_op = REOP_MINIMALREPEAT;
03069                     curState->continue_pc = pc;
03070                     /* step over <next> */
03071                     pc += OFFSET_LEN;
03072                     op = (REOp) *pc++;
03073                 } else {
03074                     if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
03075                                             pc, x, x->cp, 0, 0)) {
03076                         return NULL;
03077                     }
03078                     --gData->stateStackTop;
03079                     pc = pc + GET_OFFSET(pc);
03080                     op = (REOp) *pc++;
03081                 }
03082                 continue;
03083 
03084             case REOP_MINIMALREPEAT:
03085                 CHECK_BRANCH();
03086                 --gData->stateStackTop;
03087                 --curState;
03088 
03089                 if (!result) {
03090                     /*
03091                      * Non-greedy failure - try to consume another child.
03092                      */
03093                     if (curState->u.quantifier.max == (uintN) -1 ||
03094                         curState->u.quantifier.max > 0) {
03095                         curState->index = x->cp - gData->cpbegin;
03096                         curState->continue_op = REOP_MINIMALREPEAT;
03097                         curState->continue_pc = pc;
03098                         pc += ARG_LEN;
03099                         for (k = curState->parenSoFar; k < parenSoFar; k++)
03100                             x->parens[k].index = -1;
03101                         PUSH_STATE_STACK(gData);
03102                         op = (REOp) *pc++;
03103                         continue;
03104                     }
03105                     /* Don't need to adjust pc since we're going to pop. */
03106                     break;
03107                 }
03108                 if (curState->u.quantifier.min == 0 &&
03109                     x->cp == gData->cpbegin + curState->index) {
03110                     /* Matched an empty string, that'll get us nowhere. */
03111                     result = NULL;
03112                     break;
03113                 }
03114                 if (curState->u.quantifier.min != 0)
03115                     curState->u.quantifier.min--;
03116                 if (curState->u.quantifier.max != (uintN) -1)
03117                     curState->u.quantifier.max--;
03118                 if (curState->u.quantifier.min != 0) {
03119                     curState->continue_op = REOP_MINIMALREPEAT;
03120                     curState->continue_pc = pc;
03121                     pc += ARG_LEN;
03122                     for (k = curState->parenSoFar; k < parenSoFar; k++)
03123                         x->parens[k].index = -1;
03124                     curState->index = x->cp - gData->cpbegin;
03125                     PUSH_STATE_STACK(gData);
03126                     op = (REOp) *pc++;
03127                     continue;
03128                 }
03129                 curState->index = x->cp - gData->cpbegin;
03130                 curState->parenSoFar = parenSoFar;
03131                 PUSH_STATE_STACK(gData);
03132                 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
03133                                         pc, x, x->cp,
03134                                         curState->parenSoFar,
03135                                         parenSoFar - curState->parenSoFar)) {
03136                     return NULL;
03137                 }
03138                 --gData->stateStackTop;
03139                 pc = pc + GET_OFFSET(pc);
03140                 op = (REOp) *pc++;
03141                 continue;
03142 
03143             default:
03144                 JS_ASSERT(JS_FALSE);
03145                 result = NULL;
03146             }
03147         break_switch:;
03148         }
03149 
03150         /*
03151          *  If the match failed and there's a backtrack option, take it.
03152          *  Otherwise this is a complete and utter failure.
03153          */
03154         if (!result) {
03155             if (gData->cursz == 0)
03156                 return NULL;
03157             backTrackData = gData->backTrackSP;
03158             gData->cursz = backTrackData->sz;
03159             gData->backTrackSP =
03160                 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
03161             x->cp = backTrackData->cp;
03162             pc = backTrackData->backtrack_pc;
03163             op = backTrackData->backtrack_op;
03164             gData->stateStackTop = backTrackData->saveStateStackTop;
03165             JS_ASSERT(gData->stateStackTop);
03166 
03167             memcpy(gData->stateStack, backTrackData + 1,
03168                    sizeof(REProgState) * backTrackData->saveStateStackTop);
03169             curState = &gData->stateStack[gData->stateStackTop - 1];
03170 
03171             if (backTrackData->parenCount) {
03172                 memcpy(&x->parens[backTrackData->parenIndex],
03173                        (char *)(backTrackData + 1) +
03174                        sizeof(REProgState) * backTrackData->saveStateStackTop,
03175                        sizeof(RECapture) * backTrackData->parenCount);
03176                 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
03177             } else {
03178                 for (k = curState->parenSoFar; k < parenSoFar; k++)
03179                     x->parens[k].index = -1;
03180                 parenSoFar = curState->parenSoFar;
03181             }
03182             continue;
03183         }
03184         x = result;
03185 
03186         /*
03187          *  Continue with the expression.
03188          */
03189         op = (REOp)*pc++;
03190     }
03191     return NULL;
03192 }
03193 
03194 static REMatchState *
03195 MatchRegExp(REGlobalData *gData, REMatchState *x)
03196 {
03197     REMatchState *result;
03198     const jschar *cp = x->cp;
03199     const jschar *cp2;
03200     uintN j;
03201 
03202     /*
03203      * Have to include the position beyond the last character
03204      * in order to detect end-of-input/line condition.
03205      */
03206     for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
03207         gData->skipped = cp2 - cp;
03208         x->cp = cp2;
03209         for (j = 0; j < gData->regexp->parenCount; j++)
03210             x->parens[j].index = -1;
03211         result = ExecuteREBytecode(gData, x);
03212         if (!gData->ok || result)
03213             return result;
03214         gData->backTrackSP = gData->backTrackStack;
03215         gData->cursz = 0;
03216         gData->stateStackTop = 0;
03217         cp2 = cp + gData->skipped;
03218     }
03219     return NULL;
03220 }
03221 
03222 
03223 static REMatchState *
03224 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re)
03225 {
03226     REMatchState *result;
03227     uintN i;
03228 
03229     gData->backTrackStackSize = INITIAL_BACKTRACK;
03230     JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
03231                            &gData->pool,
03232                            INITIAL_BACKTRACK);
03233     if (!gData->backTrackStack)
03234         goto bad;
03235 
03236     gData->backTrackSP = gData->backTrackStack;
03237     gData->cursz = 0;
03238 
03239     gData->stateStackLimit = INITIAL_STATESTACK;
03240     JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
03241                            &gData->pool,
03242                            sizeof(REProgState) * INITIAL_STATESTACK);
03243     if (!gData->stateStack)
03244         goto bad;
03245 
03246     gData->stateStackTop = 0;
03247     gData->cx = cx;
03248     gData->regexp = re;
03249     gData->ok = JS_TRUE;
03250 
03251     JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
03252                            &gData->pool,
03253                            offsetof(REMatchState, parens)
03254                            + re->parenCount * sizeof(RECapture));
03255     if (!result)
03256         goto bad;
03257 
03258     for (i = 0; i < re->classCount; i++) {
03259         if (!re->classList[i].converted &&
03260             !ProcessCharSet(gData, &re->classList[i])) {
03261             return NULL;
03262         }
03263     }
03264 
03265     return result;
03266 
03267 bad:
03268     JS_ReportOutOfMemory(cx);
03269     gData->ok = JS_FALSE;
03270     return NULL;
03271 }
03272 
03273 JSBool
03274 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
03275                  JSBool test, jsval *rval)
03276 {
03277     REGlobalData gData;
03278     REMatchState *x, *result;
03279 
03280     const jschar *cp, *ep;
03281     size_t i, length, start;
03282     JSSubString *morepar;
03283     JSBool ok;
03284     JSRegExpStatics *res;
03285     ptrdiff_t matchlen;
03286     uintN num, morenum;
03287     JSString *parstr, *matchstr;
03288     JSObject *obj;
03289 
03290     RECapture *parsub = NULL;
03291 
03292     /*
03293      * It's safe to load from cp because JSStrings have a zero at the end,
03294      * and we never let cp get beyond cpend.
03295      */
03296     start = *indexp;
03297     length = JSSTRING_LENGTH(str);
03298     if (start > length)
03299         start = length;
03300     cp = JSSTRING_CHARS(str);
03301     gData.cpbegin = cp;
03302     gData.cpend = cp + length;
03303     cp += start;
03304     gData.start = start;
03305     gData.skipped = 0;
03306 
03307     JS_InitArenaPool(&gData.pool, "RegExpPool", 8096, 4);
03308     x = InitMatch(cx, &gData, re);
03309     if (!x) {
03310         ok = JS_FALSE;
03311         goto out;
03312     }
03313     x->cp = cp;
03314 
03315     /*
03316      * Call the recursive matcher to do the real work.  Return null on mismatch
03317      * whether testing or not.  On match, return an extended Array object.
03318      */
03319     result = MatchRegExp(&gData, x);
03320     ok = gData.ok;
03321     if (!ok)
03322         goto out;
03323     if (!result) {
03324         *rval = JSVAL_NULL;
03325         goto out;
03326     }
03327     cp = result->cp;
03328     i = cp - gData.cpbegin;
03329     *indexp = i;
03330     matchlen = i - (start + gData.skipped);
03331     ep = cp;
03332     cp -= matchlen;
03333 
03334     if (test) {
03335         /*
03336          * Testing for a match and updating cx->regExpStatics: don't allocate
03337          * an array object, do return true.
03338          */
03339         *rval = JSVAL_TRUE;
03340 
03341         /* Avoid warning.  (gcc doesn't detect that obj is needed iff !test); */
03342         obj = NULL;
03343     } else {
03344         /*
03345          * The array returned on match has element 0 bound to the matched
03346          * string, elements 1 through state.parenCount bound to the paren
03347          * matches, an index property telling the length of the left context,
03348          * and an input property referring to the input string.
03349          */
03350         obj = js_NewArrayObject(cx, 0, NULL);
03351         if (!obj) {
03352             ok = JS_FALSE;
03353             goto out;
03354         }
03355         *rval = OBJECT_TO_JSVAL(obj);
03356 
03357 #define DEFVAL(val, id) {                                                     \
03358     ok = js_DefineProperty(cx, obj, id, val,                                  \
03359                            JS_PropertyStub, JS_PropertyStub,                  \
03360                            JSPROP_ENUMERATE, NULL);                           \
03361     if (!ok) {                                                                \
03362         cx->weakRoots.newborn[GCX_OBJECT] = NULL;                             \
03363         cx->weakRoots.newborn[GCX_STRING] = NULL;                             \
03364         goto out;                                                             \
03365     }                                                                         \
03366 }
03367 
03368         matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
03369         if (!matchstr) {
03370             cx->weakRoots.newborn[GCX_OBJECT] = NULL;
03371             ok = JS_FALSE;
03372             goto out;
03373         }
03374         DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
03375     }
03376 
03377     res = &cx->regExpStatics;
03378     res->input = str;
03379     res->parenCount = re->parenCount;
03380     if (re->parenCount == 0) {
03381         res->lastParen = js_EmptySubString;
03382     } else {
03383         for (num = 0; num < re->parenCount; num++) {
03384             parsub = &result->parens[num];
03385             if (num < 9) {
03386                 if (parsub->index == -1) {
03387                     res->parens[num].chars = NULL;
03388                     res->parens[num].length = 0;
03389                 } else {
03390                     res->parens[num].chars = gData.cpbegin + parsub->index;
03391                     res->parens[num].length = parsub->length;
03392                 }
03393             } else {
03394                 morenum = num - 9;
03395                 morepar = res->moreParens;
03396                 if (!morepar) {
03397                     res->moreLength = 10;
03398                     morepar = (JSSubString*)
03399                         JS_malloc(cx, 10 * sizeof(JSSubString));
03400                 } else if (morenum >= res->moreLength) {
03401                     res->moreLength += 10;
03402                     morepar = (JSSubString*)
03403                         JS_realloc(cx, morepar,
03404                                    res->moreLength * sizeof(JSSubString));
03405                 }
03406                 if (!morepar) {
03407                     cx->weakRoots.newborn[GCX_OBJECT] = NULL;
03408                     cx->weakRoots.newborn[GCX_STRING] = NULL;
03409                     ok = JS_FALSE;
03410                     goto out;
03411                 }
03412                 res->moreParens = morepar;
03413                 if (parsub->index == -1) {
03414                     morepar[morenum].chars = NULL;
03415                     morepar[morenum].length = 0;
03416                 } else {
03417                     morepar[morenum].chars = gData.cpbegin + parsub->index;
03418                     morepar[morenum].length = parsub->length;
03419                 }
03420             }
03421             if (test)
03422                 continue;
03423             if (parsub->index == -1) {
03424                 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
03425                                        JSVAL_VOID, NULL, NULL,
03426                                        JSPROP_ENUMERATE, NULL);
03427             } else {
03428                 parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
03429                                            parsub->length, 0);
03430                 if (!parstr) {
03431                     cx->weakRoots.newborn[GCX_OBJECT] = NULL;
03432                     cx->weakRoots.newborn[GCX_STRING] = NULL;
03433                     ok = JS_FALSE;
03434                     goto out;
03435                 }
03436                 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
03437                                        STRING_TO_JSVAL(parstr), NULL, NULL,
03438                                        JSPROP_ENUMERATE, NULL);
03439             }
03440             if (!ok) {
03441                 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
03442                 cx->weakRoots.newborn[GCX_STRING] = NULL;
03443                 goto out;
03444             }
03445         }
03446         if (parsub->index == -1) {
03447             res->lastParen = js_EmptySubString;
03448         } else {
03449             res->lastParen.chars = gData.cpbegin + parsub->index;
03450             res->lastParen.length = parsub->length;
03451         }
03452     }
03453 
03454     if (!test) {
03455         /*
03456          * Define the index and input properties last for better for/in loop
03457          * order (so they come after the elements).
03458          */
03459         DEFVAL(INT_TO_JSVAL(start + gData.skipped),
03460                ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
03461         DEFVAL(STRING_TO_JSVAL(str),
03462                ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
03463     }
03464 
03465 #undef DEFVAL
03466 
03467     res->lastMatch.chars = cp;
03468     res->lastMatch.length = matchlen;
03469 
03470     /*
03471      * For JS1.3 and ECMAv2, emulate Perl5 exactly:
03472      *
03473      * js1.3        "hi", "hi there"            "hihitherehi therebye"
03474      */
03475     res->leftContext.chars = JSSTRING_CHARS(str);
03476     res->leftContext.length = start + gData.skipped;
03477     res->rightContext.chars = ep;
03478     res->rightContext.length = gData.cpend - ep;
03479 
03480 out:
03481     JS_FinishArenaPool(&gData.pool);
03482     return ok;
03483 }
03484 
03485 /************************************************************************/
03486 
03487 enum regexp_tinyid {
03488     REGEXP_SOURCE       = -1,
03489     REGEXP_GLOBAL       = -2,
03490     REGEXP_IGNORE_CASE  = -3,
03491     REGEXP_LAST_INDEX   = -4,
03492     REGEXP_MULTILINE    = -5
03493 };
03494 
03495 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT|JSPROP_SHARED)
03496 
03497 static JSPropertySpec regexp_props[] = {
03498     {"source",     REGEXP_SOURCE,      REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
03499     {"global",     REGEXP_GLOBAL,      REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
03500     {"ignoreCase", REGEXP_IGNORE_CASE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
03501     {"lastIndex",  REGEXP_LAST_INDEX,  REGEXP_PROP_ATTRS,0,0},
03502     {"multiline",  REGEXP_MULTILINE,   REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
03503     {0,0,0,0,0}
03504 };
03505 
03506 static JSBool
03507 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
03508 {
03509     jsint slot;
03510     JSRegExp *re;
03511 
03512     if (!JSVAL_IS_INT(id))
03513         return JS_TRUE;
03514     slot = JSVAL_TO_INT(id);
03515     if (slot == REGEXP_LAST_INDEX)
03516         return JS_GetReservedSlot(cx, obj, 0, vp);
03517 
03518     JS_LOCK_OBJ(cx, obj);
03519     re = (JSRegExp *) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
03520     if (re) {
03521         switch (slot) {
03522           case REGEXP_SOURCE:
03523             *vp = STRING_TO_JSVAL(re->source);
03524             break;
03525           case REGEXP_GLOBAL:
03526             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
03527             break;
03528           case REGEXP_IGNORE_CASE:
03529             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
03530             break;
03531           case REGEXP_MULTILINE:
03532             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
03533             break;
03534         }
03535     }
03536     JS_UNLOCK_OBJ(cx, obj);
03537     return JS_TRUE;
03538 }
03539 
03540 static JSBool
03541 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
03542 {
03543     JSBool ok;
03544     jsint slot;
03545     jsdouble lastIndex;
03546 
03547     ok = JS_TRUE;
03548     if (!JSVAL_IS_INT(id))
03549         return ok;
03550     slot = JSVAL_TO_INT(id);
03551     if (slot == REGEXP_LAST_INDEX) {
03552         if (!js_ValueToNumber(cx, *vp, &lastIndex))
03553             return JS_FALSE;
03554         lastIndex = js_DoubleToInteger(lastIndex);
03555         ok = js_NewNumberValue(cx, lastIndex, vp) &&
03556              JS_SetReservedSlot(cx, obj, 0, *vp);
03557     }
03558     return ok;
03559 }
03560 
03561 /*
03562  * RegExp class static properties and their Perl counterparts:
03563  *
03564  *  RegExp.input                $_
03565  *  RegExp.multiline            $*
03566  *  RegExp.lastMatch            $&
03567  *  RegExp.lastParen            $+
03568  *  RegExp.leftContext          $`
03569  *  RegExp.rightContext         $'
03570  */
03571 enum regexp_static_tinyid {
03572     REGEXP_STATIC_INPUT         = -1,
03573     REGEXP_STATIC_MULTILINE     = -2,
03574     REGEXP_STATIC_LAST_MATCH    = -3,
03575     REGEXP_STATIC_LAST_PAREN    = -4,
03576     REGEXP_STATIC_LEFT_CONTEXT  = -5,
03577     REGEXP_STATIC_RIGHT_CONTEXT = -6
03578 };
03579 
03580 JSBool
03581 js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
03582 {
03583     JS_ClearRegExpStatics(cx);
03584     return js_AddRoot(cx, &res->input, "res->input");
03585 }
03586 
03587 void
03588 js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
03589 {
03590     if (res->moreParens) {
03591         JS_free(cx, res->moreParens);
03592         res->moreParens = NULL;
03593     }
03594     js_RemoveRoot(cx->runtime, &res->input);
03595 }
03596 
03597 static JSBool
03598 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
03599 {
03600     jsint slot;
03601     JSRegExpStatics *res;
03602     JSString *str;
03603     JSSubString *sub;
03604 
03605     res = &cx->regExpStatics;
03606     if (!JSVAL_IS_INT(id))
03607         return JS_TRUE;
03608     slot = JSVAL_TO_INT(id);
03609     switch (slot) {
03610       case REGEXP_STATIC_INPUT:
03611         *vp = res->input ? STRING_TO_JSVAL(res->input)
03612                          : JS_GetEmptyStringValue(cx);
03613         return JS_TRUE;
03614       case REGEXP_STATIC_MULTILINE:
03615         *vp = BOOLEAN_TO_JSVAL(res->multiline);
03616         return JS_TRUE;
03617       case REGEXP_STATIC_LAST_MATCH:
03618         sub = &res->lastMatch;
03619         break;
03620       case REGEXP_STATIC_LAST_PAREN:
03621         sub = &res->lastParen;
03622         break;
03623       case REGEXP_STATIC_LEFT_CONTEXT:
03624         sub = &res->leftContext;
03625         break;
03626       case REGEXP_STATIC_RIGHT_CONTEXT:
03627         sub = &res->rightContext;
03628         break;
03629       default:
03630         sub = REGEXP_PAREN_SUBSTRING(res, slot);
03631         break;
03632     }
03633     str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
03634     if (!str)
03635         return JS_FALSE;
03636     *vp = STRING_TO_JSVAL(str);
03637     return JS_TRUE;
03638 }
03639 
03640 static JSBool
03641 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
03642 {
03643     JSRegExpStatics *res;
03644 
03645     if (!JSVAL_IS_INT(id))
03646         return JS_TRUE;
03647     res = &cx->regExpStatics;
03648     /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
03649     if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
03650         if (!JSVAL_IS_STRING(*vp) &&
03651             !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
03652             return JS_FALSE;
03653         }
03654         res->input = JSVAL_TO_STRING(*vp);
03655     } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
03656         if (!JSVAL_IS_BOOLEAN(*vp) &&
03657             !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
03658             return JS_FALSE;
03659         }
03660         res->multiline = JSVAL_TO_BOOLEAN(*vp);
03661     }
03662     return JS_TRUE;
03663 }
03664 
03665 static JSPropertySpec regexp_static_props[] = {
03666     {"input",
03667      REGEXP_STATIC_INPUT,
03668      JSPROP_ENUMERATE|JSPROP_SHARED,
03669      regexp_static_getProperty,    regexp_static_setProperty},
03670     {"multiline",
03671      REGEXP_STATIC_MULTILINE,
03672      JSPROP_ENUMERATE|JSPROP_SHARED,
03673      regexp_static_getProperty,    regexp_static_setProperty},
03674     {"lastMatch",
03675      REGEXP_STATIC_LAST_MATCH,
03676      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03677      regexp_static_getProperty,    regexp_static_getProperty},
03678     {"lastParen",
03679      REGEXP_STATIC_LAST_PAREN,
03680      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03681      regexp_static_getProperty,    regexp_static_getProperty},
03682     {"leftContext",
03683      REGEXP_STATIC_LEFT_CONTEXT,
03684      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03685      regexp_static_getProperty,    regexp_static_getProperty},
03686     {"rightContext",
03687      REGEXP_STATIC_RIGHT_CONTEXT,
03688      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03689      regexp_static_getProperty,    regexp_static_getProperty},
03690 
03691     /* XXX should have block scope and local $1, etc. */
03692     {"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03693      regexp_static_getProperty,    regexp_static_getProperty},
03694     {"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03695      regexp_static_getProperty,    regexp_static_getProperty},
03696     {"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03697      regexp_static_getProperty,    regexp_static_getProperty},
03698     {"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03699      regexp_static_getProperty,    regexp_static_getProperty},
03700     {"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03701      regexp_static_getProperty,    regexp_static_getProperty},
03702     {"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03703      regexp_static_getProperty,    regexp_static_getProperty},
03704     {"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03705      regexp_static_getProperty,    regexp_static_getProperty},
03706     {"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03707      regexp_static_getProperty,    regexp_static_getProperty},
03708     {"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
03709      regexp_static_getProperty,    regexp_static_getProperty},
03710 
03711     {0,0,0,0,0}
03712 };
03713 
03714 static void
03715 regexp_finalize(JSContext *cx, JSObject *obj)
03716 {
03717     JSRegExp *re;
03718 
03719     re = (JSRegExp *) JS_GetPrivate(cx, obj);
03720     if (!re)
03721         return;
03722     js_DestroyRegExp(cx, re);
03723 }
03724 
03725 /* Forward static prototype. */
03726 static JSBool
03727 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
03728             jsval *rval);
03729 
03730 static JSBool
03731 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
03732 {
03733     return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
03734 }
03735 
03736 #if JS_HAS_XDR
03737 
03738 #include "jsxdrapi.h"
03739 
03740 static JSBool
03741 regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
03742 {
03743     JSRegExp *re;
03744     JSString *source;
03745     uint32 flagsword;
03746     JSObject *obj;
03747 
03748     if (xdr->mode == JSXDR_ENCODE) {
03749         re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
03750         if (!re)
03751             return JS_FALSE;
03752         source = re->source;
03753         flagsword = ((uint32)re->cloneIndex << 16) | re->flags;
03754     }
03755     if (!JS_XDRString(xdr, &source) ||
03756         !JS_XDRUint32(xdr, &flagsword)) {
03757         return JS_FALSE;
03758     }
03759     if (xdr->mode == JSXDR_DECODE) {
03760         obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
03761         if (!obj)
03762             return JS_FALSE;
03763         re = js_NewRegExp(xdr->cx, NULL, source, (uint16)flagsword, JS_FALSE);
03764         if (!re)
03765             return JS_FALSE;
03766         if (!JS_SetPrivate(xdr->cx, obj, re) ||
03767             !js_SetLastIndex(xdr->cx, obj, 0)) {
03768             js_DestroyRegExp(xdr->cx, re);
03769             return JS_FALSE;
03770         }
03771         re->cloneIndex = (uint16)(flagsword >> 16);
03772         *objp = obj;
03773     }
03774     return JS_TRUE;
03775 }
03776 
03777 #else  /* !JS_HAS_XDR */
03778 
03779 #define regexp_xdrObject NULL
03780 
03781 #endif /* !JS_HAS_XDR */
03782 
03783 static uint32
03784 regexp_mark(JSContext *cx, JSObject *obj, void *arg)
03785 {
03786     JSRegExp *re = (JSRegExp *) JS_GetPrivate(cx, obj);
03787     if (re)
03788         GC_MARK(cx, re->source, "source");
03789     return 0;
03790 }
03791 
03792 JSClass js_RegExpClass = {
03793     js_RegExp_str,
03794     JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
03795     JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
03796     JS_PropertyStub,    JS_PropertyStub,
03797     regexp_getProperty, regexp_setProperty,
03798     JS_EnumerateStub,   JS_ResolveStub,
03799     JS_ConvertStub,     regexp_finalize,
03800     NULL,               NULL,
03801     regexp_call,        NULL,
03802     regexp_xdrObject,   NULL,
03803     regexp_mark,        0
03804 };
03805 
03806 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
03807 
03808 JSBool
03809 js_regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
03810                    jsval *rval)
03811 {
03812     JSRegExp *re;
03813     const jschar *source;
03814     jschar *chars;
03815     size_t length, nflags;
03816     uintN flags;
03817     JSString *str;
03818 
03819     if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
03820         return JS_FALSE;
03821     JS_LOCK_OBJ(cx, obj);
03822     re = (JSRegExp *) JS_GetPrivate(cx, obj);
03823     if (!re) {
03824         JS_UNLOCK_OBJ(cx, obj);
03825         *rval = STRING_TO_JSVAL(cx->runtime->emptyString);
03826         return JS_TRUE;
03827     }
03828 
03829     source = JSSTRING_CHARS(re->source);
03830     length = JSSTRING_LENGTH(re->source);
03831     if (length == 0) {
03832         source = empty_regexp_ucstr;
03833         length = sizeof(empty_regexp_ucstr) / sizeof(jschar) - 1;
03834     }
03835     length += 2;
03836     nflags = 0;
03837     for (flags = re->flags; flags != 0; flags &= flags - 1)
03838         nflags++;
03839     chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
03840     if (!chars) {
03841         JS_UNLOCK_OBJ(cx, obj);
03842         return JS_FALSE;
03843     }
03844 
03845     chars[0] = '/';
03846     js_strncpy(&chars[1], source, length - 2);
03847     chars[length-1] = '/';
03848     if (nflags) {
03849         if (re->flags & JSREG_GLOB)
03850             chars[length++] = 'g';
03851         if (re->flags & JSREG_FOLD)
03852             chars[length++] = 'i';
03853         if (re->flags & JSREG_MULTILINE)
03854             chars[length++] = 'm';
03855     }
03856     JS_UNLOCK_OBJ(cx, obj);
03857     chars[length] = 0;
03858 
03859     str = js_NewString(cx, chars, length, 0);
03860     if (!str) {
03861         JS_free(cx, chars);
03862         return JS_FALSE;
03863     }
03864     *rval = STRING_TO_JSVAL(str);
03865     return JS_TRUE;
03866 }
03867 
03868 static JSBool
03869 regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
03870                jsval *rval)
03871 {
03872     JSString *opt, *str;
03873     JSRegExp *oldre, *re;
03874     JSBool ok, ok2;
03875     JSObject *obj2;
03876     size_t length, nbytes;
03877     const jschar *cp, *start, *end;
03878     jschar *nstart, *ncp, *tmp;
03879 
03880     if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
03881         return JS_FALSE;
03882     opt = NULL;
03883     if (argc == 0) {
03884         str = cx->runtime->emptyString;
03885     } else {
03886         if (JSVAL_IS_OBJECT(argv[0])) {
03887             /*
03888              * If we get passed in a RegExp object we construct a new
03889              * RegExp that is a duplicate of it by re-compiling the
03890              * original source code. ECMA requires that it be an error
03891              * here if the flags are specified. (We must use the flags
03892              * from the original RegExp also).
03893              */
03894             obj2 = JSVAL_TO_OBJECT(argv[0]);
03895             if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
03896                 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
03897                     JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
03898                                          JSMSG_NEWREGEXP_FLAGGED);
03899                     return JS_FALSE;
03900                 }
03901                 JS_LOCK_OBJ(cx, obj2);
03902                 re = (JSRegExp *) JS_GetPrivate(cx, obj2);
03903                 if (!re) {
03904                     JS_UNLOCK_OBJ(cx, obj2);
03905                     return JS_FALSE;
03906                 }
03907                 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
03908                 JS_UNLOCK_OBJ(cx, obj2);
03909                 goto created;
03910             }
03911         }
03912         str = js_ValueToString(cx, argv[0]);
03913         if (!str)
03914             return JS_FALSE;
03915         argv[0] = STRING_TO_JSVAL(str);
03916         if (argc > 1) {
03917             if (JSVAL_IS_VOID(argv[1])) {
03918                 opt = NULL;
03919             } else {
03920                 opt = js_ValueToString(cx, argv[1]);
03921                 if (!opt)
03922                     return JS_FALSE;
03923                 argv[1] = STRING_TO_JSVAL(opt);
03924             }
03925         }
03926 
03927         /* Escape any naked slashes in the regexp source. */
03928         length = JSSTRING_LENGTH(str);
03929         start = JSSTRING_CHARS(str);
03930         end = start + length;
03931         nstart = ncp = NULL;
03932         for (cp = start; cp < end; cp++) {
03933             if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
03934                 nbytes = (++length + 1) * sizeof(jschar);
03935                 if (!nstart) {
03936                     nstart = (jschar *) JS_malloc(cx, nbytes);
03937                     if (!nstart)
03938                         return JS_FALSE;
03939                     ncp = nstart + (cp - start);
03940                     js_strncpy(nstart, start, cp - start);
03941                 } else {
03942                     tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
03943                     if (!tmp) {
03944                         JS_free(cx, nstart);
03945                         return JS_FALSE;
03946                     }
03947                     ncp = tmp + (ncp - nstart);
03948                     nstart = tmp;
03949                 }
03950                 *ncp++ = '\\';
03951             }
03952             if (nstart)
03953                 *ncp++ = *cp;
03954         }
03955 
03956         if (nstart) {
03957             /* Don't forget to store the backstop after the new string. */
03958             JS_ASSERT((size_t)(ncp - nstart) == length);
03959             *ncp = 0;
03960             str = js_NewString(cx, nstart, length, 0);
03961             if (!str) {
03962                 JS_free(cx, nstart);
03963                 return JS_FALSE;
03964             }
03965             argv[0] = STRING_TO_JSVAL(str);
03966         }
03967     }
03968 
03969     re = js_NewRegExpOpt(cx, NULL, str, opt, JS_FALSE);
03970 created:
03971     if (!re)
03972         return JS_FALSE;
03973     JS_LOCK_OBJ(cx, obj);
03974     oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
03975     ok = JS_SetPrivate(cx, obj, re);
03976     ok2 = js_SetLastIndex(cx, obj, 0);
03977     JS_UNLOCK_OBJ(cx, obj);
03978     if (!ok) {
03979         js_DestroyRegExp(cx, re);
03980         return JS_FALSE;
03981     }
03982     if (oldre)
03983         js_DestroyRegExp(cx, oldre);
03984     *rval = OBJECT_TO_JSVAL(obj);
03985     return ok2;
03986 }
03987 
03988 static JSBool
03989 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
03990                 JSBool test, jsval *rval)
03991 {
03992     JSBool ok;
03993     JSRegExp *re;
03994     jsdouble lastIndex;
03995     JSString *str;
03996     size_t i;
03997 
03998     ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
03999     if (!ok)
04000         return JS_FALSE;
04001     JS_LOCK_OBJ(cx, obj);
04002     re = (JSRegExp *) JS_GetPrivate(cx, obj);
04003     if (!re) {
04004         JS_UNLOCK_OBJ(cx, obj);
04005         return JS_TRUE;
04006     }
04007 
04008     /* NB: we must reach out: after this paragraph, in order to drop re. */
04009     HOLD_REGEXP(cx, re);
04010     if (re->flags & JSREG_GLOB) {
04011         ok = js_GetLastIndex(cx, obj, &lastIndex);
04012     } else {
04013         lastIndex = 0;
04014     }
04015     JS_UNLOCK_OBJ(cx, obj);
04016     if (!ok)
04017         goto out;
04018 
04019     /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
04020     if (argc == 0) {
04021         str = cx->regExpStatics.input;
04022         if (!str) {
04023             JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
04024                                  JSMSG_NO_INPUT,
04025                                  JS_GetStringBytes(re->source),
04026                                  (re->flags & JSREG_GLOB) ? "g" : "",
04027                                  (re->flags & JSREG_FOLD) ? "i" : "",
04028                                  (re->flags & JSREG_MULTILINE) ? "m" : "");
04029             ok = JS_FALSE;
04030             goto out;
04031         }
04032     } else {
04033         str = js_ValueToString(cx, argv[0]);
04034         if (!str) {
04035             ok = JS_FALSE;
04036             goto out;
04037         }
04038         argv[0] = STRING_TO_JSVAL(str);
04039     }
04040 
04041     if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
04042         ok = js_SetLastIndex(cx, obj, 0);
04043         *rval = JSVAL_NULL;
04044     } else {
04045         i = (size_t) lastIndex;
04046         ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
04047         if (ok && (re->flags & JSREG_GLOB))
04048             ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
04049     }
04050 
04051 out:
04052     DROP_REGEXP(cx, re);
04053     return ok;
04054 }
04055 
04056 static JSBool
04057 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
04058 {
04059     return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
04060 }
04061 
04062 static JSBool
04063 regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
04064 {
04065     if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
04066         return JS_FALSE;
04067     if (*rval != JSVAL_TRUE)
04068         *rval = JSVAL_FALSE;
04069     return JS_TRUE;
04070 }
04071 
04072 static JSFunctionSpec regexp_methods[] = {
04073 #if JS_HAS_TOSOURCE
04074     {js_toSource_str,   js_regexp_toString,     0,0,0},
04075 #endif
04076     {js_toString_str,   js_regexp_toString,     0,0,0},
04077     {"compile",         regexp_compile,         1,0,0},
04078     {"exec",            regexp_exec,            0,0,0},
04079     {"test",            regexp_test,            0,0,0},
04080     {0,0,0,0,0}
04081 };
04082 
04083 static JSBool
04084 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
04085 {
04086     if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
04087         /*
04088          * If first arg is regexp and no flags are given, just return the arg.
04089          * (regexp_compile detects the regexp + flags case and throws a
04090          * TypeError.)  See 10.15.3.1.
04091          */
04092         if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
04093             !JSVAL_IS_PRIMITIVE(argv[0]) &&
04094             OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
04095             *rval = argv[0];
04096             return JS_TRUE;
04097         }
04098 
04099         /* Otherwise, replace obj with a new RegExp object. */
04100         obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
04101         if (!obj)
04102             return JS_FALSE;
04103 
04104         /*
04105          * regexp_compile does not use rval to root its temporaries
04106          * so we can use it to root obj.
04107          */
04108         *rval = OBJECT_TO_JSVAL(obj);
04109     }
04110     return regexp_compile(cx, obj, argc, argv, rval);
04111 }
04112 
04113 JSObject *
04114 js_InitRegExpClass(JSContext *cx, JSObject *obj)
04115 {
04116     JSObject *proto, *ctor;
04117     jsval rval;
04118 
04119     proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
04120                          regexp_props, regexp_methods,
04121                          regexp_static_props, NULL);
04122 
04123     if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
04124         return NULL;
04125     if (!JS_AliasProperty(cx, ctor, "input",        "$_") ||
04126         !JS_AliasProperty(cx, ctor, "multiline",    "$*") ||
04127         !JS_AliasProperty(cx, ctor, "lastMatch",    "$&") ||
04128         !JS_AliasProperty(cx, ctor, "lastParen",    "$+") ||
04129         !JS_AliasProperty(cx, ctor, "leftContext",  "$`") ||
04130         !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
04131         goto bad;
04132     }
04133 
04134     /* Give RegExp.prototype private data so it matches the empty string. */
04135     if (!regexp_compile(cx, proto, 0, NULL, &rval))
04136         goto bad;
04137     return proto;
04138 
04139 bad:
04140     JS_DeleteProperty(cx, obj, js_RegExpClass.name);
04141     return NULL;
04142 }
04143 
04144 JSObject *
04145 js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
04146                    jschar *chars, size_t length, uintN flags)
04147 {
04148     JSString *str;
04149     JSObject *obj;
04150     JSRegExp *re;
04151     JSTempValueRooter tvr;
04152 
04153     str = js_NewStringCopyN(cx, chars, length, 0);
04154     if (!str)
04155         return NULL;
04156     re = js_NewRegExp(cx, ts,  str, flags, JS_FALSE);
04157     if (!re)
04158         return NULL;
04159     JS_PUSH_TEMP_ROOT_STRING(cx, str, &tvr);
04160     obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
04161     if (!obj || !JS_SetPrivate(cx, obj, re)) {
04162         js_DestroyRegExp(cx, re);
04163         obj = NULL;
04164     }
04165     if (obj && !js_SetLastIndex(cx, obj, 0))
04166         obj = NULL;
04167     JS_POP_TEMP_ROOT(cx, &tvr);
04168     return obj;
04169 }
04170 
04171 JSObject *
04172 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
04173 {
04174     JSObject *clone;
04175     JSRegExp *re;
04176 
04177     JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
04178     clone = js_NewObject(cx, &js_RegExpClass, NULL, parent);
04179     if (!clone)
04180         return NULL;
04181     re = JS_GetPrivate(cx, obj);
04182     if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
04183         cx->weakRoots.newborn[GCX_OBJECT] = NULL;
04184         return NULL;
04185     }
04186     HOLD_REGEXP(cx, re);
04187     return clone;
04188 }
04189 
04190 JSBool
04191 js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
04192 {
04193     jsval v;
04194 
04195     return JS_GetReservedSlot(cx, obj, 0, &v) &&
04196            js_ValueToNumber(cx, v, lastIndex);
04197 }
04198 
04199 JSBool
04200 js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
04201 {
04202     jsval v;
04203 
04204     return js_NewNumberValue(cx, lastIndex, &v) &&
04205            JS_SetReservedSlot(cx, obj, 0, v);
04206 }