Back to index

lightning-sunbird  0.9+nobinonly
jsscan.h
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
00002  *
00003  * ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is Mozilla Communicator client code, released
00017  * March 31, 1998.
00018  *
00019  * The Initial Developer of the Original Code is
00020  * Netscape Communications Corporation.
00021  * Portions created by the Initial Developer are Copyright (C) 1998
00022  * the Initial Developer. All Rights Reserved.
00023  *
00024  * Contributor(s):
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either of the GNU General Public License Version 2 or later (the "GPL"),
00028  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 
00040 #ifndef jsscan_h___
00041 #define jsscan_h___
00042 /*
00043  * JS lexical scanner interface.
00044  */
00045 #include <stddef.h>
00046 #include <stdio.h>
00047 #include "jsconfig.h"
00048 #include "jsopcode.h"
00049 #include "jsprvtd.h"
00050 #include "jspubtd.h"
00051 
00052 JS_BEGIN_EXTERN_C
00053 /* For each JS_KEYWORD use in jskeyword.tbl, declare extern js_<keyword>_str[]; type, op, version are unused here. */
00054 #define JS_KEYWORD(keyword, type, op, version) \
00055     extern const char js_##keyword##_str[];
00056 #include "jskeyword.tbl"
00057 #undef JS_KEYWORD
00058 /* Scanner token codes; values are explicit through TOK_BODY (84), so TOK_RESERVED is 85 and TOK_LIMIT is 86. */
00059 typedef enum JSTokenType {
00060     TOK_ERROR = -1,                     /* well-known as the only code < EOF */
00061     TOK_EOF = 0,                        /* end of file */
00062     TOK_EOL = 1,                        /* end of line */
00063     TOK_SEMI = 2,                       /* semicolon */
00064     TOK_COMMA = 3,                      /* comma operator */
00065     TOK_ASSIGN = 4,                     /* assignment ops (= += -= etc.) */
00066     TOK_HOOK = 5, TOK_COLON = 6,        /* conditional (?:) */
00067     TOK_OR = 7,                         /* logical or (||) */
00068     TOK_AND = 8,                        /* logical and (&&) */
00069     TOK_BITOR = 9,                      /* bitwise-or (|) */
00070     TOK_BITXOR = 10,                    /* bitwise-xor (^) */
00071     TOK_BITAND = 11,                    /* bitwise-and (&) */
00072     TOK_EQOP = 12,                      /* equality ops (== !=) */
00073     TOK_RELOP = 13,                     /* relational ops (< <= > >=) */
00074     TOK_SHOP = 14,                      /* shift ops (<< >> >>>) */
00075     TOK_PLUS = 15,                      /* plus */
00076     TOK_MINUS = 16,                     /* minus */
00077     TOK_STAR = 17, TOK_DIVOP = 18,      /* multiply/divide ops (* / %) */
00078     TOK_UNARYOP = 19,                   /* unary prefix operator */
00079     TOK_INC = 20, TOK_DEC = 21,         /* increment/decrement (++ --) */
00080     TOK_DOT = 22,                       /* member operator (.) */
00081     TOK_LB = 23, TOK_RB = 24,           /* left and right brackets */
00082     TOK_LC = 25, TOK_RC = 26,           /* left and right curlies (braces) */
00083     TOK_LP = 27, TOK_RP = 28,           /* left and right parentheses */
00084     TOK_NAME = 29,                      /* identifier */
00085     TOK_NUMBER = 30,                    /* numeric constant */
00086     TOK_STRING = 31,                    /* string constant */
00087     TOK_OBJECT = 32,                    /* RegExp or other object constant */
00088     TOK_PRIMARY = 33,                   /* true, false, null, this, super */
00089     TOK_FUNCTION = 34,                  /* function keyword */
00090     TOK_EXPORT = 35,                    /* export keyword */
00091     TOK_IMPORT = 36,                    /* import keyword */
00092     TOK_IF = 37,                        /* if keyword */
00093     TOK_ELSE = 38,                      /* else keyword */
00094     TOK_SWITCH = 39,                    /* switch keyword */
00095     TOK_CASE = 40,                      /* case keyword */
00096     TOK_DEFAULT = 41,                   /* default keyword */
00097     TOK_WHILE = 42,                     /* while keyword */
00098     TOK_DO = 43,                        /* do keyword */
00099     TOK_FOR = 44,                       /* for keyword */
00100     TOK_BREAK = 45,                     /* break keyword */
00101     TOK_CONTINUE = 46,                  /* continue keyword */
00102     TOK_IN = 47,                        /* in keyword */
00103     TOK_VAR = 48,                       /* var keyword */
00104     TOK_WITH = 49,                      /* with keyword */
00105     TOK_RETURN = 50,                    /* return keyword */
00106     TOK_NEW = 51,                       /* new keyword */
00107     TOK_DELETE = 52,                    /* delete keyword */
00108     TOK_DEFSHARP = 53,                  /* #n= for object/array initializers */
00109     TOK_USESHARP = 54,                  /* #n# for object/array initializers */
00110     TOK_TRY = 55,                       /* try keyword */
00111     TOK_CATCH = 56,                     /* catch keyword */
00112     TOK_FINALLY = 57,                   /* finally keyword */
00113     TOK_THROW = 58,                     /* throw keyword */
00114     TOK_INSTANCEOF = 59,                /* instanceof keyword */
00115     TOK_DEBUGGER = 60,                  /* debugger keyword */
00116     TOK_XMLSTAGO = 61,                  /* XML start tag open (<) */
00117     TOK_XMLETAGO = 62,                  /* XML end tag open (</) */
00118     TOK_XMLPTAGC = 63,                  /* XML point tag close (/>) */
00119     TOK_XMLTAGC = 64,                   /* XML start or end tag close (>) */
00120     TOK_XMLNAME = 65,                   /* XML start-tag non-final fragment */
00121     TOK_XMLATTR = 66,                   /* XML quoted attribute value */
00122     TOK_XMLSPACE = 67,                  /* XML whitespace */
00123     TOK_XMLTEXT = 68,                   /* XML text */
00124     TOK_XMLCOMMENT = 69,                /* XML comment */
00125     TOK_XMLCDATA = 70,                  /* XML CDATA section */
00126     TOK_XMLPI = 71,                     /* XML processing instruction */
00127     TOK_AT = 72,                        /* XML attribute op (@) */
00128     TOK_DBLCOLON = 73,                  /* namespace qualified name op (::) */
00129     TOK_ANYNAME = 74,                   /* XML AnyName singleton (*) */
00130     TOK_DBLDOT = 75,                    /* XML descendant op (..) */
00131     TOK_FILTER = 76,                    /* XML filtering predicate op (.()) */
00132     TOK_XMLELEM = 77,                   /* XML element node type (no token) */
00133     TOK_XMLLIST = 78,                   /* XML list node type (no token) */
00134     TOK_YIELD = 79,                     /* yield from generator function */
00135     TOK_ARRAYCOMP = 80,                 /* array comprehension initialiser */
00136     TOK_ARRAYPUSH = 81,                 /* array push within comprehension */
00137     TOK_LEXICALSCOPE = 82,              /* block scope AST node label */
00138     TOK_LET = 83,                       /* let keyword */
00139     TOK_BODY = 84,                      /* synthetic body of function with
00140                                            destructuring formal parameters */
00141     TOK_RESERVED,                       /* reserved keywords */
00142     TOK_LIMIT                           /* domain size */
00143 } JSTokenType;
00144 /* True iff TOK_NAME <= tt <= TOK_PRIMARY, tested with a single compare (relies on uintN being unsigned). */
00145 #define IS_PRIMARY_TOKEN(tt) \
00146     ((uintN)((tt) - TOK_NAME) <= (uintN)(TOK_PRIMARY - TOK_NAME))
00147 /* True iff tt is TOK_AT, TOK_DBLCOLON or TOK_ANYNAME; tt is parenthesized but may be evaluated up to 3 times. */
00148 #define TOKEN_TYPE_IS_XML(tt) \
00149     ((tt) == TOK_AT || (tt) == TOK_DBLCOLON || (tt) == TOK_ANYNAME)
00150 /* TOKEN_TYPE_IS_DECL(tt): matches TOK_VAR, and also TOK_LET when JS_HAS_BLOCK_SCOPE is nonzero. */
00151 #if JS_HAS_BLOCK_SCOPE
00152 # define TOKEN_TYPE_IS_DECL(tt) ((tt) == TOK_VAR || (tt) == TOK_LET)
00153 #else
00154 # define TOKEN_TYPE_IS_DECL(tt) ((tt) == TOK_VAR)
00155 #endif
00156 /* jschar buffer described by base/limit/ptr, with grow and free callbacks stored alongside. */
00157 struct JSStringBuffer {
00158     jschar      *base;          /* start of buffer; compared against STRING_BUFFER_ERROR_BASE */
00159     jschar      *limit;         /* length limit for quick bounds check */
00160     jschar      *ptr;           /* slot for next non-NUL char to store */
00161     void        *data;          /* opaque pointer; NOTE(review): presumably for grow/free -- confirm */
00162     JSBool      (*grow)(JSStringBuffer *sb, size_t newlength);
00163     void        (*free)(JSStringBuffer *sb);
00164 };
00165 /* base == (jschar *) 1 is the error sentinel tested by STRING_BUFFER_OK; OFFSET is ptr - base, counted in jschars. */
00166 #define STRING_BUFFER_ERROR_BASE        ((jschar *) 1)
00167 #define STRING_BUFFER_OK(sb)            ((sb)->base != STRING_BUFFER_ERROR_BASE)
00168 #define STRING_BUFFER_OFFSET(sb)        ((sb)->ptr -(sb)->base)
00169 /* String buffer operations all return void; NOTE(review): failure is presumably visible via STRING_BUFFER_OK(sb). */
00170 extern void
00171 js_InitStringBuffer(JSStringBuffer *sb);
00172 /* NOTE(review): presumably the counterpart of js_InitStringBuffer -- confirm what it releases in jsscan.c. */
00173 extern void
00174 js_FinishStringBuffer(JSStringBuffer *sb);
00175 /* Append the single jschar c to sb (per the name; implementation not visible here). */
00176 extern void
00177 js_AppendChar(JSStringBuffer *sb, jschar c);
00178 /* Append c to sb count times (per the name; implementation not visible here). */
00179 extern void
00180 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count);
00181 /* Append the C string asciiz to sb (per the names; presumably NUL-terminated ASCII -- confirm). */
00182 extern void
00183 js_AppendCString(JSStringBuffer *sb, const char *asciiz);
00184 /* Append the JSString str to sb (per the name; implementation not visible here). */
00185 extern void
00186 js_AppendJSString(JSStringBuffer *sb, JSString *str);
00187 /* Location of one character: index within its physical line plus the line number, both uint16. */
00188 struct JSTokenPtr {
00189     uint16              index;          /* index of char in physical line */
00190     uint16              lineno;         /* physical line number */
00191 };
00192 /* Source span of a token: begin is its first character, end is one past its last character. */
00193 struct JSTokenPos {
00194     JSTokenPtr          begin;          /* first character and line of token */
00195     JSTokenPtr          end;            /* index 1 past last char, last line */
00196 };
00197 /* One scanned token: type, source span, line-buffer pointer, and a payload union (presumably selected by type). */
00198 struct JSToken {
00199     JSTokenType         type;           /* char value or above enumerator */
00200     JSTokenPos          pos;            /* token position in file */
00201     jschar              *ptr;           /* beginning of token in line buffer */
00202     union {
00203         struct {                        /* non-numeric literal */
00204             JSOp        op;             /* operator, for minimal parser */
00205             JSAtom      *atom;          /* atom table entry */
00206         } s;
00207         struct {                        /* atom pair, for XML PIs */
00208             JSAtom      *atom2;         /* auxiliary atom table entry */
00209             JSAtom      *atom;          /* main atom table entry */
00210         } p;
00211         jsdouble        dval;           /* floating point number */
00212     } u;
00213 };
00214 /* Shorthand member names for the JSToken payload; note t_atom names u.s.atom and u.p.atom has no shorthand. */
00215 #define t_op            u.s.op
00216 #define t_atom          u.s.atom
00217 #define t_atom2         u.p.atom2
00218 #define t_dval          u.dval
00219 /* Three-pointer view of a jschar buffer (base, limit, current ptr); used for linebuf and userbuf in JSTokenStream. */
00220 typedef struct JSTokenBuf {
00221     jschar              *base;          /* base of line or stream buffer */
00222     jschar              *limit;         /* limit for quick bounds check */
00223     jschar              *ptr;           /* next char to get, or slot to use */
00224 } JSTokenBuf;
00225 /* Scanner sizing constants; NTOKENS_MASK is NTOKENS-1, which is only a valid mask while NTOKENS is a power of 2. */
00226 #define JS_LINE_LIMIT   256             /* logical line buffer size limit --
00227                                            physical line length is unlimited */
00228 #define NTOKENS         4               /* 1 current + 2 lookahead, rounded */
00229 #define NTOKENS_MASK    (NTOKENS-1)     /* to power of 2 to avoid divmod by 3 */
00230 /* Scanner state for one source: token ring, unget buffer, TSF_* flags, line/user buffers, and source information. */
00231 struct JSTokenStream {
00232     JSToken             tokens[NTOKENS];/* circular token buffer */
00233     uintN               cursor;         /* index of last parsed token */
00234     uintN               lookahead;      /* count of lookahead tokens */
00235     uintN               lineno;         /* current line number */
00236     uintN               ungetpos;       /* next free char slot in ungetbuf */
00237     jschar              ungetbuf[6];    /* at most 6, for \uXXXX lookahead */
00238     uintN               flags;          /* flags -- see below */
00239     ptrdiff_t           linelen;        /* physical linebuf segment length */
00240     ptrdiff_t           linepos;        /* linebuf offset in physical line */
00241     JSTokenBuf          linebuf;        /* line buffer for diagnostics */
00242     JSTokenBuf          userbuf;        /* user input buffer if !file */
00243     JSStringBuffer      tokenbuf;       /* current token string buffer */
00244     const char          *filename;      /* input filename or null */
00245     FILE                *file;          /* stdio stream if reading from file */
00246     JSPrincipals        *principals;    /* principals associated with source */
00247     JSSourceHandler     listener;       /* callback for source; eg debugger */
00248     void                *listenerData;  /* listener 'this' data */
00249     void                *listenerTSData;/* listener data for this TokenStream */
00250     jschar              *saveEOL;       /* save next end of line in userbuf, to
00251                                            optimize for very long lines */
00252 };
00253 /* CURRENT_TOKEN: the token at ts->cursor. ON_CURRENT_LINE: pos ends on ts's current line (lineno truncated to uint16). */
00254 #define CURRENT_TOKEN(ts)       ((ts)->tokens[(ts)->cursor])
00255 #define ON_CURRENT_LINE(ts,pos) ((uint16)(ts)->lineno == (pos).end.lineno)
00256 
00257 /* JSTokenStream flags: bit masks kept in ts->flags (0x10 is not assigned in this header). */
00258 #define TSF_ERROR       0x01            /* fatal error while compiling */
00259 #define TSF_EOF         0x02            /* hit end of file */
00260 #define TSF_NEWLINES    0x04            /* tokenize newlines */
00261 #define TSF_OPERAND     0x08            /* looking for operand, not operator */
00262 #define TSF_NLFLAG      0x20            /* last linebuf ended with \n */
00263 #define TSF_CRFLAG      0x40            /* linebuf would have ended with \r */
00264 #define TSF_DIRTYLINE   0x80            /* non-whitespace since start of line */
00265 #define TSF_OWNFILENAME 0x100           /* ts->filename is malloc'd */
00266 #define TSF_XMLTAGMODE  0x200           /* scanning within an XML tag in E4X */
00267 #define TSF_XMLTEXTMODE 0x400           /* scanning XMLText terminal from E4X */
00268 #define TSF_XMLONLYMODE 0x800           /* don't scan {expr} within text/tag */
00269 
00270 /* Flag indicating unexpected end of input, i.e. TOK_EOF not at top-level. */
00271 #define TSF_UNEXPECTED_EOF 0x1000
00272 
00273 /*
00274  * To handle the hard case of contiguous HTML comments, we want to clear the
00275  * TSF_DIRTYLINE flag at the end of each such comment.  But we'd rather not
00276  * scan for --> within every //-style comment unless we have to.  So we set
00277  * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
00278  * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
00279  * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
00280  *
00281  * This still works as before given a malformed comment hiding hack such as:
00282  *
00283  *    <script>
00284  *      <!-- comment hiding hack #1
00285  *      code goes here
00286  *      // --> oops, markup for script-unaware browsers goes here!
00287  *    </script>
00288  *
00289  * It does not cope with malformed comment hiding hacks where --> is hidden
00290  * by C-style comments, or on a dirty line.  Such cases are already broken.
00291  */
00292 #define TSF_IN_HTML_COMMENT 0x2000
00293 
00294 /* When set, keywords are ignored and TOK_NAME is returned to the parser instead. */
00295 #define TSF_KEYWORD_IS_NAME 0x4000
00296 
00297 /* Unicode line (U+2028) and paragraph (U+2029) separators, treated as line terminators in addition to \n and \r. */
00298 #define LINE_SEPARATOR  0x2028
00299 #define PARA_SEPARATOR  0x2029
00300 
00301 /*
00302  * Create a new token stream, either from an input buffer or from a file.
00303  * Return null on file-open or memory-allocation failure.
00304  *
00305  * NB: All of js_New{,Buffer,File}TokenStream() return a pointer to transient
00306  * memory in the current context's temp pool.  This memory is deallocated via
00307  * JS_ARENA_RELEASE() after parsing is finished.
00308  */
00309 extern JSTokenStream *
00310 js_NewTokenStream(JSContext *cx, const jschar *base, size_t length,
00311                   const char *filename, uintN lineno, JSPrincipals *principals);
00312 /* Friend-exported buffer variant: takes only the jschar input (base, length); no filename, lineno or principals. */
00313 extern JS_FRIEND_API(JSTokenStream *)
00314 js_NewBufferTokenStream(JSContext *cx, const jschar *base, size_t length);
00315 /* Friend-exported file variant; NOTE(review): when defaultfp is used instead of opening filename -- confirm. */
00316 extern JS_FRIEND_API(JSTokenStream *)
00317 js_NewFileTokenStream(JSContext *cx, const char *filename, FILE *defaultfp);
00318 /* Returns JSBool; NOTE(review): presumably closes ts->file when one is open -- confirm in jsscan.c. */
00319 extern JS_FRIEND_API(JSBool)
00320 js_CloseTokenStream(JSContext *cx, JSTokenStream *ts);
00321 /* fgets-style signature but returns int; NOTE(review): confirm the meaning of the result in jsscan.c. */
00322 extern JS_FRIEND_API(int)
00323 js_fgets(char *buf, int size, FILE *file);
00324 
00325 /*
00326  * If the given char array forms a JavaScript keyword, return the corresponding
00327  * token type. Otherwise return TOK_EOF.
00328  */
00329 extern JSTokenType
00330 js_CheckKeyword(const jschar *chars, size_t length);
00331 /* Nonzero iff js_CheckKeyword(chars, length) returns something other than TOK_EOF. */
00332 #define js_IsKeyword(chars, length) \
00333     (js_CheckKeyword(chars, length) != TOK_EOF)
00334 
00335 /*
00336  * Friend-exported API entry point: call mapfun on each reserved identifier
00337  * in the scanner's keyword table; mapfun takes a const char * and returns void.
00338  */
00339 extern JS_FRIEND_API(void)
00340 js_MapKeywords(void (*mapfun)(const char *));
00341 
00342 /*
00343  * Report a compile-time error by its number, using ts or cg to show context.
00344  * Return true for a warning, false for an error.
00345  */
00346 extern JSBool
00347 js_ReportCompileErrorNumber(JSContext *cx, void *handle, uintN flags,
00348                             uintN errorNumber, ...);
00349 /* Same parameter list as above; NOTE(review): "UC" presumably means jschar (Unicode) arguments -- confirm. */
00350 extern JSBool
00351 js_ReportCompileErrorNumberUC(JSContext *cx, void *handle, uintN flags,
00352                               uintN errorNumber, ...);
00353 
00354 /* Steal some JSREPORT_* bits (see jsapi.h) to tell handle's type; JSREPORT_HANDLE (0x300) covers the codes below. */
00355 #define JSREPORT_HANDLE 0x300
00356 #define JSREPORT_TS     0x000
00357 #define JSREPORT_CG     0x100
00358 #define JSREPORT_PN     0x200
00359 
00360 /*
00361  * Look ahead one token and return its type.
00362  */
00363 extern JSTokenType
00364 js_PeekToken(JSContext *cx, JSTokenStream *ts);
00365 /* NOTE(review): presumably js_PeekToken restricted to the current line -- confirm the result at a line end. */
00366 extern JSTokenType
00367 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts);
00368 
00369 /*
00370  * Get the next token from ts and return its type.
00371  */
00372 extern JSTokenType
00373 js_GetToken(JSContext *cx, JSTokenStream *ts);
00374 
00375 /*
00376  * Push back the last scanned token onto ts.
00377  */
00378 extern void
00379 js_UngetToken(JSTokenStream *ts);
00380 
00381 /*
00382  * Get the next token from ts if its type is tt; returns JSBool (presumably true on a match -- confirm).
00383  */
00384 extern JSBool
00385 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt);
00386 
00387 JS_END_EXTERN_C
00388 
00389 #endif /* jsscan_h___ */