Back to index

php5  5.3.10
regparse.h
Go to the documentation of this file.
00001 #ifndef REGPARSE_H
00002 #define REGPARSE_H
00003 /**********************************************************************
00004   regparse.h -  Oniguruma (regular expression library)
00005 **********************************************************************/
00006 /*-
00007  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00008  * All rights reserved.
00009  *
00010  * Redistribution and use in source and binary forms, with or without
00011  * modification, are permitted provided that the following conditions
00012  * are met:
00013  * 1. Redistributions of source code must retain the above copyright
00014  *    notice, this list of conditions and the following disclaimer.
00015  * 2. Redistributions in binary form must reproduce the above copyright
00016  *    notice, this list of conditions and the following disclaimer in the
00017  *    documentation and/or other materials provided with the distribution.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00020  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00021  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00022  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00023  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00024  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00025  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00026  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00027  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00028  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00029  * SUCH DAMAGE.
00030  */
00031 
00032 #include "regint.h"
00033 
00034 /* node type */
00035 #define N_STRING       (1<< 0)
00036 #define N_CCLASS       (1<< 1)
00037 #define N_CTYPE        (1<< 2)
00038 #define N_ANYCHAR      (1<< 3)
00039 #define N_BACKREF      (1<< 4)
00040 #define N_QUANTIFIER   (1<< 5)
00041 #define N_EFFECT       (1<< 6)
00042 #define N_ANCHOR       (1<< 7)
00043 #define N_LIST         (1<< 8)
00044 #define N_ALT          (1<< 9)
00045 #define N_CALL         (1<<10)
00046 
00047 #define IS_NODE_TYPE_SIMPLE(type) \
00048   (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
00049 
00050 #define NTYPE(node)        ((node)->type)
00051 #define NCONS(node)        ((node)->u.cons)
00052 #define NSTRING(node)      ((node)->u.str)
00053 #define NCCLASS(node)      ((node)->u.cclass)
00054 #define NCTYPE(node)       ((node)->u.ctype)
00055 #define NQUANTIFIER(node)  ((node)->u.quantifier)
00056 #define NANCHOR(node)      ((node)->u.anchor)
00057 #define NBACKREF(node)     ((node)->u.backref)
00058 #define NEFFECT(node)      ((node)->u.effect)
00059 #define NCALL(node)        ((node)->u.call)
00060 
00061 #define CTYPE_WORD              (1<<0)
00062 #define CTYPE_NOT_WORD          (1<<1)
00063 #define CTYPE_WHITE_SPACE       (1<<2)
00064 #define CTYPE_NOT_WHITE_SPACE   (1<<3)
00065 #define CTYPE_DIGIT             (1<<4)
00066 #define CTYPE_NOT_DIGIT         (1<<5)
00067 #define CTYPE_XDIGIT            (1<<6)
00068 #define CTYPE_NOT_XDIGIT        (1<<7)
00069 
00070 #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
00071 #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
00072 
00073 #define EFFECT_MEMORY           (1<<0)
00074 #define EFFECT_OPTION           (1<<1)
00075 #define EFFECT_STOP_BACKTRACK   (1<<2)
00076 
00077 #define NODE_STR_MARGIN         16
00078 #define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
00079 #define NODE_BACKREFS_SIZE       6
00080 
00081 #define NSTR_RAW                (1<<0) /* by backslashed number */
00082 #define NSTR_AMBIG              (1<<1)
00083 #define NSTR_AMBIG_REDUCE       (1<<2)
00084 
00085 #define NSTRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s)
00086 #define NSTRING_SET_RAW(node)          (node)->u.str.flag |= NSTR_RAW
00087 #define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~NSTR_RAW
00088 #define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= NSTR_AMBIG
00089 #define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
00090 #define NSTRING_IS_RAW(node)          (((node)->u.str.flag & NSTR_RAW)   != 0)
00091 #define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & NSTR_AMBIG) != 0)
00092 #define NSTRING_IS_AMBIG_REDUCE(node) \
00093   (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
00094 
00095 #define BACKREFS_P(br) \
00096   (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
00097 
00098 #define NQ_TARGET_ISNOT_EMPTY     0
00099 #define NQ_TARGET_IS_EMPTY        1
00100 #define NQ_TARGET_IS_EMPTY_MEM    2
00101 #define NQ_TARGET_IS_EMPTY_REC    3
00102 
00103 
00104 typedef struct {
00105   UChar* s;
00106   UChar* end;
00107   unsigned int flag;
00108   int    capa;    /* (allocated size - 1) or 0: use buf[] */
00109   UChar  buf[NODE_STR_BUF_SIZE];
00110 } StrNode;
00111 
00112 /* move to regint.h */
00113 #if 0
00114 typedef struct {
00115   int    flags;
00116   BitSet bs;
00117   BBuf*  mbuf;     /* multi-byte info or NULL */
00118 } CClassNode;
00119 #endif
00120 
00121 typedef struct {
00122   int state;
00123   struct _Node* target;
00124   int lower;
00125   int upper;
00126   int greedy;
00127   int target_empty_info;
00128   struct _Node* head_exact;
00129   struct _Node* next_head_exact;
00130   int is_refered;     /* include called node. don't eliminate even if {0} */
00131 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00132   int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */
00133 #endif
00134 } QuantifierNode;
00135 
00136 /* status bits */
00137 #define NST_MIN_FIXED             (1<<0)
00138 #define NST_MAX_FIXED             (1<<1)
00139 #define NST_CLEN_FIXED            (1<<2)
00140 #define NST_MARK1                 (1<<3)
00141 #define NST_MARK2                 (1<<4)
00142 #define NST_MEM_BACKREFED         (1<<5)
00143 #define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
00144 #define NST_RECURSION             (1<<7)
00145 #define NST_CALLED                (1<<8)
00146 #define NST_ADDR_FIXED            (1<<9)
00147 #define NST_NAMED_GROUP           (1<<10)
00148 #define NST_NAME_REF              (1<<11)
00149 #define NST_IN_REPEAT             (1<<12) /* STK_REPEAT is nested in stack. */
00150 #define NST_NEST_LEVEL            (1<<13)
00151 #define NST_BY_NUMBER             (1<<14) /* {n,m} */
00152 
00153 #define SET_EFFECT_STATUS(node,f)      (node)->u.effect.state |=  (f)
00154 #define CLEAR_EFFECT_STATUS(node,f)    (node)->u.effect.state &= ~(f)
00155 
00156 #define IS_EFFECT_CALLED(en)           (((en)->state & NST_CALLED)        != 0)
00157 #define IS_EFFECT_ADDR_FIXED(en)       (((en)->state & NST_ADDR_FIXED)    != 0)
00158 #define IS_EFFECT_RECURSION(en)        (((en)->state & NST_RECURSION)     != 0)
00159 #define IS_EFFECT_MARK1(en)            (((en)->state & NST_MARK1)         != 0)
00160 #define IS_EFFECT_MARK2(en)            (((en)->state & NST_MARK2)         != 0)
00161 #define IS_EFFECT_MIN_FIXED(en)        (((en)->state & NST_MIN_FIXED)     != 0)
00162 #define IS_EFFECT_MAX_FIXED(en)        (((en)->state & NST_MAX_FIXED)     != 0)
00163 #define IS_EFFECT_CLEN_FIXED(en)       (((en)->state & NST_CLEN_FIXED)    != 0)
00164 #define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
00165     (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
00166 #define IS_EFFECT_NAMED_GROUP(en)      (((en)->state & NST_NAMED_GROUP)   != 0)
00167 
00168 #define SET_CALL_RECURSION(node)       (node)->u.call.state |= NST_RECURSION
00169 #define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
00170 #define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
00171 #define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
00172 #define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0)
00173 #define IS_QUANTIFIER_IN_REPEAT(qn)     (((qn)->state & NST_IN_REPEAT)  != 0)
00174 #define IS_QUANTIFIER_BY_NUMBER(qn)     (((qn)->state & NST_BY_NUMBER)  != 0)
00175 
00176 typedef struct {
00177   int state;
00178   int type;
00179   int regnum;
00180   OnigOptionType option;
00181   struct _Node* target;
00182   AbsAddrType call_addr;
00183   /* for multiple call reference */
00184   OnigDistance min_len; /* min length (byte) */
00185   OnigDistance max_len; /* max length (byte) */ 
00186   int char_len;        /* character length  */
00187   int opt_count;       /* referenced count in optimize_node_left() */
00188 } EffectNode;
00189 
00190 #define CALLNODE_REFNUM_UNDEF  -1
00191 
00192 #ifdef USE_SUBEXP_CALL
00193 
00194 typedef struct {
00195   int offset;
00196   struct _Node* target;
00197 } UnsetAddr;
00198 
00199 typedef struct {
00200   int num;
00201   int alloc;
00202   UnsetAddr* us;
00203 } UnsetAddrList;
00204 
00205 typedef struct {
00206   int     state;
00207   int     ref_num;
00208   UChar*  name;
00209   UChar*  name_end;
00210   struct _Node* target;  /* EffectNode : EFFECT_MEMORY */
00211   UnsetAddrList* unset_addr_list;
00212 } CallNode;
00213 
00214 #endif
00215 
00216 typedef struct {
00217   int     state;
00218   int     back_num;
00219   int     back_static[NODE_BACKREFS_SIZE];
00220   int*    back_dynamic;
00221   int     nest_level;
00222 } BackrefNode;
00223 
00224 typedef struct {
00225   int type;
00226   struct _Node* target;
00227   int char_len;
00228 } AnchorNode;
00229 
00230 typedef struct _Node {
00231   int type;
00232   union {
00233     StrNode        str;
00234     CClassNode     cclass;
00235     QuantifierNode quantifier;
00236     EffectNode     effect;
00237 #ifdef USE_SUBEXP_CALL
00238     CallNode       call;
00239 #endif
00240     BackrefNode    backref;
00241     AnchorNode     anchor;
00242     struct {
00243       struct _Node* left;
00244       struct _Node* right;
00245     } cons;
00246     struct {
00247       int type;
00248     } ctype;
00249   } u;
00250 } Node;
00251 
00252 #define NULL_NODE  ((Node* )0)
00253 
00254 #define SCANENV_MEMNODES_SIZE               8
00255 #define SCANENV_MEM_NODES(senv)   \
00256  (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
00257     (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
00258 
00259 typedef struct {
00260   OnigOptionType  option;
00261   OnigAmbigType   ambig_flag;
00262   OnigEncoding    enc;
00263   OnigSyntaxType* syntax;
00264   BitStatusType   capture_history;
00265   BitStatusType   bt_mem_start;
00266   BitStatusType   bt_mem_end;
00267   BitStatusType   backrefed_mem;
00268   UChar*          pattern;
00269   UChar*          pattern_end;
00270   UChar*          error;
00271   UChar*          error_end;
00272   regex_t*        reg;       /* for reg->names only */
00273   int             num_call;
00274 #ifdef USE_SUBEXP_CALL
00275   UnsetAddrList*  unset_addr_list;
00276 #endif
00277   int             num_mem;
00278 #ifdef USE_NAMED_GROUP
00279   int             num_named;
00280 #endif
00281   int             mem_alloc;
00282   Node*           mem_nodes_static[SCANENV_MEMNODES_SIZE];
00283   Node**          mem_nodes_dynamic;
00284 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00285   int num_comb_exp_check;
00286   int comb_exp_max_regnum;
00287   int curr_max_regnum;
00288   int has_recursion;
00289 #endif
00290 } ScanEnv;
00291 
00292 
00293 #define IS_SYNTAX_OP(syn, opm)    (((syn)->op  & (opm)) != 0)
00294 #define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2 & (opm)) != 0)
00295 #define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
00296 
00297 
00298 #ifdef USE_NAMED_GROUP
00299 typedef struct {
00300   int new_val;
00301 } GroupNumRemap;
00302 
00303 extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
00304 #endif
00305 
00306 extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
00307 extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
00308 extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
00309 extern void   onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
00310 extern void   onig_node_conv_to_str_node P_((Node* node, int raw));
00311 extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
00312 extern void   onig_node_free P_((Node* node));
00313 extern Node*  onig_node_new_effect P_((int type));
00314 extern Node*  onig_node_new_anchor P_((int type));
00315 extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));
00316 extern Node*  onig_node_new_list P_((Node* left, Node* right));
00317 extern void   onig_node_str_clear P_((Node* node));
00318 extern int    onig_free_node_list P_((void));
00319 extern int    onig_names_free P_((regex_t* reg));
00320 extern int    onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
00321 
00322 #ifdef ONIG_DEBUG
00323 #ifdef USE_NAMED_GROUP
00324 extern int onig_print_names(FILE*, regex_t*);
00325 #endif
00326 #endif
00327 
00328 #endif /* REGPARSE_H */