Back to index

php5  5.3.10
regparse.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regparse.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regparse.h"
00031 
00032 #define WARN_BUFSIZE    256
00033 
00034 OnigSyntaxType OnigSyntaxRuby = {
00035   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00036      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
00037      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
00038      ONIG_SYN_OP_ESC_C_CONTROL )
00039    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
00040   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
00041       ONIG_SYN_OP2_OPTION_RUBY |
00042       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
00043       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
00044       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
00045       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
00046       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
00047       ONIG_SYN_OP2_ESC_H_XDIGIT )
00048   , ( SYN_GNU_REGEX_BV | 
00049       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
00050       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
00051       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
00052       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
00053       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
00054       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
00055       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
00056   , ONIG_OPTION_NONE
00057 };
00058 
00059 OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
00060 
00061 extern void onig_null_warn(const char* s) { }
00062 
00063 #ifdef RUBY_PLATFORM
00064 extern void
00065 onig_rb_warn(const char* s)
00066 {
00067   rb_warn("%s", s);
00068 }
00069 
00070 extern void
00071 onig_rb_warning(const char* s)
00072 {
00073   rb_warning("%s", s);
00074 }
00075 #endif
00076 
00077 #ifdef DEFAULT_WARN_FUNCTION
00078 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
00079 #else
00080 static OnigWarnFunc onig_warn = onig_null_warn;
00081 #endif
00082 
00083 #ifdef DEFAULT_VERB_WARN_FUNCTION
00084 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
00085 #else
00086 static OnigWarnFunc onig_verb_warn = onig_null_warn;
00087 #endif
00088 
00089 extern void onig_set_warn_func(OnigWarnFunc f)
00090 {
00091   onig_warn = f;
00092 }
00093 
00094 extern void onig_set_verb_warn_func(OnigWarnFunc f)
00095 {
00096   onig_verb_warn = f;
00097 }
00098 
00099 static void
00100 bbuf_free(BBuf* bbuf)
00101 {
00102   if (IS_NOT_NULL(bbuf)) {
00103     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
00104     xfree(bbuf);
00105   }
00106 }
00107 
00108 static int
00109 bbuf_clone(BBuf** rto, BBuf* from)
00110 {
00111   int r;
00112   BBuf *to;
00113 
00114   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
00115   CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
00116   r = BBUF_INIT(to, from->alloc);
00117   if (r != 0) return r;
00118   to->used = from->used;
00119   xmemcpy(to->p, from->p, from->used);
00120   return 0;
00121 }
00122 
00123 #define ONOFF(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f))
00124 
00125 #define MBCODE_START_POS(enc) \
00126   (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
00127 
00128 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
00129   add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
00130 
00131 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
00132   if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
00133     r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
00134     if (r) return r;\
00135   }\
00136 } while (0)
00137 
00138 
00139 #define BITSET_IS_EMPTY(bs,empty) do {\
00140   int i;\
00141   empty = 1;\
00142   for (i = 0; i < BITSET_SIZE; i++) {\
00143     if ((bs)[i] != 0) {\
00144       empty = 0; break;\
00145     }\
00146   }\
00147 } while (0)
00148 
00149 static void
00150 bitset_set_range(BitSetRef bs, int from, int to)
00151 {
00152   int i;
00153   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
00154     BITSET_SET_BIT(bs, i);
00155   }
00156 }
00157 
00158 #if 0
00159 static void
00160 bitset_set_all(BitSetRef bs)
00161 {
00162   int i;
00163   for (i = 0; i < BITSET_SIZE; i++) {
00164     bs[i] = ~((Bits )0);
00165   }
00166 }
00167 #endif
00168 
00169 static void
00170 bitset_invert(BitSetRef bs)
00171 {
00172   int i;
00173   for (i = 0; i < BITSET_SIZE; i++) {
00174     bs[i] = ~(bs[i]);
00175   }
00176 }
00177 
00178 static void
00179 bitset_invert_to(BitSetRef from, BitSetRef to)
00180 {
00181   int i;
00182   for (i = 0; i < BITSET_SIZE; i++) {
00183     to[i] = ~(from[i]);
00184   }
00185 }
00186 
00187 static void
00188 bitset_and(BitSetRef dest, BitSetRef bs)
00189 {
00190   int i;
00191   for (i = 0; i < BITSET_SIZE; i++) {
00192     dest[i] &= bs[i];
00193   }
00194 }
00195 
00196 static void
00197 bitset_or(BitSetRef dest, BitSetRef bs)
00198 {
00199   int i;
00200   for (i = 0; i < BITSET_SIZE; i++) {
00201     dest[i] |= bs[i];
00202   }
00203 }
00204 
00205 static void
00206 bitset_copy(BitSetRef dest, BitSetRef bs)
00207 {
00208   int i;
00209   for (i = 0; i < BITSET_SIZE; i++) {
00210     dest[i] = bs[i];
00211   }
00212 }
00213 
00214 extern int
00215 onig_strncmp(const UChar* s1, const UChar* s2, int n)
00216 {
00217   int x;
00218 
00219   while (n-- > 0) {
00220     x = *s2++ - *s1++;
00221     if (x) return x;
00222   }
00223   return 0;
00224 }
00225 
00226 static void
00227 k_strcpy(UChar* dest, const UChar* src, const UChar* end)
00228 {
00229   int len = end - src;
00230   if (len > 0) {
00231     xmemcpy(dest, src, len);
00232     dest[len] = (UChar )0;
00233   }
00234 }
00235 
00236 static UChar*
00237 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
00238 {
00239   int slen, term_len, i;
00240   UChar *r;
00241 
00242   slen = end - s;
00243   term_len = ONIGENC_MBC_MINLEN(enc);
00244 
00245   r = (UChar* )xmalloc(slen + term_len);
00246   CHECK_NULL_RETURN(r);
00247   xmemcpy(r, s, slen);
00248 
00249   for (i = 0; i < term_len; i++)
00250     r[slen + i] = (UChar )0;
00251 
00252   return r;
00253 }
00254 
00255 
00256 /* scan pattern methods */
00257 #define PEND_VALUE   0
00258 
00259 #define PFETCH_READY  UChar* pfetch_prev
00260 #define PEND         (p < end ?  0 : 1)
00261 #define PUNFETCH     p = pfetch_prev
00262 #define PINC       do { \
00263   pfetch_prev = p; \
00264   p += ONIGENC_MBC_ENC_LEN(enc, p); \
00265 } while (0)
00266 #define PFETCH(c)  do { \
00267   c = ONIGENC_MBC_TO_CODE(enc, p, end); \
00268   pfetch_prev = p; \
00269   p += ONIGENC_MBC_ENC_LEN(enc, p); \
00270 } while (0)
00271 
00272 #define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
00273 #define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
00274 
00275 static UChar*
00276 k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
00277              int capa)
00278 {
00279   UChar* r;
00280 
00281   if (dest)
00282     r = (UChar* )xrealloc(dest, capa + 1);
00283   else
00284     r = (UChar* )xmalloc(capa + 1);
00285 
00286   CHECK_NULL_RETURN(r);
00287   k_strcpy(r + (dest_end - dest), src, src_end);
00288   return r;
00289 }
00290 
00291 /* dest on static area */
00292 static UChar*
00293 strcat_capa_from_static(UChar* dest, UChar* dest_end,
00294                      const UChar* src, const UChar* src_end, int capa)
00295 {
00296   UChar* r;
00297 
00298   r = (UChar* )xmalloc(capa + 1);
00299   CHECK_NULL_RETURN(r);
00300   k_strcpy(r, dest, dest_end);
00301   k_strcpy(r + (dest_end - dest), src, src_end);
00302   return r;
00303 }
00304 
00305 #ifdef USE_NAMED_GROUP
00306 
00307 #define INIT_NAME_BACKREFS_ALLOC_NUM   8
00308 
00309 typedef struct {
00310   UChar* name;
00311   int    name_len;   /* byte length */
00312   int    back_num;   /* number of backrefs */
00313   int    back_alloc;
00314   int    back_ref1;
00315   int*   back_refs;
00316 } NameEntry;
00317 
00318 #ifdef USE_ST_HASH_TABLE
00319 
00320 #include "st.h"
00321 
00322 typedef struct {
00323   unsigned char* s;
00324   unsigned char* end;
00325 } st_strend_key;
00326 
00327 static int strend_cmp(st_strend_key*, st_strend_key*);
00328 static int strend_hash(st_strend_key*);
00329 
00330 static struct st_hash_type type_strend_hash = {
00331   strend_cmp,
00332   strend_hash,
00333 };
00334 
00335 static st_table*
00336 onig_st_init_strend_table_with_size(int size)
00337 {
00338     return onig_st_init_table_with_size(&type_strend_hash, size);
00339 }
00340 
00341 static int
00342 onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
00343 {
00344     st_strend_key key;
00345 
00346     key.s   = (unsigned char* )str_key;
00347     key.end = (unsigned char* )end_key;
00348 
00349     return onig_st_lookup(table, (st_data_t )(&key), value);
00350 }
00351 
00352 static int
00353 onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
00354 {
00355   st_strend_key* key;
00356   int result;
00357 
00358   key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
00359   key->s   = (unsigned char* )str_key;
00360   key->end = (unsigned char* )end_key;
00361   result = onig_st_insert(table, (st_data_t )key, value);
00362   if (result) {
00363     xfree(key);
00364   }
00365   return result;
00366 }
00367 
00368 static int
00369 strend_cmp(st_strend_key* x, st_strend_key* y)
00370 {
00371   unsigned char *p, *q;
00372   int c;
00373 
00374   if ((x->end - x->s) != (y->end - y->s))
00375     return 1;
00376 
00377   p = x->s;
00378   q = y->s;
00379   while (p < x->end) {
00380     c = (int )*p - (int )*q;
00381     if (c != 0) return c;
00382 
00383     p++; q++;
00384   }
00385 
00386   return 0;
00387 }
00388 
00389 static int
00390 strend_hash(st_strend_key* x)
00391 {
00392   int val;
00393   unsigned char *p;
00394 
00395   val = 0;
00396   p = x->s;
00397   while (p < x->end) {
00398     val = val * 997 + (int )*p++;
00399   }
00400 
00401   return val + (val >> 5);
00402 }
00403 
00404 typedef st_table  NameTable;
00405 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
00406 
00407 #define NAMEBUF_SIZE    24
00408 #define NAMEBUF_SIZE_1  25
00409 
00410 #ifdef ONIG_DEBUG
00411 static int
00412 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
00413 {
00414   int i;
00415   FILE* fp = (FILE* )arg;
00416 
00417   fprintf(fp, "%s: ", e->name);
00418   if (e->back_num == 0)
00419     fputs("-", fp);
00420   else if (e->back_num == 1)
00421     fprintf(fp, "%d", e->back_ref1);
00422   else {
00423     for (i = 0; i < e->back_num; i++) {
00424       if (i > 0) fprintf(fp, ", ");
00425       fprintf(fp, "%d", e->back_refs[i]);
00426     }
00427   }
00428   fputs("\n", fp);
00429   return ST_CONTINUE;
00430 }
00431 
00432 extern int
00433 onig_print_names(FILE* fp, regex_t* reg)
00434 {
00435   NameTable* t = (NameTable* )reg->name_table;
00436 
00437   if (IS_NOT_NULL(t)) {
00438     fprintf(fp, "name table\n");
00439     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
00440     fputs("\n", fp);
00441   }
00442   return 0;
00443 }
00444 #endif
00445 
00446 static int
00447 i_free_name_entry(UChar* key, NameEntry* e, void* arg)
00448 {
00449   xfree(e->name);
00450   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00451   xfree(key);
00452   xfree(e);
00453   return ST_DELETE;
00454 }
00455 
00456 static int
00457 names_clear(regex_t* reg)
00458 {
00459   NameTable* t = (NameTable* )reg->name_table;
00460 
00461   if (IS_NOT_NULL(t)) {
00462     onig_st_foreach(t, i_free_name_entry, 0);
00463   }
00464   return 0;
00465 }
00466 
00467 extern int
00468 onig_names_free(regex_t* reg)
00469 {
00470   int r;
00471   NameTable* t;
00472 
00473   r = names_clear(reg);
00474   if (r) return r;
00475 
00476   t = (NameTable* )reg->name_table;
00477   if (IS_NOT_NULL(t)) onig_st_free_table(t);
00478   reg->name_table = (void* )NULL;
00479   return 0;
00480 }
00481 
00482 static NameEntry*
00483 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
00484 {
00485   NameEntry* e;
00486   NameTable* t = (NameTable* )reg->name_table;
00487 
00488   e = (NameEntry* )NULL;
00489   if (IS_NOT_NULL(t)) {
00490     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
00491   }
00492   return e;
00493 }
00494 
00495 typedef struct {
00496   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
00497   regex_t* reg;
00498   void* arg;
00499   int ret;
00500   OnigEncoding enc;
00501 } INamesArg;
00502 
00503 static int
00504 i_names(UChar* key, NameEntry* e, INamesArg* arg)
00505 {
00506   int r = (*(arg->func))(e->name,
00507                    /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
00508                          e->name + e->name_len,
00509                          e->back_num,
00510                       (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00511                       arg->reg, arg->arg);
00512   if (r != 0) {
00513     arg->ret = r;
00514     return ST_STOP;
00515   }
00516   return ST_CONTINUE;
00517 }
00518 
00519 extern int
00520 onig_foreach_name(regex_t* reg,
00521           int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
00522           void* arg)
00523 {
00524   INamesArg narg;
00525   NameTable* t = (NameTable* )reg->name_table;
00526 
00527   narg.ret = 0;
00528   if (IS_NOT_NULL(t)) {
00529     narg.func = func;
00530     narg.reg  = reg;
00531     narg.arg  = arg;
00532     narg.enc  = reg->enc; /* should be pattern encoding. */
00533     onig_st_foreach(t, i_names, (HashDataType )&narg);
00534   }
00535   return narg.ret;
00536 }
00537 
00538 static int
00539 i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
00540 {
00541   int i;
00542 
00543   if (e->back_num > 1) {
00544     for (i = 0; i < e->back_num; i++) {
00545       e->back_refs[i] = map[e->back_refs[i]].new_val;
00546     }
00547   }
00548   else if (e->back_num == 1) {
00549     e->back_ref1 = map[e->back_ref1].new_val;
00550   }
00551 
00552   return ST_CONTINUE;
00553 }
00554 
00555 extern int
00556 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
00557 {
00558   NameTable* t = (NameTable* )reg->name_table;
00559 
00560   if (IS_NOT_NULL(t)) {
00561     onig_st_foreach(t, i_renumber_name, (HashDataType )map);
00562   }
00563   return 0;
00564 }
00565 
00566 
00567 extern int
00568 onig_number_of_names(regex_t* reg)
00569 {
00570   NameTable* t = (NameTable* )reg->name_table;
00571 
00572   if (IS_NOT_NULL(t))
00573     return t->num_entries;
00574   else
00575     return 0;
00576 }
00577 
00578 #else  /* USE_ST_HASH_TABLE */
00579 
00580 #define INIT_NAMES_ALLOC_NUM    8
00581 
00582 typedef struct {
00583   NameEntry* e;
00584   int        num;
00585   int        alloc;
00586 } NameTable;
00587 
00588 
00589 #ifdef ONIG_DEBUG
00590 extern int
00591 onig_print_names(FILE* fp, regex_t* reg)
00592 {
00593   int i, j;
00594   NameEntry* e;
00595   NameTable* t = (NameTable* )reg->name_table;
00596 
00597   if (IS_NOT_NULL(t) && t->num > 0) {
00598     fprintf(fp, "name table\n");
00599     for (i = 0; i < t->num; i++) {
00600       e = &(t->e[i]);
00601       fprintf(fp, "%s: ", e->name);
00602       if (e->back_num == 0) {
00603        fputs("-", fp);
00604       }
00605       else if (e->back_num == 1) {
00606        fprintf(fp, "%d", e->back_ref1);
00607       }
00608       else {
00609        for (j = 0; j < e->back_num; j++) {
00610          if (j > 0) fprintf(fp, ", ");
00611          fprintf(fp, "%d", e->back_refs[j]);
00612        }
00613       }
00614       fputs("\n", fp);
00615     }
00616     fputs("\n", fp);
00617   }
00618   return 0;
00619 }
00620 #endif
00621 
00622 static int
00623 names_clear(regex_t* reg)
00624 {
00625   int i;
00626   NameEntry* e;
00627   NameTable* t = (NameTable* )reg->name_table;
00628 
00629   if (IS_NOT_NULL(t)) {
00630     for (i = 0; i < t->num; i++) {
00631       e = &(t->e[i]);
00632       if (IS_NOT_NULL(e->name)) {
00633        xfree(e->name);
00634        e->name       = NULL;
00635        e->name_len   = 0;
00636        e->back_num   = 0;
00637        e->back_alloc = 0;
00638        if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00639        e->back_refs = (int* )NULL;
00640       }
00641     }
00642     if (IS_NOT_NULL(t->e)) {
00643       xfree(t->e);
00644       t->e = NULL;
00645     }
00646     t->num = 0;
00647   }
00648   return 0;
00649 }
00650 
00651 extern int
00652 onig_names_free(regex_t* reg)
00653 {
00654   int r;
00655   NameTable* t;
00656 
00657   r = names_clear(reg);
00658   if (r) return r;
00659 
00660   t = (NameTable* )reg->name_table;
00661   if (IS_NOT_NULL(t)) xfree(t);
00662   reg->name_table = NULL;
00663   return 0;
00664 }
00665 
00666 static NameEntry*
00667 name_find(regex_t* reg, UChar* name, UChar* name_end)
00668 {
00669   int i, len;
00670   NameEntry* e;
00671   NameTable* t = (NameTable* )reg->name_table;
00672 
00673   if (IS_NOT_NULL(t)) {
00674     len = name_end - name;
00675     for (i = 0; i < t->num; i++) {
00676       e = &(t->e[i]);
00677       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
00678        return e;
00679     }
00680   }
00681   return (NameEntry* )NULL;
00682 }
00683 
00684 extern int
00685 onig_foreach_name(regex_t* reg,
00686           int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
00687           void* arg)
00688 {
00689   int i, r;
00690   NameEntry* e;
00691   NameTable* t = (NameTable* )reg->name_table;
00692 
00693   if (IS_NOT_NULL(t)) {
00694     for (i = 0; i < t->num; i++) {
00695       e = &(t->e[i]);
00696       r = (*func)(e->name, e->name + e->name_len, e->back_num,
00697                 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00698                 reg, arg);
00699       if (r != 0) return r;
00700     }
00701   }
00702   return 0;
00703 }
00704 
00705 extern int
00706 onig_number_of_names(regex_t* reg)
00707 {
00708   NameTable* t = (NameTable* )reg->name_table;
00709 
00710   if (IS_NOT_NULL(t))
00711     return t->num;
00712   else
00713     return 0;
00714 }
00715 
00716 #endif /* else USE_ST_HASH_TABLE */
00717 
00718 static int
00719 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
00720 {
00721   int alloc;
00722   NameEntry* e;
00723   NameTable* t = (NameTable* )reg->name_table;
00724 
00725   if (name_end - name <= 0)
00726     return ONIGERR_EMPTY_GROUP_NAME;
00727 
00728   e = name_find(reg, name, name_end);
00729   if (IS_NULL(e)) {
00730 #ifdef USE_ST_HASH_TABLE
00731     if (IS_NULL(t)) {
00732       t = onig_st_init_strend_table_with_size(5);
00733       reg->name_table = (void* )t;
00734     }
00735     e = (NameEntry* )xmalloc(sizeof(NameEntry));
00736     CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
00737 
00738     e->name = strdup_with_null(reg->enc, name, name_end);
00739     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
00740     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
00741                           (HashDataType )e);
00742 
00743     e->name_len   = name_end - name;
00744     e->back_num   = 0;
00745     e->back_alloc = 0;
00746     e->back_refs  = (int* )NULL;
00747 
00748 #else
00749 
00750     if (IS_NULL(t)) {
00751       alloc = INIT_NAMES_ALLOC_NUM;
00752       t = (NameTable* )xmalloc(sizeof(NameTable));
00753       CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
00754       t->e     = NULL;
00755       t->alloc = 0;
00756       t->num   = 0;
00757 
00758       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
00759       if (IS_NULL(t->e)) {
00760        xfree(t);
00761        return ONIGERR_MEMORY;
00762       }
00763       t->alloc = alloc;
00764       reg->name_table = t;
00765       goto clear;
00766     }
00767     else if (t->num == t->alloc) {
00768       int i;
00769 
00770       alloc = t->alloc * 2;
00771       t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
00772       CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY);
00773       t->alloc = alloc;
00774 
00775     clear:
00776       for (i = t->num; i < t->alloc; i++) {
00777        t->e[i].name       = NULL;
00778        t->e[i].name_len   = 0;
00779        t->e[i].back_num   = 0;
00780        t->e[i].back_alloc = 0;
00781        t->e[i].back_refs  = (int* )NULL;
00782       }
00783     }
00784     e = &(t->e[t->num]);
00785     t->num++;
00786     e->name = strdup_with_null(reg->enc, name, name_end);
00787     e->name_len = name_end - name;
00788 #endif
00789   }
00790 
00791   if (e->back_num >= 1 &&
00792       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
00793     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
00794                                 name, name_end);
00795     return ONIGERR_MULTIPLEX_DEFINED_NAME;
00796   }
00797 
00798   e->back_num++;
00799   if (e->back_num == 1) {
00800     e->back_ref1 = backref;
00801   }
00802   else {
00803     if (e->back_num == 2) {
00804       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
00805       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
00806       CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
00807       e->back_alloc = alloc;
00808       e->back_refs[0] = e->back_ref1;
00809       e->back_refs[1] = backref;
00810     }
00811     else {
00812       if (e->back_num > e->back_alloc) {
00813        alloc = e->back_alloc * 2;
00814        e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
00815        CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
00816        e->back_alloc = alloc;
00817       }
00818       e->back_refs[e->back_num - 1] = backref;
00819     }
00820   }
00821 
00822   return 0;
00823 }
00824 
00825 extern int
00826 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00827                         const UChar* name_end, int** nums)
00828 {
00829   NameEntry* e;
00830 
00831   e = name_find(reg, name, name_end);
00832   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
00833 
00834   switch (e->back_num) {
00835   case 0:
00836     break;
00837   case 1:
00838     *nums = &(e->back_ref1);
00839     break;
00840   default:
00841     *nums = e->back_refs;
00842     break;
00843   }
00844   return e->back_num;
00845 }
00846 
00847 extern int
00848 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00849                          const UChar* name_end, OnigRegion *region)
00850 {
00851   int i, n, *nums;
00852 
00853   n = onig_name_to_group_numbers(reg, name, name_end, &nums);
00854   if (n < 0)
00855     return n;
00856   else if (n == 0)
00857     return ONIGERR_PARSER_BUG;
00858   else if (n == 1)
00859     return nums[0];
00860   else {
00861     if (IS_NOT_NULL(region)) {
00862       for (i = n - 1; i >= 0; i--) {
00863        if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
00864          return nums[i];
00865       }
00866     }
00867     return nums[n - 1];
00868   }
00869 }
00870 
00871 #else /* USE_NAMED_GROUP */
00872 
00873 extern int
00874 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00875                         const UChar* name_end, int** nums)
00876 {
00877   return ONIG_NO_SUPPORT_CONFIG;
00878 }
00879 
00880 extern int
00881 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00882                          const UChar* name_end, OnigRegion* region)
00883 {
00884   return ONIG_NO_SUPPORT_CONFIG;
00885 }
00886 
00887 extern int
00888 onig_foreach_name(regex_t* reg,
00889           int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
00890           void* arg)
00891 {
00892   return ONIG_NO_SUPPORT_CONFIG;
00893 }
00894 
00895 extern int
00896 onig_number_of_names(regex_t* reg)
00897 {
00898   return 0;
00899 }
00900 #endif /* else USE_NAMED_GROUP */
00901 
00902 extern int
00903 onig_noname_group_capture_is_active(regex_t* reg)
00904 {
00905   if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
00906     return 0;
00907 
00908 #ifdef USE_NAMED_GROUP
00909   if (onig_number_of_names(reg) > 0 &&
00910       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
00911       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
00912     return 0;
00913   }
00914 #endif
00915 
00916   return 1;
00917 }
00918 
00919 
00920 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
00921 
00922 static void
00923 scan_env_clear(ScanEnv* env)
00924 {
00925   int i;
00926 
00927   BIT_STATUS_CLEAR(env->capture_history);
00928   BIT_STATUS_CLEAR(env->bt_mem_start);
00929   BIT_STATUS_CLEAR(env->bt_mem_end);
00930   BIT_STATUS_CLEAR(env->backrefed_mem);
00931   env->error             = (UChar* )NULL;
00932   env->error_end         = (UChar* )NULL;
00933   env->num_call          = 0;
00934   env->num_mem           = 0;
00935 #ifdef USE_NAMED_GROUP
00936   env->num_named         = 0;
00937 #endif
00938   env->mem_alloc         = 0;
00939   env->mem_nodes_dynamic = (Node** )NULL;
00940 
00941   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
00942     env->mem_nodes_static[i] = NULL_NODE;
00943 
00944 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00945   env->num_comb_exp_check  = 0;
00946   env->comb_exp_max_regnum = 0;
00947   env->curr_max_regnum     = 0;
00948   env->has_recursion       = 0;
00949 #endif
00950 }
00951 
00952 static int
00953 scan_env_add_mem_entry(ScanEnv* env)
00954 {
00955   int i, need, alloc;
00956   Node** p;
00957 
00958   need = env->num_mem + 1;
00959   if (need >= SCANENV_MEMNODES_SIZE) {
00960     if (env->mem_alloc <= need) {
00961       if (IS_NULL(env->mem_nodes_dynamic)) {
00962        alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
00963        p = (Node** )xmalloc(sizeof(Node*) * alloc);
00964        xmemcpy(p, env->mem_nodes_static,
00965               sizeof(Node*) * SCANENV_MEMNODES_SIZE);
00966       }
00967       else {
00968        alloc = env->mem_alloc * 2;
00969        p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
00970       }
00971       CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
00972 
00973       for (i = env->num_mem + 1; i < alloc; i++)
00974        p[i] = NULL_NODE;
00975 
00976       env->mem_nodes_dynamic = p;
00977       env->mem_alloc = alloc;
00978     }
00979   }
00980 
00981   env->num_mem++;
00982   return env->num_mem;
00983 }
00984 
00985 static int
00986 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
00987 {
00988   if (env->num_mem >= num)
00989     SCANENV_MEM_NODES(env)[num] = node;
00990   else
00991     return ONIGERR_PARSER_BUG;
00992   return 0;
00993 }
00994 
00995 
00996 #ifdef USE_RECYCLE_NODE
00997 typedef struct _FreeNode {
00998   struct _FreeNode* next;
00999 } FreeNode;
01000 
01001 static FreeNode* FreeNodeList = (FreeNode* )NULL;
01002 #endif
01003 
01004 extern void
01005 onig_node_free(Node* node)
01006 {
01007  start:
01008   if (IS_NULL(node)) return ;
01009 
01010   switch (NTYPE(node)) {
01011   case N_STRING:
01012     if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
01013       xfree(NSTRING(node).s);
01014     }
01015     break;
01016 
01017   case N_LIST:
01018   case N_ALT:
01019     onig_node_free(NCONS(node).left);
01020     /* onig_node_free(NCONS(node).right); */
01021     {
01022       Node* next_node = NCONS(node).right;
01023 
01024 #ifdef USE_RECYCLE_NODE
01025       {
01026        FreeNode* n = (FreeNode* )node;
01027 
01028         THREAD_ATOMIC_START;
01029        n->next = FreeNodeList;
01030        FreeNodeList = n;
01031         THREAD_ATOMIC_END;
01032       }
01033 #else
01034       xfree(node);
01035 #endif
01036 
01037       node = next_node;
01038       goto start;
01039     }
01040     break;
01041 
01042   case N_CCLASS:
01043     {
01044       CClassNode* cc = &(NCCLASS(node));
01045 
01046       if (IS_CCLASS_SHARE(cc))
01047         return ;
01048 
01049       if (cc->mbuf)
01050         bbuf_free(cc->mbuf);
01051     }
01052     break;
01053 
01054   case N_QUANTIFIER:
01055     if (NQUANTIFIER(node).target)
01056       onig_node_free(NQUANTIFIER(node).target);
01057     break;
01058 
01059   case N_EFFECT:
01060     if (NEFFECT(node).target)
01061       onig_node_free(NEFFECT(node).target);
01062     break;
01063 
01064   case N_BACKREF:
01065     if (IS_NOT_NULL(NBACKREF(node).back_dynamic))
01066       xfree(NBACKREF(node).back_dynamic);
01067     break;
01068 
01069   case N_ANCHOR:
01070     if (NANCHOR(node).target)
01071       onig_node_free(NANCHOR(node).target);
01072     break;
01073   }
01074 
01075 #ifdef USE_RECYCLE_NODE
01076   {
01077     FreeNode* n = (FreeNode* )node;
01078 
01079     THREAD_ATOMIC_START;
01080     n->next = FreeNodeList;
01081     FreeNodeList = n;
01082     THREAD_ATOMIC_END;
01083   }
01084 #else
01085   xfree(node);
01086 #endif
01087 }
01088 
01089 #ifdef USE_RECYCLE_NODE
01090 extern int
01091 onig_free_node_list(void)
01092 {
01093   FreeNode* n;
01094 
01095   /* THREAD_ATOMIC_START; */
01096   while (IS_NOT_NULL(FreeNodeList)) {
01097     n = FreeNodeList;
01098     FreeNodeList = FreeNodeList->next;
01099     xfree(n);
01100   }
01101   /* THREAD_ATOMIC_END; */
01102   return 0;
01103 }
01104 #endif
01105 
01106 static Node*
01107 node_new(void)
01108 {
01109   Node* node;
01110 
01111 #ifdef USE_RECYCLE_NODE
01112   THREAD_ATOMIC_START;
01113   if (IS_NOT_NULL(FreeNodeList)) {
01114     node = (Node* )FreeNodeList;
01115     FreeNodeList = FreeNodeList->next;
01116     THREAD_ATOMIC_END;
01117     return node;
01118   }
01119   THREAD_ATOMIC_END;
01120 #endif
01121 
01122   node = (Node* )xmalloc(sizeof(Node));
01123   return node;
01124 }
01125 
01126 
01127 static void
01128 initialize_cclass(CClassNode* cc)
01129 {
01130   BITSET_CLEAR(cc->bs);
01131   cc->flags = 0;
01132   cc->mbuf  = NULL;
01133 }
01134 
01135 static Node*
01136 node_new_cclass(void)
01137 {
01138   Node* node = node_new();
01139   CHECK_NULL_RETURN(node);
01140   node->type = N_CCLASS;
01141 
01142   initialize_cclass(&(NCCLASS(node)));
01143   return node;
01144 }
01145 
01146 static Node*
01147 node_new_cclass_by_codepoint_range(int not,
01148                    const OnigCodePoint sbr[], const OnigCodePoint mbr[])
01149 {
01150   CClassNode* cc;
01151   int n, i, j;
01152 
01153   Node* node = node_new();
01154   CHECK_NULL_RETURN(node);
01155   node->type = N_CCLASS;
01156 
01157   cc = &(NCCLASS(node));
01158   cc->flags = 0;
01159   if (not != 0) CCLASS_SET_NOT(cc);
01160 
01161   BITSET_CLEAR(cc->bs);
01162   if (IS_NOT_NULL(sbr)) {
01163     n = ONIGENC_CODE_RANGE_NUM(sbr);
01164     for (i = 0; i < n; i++) {
01165       for (j  = ONIGENC_CODE_RANGE_FROM(sbr, i);
01166            j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
01167         BITSET_SET_BIT(cc->bs, j);
01168       }
01169     }
01170   }
01171 
01172   if (IS_NULL(mbr)) {
01173   is_null:
01174     cc->mbuf = NULL;
01175   }
01176   else {
01177     BBuf* bbuf;
01178 
01179     n = ONIGENC_CODE_RANGE_NUM(mbr);
01180     if (n == 0) goto is_null;
01181 
01182     bbuf = (BBuf* )xmalloc(sizeof(BBuf));
01183     CHECK_NULL_RETURN_VAL(bbuf, NULL);
01184     bbuf->alloc = n + 1;
01185     bbuf->used  = n + 1;
01186     bbuf->p     = (UChar* )((void* )mbr);
01187 
01188     cc->mbuf = bbuf;
01189   }
01190 
01191   return node;
01192 }
01193 
01194 static Node*
01195 node_new_ctype(int type)
01196 {
01197   Node* node = node_new();
01198   CHECK_NULL_RETURN(node);
01199   node->type = N_CTYPE;
01200   NCTYPE(node).type = type;
01201   return node;
01202 }
01203 
01204 static Node*
01205 node_new_anychar(void)
01206 {
01207   Node* node = node_new();
01208   CHECK_NULL_RETURN(node);
01209   node->type = N_ANYCHAR;
01210   return node;
01211 }
01212 
01213 static Node*
01214 node_new_list(Node* left, Node* right)
01215 {
01216   Node* node = node_new();
01217   CHECK_NULL_RETURN(node);
01218   node->type = N_LIST;
01219   NCONS(node).left  = left;
01220   NCONS(node).right = right;
01221   return node;
01222 }
01223 
01224 extern Node*
01225 onig_node_new_list(Node* left, Node* right)
01226 {
01227   return node_new_list(left, right);
01228 }
01229 
01230 static Node*
01231 node_new_alt(Node* left, Node* right)
01232 {
01233   Node* node = node_new();
01234   CHECK_NULL_RETURN(node);
01235   node->type = N_ALT;
01236   NCONS(node).left  = left;
01237   NCONS(node).right = right;
01238   return node;
01239 }
01240 
01241 extern Node*
01242 onig_node_new_anchor(int type)
01243 {
01244   Node* node = node_new();
01245   CHECK_NULL_RETURN(node);
01246   node->type = N_ANCHOR;
01247   NANCHOR(node).type     = type;
01248   NANCHOR(node).target   = NULL;
01249   NANCHOR(node).char_len = -1;
01250   return node;
01251 }
01252 
01253 static Node*
01254 node_new_backref(int back_num, int* backrefs, int by_name,
01255 #ifdef USE_BACKREF_AT_LEVEL
01256                int exist_level, int nest_level,
01257 #endif
01258                ScanEnv* env)
01259 {
01260   int i;
01261   Node* node = node_new();
01262 
01263   CHECK_NULL_RETURN(node);
01264   node->type = N_BACKREF;
01265   NBACKREF(node).state    = 0;
01266   NBACKREF(node).back_num = back_num;
01267   NBACKREF(node).back_dynamic = (int* )NULL;
01268   if (by_name != 0)
01269     NBACKREF(node).state |= NST_NAME_REF;
01270 
01271 #ifdef USE_BACKREF_AT_LEVEL
01272   if (exist_level != 0) {
01273     NBACKREF(node).state |= NST_NEST_LEVEL;
01274     NBACKREF(node).nest_level  = nest_level;
01275   }
01276 #endif
01277 
01278   for (i = 0; i < back_num; i++) {
01279     if (backrefs[i] <= env->num_mem &&
01280        IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
01281       NBACKREF(node).state |= NST_RECURSION;   /* /...(\1).../ */
01282       break;
01283     }
01284   }
01285 
01286   if (back_num <= NODE_BACKREFS_SIZE) {
01287     for (i = 0; i < back_num; i++)
01288       NBACKREF(node).back_static[i] = backrefs[i];
01289   }
01290   else {
01291     int* p = (int* )xmalloc(sizeof(int) * back_num);
01292     if (IS_NULL(p)) {
01293       onig_node_free(node);
01294       return NULL;
01295     }
01296     NBACKREF(node).back_dynamic = p;
01297     for (i = 0; i < back_num; i++)
01298       p[i] = backrefs[i];
01299   }
01300   return node;
01301 }
01302 
01303 #ifdef USE_SUBEXP_CALL
01304 static Node*
01305 node_new_call(UChar* name, UChar* name_end)
01306 {
01307   Node* node = node_new();
01308   CHECK_NULL_RETURN(node);
01309 
01310   node->type = N_CALL;
01311   NCALL(node).state    = 0;
01312   NCALL(node).ref_num  = CALLNODE_REFNUM_UNDEF;
01313   NCALL(node).target   = NULL_NODE;
01314   NCALL(node).name     = name;
01315   NCALL(node).name_end = name_end;
01316   return node;
01317 }
01318 #endif
01319 
01320 static Node*
01321 node_new_quantifier(int lower, int upper, int by_number)
01322 {
01323   Node* node = node_new();
01324   CHECK_NULL_RETURN(node);
01325   node->type = N_QUANTIFIER;
01326   NQUANTIFIER(node).state  = 0;
01327   NQUANTIFIER(node).target = NULL;
01328   NQUANTIFIER(node).lower  = lower;
01329   NQUANTIFIER(node).upper  = upper;
01330   NQUANTIFIER(node).greedy = 1;
01331   NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
01332   NQUANTIFIER(node).head_exact        = NULL_NODE;
01333   NQUANTIFIER(node).next_head_exact   = NULL_NODE;
01334   NQUANTIFIER(node).is_refered        = 0;
01335   if (by_number != 0)
01336     NQUANTIFIER(node).state |= NST_BY_NUMBER;
01337 
01338 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01339   NQUANTIFIER(node).comb_exp_check_num = 0;
01340 #endif
01341 
01342   return node;
01343 }
01344 
01345 static Node*
01346 node_new_effect(int type)
01347 {
01348   Node* node = node_new();
01349   CHECK_NULL_RETURN(node);
01350   node->type = N_EFFECT;
01351   NEFFECT(node).type      = type;
01352   NEFFECT(node).state     =  0;
01353   NEFFECT(node).regnum    =  0;
01354   NEFFECT(node).option    =  0;
01355   NEFFECT(node).target    = NULL;
01356   NEFFECT(node).call_addr = -1;
01357   NEFFECT(node).opt_count =  0;
01358   return node;
01359 }
01360 
01361 extern Node*
01362 onig_node_new_effect(int type)
01363 {
01364   return node_new_effect(type);
01365 }
01366 
01367 static Node*
01368 node_new_effect_memory(OnigOptionType option, int is_named)
01369 {
01370   Node* node = node_new_effect(EFFECT_MEMORY);
01371   CHECK_NULL_RETURN(node);
01372   if (is_named != 0)
01373     SET_EFFECT_STATUS(node, NST_NAMED_GROUP);
01374 
01375 #ifdef USE_SUBEXP_CALL
01376   NEFFECT(node).option = option;
01377 #endif
01378   return node;
01379 }
01380 
01381 static Node*
01382 node_new_option(OnigOptionType option)
01383 {
01384   Node* node = node_new_effect(EFFECT_OPTION);
01385   CHECK_NULL_RETURN(node);
01386   NEFFECT(node).option = option;
01387   return node;
01388 }
01389 
01390 extern int
01391 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
01392 {
01393   int addlen = end - s;
01394 
01395   if (addlen > 0) {
01396     int len  = NSTRING(node).end - NSTRING(node).s;
01397 
01398     if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
01399       UChar* p;
01400       int capa = len + addlen + NODE_STR_MARGIN;
01401 
01402       if (capa <= NSTRING(node).capa) {
01403        k_strcpy(NSTRING(node).s + len, s, end);
01404       }
01405       else {
01406        if (NSTRING(node).s == NSTRING(node).buf)
01407          p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
01408                                   s, end, capa);
01409        else
01410          p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
01411 
01412        CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
01413        NSTRING(node).s    = p;
01414        NSTRING(node).capa = capa;
01415       }
01416     }
01417     else {
01418       k_strcpy(NSTRING(node).s + len, s, end);
01419     }
01420     NSTRING(node).end = NSTRING(node).s + len + addlen;
01421   }
01422 
01423   return 0;
01424 }
01425 
01426 static int
01427 node_str_cat_char(Node* node, UChar c)
01428 {
01429   UChar s[1];
01430 
01431   s[0] = c;
01432   return onig_node_str_cat(node, s, s + 1);
01433 }
01434 
01435 extern void
01436 onig_node_conv_to_str_node(Node* node, int flag)
01437 {
01438   node->type = N_STRING;
01439 
01440   NSTRING(node).flag = flag;
01441   NSTRING(node).capa = 0;
01442   NSTRING(node).s    = NSTRING(node).buf;
01443   NSTRING(node).end  = NSTRING(node).buf;
01444 }
01445 
01446 extern void
01447 onig_node_str_clear(Node* node)
01448 {
01449   if (NSTRING(node).capa != 0 &&
01450       IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
01451     xfree(NSTRING(node).s);
01452   }
01453 
01454   NSTRING(node).capa = 0;
01455   NSTRING(node).flag = 0;
01456   NSTRING(node).s    = NSTRING(node).buf;
01457   NSTRING(node).end  = NSTRING(node).buf;
01458 }
01459 
01460 static Node*
01461 node_new_str(const UChar* s, const UChar* end)
01462 {
01463   Node* node = node_new();
01464   CHECK_NULL_RETURN(node);
01465 
01466   node->type = N_STRING;
01467   NSTRING(node).capa = 0;
01468   NSTRING(node).flag = 0;
01469   NSTRING(node).s    = NSTRING(node).buf;
01470   NSTRING(node).end  = NSTRING(node).buf;
01471   if (onig_node_str_cat(node, s, end)) {
01472     onig_node_free(node);
01473     return NULL;
01474   }
01475   return node;
01476 }
01477 
01478 extern Node*
01479 onig_node_new_str(const UChar* s, const UChar* end)
01480 {
01481   return node_new_str(s, end);
01482 }
01483 
01484 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
01485 static Node*
01486 node_new_str_raw(UChar* s, UChar* end)
01487 {
01488   Node* node = node_new_str(s, end);
01489   NSTRING_SET_RAW(node);
01490   return node;
01491 }
01492 #endif
01493 
01494 static Node*
01495 node_new_empty(void)
01496 {
01497   return node_new_str(NULL, NULL);
01498 }
01499 
01500 static Node*
01501 node_new_str_char(UChar c)
01502 {
01503   UChar p[1];
01504 
01505   p[0] = c;
01506   return node_new_str(p, p + 1);
01507 }
01508 
01509 static Node*
01510 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
01511 {
01512   const UChar *p;
01513   Node* n = NULL_NODE;
01514 
01515   if (sn->end > sn->s) {
01516     p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
01517     if (p && p > sn->s) { /* can be splitted. */
01518       n = node_new_str(p, sn->end);
01519       if ((sn->flag & NSTR_RAW) != 0)
01520        NSTRING_SET_RAW(n);
01521       sn->end = (UChar* )p;
01522     }
01523   }
01524   return n;
01525 }
01526 
01527 static int
01528 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
01529 {
01530   if (sn->end > sn->s) {
01531     return ((enc_len(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
01532   }
01533   return 0;
01534 }
01535 
01536 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
01537 static int
01538 node_str_head_pad(StrNode* sn, int num, UChar val)
01539 {
01540   UChar buf[NODE_STR_BUF_SIZE];
01541   int i, len;
01542 
01543   len = sn->end - sn->s;
01544   onig_strcpy(buf, sn->s, sn->end);
01545   onig_strcpy(&(sn->s[num]), buf, buf + len);
01546   sn->end += num;
01547 
01548   for (i = 0; i < num; i++) {
01549     sn->s[i] = val;
01550   }
01551 }
01552 #endif
01553 
01554 extern int
01555 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
01556 {
01557   unsigned int num, val;
01558   OnigCodePoint c;
01559   UChar* p = *src;
01560   PFETCH_READY;
01561 
01562   num = 0;
01563   while (!PEND) {
01564     PFETCH(c);
01565     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
01566       val = (unsigned int )DIGITVAL(c);
01567       if ((INT_MAX_LIMIT - val) / 10UL < num)
01568        return -1;  /* overflow */
01569 
01570       num = num * 10 + val;
01571     }
01572     else {
01573       PUNFETCH;
01574       break;
01575     }
01576   }
01577   *src = p;
01578   return num;
01579 }
01580 
01581 static int
01582 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
01583                              OnigEncoding enc)
01584 {
01585   OnigCodePoint c;
01586   unsigned int num, val;
01587   UChar* p = *src;
01588   PFETCH_READY;
01589 
01590   num = 0;
01591   while (!PEND && maxlen-- != 0) {
01592     PFETCH(c);
01593     if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
01594       val = (unsigned int )XDIGITVAL(enc,c);
01595       if ((INT_MAX_LIMIT - val) / 16UL < num)
01596        return -1;  /* overflow */
01597 
01598       num = (num << 4) + XDIGITVAL(enc,c);
01599     }
01600     else {
01601       PUNFETCH;
01602       break;
01603     }
01604   }
01605   *src = p;
01606   return num;
01607 }
01608 
01609 static int
01610 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
01611                         OnigEncoding enc)
01612 {
01613   OnigCodePoint c;
01614   unsigned int num, val;
01615   UChar* p = *src;
01616   PFETCH_READY;
01617 
01618   num = 0;
01619   while (!PEND && maxlen-- != 0) {
01620     PFETCH(c);
01621     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
01622       val = ODIGITVAL(c);
01623       if ((INT_MAX_LIMIT - val) / 8UL < num)
01624        return -1;  /* overflow */
01625 
01626       num = (num << 3) + val;
01627     }
01628     else {
01629       PUNFETCH;
01630       break;
01631     }
01632   }
01633   *src = p;
01634   return num;
01635 }
01636 
01637 
01638 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
01639     BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
01640 
01641 /* data format:
01642      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
01643      (all data size is OnigCodePoint)
01644  */
01645 static int
01646 new_code_range(BBuf** pbuf)
01647 {
01648 #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
01649   int r;
01650   OnigCodePoint n;
01651   BBuf* bbuf;
01652 
01653   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
01654   CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
01655   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
01656   if (r) return r;
01657 
01658   n = 0;
01659   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01660   return 0;
01661 }
01662 
01663 static int
01664 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
01665 {
01666   int r, inc_n, pos;
01667   int low, high, bound, x;
01668   OnigCodePoint n, *data;
01669   BBuf* bbuf;
01670 
01671   if (from > to) {
01672     n = from; from = to; to = n;
01673   }
01674 
01675   if (IS_NULL(*pbuf)) {
01676     r = new_code_range(pbuf);
01677     if (r) return r;
01678     bbuf = *pbuf;
01679     n = 0;
01680   }
01681   else {
01682     bbuf = *pbuf;
01683     GET_CODE_POINT(n, bbuf->p);
01684   }
01685   data = (OnigCodePoint* )(bbuf->p);
01686   data++;
01687 
01688   for (low = 0, bound = n; low < bound; ) {
01689     x = (low + bound) >> 1;
01690     if (from > data[x*2 + 1])
01691       low = x + 1;
01692     else
01693       bound = x;
01694   }
01695 
01696   for (high = low, bound = n; high < bound; ) {
01697     x = (high + bound) >> 1;
01698     if (to >= data[x*2] - 1)
01699       high = x + 1;
01700     else
01701       bound = x;
01702   }
01703 
01704   inc_n = low + 1 - high;
01705   if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
01706     return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
01707 
01708   if (inc_n != 1) {
01709     if (from > data[low*2])
01710       from = data[low*2];
01711     if (to < data[(high - 1)*2 + 1])
01712       to = data[(high - 1)*2 + 1];
01713   }
01714 
01715   if (inc_n != 0 && (OnigCodePoint )high < n) {
01716     int from_pos = SIZE_CODE_POINT * (1 + high * 2);
01717     int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
01718     int size = (n - high) * 2 * SIZE_CODE_POINT;
01719 
01720     if (inc_n > 0) {
01721       BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
01722     }
01723     else {
01724       BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
01725     }
01726   }
01727 
01728   pos = SIZE_CODE_POINT * (1 + low * 2);
01729   BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
01730   BBUF_WRITE_CODE_POINT(bbuf, pos, from);
01731   BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
01732   n += inc_n;
01733   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01734 
01735   return 0;
01736 }
01737 
01738 static int
01739 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01740 {
01741   if (from > to) {
01742     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
01743       return 0;
01744     else
01745       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
01746   }
01747 
01748   return add_code_range_to_buf(pbuf, from, to);
01749 }
01750 
01751 static int
01752 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
01753 {
01754   int r, i, n;
01755   OnigCodePoint pre, from, *data, to = 0;
01756 
01757   *pbuf = (BBuf* )NULL;
01758   if (IS_NULL(bbuf)) {
01759   set_all:
01760     return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01761   }
01762 
01763   data = (OnigCodePoint* )(bbuf->p);
01764   GET_CODE_POINT(n, data);
01765   data++;
01766   if (n <= 0) goto set_all;
01767 
01768   r = 0;
01769   pre = MBCODE_START_POS(enc);
01770   for (i = 0; i < n; i++) {
01771     from = data[i*2];
01772     to   = data[i*2+1];
01773     if (pre <= from - 1) {
01774       r = add_code_range_to_buf(pbuf, pre, from - 1);
01775       if (r != 0) return r;
01776     }
01777     if (to == ~((OnigCodePoint )0)) break;
01778     pre = to + 1;
01779   }
01780   if (to < ~((OnigCodePoint )0)) {
01781     r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
01782   }
01783   return r;
01784 }
01785 
01786 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
01787   BBuf *tbuf; \
01788   int  tnot; \
01789   tnot = not1;  not1  = not2;  not2  = tnot; \
01790   tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
01791 } while (0)
01792 
01793 static int
01794 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
01795                   BBuf* bbuf2, int not2, BBuf** pbuf)
01796 {
01797   int r;
01798   OnigCodePoint i, n1, *data1;
01799   OnigCodePoint from, to;
01800 
01801   *pbuf = (BBuf* )NULL;
01802   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
01803     if (not1 != 0 || not2 != 0)
01804       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01805     return 0;
01806   }
01807 
01808   r = 0;
01809   if (IS_NULL(bbuf2))
01810     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01811 
01812   if (IS_NULL(bbuf1)) {
01813     if (not1 != 0) {
01814       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01815     }
01816     else {
01817       if (not2 == 0) {
01818        return bbuf_clone(pbuf, bbuf2);
01819       }
01820       else {
01821        return not_code_range_buf(enc, bbuf2, pbuf);
01822       }
01823     }
01824   }
01825 
01826   if (not1 != 0)
01827     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01828 
01829   data1 = (OnigCodePoint* )(bbuf1->p);
01830   GET_CODE_POINT(n1, data1);
01831   data1++;
01832 
01833   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
01834     r = bbuf_clone(pbuf, bbuf2);
01835   }
01836   else if (not1 == 0) { /* 1 OR (not 2) */
01837     r = not_code_range_buf(enc, bbuf2, pbuf);
01838   }
01839   if (r != 0) return r;
01840 
01841   for (i = 0; i < n1; i++) {
01842     from = data1[i*2];
01843     to   = data1[i*2+1];
01844     r = add_code_range_to_buf(pbuf, from, to);
01845     if (r != 0) return r;
01846   }
01847   return 0;
01848 }
01849 
01850 static int
01851 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
01852                OnigCodePoint* data, int n)
01853 {
01854   int i, r;
01855   OnigCodePoint from2, to2;
01856 
01857   for (i = 0; i < n; i++) {
01858     from2 = data[i*2];
01859     to2   = data[i*2+1];
01860     if (from2 < from1) {
01861       if (to2 < from1) continue;
01862       else {
01863        from1 = to2 + 1;
01864       }
01865     }
01866     else if (from2 <= to1) {
01867       if (to2 < to1) {
01868        if (from1 <= from2 - 1) {
01869          r = add_code_range_to_buf(pbuf, from1, from2-1);
01870          if (r != 0) return r;
01871        }
01872        from1 = to2 + 1;
01873       }
01874       else {
01875        to1 = from2 - 1;
01876       }
01877     }
01878     else {
01879       from1 = from2;
01880     }
01881     if (from1 > to1) break;
01882   }
01883   if (from1 <= to1) {
01884     r = add_code_range_to_buf(pbuf, from1, to1);
01885     if (r != 0) return r;
01886   }
01887   return 0;
01888 }
01889 
01890 static int
01891 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
01892 {
01893   int r;
01894   OnigCodePoint i, j, n1, n2, *data1, *data2;
01895   OnigCodePoint from, to, from1, to1, from2, to2;
01896 
01897   *pbuf = (BBuf* )NULL;
01898   if (IS_NULL(bbuf1)) {
01899     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
01900       return bbuf_clone(pbuf, bbuf2);
01901     return 0;
01902   }
01903   else if (IS_NULL(bbuf2)) {
01904     if (not2 != 0)
01905       return bbuf_clone(pbuf, bbuf1);
01906     return 0;
01907   }
01908 
01909   if (not1 != 0)
01910     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01911 
01912   data1 = (OnigCodePoint* )(bbuf1->p);
01913   data2 = (OnigCodePoint* )(bbuf2->p);
01914   GET_CODE_POINT(n1, data1);
01915   GET_CODE_POINT(n2, data2);
01916   data1++;
01917   data2++;
01918 
01919   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
01920     for (i = 0; i < n1; i++) {
01921       from1 = data1[i*2];
01922       to1   = data1[i*2+1];
01923       for (j = 0; j < n2; j++) {
01924        from2 = data2[j*2];
01925        to2   = data2[j*2+1];
01926        if (from2 > to1) break;
01927        if (to2 < from1) continue;
01928        from = MAX(from1, from2);
01929        to   = MIN(to1, to2);
01930        r = add_code_range_to_buf(pbuf, from, to);
01931        if (r != 0) return r;
01932       }
01933     }
01934   }
01935   else if (not1 == 0) { /* 1 AND (not 2) */
01936     for (i = 0; i < n1; i++) {
01937       from1 = data1[i*2];
01938       to1   = data1[i*2+1];
01939       r = and_code_range1(pbuf, from1, to1, data2, n2);
01940       if (r != 0) return r;
01941     }
01942   }
01943 
01944   return 0;
01945 }
01946 
01947 static int
01948 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
01949 {
01950   int r, not1, not2;
01951   BBuf *buf1, *buf2, *pbuf;
01952   BitSetRef bsr1, bsr2;
01953   BitSet bs1, bs2;
01954 
01955   not1 = IS_CCLASS_NOT(dest);
01956   bsr1 = dest->bs;
01957   buf1 = dest->mbuf;
01958   not2 = IS_CCLASS_NOT(cc);
01959   bsr2 = cc->bs;
01960   buf2 = cc->mbuf;
01961 
01962   if (not1 != 0) {
01963     bitset_invert_to(bsr1, bs1);
01964     bsr1 = bs1;
01965   }
01966   if (not2 != 0) {
01967     bitset_invert_to(bsr2, bs2);
01968     bsr2 = bs2;
01969   }
01970   bitset_and(bsr1, bsr2);
01971   if (bsr1 != dest->bs) {
01972     bitset_copy(dest->bs, bsr1);
01973     bsr1 = dest->bs;
01974   }
01975   if (not1 != 0) {
01976     bitset_invert(dest->bs);
01977   }
01978 
01979   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
01980     if (not1 != 0 && not2 != 0) {
01981       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
01982     }
01983     else {
01984       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
01985       if (r == 0 && not1 != 0) {
01986        BBuf *tbuf;
01987        r = not_code_range_buf(enc, pbuf, &tbuf);
01988        if (r != 0) {
01989          bbuf_free(pbuf);
01990          return r;
01991        }
01992        bbuf_free(pbuf);
01993        pbuf = tbuf;
01994       }
01995     }
01996     if (r != 0) return r;
01997 
01998     dest->mbuf = pbuf;
01999     bbuf_free(buf1);
02000     return r;
02001   }
02002   return 0;
02003 }
02004 
02005 static int
02006 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
02007 {
02008   int r, not1, not2;
02009   BBuf *buf1, *buf2, *pbuf;
02010   BitSetRef bsr1, bsr2;
02011   BitSet bs1, bs2;
02012 
02013   not1 = IS_CCLASS_NOT(dest);
02014   bsr1 = dest->bs;
02015   buf1 = dest->mbuf;
02016   not2 = IS_CCLASS_NOT(cc);
02017   bsr2 = cc->bs;
02018   buf2 = cc->mbuf;
02019 
02020   if (not1 != 0) {
02021     bitset_invert_to(bsr1, bs1);
02022     bsr1 = bs1;
02023   }
02024   if (not2 != 0) {
02025     bitset_invert_to(bsr2, bs2);
02026     bsr2 = bs2;
02027   }
02028   bitset_or(bsr1, bsr2);
02029   if (bsr1 != dest->bs) {
02030     bitset_copy(dest->bs, bsr1);
02031     bsr1 = dest->bs;
02032   }
02033   if (not1 != 0) {
02034     bitset_invert(dest->bs);
02035   }
02036 
02037   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02038     if (not1 != 0 && not2 != 0) {
02039       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
02040     }
02041     else {
02042       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
02043       if (r == 0 && not1 != 0) {
02044        BBuf *tbuf;
02045        r = not_code_range_buf(enc, pbuf, &tbuf);
02046        if (r != 0) {
02047          bbuf_free(pbuf);
02048          return r;
02049        }
02050        bbuf_free(pbuf);
02051        pbuf = tbuf;
02052       }
02053     }
02054     if (r != 0) return r;
02055 
02056     dest->mbuf = pbuf;
02057     bbuf_free(buf1);
02058     return r;
02059   }
02060   else
02061     return 0;
02062 }
02063 
02064 static int
02065 conv_backslash_value(int c, ScanEnv* env)
02066 {
02067   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
02068     switch (c) {
02069     case 'n':  return '\n';
02070     case 't':  return '\t';
02071     case 'r':  return '\r';
02072     case 'f':  return '\f';
02073     case 'a':  return '\007';
02074     case 'b':  return '\010';
02075     case 'e':  return '\033';
02076     case 'v':
02077       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
02078        return '\v';
02079       break;
02080 
02081     default:
02082       break;
02083     }
02084   }
02085   return c;
02086 }
02087 
02088 static int
02089 is_invalid_quantifier_target(Node* node)
02090 {
02091   switch (NTYPE(node)) {
02092   case N_ANCHOR:
02093     return 1;
02094     break;
02095 
02096   case N_EFFECT:
02097     if (NEFFECT(node).type == EFFECT_OPTION)
02098       return is_invalid_quantifier_target(NEFFECT(node).target);
02099     break;
02100 
02101   case N_LIST: /* ex. (?:\G\A)* */
02102     do {
02103       if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
02104     } while (IS_NOT_NULL(node = NCONS(node).right));
02105     return 0;
02106     break;
02107 
02108   case N_ALT:  /* ex. (?:abc|\A)* */
02109     do {
02110       if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
02111     } while (IS_NOT_NULL(node = NCONS(node).right));
02112     break;
02113 
02114   default:
02115     break;
02116   }
02117   return 0;
02118 }
02119 
02120 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
02121 static int
02122 popular_quantifier_num(QuantifierNode* qf)
02123 {
02124   if (qf->greedy) {
02125     if (qf->lower == 0) {
02126       if (qf->upper == 1) return 0;
02127       else if (IS_REPEAT_INFINITE(qf->upper)) return 1;
02128     }
02129     else if (qf->lower == 1) {
02130       if (IS_REPEAT_INFINITE(qf->upper)) return 2;
02131     }
02132   }
02133   else {
02134     if (qf->lower == 0) {
02135       if (qf->upper == 1) return 3;
02136       else if (IS_REPEAT_INFINITE(qf->upper)) return 4;
02137     }
02138     else if (qf->lower == 1) {
02139       if (IS_REPEAT_INFINITE(qf->upper)) return 5;
02140     }
02141   }
02142   return -1;
02143 }
02144 
02145 
02146 enum ReduceType {
02147   RQ_ASIS = 0, /* as is */
02148   RQ_DEL  = 1, /* delete parent */
02149   RQ_A,        /* to '*'    */
02150   RQ_AQ,       /* to '*?'   */
02151   RQ_QQ,       /* to '??'   */
02152   RQ_P_QQ,     /* to '+)??' */
02153   RQ_PQ_Q      /* to '+?)?' */
02154 };
02155 
02156 static enum ReduceType ReduceTypeTable[6][6] = {
02157   {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
02158   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
02159   {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
02160   {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
02161   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
02162   {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
02163 };
02164 
02165 extern void
02166 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
02167 {
02168   int pnum, cnum;
02169   QuantifierNode *p, *c;
02170 
02171   p = &(NQUANTIFIER(pnode));
02172   c = &(NQUANTIFIER(cnode));
02173   pnum = popular_quantifier_num(p);
02174   cnum = popular_quantifier_num(c);
02175 
02176   switch(ReduceTypeTable[cnum][pnum]) {
02177   case RQ_DEL:
02178     *p = *c;
02179     break;
02180   case RQ_A:
02181     p->target = c->target;
02182     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
02183     break;
02184   case RQ_AQ:
02185     p->target = c->target;
02186     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
02187     break;
02188   case RQ_QQ:
02189     p->target = c->target;
02190     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
02191     break;
02192   case RQ_P_QQ:
02193     p->target = cnode;
02194     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
02195     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
02196     return ;
02197     break;
02198   case RQ_PQ_Q:
02199     p->target = cnode;
02200     p->lower  = 0;  p->upper = 1;  p->greedy = 1;
02201     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
02202     return ;
02203     break;
02204   case RQ_ASIS:
02205     p->target = cnode;
02206     return ;
02207     break;
02208   }
02209 
02210   c->target = NULL_NODE;
02211   onig_node_free(cnode);
02212 }
02213 
02214 
02215 enum TokenSyms {
02216   TK_EOT      = 0,   /* end of token */
02217   TK_RAW_BYTE = 1,
02218   TK_CHAR,
02219   TK_STRING,
02220   TK_CODE_POINT,
02221   TK_ANYCHAR,
02222   TK_CHAR_TYPE,
02223   TK_BACKREF,
02224   TK_CALL,
02225   TK_ANCHOR,
02226   TK_OP_REPEAT,
02227   TK_INTERVAL,
02228   TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
02229   TK_ALT,
02230   TK_SUBEXP_OPEN,
02231   TK_SUBEXP_CLOSE,
02232   TK_CC_OPEN,
02233   TK_QUOTE_OPEN,
02234   TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
02235   /* in cc */
02236   TK_CC_CLOSE,
02237   TK_CC_RANGE,
02238   TK_POSIX_BRACKET_OPEN,
02239   TK_CC_AND,             /* && */
02240   TK_CC_CC_OPEN          /* [ */
02241 };
02242 
02243 typedef struct {
02244   enum TokenSyms type;
02245   int escaped;
02246   int base;   /* is number: 8, 16 (used in [....]) */
02247   UChar* backp;
02248   union {
02249     UChar* s;
02250     int   c;
02251     OnigCodePoint code;
02252     int   anchor;
02253     int   subtype;
02254     struct {
02255       int lower;
02256       int upper;
02257       int greedy;
02258       int possessive;
02259     } repeat;
02260     struct {
02261       int  num;
02262       int  ref1;
02263       int* refs;
02264       int  by_name;
02265 #ifdef USE_BACKREF_AT_LEVEL
02266       int  exist_level;
02267       int  level;   /* \k<name+n> */
02268 #endif
02269     } backref;
02270     struct {
02271       UChar* name;
02272       UChar* name_end;
02273     } call;
02274     struct {
02275       int not;
02276     } prop;
02277   } u;
02278 } OnigToken;
02279 
02280 
02281 static int
02282 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
02283 {
02284   int low, up, syn_allow, non_low = 0;
02285   int r = 0;
02286   OnigCodePoint c;
02287   OnigEncoding enc = env->enc;
02288   UChar* p = *src;
02289   PFETCH_READY;
02290 
02291   syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
02292 
02293   if (PEND) {
02294     if (syn_allow)
02295       return 1;  /* "....{" : OK! */
02296     else
02297       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
02298   }
02299 
02300   if (! syn_allow) {
02301     c = PPEEK;
02302     if (c == ')' || c == '(' || c == '|') {
02303       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02304     }
02305   }
02306 
02307   low = onig_scan_unsigned_number(&p, end, env->enc);
02308   if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02309   if (low > ONIG_MAX_REPEAT_NUM)
02310     return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02311 
02312   if (p == *src) { /* can't read low */
02313     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
02314       /* allow {,n} as {0,n} */
02315       low = 0;
02316       non_low = 1;
02317     }
02318     else
02319       goto invalid;
02320   }
02321 
02322   if (PEND) goto invalid;
02323   PFETCH(c);
02324   if (c == ',') {
02325     UChar* prev = p;
02326     up = onig_scan_unsigned_number(&p, end, env->enc);
02327     if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02328     if (up > ONIG_MAX_REPEAT_NUM)
02329       return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02330 
02331     if (p == prev) {
02332       if (non_low != 0)
02333        goto invalid;
02334       up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
02335     }
02336   }
02337   else {
02338     if (non_low != 0)
02339       goto invalid;
02340 
02341     PUNFETCH;
02342     up = low;  /* {n} : exact n times */
02343     r = 2;     /* fixed */
02344   }
02345 
02346   if (PEND) goto invalid;
02347   PFETCH(c);
02348   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
02349     if (c != MC_ESC(enc)) goto invalid;
02350     PFETCH(c);
02351   }
02352   if (c != '}') goto invalid;
02353 
02354   if (!IS_REPEAT_INFINITE(up) && low > up) {
02355     return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
02356   }
02357 
02358   tok->type = TK_INTERVAL;
02359   tok->u.repeat.lower = low;
02360   tok->u.repeat.upper = up;
02361   *src = p;
02362   return r; /* 0: normal {n,m}, 2: fixed {n} */
02363 
02364  invalid:
02365   if (syn_allow)
02366     return 1;  /* OK */
02367   else
02368     return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
02369 }
02370 
02371 /* \M-, \C-, \c, or \... */
02372 static int
02373 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
02374 {
02375   int v;
02376   OnigCodePoint c;
02377   OnigEncoding enc = env->enc;
02378   UChar* p = *src;
02379   PFETCH_READY;
02380 
02381   if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02382 
02383   PFETCH(c);
02384   switch (c) {
02385   case 'M':
02386     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
02387       if (PEND) return ONIGERR_END_PATTERN_AT_META;
02388       PFETCH(c);
02389       if (c != '-') return ONIGERR_META_CODE_SYNTAX;
02390       if (PEND) return ONIGERR_END_PATTERN_AT_META;
02391       PFETCH(c);
02392       if (c == MC_ESC(enc)) {
02393        v = fetch_escaped_value(&p, end, env);
02394        if (v < 0) return v;
02395         c = (OnigCodePoint )v;
02396       }
02397       c = ((c & 0xff) | 0x80);
02398     }
02399     else
02400       goto backslash;
02401     break;
02402 
02403   case 'C':
02404     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
02405       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02406       PFETCH(c);
02407       if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
02408       goto control;
02409     }
02410     else
02411       goto backslash;
02412 
02413   case 'c':
02414     if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
02415     control:
02416       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02417       PFETCH(c);
02418       if (c == '?') {
02419        c = 0177;
02420       }
02421       else {
02422         if (c == MC_ESC(enc)) {
02423           v = fetch_escaped_value(&p, end, env);
02424           if (v < 0) return v;
02425           c = (OnigCodePoint )v;
02426         }
02427        c &= 0x9f;
02428       }
02429       break;
02430     }
02431     /* fall through */
02432 
02433   default:
02434     {
02435     backslash:
02436       c = conv_backslash_value(c, env);
02437     }
02438     break;
02439   }
02440 
02441   *src = p;
02442   return c;
02443 }
02444 
02445 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
02446 
02447 #ifdef USE_NAMED_GROUP
02448 #ifdef USE_BACKREF_AT_LEVEL
02449 /*
02450    \k<name+n>, \k<name-n>
02451 */
02452 static int
02453 fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
02454                     , ScanEnv* env, int* level)
02455 {
02456   int r, exist_level = 0;
02457   OnigCodePoint c = 0;
02458   OnigCodePoint first_code;
02459   OnigEncoding enc = env->enc;
02460   UChar *name_end;
02461   UChar *p = *src;
02462   PFETCH_READY;
02463 
02464   name_end = end;
02465   r = 0;
02466   if (PEND) {
02467     return ONIGERR_EMPTY_GROUP_NAME;
02468   }
02469   else {
02470     PFETCH(c);
02471     first_code = c;
02472     if (c == '>')
02473       return ONIGERR_EMPTY_GROUP_NAME;
02474 
02475     if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02476       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02477     }
02478   }
02479 
02480   while (!PEND) {
02481     name_end = p;
02482     PFETCH(c);
02483     if (c == '>' || c == ')' || c == '+' || c == '-') break;
02484 
02485     if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02486       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02487     }
02488   }
02489 
02490   if (c != '>') {
02491     if (c == '+' || c == '-') {
02492       int num;
02493       int flag = (c == '-' ? -1 : 1);
02494 
02495       PFETCH(c);
02496       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
02497       PUNFETCH;
02498       num = onig_scan_unsigned_number(&p, end, enc);
02499       if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
02500       *level = (num * flag);
02501       exist_level = 1;
02502 
02503       PFETCH(c);
02504       if (c == '>')
02505        goto first_check;
02506     }
02507 
02508   err:
02509     r = ONIGERR_INVALID_GROUP_NAME;
02510     name_end = end;
02511   }
02512   else {
02513   first_check:
02514     if (ONIGENC_IS_CODE_ASCII(first_code) &&
02515         ONIGENC_IS_CODE_UPPER(enc, first_code))
02516       r = ONIGERR_INVALID_GROUP_NAME;
02517   }
02518 
02519   if (r == 0) {
02520     *rname_end = name_end;
02521     *src = p;
02522     return (exist_level ? 1 : 0);
02523   }
02524   else {
02525     onig_scan_env_set_error_string(env, r, *src, name_end);
02526     return r;
02527   }
02528 }
02529 #endif /* USE_BACKREF_AT_LEVEL */
02530 
02531 /*
02532   def: 0 -> define name    (don't allow number name)
02533        1 -> reference name (allow number name)
02534 */
02535 static int
02536 fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
02537 {
02538   int r, is_num;
02539   OnigCodePoint c = 0;
02540   OnigCodePoint first_code;
02541   OnigEncoding enc = env->enc;
02542   UChar *name_end;
02543   UChar *p = *src;
02544   PFETCH_READY;
02545 
02546   name_end = end;
02547   r = 0;
02548   is_num = 0;
02549   if (PEND) {
02550     return ONIGERR_EMPTY_GROUP_NAME;
02551   }
02552   else {
02553     PFETCH(c);
02554     first_code = c;
02555     if (c == '>')
02556       return ONIGERR_EMPTY_GROUP_NAME;
02557 
02558     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02559       if (ref == 1)
02560        is_num = 1;
02561       else {
02562        r = ONIGERR_INVALID_GROUP_NAME;
02563       }
02564     }
02565     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02566       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02567     }
02568   }
02569 
02570   while (!PEND) {
02571     name_end = p;
02572     PFETCH(c);
02573     if (c == '>' || c == ')') break;
02574 
02575     if (is_num == 1) {
02576       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
02577        if (!ONIGENC_IS_CODE_WORD(enc, c))
02578          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02579        else
02580          r = ONIGERR_INVALID_GROUP_NAME;
02581       }
02582     }
02583     else {
02584       if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02585         r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02586       }
02587     }
02588   }
02589 
02590   if (c != '>') {
02591     r = ONIGERR_INVALID_GROUP_NAME;
02592     name_end = end;
02593   }
02594   else {
02595     if (ONIGENC_IS_CODE_ASCII(first_code) &&
02596         ONIGENC_IS_CODE_UPPER(enc, first_code))
02597       r = ONIGERR_INVALID_GROUP_NAME;
02598   }
02599 
02600   if (r == 0) {
02601     *rname_end = name_end;
02602     *src = p;
02603     return 0;
02604   }
02605   else {
02606     onig_scan_env_set_error_string(env, r, *src, name_end);
02607     return r;
02608   }
02609 }
02610 #else
02611 static int
02612 fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
02613 {
02614   int r, len;
02615   OnigCodePoint c = 0;
02616   UChar *name_end;
02617   OnigEncoding enc = env->enc;
02618   UChar *p = *src;
02619   PFETCH_READY;
02620 
02621   r = 0;
02622   while (!PEND) {
02623     name_end = p;
02624     if (enc_len(enc, p) > 1)
02625       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02626 
02627     PFETCH(c);
02628     if (c == '>' || c == ')') break;
02629     if (! ONIGENC_IS_CODE_DIGIT(enc, c))
02630       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02631   }
02632   if (c != '>') {
02633     r = ONIGERR_INVALID_GROUP_NAME;
02634     name_end = end;
02635   }
02636 
02637   if (r == 0) {
02638     *rname_end = name_end;
02639     *src = p;
02640     return 0;
02641   }
02642   else {
02643   err:
02644     onig_scan_env_set_error_string(env, r, *src, name_end);
02645     return r;
02646   }
02647 }
02648 #endif
02649 
02650 static void
02651 CC_ESC_WARN(ScanEnv* env, UChar *c)
02652 {
02653   if (onig_warn == onig_null_warn) return ;
02654 
02655   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
02656       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
02657     UChar buf[WARN_BUFSIZE];
02658     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
02659               env->pattern, env->pattern_end,
02660                 (UChar* )"character class has '%s' without escape", c);
02661     (*onig_warn)((char* )buf);
02662   }
02663 }
02664 
02665 static void
02666 CCEND_ESC_WARN(ScanEnv* env, UChar* c)
02667 {
02668   if (onig_warn == onig_null_warn) return ;
02669 
02670   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
02671     UChar buf[WARN_BUFSIZE];
02672     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
02673               (env)->pattern, (env)->pattern_end,
02674               (UChar* )"regular expression has '%s' without escape", c);
02675     (*onig_warn)((char* )buf);
02676   }
02677 }
02678 
02679 static UChar*
02680 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
02681                 UChar **next, OnigEncoding enc)
02682 {
02683   int i;
02684   OnigCodePoint x;
02685   UChar *q;
02686   UChar *p = from;
02687   
02688   while (p < to) {
02689     x = ONIGENC_MBC_TO_CODE(enc, p, to);
02690     q = p + enc_len(enc, p);
02691     if (x == s[0]) {
02692       for (i = 1; i < n && q < to; i++) {
02693        x = ONIGENC_MBC_TO_CODE(enc, q, to);
02694        if (x != s[i]) break;
02695        q += enc_len(enc, q);
02696       }
02697       if (i >= n) {
02698        if (IS_NOT_NULL(next))
02699          *next = q;
02700        return p;
02701       }
02702     }
02703     p = q;
02704   }
02705   return NULL_UCHARP;
02706 }
02707 
02708 static int
02709 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
02710                       OnigCodePoint bad, OnigEncoding enc)
02711 {
02712   int i, in_esc;
02713   OnigCodePoint x;
02714   UChar *q;
02715   UChar *p = from;
02716 
02717   in_esc = 0;
02718   while (p < to) {
02719     if (in_esc) {
02720       in_esc = 0;
02721       p += enc_len(enc, p);
02722     }
02723     else {
02724       x = ONIGENC_MBC_TO_CODE(enc, p, to);
02725       q = p + enc_len(enc, p);
02726       if (x == s[0]) {
02727        for (i = 1; i < n && q < to; i++) {
02728          x = ONIGENC_MBC_TO_CODE(enc, q, to);
02729          if (x != s[i]) break;
02730          q += enc_len(enc, q);
02731        }
02732        if (i >= n) return 1;
02733        p += enc_len(enc, p);
02734       }
02735       else {
02736        x = ONIGENC_MBC_TO_CODE(enc, p, to);
02737        if (x == bad) return 0;
02738        else if (x == MC_ESC(enc)) in_esc = 1;
02739        p = q;
02740       }
02741     }
02742   }
02743   return 0;
02744 }
02745 
02746 static int
02747 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
02748 {
02749   int num;
02750   OnigCodePoint c, c2;
02751   OnigSyntaxType* syn = env->syntax;
02752   OnigEncoding enc = env->enc;
02753   UChar* prev;
02754   UChar* p = *src;
02755   PFETCH_READY;
02756 
02757   if (PEND) {
02758     tok->type = TK_EOT;
02759     return tok->type;
02760   }
02761 
02762   PFETCH(c);
02763   tok->type = TK_CHAR;
02764   tok->base = 0;
02765   tok->u.c  = c;
02766   tok->escaped = 0;
02767 
02768   if (c == ']') {
02769     tok->type = TK_CC_CLOSE;
02770   }
02771   else if (c == '-') {
02772     tok->type = TK_CC_RANGE;
02773   }
02774   else if (c == MC_ESC(enc)) {
02775     if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
02776       goto end;
02777 
02778     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02779 
02780     PFETCH(c);
02781     tok->escaped = 1;
02782     tok->u.c = c;
02783     switch (c) {
02784     case 'w':
02785       tok->type = TK_CHAR_TYPE;
02786       tok->u.subtype = CTYPE_WORD;
02787       break;
02788     case 'W':
02789       tok->type = TK_CHAR_TYPE;
02790       tok->u.subtype = CTYPE_NOT_WORD;
02791       break;
02792     case 'd':
02793       tok->type = TK_CHAR_TYPE;
02794       tok->u.subtype = CTYPE_DIGIT;
02795       break;
02796     case 'D':
02797       tok->type = TK_CHAR_TYPE;
02798       tok->u.subtype = CTYPE_NOT_DIGIT;
02799       break;
02800     case 's':
02801       tok->type = TK_CHAR_TYPE;
02802       tok->u.subtype = CTYPE_WHITE_SPACE;
02803       break;
02804     case 'S':
02805       tok->type = TK_CHAR_TYPE;
02806       tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
02807       break;
02808     case 'h':
02809       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
02810       tok->type = TK_CHAR_TYPE;
02811       tok->u.subtype = CTYPE_XDIGIT;
02812       break;
02813     case 'H':
02814       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
02815       tok->type = TK_CHAR_TYPE;
02816       tok->u.subtype = CTYPE_NOT_XDIGIT;
02817       break;
02818 
02819     case 'p':
02820     case 'P':
02821       c2 = PPEEK;
02822       if (c2 == '{' &&
02823          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
02824        PINC;
02825        tok->type = TK_CHAR_PROPERTY;
02826        tok->u.prop.not = (c == 'P' ? 1 : 0);
02827 
02828        if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
02829          PFETCH(c2);
02830          if (c2 == '^') {
02831            tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
02832          }
02833          else
02834            PUNFETCH;
02835        }
02836       }
02837       break;
02838 
02839     case 'x':
02840       if (PEND) break;
02841 
02842       prev = p;
02843       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
02844        PINC;
02845        num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
02846        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
02847        if (!PEND) {
02848           c2 = PPEEK;
02849           if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
02850             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
02851         }
02852 
02853        if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) {
02854          PINC;
02855          tok->type   = TK_CODE_POINT;
02856          tok->base   = 16;
02857          tok->u.code = (OnigCodePoint )num;
02858        }
02859        else {
02860          /* can't read nothing or invalid format */
02861          p = prev;
02862        }
02863       }
02864       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
02865        num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
02866        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
02867        if (p == prev) {  /* can't read nothing. */
02868          num = 0; /* but, it's not error */
02869        }
02870        tok->type = TK_RAW_BYTE;
02871        tok->base = 16;
02872        tok->u.c  = num;
02873       }
02874       break;
02875 
02876     case 'u':
02877       if (PEND) break;
02878 
02879       prev = p;
02880       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
02881        num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
02882        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
02883        if (p == prev) {  /* can't read nothing. */
02884          num = 0; /* but, it's not error */
02885        }
02886        tok->type   = TK_CODE_POINT;
02887        tok->base   = 16;
02888        tok->u.code = (OnigCodePoint )num;
02889       }
02890       break;
02891 
02892     case '0':
02893     case '1': case '2': case '3': case '4': case '5': case '6': case '7':
02894       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
02895        PUNFETCH;
02896        prev = p;
02897        num = scan_unsigned_octal_number(&p, end, 3, enc);
02898        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
02899        if (p == prev) {  /* can't read nothing. */
02900          num = 0; /* but, it's not error */
02901        }
02902        tok->type = TK_RAW_BYTE;
02903        tok->base = 8;
02904        tok->u.c  = num;
02905       }
02906       break;
02907 
02908     default:
02909       PUNFETCH;
02910       num = fetch_escaped_value(&p, end, env);
02911       if (num < 0) return num;
02912       if (tok->u.c != num) {
02913        tok->u.code = (OnigCodePoint )num;
02914        tok->type   = TK_CODE_POINT;
02915       }
02916       break;
02917     }
02918   }
02919   else if (c == '[') {
02920     if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
02921       OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
02922       tok->backp = p; /* point at '[' is readed */
02923       PINC;
02924       if (str_exist_check_with_esc(send, 2, p, end,
02925                                    (OnigCodePoint )']', enc)) {
02926        tok->type = TK_POSIX_BRACKET_OPEN;
02927       }
02928       else {
02929        PUNFETCH;
02930        goto cc_in_cc;
02931       }
02932     }
02933     else {
02934     cc_in_cc:
02935       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
02936        tok->type = TK_CC_CC_OPEN;
02937       }
02938       else {
02939        CC_ESC_WARN(env, (UChar* )"[");
02940       }
02941     }
02942   }
02943   else if (c == '&') {
02944     if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
02945        !PEND && (PPEEK_IS('&'))) {
02946       PINC;
02947       tok->type = TK_CC_AND;
02948     }
02949   }
02950 
02951  end:
02952   *src = p;
02953   return tok->type;
02954 }
02955 
02956 static int
02957 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
02958 {
02959   int r, num;
02960   OnigCodePoint c;
02961   OnigEncoding enc = env->enc;
02962   OnigSyntaxType* syn = env->syntax;
02963   UChar* prev;
02964   UChar* p = *src;
02965   PFETCH_READY;
02966 
02967  start:
02968   if (PEND) {
02969     tok->type = TK_EOT;
02970     return tok->type;
02971   }
02972 
02973   tok->type  = TK_STRING;
02974   tok->base  = 0;
02975   tok->backp = p;
02976 
02977   PFETCH(c);
02978   if (IS_MC_ESC_CODE(c, enc, syn)) {
02979     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02980 
02981     tok->backp = p;
02982     PFETCH(c);
02983 
02984     tok->u.c = c;
02985     tok->escaped = 1;
02986     switch (c) {
02987     case '*':
02988       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
02989       tok->type = TK_OP_REPEAT;
02990       tok->u.repeat.lower = 0;
02991       tok->u.repeat.upper = REPEAT_INFINITE;
02992       goto greedy_check;
02993       break;
02994 
02995     case '+':
02996       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
02997       tok->type = TK_OP_REPEAT;
02998       tok->u.repeat.lower = 1;
02999       tok->u.repeat.upper = REPEAT_INFINITE;
03000       goto greedy_check;
03001       break;
03002 
03003     case '?':
03004       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
03005       tok->type = TK_OP_REPEAT;
03006       tok->u.repeat.lower = 0;
03007       tok->u.repeat.upper = 1;
03008     greedy_check:
03009       if (!PEND && PPEEK_IS('?') &&
03010          IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
03011        PFETCH(c);
03012        tok->u.repeat.greedy     = 0;
03013        tok->u.repeat.possessive = 0;
03014       }
03015       else {
03016       possessive_check:
03017        if (!PEND && PPEEK_IS('+') &&
03018            ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
03019              tok->type != TK_INTERVAL)  ||
03020             (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
03021              tok->type == TK_INTERVAL))) {
03022          PFETCH(c);
03023          tok->u.repeat.greedy     = 1;
03024          tok->u.repeat.possessive = 1;
03025        }
03026        else {
03027          tok->u.repeat.greedy     = 1;
03028          tok->u.repeat.possessive = 0;
03029        }
03030       }
03031       break;
03032 
03033     case '{':
03034       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
03035       r = fetch_range_quantifier(&p, end, tok, env);
03036       if (r < 0) return r;  /* error */
03037       if (r == 0) goto greedy_check;
03038       else if (r == 2) { /* {n} */
03039        if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03040          goto possessive_check;
03041 
03042        goto greedy_check;
03043       }
03044       /* r == 1 : normal char */
03045       break;
03046 
03047     case '|':
03048       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
03049       tok->type = TK_ALT;
03050       break;
03051 
03052     case '(':
03053       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03054       tok->type = TK_SUBEXP_OPEN;
03055       break;
03056 
03057     case ')':
03058       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03059       tok->type = TK_SUBEXP_CLOSE;
03060       break;
03061 
03062     case 'w':
03063       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03064       tok->type = TK_CHAR_TYPE;
03065       tok->u.subtype = CTYPE_WORD;
03066       break;
03067 
03068     case 'W':
03069       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03070       tok->type = TK_CHAR_TYPE;
03071       tok->u.subtype = CTYPE_NOT_WORD;
03072       break;
03073 
03074     case 'b':
03075       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03076       tok->type = TK_ANCHOR;
03077       tok->u.anchor = ANCHOR_WORD_BOUND;
03078       break;
03079 
03080     case 'B':
03081       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03082       tok->type = TK_ANCHOR;
03083       tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
03084       break;
03085 
03086 #ifdef USE_WORD_BEGIN_END
03087     case '<':
03088       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03089       tok->type = TK_ANCHOR;
03090       tok->u.anchor = ANCHOR_WORD_BEGIN;
03091       break;
03092 
03093     case '>':
03094       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03095       tok->type = TK_ANCHOR;
03096       tok->u.anchor = ANCHOR_WORD_END;
03097       break;
03098 #endif
03099 
03100     case 's':
03101       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03102       tok->type = TK_CHAR_TYPE;
03103       tok->u.subtype = CTYPE_WHITE_SPACE;
03104       break;
03105 
03106     case 'S':
03107       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03108       tok->type = TK_CHAR_TYPE;
03109       tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
03110       break;
03111 
03112     case 'd':
03113       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03114       tok->type = TK_CHAR_TYPE;
03115       tok->u.subtype = CTYPE_DIGIT;
03116       break;
03117 
03118     case 'D':
03119       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03120       tok->type = TK_CHAR_TYPE;
03121       tok->u.subtype = CTYPE_NOT_DIGIT;
03122       break;
03123 
03124     case 'h':
03125       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03126       tok->type = TK_CHAR_TYPE;
03127       tok->u.subtype = CTYPE_XDIGIT;
03128       break;
03129 
03130     case 'H':
03131       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03132       tok->type = TK_CHAR_TYPE;
03133       tok->u.subtype = CTYPE_NOT_XDIGIT;
03134       break;
03135 
03136     case 'A':
03137       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03138     begin_buf:
03139       tok->type = TK_ANCHOR;
03140       tok->u.subtype = ANCHOR_BEGIN_BUF;
03141       break;
03142 
03143     case 'Z':
03144       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03145       tok->type = TK_ANCHOR;
03146       tok->u.subtype = ANCHOR_SEMI_END_BUF;
03147       break;
03148 
03149     case 'z':
03150       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03151     end_buf:
03152       tok->type = TK_ANCHOR;
03153       tok->u.subtype = ANCHOR_END_BUF;
03154       break;
03155 
03156     case 'G':
03157       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
03158       tok->type = TK_ANCHOR;
03159       tok->u.subtype = ANCHOR_BEGIN_POSITION;
03160       break;
03161 
03162     case '`':
03163       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03164       goto begin_buf;
03165       break;
03166 
03167     case '\'':
03168       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03169       goto end_buf;
03170       break;
03171 
03172     case 'x':
03173       if (PEND) break;
03174 
03175       prev = p;
03176       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03177        PINC;
03178        num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03179        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03180        if (!PEND) {
03181           if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
03182             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03183         }
03184 
03185        if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) {
03186          PINC;
03187          tok->type   = TK_CODE_POINT;
03188          tok->u.code = (OnigCodePoint )num;
03189        }
03190        else {
03191          /* can't read nothing or invalid format */
03192          p = prev;
03193        }
03194       }
03195       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03196        num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03197        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03198        if (p == prev) {  /* can't read nothing. */
03199          num = 0; /* but, it's not error */
03200        }
03201        tok->type = TK_RAW_BYTE;
03202        tok->base = 16;
03203        tok->u.c  = num;
03204       }
03205       break;
03206 
03207     case 'u':
03208       if (PEND) break;
03209 
03210       prev = p;
03211       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03212        num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03213        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03214        if (p == prev) {  /* can't read nothing. */
03215          num = 0; /* but, it's not error */
03216        }
03217        tok->type   = TK_CODE_POINT;
03218        tok->base   = 16;
03219        tok->u.code = (OnigCodePoint )num;
03220       }
03221       break;
03222 
03223     case '1': case '2': case '3': case '4':
03224     case '5': case '6': case '7': case '8': case '9':
03225       PUNFETCH;
03226       prev = p;
03227       num = onig_scan_unsigned_number(&p, end, enc);
03228       if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
03229         goto skip_backref;
03230       }
03231 
03232       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && 
03233          (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
03234        if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03235          if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
03236            return ONIGERR_INVALID_BACKREF;
03237        }
03238 
03239        tok->type = TK_BACKREF;
03240        tok->u.backref.num     = 1;
03241        tok->u.backref.ref1    = num;
03242        tok->u.backref.by_name = 0;
03243 #ifdef USE_BACKREF_AT_LEVEL
03244        tok->u.backref.exist_level = 0;
03245 #endif
03246        break;
03247       }
03248 
03249     skip_backref:
03250       if (c == '8' || c == '9') {
03251        /* normal char */
03252        p = prev; PINC;
03253        break;
03254       }
03255 
03256       p = prev;
03257       /* fall through */
03258     case '0':
03259       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03260        prev = p;
03261        num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
03262        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03263        if (p == prev) {  /* can't read nothing. */
03264          num = 0; /* but, it's not error */
03265        }
03266        tok->type = TK_RAW_BYTE;
03267        tok->base = 8;
03268        tok->u.c  = num;
03269       }
03270       else if (c != '0') {
03271        PINC;
03272       }
03273       break;
03274 
03275 #ifdef USE_NAMED_GROUP
03276     case 'k':
03277       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
03278        PFETCH(c);
03279        if (c == '<') {
03280          UChar* name_end;
03281          int* backs;
03282 
03283          prev = p;
03284 
03285 #ifdef USE_BACKREF_AT_LEVEL
03286          name_end = NULL_UCHARP; /* no need. escape gcc warning. */
03287          r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
03288          if (r == 1) tok->u.backref.exist_level = 1;
03289          else        tok->u.backref.exist_level = 0;
03290 #else
03291          r = fetch_name(&p, end, &name_end, env, 1);
03292 #endif
03293          if (r < 0) return r;
03294 
03295          num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
03296          if (num <= 0) {
03297            onig_scan_env_set_error_string(env,
03298                          ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
03299            return ONIGERR_UNDEFINED_NAME_REFERENCE;
03300          }
03301          if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03302            int i;
03303            for (i = 0; i < num; i++) {
03304              if (backs[i] > env->num_mem ||
03305                 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
03306               return ONIGERR_INVALID_BACKREF;
03307            }
03308          }
03309 
03310          tok->type = TK_BACKREF;
03311          tok->u.backref.by_name = 1;
03312          if (num == 1) {
03313            tok->u.backref.num  = 1;
03314            tok->u.backref.ref1 = backs[0];
03315          }
03316          else {
03317            tok->u.backref.num  = num;
03318            tok->u.backref.refs = backs;
03319          }
03320        }
03321        else
03322          PUNFETCH;
03323       }
03324       break;
03325 #endif
03326 
03327 #ifdef USE_SUBEXP_CALL
03328     case 'g':
03329       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
03330        PFETCH(c);
03331        if (c == '<') {
03332          UChar* name_end;
03333 
03334          prev = p;
03335          r = fetch_name(&p, end, &name_end, env, 1);
03336          if (r < 0) return r;
03337 
03338          tok->type = TK_CALL;
03339          tok->u.call.name     = prev;
03340          tok->u.call.name_end = name_end;
03341        }
03342        else
03343          PUNFETCH;
03344       }
03345       break;
03346 #endif
03347 
03348     case 'Q':
03349       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
03350        tok->type = TK_QUOTE_OPEN;
03351       }
03352       break;
03353 
03354     case 'p':
03355     case 'P':
03356       if (PPEEK_IS('{') &&
03357          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03358        PINC;
03359        tok->type = TK_CHAR_PROPERTY;
03360        tok->u.prop.not = (c == 'P' ? 1 : 0);
03361 
03362        if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03363          PFETCH(c);
03364          if (c == '^') {
03365            tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03366          }
03367          else
03368            PUNFETCH;
03369        }
03370       }
03371       break;
03372 
03373     default:
03374       PUNFETCH;
03375       num = fetch_escaped_value(&p, end, env);
03376       if (num < 0) return num;
03377       /* set_raw: */
03378       if (tok->u.c != num) {
03379        tok->type = TK_CODE_POINT;
03380        tok->u.code = (OnigCodePoint )num;
03381       }
03382       else { /* string */
03383        p = tok->backp + enc_len(enc, tok->backp);
03384       }
03385       break;
03386     }
03387   }
03388   else {
03389     tok->u.c = c;
03390     tok->escaped = 0;
03391 
03392 #ifdef USE_VARIABLE_META_CHARS
03393     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
03394        IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
03395       if (c == MC_ANYCHAR(enc))
03396        goto any_char;
03397       else if (c == MC_ANYTIME(enc))
03398        goto anytime;
03399       else if (c == MC_ZERO_OR_ONE_TIME(enc))
03400        goto zero_or_one_time;
03401       else if (c == MC_ONE_OR_MORE_TIME(enc))
03402        goto one_or_more_time;
03403       else if (c == MC_ANYCHAR_ANYTIME(enc)) {
03404        tok->type = TK_ANYCHAR_ANYTIME;
03405        goto out;
03406       }
03407     }
03408 #endif
03409 
03410     switch (c) {
03411     case '.':
03412       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
03413 #ifdef USE_VARIABLE_META_CHARS
03414     any_char:
03415 #endif
03416       tok->type = TK_ANYCHAR;
03417       break;
03418 
03419     case '*':
03420       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
03421 #ifdef USE_VARIABLE_META_CHARS
03422     anytime:
03423 #endif
03424       tok->type = TK_OP_REPEAT;
03425       tok->u.repeat.lower = 0;
03426       tok->u.repeat.upper = REPEAT_INFINITE;
03427       goto greedy_check;
03428       break;
03429 
03430     case '+':
03431       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
03432 #ifdef USE_VARIABLE_META_CHARS
03433     one_or_more_time:
03434 #endif
03435       tok->type = TK_OP_REPEAT;
03436       tok->u.repeat.lower = 1;
03437       tok->u.repeat.upper = REPEAT_INFINITE;
03438       goto greedy_check;
03439       break;
03440 
03441     case '?':
03442       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
03443 #ifdef USE_VARIABLE_META_CHARS
03444     zero_or_one_time:
03445 #endif
03446       tok->type = TK_OP_REPEAT;
03447       tok->u.repeat.lower = 0;
03448       tok->u.repeat.upper = 1;
03449       goto greedy_check;
03450       break;
03451 
03452     case '{':
03453       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
03454       r = fetch_range_quantifier(&p, end, tok, env);
03455       if (r < 0) return r;  /* error */
03456       if (r == 0) goto greedy_check;
03457       else if (r == 2) { /* {n} */
03458        if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03459          goto possessive_check;
03460 
03461        goto greedy_check;
03462       }
03463       /* r == 1 : normal char */
03464       break;
03465 
03466     case '|':
03467       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
03468       tok->type = TK_ALT;
03469       break;
03470 
03471     case '(':
03472       if (PPEEK_IS('?') &&
03473           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
03474         PINC;
03475         if (PPEEK_IS('#')) {
03476           PFETCH(c);
03477           while (1) {
03478             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
03479             PFETCH(c);
03480             if (c == MC_ESC(enc)) {
03481               if (!PEND) PFETCH(c);
03482             }
03483             else {
03484               if (c == ')') break;
03485             }
03486           }
03487           goto start;
03488         }
03489         PUNFETCH;
03490       }
03491 
03492       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03493       tok->type = TK_SUBEXP_OPEN;
03494       break;
03495 
03496     case ')':
03497       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03498       tok->type = TK_SUBEXP_CLOSE;
03499       break;
03500 
03501     case '^':
03502       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03503       tok->type = TK_ANCHOR;
03504       tok->u.subtype = (IS_SINGLELINE(env->option)
03505                      ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
03506       break;
03507 
03508     case '$':
03509       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03510       tok->type = TK_ANCHOR;
03511       tok->u.subtype = (IS_SINGLELINE(env->option)
03512                      ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
03513       break;
03514 
03515     case '[':
03516       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
03517       tok->type = TK_CC_OPEN;
03518       break;
03519 
03520     case ']':
03521       if (*src > env->pattern)   /* /].../ is allowed. */
03522        CCEND_ESC_WARN(env, (UChar* )"]");
03523       break;
03524 
03525     case '#':
03526       if (IS_EXTEND(env->option)) {
03527        while (!PEND) {
03528          PFETCH(c);
03529          if (ONIGENC_IS_CODE_NEWLINE(enc, c))
03530            break;
03531        }
03532        goto start;
03533        break;
03534       }
03535       break;
03536 
03537     case ' ': case '\t': case '\n': case '\r': case '\f':
03538       if (IS_EXTEND(env->option))
03539        goto start;
03540       break;
03541 
03542     default:
03543       /* string */
03544       break;
03545     }
03546   }
03547 
03548 #ifdef USE_VARIABLE_META_CHARS
03549  out:
03550 #endif
03551   *src = p;
03552   return tok->type;
03553 }
03554 
03555 static int
03556 add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
03557                          const OnigCodePoint sbr[], const OnigCodePoint mbr[])
03558 {
03559   int i, r;
03560   OnigCodePoint j;
03561 
03562   int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
03563   int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
03564 
03565   if (not == 0) {
03566     for (i = 0; i < nsb; i++) {
03567       for (j  = ONIGENC_CODE_RANGE_FROM(sbr, i);
03568            j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
03569         BITSET_SET_BIT(cc->bs, j);
03570       }
03571     }
03572 
03573     for (i = 0; i < nmb; i++) {
03574       r = add_code_range_to_buf(&(cc->mbuf),
03575                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
03576                                 ONIGENC_CODE_RANGE_TO(mbr, i));
03577       if (r != 0) return r;
03578     }
03579   }
03580   else {
03581     OnigCodePoint prev = 0;
03582 
03583     if (ONIGENC_MBC_MINLEN(enc) == 1) {
03584       for (i = 0; i < nsb; i++) {
03585         for (j = prev;
03586              j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
03587           BITSET_SET_BIT(cc->bs, j);
03588         }
03589         prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
03590       }
03591       if (prev < 0x7f) {
03592         for (j = prev; j < 0x7f; j++) {
03593           BITSET_SET_BIT(cc->bs, j);
03594         }
03595       }
03596 
03597       prev = 0x80;
03598     }
03599 
03600     for (i = 0; i < nmb; i++) {
03601       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03602        r = add_code_range_to_buf(&(cc->mbuf), prev,
03603                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
03604        if (r != 0) return r;
03605       }
03606       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03607     }
03608     if (prev < 0x7fffffff) {
03609       r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
03610       if (r != 0) return r;
03611     }
03612   }
03613 
03614   return 0;
03615 }
03616 
03617 static int
03618 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
03619 {
03620   int c, r;
03621   const OnigCodePoint *sbr, *mbr;
03622   OnigEncoding enc = env->enc;
03623 
03624   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
03625   if (r == 0) {
03626     return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
03627   }
03628   else if (r != ONIG_NO_SUPPORT_CONFIG) {
03629     return r;
03630   }
03631 
03632   r = 0;
03633   switch (ctype) {
03634   case ONIGENC_CTYPE_ALPHA:
03635   case ONIGENC_CTYPE_BLANK:
03636   case ONIGENC_CTYPE_CNTRL:
03637   case ONIGENC_CTYPE_DIGIT:
03638   case ONIGENC_CTYPE_LOWER:
03639   case ONIGENC_CTYPE_PUNCT:
03640   case ONIGENC_CTYPE_SPACE:
03641   case ONIGENC_CTYPE_UPPER:
03642   case ONIGENC_CTYPE_XDIGIT:
03643   case ONIGENC_CTYPE_ASCII:
03644   case ONIGENC_CTYPE_ALNUM:
03645     if (not != 0) {
03646       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03647        if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03648          BITSET_SET_BIT(cc->bs, c);
03649       }
03650       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03651     }
03652     else {
03653       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03654        if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03655          BITSET_SET_BIT(cc->bs, c);
03656       }
03657     }
03658     break;
03659 
03660   case ONIGENC_CTYPE_GRAPH:
03661   case ONIGENC_CTYPE_PRINT:
03662     if (not != 0) {
03663       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03664        if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03665          BITSET_SET_BIT(cc->bs, c);
03666       }
03667     }
03668     else {
03669       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03670        if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03671          BITSET_SET_BIT(cc->bs, c);
03672       }
03673       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03674     }
03675     break;
03676 
03677   case ONIGENC_CTYPE_WORD:
03678     if (not == 0) {
03679       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03680        if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
03681       }
03682       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03683     }
03684     else {
03685       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03686         if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)  /* 0: invalid code point */
03687            && ! ONIGENC_IS_CODE_WORD(enc, c))
03688          BITSET_SET_BIT(cc->bs, c);
03689       }
03690     }
03691     break;
03692 
03693   default:
03694     return ONIGERR_PARSER_BUG;
03695     break;
03696   }
03697 
03698   return r;
03699 }
03700 
03701 static int
03702 parse_ctype_to_enc_ctype(int pctype, int* not)
03703 {
03704   int ctype;
03705 
03706   switch (pctype) {
03707   case CTYPE_WORD:
03708     ctype = ONIGENC_CTYPE_WORD;
03709     *not = 0;
03710     break;
03711   case CTYPE_NOT_WORD:
03712     ctype = ONIGENC_CTYPE_WORD;
03713     *not = 1;
03714     break;
03715   case CTYPE_WHITE_SPACE:
03716     ctype = ONIGENC_CTYPE_SPACE;
03717     *not = 0;
03718     break;
03719   case CTYPE_NOT_WHITE_SPACE:
03720     ctype = ONIGENC_CTYPE_SPACE;
03721     *not = 1;
03722     break;
03723   case CTYPE_DIGIT:
03724     ctype = ONIGENC_CTYPE_DIGIT;
03725     *not = 0;
03726     break;
03727   case CTYPE_NOT_DIGIT:
03728     ctype = ONIGENC_CTYPE_DIGIT;
03729     *not = 1;
03730     break;
03731   case CTYPE_XDIGIT:
03732     ctype = ONIGENC_CTYPE_XDIGIT;
03733     *not = 0;
03734     break;
03735   case CTYPE_NOT_XDIGIT:
03736     ctype = ONIGENC_CTYPE_XDIGIT;
03737     *not = 1;
03738     break;
03739   default:
03740     return ONIGERR_PARSER_BUG;
03741     break;
03742   }
03743   return ctype;
03744 }
03745 
03746 typedef struct {
03747   UChar    *name;
03748   int       ctype;
03749   short int len;
03750 } PosixBracketEntryType;
03751 
03752 static int
03753 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
03754 {
03755 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
03756 #define POSIX_BRACKET_NAME_MAX_LEN         6
03757 
03758   static PosixBracketEntryType PBS[] = {
03759     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
03760     { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
03761     { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
03762     { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
03763     { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
03764     { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
03765     { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
03766     { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
03767     { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
03768     { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
03769     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
03770     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
03771     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
03772     { (UChar* )NULL, -1, 0 }
03773   };
03774 
03775   PosixBracketEntryType *pb;
03776   int not, i, r;
03777   OnigCodePoint c;
03778   OnigEncoding enc = env->enc;
03779   UChar *p = *src;
03780   PFETCH_READY;
03781 
03782   if (PPEEK_IS('^')) {
03783     PINC;
03784     not = 1;
03785   }
03786   else
03787     not = 0;
03788 
03789   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2)
03790     goto not_posix_bracket;
03791 
03792   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
03793     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
03794       p = (UChar* )onigenc_step(enc, p, end, pb->len);
03795       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
03796        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
03797 
03798       r = add_ctype_to_cc(cc, pb->ctype, not, env);
03799       if (r != 0) return r;
03800 
03801       PINC; PINC;
03802       *src = p;
03803       return 0;
03804     }
03805   }
03806 
03807  not_posix_bracket:
03808   c = 0;
03809   i = 0;
03810   while (!PEND && ((c = PPEEK) != ':') && c != ']') {
03811     PINC;
03812     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
03813   }
03814   if (c == ':' && ! PEND) {
03815     PINC;
03816     if (! PEND) {
03817       PFETCH(c);
03818       if (c == ']')
03819        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
03820     }
03821   }
03822 
03823   return 1;   /* 1: is not POSIX bracket, but no error. */
03824 }
03825 
03826 static int
03827 property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
03828 {
03829   static PosixBracketEntryType PBS[] = {
03830     { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
03831     { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
03832     { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
03833     { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
03834     { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
03835     { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
03836     { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
03837     { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
03838     { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
03839     { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
03840     { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
03841     { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
03842     { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
03843     { (UChar* )NULL, -1, 0 }
03844   };
03845 
03846   PosixBracketEntryType *pb;
03847   int len;
03848 
03849   len = onigenc_strlen(enc, p, end);
03850   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
03851     if (len == pb->len &&
03852         onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
03853       return pb->ctype;
03854   }
03855 
03856   return -1;
03857 }
03858 
03859 static int
03860 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
03861 {
03862   int ctype;
03863   OnigCodePoint c;
03864   OnigEncoding enc = env->enc;
03865   UChar *prev, *start, *p = *src;
03866   PFETCH_READY;
03867 
03868   /* 'IsXXXX' => 'XXXX' */
03869   if (!PEND &&
03870       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
03871     c = PPEEK;
03872     if (c == 'I') {
03873       PINC;
03874       if (! PEND) {
03875        c = PPEEK;
03876        if (c == 's')
03877          PINC;
03878        else
03879          PUNFETCH;
03880       }
03881     }
03882   }
03883 
03884   start = prev = p;
03885 
03886   while (!PEND) {
03887     prev = p;
03888     PFETCH(c);
03889     if (c == '}') {
03890       ctype = property_name_to_ctype(start, prev, enc);
03891       if (ctype < 0) break;
03892 
03893       *src = p;
03894       return ctype;
03895     }
03896     else if (c == '(' || c == ')' || c == '{' || c == '|')
03897       break;
03898   }
03899 
03900   onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
03901                              *src, prev);
03902   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
03903 }
03904 
03905 static int
03906 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
03907                   ScanEnv* env)
03908 {
03909   int r, ctype;
03910   CClassNode* cc;
03911 
03912   ctype = fetch_char_property_to_ctype(src, end, env);
03913   if (ctype < 0) return ctype;
03914 
03915   *np = node_new_cclass();
03916   CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
03917   cc = &(NCCLASS(*np));
03918   r = add_ctype_to_cc(cc, ctype, 0, env);
03919   if (r != 0) return r;
03920   if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
03921 
03922   return 0;
03923 }
03924 
03925 
03926 enum CCSTATE {
03927   CCS_VALUE,
03928   CCS_RANGE,
03929   CCS_COMPLETE,
03930   CCS_START
03931 };
03932 
03933 enum CCVALTYPE {
03934   CCV_SB,
03935   CCV_CODE_POINT,
03936   CCV_CLASS
03937 };
03938 
03939 static int
03940 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
03941                enum CCSTATE* state, ScanEnv* env)
03942 {
03943   int r;
03944 
03945   if (*state == CCS_RANGE)
03946     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
03947 
03948   if (*state == CCS_VALUE && *type != CCV_CLASS) {
03949     if (*type == CCV_SB)
03950       BITSET_SET_BIT(cc->bs, (int )(*vs));
03951     else if (*type == CCV_CODE_POINT) {
03952       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
03953       if (r < 0) return r;
03954     }
03955   }
03956 
03957   *state = CCS_VALUE;
03958   *type  = CCV_CLASS;
03959   return 0;
03960 }
03961 
03962 static int
03963 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
03964               int* vs_israw, int v_israw,
03965               enum CCVALTYPE intype, enum CCVALTYPE* type,
03966               enum CCSTATE* state, ScanEnv* env)
03967 {
03968   int r;
03969 
03970   switch (*state) {
03971   case CCS_VALUE:
03972     if (*type == CCV_SB)
03973       BITSET_SET_BIT(cc->bs, (int )(*vs));
03974     else if (*type == CCV_CODE_POINT) {
03975       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
03976       if (r < 0) return r;
03977     }
03978     break;
03979 
03980   case CCS_RANGE:
03981     if (intype == *type) {
03982       if (intype == CCV_SB) {
03983         if (*vs > 0xff || v > 0xff)
03984           return ONIGERR_INVALID_WIDE_CHAR_VALUE;
03985 
03986        if (*vs > v) {
03987          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
03988            goto ccs_range_end;
03989          else
03990            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
03991        }
03992        bitset_set_range(cc->bs, (int )*vs, (int )v);
03993       }
03994       else {
03995        r = add_code_range(&(cc->mbuf), env, *vs, v);
03996        if (r < 0) return r;
03997       }
03998     }
03999     else {
04000 #if 0
04001       if (intype == CCV_CODE_POINT && *type == CCV_SB) {
04002 #endif
04003        if (*vs > v) {
04004          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04005            goto ccs_range_end;
04006          else
04007            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04008        }
04009        bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
04010        r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
04011        if (r < 0) return r;
04012 #if 0
04013       }
04014       else
04015        return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
04016 #endif
04017     }
04018   ccs_range_end:
04019     *state = CCS_COMPLETE;
04020     break;
04021 
04022   case CCS_COMPLETE:
04023   case CCS_START:
04024     *state = CCS_VALUE;
04025     break;
04026 
04027   default:
04028     break;
04029   }
04030 
04031   *vs_israw = v_israw;
04032   *vs       = v;
04033   *type     = intype;
04034   return 0;
04035 }
04036 
04037 static int
04038 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
04039                OnigEncoding enc)
04040 {
04041   int in_esc;
04042   OnigCodePoint code;
04043   UChar* p = from;
04044   PFETCH_READY;
04045 
04046   in_esc = 0;
04047   while (! PEND) {
04048     if (ignore_escaped && in_esc) {
04049       in_esc = 0;
04050     }
04051     else {
04052       PFETCH(code);
04053       if (code == c) return 1;
04054       if (code == MC_ESC(enc)) in_esc = 1;
04055     }
04056   }
04057   return 0;
04058 }
04059 
04060 static int
04061 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
04062                ScanEnv* env)
04063 {
04064   int r, neg, len, fetched, and_start;
04065   OnigCodePoint v, vs;
04066   UChar *p;
04067   Node* node;
04068   CClassNode *cc, *prev_cc;
04069   CClassNode work_cc;
04070 
04071   enum CCSTATE state;
04072   enum CCVALTYPE val_type, in_type;
04073   int val_israw, in_israw;
04074 
04075   prev_cc = (CClassNode* )NULL;
04076   *np = NULL_NODE;
04077   r = fetch_token_in_cc(tok, src, end, env);
04078   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
04079     neg = 1;
04080     r = fetch_token_in_cc(tok, src, end, env);
04081   }
04082   else {
04083     neg = 0;
04084   }
04085 
04086   if (r < 0) return r;
04087   if (r == TK_CC_CLOSE) {
04088     if (! code_exist_check((OnigCodePoint )']',
04089                            *src, env->pattern_end, 1, env->enc))
04090       return ONIGERR_EMPTY_CHAR_CLASS;
04091 
04092     CC_ESC_WARN(env, (UChar* )"]");
04093     r = tok->type = TK_CHAR;  /* allow []...] */
04094   }
04095 
04096   *np = node = node_new_cclass();
04097   CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
04098   cc = &(NCCLASS(node));
04099 
04100   and_start = 0;
04101   state = CCS_START;
04102   p = *src;
04103   while (r != TK_CC_CLOSE) {
04104     fetched = 0;
04105     switch (r) {
04106     case TK_CHAR:
04107       len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
04108       if (len > 1) {
04109        in_type = CCV_CODE_POINT;
04110       }
04111       else {
04112       sb_char:
04113        in_type = CCV_SB;
04114       }
04115       v = (OnigCodePoint )tok->u.c;
04116       in_israw = 0;
04117       goto val_entry2;
04118       break;
04119 
04120     case TK_RAW_BYTE:
04121       /* tok->base != 0 : octal or hexadec. */
04122       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
04123        UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04124        UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
04125        UChar* psave = p;
04126        int i, base = tok->base;
04127 
04128        buf[0] = tok->u.c;
04129        for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
04130          r = fetch_token_in_cc(tok, &p, end, env);
04131          if (r < 0) goto err;
04132          if (r != TK_RAW_BYTE || tok->base != base) {
04133            fetched = 1;
04134            break;
04135          }
04136          buf[i] = tok->u.c;
04137        }
04138 
04139        if (i < ONIGENC_MBC_MINLEN(env->enc)) {
04140          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04141          goto err;
04142        }
04143 
04144        len = enc_len(env->enc, buf);
04145        if (i < len) {
04146          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04147          goto err;
04148        }
04149        else if (i > len) { /* fetch back */
04150          p = psave;
04151          for (i = 1; i < len; i++) {
04152            r = fetch_token_in_cc(tok, &p, end, env);
04153          }
04154          fetched = 0;
04155        }
04156 
04157        if (i == 1) {
04158          v = (OnigCodePoint )buf[0];
04159          goto raw_single;
04160        }
04161        else {
04162          v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
04163          in_type = CCV_CODE_POINT;
04164        }
04165       }
04166       else {
04167        v = (OnigCodePoint )tok->u.c;
04168       raw_single:
04169        in_type = CCV_SB;
04170       }
04171       in_israw = 1;
04172       goto val_entry2;
04173       break;
04174 
04175     case TK_CODE_POINT:
04176       v = tok->u.code;
04177       in_israw = 1;
04178     val_entry:
04179       len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
04180       if (len < 0) {
04181        r = len;
04182        goto err;
04183       }
04184       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
04185     val_entry2:
04186       r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
04187                       &state, env);
04188       if (r != 0) goto err;
04189       break;
04190 
04191     case TK_POSIX_BRACKET_OPEN:
04192       r = parse_posix_bracket(cc, &p, end, env);
04193       if (r < 0) goto err;
04194       if (r == 1) {  /* is not POSIX bracket */
04195        CC_ESC_WARN(env, (UChar* )"[");
04196        p = tok->backp;
04197        v = (OnigCodePoint )tok->u.c;
04198        in_israw = 0;
04199        goto val_entry;
04200       }
04201       goto next_class;
04202       break;
04203 
04204     case TK_CHAR_TYPE:
04205       {
04206        int ctype, not;
04207        ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
04208        r = add_ctype_to_cc(cc, ctype, not, env);
04209        if (r != 0) return r;
04210       }
04211 
04212     next_class:
04213       r = next_state_class(cc, &vs, &val_type, &state, env);
04214       if (r != 0) goto err;
04215       break;
04216 
04217     case TK_CHAR_PROPERTY:
04218       {
04219        int ctype;
04220 
04221        ctype = fetch_char_property_to_ctype(&p, end, env);
04222        if (ctype < 0) return ctype;
04223        r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
04224        if (r != 0) return r;
04225        goto next_class;
04226       }
04227       break;
04228 
04229     case TK_CC_RANGE:
04230       if (state == CCS_VALUE) {
04231        r = fetch_token_in_cc(tok, &p, end, env);
04232        if (r < 0) goto err;
04233        fetched = 1;
04234        if (r == TK_CC_CLOSE) { /* allow [x-] */
04235        range_end_val:
04236          v = (OnigCodePoint )'-';
04237          in_israw = 0;
04238          goto val_entry;
04239        }
04240        else if (r == TK_CC_AND) {
04241          CC_ESC_WARN(env, (UChar* )"-");
04242          goto range_end_val;
04243        }
04244        state = CCS_RANGE;
04245       }
04246       else if (state == CCS_START) {
04247        /* [-xa] is allowed */
04248        v = (OnigCodePoint )tok->u.c;
04249        in_israw = 0;
04250 
04251        r = fetch_token_in_cc(tok, &p, end, env);
04252        if (r < 0) goto err;
04253        fetched = 1;
04254        /* [--x] or [a&&-x] is warned. */
04255        if (r == TK_CC_RANGE || and_start != 0)
04256          CC_ESC_WARN(env, (UChar* )"-");
04257 
04258        goto val_entry;
04259       }
04260       else if (state == CCS_RANGE) {
04261        CC_ESC_WARN(env, (UChar* )"-");
04262        goto sb_char;  /* [!--x] is allowed */
04263       }
04264       else { /* CCS_COMPLETE */
04265        r = fetch_token_in_cc(tok, &p, end, env);
04266        if (r < 0) goto err;
04267        fetched = 1;
04268        if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
04269        else if (r == TK_CC_AND) {
04270          CC_ESC_WARN(env, (UChar* )"-");
04271          goto range_end_val;
04272        }
04273        
04274        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
04275          CC_ESC_WARN(env, (UChar* )"-");
04276          goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
04277        }
04278        r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
04279        goto err;
04280       }
04281       break;
04282 
04283     case TK_CC_CC_OPEN: /* [ */
04284       {
04285        Node *anode;
04286        CClassNode* acc;
04287 
04288        r = parse_char_class(&anode, tok, &p, end, env);
04289        if (r != 0) goto cc_open_err;
04290        acc = &(NCCLASS(anode));
04291        r = or_cclass(cc, acc, env->enc);
04292 
04293        onig_node_free(anode);
04294       cc_open_err:
04295        if (r != 0) goto err;
04296       }
04297       break;
04298 
04299     case TK_CC_AND: /* && */
04300       {
04301        if (state == CCS_VALUE) {
04302          r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04303                           &val_type, &state, env);
04304          if (r != 0) goto err;
04305        }
04306        /* initialize local variables */
04307        and_start = 1;
04308        state = CCS_START;
04309 
04310        if (IS_NOT_NULL(prev_cc)) {
04311          r = and_cclass(prev_cc, cc, env->enc);
04312          if (r != 0) goto err;
04313          bbuf_free(cc->mbuf);
04314        }
04315        else {
04316          prev_cc = cc;
04317          cc = &work_cc;
04318        }
04319        initialize_cclass(cc);
04320       }
04321       break;
04322 
04323     case TK_EOT:
04324       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
04325       goto err;
04326       break;
04327     default:
04328       r = ONIGERR_PARSER_BUG;
04329       goto err;
04330       break;
04331     }
04332 
04333     if (fetched)
04334       r = tok->type;
04335     else {
04336       r = fetch_token_in_cc(tok, &p, end, env);
04337       if (r < 0) goto err;
04338     }
04339   }
04340 
04341   if (state == CCS_VALUE) {
04342     r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04343                      &val_type, &state, env);
04344     if (r != 0) goto err;
04345   }
04346 
04347   if (IS_NOT_NULL(prev_cc)) {
04348     r = and_cclass(prev_cc, cc, env->enc);
04349     if (r != 0) goto err;
04350     bbuf_free(cc->mbuf);
04351     cc = prev_cc;
04352   }
04353 
04354   if (neg != 0)
04355     CCLASS_SET_NOT(cc);
04356   else
04357     CCLASS_CLEAR_NOT(cc);
04358   if (IS_CCLASS_NOT(cc) &&
04359       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
04360     int is_empty;
04361 
04362     is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
04363     if (is_empty != 0)
04364       BITSET_IS_EMPTY(cc->bs, is_empty);
04365 
04366     if (is_empty == 0) {
04367 #define NEWLINE_CODE    0x0a
04368 
04369       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
04370         if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
04371           BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
04372         else
04373           add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
04374       }
04375     }
04376   }
04377   *src = p;
04378   return 0;
04379 
04380  err:
04381   if (cc != &(NCCLASS(*np)))
04382     bbuf_free(cc->mbuf);
04383   onig_node_free(*np);
04384   return r;
04385 }
04386 
04387 static int parse_subexp(Node** top, OnigToken* tok, int term,
04388                      UChar** src, UChar* end, ScanEnv* env);
04389 
04390 static int
04391 parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
04392             ScanEnv* env)
04393 {
04394   int r, num;
04395   int list_capture;
04396   Node *target;
04397   OnigOptionType option;
04398   OnigEncoding enc = env->enc;
04399   OnigCodePoint c;
04400   UChar* p = *src;
04401   PFETCH_READY;
04402 
04403   *np = NULL;
04404   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
04405 
04406   option = env->option;
04407   if (PPEEK_IS('?') &&
04408       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
04409     PINC;
04410     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04411 
04412     PFETCH(c);
04413     switch (c) {
04414     case ':':   /* (?:...) grouping only */
04415     group:
04416       r = fetch_token(tok, &p, end, env);
04417       if (r < 0) return r;
04418       r = parse_subexp(np, tok, term, &p, end, env);
04419       if (r < 0) return r;
04420       *src = p;
04421       return 1; /* group */
04422       break;
04423 
04424     case '=':
04425       *np = onig_node_new_anchor(ANCHOR_PREC_READ);
04426       break;
04427     case '!':  /*         preceding read */
04428       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
04429       break;
04430     case '>':            /* (?>...) stop backtrack */
04431       *np = node_new_effect(EFFECT_STOP_BACKTRACK);
04432       break;
04433 
04434     case '<':   /* look behind (?<=...), (?<!...) */
04435       PFETCH(c);
04436       if (c == '=')
04437        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
04438       else if (c == '!')
04439        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
04440 #ifdef USE_NAMED_GROUP
04441       else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04442        UChar *name;
04443        UChar *name_end;
04444 
04445        PUNFETCH;
04446        list_capture = 0;
04447 
04448       named_group:
04449        name = p;
04450        r = fetch_name(&p, end, &name_end, env, 0);
04451        if (r < 0) return r;
04452 
04453        num = scan_env_add_mem_entry(env);
04454        if (num < 0) return num;
04455        if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
04456          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04457 
04458        r = name_add(env->reg, name, name_end, num, env);
04459        if (r != 0) return r;
04460        *np = node_new_effect_memory(env->option, 1);
04461        CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04462        NEFFECT(*np).regnum = num;
04463        if (list_capture != 0)
04464          BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04465        env->num_named++;
04466       }
04467 #endif
04468       else
04469        return ONIGERR_UNDEFINED_GROUP_OPTION;
04470       break;
04471 
04472     case '@':
04473       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
04474 #ifdef USE_NAMED_GROUP
04475        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04476          PFETCH(c);
04477          if (c == '<') {
04478            list_capture = 1;
04479            goto named_group; /* (?@<name>...) */
04480          }
04481          PUNFETCH;
04482        }
04483 #endif
04484        *np = node_new_effect_memory(env->option, 0);
04485        CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04486        num = scan_env_add_mem_entry(env);
04487        if (num < 0) {
04488          onig_node_free(*np);
04489          return num;
04490        }
04491        else if (num >= BIT_STATUS_BITS_NUM) {
04492          onig_node_free(*np);
04493          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04494        }
04495        NEFFECT(*np).regnum = num;
04496        BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04497       }
04498       else {
04499        return ONIGERR_UNDEFINED_GROUP_OPTION;
04500       }
04501       break;
04502 
04503 #ifdef USE_POSIXLINE_OPTION
04504     case 'p':
04505 #endif
04506     case '-': case 'i': case 'm': case 's': case 'x':
04507       {
04508        int neg = 0;
04509 
04510        while (1) {
04511          switch (c) {
04512          case ':':
04513          case ')':
04514          break;
04515 
04516          case '-':  neg = 1; break;
04517          case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
04518          case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
04519          case 's':
04520            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04521              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
04522            }
04523            else
04524              return ONIGERR_UNDEFINED_GROUP_OPTION;
04525            break;
04526 
04527          case 'm':
04528            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04529              ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
04530            }
04531            else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
04532              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
04533            }
04534            else
04535              return ONIGERR_UNDEFINED_GROUP_OPTION;
04536            break;
04537 #ifdef USE_POSIXLINE_OPTION
04538          case 'p':
04539            ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
04540            break;
04541 #endif
04542          default:
04543            return ONIGERR_UNDEFINED_GROUP_OPTION;
04544          }
04545 
04546          if (c == ')') {
04547            *np = node_new_option(option);
04548            CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04549            *src = p;
04550            return 2; /* option only */
04551          }
04552          else if (c == ':') {
04553            OnigOptionType prev = env->option;
04554 
04555            env->option     = option;
04556            r = fetch_token(tok, &p, end, env);
04557            if (r < 0) return r;
04558            r = parse_subexp(&target, tok, term, &p, end, env);
04559            env->option = prev;
04560            if (r < 0) return r;
04561            *np = node_new_option(option);
04562            CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04563            NEFFECT(*np).target = target;
04564            *src = p;
04565            return 0;
04566          }
04567 
04568          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04569          PFETCH(c);
04570        }
04571       }
04572       break;
04573 
04574     default:
04575       return ONIGERR_UNDEFINED_GROUP_OPTION;
04576     }
04577   }
04578   else {
04579     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
04580       goto group;
04581 
04582     *np = node_new_effect_memory(env->option, 0);
04583     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04584     num = scan_env_add_mem_entry(env);
04585     if (num < 0) return num;
04586     NEFFECT(*np).regnum = num;
04587   }
04588 
04589   CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04590   r = fetch_token(tok, &p, end, env);
04591   if (r < 0) return r;
04592   r = parse_subexp(&target, tok, term, &p, end, env);
04593   if (r < 0) return r;
04594 
04595   if (NTYPE(*np) == N_ANCHOR)
04596     NANCHOR(*np).target = target;
04597   else {
04598     NEFFECT(*np).target = target;
04599     if (NEFFECT(*np).type == EFFECT_MEMORY) {
04600       /* Don't move this to previous of parse_subexp() */
04601       r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
04602       if (r != 0) return r;
04603     }
04604   }
04605 
04606   *src = p;
04607   return 0;
04608 }
04609 
04610 static const char* PopularQStr[] = {
04611   "?", "*", "+", "??", "*?", "+?"
04612 };
04613 
04614 static const char* ReduceQStr[] = {
04615   "", "", "*", "*?", "??", "+ and ??", "+? and ?"
04616 };
04617 
04618 static int
04619 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
04620 {
04621   QuantifierNode* qn;
04622 
04623   qn = &(NQUANTIFIER(qnode));
04624   if (qn->lower == 1 && qn->upper == 1) {
04625     return 1;
04626   }
04627 
04628   switch (NTYPE(target)) {
04629   case N_STRING:
04630     if (! group) {
04631       StrNode* sn = &(NSTRING(target));
04632       if (str_node_can_be_split(sn, env->enc)) {
04633        Node* n = str_node_split_last_char(sn, env->enc);
04634        if (IS_NOT_NULL(n)) {
04635          qn->target = n;
04636          return 2;
04637        }
04638       }
04639     }
04640     break;
04641 
04642   case N_QUANTIFIER:
04643     { /* check redundant double repeat. */
04644       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
04645       QuantifierNode* qnt = &(NQUANTIFIER(target));
04646       int nestq_num   = popular_quantifier_num(qn);
04647       int targetq_num = popular_quantifier_num(qnt);
04648 
04649 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
04650       if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
04651          IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
04652         UChar buf[WARN_BUFSIZE];
04653 
04654         switch(ReduceTypeTable[targetq_num][nestq_num]) {
04655         case RQ_ASIS:
04656           break;
04657 
04658         case RQ_DEL:
04659           if (onig_verb_warn != onig_null_warn) {
04660             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04661                                  env->pattern, env->pattern_end,
04662                                  (UChar* )"redundant nested repeat operator");
04663             (*onig_verb_warn)((char* )buf);
04664           }
04665           goto warn_exit;
04666           break;
04667 
04668         default:
04669           if (onig_verb_warn != onig_null_warn) {
04670             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04671                                        env->pattern, env->pattern_end,
04672             (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
04673             PopularQStr[targetq_num], PopularQStr[nestq_num],
04674             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
04675             (*onig_verb_warn)((char* )buf);
04676           }
04677           goto warn_exit;
04678           break;
04679         }
04680       }
04681 
04682     warn_exit:
04683 #endif
04684       if (targetq_num >= 0) {
04685        if (nestq_num >= 0) {
04686          onig_reduce_nested_quantifier(qnode, target);
04687          goto q_exit;
04688        }
04689        else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
04690          /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
04691          if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
04692            qn->upper = (qn->lower == 0 ? 1 : qn->lower);
04693          }
04694        }
04695       }
04696     }
04697     break;
04698 
04699   default:
04700     break;
04701   }
04702 
04703   qn->target = target;
04704  q_exit:
04705   return 0;
04706 }
04707 
04708 #ifdef USE_SHARED_CCLASS_TABLE
04709 
04710 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
04711 
04712 /* for ctype node hash table */
04713 
04714 typedef struct {
04715   OnigEncoding enc;
04716   int not;
04717   int type;
04718 } type_cclass_key;
04719 
04720 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
04721 {
04722   if (x->type != y->type) return 1;
04723   if (x->enc  != y->enc)  return 1;
04724   if (x->not  != y->not)  return 1;
04725   return 0;
04726 }
04727 
04728 static int type_cclass_hash(type_cclass_key* key)
04729 {
04730   int i, val;
04731   unsigned char *p;
04732 
04733   val = 0;
04734 
04735   p = (unsigned char* )&(key->enc);
04736   for (i = 0; i < sizeof(key->enc); i++) {
04737     val = val * 997 + (int )*p++;
04738   }
04739 
04740   p = (unsigned char* )(&key->type);
04741   for (i = 0; i < sizeof(key->type); i++) {
04742     val = val * 997 + (int )*p++;
04743   }
04744 
04745   val += key->not;
04746   return val + (val >> 5);
04747 }
04748 
04749 static struct st_hash_type type_type_cclass_hash = {
04750     type_cclass_cmp,
04751     type_cclass_hash,
04752 };
04753 
04754 static st_table* OnigTypeCClassTable;
04755 
04756 
04757 static int
04758 i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
04759 {
04760   if (IS_NOT_NULL(node)) {
04761     CClassNode* cc = &(NCCLASS(node));
04762     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
04763     xfree(node);
04764   }
04765 
04766   if (IS_NOT_NULL(key)) xfree(key);
04767   return ST_DELETE;
04768 }
04769 
04770 extern int
04771 onig_free_shared_cclass_table(void)
04772 {
04773   if (IS_NOT_NULL(OnigTypeCClassTable)) {
04774     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
04775     onig_st_free_table(OnigTypeCClassTable);
04776     OnigTypeCClassTable = NULL;
04777   }
04778 
04779   return 0;
04780 }
04781 
04782 #endif /* USE_SHARED_CCLASS_TABLE */
04783 
04784 
04785 static int
04786 parse_exp(Node** np, OnigToken* tok, int term,
04787          UChar** src, UChar* end, ScanEnv* env)
04788 {
04789   int r, len, group = 0;
04790   Node* qn;
04791   Node** targetp;
04792 
04793   *np = NULL;
04794   if (tok->type == term)
04795     goto end_of_token;
04796 
04797   switch (tok->type) {
04798   case TK_ALT:
04799   case TK_EOT:
04800   end_of_token:
04801   *np = node_new_empty();
04802   return tok->type;
04803   break;
04804 
04805   case TK_SUBEXP_OPEN:
04806     r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env);
04807     if (r < 0) return r;
04808     if (r == 1) group = 1;
04809     else if (r == 2) { /* option only */
04810       Node* target;
04811       OnigOptionType prev = env->option;
04812 
04813       env->option = NEFFECT(*np).option;
04814       r = fetch_token(tok, src, end, env);
04815       if (r < 0) return r;
04816       r = parse_subexp(&target, tok, term, src, end, env);
04817       env->option = prev;
04818       if (r < 0) return r;
04819       NEFFECT(*np).target = target;       
04820       return tok->type;
04821     }
04822     break;
04823 
04824   case TK_SUBEXP_CLOSE:
04825     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
04826       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
04827 
04828     if (tok->escaped) goto tk_raw_byte;
04829     else goto tk_byte;
04830     break;
04831 
04832   case TK_STRING:
04833   tk_byte:
04834     {
04835       *np = node_new_str(tok->backp, *src);
04836       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04837 
04838       while (1) {
04839        r = fetch_token(tok, src, end, env);
04840        if (r < 0) return r;
04841        if (r != TK_STRING) break;
04842 
04843        r = onig_node_str_cat(*np, tok->backp, *src);
04844        if (r < 0) return r;
04845       }
04846 
04847     string_end:
04848       targetp = np;
04849       goto repeat;
04850     }
04851     break;
04852 
04853   case TK_RAW_BYTE:
04854   tk_raw_byte:
04855     {
04856       *np = node_new_str_char((UChar )tok->u.c);
04857       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04858       len = 1;
04859       while (1) {
04860        if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
04861          if (len == enc_len(env->enc, NSTRING(*np).s)) {
04862            r = fetch_token(tok, src, end, env);
04863            goto string_end;
04864          }
04865        }
04866 
04867        r = fetch_token(tok, src, end, env);
04868        if (r < 0) return r;
04869        if (r != TK_RAW_BYTE) {
04870 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
04871          int rem;
04872          if (len < ONIGENC_MBC_MINLEN(env->enc)) {
04873            rem = ONIGENC_MBC_MINLEN(env->enc) - len;
04874            (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
04875            if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
04876              goto string_end;
04877            }
04878          }
04879 #endif
04880          return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04881        }
04882 
04883        r = node_str_cat_char(*np, (UChar )tok->u.c);
04884        if (r < 0) return r;
04885 
04886        len++;
04887       }
04888     }
04889     break;
04890 
04891   case TK_CODE_POINT:
04892     {
04893       UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04894       int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
04895       if (num < 0) return num;
04896 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
04897       *np = node_new_str_raw(buf, buf + num);
04898 #else
04899       *np = node_new_str(buf, buf + num);
04900 #endif
04901       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04902     }
04903     break;
04904 
04905   case TK_QUOTE_OPEN:
04906     {
04907       OnigCodePoint end_op[2];
04908       UChar *qstart, *qend, *nextp;
04909 
04910       end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
04911       end_op[1] = (OnigCodePoint )'E';
04912       qstart = *src;
04913       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
04914       if (IS_NULL(qend)) {
04915        nextp = qend = end;
04916       }
04917       *np = node_new_str(qstart, qend);
04918       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04919       *src = nextp;
04920     }
04921     break;
04922 
04923   case TK_CHAR_TYPE:
04924     {
04925       switch (tok->u.subtype) {
04926       case CTYPE_WORD:
04927       case CTYPE_NOT_WORD:
04928        *np = node_new_ctype(tok->u.subtype);
04929        CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04930        break;
04931 
04932       case CTYPE_WHITE_SPACE:
04933       case CTYPE_NOT_WHITE_SPACE:
04934       case CTYPE_DIGIT:
04935       case CTYPE_NOT_DIGIT:
04936       case CTYPE_XDIGIT:
04937       case CTYPE_NOT_XDIGIT:
04938        {
04939          CClassNode* cc;
04940          int ctype, not;
04941 
04942 #ifdef USE_SHARED_CCLASS_TABLE
04943           const OnigCodePoint *sbr, *mbr;
04944 
04945          ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
04946           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
04947           if (r == 0 &&
04948               ONIGENC_CODE_RANGE_NUM(mbr)
04949               >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
04950             type_cclass_key  key;
04951             type_cclass_key* new_key;
04952 
04953             key.enc  = env->enc;
04954             key.not  = not;
04955             key.type = ctype;
04956 
04957             THREAD_ATOMIC_START;
04958 
04959             if (IS_NULL(OnigTypeCClassTable)) {
04960               OnigTypeCClassTable
04961                 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
04962               if (IS_NULL(OnigTypeCClassTable)) {
04963                 THREAD_ATOMIC_END;
04964                 return ONIGERR_MEMORY;
04965               }
04966             }
04967             else {
04968               if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
04969                                  (st_data_t* )np)) {
04970                 THREAD_ATOMIC_END;
04971                 break;
04972               }
04973             }
04974 
04975             *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
04976             if (IS_NULL(*np)) {
04977               THREAD_ATOMIC_END;
04978               return ONIGERR_MEMORY;
04979             }
04980 
04981             CCLASS_SET_SHARE(&(NCCLASS(*np)));
04982             new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
04983             xmemcpy(new_key, &key, sizeof(type_cclass_key));
04984             onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
04985                                (st_data_t )*np);
04986             
04987             THREAD_ATOMIC_END;
04988           }
04989           else {
04990 #endif
04991             ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
04992             *np = node_new_cclass();
04993             CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
04994             cc = &(NCCLASS(*np));
04995             add_ctype_to_cc(cc, ctype, 0, env);
04996             if (not != 0) CCLASS_SET_NOT(cc);
04997 #ifdef USE_SHARED_CCLASS_TABLE
04998           }
04999 #endif
05000        }
05001        break;
05002 
05003       default:
05004        return ONIGERR_PARSER_BUG;
05005        break;
05006       }
05007     }
05008     break;
05009 
05010   case TK_CHAR_PROPERTY:
05011     r = parse_char_property(np, tok, src, end, env);
05012     if (r != 0) return r;
05013     break;
05014 
05015   case TK_CC_OPEN:
05016     {
05017       CClassNode* cc;
05018 
05019       r = parse_char_class(np, tok, src, end, env);
05020       if (r != 0) return r;
05021 
05022       cc = &(NCCLASS(*np));
05023 
05024       if (IS_IGNORECASE(env->option)) {
05025         int i, n, in_cc;
05026         const OnigPairAmbigCodes* ccs;
05027         BitSetRef bs = cc->bs;
05028         OnigAmbigType amb;
05029 
05030         for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
05031           if ((amb & env->ambig_flag) == 0)  continue;
05032 
05033           n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
05034           for (i = 0; i < n; i++) {
05035             in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
05036 
05037             if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
05038                 (in_cc == 0 && IS_CCLASS_NOT(cc))) {
05039               if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
05040                   ccs[i].from >= SINGLE_BYTE_SIZE) {
05041                 /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
05042                 add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
05043               }
05044               else {
05045                 if (BITSET_AT(bs, ccs[i].from)) {
05046                   /* /(?i:[^A-C])/.match("a") ==> fail. */
05047                   BITSET_SET_BIT(bs, ccs[i].to);
05048                 }
05049                 if (BITSET_AT(bs, ccs[i].to)) {
05050                   BITSET_SET_BIT(bs, ccs[i].from);
05051                 }
05052               }
05053             }
05054           }
05055         }
05056       }
05057     }
05058     break;
05059 
05060   case TK_ANYCHAR:
05061     *np = node_new_anychar();
05062     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
05063     break;
05064 
05065   case TK_ANYCHAR_ANYTIME:
05066     *np = node_new_anychar();
05067     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
05068     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
05069     CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
05070     NQUANTIFIER(qn).target = *np;
05071     *np = qn;
05072     break;
05073 
05074   case TK_BACKREF:
05075     len = tok->u.backref.num;
05076     *np = node_new_backref(len,
05077                  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
05078                         tok->u.backref.by_name,
05079 #ifdef USE_BACKREF_AT_LEVEL
05080                         tok->u.backref.exist_level,
05081                         tok->u.backref.level,
05082 #endif
05083                         env);
05084     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
05085     break;
05086 
05087 #ifdef USE_SUBEXP_CALL
05088   case TK_CALL:
05089     *np = node_new_call(tok->u.call.name, tok->u.call.name_end);
05090     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
05091     env->num_call++;
05092     break;
05093 #endif
05094 
05095   case TK_ANCHOR:
05096     *np = onig_node_new_anchor(tok->u.anchor);
05097     break;
05098 
05099   case TK_OP_REPEAT:
05100   case TK_INTERVAL:
05101     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
05102       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
05103        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
05104       else
05105        *np = node_new_empty();
05106     }
05107     else {
05108       goto tk_byte;
05109     }
05110     break;
05111 
05112   default:
05113     return ONIGERR_PARSER_BUG;
05114     break;
05115   }
05116 
05117   {
05118     targetp = np;
05119 
05120   re_entry:
05121     r = fetch_token(tok, src, end, env);
05122     if (r < 0) return r;
05123 
05124   repeat:
05125     if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
05126       if (is_invalid_quantifier_target(*targetp))
05127        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
05128 
05129       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
05130                            (r == TK_INTERVAL ? 1 : 0));
05131       CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
05132       NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
05133       r = set_quantifier(qn, *targetp, group, env);
05134       if (r < 0) return r;
05135       
05136       if (tok->u.repeat.possessive != 0) {
05137        Node* en;
05138        en = node_new_effect(EFFECT_STOP_BACKTRACK);
05139        CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
05140        NEFFECT(en).target = qn;
05141        qn = en;
05142       }
05143 
05144       if (r == 0) {
05145        *targetp = qn;
05146       }
05147       else if (r == 2) { /* split case: /abc+/ */
05148        Node *tmp;
05149 
05150        *targetp = node_new_list(*targetp, NULL);
05151        CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY);
05152        tmp = NCONS(*targetp).right = node_new_list(qn, NULL);
05153        CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY);
05154        targetp = &(NCONS(tmp).left);
05155       }
05156       goto re_entry;
05157     }
05158   }
05159 
05160   return r;
05161 }
05162 
05163 static int
05164 parse_branch(Node** top, OnigToken* tok, int term,
05165             UChar** src, UChar* end, ScanEnv* env)
05166 {
05167   int r;
05168   Node *node, **headp;
05169 
05170   *top = NULL;
05171   r = parse_exp(&node, tok, term, src, end, env);
05172   if (r < 0) return r;
05173 
05174   if (r == TK_EOT || r == term || r == TK_ALT) {
05175     *top = node;
05176   }
05177   else {
05178     *top  = node_new_list(node, NULL);
05179     headp = &(NCONS(*top).right);
05180     while (r != TK_EOT && r != term && r != TK_ALT) {
05181       r = parse_exp(&node, tok, term, src, end, env);
05182       if (r < 0) return r;
05183 
05184       if (NTYPE(node) == N_LIST) {
05185        *headp = node;
05186        while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right;
05187        headp = &(NCONS(node).right);
05188       }
05189       else {
05190        *headp = node_new_list(node, NULL);
05191        headp = &(NCONS(*headp).right);
05192       }
05193     }
05194   }
05195 
05196   return r;
05197 }
05198 
05199 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
05200 static int
05201 parse_subexp(Node** top, OnigToken* tok, int term,
05202             UChar** src, UChar* end, ScanEnv* env)
05203 {
05204   int r;
05205   Node *node, **headp;
05206 
05207   *top = NULL;
05208   r = parse_branch(&node, tok, term, src, end, env);
05209   if (r < 0) {
05210     onig_node_free(node);
05211     return r;
05212   }
05213 
05214   if (r == term) {
05215     *top = node;
05216   }
05217   else if (r == TK_ALT) {
05218     *top  = node_new_alt(node, NULL);
05219     headp = &(NCONS(*top).right);
05220     while (r == TK_ALT) {
05221       r = fetch_token(tok, src, end, env);
05222       if (r < 0) return r;
05223       r = parse_branch(&node, tok, term, src, end, env);
05224       if (r < 0) return r;
05225 
05226       *headp = node_new_alt(node, NULL);
05227       headp = &(NCONS(*headp).right);
05228     }
05229 
05230     if (tok->type != term)
05231       goto err;
05232   }
05233   else {
05234   err:
05235     if (term == TK_SUBEXP_CLOSE)
05236       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
05237     else
05238       return ONIGERR_PARSER_BUG;
05239   }
05240 
05241   return r;
05242 }
05243 
05244 static int
05245 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
05246 {
05247   int r;
05248   OnigToken tok;
05249 
05250   r = fetch_token(&tok, src, end, env);
05251   if (r < 0) return r;
05252   r = parse_subexp(top, &tok, TK_EOT, src, end, env);
05253   if (r < 0) return r;
05254   return 0;
05255 }
05256 
05257 extern int
05258 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg,
05259                     ScanEnv* env)
05260 {
05261   int r;
05262   UChar* p;
05263 
05264 #ifdef USE_NAMED_GROUP
05265   names_clear(reg);
05266 #endif
05267 
05268   scan_env_clear(env);
05269   env->option      = reg->options;
05270   env->ambig_flag  = reg->ambig_flag;
05271   env->enc         = reg->enc;
05272   env->syntax      = reg->syntax;
05273   env->pattern     = (UChar* )pattern;
05274   env->pattern_end = (UChar* )end;
05275   env->reg         = reg;
05276 
05277   *root = NULL;
05278   p = (UChar* )pattern;
05279   r = parse_regexp(root, &p, (UChar* )end, env);
05280   reg->num_mem = env->num_mem;
05281   return r;
05282 }
05283 
05284 extern void
05285 onig_scan_env_set_error_string(ScanEnv* env, int ecode,
05286                             UChar* arg, UChar* arg_end)
05287 {
05288   env->error     = arg;
05289   env->error_end = arg_end;
05290 }