Back to index

php5  5.3.10
regcomp.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regcomp.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regparse.h"
00031 
00032 OnigAmbigType OnigDefaultAmbigFlag =
00033   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
00034    ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
00035 
00036 extern OnigAmbigType
00037 onig_get_default_ambig_flag(void)
00038 {
00039   return OnigDefaultAmbigFlag;
00040 }
00041 
00042 extern int
00043 onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
00044 {
00045   OnigDefaultAmbigFlag = ambig_flag;
00046   return 0;
00047 }
00048 
00049 
00050 static UChar*
00051 k_strdup(UChar* s, UChar* end)
00052 {
00053   int len = end - s;
00054 
00055   if (len > 0) {
00056     UChar* r = (UChar* )xmalloc(len + 1);
00057     CHECK_NULL_RETURN(r);
00058     xmemcpy(r, s, len);
00059     r[len] = (UChar )0;
00060     return r;
00061   }
00062   else return NULL;
00063 }
00064 
00065 /*
00066   Caution: node should not be a string node.
00067            (s and end member address break)
00068 */
00069 static void
00070 swap_node(Node* a, Node* b)
00071 {
00072   Node c;
00073   c = *a; *a = *b; *b = c;
00074 }
00075 
00076 static OnigDistance
00077 distance_add(OnigDistance d1, OnigDistance d2)
00078 {
00079   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00080     return ONIG_INFINITE_DISTANCE;
00081   else {
00082     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00083     else return ONIG_INFINITE_DISTANCE;
00084   }
00085 }
00086 
00087 static OnigDistance
00088 distance_multiply(OnigDistance d, int m)
00089 {
00090   if (m == 0) return 0;
00091 
00092   if (d < ONIG_INFINITE_DISTANCE / m)
00093     return d * m;
00094   else
00095     return ONIG_INFINITE_DISTANCE;
00096 }
00097 
00098 static int
00099 bitset_is_empty(BitSetRef bs)
00100 {
00101   int i;
00102   for (i = 0; i < BITSET_SIZE; i++) {
00103     if (bs[i] != 0) return 0;
00104   }
00105   return 1;
00106 }
00107 
00108 #ifdef ONIG_DEBUG
00109 static int
00110 bitset_on_num(BitSetRef bs)
00111 {
00112   int i, n;
00113 
00114   n = 0;
00115   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00116     if (BITSET_AT(bs, i)) n++;
00117   }
00118   return n;
00119 }
00120 #endif
00121 
00122 extern int
00123 onig_bbuf_init(BBuf* buf, int size)
00124 {
00125   buf->p = (UChar* )xmalloc(size);
00126   if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00127 
00128   buf->alloc = size;
00129   buf->used  = 0;
00130   return 0;
00131 }
00132 
00133 
00134 #ifdef USE_SUBEXP_CALL
00135 
00136 static int
00137 unset_addr_list_init(UnsetAddrList* uslist, int size)
00138 {
00139   UnsetAddr* p;
00140 
00141   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00142   CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
00143   uslist->num   = 0;
00144   uslist->alloc = size;
00145   uslist->us    = p;
00146   return 0;
00147 }
00148 
00149 static void
00150 unset_addr_list_end(UnsetAddrList* uslist)
00151 {
00152   if (IS_NOT_NULL(uslist->us))
00153     xfree(uslist->us);
00154 }
00155 
00156 static int
00157 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00158 {
00159   UnsetAddr* p;
00160   int size;
00161 
00162   if (uslist->num >= uslist->alloc) {
00163     size = uslist->alloc * 2;
00164     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00165     CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
00166     uslist->alloc = size;
00167     uslist->us    = p;
00168   }
00169 
00170   uslist->us[uslist->num].offset = offset;
00171   uslist->us[uslist->num].target = node;
00172   uslist->num++;
00173   return 0;
00174 }
00175 #endif /* USE_SUBEXP_CALL */
00176 
00177 
00178 static int
00179 add_opcode(regex_t* reg, int opcode)
00180 {
00181   BBUF_ADD1(reg, opcode);
00182   return 0;
00183 }
00184 
00185 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00186 static int
00187 add_state_check_num(regex_t* reg, int num)
00188 {
00189   StateCheckNumType n = (StateCheckNumType )num;
00190 
00191   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00192   return 0;
00193 }
00194 #endif
00195 
00196 static int
00197 add_rel_addr(regex_t* reg, int addr)
00198 {
00199   RelAddrType ra = (RelAddrType )addr;
00200 
00201   BBUF_ADD(reg, &ra, SIZE_RELADDR);
00202   return 0;
00203 }
00204 
00205 static int
00206 add_abs_addr(regex_t* reg, int addr)
00207 {
00208   AbsAddrType ra = (AbsAddrType )addr;
00209 
00210   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00211   return 0;
00212 }
00213 
00214 static int
00215 add_length(regex_t* reg, int len)
00216 {
00217   LengthType l = (LengthType )len;
00218 
00219   BBUF_ADD(reg, &l, SIZE_LENGTH);
00220   return 0;
00221 }
00222 
00223 static int
00224 add_mem_num(regex_t* reg, int num)
00225 {
00226   MemNumType n = (MemNumType )num;
00227 
00228   BBUF_ADD(reg, &n, SIZE_MEMNUM);
00229   return 0;
00230 }
00231 
00232 static int
00233 add_pointer(regex_t* reg, void* addr)
00234 {
00235   PointerType ptr = (PointerType )addr;
00236 
00237   BBUF_ADD(reg, &ptr, SIZE_POINTER);
00238   return 0;
00239 }
00240 
00241 static int
00242 add_option(regex_t* reg, OnigOptionType option)
00243 {
00244   BBUF_ADD(reg, &option, SIZE_OPTION);
00245   return 0;
00246 }
00247 
00248 static int
00249 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00250 {
00251   int r;
00252 
00253   r = add_opcode(reg, opcode);
00254   if (r) return r;
00255   r = add_rel_addr(reg, addr);
00256   return r;
00257 }
00258 
00259 static int
00260 add_bytes(regex_t* reg, UChar* bytes, int len)
00261 {
00262   BBUF_ADD(reg, bytes, len);
00263   return 0;
00264 }
00265 
00266 static int
00267 add_bitset(regex_t* reg, BitSetRef bs)
00268 {
00269   BBUF_ADD(reg, bs, SIZE_BITSET);
00270   return 0;
00271 }
00272 
00273 static int
00274 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00275 {
00276   int r;
00277 
00278   r = add_opcode(reg, opcode);
00279   if (r) return r;
00280   r = add_option(reg, option);
00281   return r;
00282 }
00283 
00284 static int compile_length_tree(Node* node, regex_t* reg);
00285 static int compile_tree(Node* node, regex_t* reg);
00286 
00287 
00288 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00289    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
00290     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
00291 
00292 static int
00293 select_str_opcode(int mb_len, int str_len, int ignore_case)
00294 {
00295   int op;
00296 
00297   if (ignore_case) {
00298     switch (str_len) {
00299     case 1:  op = OP_EXACT1_IC; break;
00300     default: op = OP_EXACTN_IC; break;
00301     }
00302   }
00303   else {
00304     switch (mb_len) {
00305     case 1:
00306       switch (str_len) {
00307       case 1:  op = OP_EXACT1; break;
00308       case 2:  op = OP_EXACT2; break;
00309       case 3:  op = OP_EXACT3; break;
00310       case 4:  op = OP_EXACT4; break;
00311       case 5:  op = OP_EXACT5; break;
00312       default: op = OP_EXACTN; break;
00313       }
00314       break;
00315 
00316     case 2:
00317       switch (str_len) {
00318       case 1:  op = OP_EXACTMB2N1; break;
00319       case 2:  op = OP_EXACTMB2N2; break;
00320       case 3:  op = OP_EXACTMB2N3; break;
00321       default: op = OP_EXACTMB2N;  break;
00322       }
00323       break;
00324 
00325     case 3:
00326       op = OP_EXACTMB3N;
00327       break;
00328 
00329     default:
00330       op = OP_EXACTMBN;
00331       break;
00332     }
00333   }
00334   return op;
00335 }
00336 
00337 static int
00338 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00339 {
00340   int r;
00341   int saved_num_null_check = reg->num_null_check;
00342 
00343   if (empty_info != 0) {
00344     r = add_opcode(reg, OP_NULL_CHECK_START);
00345     if (r) return r;
00346     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
00347     if (r) return r;
00348     reg->num_null_check++;
00349   }
00350 
00351   r = compile_tree(node, reg);
00352   if (r) return r;
00353 
00354   if (empty_info != 0) {
00355     if (empty_info == NQ_TARGET_IS_EMPTY)
00356       r = add_opcode(reg, OP_NULL_CHECK_END);
00357     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00358       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00359     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00360       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00361 
00362     if (r) return r;
00363     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
00364   }
00365   return r;
00366 }
00367 
00368 #ifdef USE_SUBEXP_CALL
00369 static int
00370 compile_call(CallNode* node, regex_t* reg)
00371 {
00372   int r;
00373 
00374   r = add_opcode(reg, OP_CALL);
00375   if (r) return r;
00376   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00377                           node->target);
00378   if (r) return r;
00379   r = add_abs_addr(reg, 0 /*dummy addr.*/);
00380   return r;
00381 }
00382 #endif
00383 
00384 static int
00385 compile_tree_n_times(Node* node, int n, regex_t* reg)
00386 {
00387   int i, r;
00388 
00389   for (i = 0; i < n; i++) {
00390     r = compile_tree(node, reg);
00391     if (r) return r;
00392   }
00393   return 0;
00394 }
00395 
00396 static int
00397 add_compile_string_length(UChar* s, int mb_len, int str_len,
00398                           regex_t* reg, int ignore_case)
00399 {
00400   int len;
00401   int op = select_str_opcode(mb_len, str_len, ignore_case);
00402 
00403   len = SIZE_OPCODE;
00404 
00405   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
00406   if (IS_NEED_STR_LEN_OP_EXACT(op))
00407     len += SIZE_LENGTH;
00408 
00409   len += mb_len * str_len;
00410   return len;
00411 }
00412 
00413 static int
00414 add_compile_string(UChar* s, int mb_len, int str_len,
00415                    regex_t* reg, int ignore_case)
00416 {
00417   int op = select_str_opcode(mb_len, str_len, ignore_case);
00418   add_opcode(reg, op);
00419 
00420   if (op == OP_EXACTMBN)
00421     add_length(reg, mb_len);
00422 
00423   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00424     if (op == OP_EXACTN_IC)
00425       add_length(reg, mb_len * str_len);
00426     else
00427       add_length(reg, str_len);
00428   }
00429 
00430   add_bytes(reg, s, mb_len * str_len);
00431   return 0;
00432 }
00433 
00434 
00435 static int
00436 compile_length_string_node(Node* node, regex_t* reg)
00437 {
00438   int rlen, r, len, prev_len, slen, ambig;
00439   OnigEncoding enc = reg->enc;
00440   UChar *p, *prev;
00441   StrNode* sn;
00442 
00443   sn = &(NSTRING(node));
00444   if (sn->end <= sn->s)
00445     return 0;
00446 
00447   ambig = NSTRING_IS_AMBIG(node);
00448 
00449   p = prev = sn->s;
00450   prev_len = enc_len(enc, p);
00451   p += prev_len;
00452   slen = 1;
00453   rlen = 0;
00454 
00455   for (; p < sn->end; ) {
00456     len = enc_len(enc, p);
00457     if (len == prev_len) {
00458       slen++;
00459     }
00460     else {
00461       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00462       rlen += r;
00463       prev = p;
00464       slen = 1;
00465       prev_len = len;
00466     }
00467     p += len;
00468   }
00469   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00470   rlen += r;
00471   return rlen;
00472 }
00473 
00474 static int
00475 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00476 {
00477   if (sn->end <= sn->s)
00478     return 0;
00479 
00480   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00481 }
00482 
00483 static int
00484 compile_string_node(Node* node, regex_t* reg)
00485 {
00486   int r, len, prev_len, slen, ambig;
00487   OnigEncoding enc = reg->enc;
00488   UChar *p, *prev, *end;
00489   StrNode* sn;
00490 
00491   sn = &(NSTRING(node));
00492   if (sn->end <= sn->s)
00493     return 0;
00494 
00495   end = sn->end;
00496   ambig = NSTRING_IS_AMBIG(node);
00497 
00498   p = prev = sn->s;
00499   prev_len = enc_len(enc, p);
00500   p += prev_len;
00501   slen = 1;
00502 
00503   for (; p < end; ) {
00504     len = enc_len(enc, p);
00505     if (len == prev_len) {
00506       slen++;
00507     }
00508     else {
00509       r = add_compile_string(prev, prev_len, slen, reg, ambig);
00510       if (r) return r;
00511 
00512       prev  = p;
00513       slen  = 1;
00514       prev_len = len;
00515     }
00516 
00517     p += len;
00518   }
00519   return add_compile_string(prev, prev_len, slen, reg, ambig);
00520 }
00521 
00522 static int
00523 compile_string_raw_node(StrNode* sn, regex_t* reg)
00524 {
00525   if (sn->end <= sn->s)
00526     return 0;
00527 
00528   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00529 }
00530 
00531 static int
00532 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00533 {
00534 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00535   add_length(reg, mbuf->used);
00536   return add_bytes(reg, mbuf->p, mbuf->used);
00537 #else
00538   static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00539 
00540   int r, pad_size;
00541   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00542 
00543   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00544   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00545   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00546 
00547   r = add_bytes(reg, mbuf->p, mbuf->used);
00548 
00549   /* padding for return value from compile_length_cclass_node() to be fix. */
00550   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00551   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00552   return r;
00553 #endif
00554 }
00555 
00556 static int
00557 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00558 {
00559   int len;
00560 
00561   if (IS_CCLASS_SHARE(cc)) {
00562     len = SIZE_OPCODE + SIZE_POINTER;
00563     return len;
00564   }
00565 
00566   if (IS_NULL(cc->mbuf)) {
00567     len = SIZE_OPCODE + SIZE_BITSET;
00568   }
00569   else {
00570     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00571       len = SIZE_OPCODE;
00572     }
00573     else {
00574       len = SIZE_OPCODE + SIZE_BITSET;
00575     }
00576 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00577     len += SIZE_LENGTH + cc->mbuf->used;
00578 #else
00579     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00580 #endif
00581   }
00582 
00583   return len;
00584 }
00585 
00586 static int
00587 compile_cclass_node(CClassNode* cc, regex_t* reg)
00588 {
00589   int r;
00590 
00591   if (IS_CCLASS_SHARE(cc)) {
00592     add_opcode(reg, OP_CCLASS_NODE);
00593     r = add_pointer(reg, cc);
00594     return r;
00595   }
00596 
00597   if (IS_NULL(cc->mbuf)) {
00598     if (IS_CCLASS_NOT(cc))
00599       add_opcode(reg, OP_CCLASS_NOT);
00600     else
00601       add_opcode(reg, OP_CCLASS);
00602 
00603     r = add_bitset(reg, cc->bs);
00604   }
00605   else {
00606     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00607       if (IS_CCLASS_NOT(cc))
00608         add_opcode(reg, OP_CCLASS_MB_NOT);
00609       else
00610         add_opcode(reg, OP_CCLASS_MB);
00611 
00612       r = add_multi_byte_cclass(cc->mbuf, reg);
00613     }
00614     else {
00615       if (IS_CCLASS_NOT(cc))
00616         add_opcode(reg, OP_CCLASS_MIX_NOT);
00617       else
00618         add_opcode(reg, OP_CCLASS_MIX);
00619 
00620       r = add_bitset(reg, cc->bs);
00621       if (r) return r;
00622       r = add_multi_byte_cclass(cc->mbuf, reg);
00623     }
00624   }
00625 
00626   return r;
00627 }
00628 
00629 static int
00630 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00631 {
00632 #define REPEAT_RANGE_ALLOC  4
00633 
00634   OnigRepeatRange* p;
00635 
00636   if (reg->repeat_range_alloc == 0) {
00637     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00638     CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
00639     reg->repeat_range = p;
00640     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00641   }
00642   else if (reg->repeat_range_alloc <= id) {
00643     int n;
00644     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00645     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00646                                     sizeof(OnigRepeatRange) * n);
00647     CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
00648     reg->repeat_range = p;
00649     reg->repeat_range_alloc = n;
00650   }
00651   else {
00652     p = reg->repeat_range;
00653   }
00654 
00655   p[id].lower = lower;
00656   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00657   return 0;
00658 }
00659 
00660 static int
00661 compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
00662                           regex_t* reg)
00663 {
00664   int r;
00665   int num_repeat = reg->num_repeat;
00666 
00667   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00668   if (r) return r;
00669   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00670   reg->num_repeat++;
00671   if (r) return r;
00672   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00673   if (r) return r;
00674 
00675   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00676   if (r) return r;
00677 
00678   r = compile_tree_empty_check(qn->target, reg, empty_info);
00679   if (r) return r;
00680 
00681   if (
00682 #ifdef USE_SUBEXP_CALL
00683       reg->num_call > 0 ||
00684 #endif
00685       IS_QUANTIFIER_IN_REPEAT(qn)) {
00686     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00687   }
00688   else {
00689     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00690   }
00691   if (r) return r;
00692   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00693   return r;
00694 }
00695 
00696 static int
00697 is_anychar_star_quantifier(QuantifierNode* qn)
00698 {
00699   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00700       NTYPE(qn->target) == N_ANYCHAR)
00701     return 1;
00702   else
00703     return 0;
00704 }
00705 
00706 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
00707 #define CKN_ON   (ckn > 0)
00708 
00709 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00710 
00711 static int
00712 compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
00713 {
00714   int len, mod_tlen, cklen;
00715   int ckn;
00716   int infinite = IS_REPEAT_INFINITE(qn->upper);
00717   int empty_info = qn->target_empty_info;
00718   int tlen = compile_length_tree(qn->target, reg);
00719 
00720   if (tlen < 0) return tlen;
00721 
00722   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00723 
00724   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00725 
00726   /* anychar repeat */
00727   if (NTYPE(qn->target) == N_ANYCHAR) {
00728     if (qn->greedy && infinite) {
00729       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00730         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00731       else
00732         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00733     }
00734   }
00735 
00736   if (empty_info != 0)
00737     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00738   else
00739     mod_tlen = tlen;
00740 
00741   if (infinite && qn->lower <= 1) {
00742     if (qn->greedy) {
00743       if (qn->lower == 1)
00744        len = SIZE_OP_JUMP;
00745       else
00746        len = 0;
00747 
00748       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00749     }
00750     else {
00751       if (qn->lower == 0)
00752        len = SIZE_OP_JUMP;
00753       else
00754        len = 0;
00755 
00756       len += mod_tlen + SIZE_OP_PUSH + cklen;
00757     }
00758   }
00759   else if (qn->upper == 0) {
00760     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
00761       len = SIZE_OP_JUMP + tlen;
00762     else
00763       len = 0;
00764   }
00765   else if (qn->upper == 1 && qn->greedy) {
00766     if (qn->lower == 0) {
00767       if (CKN_ON) {
00768        len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00769       }
00770       else {
00771        len = SIZE_OP_PUSH + tlen;
00772       }
00773     }
00774     else {
00775       len = tlen;
00776     }
00777   }
00778   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00779     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00780   }
00781   else {
00782     len = SIZE_OP_REPEAT_INC
00783         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00784     if (CKN_ON)
00785       len += SIZE_OP_STATE_CHECK;
00786   }
00787 
00788   return len;
00789 }
00790 
00791 static int
00792 compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
00793 {
00794   int r, mod_tlen;
00795   int ckn;
00796   int infinite = IS_REPEAT_INFINITE(qn->upper);
00797   int empty_info = qn->target_empty_info;
00798   int tlen = compile_length_tree(qn->target, reg);
00799 
00800   if (tlen < 0) return tlen;
00801 
00802   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00803 
00804   if (is_anychar_star_quantifier(qn)) {
00805     r = compile_tree_n_times(qn->target, qn->lower, reg);
00806     if (r) return r;
00807     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00808       if (IS_MULTILINE(reg->options))
00809        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00810       else
00811        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00812       if (r) return r;
00813       if (CKN_ON) {
00814        r = add_state_check_num(reg, ckn);
00815        if (r) return r;
00816       }
00817 
00818       return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
00819     }
00820     else {
00821       if (IS_MULTILINE(reg->options)) {
00822        r = add_opcode(reg, (CKN_ON ?
00823                             OP_STATE_CHECK_ANYCHAR_ML_STAR
00824                           : OP_ANYCHAR_ML_STAR));
00825       }
00826       else {
00827        r = add_opcode(reg, (CKN_ON ?
00828                             OP_STATE_CHECK_ANYCHAR_STAR
00829                           : OP_ANYCHAR_STAR));
00830       }
00831       if (r) return r;
00832       if (CKN_ON)
00833        r = add_state_check_num(reg, ckn);
00834 
00835       return r;
00836     }
00837   }
00838 
00839   if (empty_info != 0)
00840     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00841   else
00842     mod_tlen = tlen;
00843 
00844   if (infinite && qn->lower <= 1) {
00845     if (qn->greedy) {
00846       if (qn->lower == 1) {
00847        r = add_opcode_rel_addr(reg, OP_JUMP,
00848                      (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00849        if (r) return r;
00850       }
00851 
00852       if (CKN_ON) {
00853        r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00854        if (r) return r;
00855        r = add_state_check_num(reg, ckn);
00856        if (r) return r;
00857        r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00858       }
00859       else {
00860        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00861       }
00862       if (r) return r;
00863       r = compile_tree_empty_check(qn->target, reg, empty_info);
00864       if (r) return r;
00865       r = add_opcode_rel_addr(reg, OP_JUMP,
00866              -(mod_tlen + (int )SIZE_OP_JUMP
00867               + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00868     }
00869     else {
00870       if (qn->lower == 0) {
00871        r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00872        if (r) return r;
00873       }
00874       r = compile_tree_empty_check(qn->target, reg, empty_info);
00875       if (r) return r;
00876       if (CKN_ON) {
00877        r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00878        if (r) return r;
00879        r = add_state_check_num(reg, ckn);
00880        if (r) return r;
00881        r = add_rel_addr(reg,
00882                -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00883       }
00884       else
00885        r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00886     }
00887   }
00888   else if (qn->upper == 0) {
00889     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
00890       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00891       if (r) return r;
00892       r = compile_tree(qn->target, reg);
00893     }
00894     else
00895       r = 0;
00896   }
00897   else if (qn->upper == 1 && qn->greedy) {
00898     if (qn->lower == 0) {
00899       if (CKN_ON) {
00900        r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00901        if (r) return r;
00902        r = add_state_check_num(reg, ckn);
00903        if (r) return r;
00904        r = add_rel_addr(reg, tlen);
00905       }
00906       else {
00907        r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00908       }
00909       if (r) return r;
00910     }
00911 
00912     r = compile_tree(qn->target, reg);
00913   }
00914   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00915     if (CKN_ON) {
00916       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00917       if (r) return r;
00918       r = add_state_check_num(reg, ckn);
00919       if (r) return r;
00920       r = add_rel_addr(reg, SIZE_OP_JUMP);
00921     }
00922     else {
00923       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00924     }
00925 
00926     if (r) return r;
00927     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00928     if (r) return r;
00929     r = compile_tree(qn->target, reg);
00930   }
00931   else {
00932     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00933     if (CKN_ON) {
00934       if (r) return r;
00935       r = add_opcode(reg, OP_STATE_CHECK);
00936       if (r) return r;
00937       r = add_state_check_num(reg, ckn);
00938     }
00939   }
00940   return r;
00941 }
00942 
00943 #else /* USE_COMBINATION_EXPLOSION_CHECK */
00944 
00945 static int
00946 compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
00947 {
00948   int len, mod_tlen;
00949   int infinite = IS_REPEAT_INFINITE(qn->upper);
00950   int empty_info = qn->target_empty_info;
00951   int tlen = compile_length_tree(qn->target, reg);
00952 
00953   if (tlen < 0) return tlen;
00954 
00955   /* anychar repeat */
00956   if (NTYPE(qn->target) == N_ANYCHAR) {
00957     if (qn->greedy && infinite) {
00958       if (IS_NOT_NULL(qn->next_head_exact))
00959         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00960       else
00961         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00962     }
00963   }
00964 
00965   if (empty_info != 0)
00966     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00967   else
00968     mod_tlen = tlen;
00969 
00970   if (infinite &&
00971       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00972     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00973       len = SIZE_OP_JUMP;
00974     }
00975     else {
00976       len = tlen * qn->lower;
00977     }
00978 
00979     if (qn->greedy) {
00980       if (IS_NOT_NULL(qn->head_exact))
00981        len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
00982       else if (IS_NOT_NULL(qn->next_head_exact))
00983        len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
00984       else
00985        len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
00986     }
00987     else
00988       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
00989   }
00990   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
00991     len = SIZE_OP_JUMP + tlen;
00992   }
00993   else if (!infinite && qn->greedy &&
00994            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
00995                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00996     len = tlen * qn->lower;
00997     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
00998   }
00999   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01000     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01001   }
01002   else {
01003     len = SIZE_OP_REPEAT_INC
01004         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01005   }
01006 
01007   return len;
01008 }
01009 
01010 static int
01011 compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
01012 {
01013   int i, r, mod_tlen;
01014   int infinite = IS_REPEAT_INFINITE(qn->upper);
01015   int empty_info = qn->target_empty_info;
01016   int tlen = compile_length_tree(qn->target, reg);
01017 
01018   if (tlen < 0) return tlen;
01019 
01020   if (is_anychar_star_quantifier(qn)) {
01021     r = compile_tree_n_times(qn->target, qn->lower, reg);
01022     if (r) return r;
01023     if (IS_NOT_NULL(qn->next_head_exact)) {
01024       if (IS_MULTILINE(reg->options))
01025        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01026       else
01027        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01028       if (r) return r;
01029       return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
01030     }
01031     else {
01032       if (IS_MULTILINE(reg->options))
01033        return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01034       else
01035        return add_opcode(reg, OP_ANYCHAR_STAR);
01036     }
01037   }
01038 
01039   if (empty_info != 0)
01040     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01041   else
01042     mod_tlen = tlen;
01043 
01044   if (infinite &&
01045       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01046     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01047       if (qn->greedy) {
01048        if (IS_NOT_NULL(qn->head_exact))
01049          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01050        else if (IS_NOT_NULL(qn->next_head_exact))
01051          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01052        else
01053          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01054       }
01055       else {
01056        r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01057       }
01058       if (r) return r;
01059     }
01060     else {
01061       r = compile_tree_n_times(qn->target, qn->lower, reg);
01062       if (r) return r;
01063     }
01064 
01065     if (qn->greedy) {
01066       if (IS_NOT_NULL(qn->head_exact)) {
01067        r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01068                           mod_tlen + SIZE_OP_JUMP);
01069        if (r) return r;
01070        add_bytes(reg, NSTRING(qn->head_exact).s, 1);
01071        r = compile_tree_empty_check(qn->target, reg, empty_info);
01072        if (r) return r;
01073        r = add_opcode_rel_addr(reg, OP_JUMP,
01074        -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01075       }
01076       else if (IS_NOT_NULL(qn->next_head_exact)) {
01077        r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01078                             mod_tlen + SIZE_OP_JUMP);
01079        if (r) return r;
01080        add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
01081        r = compile_tree_empty_check(qn->target, reg, empty_info);
01082        if (r) return r;
01083        r = add_opcode_rel_addr(reg, OP_JUMP,
01084           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01085       }
01086       else {
01087        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01088        if (r) return r;
01089        r = compile_tree_empty_check(qn->target, reg, empty_info);
01090        if (r) return r;
01091        r = add_opcode_rel_addr(reg, OP_JUMP,
01092                    -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01093       }
01094     }
01095     else {
01096       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01097       if (r) return r;
01098       r = compile_tree_empty_check(qn->target, reg, empty_info);
01099       if (r) return r;
01100       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01101     }
01102   }
01103   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01104     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01105     if (r) return r;
01106     r = compile_tree(qn->target, reg);
01107   }
01108   else if (!infinite && qn->greedy &&
01109            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01110                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01111     int n = qn->upper - qn->lower;
01112 
01113     r = compile_tree_n_times(qn->target, qn->lower, reg);
01114     if (r) return r;
01115 
01116     for (i = 0; i < n; i++) {
01117       r = add_opcode_rel_addr(reg, OP_PUSH,
01118                         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01119       if (r) return r;
01120       r = compile_tree(qn->target, reg);
01121       if (r) return r;
01122     }
01123   }
01124   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01125     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01126     if (r) return r;
01127     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01128     if (r) return r;
01129     r = compile_tree(qn->target, reg);
01130   }
01131   else {
01132     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01133   }
01134   return r;
01135 }
01136 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
01137 
01138 static int
01139 compile_length_option_node(EffectNode* node, regex_t* reg)
01140 {
01141   int tlen;
01142   OnigOptionType prev = reg->options;
01143 
01144   reg->options = node->option;
01145   tlen = compile_length_tree(node->target, reg);
01146   reg->options = prev;
01147 
01148   if (tlen < 0) return tlen;
01149 
01150   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01151     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01152            + tlen + SIZE_OP_SET_OPTION;
01153   }
01154   else
01155     return tlen;
01156 }
01157 
01158 static int
01159 compile_option_node(EffectNode* node, regex_t* reg)
01160 {
01161   int r;
01162   OnigOptionType prev = reg->options;
01163 
01164   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01165     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01166     if (r) return r;
01167     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01168     if (r) return r;
01169     r = add_opcode(reg, OP_FAIL);
01170     if (r) return r;
01171   }
01172 
01173   reg->options = node->option;
01174   r = compile_tree(node->target, reg);
01175   reg->options = prev;
01176 
01177   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01178     if (r) return r;
01179     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01180   }
01181   return r;
01182 }
01183 
01184 static int
01185 compile_length_effect_node(EffectNode* node, regex_t* reg)
01186 {
01187   int len;
01188   int tlen;
01189 
01190   if (node->type == EFFECT_OPTION)
01191     return compile_length_option_node(node, reg);
01192 
01193   if (node->target) {
01194     tlen = compile_length_tree(node->target, reg);
01195     if (tlen < 0) return tlen;
01196   }
01197   else
01198     tlen = 0;
01199 
01200   switch (node->type) {
01201   case EFFECT_MEMORY:
01202 #ifdef USE_SUBEXP_CALL
01203     if (IS_EFFECT_CALLED(node)) {
01204       len = SIZE_OP_MEMORY_START_PUSH + tlen
01205          + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01206       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01207        len += (IS_EFFECT_RECURSION(node)
01208               ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01209       else
01210        len += (IS_EFFECT_RECURSION(node)
01211               ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01212     }
01213     else
01214 #endif
01215     {
01216       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01217        len = SIZE_OP_MEMORY_START_PUSH;
01218       else
01219        len = SIZE_OP_MEMORY_START;
01220 
01221       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01222                    ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01223     }
01224     break;
01225 
01226   case EFFECT_STOP_BACKTRACK:
01227     if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
01228       QuantifierNode* qn = &NQUANTIFIER(node->target);
01229       tlen = compile_length_tree(qn->target, reg);
01230       if (tlen < 0) return tlen;
01231 
01232       len = tlen * qn->lower
01233          + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01234     }
01235     else {
01236       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01237     }
01238     break;
01239 
01240   default:
01241     return ONIGERR_TYPE_BUG;
01242     break;
01243   }
01244 
01245   return len;
01246 }
01247 
01248 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01249 
01250 static int
01251 compile_effect_node(EffectNode* node, regex_t* reg)
01252 {
01253   int r, len;
01254 
01255   if (node->type == EFFECT_OPTION)
01256     return compile_option_node(node, reg);
01257 
01258   switch (node->type) {
01259   case EFFECT_MEMORY:
01260 #ifdef USE_SUBEXP_CALL
01261     if (IS_EFFECT_CALLED(node)) {
01262       r = add_opcode(reg, OP_CALL);
01263       if (r) return r;
01264       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01265       node->state |= NST_ADDR_FIXED;
01266       r = add_abs_addr(reg, (int )node->call_addr);
01267       if (r) return r;
01268       len = compile_length_tree(node->target, reg);
01269       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01270       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01271        len += (IS_EFFECT_RECURSION(node)
01272               ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01273       else
01274        len += (IS_EFFECT_RECURSION(node)
01275               ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01276 
01277       r = add_opcode_rel_addr(reg, OP_JUMP, len);
01278       if (r) return r;
01279     }
01280 #endif
01281     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01282       r = add_opcode(reg, OP_MEMORY_START_PUSH);
01283     else
01284       r = add_opcode(reg, OP_MEMORY_START);
01285     if (r) return r;
01286     r = add_mem_num(reg, node->regnum);
01287     if (r) return r;
01288     r = compile_tree(node->target, reg);
01289     if (r) return r;
01290 #ifdef USE_SUBEXP_CALL
01291     if (IS_EFFECT_CALLED(node)) {
01292       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01293        r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
01294                           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01295       else
01296        r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
01297                           ? OP_MEMORY_END_REC : OP_MEMORY_END));
01298 
01299       if (r) return r;
01300       r = add_mem_num(reg, node->regnum);
01301       if (r) return r;
01302       r = add_opcode(reg, OP_RETURN);
01303     }
01304     else
01305 #endif
01306     {
01307       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01308        r = add_opcode(reg, OP_MEMORY_END_PUSH);
01309       else
01310        r = add_opcode(reg, OP_MEMORY_END);
01311       if (r) return r;
01312       r = add_mem_num(reg, node->regnum);
01313     }
01314     break;
01315 
01316   case EFFECT_STOP_BACKTRACK:
01317     if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
01318       QuantifierNode* qn = &NQUANTIFIER(node->target);
01319       r = compile_tree_n_times(qn->target, qn->lower, reg);
01320       if (r) return r;
01321 
01322       len = compile_length_tree(qn->target, reg);
01323       if (len < 0) return len;
01324 
01325       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01326       if (r) return r;
01327       r = compile_tree(qn->target, reg);
01328       if (r) return r;
01329       r = add_opcode(reg, OP_POP);
01330       if (r) return r;
01331       r = add_opcode_rel_addr(reg, OP_JUMP,
01332         -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01333     }
01334     else {
01335       r = add_opcode(reg, OP_PUSH_STOP_BT);
01336       if (r) return r;
01337       r = compile_tree(node->target, reg);
01338       if (r) return r;
01339       r = add_opcode(reg, OP_POP_STOP_BT);
01340     }
01341     break;
01342 
01343   default:
01344     return ONIGERR_TYPE_BUG;
01345     break;
01346   }
01347 
01348   return r;
01349 }
01350 
01351 static int
01352 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01353 {
01354   int len;
01355   int tlen = 0;
01356 
01357   if (node->target) {
01358     tlen = compile_length_tree(node->target, reg);
01359     if (tlen < 0) return tlen;
01360   }
01361 
01362   switch (node->type) {
01363   case ANCHOR_PREC_READ:
01364     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01365     break;
01366   case ANCHOR_PREC_READ_NOT:
01367     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01368     break;
01369   case ANCHOR_LOOK_BEHIND:
01370     len = SIZE_OP_LOOK_BEHIND + tlen;
01371     break;
01372   case ANCHOR_LOOK_BEHIND_NOT:
01373     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01374     break;
01375 
01376   default:
01377     len = SIZE_OPCODE;
01378     break;
01379   }
01380 
01381   return len;
01382 }
01383 
01384 static int
01385 compile_anchor_node(AnchorNode* node, regex_t* reg)
01386 {
01387   int r, len;
01388 
01389   switch (node->type) {
01390   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
01391   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
01392   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
01393   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
01394   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
01395   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01396 
01397   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
01398   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01399 #ifdef USE_WORD_BEGIN_END
01400   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
01401   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
01402 #endif
01403 
01404   case ANCHOR_PREC_READ:
01405     r = add_opcode(reg, OP_PUSH_POS);
01406     if (r) return r;
01407     r = compile_tree(node->target, reg);
01408     if (r) return r;
01409     r = add_opcode(reg, OP_POP_POS);
01410     break;
01411 
01412   case ANCHOR_PREC_READ_NOT:
01413     len = compile_length_tree(node->target, reg);
01414     if (len < 0) return len;
01415     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01416     if (r) return r;
01417     r = compile_tree(node->target, reg);
01418     if (r) return r;
01419     r = add_opcode(reg, OP_FAIL_POS);
01420     break;
01421 
01422   case ANCHOR_LOOK_BEHIND:
01423     {
01424       int n;
01425       r = add_opcode(reg, OP_LOOK_BEHIND);
01426       if (r) return r;
01427       if (node->char_len < 0) {
01428        r = get_char_length_tree(node->target, reg, &n);
01429        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01430       }
01431       else
01432        n = node->char_len;
01433       r = add_length(reg, n);
01434       if (r) return r;
01435       r = compile_tree(node->target, reg);
01436     }
01437     break;
01438 
01439   case ANCHOR_LOOK_BEHIND_NOT:
01440     {
01441       int n;
01442       len = compile_length_tree(node->target, reg);
01443       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01444                         len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01445       if (r) return r;
01446       if (node->char_len < 0) {
01447        r = get_char_length_tree(node->target, reg, &n);
01448        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01449       }
01450       else
01451        n = node->char_len;
01452       r = add_length(reg, n);
01453       if (r) return r;
01454       r = compile_tree(node->target, reg);
01455       if (r) return r;
01456       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01457     }
01458     break;
01459 
01460   default:
01461     return ONIGERR_TYPE_BUG;
01462     break;
01463   }
01464 
01465   return r;
01466 }
01467 
01468 static int
01469 compile_length_tree(Node* node, regex_t* reg)
01470 {
01471   int len, type, r;
01472 
01473   type = NTYPE(node);
01474   switch (type) {
01475   case N_LIST:
01476     len = 0;
01477     do {
01478       r = compile_length_tree(NCONS(node).left, reg);
01479       if (r < 0) return r;
01480       len += r;
01481     } while (IS_NOT_NULL(node = NCONS(node).right));
01482     r = len;
01483     break;
01484 
01485   case N_ALT:
01486     {
01487       int n;
01488 
01489       n = r = 0;
01490       do {
01491        r += compile_length_tree(NCONS(node).left, reg);
01492        n++;
01493       } while (IS_NOT_NULL(node = NCONS(node).right));
01494       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01495     }
01496     break;
01497 
01498   case N_STRING:
01499     if (NSTRING_IS_RAW(node))
01500       r = compile_length_string_raw_node(&(NSTRING(node)), reg);
01501     else
01502       r = compile_length_string_node(node, reg);
01503     break;
01504 
01505   case N_CCLASS:
01506     r = compile_length_cclass_node(&(NCCLASS(node)), reg);
01507     break;
01508 
01509   case N_CTYPE:
01510   case N_ANYCHAR:
01511     r = SIZE_OPCODE;
01512     break;
01513 
01514   case N_BACKREF:
01515     {
01516       BackrefNode* br = &(NBACKREF(node));
01517 
01518 #ifdef USE_BACKREF_AT_LEVEL
01519       if (IS_BACKREF_NEST_LEVEL(br)) {
01520         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01521             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01522       }
01523       else
01524 #endif
01525       if (br->back_num == 1) {
01526        r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01527             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01528       }
01529       else {
01530        r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01531       }
01532     }
01533     break;
01534 
01535 #ifdef USE_SUBEXP_CALL
01536   case N_CALL:
01537     r = SIZE_OP_CALL;
01538     break;
01539 #endif
01540 
01541   case N_QUANTIFIER:
01542     r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
01543     break;
01544 
01545   case N_EFFECT:
01546     r = compile_length_effect_node(&NEFFECT(node), reg);
01547     break;
01548 
01549   case N_ANCHOR:
01550     r = compile_length_anchor_node(&(NANCHOR(node)), reg);
01551     break;
01552 
01553   default:
01554     return ONIGERR_TYPE_BUG;
01555     break;
01556   }
01557 
01558   return r;
01559 }
01560 
01561 static int
01562 compile_tree(Node* node, regex_t* reg)
01563 {
01564   int n, type, len, pos, r = 0;
01565 
01566   type = NTYPE(node);
01567   switch (type) {
01568   case N_LIST:
01569     do {
01570       r = compile_tree(NCONS(node).left, reg);
01571     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
01572     break;
01573 
01574   case N_ALT:
01575     {
01576       Node* x = node;
01577       len = 0;
01578       do {
01579        len += compile_length_tree(NCONS(x).left, reg);
01580        if (NCONS(x).right != NULL) {
01581          len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01582        }
01583       } while (IS_NOT_NULL(x = NCONS(x).right));
01584       pos = reg->used + len;  /* goal position */
01585 
01586       do {
01587        len = compile_length_tree(NCONS(node).left, reg);
01588        if (IS_NOT_NULL(NCONS(node).right)) {
01589          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01590          if (r) break;
01591        }
01592        r = compile_tree(NCONS(node).left, reg);
01593        if (r) break;
01594        if (IS_NOT_NULL(NCONS(node).right)) {
01595          len = pos - (reg->used + SIZE_OP_JUMP);
01596          r = add_opcode_rel_addr(reg, OP_JUMP, len);
01597          if (r) break;
01598        }
01599       } while (IS_NOT_NULL(node = NCONS(node).right));
01600     }
01601     break;
01602 
01603   case N_STRING:
01604     if (NSTRING_IS_RAW(node))
01605       r = compile_string_raw_node(&(NSTRING(node)), reg);
01606     else
01607       r = compile_string_node(node, reg);
01608     break;
01609 
01610   case N_CCLASS:
01611     r = compile_cclass_node(&(NCCLASS(node)), reg);
01612     break;
01613 
01614   case N_CTYPE:
01615     {
01616       int op;
01617 
01618       switch (NCTYPE(node).type) {
01619       case CTYPE_WORD:            op = OP_WORD;           break;
01620       case CTYPE_NOT_WORD:        op = OP_NOT_WORD;       break;
01621       default:
01622        return ONIGERR_TYPE_BUG;
01623        break;
01624       }
01625       r = add_opcode(reg, op);
01626     }
01627     break;
01628 
01629   case N_ANYCHAR:
01630     if (IS_MULTILINE(reg->options))
01631       r = add_opcode(reg, OP_ANYCHAR_ML);
01632     else
01633       r = add_opcode(reg, OP_ANYCHAR);
01634     break;
01635 
01636   case N_BACKREF:
01637     {
01638       BackrefNode* br = &(NBACKREF(node));
01639 
01640 #ifdef USE_BACKREF_AT_LEVEL
01641       if (IS_BACKREF_NEST_LEVEL(br)) {
01642        r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
01643        if (r) return r;
01644        r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01645        if (r) return r;
01646        r = add_length(reg, br->nest_level);
01647        if (r) return r;
01648 
01649        goto add_bacref_mems;
01650       }
01651       else
01652 #endif
01653       if (br->back_num == 1) {
01654        n = br->back_static[0];
01655        if (IS_IGNORECASE(reg->options)) {
01656          r = add_opcode(reg, OP_BACKREFN_IC);
01657          if (r) return r;
01658          r = add_mem_num(reg, n);
01659        }
01660        else {
01661          switch (n) {
01662          case 1:  r = add_opcode(reg, OP_BACKREF1); break;
01663          case 2:  r = add_opcode(reg, OP_BACKREF2); break;
01664          default:
01665            r = add_opcode(reg, OP_BACKREFN);
01666            if (r) return r;
01667            r = add_mem_num(reg, n);
01668            break;
01669          }
01670        }
01671       }
01672       else {
01673        int i;
01674        int* p;
01675 
01676         if (IS_IGNORECASE(reg->options)) {
01677           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01678         }
01679         else {
01680           r = add_opcode(reg, OP_BACKREF_MULTI);
01681         }
01682        if (r) return r;
01683 
01684 #ifdef USE_BACKREF_AT_LEVEL
01685       add_bacref_mems:
01686 #endif
01687        r = add_length(reg, br->back_num);
01688        if (r) return r;
01689        p = BACKREFS_P(br);
01690        for (i = br->back_num - 1; i >= 0; i--) {
01691          r = add_mem_num(reg, p[i]);
01692          if (r) return r;
01693        }
01694       }
01695     }
01696     break;
01697 
01698 #ifdef USE_SUBEXP_CALL
01699   case N_CALL:
01700     r = compile_call(&(NCALL(node)), reg);
01701     break;
01702 #endif
01703 
01704   case N_QUANTIFIER:
01705     r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
01706     break;
01707 
01708   case N_EFFECT:
01709     r = compile_effect_node(&NEFFECT(node), reg);
01710     break;
01711 
01712   case N_ANCHOR:
01713     r = compile_anchor_node(&(NANCHOR(node)), reg);
01714     break;
01715 
01716   default:
01717 #ifdef ONIG_DEBUG
01718     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01719 #endif
01720     break;
01721   }
01722 
01723   return r;
01724 }
01725 
01726 #ifdef USE_NAMED_GROUP
01727 
01728 static int
01729 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01730 {
01731   int r = 0;
01732   Node* node = *plink;
01733 
01734   switch (NTYPE(node)) {
01735   case N_LIST:
01736   case N_ALT:
01737     do {
01738       r = noname_disable_map(&(NCONS(node).left), map, counter);
01739     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
01740     break;
01741 
01742   case N_QUANTIFIER:
01743     {
01744       Node** ptarget = &(NQUANTIFIER(node).target);
01745       Node*  old = *ptarget;
01746       r = noname_disable_map(ptarget, map, counter);
01747       if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
01748        onig_reduce_nested_quantifier(node, *ptarget);
01749       }
01750     }
01751     break;
01752 
01753   case N_EFFECT:
01754     {
01755       EffectNode* en = &(NEFFECT(node));
01756       if (en->type == EFFECT_MEMORY) {
01757        if (IS_EFFECT_NAMED_GROUP(en)) {
01758          (*counter)++;
01759          map[en->regnum].new_val = *counter;
01760          en->regnum = *counter;
01761          r = noname_disable_map(&(en->target), map, counter);
01762        }
01763        else {
01764          *plink = en->target;
01765          en->target = NULL_NODE;
01766          onig_node_free(node);
01767          r = noname_disable_map(plink, map, counter);
01768        }
01769       }
01770       else
01771        r = noname_disable_map(&(en->target), map, counter);
01772     }
01773     break;
01774 
01775   default:
01776     break;
01777   }
01778 
01779   return r;
01780 }
01781 
01782 static int
01783 renumber_node_backref(Node* node, GroupNumRemap* map)
01784 {
01785   int i, pos, n, old_num;
01786   int *backs;
01787   BackrefNode* bn = &(NBACKREF(node));
01788 
01789   if (! IS_BACKREF_NAME_REF(bn))
01790     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01791 
01792   old_num = bn->back_num;
01793   if (IS_NULL(bn->back_dynamic))
01794     backs = bn->back_static;
01795   else
01796     backs = bn->back_dynamic;
01797 
01798   for (i = 0, pos = 0; i < old_num; i++) {
01799     n = map[backs[i]].new_val;
01800     if (n > 0) {
01801       backs[pos] = n;
01802       pos++;
01803     }
01804   }
01805 
01806   bn->back_num = pos;
01807   return 0;
01808 }
01809 
01810 static int
01811 renumber_by_map(Node* node, GroupNumRemap* map)
01812 {
01813   int r = 0;
01814 
01815   switch (NTYPE(node)) {
01816   case N_LIST:
01817   case N_ALT:
01818     do {
01819       r = renumber_by_map(NCONS(node).left, map);
01820     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
01821     break;
01822   case N_QUANTIFIER:
01823     r = renumber_by_map(NQUANTIFIER(node).target, map);
01824     break;
01825   case N_EFFECT:
01826     r = renumber_by_map(NEFFECT(node).target, map);
01827     break;
01828 
01829   case N_BACKREF:
01830     r = renumber_node_backref(node, map);
01831     break;
01832 
01833   default:
01834     break;
01835   }
01836 
01837   return r;
01838 }
01839 
01840 static int
01841 numbered_ref_check(Node* node)
01842 {
01843   int r = 0;
01844 
01845   switch (NTYPE(node)) {
01846   case N_LIST:
01847   case N_ALT:
01848     do {
01849       r = numbered_ref_check(NCONS(node).left);
01850     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
01851     break;
01852   case N_QUANTIFIER:
01853     r = numbered_ref_check(NQUANTIFIER(node).target);
01854     break;
01855   case N_EFFECT:
01856     r = numbered_ref_check(NEFFECT(node).target);
01857     break;
01858 
01859   case N_BACKREF:
01860     if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
01861       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01862     break;
01863 
01864   default:
01865     break;
01866   }
01867 
01868   return r;
01869 }
01870 
01871 static int
01872 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01873 {
01874   int r, i, pos, counter;
01875   BitStatusType loc;
01876   GroupNumRemap* map;
01877 
01878   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01879   CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
01880   for (i = 1; i <= env->num_mem; i++) {
01881     map[i].new_val = 0;
01882   }
01883   counter = 0;
01884   r = noname_disable_map(root, map, &counter);
01885   if (r != 0) return r;
01886 
01887   r = renumber_by_map(*root, map);
01888   if (r != 0) return r;
01889 
01890   for (i = 1, pos = 1; i <= env->num_mem; i++) {
01891     if (map[i].new_val > 0) {
01892       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01893       pos++;
01894     }
01895   }
01896 
01897   loc = env->capture_history;
01898   BIT_STATUS_CLEAR(env->capture_history);
01899   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01900     if (BIT_STATUS_AT(loc, i)) {
01901       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01902     }
01903   }
01904 
01905   env->num_mem = env->num_named;
01906   reg->num_mem = env->num_named;
01907 
01908   return onig_renumber_name_table(reg, map);
01909 }
01910 #endif /* USE_NAMED_GROUP */
01911 
01912 #ifdef USE_SUBEXP_CALL
01913 static int
01914 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01915 {
01916   int i, offset;
01917   EffectNode* en;
01918   AbsAddrType addr;
01919 
01920   for (i = 0; i < uslist->num; i++) {
01921     en = &(NEFFECT(uslist->us[i].target));
01922     if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01923     addr = en->call_addr;
01924     offset = uslist->us[i].offset;
01925 
01926     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01927   }
01928   return 0;
01929 }
01930 #endif
01931 
01932 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
01933 static int
01934 quantifiers_memory_node_info(Node* node)
01935 {
01936   int r = 0;
01937 
01938   switch (NTYPE(node)) {
01939   case N_LIST:
01940   case N_ALT:
01941     {
01942       int v;
01943       do {
01944        v = quantifiers_memory_node_info(NCONS(node).left);
01945        if (v > r) r = v;
01946       } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
01947     }
01948     break;
01949 
01950 #ifdef USE_SUBEXP_CALL
01951   case N_CALL:
01952     if (IS_CALL_RECURSION(&NCALL(node))) {
01953       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
01954     }
01955     else
01956       r = quantifiers_memory_node_info(NCALL(node).target);
01957     break;
01958 #endif
01959 
01960   case N_QUANTIFIER:
01961     {
01962       QuantifierNode* qn = &(NQUANTIFIER(node));
01963       if (qn->upper != 0) {
01964        r = quantifiers_memory_node_info(qn->target);
01965       }
01966     }
01967     break;
01968 
01969   case N_EFFECT:
01970     {
01971       EffectNode* en = &(NEFFECT(node));
01972       switch (en->type) {
01973       case EFFECT_MEMORY:
01974        return NQ_TARGET_IS_EMPTY_MEM;
01975        break;
01976 
01977       case EFFECT_OPTION:
01978       case EFFECT_STOP_BACKTRACK:
01979        r = quantifiers_memory_node_info(en->target);
01980        break;
01981       default:
01982        break;
01983       }
01984     }
01985     break;
01986 
01987   case N_BACKREF:
01988   case N_STRING:
01989   case N_CTYPE:
01990   case N_CCLASS:
01991   case N_ANYCHAR:
01992   case N_ANCHOR:
01993   default:
01994     break;
01995   }
01996 
01997   return r;
01998 }
01999 #endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
02000 
02001 static int
02002 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02003 {
02004   OnigDistance tmin;
02005   int r = 0;
02006 
02007   *min = 0;
02008   switch (NTYPE(node)) {
02009   case N_BACKREF:
02010     {
02011       int i;
02012       int* backs;
02013       Node** nodes = SCANENV_MEM_NODES(env);
02014       BackrefNode* br = &(NBACKREF(node));
02015       if (br->state & NST_RECURSION) break;
02016 
02017       backs = BACKREFS_P(br);
02018       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02019       r = get_min_match_length(nodes[backs[0]], min, env);
02020       if (r != 0) break;
02021       for (i = 1; i < br->back_num; i++) {
02022        if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02023        r = get_min_match_length(nodes[backs[i]], &tmin, env);
02024        if (r != 0) break;
02025        if (*min > tmin) *min = tmin;
02026       }
02027     }
02028     break;
02029 
02030 #ifdef USE_SUBEXP_CALL
02031   case N_CALL:
02032     if (IS_CALL_RECURSION(&NCALL(node))) {
02033       EffectNode* en = &(NEFFECT(NCALL(node).target));
02034       if (IS_EFFECT_MIN_FIXED(en))
02035        *min = en->min_len;
02036     }
02037     else
02038       r = get_min_match_length(NCALL(node).target, min, env);
02039     break;
02040 #endif
02041 
02042   case N_LIST:
02043     do {
02044       r = get_min_match_length(NCONS(node).left, &tmin, env);
02045       if (r == 0) *min += tmin;
02046     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
02047     break;
02048 
02049   case N_ALT:
02050     {
02051       Node *x, *y;
02052       y = node;
02053       do {
02054        x = NCONS(y).left;
02055        r = get_min_match_length(x, &tmin, env);
02056        if (r != 0) break;
02057        if (y == node) *min = tmin;
02058        else if (*min > tmin) *min = tmin;
02059       } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
02060     }
02061     break;
02062 
02063   case N_STRING:
02064     {
02065       StrNode* sn = &(NSTRING(node));
02066       *min = sn->end - sn->s;
02067     }
02068     break;
02069 
02070   case N_CTYPE:
02071     switch (NCTYPE(node).type) {
02072     case CTYPE_WORD:     *min = 1; break;
02073     case CTYPE_NOT_WORD: *min = 1; break;
02074     default:
02075       break;
02076     }
02077     break;
02078 
02079   case N_CCLASS:
02080   case N_ANYCHAR:
02081     *min = 1;
02082     break;
02083 
02084   case N_QUANTIFIER:
02085     {
02086       QuantifierNode* qn = &(NQUANTIFIER(node));
02087 
02088       if (qn->lower > 0) {
02089        r = get_min_match_length(qn->target, min, env);
02090        if (r == 0)
02091          *min = distance_multiply(*min, qn->lower);
02092       }
02093     }
02094     break;
02095 
02096   case N_EFFECT:
02097     {
02098       EffectNode* en = &(NEFFECT(node));
02099       switch (en->type) {
02100       case EFFECT_MEMORY:
02101 #ifdef USE_SUBEXP_CALL
02102        if (IS_EFFECT_MIN_FIXED(en))
02103          *min = en->min_len;
02104        else {
02105          r = get_min_match_length(en->target, min, env);
02106          if (r == 0) {
02107            en->min_len = *min;
02108            SET_EFFECT_STATUS(node, NST_MIN_FIXED);
02109          }
02110        }
02111        break;
02112 #endif
02113       case EFFECT_OPTION:
02114       case EFFECT_STOP_BACKTRACK:
02115        r = get_min_match_length(en->target, min, env);
02116        break;
02117       }
02118     }
02119     break;
02120 
02121   case N_ANCHOR:
02122   default:
02123     break;
02124   }
02125 
02126   return r;
02127 }
02128 
02129 static int
02130 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02131 {
02132   OnigDistance tmax;
02133   int r = 0;
02134 
02135   *max = 0;
02136   switch (NTYPE(node)) {
02137   case N_LIST:
02138     do {
02139       r = get_max_match_length(NCONS(node).left, &tmax, env);
02140       if (r == 0)
02141        *max = distance_add(*max, tmax);
02142     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
02143     break;
02144 
02145   case N_ALT:
02146     do {
02147       r = get_max_match_length(NCONS(node).left, &tmax, env);
02148       if (r == 0 && *max < tmax) *max = tmax;
02149     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
02150     break;
02151 
02152   case N_STRING:
02153     {
02154       StrNode* sn = &(NSTRING(node));
02155       *max = sn->end - sn->s;
02156     }
02157     break;
02158 
02159   case N_CTYPE:
02160     switch (NCTYPE(node).type) {
02161     case CTYPE_WORD:
02162     case CTYPE_NOT_WORD:
02163       *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02164       break;
02165 
02166     default:
02167       break;
02168     }
02169     break;
02170 
02171   case N_CCLASS:
02172   case N_ANYCHAR:
02173     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02174     break;
02175 
02176   case N_BACKREF:
02177     {
02178       int i;
02179       int* backs;
02180       Node** nodes = SCANENV_MEM_NODES(env);
02181       BackrefNode* br = &(NBACKREF(node));
02182       if (br->state & NST_RECURSION) {
02183        *max = ONIG_INFINITE_DISTANCE;
02184        break;
02185       }
02186       backs = BACKREFS_P(br);
02187       for (i = 0; i < br->back_num; i++) {
02188        if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02189        r = get_max_match_length(nodes[backs[i]], &tmax, env);
02190        if (r != 0) break;
02191        if (*max < tmax) *max = tmax;
02192       }
02193     }
02194     break;
02195 
02196 #ifdef USE_SUBEXP_CALL
02197   case N_CALL:
02198     if (! IS_CALL_RECURSION(&(NCALL(node))))
02199       r = get_max_match_length(NCALL(node).target, max, env);
02200     else
02201       *max = ONIG_INFINITE_DISTANCE;
02202     break;
02203 #endif
02204 
02205   case N_QUANTIFIER:
02206     {
02207       QuantifierNode* qn = &(NQUANTIFIER(node));
02208 
02209       if (qn->upper != 0) {
02210        r = get_max_match_length(qn->target, max, env);
02211        if (r == 0 && *max != 0) {
02212          if (! IS_REPEAT_INFINITE(qn->upper))
02213            *max = distance_multiply(*max, qn->upper);
02214          else
02215            *max = ONIG_INFINITE_DISTANCE;
02216        }
02217       }
02218     }
02219     break;
02220 
02221   case N_EFFECT:
02222     {
02223       EffectNode* en = &(NEFFECT(node));
02224       switch (en->type) {
02225       case EFFECT_MEMORY:
02226 #ifdef USE_SUBEXP_CALL
02227        if (IS_EFFECT_MAX_FIXED(en))
02228          *max = en->max_len;
02229        else {
02230          r = get_max_match_length(en->target, max, env);
02231          if (r == 0) {
02232            en->max_len = *max;
02233            SET_EFFECT_STATUS(node, NST_MAX_FIXED);
02234          }
02235        }
02236        break;
02237 #endif
02238       case EFFECT_OPTION:
02239       case EFFECT_STOP_BACKTRACK:
02240        r = get_max_match_length(en->target, max, env);
02241        break;
02242       }
02243     }
02244     break;
02245 
02246   case N_ANCHOR:
02247   default:
02248     break;
02249   }
02250 
02251   return r;
02252 }
02253 
02254 #define GET_CHAR_LEN_VARLEN           -1
02255 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
02256 
02257 /* fixed size pattern node only */
02258 static int
02259 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02260 {
02261   int tlen;
02262   int r = 0;
02263 
02264   level++;
02265   *len = 0;
02266   switch (NTYPE(node)) {
02267   case N_LIST:
02268     do {
02269       r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
02270       if (r == 0)
02271        *len = distance_add(*len, tlen);
02272     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
02273     break;
02274 
02275   case N_ALT:
02276     {
02277       int tlen2;
02278       int varlen = 0;
02279 
02280       r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
02281       while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
02282        r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
02283        if (r == 0) {
02284          if (tlen != tlen2)
02285            varlen = 1;
02286        }
02287       }
02288       if (r == 0) {
02289        if (varlen != 0) {
02290          if (level == 1)
02291            r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02292          else
02293            r = GET_CHAR_LEN_VARLEN;
02294        }
02295        else
02296          *len = tlen;
02297       }
02298     }
02299     break;
02300 
02301   case N_STRING:
02302     {
02303       StrNode* sn = &(NSTRING(node));
02304       UChar *s = sn->s;
02305       while (s < sn->end) {
02306        s += enc_len(reg->enc, s);
02307        (*len)++;
02308       }
02309     }
02310     break;
02311 
02312   case N_QUANTIFIER:
02313     {
02314       QuantifierNode* qn = &(NQUANTIFIER(node));
02315       if (qn->lower == qn->upper) {
02316        r = get_char_length_tree1(qn->target, reg, &tlen, level);
02317        if (r == 0)
02318          *len = distance_multiply(tlen, qn->lower);
02319       }
02320       else
02321        r = GET_CHAR_LEN_VARLEN;
02322     }
02323     break;
02324 
02325 #ifdef USE_SUBEXP_CALL
02326   case N_CALL:
02327     if (! IS_CALL_RECURSION(&(NCALL(node))))
02328       r = get_char_length_tree1(NCALL(node).target, reg, len, level);
02329     else
02330       r = GET_CHAR_LEN_VARLEN;
02331     break;
02332 #endif
02333 
02334   case N_CTYPE:
02335     switch (NCTYPE(node).type) {
02336     case CTYPE_WORD:
02337     case CTYPE_NOT_WORD:
02338       *len = 1;
02339       break;
02340     }
02341     break;
02342 
02343   case N_CCLASS:
02344   case N_ANYCHAR:
02345     *len = 1;
02346     break;
02347 
02348   case N_EFFECT:
02349     {
02350       EffectNode* en = &(NEFFECT(node));
02351       switch (en->type) {
02352       case EFFECT_MEMORY:
02353 #ifdef USE_SUBEXP_CALL
02354        if (IS_EFFECT_CLEN_FIXED(en))
02355          *len = en->char_len;
02356        else {
02357          r = get_char_length_tree1(en->target, reg, len, level);
02358          if (r == 0) {
02359            en->char_len = *len;
02360            SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
02361          }
02362        }
02363        break;
02364 #endif
02365       case EFFECT_OPTION:
02366       case EFFECT_STOP_BACKTRACK:
02367        r = get_char_length_tree1(en->target, reg, len, level);
02368        break;
02369       default:
02370        break;
02371       }
02372     }
02373     break;
02374 
02375   case N_ANCHOR:
02376     break;
02377 
02378   default:
02379     r = GET_CHAR_LEN_VARLEN;
02380     break;
02381   }
02382 
02383   return r;
02384 }
02385 
02386 static int
02387 get_char_length_tree(Node* node, regex_t* reg, int* len)
02388 {
02389   return get_char_length_tree1(node, reg, len, 0);
02390 }
02391 
02392 /* x is not included y ==>  1 : 0 */
02393 static int
02394 is_not_included(Node* x, Node* y, regex_t* reg)
02395 {
02396   int i, len;
02397   OnigCodePoint code;
02398   UChar *p, c;
02399   int ytype;
02400 
02401  retry:
02402   ytype = NTYPE(y);
02403   switch (NTYPE(x)) {
02404   case N_CTYPE:
02405     {
02406       switch (ytype) {
02407       case N_CTYPE:
02408        switch (NCTYPE(x).type) {
02409        case CTYPE_WORD:
02410          if (NCTYPE(y).type == CTYPE_NOT_WORD)
02411            return 1;
02412          else
02413            return 0;
02414          break;
02415        case CTYPE_NOT_WORD:
02416          if (NCTYPE(y).type == CTYPE_WORD)
02417            return 1;
02418          else
02419            return 0;
02420          break;
02421        default:
02422          break;
02423        }
02424        break;
02425 
02426       case N_CCLASS:
02427       swap:
02428        {
02429          Node* tmp;
02430          tmp = x; x = y; y = tmp;
02431          goto retry;
02432        }
02433        break;
02434 
02435       case N_STRING:
02436        goto swap;
02437        break;
02438 
02439       default:
02440        break;
02441       }
02442     }
02443     break;
02444 
02445   case N_CCLASS:
02446     {
02447       CClassNode* xc = &(NCCLASS(x));
02448       switch (ytype) {
02449       case N_CTYPE:
02450        switch (NCTYPE(y).type) {
02451        case CTYPE_WORD:
02452          if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
02453            for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02454              if (BITSET_AT(xc->bs, i)) {
02455               if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
02456              }
02457            }
02458            return 1;
02459          }
02460          return 0;
02461          break;
02462        case CTYPE_NOT_WORD:
02463          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02464            if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
02465              if (!IS_CCLASS_NOT(xc)) {
02466               if (BITSET_AT(xc->bs, i))
02467                 return 0;
02468              }
02469              else {
02470               if (! BITSET_AT(xc->bs, i))
02471                 return 0;
02472              }
02473            }
02474          }
02475          return 1;
02476          break;
02477 
02478        default:
02479          break;
02480        }
02481        break;
02482 
02483       case N_CCLASS:
02484        {
02485          int v;
02486          CClassNode* yc = &(NCCLASS(y));
02487 
02488          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02489            v = BITSET_AT(xc->bs, i);
02490            if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
02491                 (v == 0 && IS_CCLASS_NOT(xc))) {
02492              v = BITSET_AT(yc->bs, i);
02493              if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
02494                   (v == 0 && IS_CCLASS_NOT(yc)))
02495               return 0;
02496            }
02497          }
02498          if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
02499              (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
02500            return 1;
02501          return 0;
02502        }
02503        break;
02504 
02505       case N_STRING:
02506        goto swap;
02507        break;
02508 
02509       default:
02510        break;
02511       }
02512     }
02513     break;
02514 
02515   case N_STRING:
02516     {
02517       StrNode* xs = &(NSTRING(x));
02518       if (NSTRING_LEN(x) == 0)
02519        break;
02520 
02521       c = *(xs->s);
02522       switch (ytype) {
02523       case N_CTYPE:
02524        switch (NCTYPE(y).type) {
02525        case CTYPE_WORD:
02526          return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
02527          break;
02528        case CTYPE_NOT_WORD:
02529          return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
02530          break;
02531        default:
02532          break;
02533        }
02534        break;
02535 
02536       case N_CCLASS:
02537        {
02538          CClassNode* cc = &(NCCLASS(y));
02539 
02540          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02541                                  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02542          return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02543        }
02544        break;
02545 
02546       case N_STRING:
02547        {
02548          UChar *q;
02549          StrNode* ys = &(NSTRING(y));
02550          len = NSTRING_LEN(x);
02551          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02552          if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02553             /* tiny version */
02554             return 0;
02555          }
02556          else {
02557            for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
02558              if (*p != *q) return 1;
02559            }
02560          }
02561        }
02562        break;
02563        
02564       default:
02565        break;
02566       }
02567     }
02568     break;
02569 
02570   default:
02571     break;
02572   }
02573 
02574   return 0;
02575 }
02576 
02577 static Node*
02578 get_head_value_node(Node* node, int exact, regex_t* reg)
02579 {
02580   Node* n = NULL_NODE;
02581 
02582   switch (NTYPE(node)) {
02583   case N_BACKREF:
02584   case N_ALT:
02585   case N_ANYCHAR:
02586 #ifdef USE_SUBEXP_CALL
02587   case N_CALL:
02588 #endif
02589     break;
02590 
02591   case N_CTYPE:
02592   case N_CCLASS:
02593     if (exact == 0) {
02594       n = node;
02595     }
02596     break;
02597 
02598   case N_LIST:
02599     n = get_head_value_node(NCONS(node).left, exact, reg);
02600     break;
02601 
02602   case N_STRING:
02603     {
02604       StrNode* sn = &(NSTRING(node));
02605 
02606       if (sn->end <= sn->s)
02607        break;
02608 
02609       if (exact != 0 &&
02610          !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02611 #if 0
02612         UChar* tmp = sn->s;
02613        if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
02614                                        &tmp, sn->end))
02615          n = node;
02616 #endif
02617       }
02618       else {
02619        n = node;
02620       }
02621     }
02622     break;
02623 
02624   case N_QUANTIFIER:
02625     {
02626       QuantifierNode* qn = &(NQUANTIFIER(node));
02627       if (qn->lower > 0) {
02628        if (IS_NOT_NULL(qn->head_exact))
02629          n = qn->head_exact;
02630        else
02631          n = get_head_value_node(qn->target, exact, reg);
02632       }
02633     }
02634     break;
02635 
02636   case N_EFFECT:
02637     {
02638       EffectNode* en = &(NEFFECT(node));
02639       switch (en->type) {
02640       case EFFECT_OPTION:
02641        {
02642          OnigOptionType options = reg->options;
02643 
02644          reg->options = NEFFECT(node).option;
02645          n = get_head_value_node(NEFFECT(node).target, exact, reg);
02646          reg->options = options;
02647        }
02648        break;
02649 
02650       case EFFECT_MEMORY:
02651       case EFFECT_STOP_BACKTRACK:
02652        n = get_head_value_node(en->target, exact, reg);
02653        break;
02654       }
02655     }
02656     break;
02657 
02658   case N_ANCHOR:
02659     if (NANCHOR(node).type == ANCHOR_PREC_READ)
02660       n = get_head_value_node(NANCHOR(node).target, exact, reg);
02661     break;
02662 
02663   default:
02664     break;
02665   }
02666 
02667   return n;
02668 }
02669 
02670 static int
02671 check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
02672 {
02673   int type, r = 0;
02674 
02675   type = NTYPE(node);
02676   if ((type & type_mask) == 0)
02677     return 1;
02678 
02679   switch (type) {
02680   case N_LIST:
02681   case N_ALT:
02682     do {
02683       r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask);
02684     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
02685     break;
02686 
02687   case N_QUANTIFIER:
02688     r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
02689                      anchor_mask);
02690     break;
02691 
02692   case N_EFFECT:
02693     {
02694       EffectNode* en = &(NEFFECT(node));
02695       if ((en->type & effect_mask) == 0)
02696        return 1;
02697 
02698       r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask);
02699     }
02700     break;
02701 
02702   case N_ANCHOR:
02703     type = NANCHOR(node).type;
02704     if ((type & anchor_mask) == 0)
02705       return 1;
02706 
02707     if (NANCHOR(node).target)
02708       r = check_type_tree(NANCHOR(node).target,
02709                        type_mask, effect_mask, anchor_mask);
02710     break;
02711 
02712   default:
02713     break;
02714   }
02715   return r;
02716 }
02717 
02718 #ifdef USE_SUBEXP_CALL
02719 
02720 #define RECURSION_EXIST       1
02721 #define RECURSION_INFINITE    2
02722 
02723 static int
02724 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02725 {
02726   int type;
02727   int r = 0;
02728 
02729   type = NTYPE(node);
02730   switch (type) {
02731   case N_LIST:
02732     {
02733       Node *x;
02734       OnigDistance min;
02735       int ret;
02736 
02737       x = node;
02738       do {
02739        ret = subexp_inf_recursive_check(NCONS(x).left, env, head);
02740        if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02741        r |= ret;
02742        if (head) {
02743          ret = get_min_match_length(NCONS(x).left, &min, env);
02744          if (ret != 0) return ret;
02745          if (min != 0) head = 0;
02746        }
02747       } while (IS_NOT_NULL(x = NCONS(x).right));
02748     }
02749     break;
02750 
02751   case N_ALT:
02752     {
02753       int ret;
02754       r = RECURSION_EXIST;
02755       do {
02756        ret = subexp_inf_recursive_check(NCONS(node).left, env, head);
02757        if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02758        r &= ret;
02759       } while (IS_NOT_NULL(node = NCONS(node).right));
02760     }
02761     break;
02762 
02763   case N_QUANTIFIER:
02764     r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
02765     if (r == RECURSION_EXIST) {
02766       if (NQUANTIFIER(node).lower == 0) r = 0;
02767     }
02768     break;
02769 
02770   case N_ANCHOR:
02771     {
02772       AnchorNode* an = &(NANCHOR(node));
02773       switch (an->type) {
02774       case ANCHOR_PREC_READ:
02775       case ANCHOR_PREC_READ_NOT:
02776       case ANCHOR_LOOK_BEHIND:
02777       case ANCHOR_LOOK_BEHIND_NOT:
02778        r = subexp_inf_recursive_check(an->target, env, head);
02779        break;
02780       }
02781     }
02782     break;
02783 
02784   case N_CALL:
02785     r = subexp_inf_recursive_check(NCALL(node).target, env, head);
02786     break;
02787 
02788   case N_EFFECT:
02789     if (IS_EFFECT_MARK2(&(NEFFECT(node))))
02790       return 0;
02791     else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
02792       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02793     else {
02794       SET_EFFECT_STATUS(node, NST_MARK2);
02795       r = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
02796       CLEAR_EFFECT_STATUS(node, NST_MARK2);
02797     }
02798     break;
02799 
02800   default:
02801     break;
02802   }
02803 
02804   return r;
02805 }
02806 
02807 static int
02808 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02809 {
02810   int type;
02811   int r = 0;
02812 
02813   type = NTYPE(node);
02814   switch (type) {
02815   case N_LIST:
02816   case N_ALT:
02817     do {
02818       r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
02819     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
02820     break;
02821 
02822   case N_QUANTIFIER:
02823     r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
02824     break;
02825 
02826   case N_ANCHOR:
02827     {
02828       AnchorNode* an = &(NANCHOR(node));
02829       switch (an->type) {
02830       case ANCHOR_PREC_READ:
02831       case ANCHOR_PREC_READ_NOT:
02832       case ANCHOR_LOOK_BEHIND:
02833       case ANCHOR_LOOK_BEHIND_NOT:
02834        r = subexp_inf_recursive_check_trav(an->target, env);
02835        break;
02836       }
02837     }
02838     break;
02839 
02840   case N_EFFECT:
02841     {
02842       EffectNode* en = &(NEFFECT(node));
02843 
02844       if (IS_EFFECT_RECURSION(en)) {
02845        SET_EFFECT_STATUS(node, NST_MARK1);
02846        r = subexp_inf_recursive_check(en->target, env, 1);
02847        if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02848        CLEAR_EFFECT_STATUS(node, NST_MARK1);
02849       }
02850       r = subexp_inf_recursive_check_trav(en->target, env);
02851     }
02852 
02853     break;
02854 
02855   default:
02856     break;
02857   }
02858 
02859   return r;
02860 }
02861 
02862 static int
02863 subexp_recursive_check(Node* node)
02864 {
02865   int type;
02866   int r = 0;
02867 
02868   type = NTYPE(node);
02869   switch (type) {
02870   case N_LIST:
02871   case N_ALT:
02872     do {
02873       r |= subexp_recursive_check(NCONS(node).left);
02874     } while (IS_NOT_NULL(node = NCONS(node).right));
02875     break;
02876 
02877   case N_QUANTIFIER:
02878     r = subexp_recursive_check(NQUANTIFIER(node).target);
02879     break;
02880 
02881   case N_ANCHOR:
02882     {
02883       AnchorNode* an = &(NANCHOR(node));
02884       switch (an->type) {
02885       case ANCHOR_PREC_READ:
02886       case ANCHOR_PREC_READ_NOT:
02887       case ANCHOR_LOOK_BEHIND:
02888       case ANCHOR_LOOK_BEHIND_NOT:
02889        r = subexp_recursive_check(an->target);
02890        break;
02891       }
02892     }
02893     break;
02894 
02895   case N_CALL:
02896     r = subexp_recursive_check(NCALL(node).target);
02897     if (r != 0) SET_CALL_RECURSION(node);
02898     break;
02899 
02900   case N_EFFECT:
02901     if (IS_EFFECT_MARK2(&(NEFFECT(node))))
02902       return 0;
02903     else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
02904       return 1; /* recursion */
02905     else {
02906       SET_EFFECT_STATUS(node, NST_MARK2);
02907       r = subexp_recursive_check(NEFFECT(node).target);
02908       CLEAR_EFFECT_STATUS(node, NST_MARK2);
02909     }
02910     break;
02911 
02912   default:
02913     break;
02914   }
02915 
02916   return r;
02917 }
02918 
02919 
02920 static int
02921 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02922 {
02923 #define FOUND_CALLED_NODE    1
02924 
02925   int type;
02926   int r = 0;
02927 
02928   type = NTYPE(node);
02929   switch (type) {
02930   case N_LIST:
02931   case N_ALT:
02932     {
02933       int ret;
02934       do {
02935        ret = subexp_recursive_check_trav(NCONS(node).left, env);
02936        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02937        else if (ret < 0) return ret;
02938       } while (IS_NOT_NULL(node = NCONS(node).right));
02939     }
02940     break;
02941 
02942   case N_QUANTIFIER:
02943     r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
02944     if (NQUANTIFIER(node).upper == 0) {
02945       if (r == FOUND_CALLED_NODE)
02946        NQUANTIFIER(node).is_refered = 1;
02947     }
02948     break;
02949 
02950   case N_ANCHOR:
02951     {
02952       AnchorNode* an = &(NANCHOR(node));
02953       switch (an->type) {
02954       case ANCHOR_PREC_READ:
02955       case ANCHOR_PREC_READ_NOT:
02956       case ANCHOR_LOOK_BEHIND:
02957       case ANCHOR_LOOK_BEHIND_NOT:
02958        r = subexp_recursive_check_trav(an->target, env);
02959        break;
02960       }
02961     }
02962     break;
02963 
02964   case N_EFFECT:
02965     {
02966       EffectNode* en = &(NEFFECT(node));
02967 
02968       if (! IS_EFFECT_RECURSION(en)) {
02969        if (IS_EFFECT_CALLED(en)) {
02970          SET_EFFECT_STATUS(node, NST_MARK1);
02971          r = subexp_recursive_check(en->target);
02972          if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION);
02973          CLEAR_EFFECT_STATUS(node, NST_MARK1);
02974        }
02975       }
02976       r = subexp_recursive_check_trav(en->target, env);
02977       if (IS_EFFECT_CALLED(en))
02978        r |= FOUND_CALLED_NODE;
02979     }
02980     break;
02981 
02982   default:
02983     break;
02984   }
02985 
02986   return r;
02987 }
02988 
02989 static int
02990 setup_subexp_call(Node* node, ScanEnv* env)
02991 {
02992   int type;
02993   int r = 0;
02994 
02995   type = NTYPE(node);
02996   switch (type) {
02997   case N_LIST:
02998     do {
02999       r = setup_subexp_call(NCONS(node).left, env);
03000     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
03001     break;
03002 
03003   case N_ALT:
03004     do {
03005       r = setup_subexp_call(NCONS(node).left, env);
03006     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
03007     break;
03008 
03009   case N_QUANTIFIER:
03010     r = setup_subexp_call(NQUANTIFIER(node).target, env);
03011     break;
03012   case N_EFFECT:
03013     r = setup_subexp_call(NEFFECT(node).target, env);
03014     break;
03015 
03016   case N_CALL:
03017     {
03018       int n, num, *refs;
03019       UChar *p;
03020       CallNode* cn = &(NCALL(node));
03021       Node** nodes = SCANENV_MEM_NODES(env);
03022 
03023 #ifdef USE_NAMED_GROUP
03024       n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
03025 #else
03026       n = -1;
03027 #endif
03028       if (n <= 0) {
03029        /* name not found, check group number. (?*ddd) */
03030        p = cn->name;
03031        num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
03032        if (num <= 0 || p != cn->name_end) {
03033          onig_scan_env_set_error_string(env,
03034                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03035          return ONIGERR_UNDEFINED_NAME_REFERENCE;
03036        }
03037 #ifdef USE_NAMED_GROUP
03038        if (env->num_named > 0 &&
03039            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03040            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03041          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03042        }
03043 #endif
03044        if (num > env->num_mem) {
03045          onig_scan_env_set_error_string(env,
03046                ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03047          return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03048        }
03049        cn->ref_num = num;
03050        goto set_call_attr;
03051       }
03052       else if (n > 1) {
03053        onig_scan_env_set_error_string(env,
03054               ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03055        return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03056       }
03057       else {
03058        cn->ref_num = refs[0];
03059       set_call_attr:
03060        cn->target = nodes[cn->ref_num];
03061        if (IS_NULL(cn->target)) {
03062          onig_scan_env_set_error_string(env,
03063                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03064          return ONIGERR_UNDEFINED_NAME_REFERENCE;
03065        }
03066        SET_EFFECT_STATUS(cn->target, NST_CALLED);
03067        BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
03068        cn->unset_addr_list = env->unset_addr_list;
03069       }
03070     }
03071     break;
03072 
03073   case N_ANCHOR:
03074     {
03075       AnchorNode* an = &(NANCHOR(node));
03076 
03077       switch (an->type) {
03078       case ANCHOR_PREC_READ:
03079       case ANCHOR_PREC_READ_NOT:
03080       case ANCHOR_LOOK_BEHIND:
03081       case ANCHOR_LOOK_BEHIND_NOT:
03082        r = setup_subexp_call(an->target, env);
03083        break;
03084       }
03085     }
03086     break;
03087 
03088   default:
03089     break;
03090   }
03091 
03092   return r;
03093 }
03094 #endif
03095 
03096 /* divide different length alternatives in look-behind.
03097   (?<=A|B) ==> (?<=A)|(?<=B)
03098   (?<!A|B) ==> (?<!A)(?<!B)
03099 */
03100 static int
03101 divide_look_behind_alternatives(Node* node)
03102 {
03103   Node tmp_node;
03104   Node *head, *np, *insert_node;
03105   AnchorNode* an = &(NANCHOR(node));
03106   int anc_type = an->type;
03107 
03108   head = an->target;
03109   np = NCONS(head).left;
03110   tmp_node = *node; *node = *head; *head = tmp_node;
03111   NCONS(node).left = head;
03112   NANCHOR(head).target = np;
03113 
03114   np = node;
03115   while ((np = NCONS(np).right) != NULL_NODE) {
03116     insert_node = onig_node_new_anchor(anc_type);
03117     CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
03118     NANCHOR(insert_node).target = NCONS(np).left;
03119     NCONS(np).left = insert_node;
03120   }
03121 
03122   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03123     np = node;
03124     do {
03125       np->type = N_LIST;  /* alt -> list */
03126     } while ((np = NCONS(np).right) != NULL_NODE);
03127   }
03128   return 0;
03129 }
03130 
03131 static int
03132 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03133 {
03134   int r, len;
03135   AnchorNode* an = &(NANCHOR(node));
03136 
03137   r = get_char_length_tree(an->target, reg, &len);
03138   if (r == 0)
03139     an->char_len = len;
03140   else if (r == GET_CHAR_LEN_VARLEN)
03141     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03142   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03143     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03144       r = divide_look_behind_alternatives(node);
03145     else
03146       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03147   }
03148 
03149   return r;
03150 }
03151 
03152 static int
03153 next_setup(Node* node, Node* next_node, regex_t* reg)
03154 {
03155   int type;
03156 
03157  retry:
03158   type = NTYPE(node);
03159   if (type == N_QUANTIFIER) {
03160     QuantifierNode* qn = &(NQUANTIFIER(node));
03161     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03162 #ifdef USE_QUANTIFIER_PEEK_NEXT
03163       qn->next_head_exact = get_head_value_node(next_node, 1, reg);
03164 #endif
03165       /* automatic posseivation a*b ==> (?>a*)b */
03166       if (qn->lower <= 1) {
03167        int ttype = NTYPE(qn->target);
03168        if (IS_NODE_TYPE_SIMPLE(ttype)) {
03169          Node *x, *y;
03170          x = get_head_value_node(qn->target, 0, reg);
03171          if (IS_NOT_NULL(x)) {
03172            y = get_head_value_node(next_node,  0, reg);
03173            if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03174              Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
03175              CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
03176              SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03177              swap_node(node, en);
03178              NEFFECT(node).target = en;
03179            }
03180          }
03181        }
03182       }
03183     }
03184   }
03185   else if (type == N_EFFECT) {
03186     EffectNode* en = &(NEFFECT(node));
03187     if (en->type == EFFECT_MEMORY) {
03188       node = en->target;
03189       goto retry;
03190     }
03191   }
03192   return 0;
03193 }
03194 
03195 
03196 static int
03197 divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
03198                              UChar* prev_start, UChar* prev,
03199                              UChar* end, Node*** tailp, Node** root)
03200 {
03201   UChar *tmp, *wp;
03202   Node* snode;
03203 
03204   if (prev_ambig != 0) {
03205     tmp = prev_start;
03206     wp  = prev_start;
03207     while (tmp < prev) {
03208       wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
03209                                      &tmp, end, wp);
03210     }
03211     snode = onig_node_new_str(prev_start, wp);
03212     CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
03213     NSTRING_SET_AMBIG(snode);
03214     if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
03215   }
03216   else {
03217     snode = onig_node_new_str(prev_start, prev);
03218     CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
03219   }
03220 
03221   if (*tailp == (Node** )0) {
03222     *root = onig_node_new_list(snode, NULL);
03223     CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
03224     *tailp = &(NCONS(*root).right);
03225   }
03226   else {
03227     **tailp = onig_node_new_list(snode, NULL);
03228     CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
03229     *tailp = &(NCONS(**tailp).right);
03230   }
03231 
03232   return 0;
03233 }
03234 
03235 static int
03236 divide_ambig_string_node(Node* node, regex_t* reg)
03237 {
03238   StrNode* sn = &NSTRING(node);
03239   int ambig, prev_ambig;
03240   UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
03241   Node *root = NULL_NODE;
03242   Node **tailp = (Node** )0;
03243   int r;
03244 
03245   start = prev_start = p = sn->s;
03246   end  = sn->end;
03247   if (p >= end) return 0;
03248 
03249   prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
03250 
03251   while (p < end) {
03252     prev = p;
03253     if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
03254                                               reg->ambig_flag, &p, end))) {
03255 
03256       r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
03257                                        end, &tailp, &root);
03258       if (r != 0) return r;
03259 
03260       prev_ambig = ambig;
03261       prev_start = prev;
03262     }
03263   }
03264 
03265   if (prev_start == start) {
03266     if (prev_ambig != 0) {
03267       NSTRING_SET_AMBIG(node);
03268       tmp = start;
03269       wp  = start;
03270       while (tmp < end) {
03271         wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
03272                                        &tmp, end, wp);
03273       }
03274       if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
03275       sn->end = wp;
03276     }
03277   }
03278   else {
03279     r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
03280                                      end, &tailp, &root);
03281     if (r != 0) return r;
03282 
03283     swap_node(node, root);
03284     onig_node_str_clear(root); /* should be after swap! */
03285     onig_node_free(root);      /* free original string node */
03286   }
03287 
03288   return 0;
03289 }
03290 
03291 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03292 
03293 #define CEC_THRES_NUM_BIG_REPEAT         512
03294 #define CEC_INFINITE_NUM          0x7fffffff
03295 
03296 #define CEC_IN_INFINITE_REPEAT    (1<<0)
03297 #define CEC_IN_FINITE_REPEAT      (1<<1)
03298 #define CEC_CONT_BIG_REPEAT       (1<<2)
03299 
03300 static int
03301 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03302 {
03303   int type;
03304   int r = state;
03305 
03306   type = NTYPE(node);
03307   switch (type) {
03308   case N_LIST:
03309     {
03310       Node* prev = NULL_NODE;
03311       do {
03312        r = setup_comb_exp_check(NCONS(node).left, r, env);
03313        prev = NCONS(node).left;
03314       } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
03315     }
03316     break;
03317 
03318   case N_ALT:
03319     {
03320       int ret;
03321       do {
03322        ret = setup_comb_exp_check(NCONS(node).left, state, env);
03323        r |= ret;
03324       } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
03325     }
03326     break;
03327 
03328   case N_QUANTIFIER:
03329     {
03330       int child_state = state;
03331       int add_state = 0;
03332       QuantifierNode* qn = &(NQUANTIFIER(node));
03333       Node* target = qn->target;
03334       int var_num;
03335 
03336       if (! IS_REPEAT_INFINITE(qn->upper)) {
03337        if (qn->upper > 1) {
03338          /* {0,1}, {1,1} are allowed */
03339          child_state |= CEC_IN_FINITE_REPEAT;
03340 
03341          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
03342          if (env->backrefed_mem == 0) {
03343            if (NTYPE(qn->target) == N_EFFECT) {
03344              EffectNode* en = &(NEFFECT(qn->target));
03345              if (en->type == EFFECT_MEMORY) {
03346               if (NTYPE(en->target) == N_QUANTIFIER) {
03347                 QuantifierNode* q = &(NQUANTIFIER(en->target));
03348                 if (IS_REPEAT_INFINITE(q->upper)
03349                     && q->greedy == qn->greedy) {
03350                   qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03351                   if (qn->upper == 1)
03352                     child_state = state;
03353                 }
03354               }
03355              }
03356            }
03357          }
03358        }
03359       }
03360 
03361       if (state & CEC_IN_FINITE_REPEAT) {
03362        qn->comb_exp_check_num = -1;
03363       }
03364       else {
03365        if (IS_REPEAT_INFINITE(qn->upper)) {
03366          var_num = CEC_INFINITE_NUM;
03367          child_state |= CEC_IN_INFINITE_REPEAT;
03368        }
03369        else {
03370          var_num = qn->upper - qn->lower;
03371        }
03372 
03373        if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03374          add_state |= CEC_CONT_BIG_REPEAT;
03375 
03376        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03377            ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03378             var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03379          if (qn->comb_exp_check_num == 0) {
03380            env->num_comb_exp_check++;
03381            qn->comb_exp_check_num = env->num_comb_exp_check;
03382            if (env->curr_max_regnum > env->comb_exp_max_regnum)
03383              env->comb_exp_max_regnum = env->curr_max_regnum;
03384          }
03385        }
03386       }
03387 
03388       r = setup_comb_exp_check(target, child_state, env);
03389       r |= add_state;
03390     }
03391     break;
03392 
03393   case N_EFFECT:
03394     {
03395       EffectNode* en = &(NEFFECT(node));
03396 
03397       switch (en->type) {
03398       case EFFECT_MEMORY:
03399        {
03400          if (env->curr_max_regnum < en->regnum)
03401            env->curr_max_regnum = en->regnum;
03402 
03403          r = setup_comb_exp_check(en->target, state, env);
03404        }
03405        break;
03406 
03407       default:
03408        r = setup_comb_exp_check(en->target, state, env);
03409        break;
03410       }
03411     }
03412     break;
03413 
03414 #ifdef USE_SUBEXP_CALL
03415   case N_CALL:
03416     if (IS_CALL_RECURSION(&(NCALL(node))))
03417       env->has_recursion = 1;
03418     else
03419       r = setup_comb_exp_check(NCALL(node).target, state, env);
03420     break;
03421 #endif
03422 
03423   default:
03424     break;
03425   }
03426 
03427   return r;
03428 }
03429 #endif
03430 
03431 #define IN_ALT        (1<<0)
03432 #define IN_NOT        (1<<1)
03433 #define IN_REPEAT     (1<<2)
03434 #define IN_VAR_REPEAT (1<<3)
03435 
03436 /* setup_tree does the following work.
03437  1. check empty loop. (set qn->target_empty_info)
03438  2. expand ignore-case in char class.
03439  3. set memory status bit flags. (reg->mem_stats)
03440  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
03441  5. find invalid patterns in look-behind.
03442  6. expand repeated string.
03443  */
03444 static int
03445 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03446 {
03447   int type;
03448   int r = 0;
03449 
03450   type = NTYPE(node);
03451   switch (type) {
03452   case N_LIST:
03453     {
03454       Node* prev = NULL_NODE;
03455       do {
03456        r = setup_tree(NCONS(node).left, reg, state, env);
03457        if (IS_NOT_NULL(prev) && r == 0) {
03458          r = next_setup(prev, NCONS(node).left, reg);
03459        }
03460        prev = NCONS(node).left;
03461       } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
03462     }
03463     break;
03464 
03465   case N_ALT:
03466     do {
03467       r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
03468     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
03469     break;
03470 
03471   case N_CCLASS:
03472     break;
03473 
03474   case N_STRING:
03475     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03476       r = divide_ambig_string_node(node, reg);
03477     }
03478     break;
03479 
03480   case N_CTYPE:
03481   case N_ANYCHAR:
03482     break;
03483 
03484 #ifdef USE_SUBEXP_CALL
03485   case N_CALL:
03486     break;
03487 #endif
03488 
03489   case N_BACKREF:
03490     {
03491       int i;
03492       int* p;
03493       Node** nodes = SCANENV_MEM_NODES(env);
03494       BackrefNode* br = &(NBACKREF(node));
03495       p = BACKREFS_P(br);
03496       for (i = 0; i < br->back_num; i++) {
03497        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
03498        BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03499        BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03500 #ifdef USE_BACKREF_AT_LEVEL
03501        if (IS_BACKREF_NEST_LEVEL(br)) {
03502          BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03503        }
03504 #endif
03505        SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03506       }
03507     }
03508     break;
03509 
03510   case N_QUANTIFIER:
03511     {
03512       OnigDistance d;
03513       QuantifierNode* qn = &(NQUANTIFIER(node));
03514       Node* target = qn->target;
03515 
03516       if ((state & IN_REPEAT) != 0) {
03517         qn->state |= NST_IN_REPEAT;
03518       }
03519 
03520       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03521        r = get_min_match_length(target, &d, env);
03522        if (r) break;
03523        if (d == 0) {
03524          qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03525 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
03526          r = quantifiers_memory_node_info(target);
03527          if (r < 0) break;
03528          if (r > 0) {
03529            qn->target_empty_info = r;
03530          }
03531 #endif
03532 #if 0
03533          r = get_max_match_length(target, &d, env);
03534          if (r == 0 && d == 0) {
03535            /*  ()* ==> ()?, ()+ ==> ()  */
03536            qn->upper = 1;
03537            if (qn->lower > 1) qn->lower = 1;
03538            if (NTYPE(target) == N_STRING) {
03539              qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
03540            }
03541          }
03542 #endif
03543        }
03544       }
03545 
03546       state |= IN_REPEAT;
03547       if (qn->lower != qn->upper)
03548        state |= IN_VAR_REPEAT;
03549       r = setup_tree(target, reg, state, env);
03550       if (r) break;
03551 
03552       /* expand string */
03553 #define EXPAND_STRING_MAX_LENGTH  100
03554       if (NTYPE(target) == N_STRING) {
03555        if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03556            qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03557          int len = NSTRING_LEN(target);
03558          StrNode* sn = &(NSTRING(target));
03559 
03560          if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03561            int i, n = qn->lower;
03562            onig_node_conv_to_str_node(node, NSTRING(target).flag);
03563            for (i = 0; i < n; i++) {
03564              r = onig_node_str_cat(node, sn->s, sn->end);
03565              if (r) break;
03566            }
03567            onig_node_free(target);
03568            break; /* break case N_QUANTIFIER: */
03569          }
03570        }
03571       }
03572 
03573 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03574       if (qn->greedy && (qn->target_empty_info != 0)) {
03575        if (NTYPE(target) == N_QUANTIFIER) {
03576          QuantifierNode* tqn = &(NQUANTIFIER(target));
03577          if (IS_NOT_NULL(tqn->head_exact)) {
03578            qn->head_exact  = tqn->head_exact;
03579            tqn->head_exact = NULL;
03580          }
03581        }
03582        else {
03583          qn->head_exact = get_head_value_node(qn->target, 1, reg);
03584        }
03585       }
03586 #endif
03587     }
03588     break;
03589 
03590   case N_EFFECT:
03591     {
03592       EffectNode* en = &(NEFFECT(node));
03593 
03594       switch (en->type) {
03595       case EFFECT_OPTION:
03596        {
03597          OnigOptionType options = reg->options;
03598          reg->options = NEFFECT(node).option;
03599          r = setup_tree(NEFFECT(node).target, reg, state, env);
03600          reg->options = options;
03601        }
03602        break;
03603 
03604       case EFFECT_MEMORY:
03605        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03606          BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03607          /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
03608        }
03609         r = setup_tree(en->target, reg, state, env);
03610         break;
03611 
03612       case EFFECT_STOP_BACKTRACK:
03613        {
03614          Node* target = en->target;
03615          r = setup_tree(target, reg, state, env);
03616          if (NTYPE(target) == N_QUANTIFIER) {
03617            QuantifierNode* tqn = &(NQUANTIFIER(target));
03618            if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03619               tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
03620              int qtype = NTYPE(tqn->target);
03621              if (IS_NODE_TYPE_SIMPLE(qtype))
03622               SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03623            }
03624          }
03625        }
03626        break;
03627       }
03628     }
03629     break;
03630 
03631   case N_ANCHOR:
03632     {
03633       AnchorNode* an = &(NANCHOR(node));
03634 
03635       switch (an->type) {
03636       case ANCHOR_PREC_READ:
03637        r = setup_tree(an->target, reg, state, env);
03638        break;
03639       case ANCHOR_PREC_READ_NOT:
03640        r = setup_tree(an->target, reg, (state | IN_NOT), env);
03641        break;
03642 
03643 /* allowed node types in look-behind */
03644 #define ALLOWED_TYPE_IN_LB  \
03645   ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
03646     N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
03647 
03648 #define ALLOWED_EFFECT_IN_LB       ( EFFECT_MEMORY )
03649 #define ALLOWED_EFFECT_IN_LB_NOT   0
03650 
03651 #define ALLOWED_ANCHOR_IN_LB \
03652 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03653 #define ALLOWED_ANCHOR_IN_LB_NOT \
03654 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03655 
03656       case ANCHOR_LOOK_BEHIND:
03657        {
03658          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03659                            ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
03660          if (r < 0) return r;
03661          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03662          r = setup_look_behind(node, reg, env);
03663          if (r != 0) return r;
03664          r = setup_tree(an->target, reg, state, env);
03665        }
03666        break;
03667 
03668       case ANCHOR_LOOK_BEHIND_NOT:
03669        {
03670          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03671                     ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03672          if (r < 0) return r;
03673          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03674          r = setup_look_behind(node, reg, env);
03675          if (r != 0) return r;
03676          r = setup_tree(an->target, reg, (state | IN_NOT), env);
03677        }
03678        break;
03679       }
03680     }
03681     break;
03682 
03683   default:
03684     break;
03685   }
03686 
03687   return r;
03688 }
03689 
03690 /* set skip map for Boyer-Moor search */
03691 static int
03692 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
03693            UChar skip[], int** int_skip)
03694 {
03695   int i, len;
03696 
03697   len = end - s;
03698   if (len < ONIG_CHAR_TABLE_SIZE) {
03699     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03700 
03701     for (i = 0; i < len - 1; i++)
03702       skip[s[i]] = len - 1 - i;
03703   }
03704   else {
03705     if (IS_NULL(*int_skip)) {
03706       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03707       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03708     }
03709     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
03710 
03711     for (i = 0; i < len - 1; i++)
03712       (*int_skip)[s[i]] = len - 1 - i;
03713   }
03714   return 0;
03715 }
03716 
03717 #define OPT_EXACT_MAXLEN   24
03718 
03719 typedef struct {
03720   OnigDistance min;  /* min byte length */
03721   OnigDistance max;  /* max byte length */
03722 } MinMaxLen;
03723 
03724 typedef struct {
03725   MinMaxLen       mmd;
03726   OnigEncoding    enc;
03727   OnigOptionType  options;
03728   OnigAmbigType   ambig_flag;
03729   ScanEnv*        scan_env;
03730 } OptEnv;
03731 
03732 typedef struct {
03733   int left_anchor;
03734   int right_anchor;
03735 } OptAncInfo;
03736 
03737 typedef struct {
03738   MinMaxLen  mmd; /* info position */
03739   OptAncInfo anc;
03740 
03741   int   reach_end;
03742   int   ignore_case;
03743   int   len;
03744   UChar s[OPT_EXACT_MAXLEN];
03745 } OptExactInfo;
03746 
03747 typedef struct {
03748   MinMaxLen mmd; /* info position */
03749   OptAncInfo anc;
03750 
03751   int   value;      /* weighted value */
03752   UChar map[ONIG_CHAR_TABLE_SIZE];
03753 } OptMapInfo;
03754 
03755 typedef struct {
03756   MinMaxLen    len;
03757 
03758   OptAncInfo   anc;
03759   OptExactInfo exb;    /* boundary */
03760   OptExactInfo exm;    /* middle */
03761   OptExactInfo expr;   /* prec read (?=...) */
03762 
03763   OptMapInfo   map;   /* boundary */
03764 } NodeOptInfo;
03765 
03766 
03767 static int
03768 map_position_value(OnigEncoding enc, int i)
03769 {
03770   static const short int ByteValTable[] = {
03771      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
03772      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
03773     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
03774      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
03775      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
03776      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
03777      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
03778      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
03779   };
03780 
03781   if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
03782     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
03783       return 20;
03784     else
03785       return (int )ByteValTable[i];
03786   }
03787   else
03788     return 4;   /* Take it easy. */
03789 }
03790 
03791 static int
03792 distance_value(MinMaxLen* mm)
03793 {
03794   /* 1000 / (min-max-dist + 1) */
03795   static const short int dist_vals[] = {
03796     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100, 
03797       91,   83,   77,   71,   67,   63,   59,   56,   53,   50, 
03798       48,   45,   43,   42,   40,   38,   37,   36,   34,   33, 
03799       32,   31,   30,   29,   29,   28,   27,   26,   26,   25, 
03800       24,   24,   23,   23,   22,   22,   21,   21,   20,   20, 
03801       20,   19,   19,   19,   18,   18,   18,   17,   17,   17, 
03802       16,   16,   16,   16,   15,   15,   15,   15,   14,   14, 
03803       14,   14,   14,   14,   13,   13,   13,   13,   13,   13, 
03804       12,   12,   12,   12,   12,   12,   11,   11,   11,   11, 
03805       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
03806   };
03807 
03808   int d;
03809 
03810   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
03811 
03812   d = mm->max - mm->min;
03813   if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
03814     /* return dist_vals[d] * 16 / (mm->min + 12); */
03815     return (int )dist_vals[d];
03816   else
03817     return 1;
03818 }
03819 
03820 static int
03821 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
03822 {
03823   if (v2 <= 0) return -1;
03824   if (v1 <= 0) return  1;
03825 
03826   v1 *= distance_value(d1);
03827   v2 *= distance_value(d2);
03828 
03829   if (v2 > v1) return  1;
03830   if (v2 < v1) return -1;
03831 
03832   if (d2->min < d1->min) return  1;
03833   if (d2->min > d1->min) return -1;
03834   return 0;
03835 }
03836 
03837 static int
03838 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
03839 {
03840   return (a->min == b->min && a->max == b->max) ? 1 : 0;
03841 }
03842 
03843 
03844 static void
03845 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
03846 {
03847   mml->min = min;
03848   mml->max = max;
03849 }
03850 
03851 static void
03852 clear_mml(MinMaxLen* mml)
03853 {
03854   mml->min = mml->max = 0;
03855 }
03856 
03857 static void
03858 copy_mml(MinMaxLen* to, MinMaxLen* from)
03859 {
03860   to->min = from->min;
03861   to->max = from->max;
03862 }
03863 
03864 static void
03865 add_mml(MinMaxLen* to, MinMaxLen* from)
03866 {
03867   to->min = distance_add(to->min, from->min);
03868   to->max = distance_add(to->max, from->max);
03869 }
03870 
03871 #if 0
03872 static void
03873 add_len_mml(MinMaxLen* to, OnigDistance len)
03874 {
03875   to->min = distance_add(to->min, len);
03876   to->max = distance_add(to->max, len);
03877 }
03878 #endif
03879 
03880 static void
03881 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
03882 {
03883   if (to->min > from->min) to->min = from->min;
03884   if (to->max < from->max) to->max = from->max;
03885 }
03886 
03887 static void
03888 copy_opt_env(OptEnv* to, OptEnv* from)
03889 {
03890   *to = *from;
03891 }
03892 
03893 static void
03894 clear_opt_anc_info(OptAncInfo* anc)
03895 {
03896   anc->left_anchor  = 0;
03897   anc->right_anchor = 0;
03898 }
03899 
03900 static void
03901 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
03902 {
03903   *to = *from;
03904 }
03905 
03906 static void
03907 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
03908                   OnigDistance left_len, OnigDistance right_len)
03909 {
03910   clear_opt_anc_info(to);
03911 
03912   to->left_anchor = left->left_anchor;
03913   if (left_len == 0) {
03914     to->left_anchor |= right->left_anchor;
03915   }
03916 
03917   to->right_anchor = right->right_anchor;
03918   if (right_len == 0) {
03919     to->right_anchor |= left->right_anchor;
03920   }
03921 }
03922 
03923 static int
03924 is_left_anchor(int anc)
03925 {
03926   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
03927       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
03928       anc == ANCHOR_PREC_READ_NOT)
03929     return 0;
03930 
03931   return 1;
03932 }
03933 
03934 static int
03935 is_set_opt_anc_info(OptAncInfo* to, int anc)
03936 {
03937   if ((to->left_anchor & anc) != 0) return 1;
03938 
03939   return ((to->right_anchor & anc) != 0 ? 1 : 0);
03940 }
03941 
03942 static void
03943 add_opt_anc_info(OptAncInfo* to, int anc)
03944 {
03945   if (is_left_anchor(anc))
03946     to->left_anchor |= anc;
03947   else
03948     to->right_anchor |= anc;
03949 }
03950 
03951 static void
03952 remove_opt_anc_info(OptAncInfo* to, int anc)
03953 {
03954   if (is_left_anchor(anc))
03955     to->left_anchor &= ~anc;
03956   else
03957     to->right_anchor &= ~anc;
03958 }
03959 
03960 static void
03961 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
03962 {
03963   to->left_anchor  &= add->left_anchor;
03964   to->right_anchor &= add->right_anchor;
03965 }
03966 
03967 static int
03968 is_full_opt_exact_info(OptExactInfo* ex)
03969 {
03970   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
03971 }
03972 
03973 static void
03974 clear_opt_exact_info(OptExactInfo* ex)
03975 {
03976   clear_mml(&ex->mmd);
03977   clear_opt_anc_info(&ex->anc);
03978   ex->reach_end   = 0;
03979   ex->ignore_case = 0;
03980   ex->len         = 0;
03981   ex->s[0]        = '\0';
03982 }
03983 
03984 static void
03985 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
03986 {
03987   *to = *from;
03988 }
03989 
03990 static void
03991 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
03992 {
03993   int i, j, len;
03994   UChar *p, *end;
03995   OptAncInfo tanc;
03996 
03997   if (! to->ignore_case && add->ignore_case) {
03998     if (to->len >= add->len) return ;  /* avoid */
03999 
04000     to->ignore_case = 1;
04001   }
04002 
04003   p = add->s;
04004   end = p + add->len;
04005   for (i = to->len; p < end; ) {
04006     len = enc_len(enc, p);
04007     if (i + len > OPT_EXACT_MAXLEN) break;
04008     for (j = 0; j < len && p < end; j++)
04009       to->s[i++] = *p++;
04010   }
04011 
04012   to->len = i;
04013   to->reach_end = (p == end ? add->reach_end : 0);
04014 
04015   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04016   if (! to->reach_end) tanc.right_anchor = 0;
04017   copy_opt_anc_info(&to->anc, &tanc);
04018 }
04019 
04020 static void
04021 concat_opt_exact_info_str(OptExactInfo* to,
04022                        UChar* s, UChar* end, int raw, OnigEncoding enc)
04023 {
04024   int i, j, len;
04025   UChar *p;
04026 
04027   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04028     len = enc_len(enc, p);
04029     if (i + len > OPT_EXACT_MAXLEN) break;
04030     for (j = 0; j < len && p < end; j++)
04031       to->s[i++] = *p++;
04032   }
04033 
04034   to->len = i;
04035 }
04036 
04037 static void
04038 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04039 {
04040   int i, j, len;
04041 
04042   if (add->len == 0 || to->len == 0) {
04043     clear_opt_exact_info(to);
04044     return ;
04045   }
04046 
04047   if (! is_equal_mml(&to->mmd, &add->mmd)) {
04048     clear_opt_exact_info(to);
04049     return ;
04050   }
04051 
04052   for (i = 0; i < to->len && i < add->len; ) {
04053     if (to->s[i] != add->s[i]) break;
04054     len = enc_len(env->enc, to->s + i);
04055 
04056     for (j = 1; j < len; j++) {
04057       if (to->s[i+j] != add->s[i+j]) break;
04058     }
04059     if (j < len) break;
04060     i += len;
04061   }
04062 
04063   if (! add->reach_end || i < add->len || i < to->len) {
04064     to->reach_end = 0;
04065   }
04066   to->len = i;
04067   to->ignore_case |= add->ignore_case;
04068 
04069   alt_merge_opt_anc_info(&to->anc, &add->anc);
04070   if (! to->reach_end) to->anc.right_anchor = 0;
04071 }
04072 
04073 static void
04074 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04075 {
04076   int v1, v2;
04077 
04078   v1 = now->len;
04079   v2 = alt->len;
04080 
04081   if (v2 == 0) {
04082     return ;
04083   }
04084   else if (v1 == 0) {
04085     copy_opt_exact_info(now, alt);
04086     return ;
04087   }
04088   else if (v1 <= 2 && v2 <= 2) {
04089     /* ByteValTable[x] is big value --> low price */
04090     v2 = map_position_value(enc, now->s[0]);
04091     v1 = map_position_value(enc, alt->s[0]);
04092 
04093     if (now->len > 1) v1 += 5;
04094     if (alt->len > 1) v2 += 5;
04095   }
04096 
04097   if (now->ignore_case == 0) v1 *= 2;
04098   if (alt->ignore_case == 0) v2 *= 2;
04099 
04100   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04101     copy_opt_exact_info(now, alt);
04102 }
04103 
04104 static void
04105 clear_opt_map_info(OptMapInfo* map)
04106 {
04107   static const OptMapInfo clean_info = {
04108     {0, 0}, {0, 0}, 0,
04109     {
04110       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04111       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04112       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04113       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04114       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04115       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04116       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04117       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04118       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04119       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04120       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04121       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04122       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04123       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04124       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04125       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04126     }
04127   };
04128 
04129   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04130 }
04131 
04132 static void
04133 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04134 {
04135   *to = *from;
04136 }
04137 
04138 static void
04139 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04140 {
04141   if (map->map[c] == 0) {
04142     map->map[c] = 1;
04143     map->value += map_position_value(enc, c);
04144   }
04145 }
04146 
04147 static int
04148 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04149                           OnigEncoding enc, OnigAmbigType ambig_flag)
04150 {
04151   int i, n, len;
04152   UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
04153   OnigCodePoint code;
04154   const OnigPairAmbigCodes* pccs;
04155   OnigAmbigType amb;
04156 
04157   add_char_opt_map_info(map, p[0], enc);
04158   code = ONIGENC_MBC_TO_CODE(enc, p, end);
04159 
04160   for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
04161     if ((amb & ambig_flag) == 0)  continue;
04162 
04163     n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs);
04164     for (i = 0; i < n; i++) {
04165       if (pccs[i].from == code) {
04166         len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
04167         if (len < 0) return len;
04168         add_char_opt_map_info(map, buf[0], enc);
04169       }
04170     }
04171   }
04172   return 0;
04173 }
04174 
04175 static void
04176 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04177 {
04178   static int z = 1<<15; /* 32768: something big value */
04179 
04180   int v1, v2;
04181 
04182   if (alt->value == 0) return ;
04183   if (now->value == 0) {
04184     copy_opt_map_info(now, alt);
04185     return ;
04186   }
04187 
04188   v1 = z / now->value;
04189   v2 = z / alt->value;
04190   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04191     copy_opt_map_info(now, alt);
04192 }
04193 
04194 static int
04195 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04196 {
04197 #define COMP_EM_BASE  20
04198   int ve, vm;
04199 
04200   if (m->value <= 0) return -1;
04201 
04202   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04203   vm = COMP_EM_BASE * 5 * 2 / m->value;
04204   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04205 }
04206 
04207 static void
04208 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04209 {
04210   int i, val;
04211 
04212   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
04213   if (to->value == 0) return ;
04214   if (add->value == 0 || to->mmd.max < add->mmd.min) {
04215     clear_opt_map_info(to);
04216     return ;
04217   }
04218 
04219   alt_merge_mml(&to->mmd, &add->mmd);
04220 
04221   val = 0;
04222   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04223     if (add->map[i])
04224       to->map[i] = 1;
04225 
04226     if (to->map[i])
04227       val += map_position_value(enc, i);
04228   }
04229   to->value = val;
04230 
04231   alt_merge_opt_anc_info(&to->anc, &add->anc);
04232 }
04233 
04234 static void
04235 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04236 {
04237   copy_mml(&(opt->exb.mmd),  mmd);
04238   copy_mml(&(opt->expr.mmd), mmd);
04239   copy_mml(&(opt->map.mmd),  mmd);
04240 }
04241 
04242 static void
04243 clear_node_opt_info(NodeOptInfo* opt)
04244 {
04245   clear_mml(&opt->len);
04246   clear_opt_anc_info(&opt->anc);
04247   clear_opt_exact_info(&opt->exb);
04248   clear_opt_exact_info(&opt->exm);
04249   clear_opt_exact_info(&opt->expr);
04250   clear_opt_map_info(&opt->map);
04251 }
04252 
04253 static void
04254 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04255 {
04256   *to = *from;
04257 }
04258 
04259 static void
04260 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04261 {
04262   int exb_reach, exm_reach;
04263   OptAncInfo tanc;
04264 
04265   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04266   copy_opt_anc_info(&to->anc, &tanc);
04267 
04268   if (add->exb.len > 0 && to->len.max == 0) {
04269     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04270                      to->len.max, add->len.max);
04271     copy_opt_anc_info(&add->exb.anc, &tanc);
04272   }
04273 
04274   if (add->map.value > 0 && to->len.max == 0) {
04275     if (add->map.mmd.max == 0)
04276       add->map.anc.left_anchor |= to->anc.left_anchor;
04277   }
04278 
04279   exb_reach = to->exb.reach_end;
04280   exm_reach = to->exm.reach_end;
04281 
04282   if (add->len.max != 0)
04283     to->exb.reach_end = to->exm.reach_end = 0;
04284 
04285   if (add->exb.len > 0) {
04286     if (exb_reach) {
04287       concat_opt_exact_info(&to->exb, &add->exb, enc);
04288       clear_opt_exact_info(&add->exb);
04289     }
04290     else if (exm_reach) {
04291       concat_opt_exact_info(&to->exm, &add->exb, enc);
04292       clear_opt_exact_info(&add->exb);
04293     }
04294   }
04295   select_opt_exact_info(enc, &to->exm, &add->exb);
04296   select_opt_exact_info(enc, &to->exm, &add->exm);
04297 
04298   if (to->expr.len > 0) {
04299     if (add->len.max > 0) {
04300       if (to->expr.len > (int )add->len.max)
04301        to->expr.len = add->len.max;
04302 
04303       if (to->expr.mmd.max == 0)
04304        select_opt_exact_info(enc, &to->exb, &to->expr);
04305       else
04306        select_opt_exact_info(enc, &to->exm, &to->expr);
04307     }
04308   }
04309   else if (add->expr.len > 0) {
04310     copy_opt_exact_info(&to->expr, &add->expr);
04311   }
04312 
04313   select_opt_map_info(&to->map, &add->map);
04314 
04315   add_mml(&to->len, &add->len);
04316 }
04317 
04318 static void
04319 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04320 {
04321   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
04322   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
04323   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
04324   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04325   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
04326 
04327   alt_merge_mml(&to->len, &add->len);
04328 }
04329 
04330 
04331 #define MAX_NODE_OPT_INFO_REF_COUNT    5
04332 
04333 static int
04334 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04335 {
04336   int type;
04337   int r = 0;
04338 
04339   clear_node_opt_info(opt);
04340   set_bound_node_opt_info(opt, &env->mmd);
04341 
04342   type = NTYPE(node);
04343   switch (type) {
04344   case N_LIST:
04345     {
04346       OptEnv nenv;
04347       NodeOptInfo nopt;
04348       Node* nd = node;
04349 
04350       copy_opt_env(&nenv, env);
04351       do {
04352        r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
04353        if (r == 0) {
04354          add_mml(&nenv.mmd, &nopt.len);
04355          concat_left_node_opt_info(env->enc, opt, &nopt);
04356        }
04357       } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
04358     }
04359     break;
04360 
04361   case N_ALT:
04362     {
04363       NodeOptInfo nopt;
04364       Node* nd = node;
04365 
04366       do {
04367        r = optimize_node_left(NCONS(nd).left, &nopt, env);
04368        if (r == 0) {
04369          if (nd == node) copy_node_opt_info(opt, &nopt);
04370          else            alt_merge_node_opt_info(opt, &nopt, env);
04371        }
04372       } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
04373     }
04374     break;
04375 
04376   case N_STRING:
04377     {
04378       StrNode* sn = &(NSTRING(node));
04379       int slen = sn->end - sn->s;
04380       int is_raw = NSTRING_IS_RAW(node);
04381 
04382       if (! NSTRING_IS_AMBIG(node)) {
04383        concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04384                               NSTRING_IS_RAW(node), env->enc);
04385        if (slen > 0) {
04386          add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04387        }
04388         set_mml(&opt->len, slen, slen);
04389       }
04390       else {
04391         int n, max;
04392 
04393         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04394                                   is_raw, env->enc);
04395         opt->exb.ignore_case = 1;
04396 
04397        if (slen > 0) {
04398           r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04399                                         env->enc, env->ambig_flag);
04400           if (r != 0) break;
04401        }
04402 
04403         if (NSTRING_IS_AMBIG_REDUCE(node)) {
04404           n = onigenc_strlen(env->enc, sn->s, sn->end);
04405           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04406         }
04407         else {
04408           max = slen;
04409         }
04410         set_mml(&opt->len, slen, max);
04411       }
04412 
04413       if (opt->exb.len == slen)
04414        opt->exb.reach_end = 1;
04415     }
04416     break;
04417 
04418   case N_CCLASS:
04419     {
04420       int i, z;
04421       CClassNode* cc = &(NCCLASS(node));
04422 
04423       /* no need to check ignore case. (setted in setup_tree()) */
04424 
04425       if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
04426         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04427        OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04428 
04429        set_mml(&opt->len, min, max);
04430       }
04431       else {
04432         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04433           z = BITSET_AT(cc->bs, i);
04434           if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
04435             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04436           }
04437         }
04438        set_mml(&opt->len, 1, 1);
04439       }
04440     }
04441     break;
04442 
04443   case N_CTYPE:
04444     {
04445       int i, min, max;
04446 
04447       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04448 
04449       if (max == 1) {
04450         min = 1;
04451 
04452        switch (NCTYPE(node).type) {
04453        case CTYPE_NOT_WORD:
04454           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04455             if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04456               add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04457             }
04458           }
04459           break;
04460 
04461        case CTYPE_WORD:
04462           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04463             if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04464               add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04465             }
04466           }
04467          break;
04468        }
04469       }
04470       else {
04471         min = ONIGENC_MBC_MINLEN(env->enc);
04472       }
04473       set_mml(&opt->len, min, max);
04474     }
04475     break;
04476 
04477   case N_ANYCHAR:
04478     {
04479       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04480       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04481       set_mml(&opt->len, min, max);
04482     }
04483     break;
04484 
04485   case N_ANCHOR:
04486     switch (NANCHOR(node).type) {
04487     case ANCHOR_BEGIN_BUF:
04488     case ANCHOR_BEGIN_POSITION:
04489     case ANCHOR_BEGIN_LINE:
04490     case ANCHOR_END_BUF:
04491     case ANCHOR_SEMI_END_BUF:
04492     case ANCHOR_END_LINE:
04493       add_opt_anc_info(&opt->anc, NANCHOR(node).type);
04494       break;
04495 
04496     case ANCHOR_PREC_READ:
04497       {
04498        NodeOptInfo nopt;
04499 
04500        r = optimize_node_left(NANCHOR(node).target, &nopt, env);
04501        if (r == 0) {
04502          if (nopt.exb.len > 0)
04503            copy_opt_exact_info(&opt->expr, &nopt.exb);
04504          else if (nopt.exm.len > 0)
04505            copy_opt_exact_info(&opt->expr, &nopt.exm);
04506 
04507          opt->expr.reach_end = 0;
04508 
04509          if (nopt.map.value > 0)
04510            copy_opt_map_info(&opt->map, &nopt.map);
04511        }
04512       }
04513       break;
04514 
04515     case ANCHOR_PREC_READ_NOT:
04516     case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
04517     case ANCHOR_LOOK_BEHIND_NOT:
04518       break;
04519     }
04520     break;
04521 
04522   case N_BACKREF:
04523     {
04524       int i;
04525       int* backs;
04526       OnigDistance min, max, tmin, tmax;
04527       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04528       BackrefNode* br = &(NBACKREF(node));
04529 
04530       if (br->state & NST_RECURSION) {
04531        set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04532        break;
04533       }
04534       backs = BACKREFS_P(br);
04535       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04536       if (r != 0) break;
04537       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04538       if (r != 0) break;
04539       for (i = 1; i < br->back_num; i++) {
04540        r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04541        if (r != 0) break;
04542        r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04543        if (r != 0) break;
04544        if (min > tmin) min = tmin;
04545        if (max < tmax) max = tmax;
04546       }
04547       if (r == 0) set_mml(&opt->len, min, max);
04548     }
04549     break;
04550 
04551 #ifdef USE_SUBEXP_CALL
04552   case N_CALL:
04553     if (IS_CALL_RECURSION(&(NCALL(node))))
04554       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04555     else {
04556       OnigOptionType save = env->options;
04557       env->options = NEFFECT(NCALL(node).target).option;
04558       r = optimize_node_left(NCALL(node).target, opt, env);
04559       env->options = save;
04560     }
04561     break;
04562 #endif
04563 
04564   case N_QUANTIFIER:
04565     {
04566       int i;
04567       OnigDistance min, max;
04568       NodeOptInfo nopt;
04569       QuantifierNode* qn = &(NQUANTIFIER(node));
04570 
04571       r = optimize_node_left(qn->target, &nopt, env);
04572       if (r) break;
04573 
04574       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04575        if (env->mmd.max == 0 &&
04576            NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
04577          if (IS_MULTILINE(env->options))
04578            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04579          else
04580            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04581        }
04582       }
04583       else {
04584        if (qn->lower > 0) {
04585          copy_node_opt_info(opt, &nopt);
04586          if (nopt.exb.len > 0) {
04587            if (nopt.exb.reach_end) {
04588              for (i = 2; i < qn->lower &&
04589                         ! is_full_opt_exact_info(&opt->exb); i++) {
04590               concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04591              }
04592              if (i < qn->lower) {
04593               opt->exb.reach_end = 0;
04594              }
04595            }
04596          }
04597 
04598          if (qn->lower != qn->upper) {
04599            opt->exb.reach_end = 0;
04600            opt->exm.reach_end = 0;
04601          }
04602          if (qn->lower > 1)
04603            opt->exm.reach_end = 0;
04604        }
04605       }
04606 
04607       min = distance_multiply(nopt.len.min, qn->lower);
04608       if (IS_REPEAT_INFINITE(qn->upper))
04609        max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04610       else
04611        max = distance_multiply(nopt.len.max, qn->upper);
04612 
04613       set_mml(&opt->len, min, max);
04614     }
04615     break;
04616 
04617   case N_EFFECT:
04618     {
04619       EffectNode* en = &(NEFFECT(node));
04620 
04621       switch (en->type) {
04622       case EFFECT_OPTION:
04623        {
04624          OnigOptionType save = env->options;
04625 
04626          env->options = en->option;
04627          r = optimize_node_left(en->target, opt, env);
04628          env->options = save;
04629        }
04630        break;
04631 
04632       case EFFECT_MEMORY:
04633 #ifdef USE_SUBEXP_CALL
04634        en->opt_count++;
04635        if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04636          OnigDistance min, max;
04637 
04638          min = 0;
04639          max = ONIG_INFINITE_DISTANCE;
04640          if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
04641          if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
04642          set_mml(&opt->len, min, max);
04643        }
04644        else
04645 #endif
04646        {
04647          r = optimize_node_left(en->target, opt, env);
04648 
04649          if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04650            if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04651              remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04652          }
04653        }
04654        break;
04655 
04656       case EFFECT_STOP_BACKTRACK:
04657        r = optimize_node_left(en->target, opt, env);
04658        break;
04659       }
04660     }
04661     break;
04662 
04663   default:
04664 #ifdef ONIG_DEBUG
04665     fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04666            NTYPE(node));
04667 #endif
04668     r = ONIGERR_TYPE_BUG;
04669     break;
04670   }
04671 
04672   return r;
04673 }
04674 
04675 static int
04676 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04677 {
04678   int r;
04679 
04680   if (e->len == 0) return 0;
04681 
04682   if (e->ignore_case) {
04683     reg->exact = (UChar* )xmalloc(e->len);
04684     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
04685     xmemcpy(reg->exact, e->s, e->len);
04686     reg->exact_end = reg->exact + e->len;
04687     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04688   }
04689   else {
04690     int allow_reverse;
04691 
04692     reg->exact = k_strdup(e->s, e->s + e->len);
04693     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
04694     reg->exact_end = reg->exact + e->len;
04695  
04696     allow_reverse =
04697        ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04698 
04699     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04700       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04701                      reg->map, &(reg->int_map));
04702       if (r) return r;
04703 
04704       reg->optimize = (allow_reverse != 0
04705                      ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04706     }
04707     else {
04708       reg->optimize = ONIG_OPTIMIZE_EXACT;
04709     }
04710   }
04711 
04712   reg->dmin = e->mmd.min;
04713   reg->dmax = e->mmd.max;
04714 
04715   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04716     reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
04717   }
04718 
04719   return 0;
04720 }
04721 
04722 static void
04723 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04724 {
04725   int i;
04726 
04727   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04728     reg->map[i] = m->map[i];
04729 
04730   reg->optimize   = ONIG_OPTIMIZE_MAP;
04731   reg->dmin       = m->mmd.min;
04732   reg->dmax       = m->mmd.max;
04733 
04734   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04735     reg->threshold_len = reg->dmin + 1;
04736   }
04737 }
04738 
04739 static void
04740 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04741 {
04742   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
04743   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04744 }
04745 
04746 #ifdef ONIG_DEBUG
04747 static void print_optimize_info(FILE* f, regex_t* reg);
04748 #endif
04749 
04750 static int
04751 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04752 {
04753 
04754   int r;
04755   NodeOptInfo opt;
04756   OptEnv env;
04757 
04758   env.enc        = reg->enc;
04759   env.options    = reg->options;
04760   env.ambig_flag = reg->ambig_flag;
04761   env.scan_env   = scan_env;
04762   clear_mml(&env.mmd);
04763 
04764   r = optimize_node_left(node, &opt, &env);
04765   if (r) return r;
04766 
04767   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
04768         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
04769 
04770   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
04771 
04772   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
04773     reg->anchor_dmin = opt.len.min;
04774     reg->anchor_dmax = opt.len.max;
04775   }
04776 
04777   if (opt.exb.len > 0 || opt.exm.len > 0) {
04778     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
04779     if (opt.map.value > 0 &&
04780        comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
04781       goto set_map;
04782     }
04783     else {
04784       r = set_optimize_exact_info(reg, &opt.exb);
04785       set_sub_anchor(reg, &opt.exb.anc);
04786     }
04787   }
04788   else if (opt.map.value > 0) {
04789   set_map:
04790     set_optimize_map_info(reg, &opt.map);
04791     set_sub_anchor(reg, &opt.map.anc);
04792   }
04793   else {
04794     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
04795     if (opt.len.max == 0)
04796       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
04797   }
04798 
04799 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
04800   print_optimize_info(stderr, reg);
04801 #endif
04802   return r;
04803 }
04804 
04805 static void
04806 clear_optimize_info(regex_t* reg)
04807 {
04808   reg->optimize      = ONIG_OPTIMIZE_NONE;
04809   reg->anchor        = 0;
04810   reg->anchor_dmin   = 0;
04811   reg->anchor_dmax   = 0;
04812   reg->sub_anchor    = 0;
04813   reg->exact_end     = (UChar* )NULL;
04814   reg->threshold_len = 0;
04815   if (IS_NOT_NULL(reg->exact)) {
04816     xfree(reg->exact);
04817     reg->exact = (UChar* )NULL;
04818   }
04819 }
04820 
04821 #ifdef ONIG_DEBUG
04822 
04823 static void print_enc_string(FILE* fp, OnigEncoding enc,
04824                           const UChar *s, const UChar *end)
04825 {
04826   fprintf(fp, "\nPATTERN: /");
04827 
04828   if (ONIGENC_MBC_MINLEN(enc) > 1) {
04829     const UChar *p;
04830     OnigCodePoint code;
04831 
04832     p = s;
04833     while (p < end) {
04834       code = ONIGENC_MBC_TO_CODE(enc, p, end);
04835       if (code >= 0x80) {
04836        fprintf(fp, " 0x%04x ", (int )code);
04837       }
04838       else {
04839        fputc((int )code, fp);
04840       }
04841 
04842       p += enc_len(enc, p);
04843     }
04844   }
04845   else {
04846     while (s < end) {
04847       fputc((int )*s, fp);
04848       s++;
04849     }
04850   }
04851 
04852   fprintf(fp, "/\n");
04853 }
04854 
04855 static void
04856 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
04857 {
04858   if (a == ONIG_INFINITE_DISTANCE)
04859     fputs("inf", f);
04860   else
04861     fprintf(f, "(%u)", a);
04862 
04863   fputs("-", f);
04864 
04865   if (b == ONIG_INFINITE_DISTANCE)
04866     fputs("inf", f);
04867   else
04868     fprintf(f, "(%u)", b);
04869 }
04870 
04871 static void
04872 print_anchor(FILE* f, int anchor)
04873 {
04874   int q = 0;
04875 
04876   fprintf(f, "[");
04877 
04878   if (anchor & ANCHOR_BEGIN_BUF) {
04879     fprintf(f, "begin-buf");
04880     q = 1;
04881   }
04882   if (anchor & ANCHOR_BEGIN_LINE) {
04883     if (q) fprintf(f, ", ");
04884     q = 1;
04885     fprintf(f, "begin-line");
04886   }
04887   if (anchor & ANCHOR_BEGIN_POSITION) {
04888     if (q) fprintf(f, ", ");
04889     q = 1;
04890     fprintf(f, "begin-pos");
04891   }
04892   if (anchor & ANCHOR_END_BUF) {
04893     if (q) fprintf(f, ", ");
04894     q = 1;
04895     fprintf(f, "end-buf");
04896   }
04897   if (anchor & ANCHOR_SEMI_END_BUF) {
04898     if (q) fprintf(f, ", ");
04899     q = 1;
04900     fprintf(f, "semi-end-buf");
04901   }
04902   if (anchor & ANCHOR_END_LINE) {
04903     if (q) fprintf(f, ", ");
04904     q = 1;
04905     fprintf(f, "end-line");
04906   }
04907   if (anchor & ANCHOR_ANYCHAR_STAR) {
04908     if (q) fprintf(f, ", ");
04909     q = 1;
04910     fprintf(f, "anychar-star");
04911   }
04912   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
04913     if (q) fprintf(f, ", ");
04914     fprintf(f, "anychar-star-pl");
04915   }
04916 
04917   fprintf(f, "]");
04918 }
04919 
04920 static void
04921 print_optimize_info(FILE* f, regex_t* reg)
04922 {
04923   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
04924                               "EXACT_IC", "MAP" };
04925 
04926   fprintf(f, "optimize: %s\n", on[reg->optimize]);
04927   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
04928   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
04929     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
04930   fprintf(f, "\n");
04931 
04932   if (reg->optimize) {
04933     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
04934     fprintf(f, "\n");
04935   }
04936   fprintf(f, "\n");
04937 
04938   if (reg->exact) {
04939     UChar *p;
04940     fprintf(f, "exact: [");
04941     for (p = reg->exact; p < reg->exact_end; p++) {
04942       fputc(*p, f);
04943     }
04944     fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
04945   }
04946   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
04947     int c, i, n = 0;
04948 
04949     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04950       if (reg->map[i]) n++;
04951 
04952     fprintf(f, "map: n=%d\n", n);
04953     if (n > 0) {
04954       c = 0;
04955       fputc('[', f);
04956       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04957        if (reg->map[i] != 0) {
04958           if (c > 0)  fputs(", ", f);
04959           c++;
04960           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
04961               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
04962             fputc(i, f);
04963           else
04964             fprintf(f, "%d", i);
04965         }
04966       }
04967       fprintf(f, "]\n");
04968     }
04969   }
04970 }
04971 #endif /* ONIG_DEBUG */
04972 
04973 
04974 static void
04975 onig_free_body(regex_t* reg)
04976 {
04977   if (IS_NOT_NULL(reg->p))                xfree(reg->p);
04978   if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
04979   if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
04980   if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
04981   if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
04982   if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
04983 
04984 #ifdef USE_NAMED_GROUP
04985   onig_names_free(reg);
04986 #endif
04987 }
04988 
04989 extern void
04990 onig_free(regex_t* reg)
04991 {
04992   if (IS_NOT_NULL(reg)) {
04993     onig_free_body(reg);
04994     xfree(reg);
04995   }
04996 }
04997 
04998 #define REGEX_TRANSFER(to,from) do {\
04999   (to)->state = ONIG_STATE_MODIFY;\
05000   onig_free_body(to);\
05001   xmemcpy(to, from, sizeof(regex_t));\
05002   xfree(from);\
05003 } while (0)
05004 
05005 extern void
05006 onig_transfer(regex_t* to, regex_t* from)
05007 {
05008   THREAD_ATOMIC_START;
05009   REGEX_TRANSFER(to, from);
05010   THREAD_ATOMIC_END;
05011 }
05012 
05013 #define REGEX_CHAIN_HEAD(reg) do {\
05014   while (IS_NOT_NULL((reg)->chain)) {\
05015     (reg) = (reg)->chain;\
05016   }\
05017 } while (0)
05018 
05019 extern void
05020 onig_chain_link_add(regex_t* to, regex_t* add)
05021 {
05022   THREAD_ATOMIC_START;
05023   REGEX_CHAIN_HEAD(to);
05024   to->chain = add;
05025   THREAD_ATOMIC_END;
05026 }
05027 
05028 extern void
05029 onig_chain_reduce(regex_t* reg)
05030 {
05031   regex_t *head, *prev;
05032 
05033   prev = reg;
05034   head = prev->chain;
05035   if (IS_NOT_NULL(head)) {
05036     reg->state = ONIG_STATE_MODIFY;
05037     while (IS_NOT_NULL(head->chain)) {
05038       prev = head;
05039       head = head->chain;
05040     }
05041     prev->chain = (regex_t* )NULL;
05042     REGEX_TRANSFER(reg, head);
05043   }
05044 }
05045 
05046 #if 0
05047 extern int
05048 onig_clone(regex_t** to, regex_t* from)
05049 {
05050   int r, size;
05051   regex_t* reg;
05052 
05053 #ifdef USE_MULTI_THREAD_SYSTEM
05054   if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
05055     ONIG_STATE_INC(from);
05056     if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05057       onig_chain_reduce(from);
05058       ONIG_STATE_INC(from);
05059     }
05060   }
05061   else {
05062     int n = 0;
05063     while (ONIG_STATE(from) < ONIG_STATE_NORMAL) {
05064       if (++n > THREAD_PASS_LIMIT_COUNT)
05065        return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
05066       THREAD_PASS;
05067     }
05068     ONIG_STATE_INC(from);
05069   }
05070 #endif /* USE_MULTI_THREAD_SYSTEM */
05071 
05072   r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
05073                       from->enc, ONIG_SYNTAX_DEFAULT);
05074   if (r != 0) {
05075     ONIG_STATE_DEC(from);
05076     return r;
05077   }
05078 
05079   xmemcpy(reg, from, sizeof(onig_t));
05080   reg->chain = (regex_t* )NULL;
05081   reg->state = ONIG_STATE_NORMAL;
05082 
05083   if (from->p) {
05084     reg->p = (UChar* )xmalloc(reg->alloc);
05085     if (IS_NULL(reg->p)) goto mem_error;
05086     xmemcpy(reg->p, from->p, reg->alloc);
05087   }
05088 
05089   if (from->exact) {
05090     reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
05091     if (IS_NULL(reg->exact)) goto mem_error;
05092     reg->exact_end = reg->exact + (from->exact_end - from->exact);
05093     xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
05094   }
05095 
05096   if (from->int_map) {
05097     size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05098     reg->int_map = (int* )xmalloc(size);
05099     if (IS_NULL(reg->int_map)) goto mem_error;
05100     xmemcpy(reg->int_map, from->int_map, size);
05101   }
05102 
05103   if (from->int_map_backward) {
05104     size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05105     reg->int_map_backward = (int* )xmalloc(size);
05106     if (IS_NULL(reg->int_map_backward)) goto mem_error;
05107     xmemcpy(reg->int_map_backward, from->int_map_backward, size);
05108   }
05109 
05110 #ifdef USE_NAMED_GROUP
05111   reg->name_table = names_clone(from); /* names_clone is not implemented */
05112 #endif
05113 
05114   ONIG_STATE_DEC(from);
05115   *to = reg;
05116   return 0;
05117 
05118  mem_error:
05119   ONIG_STATE_DEC(from);
05120   return ONIGERR_MEMORY;
05121 }
05122 #endif
05123 
05124 #ifdef ONIG_DEBUG
05125 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05126 #endif
05127 #ifdef ONIG_DEBUG_PARSE_TREE
05128 static void print_tree P_((FILE* f, Node* node));
05129 #endif
05130 
05131 extern int
05132 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05133              OnigErrorInfo* einfo)
05134 {
05135 #define COMPILE_INIT_SIZE  20
05136 
05137   int r, init_size;
05138   Node*  root;
05139   ScanEnv  scan_env;
05140 #ifdef USE_SUBEXP_CALL
05141   UnsetAddrList  uslist;
05142 #endif
05143 
05144   reg->state = ONIG_STATE_COMPILING;
05145 
05146 #ifdef ONIG_DEBUG
05147   print_enc_string(stderr, reg->enc, pattern, pattern_end);
05148 #endif
05149 
05150   if (reg->alloc == 0) {
05151     init_size = (pattern_end - pattern) * 2;
05152     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05153     r = BBUF_INIT(reg, init_size);
05154     if (r != 0) goto end;
05155   }
05156   else
05157     reg->used = 0;
05158 
05159   reg->num_mem            = 0;
05160   reg->num_repeat         = 0;
05161   reg->num_null_check     = 0;
05162   reg->repeat_range_alloc = 0;
05163   reg->repeat_range       = (OnigRepeatRange* )NULL;
05164 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05165   reg->num_comb_exp_check = 0;
05166 #endif
05167 
05168   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05169   if (r != 0) goto err;
05170 
05171 #ifdef USE_NAMED_GROUP
05172   /* mixed use named group and no-named group */
05173   if (scan_env.num_named > 0 &&
05174       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05175       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05176     if (scan_env.num_named != scan_env.num_mem)
05177       r = disable_noname_group_capture(&root, reg, &scan_env);
05178     else
05179       r = numbered_ref_check(root);
05180 
05181     if (r != 0) goto err;
05182   }
05183 #endif
05184 
05185 #ifdef ONIG_DEBUG_PARSE_TREE
05186   print_tree(stderr, root);
05187 #endif
05188 
05189 #ifdef USE_SUBEXP_CALL
05190   if (scan_env.num_call > 0) {
05191     r = unset_addr_list_init(&uslist, scan_env.num_call);
05192     if (r != 0) goto err;
05193     scan_env.unset_addr_list = &uslist;
05194     r = setup_subexp_call(root, &scan_env);
05195     if (r != 0) goto err_unset;
05196     r = subexp_recursive_check_trav(root, &scan_env);
05197     if (r  < 0) goto err_unset;
05198     r = subexp_inf_recursive_check_trav(root, &scan_env);
05199     if (r != 0) goto err_unset;
05200 
05201     reg->num_call = scan_env.num_call;
05202   }
05203   else
05204     reg->num_call = 0;
05205 #endif
05206 
05207   r = setup_tree(root, reg, 0, &scan_env);
05208   if (r != 0) goto err_unset;
05209 
05210   reg->capture_history  = scan_env.capture_history;
05211   reg->bt_mem_start     = scan_env.bt_mem_start;
05212   reg->bt_mem_start    |= reg->capture_history;
05213   if (IS_FIND_CONDITION(reg->options))
05214     BIT_STATUS_ON_ALL(reg->bt_mem_end);
05215   else {
05216     reg->bt_mem_end  = scan_env.bt_mem_end;
05217     reg->bt_mem_end |= reg->capture_history;
05218   }
05219 
05220 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05221   if (scan_env.backrefed_mem == 0
05222 #ifdef USE_SUBEXP_CALL
05223       || scan_env.num_call == 0
05224 #endif
05225       ) {
05226     setup_comb_exp_check(root, 0, &scan_env);
05227 #ifdef USE_SUBEXP_CALL
05228     if (scan_env.has_recursion != 0) {
05229       scan_env.num_comb_exp_check = 0;
05230     }
05231     else
05232 #endif
05233     if (scan_env.comb_exp_max_regnum > 0) {
05234       int i;
05235       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05236        if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05237          scan_env.num_comb_exp_check = 0;
05238          break;
05239        }
05240       }
05241     }
05242   }
05243 
05244   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05245 #endif
05246 
05247   clear_optimize_info(reg);
05248 #ifndef ONIG_DONT_OPTIMIZE
05249   r = set_optimize_info_from_tree(root, reg, &scan_env);
05250   if (r != 0) goto err_unset;
05251 #endif
05252 
05253   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05254     xfree(scan_env.mem_nodes_dynamic);
05255     scan_env.mem_nodes_dynamic = (Node** )NULL;
05256   }
05257 
05258   r = compile_tree(root, reg);
05259   if (r == 0) {
05260     r = add_opcode(reg, OP_END);
05261 #ifdef USE_SUBEXP_CALL
05262     if (scan_env.num_call > 0) {
05263       r = unset_addr_list_fix(&uslist, reg);
05264       unset_addr_list_end(&uslist);
05265       if (r) goto err;
05266     }
05267 #endif
05268 
05269     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05270       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05271     else {
05272       if (reg->bt_mem_start != 0)
05273        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05274       else
05275        reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05276     }
05277   }
05278 #ifdef USE_SUBEXP_CALL
05279   else if (scan_env.num_call > 0) {
05280     unset_addr_list_end(&uslist);
05281   }
05282 #endif
05283   onig_node_free(root);
05284 
05285 #ifdef ONIG_DEBUG_COMPILE
05286 #ifdef USE_NAMED_GROUP
05287   onig_print_names(stderr, reg);
05288 #endif
05289   print_compiled_byte_code_list(stderr, reg);
05290 #endif
05291 
05292  end:
05293   reg->state = ONIG_STATE_NORMAL;
05294   return r;
05295 
05296  err_unset:
05297 #ifdef USE_SUBEXP_CALL
05298   if (scan_env.num_call > 0) {
05299     unset_addr_list_end(&uslist);
05300   }
05301 #endif
05302  err:
05303   if (IS_NOT_NULL(scan_env.error)) {
05304     if (IS_NOT_NULL(einfo)) {
05305       einfo->enc     = scan_env.enc;
05306       einfo->par     = scan_env.error;
05307       einfo->par_end = scan_env.error_end;
05308     }
05309   }
05310 
05311   if (IS_NOT_NULL(root)) onig_node_free(root);
05312   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05313       xfree(scan_env.mem_nodes_dynamic);
05314   return r;
05315 }
05316 
05317 #ifdef USE_RECOMPILE_API
05318 extern int
05319 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05320            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05321            OnigErrorInfo* einfo)
05322 {
05323   int r;
05324   regex_t *new_reg;
05325 
05326   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05327   if (r) return r;
05328   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05329     onig_transfer(reg, new_reg);
05330   }
05331   else {
05332     onig_chain_link_add(reg, new_reg);
05333   }
05334   return 0;
05335 }
05336 #endif
05337 
05338 static int onig_inited = 0;
05339 
05340 extern int
05341 onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
05342                 OnigEncoding enc, OnigSyntaxType* syntax)
05343 {
05344   if (! onig_inited)
05345     onig_init();
05346 
05347   if (ONIGENC_IS_UNDEF(enc))
05348     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05349 
05350   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05351       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05352     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05353   }
05354 
05355   *reg = (regex_t* )xmalloc(sizeof(regex_t));
05356   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05357   (*reg)->state = ONIG_STATE_MODIFY;
05358 
05359   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05360     option |= syntax->options;
05361     option &= ~ONIG_OPTION_SINGLELINE;
05362   }
05363   else
05364     option |= syntax->options;
05365 
05366   (*reg)->enc              = enc;
05367   (*reg)->options          = option;
05368   (*reg)->syntax           = syntax;
05369   (*reg)->optimize         = 0;
05370   (*reg)->exact            = (UChar* )NULL;
05371   (*reg)->int_map          = (int* )NULL;
05372   (*reg)->int_map_backward = (int* )NULL;
05373   (*reg)->chain            = (regex_t* )NULL;
05374 
05375   (*reg)->p                = (UChar* )NULL;
05376   (*reg)->alloc            = 0;
05377   (*reg)->used             = 0;
05378   (*reg)->name_table       = (void* )NULL;
05379 
05380   (*reg)->ambig_flag       = ambig_flag;
05381   (*reg)->ambig_flag      &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
05382 
05383   return 0;
05384 }
05385 
05386 extern int
05387 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05388          OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05389          OnigErrorInfo* einfo)
05390 {
05391   int r;
05392 
05393   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05394 
05395   r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
05396                       enc, syntax);
05397   if (r) return r;
05398 
05399   r = onig_compile(*reg, pattern, pattern_end, einfo);
05400   if (r) {
05401     onig_free(*reg);
05402     *reg = NULL;
05403   }
05404   return r;
05405 }
05406 
05407 extern int
05408 onig_init(void)
05409 {
05410   if (onig_inited != 0)
05411     return 0;
05412 
05413   onig_inited = 1;
05414 
05415   THREAD_SYSTEM_INIT;
05416   THREAD_ATOMIC_START;
05417 
05418   onigenc_init();
05419   onigenc_set_default_caseconv_table((UChar* )0);
05420 
05421 #ifdef ONIG_DEBUG_STATISTICS
05422   onig_statistics_init();
05423 #endif
05424 
05425   THREAD_ATOMIC_END;
05426   return 0;
05427 }
05428 
05429 
05430 extern int
05431 onig_end(void)
05432 {
05433   extern int onig_free_shared_cclass_table(void);
05434 
05435   THREAD_ATOMIC_START;
05436 
05437 #ifdef ONIG_DEBUG_STATISTICS
05438   onig_print_statistics(stderr);
05439 #endif
05440 
05441 #ifdef USE_SHARED_CCLASS_TABLE
05442   onig_free_shared_cclass_table();
05443 #endif
05444 
05445 #ifdef USE_RECYCLE_NODE
05446   onig_free_node_list();
05447 #endif
05448 
05449   onig_inited = 0;
05450 
05451   THREAD_ATOMIC_END;
05452   THREAD_SYSTEM_END;
05453   return 0;
05454 }
05455 
05456 
05457 #ifdef ONIG_DEBUG
05458 
05459 /* arguments type */
05460 #define ARG_SPECIAL     -1
05461 #define ARG_NON          0
05462 #define ARG_RELADDR      1
05463 #define ARG_ABSADDR      2
05464 #define ARG_LENGTH       3
05465 #define ARG_MEMNUM       4
05466 #define ARG_OPTION       5
05467 #define ARG_STATE_CHECK  6
05468 
05469 OnigOpInfoType OnigOpInfo[] = {
05470   { OP_FINISH,            "finish",          ARG_NON },
05471   { OP_END,               "end",             ARG_NON },
05472   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
05473   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
05474   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
05475   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
05476   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
05477   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
05478   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
05479   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
05480   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
05481   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
05482   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
05483   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
05484   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
05485   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
05486   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
05487   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
05488   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
05489   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
05490   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
05491   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
05492   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
05493   { OP_ANYCHAR,           "anychar",         ARG_NON },
05494   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
05495   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
05496   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
05497   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05498   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05499   { OP_WORD,                "word",            ARG_NON },
05500   { OP_NOT_WORD,            "not-word",        ARG_NON },
05501   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
05502   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
05503   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
05504   { OP_WORD_END,            "word-end",        ARG_NON },
05505   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
05506   { OP_END_BUF,             "end-buf",         ARG_NON },
05507   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
05508   { OP_END_LINE,            "end-line",        ARG_NON },
05509   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
05510   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
05511   { OP_BACKREF1,            "backref1",             ARG_NON },
05512   { OP_BACKREF2,            "backref2",             ARG_NON },
05513   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
05514   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
05515   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
05516   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
05517   { OP_BACKREF_AT_LEVEL,    "backref_at_level",     ARG_SPECIAL },
05518   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
05519   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
05520   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
05521   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
05522   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
05523   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
05524   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
05525   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
05526   { OP_FAIL,                "fail",                 ARG_NON },
05527   { OP_JUMP,                "jump",                 ARG_RELADDR },
05528   { OP_PUSH,                "push",                 ARG_RELADDR },
05529   { OP_POP,                 "pop",                  ARG_NON },
05530   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
05531   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
05532   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
05533   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
05534   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
05535   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
05536   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
05537   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
05538   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
05539   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
05540   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
05541   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
05542   { OP_PUSH_POS,             "push-pos",             ARG_NON },
05543   { OP_POP_POS,              "pop-pos",              ARG_NON },
05544   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
05545   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
05546   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
05547   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
05548   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
05549   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05550   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05551   { OP_CALL,                 "call",                 ARG_ABSADDR },
05552   { OP_RETURN,               "return",               ARG_NON },
05553   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
05554   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05555   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
05556   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
05557   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05558     "state-check-anychar-ml*", ARG_STATE_CHECK },
05559   { -1, "", ARG_NON }
05560 };
05561 
05562 static char*
05563 op2name(int opcode)
05564 {
05565   int i;
05566 
05567   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05568     if (opcode == OnigOpInfo[i].opcode)
05569       return OnigOpInfo[i].name;
05570   }
05571   return "";
05572 }
05573 
05574 static int
05575 op2arg_type(int opcode)
05576 {
05577   int i;
05578 
05579   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05580     if (opcode == OnigOpInfo[i].opcode)
05581       return OnigOpInfo[i].arg_type;
05582   }
05583   return ARG_SPECIAL;
05584 }
05585 
05586 static void
05587 Indent(FILE* f, int indent)
05588 {
05589   int i;
05590   for (i = 0; i < indent; i++) putc(' ', f);
05591 }
05592 
05593 static void
05594 p_string(FILE* f, int len, UChar* s)
05595 {
05596   fputs(":", f);
05597   while (len-- > 0) { fputc(*s++, f); }
05598 }
05599 
05600 static void
05601 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05602 {
05603   int x = len * mb_len;
05604 
05605   fprintf(f, ":%d:", len);
05606   while (x-- > 0) { fputc(*s++, f); }
05607 }
05608 
05609 extern void
05610 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
05611                               OnigEncoding enc)
05612 {
05613   int i, n, arg_type;
05614   RelAddrType addr;
05615   LengthType len;
05616   MemNumType mem;
05617   StateCheckNumType scn;
05618   OnigCodePoint code;
05619   UChar *q;
05620 
05621   fprintf(f, "[%s", op2name(*bp));
05622   arg_type = op2arg_type(*bp);
05623   if (arg_type != ARG_SPECIAL) {
05624     bp++;
05625     switch (arg_type) {
05626     case ARG_NON:
05627       break;
05628     case ARG_RELADDR:
05629       GET_RELADDR_INC(addr, bp);
05630       fprintf(f, ":(%d)", addr);
05631       break;
05632     case ARG_ABSADDR:
05633       GET_ABSADDR_INC(addr, bp);
05634       fprintf(f, ":(%d)", addr);
05635       break;
05636     case ARG_LENGTH:
05637       GET_LENGTH_INC(len, bp);
05638       fprintf(f, ":%d", len);
05639       break;
05640     case ARG_MEMNUM:
05641       mem = *((MemNumType* )bp);
05642       bp += SIZE_MEMNUM;
05643       fprintf(f, ":%d", mem);
05644       break;
05645     case ARG_OPTION:
05646       {
05647        OnigOptionType option = *((OnigOptionType* )bp);
05648        bp += SIZE_OPTION;
05649        fprintf(f, ":%d", option);
05650       }
05651       break;
05652 
05653     case ARG_STATE_CHECK:
05654       scn = *((StateCheckNumType* )bp);
05655       bp += SIZE_STATE_CHECK_NUM;
05656       fprintf(f, ":%d", scn);
05657       break;
05658     }
05659   }
05660   else {
05661     switch (*bp++) {
05662     case OP_EXACT1:
05663     case OP_ANYCHAR_STAR_PEEK_NEXT:
05664     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05665       p_string(f, 1, bp++); break;
05666     case OP_EXACT2:
05667       p_string(f, 2, bp); bp += 2; break;
05668     case OP_EXACT3:
05669       p_string(f, 3, bp); bp += 3; break;
05670     case OP_EXACT4:
05671       p_string(f, 4, bp); bp += 4; break;
05672     case OP_EXACT5:
05673       p_string(f, 5, bp); bp += 5; break;
05674     case OP_EXACTN:
05675       GET_LENGTH_INC(len, bp);
05676       p_len_string(f, len, 1, bp);
05677       bp += len;
05678       break;
05679     
05680     case OP_EXACTMB2N1:
05681       p_string(f, 2, bp); bp += 2; break;
05682     case OP_EXACTMB2N2:
05683       p_string(f, 4, bp); bp += 4; break;
05684     case OP_EXACTMB2N3:
05685       p_string(f, 6, bp); bp += 6; break;
05686     case OP_EXACTMB2N:
05687       GET_LENGTH_INC(len, bp);
05688       p_len_string(f, len, 2, bp);
05689       bp += len * 2;
05690       break;
05691     case OP_EXACTMB3N:
05692       GET_LENGTH_INC(len, bp);
05693       p_len_string(f, len, 3, bp);
05694       bp += len * 3;
05695       break;
05696     case OP_EXACTMBN:
05697       {
05698        int mb_len;
05699       
05700        GET_LENGTH_INC(mb_len, bp);
05701        GET_LENGTH_INC(len, bp);
05702        fprintf(f, ":%d:%d:", mb_len, len);
05703        n = len * mb_len;
05704        while (n-- > 0) { fputc(*bp++, f); }
05705       }
05706       break;
05707 
05708     case OP_EXACT1_IC:
05709       len = enc_len(enc, bp);
05710       p_string(f, len, bp);
05711       bp += len;
05712       break;
05713     case OP_EXACTN_IC:
05714       GET_LENGTH_INC(len, bp);
05715       p_len_string(f, len, 1, bp);
05716       bp += len;
05717       break;
05718 
05719     case OP_CCLASS:
05720       n = bitset_on_num((BitSetRef )bp);
05721       bp += SIZE_BITSET;
05722       fprintf(f, ":%d", n);
05723       break;
05724 
05725     case OP_CCLASS_NOT:
05726       n = bitset_on_num((BitSetRef )bp);
05727       bp += SIZE_BITSET;
05728       fprintf(f, ":%d", n);
05729       break;
05730 
05731     case OP_CCLASS_MB:
05732     case OP_CCLASS_MB_NOT:
05733       GET_LENGTH_INC(len, bp);
05734       q = bp;
05735 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
05736       ALIGNMENT_RIGHT(q);
05737 #endif
05738       GET_CODE_POINT(code, q);
05739       bp += len;
05740       fprintf(f, ":%d:%d", (int )code, len);
05741       break;
05742 
05743     case OP_CCLASS_MIX:
05744     case OP_CCLASS_MIX_NOT:
05745       n = bitset_on_num((BitSetRef )bp);
05746       bp += SIZE_BITSET;
05747       GET_LENGTH_INC(len, bp);
05748       q = bp;
05749 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
05750       ALIGNMENT_RIGHT(q);
05751 #endif
05752       GET_CODE_POINT(code, q);
05753       bp += len;
05754       fprintf(f, ":%d:%d:%d", n, (int )code, len);
05755       break;
05756 
05757     case OP_CCLASS_NODE:
05758       {
05759         CClassNode *cc;
05760 
05761         GET_POINTER_INC(cc, bp);
05762         n = bitset_on_num(cc->bs);
05763         fprintf(f, ":%u:%d", (unsigned int )cc, n);
05764       }
05765       break;
05766 
05767     case OP_BACKREFN_IC:
05768       mem = *((MemNumType* )bp);
05769       bp += SIZE_MEMNUM;
05770       fprintf(f, ":%d", mem);
05771       break;
05772 
05773     case OP_BACKREF_MULTI_IC:
05774     case OP_BACKREF_MULTI:
05775       fputs(" ", f);
05776       GET_LENGTH_INC(len, bp);
05777       for (i = 0; i < len; i++) {
05778        GET_MEMNUM_INC(mem, bp);
05779        if (i > 0) fputs(", ", f);
05780        fprintf(f, "%d", mem);
05781       }
05782       break;
05783 
05784     case OP_BACKREF_AT_LEVEL:
05785       {
05786        OnigOptionType option;
05787        LengthType level;
05788 
05789        GET_OPTION_INC(option, bp);
05790        fprintf(f, ":%d", option);
05791        GET_LENGTH_INC(level, bp);
05792        fprintf(f, ":%d", level);
05793 
05794        fputs(" ", f);
05795        GET_LENGTH_INC(len, bp);
05796        for (i = 0; i < len; i++) {
05797          GET_MEMNUM_INC(mem, bp);
05798          if (i > 0) fputs(", ", f);
05799          fprintf(f, "%d", mem);
05800        }
05801       }
05802       break;
05803 
05804     case OP_REPEAT:
05805     case OP_REPEAT_NG:
05806       {
05807        mem = *((MemNumType* )bp);
05808        bp += SIZE_MEMNUM;
05809        addr = *((RelAddrType* )bp);
05810        bp += SIZE_RELADDR;
05811        fprintf(f, ":%d:%d", mem, addr);
05812       }
05813       break;
05814 
05815     case OP_PUSH_OR_JUMP_EXACT1:
05816     case OP_PUSH_IF_PEEK_NEXT:
05817       addr = *((RelAddrType* )bp);
05818       bp += SIZE_RELADDR;
05819       fprintf(f, ":(%d)", addr);
05820       p_string(f, 1, bp);
05821       bp += 1;
05822       break;
05823 
05824     case OP_LOOK_BEHIND:
05825       GET_LENGTH_INC(len, bp);
05826       fprintf(f, ":%d", len);
05827       break;
05828 
05829     case OP_PUSH_LOOK_BEHIND_NOT:
05830       GET_RELADDR_INC(addr, bp);
05831       GET_LENGTH_INC(len, bp);
05832       fprintf(f, ":%d:(%d)", len, addr);
05833       break;
05834 
05835     case OP_STATE_CHECK_PUSH:
05836     case OP_STATE_CHECK_PUSH_OR_JUMP:
05837       scn = *((StateCheckNumType* )bp);
05838       bp += SIZE_STATE_CHECK_NUM;
05839       addr = *((RelAddrType* )bp);
05840       bp += SIZE_RELADDR;
05841       fprintf(f, ":%d:(%d)", scn, addr);
05842       break;
05843 
05844     default:
05845       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
05846              *--bp);
05847     }
05848   }
05849   fputs("]", f);
05850   if (nextp) *nextp = bp;
05851 }
05852 
05853 static void
05854 print_compiled_byte_code_list(FILE* f, regex_t* reg)
05855 {
05856   int ncode;
05857   UChar* bp = reg->p;
05858   UChar* end = reg->p + reg->used;
05859 
05860   fprintf(f, "code length: %d\n", reg->used);
05861 
05862   ncode = 0;
05863   while (bp < end) {
05864     ncode++;
05865     if (bp > reg->p) {
05866       if (ncode % 5 == 0)
05867        fprintf(f, "\n");
05868       else
05869        fputs(" ", f);
05870     }
05871     onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
05872   }
05873 
05874   fprintf(f, "\n");
05875 }
05876 
05877 static void
05878 print_indent_tree(FILE* f, Node* node, int indent)
05879 {
05880   int i, type;
05881   int add = 3;
05882   UChar* p;
05883 
05884   Indent(f, indent);
05885   if (IS_NULL(node)) {
05886     fprintf(f, "ERROR: null node!!!\n");
05887     exit (0);
05888   }
05889 
05890   type = NTYPE(node);
05891   switch (type) {
05892   case N_LIST:
05893   case N_ALT:
05894     if (NTYPE(node) == N_LIST)
05895       fprintf(f, "<list:%x>\n", (int )node);
05896     else
05897       fprintf(f, "<alt:%x>\n", (int )node);
05898 
05899     print_indent_tree(f, NCONS(node).left, indent + add);
05900     while (IS_NOT_NULL(node = NCONS(node).right)) {
05901       if (NTYPE(node) != type) {
05902        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
05903        exit(0);
05904       }
05905       print_indent_tree(f, NCONS(node).left, indent + add);
05906     }
05907     break;
05908 
05909   case N_STRING:
05910     fprintf(f, "<string%s:%x>",
05911            (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
05912     for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
05913       if (*p >= 0x20 && *p < 0x7f)
05914        fputc(*p, f);
05915       else {
05916        fprintf(f, " 0x%02x", *p);
05917       }
05918     }
05919     break;
05920 
05921   case N_CCLASS:
05922     fprintf(f, "<cclass:%x>", (int )node);
05923     if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
05924     if (NCCLASS(node).mbuf) {
05925       BBuf* bbuf = NCCLASS(node).mbuf;
05926       for (i = 0; i < bbuf->used; i++) {
05927        if (i > 0) fprintf(f, ",");
05928        fprintf(f, "%0x", bbuf->p[i]);
05929       }
05930     }
05931     break;
05932 
05933   case N_CTYPE:
05934     fprintf(f, "<ctype:%x> ", (int )node);
05935     switch (NCTYPE(node).type) {
05936     case CTYPE_WORD:            fputs("word",           f); break;
05937     case CTYPE_NOT_WORD:        fputs("not word",       f); break;
05938     default:
05939       fprintf(f, "ERROR: undefined ctype.\n");
05940       exit(0);
05941     }
05942     break;
05943 
05944   case N_ANYCHAR:
05945     fprintf(f, "<anychar:%x>", (int )node);
05946     break;
05947 
05948   case N_ANCHOR:
05949     fprintf(f, "<anchor:%x> ", (int )node);
05950     switch (NANCHOR(node).type) {
05951     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
05952     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
05953     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
05954     case ANCHOR_END_LINE:       fputs("end line",       f); break;
05955     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
05956     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
05957 
05958     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
05959     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
05960 #ifdef USE_WORD_BEGIN_END
05961     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
05962     case ANCHOR_WORD_END:        fputs("word end", f);       break;
05963 #endif
05964     case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
05965     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
05966     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
05967     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
05968 
05969     default:
05970       fprintf(f, "ERROR: undefined anchor type.\n");
05971       break;
05972     }
05973     break;
05974 
05975   case N_BACKREF:
05976     {
05977       int* p;
05978       BackrefNode* br = &(NBACKREF(node));
05979       p = BACKREFS_P(br);
05980       fprintf(f, "<backref:%x>", (int )node);
05981       for (i = 0; i < br->back_num; i++) {
05982        if (i > 0) fputs(", ", f);
05983        fprintf(f, "%d", p[i]);
05984       }
05985     }
05986     break;
05987 
05988 #ifdef USE_SUBEXP_CALL
05989   case N_CALL:
05990     {
05991       CallNode* cn = &(NCALL(node));
05992       fprintf(f, "<call:%x>", (int )node);
05993       p_string(f, cn->name_end - cn->name, cn->name);
05994     }
05995     break;
05996 #endif
05997 
05998   case N_QUANTIFIER:
05999     fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
06000            NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
06001            (NQUANTIFIER(node).greedy ? "" : "?"));
06002     print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
06003     break;
06004 
06005   case N_EFFECT:
06006     fprintf(f, "<effect:%x> ", (int )node);
06007     switch (NEFFECT(node).type) {
06008     case EFFECT_OPTION:
06009       fprintf(f, "option:%d\n", NEFFECT(node).option);
06010       print_indent_tree(f, NEFFECT(node).target, indent + add);
06011       break;
06012     case EFFECT_MEMORY:
06013       fprintf(f, "memory:%d", NEFFECT(node).regnum);
06014       break;
06015     case EFFECT_STOP_BACKTRACK:
06016       fprintf(f, "stop-bt");
06017       break;
06018 
06019     default:
06020       break;
06021     }
06022     fprintf(f, "\n");
06023     print_indent_tree(f, NEFFECT(node).target, indent + add);
06024     break;
06025 
06026   default:
06027     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06028     break;
06029   }
06030 
06031   if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
06032       type != N_EFFECT)
06033     fprintf(f, "\n");
06034   fflush(f);
06035 }
06036 #endif /* ONIG_DEBUG */
06037 
06038 #ifdef ONIG_DEBUG_PARSE_TREE
06039 static void
06040 print_tree(FILE* f, Node* node)
06041 {
06042   print_indent_tree(f, node, 0);
06043 }
06044 #endif