Back to index

glibc  2.9
regexec.c
Go to the documentation of this file.
00001 /* Extended regular expression matching and search library.
00002    Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
00022                                  int n) internal_function;
00023 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
00024 static void match_ctx_free (re_match_context_t *cache) internal_function;
00025 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
00026                                      int str_idx, int from, int to)
00027      internal_function;
00028 static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
00029      internal_function;
00030 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
00031                                       int str_idx) internal_function;
00032 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
00033                                              int node, int str_idx)
00034      internal_function;
00035 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
00036                         re_dfastate_t **limited_sts, int last_node,
00037                         int last_str_idx)
00038      internal_function;
00039 static reg_errcode_t re_search_internal (const regex_t *preg,
00040                                     const char *string, int length,
00041                                     int start, int range, int stop,
00042                                     size_t nmatch, regmatch_t pmatch[],
00043                                     int eflags) internal_function;
00044 static int re_search_2_stub (struct re_pattern_buffer *bufp,
00045                           const char *string1, int length1,
00046                           const char *string2, int length2,
00047                           int start, int range, struct re_registers *regs,
00048                           int stop, int ret_len) internal_function;
00049 static int re_search_stub (struct re_pattern_buffer *bufp,
00050                         const char *string, int length, int start,
00051                         int range, int stop, struct re_registers *regs,
00052                         int ret_len) internal_function;
00053 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
00054                            int nregs, int regs_allocated) internal_function;
00055 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
00056      internal_function;
00057 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
00058                         int *p_match_first) internal_function;
00059 static int check_halt_state_context (const re_match_context_t *mctx,
00060                                  const re_dfastate_t *state, int idx)
00061      internal_function;
00062 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
00063                       regmatch_t *prev_idx_match, int cur_node,
00064                       int cur_idx, int nmatch) internal_function;
00065 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
00066                                   int str_idx, int dest_node, int nregs,
00067                                   regmatch_t *regs,
00068                                   re_node_set *eps_via_nodes)
00069      internal_function;
00070 static reg_errcode_t set_regs (const regex_t *preg,
00071                             const re_match_context_t *mctx,
00072                             size_t nmatch, regmatch_t *pmatch,
00073                             int fl_backtrack) internal_function;
00074 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
00075      internal_function;
00076 
00077 #ifdef RE_ENABLE_I18N
00078 static int sift_states_iter_mb (const re_match_context_t *mctx,
00079                             re_sift_context_t *sctx,
00080                             int node_idx, int str_idx, int max_str_idx)
00081      internal_function;
00082 #endif /* RE_ENABLE_I18N */
00083 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
00084                                       re_sift_context_t *sctx)
00085      internal_function;
00086 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
00087                                      re_sift_context_t *sctx, int str_idx,
00088                                      re_node_set *cur_dest)
00089      internal_function;
00090 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
00091                                          re_sift_context_t *sctx,
00092                                          int str_idx,
00093                                          re_node_set *dest_nodes)
00094      internal_function;
00095 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
00096                                        re_node_set *dest_nodes,
00097                                        const re_node_set *candidates)
00098      internal_function;
00099 static int check_dst_limits (const re_match_context_t *mctx,
00100                           re_node_set *limits,
00101                           int dst_node, int dst_idx, int src_node,
00102                           int src_idx) internal_function;
00103 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
00104                                    int boundaries, int subexp_idx,
00105                                    int from_node, int bkref_idx)
00106      internal_function;
00107 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
00108                                   int limit, int subexp_idx,
00109                                   int node, int str_idx,
00110                                   int bkref_idx) internal_function;
00111 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
00112                                      re_node_set *dest_nodes,
00113                                      const re_node_set *candidates,
00114                                      re_node_set *limits,
00115                                      struct re_backref_cache_entry *bkref_ents,
00116                                      int str_idx) internal_function;
00117 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
00118                                    re_sift_context_t *sctx,
00119                                    int str_idx, const re_node_set *candidates)
00120      internal_function;
00121 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
00122                                    re_dfastate_t **dst,
00123                                    re_dfastate_t **src, int num)
00124      internal_function;
00125 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
00126                                     re_match_context_t *mctx) internal_function;
00127 static re_dfastate_t *transit_state (reg_errcode_t *err,
00128                                  re_match_context_t *mctx,
00129                                  re_dfastate_t *state) internal_function;
00130 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
00131                                        re_match_context_t *mctx,
00132                                        re_dfastate_t *next_state)
00133      internal_function;
00134 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
00135                                           re_node_set *cur_nodes,
00136                                           int str_idx) internal_function;
00137 #if 0
00138 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
00139                                    re_match_context_t *mctx,
00140                                    re_dfastate_t *pstate)
00141      internal_function;
00142 #endif
00143 #ifdef RE_ENABLE_I18N
00144 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
00145                                    re_dfastate_t *pstate)
00146      internal_function;
00147 #endif /* RE_ENABLE_I18N */
00148 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
00149                                      const re_node_set *nodes)
00150      internal_function;
00151 static reg_errcode_t get_subexp (re_match_context_t *mctx,
00152                              int bkref_node, int bkref_str_idx)
00153      internal_function;
00154 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
00155                                  const re_sub_match_top_t *sub_top,
00156                                  re_sub_match_last_t *sub_last,
00157                                  int bkref_node, int bkref_str)
00158      internal_function;
00159 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
00160                           int subexp_idx, int type) internal_function;
00161 static reg_errcode_t check_arrival (re_match_context_t *mctx,
00162                                 state_array_t *path, int top_node,
00163                                 int top_str, int last_node, int last_str,
00164                                 int type) internal_function;
00165 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
00166                                              int str_idx,
00167                                              re_node_set *cur_nodes,
00168                                              re_node_set *next_nodes)
00169      internal_function;
00170 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
00171                                           re_node_set *cur_nodes,
00172                                           int ex_subexp, int type)
00173      internal_function;
00174 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
00175                                              re_node_set *dst_nodes,
00176                                              int target, int ex_subexp,
00177                                              int type) internal_function;
00178 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
00179                                     re_node_set *cur_nodes, int cur_str,
00180                                     int subexp_num, int type)
00181      internal_function;
00182 static int build_trtable (const re_dfa_t *dfa,
00183                        re_dfastate_t *state) internal_function;
00184 #ifdef RE_ENABLE_I18N
00185 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
00186                                 const re_string_t *input, int idx)
00187      internal_function;
00188 # ifdef _LIBC
00189 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
00190                                              size_t name_len)
00191      internal_function;
00192 # endif /* _LIBC */
00193 #endif /* RE_ENABLE_I18N */
00194 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
00195                                    const re_dfastate_t *state,
00196                                    re_node_set *states_node,
00197                                    bitset_t *states_ch) internal_function;
00198 static int check_node_accept (const re_match_context_t *mctx,
00199                            const re_token_t *node, int idx)
00200      internal_function;
00201 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
00202      internal_function;
00203 
00204 /* Entry point for POSIX code.  */
00205 
00206 /* regexec searches for a given pattern, specified by PREG, in the
00207    string STRING.
00208 
00209    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
00210    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
00211    least NMATCH elements, and we set them to the offsets of the
00212    corresponding matched substrings.
00213 
00214    EFLAGS specifies `execution flags' which affect matching: if
00215    REG_NOTBOL is set, then ^ does not match at the beginning of the
00216    string; if REG_NOTEOL is set, then $ does not match at the end.
00217 
00218    We return 0 if we find a match and REG_NOMATCH if not.  */
00219 
00220 int
00221 regexec (preg, string, nmatch, pmatch, eflags)
00222     const regex_t *__restrict preg;
00223     const char *__restrict string;
00224     size_t nmatch;
00225     regmatch_t pmatch[];
00226     int eflags;
00227 {
00228   reg_errcode_t err;
00229   int start, length;
00230   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00231 
00232   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
00233     return REG_BADPAT;
00234 
00235   if (eflags & REG_STARTEND)
00236     {
00237       start = pmatch[0].rm_so;
00238       length = pmatch[0].rm_eo;
00239     }
00240   else
00241     {
00242       start = 0;
00243       length = strlen (string);
00244     }
00245 
00246   __libc_lock_lock (dfa->lock);
00247   if (preg->no_sub)
00248     err = re_search_internal (preg, string, length, start, length - start,
00249                            length, 0, NULL, eflags);
00250   else
00251     err = re_search_internal (preg, string, length, start, length - start,
00252                            length, nmatch, pmatch, eflags);
00253   __libc_lock_unlock (dfa->lock);
00254   return err != REG_NOERROR;
00255 }
00256 
00257 #ifdef _LIBC
00258 # include <shlib-compat.h>
00259 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
00260 
00261 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
00262 __typeof__ (__regexec) __compat_regexec;
00263 
00264 int
00265 attribute_compat_text_section
00266 __compat_regexec (const regex_t *__restrict preg,
00267                 const char *__restrict string, size_t nmatch,
00268                 regmatch_t pmatch[], int eflags)
00269 {
00270   return regexec (preg, string, nmatch, pmatch,
00271                 eflags & (REG_NOTBOL | REG_NOTEOL));
00272 }
00273 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
00274 # endif
00275 #endif
00276 
00277 /* Entry points for GNU code.  */
00278 
00279 /* re_match, re_search, re_match_2, re_search_2
00280 
00281    The former two functions operate on STRING with length LENGTH,
00282    while the later two operate on concatenation of STRING1 and STRING2
00283    with lengths LENGTH1 and LENGTH2, respectively.
00284 
00285    re_match() matches the compiled pattern in BUFP against the string,
00286    starting at index START.
00287 
00288    re_search() first tries matching at index START, then it tries to match
00289    starting from index START + 1, and so on.  The last start position tried
00290    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
00291    way as re_match().)
00292 
00293    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
00294    the first STOP characters of the concatenation of the strings should be
00295    concerned.
00296 
00297    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
00298    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
00299    computed relative to the concatenation, not relative to the individual
00300    strings.)
00301 
00302    On success, re_match* functions return the length of the match, re_search*
00303    return the position of the start of the match.  Return value -1 means no
00304    match was found and -2 indicates an internal error.  */
00305 
00306 int
00307 re_match (bufp, string, length, start, regs)
00308     struct re_pattern_buffer *bufp;
00309     const char *string;
00310     int length, start;
00311     struct re_registers *regs;
00312 {
00313   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
00314 }
00315 #ifdef _LIBC
00316 weak_alias (__re_match, re_match)
00317 #endif
00318 
00319 int
00320 re_search (bufp, string, length, start, range, regs)
00321     struct re_pattern_buffer *bufp;
00322     const char *string;
00323     int length, start, range;
00324     struct re_registers *regs;
00325 {
00326   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
00327 }
00328 #ifdef _LIBC
00329 weak_alias (__re_search, re_search)
00330 #endif
00331 
00332 int
00333 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
00334     struct re_pattern_buffer *bufp;
00335     const char *string1, *string2;
00336     int length1, length2, start, stop;
00337     struct re_registers *regs;
00338 {
00339   return re_search_2_stub (bufp, string1, length1, string2, length2,
00340                         start, 0, regs, stop, 1);
00341 }
00342 #ifdef _LIBC
00343 weak_alias (__re_match_2, re_match_2)
00344 #endif
00345 
00346 int
00347 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
00348     struct re_pattern_buffer *bufp;
00349     const char *string1, *string2;
00350     int length1, length2, start, range, stop;
00351     struct re_registers *regs;
00352 {
00353   return re_search_2_stub (bufp, string1, length1, string2, length2,
00354                         start, range, regs, stop, 0);
00355 }
00356 #ifdef _LIBC
00357 weak_alias (__re_search_2, re_search_2)
00358 #endif
00359 
00360 static int
00361 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
00362                 stop, ret_len)
00363     struct re_pattern_buffer *bufp;
00364     const char *string1, *string2;
00365     int length1, length2, start, range, stop, ret_len;
00366     struct re_registers *regs;
00367 {
00368   const char *str;
00369   int rval;
00370   int len = length1 + length2;
00371   int free_str = 0;
00372 
00373   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
00374     return -2;
00375 
00376   /* Concatenate the strings.  */
00377   if (length2 > 0)
00378     if (length1 > 0)
00379       {
00380        char *s = re_malloc (char, len);
00381 
00382        if (BE (s == NULL, 0))
00383          return -2;
00384 #ifdef _LIBC
00385        memcpy (__mempcpy (s, string1, length1), string2, length2);
00386 #else
00387        memcpy (s, string1, length1);
00388        memcpy (s + length1, string2, length2);
00389 #endif
00390        str = s;
00391        free_str = 1;
00392       }
00393     else
00394       str = string2;
00395   else
00396     str = string1;
00397 
00398   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
00399                       ret_len);
00400   if (free_str)
00401     re_free ((char *) str);
00402   return rval;
00403 }
00404 
00405 /* The parameters have the same meaning as those of re_search.
00406    Additional parameters:
00407    If RET_LEN is nonzero the length of the match is returned (re_match style);
00408    otherwise the position of the match is returned.  */
00409 
00410 static int
00411 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
00412     struct re_pattern_buffer *bufp;
00413     const char *string;
00414     int length, start, range, stop, ret_len;
00415     struct re_registers *regs;
00416 {
00417   reg_errcode_t result;
00418   regmatch_t *pmatch;
00419   int nregs, rval;
00420   int eflags = 0;
00421   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
00422 
00423   /* Check for out-of-range.  */
00424   if (BE (start < 0 || start > length, 0))
00425     return -1;
00426   if (BE (start + range > length, 0))
00427     range = length - start;
00428   else if (BE (start + range < 0, 0))
00429     range = -start;
00430 
00431   __libc_lock_lock (dfa->lock);
00432 
00433   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
00434   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
00435 
00436   /* Compile fastmap if we haven't yet.  */
00437   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
00438     re_compile_fastmap (bufp);
00439 
00440   if (BE (bufp->no_sub, 0))
00441     regs = NULL;
00442 
00443   /* We need at least 1 register.  */
00444   if (regs == NULL)
00445     nregs = 1;
00446   else if (BE (bufp->regs_allocated == REGS_FIXED &&
00447               regs->num_regs < bufp->re_nsub + 1, 0))
00448     {
00449       nregs = regs->num_regs;
00450       if (BE (nregs < 1, 0))
00451        {
00452          /* Nothing can be copied to regs.  */
00453          regs = NULL;
00454          nregs = 1;
00455        }
00456     }
00457   else
00458     nregs = bufp->re_nsub + 1;
00459   pmatch = re_malloc (regmatch_t, nregs);
00460   if (BE (pmatch == NULL, 0))
00461     {
00462       rval = -2;
00463       goto out;
00464     }
00465 
00466   result = re_search_internal (bufp, string, length, start, range, stop,
00467                             nregs, pmatch, eflags);
00468 
00469   rval = 0;
00470 
00471   /* I hope we needn't fill ther regs with -1's when no match was found.  */
00472   if (result != REG_NOERROR)
00473     rval = -1;
00474   else if (regs != NULL)
00475     {
00476       /* If caller wants register contents data back, copy them.  */
00477       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
00478                                       bufp->regs_allocated);
00479       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
00480        rval = -2;
00481     }
00482 
00483   if (BE (rval == 0, 1))
00484     {
00485       if (ret_len)
00486        {
00487          assert (pmatch[0].rm_so == start);
00488          rval = pmatch[0].rm_eo - start;
00489        }
00490       else
00491        rval = pmatch[0].rm_so;
00492     }
00493   re_free (pmatch);
00494  out:
00495   __libc_lock_unlock (dfa->lock);
00496   return rval;
00497 }
00498 
00499 static unsigned
00500 re_copy_regs (regs, pmatch, nregs, regs_allocated)
00501     struct re_registers *regs;
00502     regmatch_t *pmatch;
00503     int nregs, regs_allocated;
00504 {
00505   int rval = REGS_REALLOCATE;
00506   int i;
00507   int need_regs = nregs + 1;
00508   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
00509      uses.  */
00510 
00511   /* Have the register data arrays been allocated?  */
00512   if (regs_allocated == REGS_UNALLOCATED)
00513     { /* No.  So allocate them with malloc.  */
00514       regs->start = re_malloc (regoff_t, need_regs);
00515       regs->end = re_malloc (regoff_t, need_regs);
00516       if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
00517        return REGS_UNALLOCATED;
00518       regs->num_regs = need_regs;
00519     }
00520   else if (regs_allocated == REGS_REALLOCATE)
00521     { /* Yes.  If we need more elements than were already
00522         allocated, reallocate them.  If we need fewer, just
00523         leave it alone.  */
00524       if (BE (need_regs > regs->num_regs, 0))
00525        {
00526          regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
00527          regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
00528          if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
00529            return REGS_UNALLOCATED;
00530          regs->start = new_start;
00531          regs->end = new_end;
00532          regs->num_regs = need_regs;
00533        }
00534     }
00535   else
00536     {
00537       assert (regs_allocated == REGS_FIXED);
00538       /* This function may not be called with REGS_FIXED and nregs too big.  */
00539       assert (regs->num_regs >= nregs);
00540       rval = REGS_FIXED;
00541     }
00542 
00543   /* Copy the regs.  */
00544   for (i = 0; i < nregs; ++i)
00545     {
00546       regs->start[i] = pmatch[i].rm_so;
00547       regs->end[i] = pmatch[i].rm_eo;
00548     }
00549   for ( ; i < regs->num_regs; ++i)
00550     regs->start[i] = regs->end[i] = -1;
00551 
00552   return rval;
00553 }
00554 
00555 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
00556    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
00557    this memory for recording register information.  STARTS and ENDS
00558    must be allocated using the malloc library routine, and must each
00559    be at least NUM_REGS * sizeof (regoff_t) bytes long.
00560 
00561    If NUM_REGS == 0, then subsequent matches should allocate their own
00562    register data.
00563 
00564    Unless this function is called, the first search or match using
00565    PATTERN_BUFFER will allocate its own register data, without
00566    freeing the old data.  */
00567 
00568 void
00569 re_set_registers (bufp, regs, num_regs, starts, ends)
00570     struct re_pattern_buffer *bufp;
00571     struct re_registers *regs;
00572     unsigned num_regs;
00573     regoff_t *starts, *ends;
00574 {
00575   if (num_regs)
00576     {
00577       bufp->regs_allocated = REGS_REALLOCATE;
00578       regs->num_regs = num_regs;
00579       regs->start = starts;
00580       regs->end = ends;
00581     }
00582   else
00583     {
00584       bufp->regs_allocated = REGS_UNALLOCATED;
00585       regs->num_regs = 0;
00586       regs->start = regs->end = (regoff_t *) 0;
00587     }
00588 }
00589 #ifdef _LIBC
00590 weak_alias (__re_set_registers, re_set_registers)
00591 #endif
00592 
00593 /* Entry points compatible with 4.2 BSD regex library.  We don't define
00594    them unless specifically requested.  */
00595 
00596 #if defined _REGEX_RE_COMP || defined _LIBC
00597 int
00598 # ifdef _LIBC
00599 weak_function
00600 # endif
00601 re_exec (s)
00602      const char *s;
00603 {
00604   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
00605 }
00606 #endif /* _REGEX_RE_COMP */
00607 
00608 /* Internal entry point.  */
00609 
00610 /* Searches for a compiled pattern PREG in the string STRING, whose
00611    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
00612    mingings with regexec.  START, and RANGE have the same meanings
00613    with re_search.
00614    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
00615    otherwise return the error code.
00616    Note: We assume front end functions already check ranges.
00617    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
00618 
00619 static reg_errcode_t
00620 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
00621                   eflags)
00622     const regex_t *preg;
00623     const char *string;
00624     int length, start, range, stop, eflags;
00625     size_t nmatch;
00626     regmatch_t pmatch[];
00627 {
00628   reg_errcode_t err;
00629   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
00630   int left_lim, right_lim, incr;
00631   int fl_longest_match, match_first, match_kind, match_last = -1;
00632   int extra_nmatch;
00633   int sb, ch;
00634 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
00635   re_match_context_t mctx = { .dfa = dfa };
00636 #else
00637   re_match_context_t mctx;
00638 #endif
00639   char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
00640                  && range && !preg->can_be_null) ? preg->fastmap : NULL;
00641   RE_TRANSLATE_TYPE t = preg->translate;
00642 
00643 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
00644   memset (&mctx, '\0', sizeof (re_match_context_t));
00645   mctx.dfa = dfa;
00646 #endif
00647 
00648   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
00649   nmatch -= extra_nmatch;
00650 
00651   /* Check if the DFA haven't been compiled.  */
00652   if (BE (preg->used == 0 || dfa->init_state == NULL
00653          || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
00654          || dfa->init_state_begbuf == NULL, 0))
00655     return REG_NOMATCH;
00656 
00657 #ifdef DEBUG
00658   /* We assume front-end functions already check them.  */
00659   assert (start + range >= 0 && start + range <= length);
00660 #endif
00661 
00662   /* If initial states with non-begbuf contexts have no elements,
00663      the regex must be anchored.  If preg->newline_anchor is set,
00664      we'll never use init_state_nl, so do not check it.  */
00665   if (dfa->init_state->nodes.nelem == 0
00666       && dfa->init_state_word->nodes.nelem == 0
00667       && (dfa->init_state_nl->nodes.nelem == 0
00668          || !preg->newline_anchor))
00669     {
00670       if (start != 0 && start + range != 0)
00671         return REG_NOMATCH;
00672       start = range = 0;
00673     }
00674 
00675   /* We must check the longest matching, if nmatch > 0.  */
00676   fl_longest_match = (nmatch != 0 || dfa->nbackref);
00677 
00678   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
00679                          preg->translate, preg->syntax & RE_ICASE, dfa);
00680   if (BE (err != REG_NOERROR, 0))
00681     goto free_return;
00682   mctx.input.stop = stop;
00683   mctx.input.raw_stop = stop;
00684   mctx.input.newline_anchor = preg->newline_anchor;
00685 
00686   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
00687   if (BE (err != REG_NOERROR, 0))
00688     goto free_return;
00689 
00690   /* We will log all the DFA states through which the dfa pass,
00691      if nmatch > 1, or this dfa has "multibyte node", which is a
00692      back-reference or a node which can accept multibyte character or
00693      multi character collating element.  */
00694   if (nmatch > 1 || dfa->has_mb_node)
00695     {
00696       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
00697       if (BE (mctx.state_log == NULL, 0))
00698        {
00699          err = REG_ESPACE;
00700          goto free_return;
00701        }
00702     }
00703   else
00704     mctx.state_log = NULL;
00705 
00706   match_first = start;
00707   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
00708                         : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
00709 
00710   /* Check incrementally whether of not the input string match.  */
00711   incr = (range < 0) ? -1 : 1;
00712   left_lim = (range < 0) ? start + range : start;
00713   right_lim = (range < 0) ? start : start + range;
00714   sb = dfa->mb_cur_max == 1;
00715   match_kind =
00716     (fastmap
00717      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
00718        | (range >= 0 ? 2 : 0)
00719        | (t != NULL ? 1 : 0))
00720      : 8);
00721 
00722   for (;; match_first += incr)
00723     {
00724       err = REG_NOMATCH;
00725       if (match_first < left_lim || right_lim < match_first)
00726        goto free_return;
00727 
00728       /* Advance as rapidly as possible through the string, until we
00729         find a plausible place to start matching.  This may be done
00730         with varying efficiency, so there are various possibilities:
00731         only the most common of them are specialized, in order to
00732         save on code size.  We use a switch statement for speed.  */
00733       switch (match_kind)
00734        {
00735        case 8:
00736          /* No fastmap.  */
00737          break;
00738 
00739        case 7:
00740          /* Fastmap with single-byte translation, match forward.  */
00741          while (BE (match_first < right_lim, 1)
00742                && !fastmap[t[(unsigned char) string[match_first]]])
00743            ++match_first;
00744          goto forward_match_found_start_or_reached_end;
00745 
00746        case 6:
00747          /* Fastmap without translation, match forward.  */
00748          while (BE (match_first < right_lim, 1)
00749                && !fastmap[(unsigned char) string[match_first]])
00750            ++match_first;
00751 
00752        forward_match_found_start_or_reached_end:
00753          if (BE (match_first == right_lim, 0))
00754            {
00755              ch = match_first >= length
00756                      ? 0 : (unsigned char) string[match_first];
00757              if (!fastmap[t ? t[ch] : ch])
00758               goto free_return;
00759            }
00760          break;
00761 
00762        case 4:
00763        case 5:
00764          /* Fastmap without multi-byte translation, match backwards.  */
00765          while (match_first >= left_lim)
00766            {
00767              ch = match_first >= length
00768                      ? 0 : (unsigned char) string[match_first];
00769              if (fastmap[t ? t[ch] : ch])
00770               break;
00771              --match_first;
00772            }
00773          if (match_first < left_lim)
00774            goto free_return;
00775          break;
00776 
00777        default:
00778          /* In this case, we can't determine easily the current byte,
00779             since it might be a component byte of a multibyte
00780             character.  Then we use the constructed buffer instead.  */
00781          for (;;)
00782            {
00783              /* If MATCH_FIRST is out of the valid range, reconstruct the
00784                buffers.  */
00785              unsigned int offset = match_first - mctx.input.raw_mbs_idx;
00786              if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
00787               {
00788                 err = re_string_reconstruct (&mctx.input, match_first,
00789                                           eflags);
00790                 if (BE (err != REG_NOERROR, 0))
00791                   goto free_return;
00792 
00793                 offset = match_first - mctx.input.raw_mbs_idx;
00794               }
00795              /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
00796                Note that MATCH_FIRST must not be smaller than 0.  */
00797              ch = (match_first >= length
00798                   ? 0 : re_string_byte_at (&mctx.input, offset));
00799              if (fastmap[ch])
00800               break;
00801              match_first += incr;
00802              if (match_first < left_lim || match_first > right_lim)
00803                {
00804                  err = REG_NOMATCH;
00805                  goto free_return;
00806                }
00807            }
00808          break;
00809        }
00810 
00811       /* Reconstruct the buffers so that the matcher can assume that
00812         the matching starts from the beginning of the buffer.  */
00813       err = re_string_reconstruct (&mctx.input, match_first, eflags);
00814       if (BE (err != REG_NOERROR, 0))
00815        goto free_return;
00816 
00817 #ifdef RE_ENABLE_I18N
00818      /* Don't consider this char as a possible match start if it part,
00819        yet isn't the head, of a multibyte character.  */
00820       if (!sb && !re_string_first_byte (&mctx.input, 0))
00821        continue;
00822 #endif
00823 
00824       /* It seems to be appropriate one, then use the matcher.  */
00825       /* We assume that the matching starts from 0.  */
00826       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
00827       match_last = check_matching (&mctx, fl_longest_match,
00828                                range >= 0 ? &match_first : NULL);
00829       if (match_last != -1)
00830        {
00831          if (BE (match_last == -2, 0))
00832            {
00833              err = REG_ESPACE;
00834              goto free_return;
00835            }
00836          else
00837            {
00838              mctx.match_last = match_last;
00839              if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
00840               {
00841                 re_dfastate_t *pstate = mctx.state_log[match_last];
00842                 mctx.last_node = check_halt_state_context (&mctx, pstate,
00843                                                       match_last);
00844               }
00845              if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
00846                 || dfa->nbackref)
00847               {
00848                 err = prune_impossible_nodes (&mctx);
00849                 if (err == REG_NOERROR)
00850                   break;
00851                 if (BE (err != REG_NOMATCH, 0))
00852                   goto free_return;
00853                 match_last = -1;
00854               }
00855              else
00856               break; /* We found a match.  */
00857            }
00858        }
00859 
00860       match_ctx_clean (&mctx);
00861     }
00862 
00863 #ifdef DEBUG
00864   assert (match_last != -1);
00865   assert (err == REG_NOERROR);
00866 #endif
00867 
00868   /* Set pmatch[] if we need.  */
00869   if (nmatch > 0)
00870     {
00871       int reg_idx;
00872 
00873       /* Initialize registers.  */
00874       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
00875        pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
00876 
00877       /* Set the points where matching start/end.  */
00878       pmatch[0].rm_so = 0;
00879       pmatch[0].rm_eo = mctx.match_last;
00880 
00881       if (!preg->no_sub && nmatch > 1)
00882        {
00883          err = set_regs (preg, &mctx, nmatch, pmatch,
00884                        dfa->has_plural_match && dfa->nbackref > 0);
00885          if (BE (err != REG_NOERROR, 0))
00886            goto free_return;
00887        }
00888 
00889       /* At last, add the offset to the each registers, since we slided
00890         the buffers so that we could assume that the matching starts
00891         from 0.  */
00892       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
00893        if (pmatch[reg_idx].rm_so != -1)
00894          {
00895 #ifdef RE_ENABLE_I18N
00896            if (BE (mctx.input.offsets_needed != 0, 0))
00897              {
00898               pmatch[reg_idx].rm_so =
00899                 (pmatch[reg_idx].rm_so == mctx.input.valid_len
00900                  ? mctx.input.valid_raw_len
00901                  : mctx.input.offsets[pmatch[reg_idx].rm_so]);
00902               pmatch[reg_idx].rm_eo =
00903                 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
00904                  ? mctx.input.valid_raw_len
00905                  : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
00906              }
00907 #else
00908            assert (mctx.input.offsets_needed == 0);
00909 #endif
00910            pmatch[reg_idx].rm_so += match_first;
00911            pmatch[reg_idx].rm_eo += match_first;
00912          }
00913       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
00914        {
00915          pmatch[nmatch + reg_idx].rm_so = -1;
00916          pmatch[nmatch + reg_idx].rm_eo = -1;
00917        }
00918 
00919       if (dfa->subexp_map)
00920         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
00921           if (dfa->subexp_map[reg_idx] != reg_idx)
00922             {
00923               pmatch[reg_idx + 1].rm_so
00924                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
00925               pmatch[reg_idx + 1].rm_eo
00926                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
00927             }
00928     }
00929 
00930  free_return:
00931   re_free (mctx.state_log);
00932   if (dfa->nbackref)
00933     match_ctx_free (&mctx);
00934   re_string_destruct (&mctx.input);
00935   return err;
00936 }
00937 
00938 static reg_errcode_t
00939 prune_impossible_nodes (mctx)
00940      re_match_context_t *mctx;
00941 {
00942   const re_dfa_t *const dfa = mctx->dfa;
00943   int halt_node, match_last;
00944   reg_errcode_t ret;
00945   re_dfastate_t **sifted_states;
00946   re_dfastate_t **lim_states = NULL;
00947   re_sift_context_t sctx;
00948 #ifdef DEBUG
00949   assert (mctx->state_log != NULL);
00950 #endif
00951   match_last = mctx->match_last;
00952   halt_node = mctx->last_node;
00953   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
00954   if (BE (sifted_states == NULL, 0))
00955     {
00956       ret = REG_ESPACE;
00957       goto free_return;
00958     }
00959   if (dfa->nbackref)
00960     {
00961       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
00962       if (BE (lim_states == NULL, 0))
00963        {
00964          ret = REG_ESPACE;
00965          goto free_return;
00966        }
00967       while (1)
00968        {
00969          memset (lim_states, '\0',
00970                 sizeof (re_dfastate_t *) * (match_last + 1));
00971          sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
00972                       match_last);
00973          ret = sift_states_backward (mctx, &sctx);
00974          re_node_set_free (&sctx.limits);
00975          if (BE (ret != REG_NOERROR, 0))
00976              goto free_return;
00977          if (sifted_states[0] != NULL || lim_states[0] != NULL)
00978            break;
00979          do
00980            {
00981              --match_last;
00982              if (match_last < 0)
00983               {
00984                 ret = REG_NOMATCH;
00985                 goto free_return;
00986               }
00987            } while (mctx->state_log[match_last] == NULL
00988                    || !mctx->state_log[match_last]->halt);
00989          halt_node = check_halt_state_context (mctx,
00990                                           mctx->state_log[match_last],
00991                                           match_last);
00992        }
00993       ret = merge_state_array (dfa, sifted_states, lim_states,
00994                             match_last + 1);
00995       re_free (lim_states);
00996       lim_states = NULL;
00997       if (BE (ret != REG_NOERROR, 0))
00998        goto free_return;
00999     }
01000   else
01001     {
01002       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
01003       ret = sift_states_backward (mctx, &sctx);
01004       re_node_set_free (&sctx.limits);
01005       if (BE (ret != REG_NOERROR, 0))
01006        goto free_return;
01007     }
01008   re_free (mctx->state_log);
01009   mctx->state_log = sifted_states;
01010   sifted_states = NULL;
01011   mctx->last_node = halt_node;
01012   mctx->match_last = match_last;
01013   ret = REG_NOERROR;
01014  free_return:
01015   re_free (sifted_states);
01016   re_free (lim_states);
01017   return ret;
01018 }
01019 
01020 /* Acquire an initial state and return it.
01021    We must select appropriate initial state depending on the context,
01022    since initial states may have constraints like "<", "^", etc..  */
01023 
01024 static inline re_dfastate_t *
01025 __attribute ((always_inline)) internal_function
01026 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
01027                          int idx)
01028 {
01029   const re_dfa_t *const dfa = mctx->dfa;
01030   if (dfa->init_state->has_constraint)
01031     {
01032       unsigned int context;
01033       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
01034       if (IS_WORD_CONTEXT (context))
01035        return dfa->init_state_word;
01036       else if (IS_ORDINARY_CONTEXT (context))
01037        return dfa->init_state;
01038       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
01039        return dfa->init_state_begbuf;
01040       else if (IS_NEWLINE_CONTEXT (context))
01041        return dfa->init_state_nl;
01042       else if (IS_BEGBUF_CONTEXT (context))
01043        {
01044          /* It is relatively rare case, then calculate on demand.  */
01045          return re_acquire_state_context (err, dfa,
01046                                       dfa->init_state->entrance_nodes,
01047                                       context);
01048        }
01049       else
01050        /* Must not happen?  */
01051        return dfa->init_state;
01052     }
01053   else
01054     return dfa->init_state;
01055 }
01056 
01057 /* Check whether the regular expression match input string INPUT or not,
01058    and return the index where the matching end, return -1 if not match,
01059    or return -2 in case of an error.
01060    FL_LONGEST_MATCH means we want the POSIX longest matching.
01061    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
01062    next place where we may want to try matching.
01063    Note that the matcher assume that the maching starts from the current
01064    index of the buffer.  */
01065 
01066 static int
01067 internal_function
01068 check_matching (re_match_context_t *mctx, int fl_longest_match,
01069               int *p_match_first)
01070 {
01071   const re_dfa_t *const dfa = mctx->dfa;
01072   reg_errcode_t err;
01073   int match = 0;
01074   int match_last = -1;
01075   int cur_str_idx = re_string_cur_idx (&mctx->input);
01076   re_dfastate_t *cur_state;
01077   int at_init_state = p_match_first != NULL;
01078   int next_start_idx = cur_str_idx;
01079 
01080   err = REG_NOERROR;
01081   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
01082   /* An initial state must not be NULL (invalid).  */
01083   if (BE (cur_state == NULL, 0))
01084     {
01085       assert (err == REG_ESPACE);
01086       return -2;
01087     }
01088 
01089   if (mctx->state_log != NULL)
01090     {
01091       mctx->state_log[cur_str_idx] = cur_state;
01092 
01093       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
01094         later.  E.g. Processing back references.  */
01095       if (BE (dfa->nbackref, 0))
01096        {
01097          at_init_state = 0;
01098          err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
01099          if (BE (err != REG_NOERROR, 0))
01100            return err;
01101 
01102          if (cur_state->has_backref)
01103            {
01104              err = transit_state_bkref (mctx, &cur_state->nodes);
01105              if (BE (err != REG_NOERROR, 0))
01106                return err;
01107            }
01108        }
01109     }
01110 
01111   /* If the RE accepts NULL string.  */
01112   if (BE (cur_state->halt, 0))
01113     {
01114       if (!cur_state->has_constraint
01115          || check_halt_state_context (mctx, cur_state, cur_str_idx))
01116        {
01117          if (!fl_longest_match)
01118            return cur_str_idx;
01119          else
01120            {
01121              match_last = cur_str_idx;
01122              match = 1;
01123            }
01124        }
01125     }
01126 
01127   while (!re_string_eoi (&mctx->input))
01128     {
01129       re_dfastate_t *old_state = cur_state;
01130       int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
01131 
01132       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
01133           || (BE (next_char_idx >= mctx->input.valid_len, 0)
01134               && mctx->input.valid_len < mctx->input.len))
01135         {
01136           err = extend_buffers (mctx);
01137           if (BE (err != REG_NOERROR, 0))
01138            {
01139              assert (err == REG_ESPACE);
01140              return -2;
01141            }
01142         }
01143 
01144       cur_state = transit_state (&err, mctx, cur_state);
01145       if (mctx->state_log != NULL)
01146        cur_state = merge_state_with_log (&err, mctx, cur_state);
01147 
01148       if (cur_state == NULL)
01149        {
01150          /* Reached the invalid state or an error.  Try to recover a valid
01151             state using the state log, if available and if we have not
01152             already found a valid (even if not the longest) match.  */
01153          if (BE (err != REG_NOERROR, 0))
01154            return -2;
01155 
01156          if (mctx->state_log == NULL
01157              || (match && !fl_longest_match)
01158              || (cur_state = find_recover_state (&err, mctx)) == NULL)
01159            break;
01160        }
01161 
01162       if (BE (at_init_state, 0))
01163        {
01164          if (old_state == cur_state)
01165            next_start_idx = next_char_idx;
01166          else
01167            at_init_state = 0;
01168        }
01169 
01170       if (cur_state->halt)
01171        {
01172          /* Reached a halt state.
01173             Check the halt state can satisfy the current context.  */
01174          if (!cur_state->has_constraint
01175              || check_halt_state_context (mctx, cur_state,
01176                                       re_string_cur_idx (&mctx->input)))
01177            {
01178              /* We found an appropriate halt state.  */
01179              match_last = re_string_cur_idx (&mctx->input);
01180              match = 1;
01181 
01182              /* We found a match, do not modify match_first below.  */
01183              p_match_first = NULL;
01184              if (!fl_longest_match)
01185               break;
01186            }
01187        }
01188     }
01189 
01190   if (p_match_first)
01191     *p_match_first += next_start_idx;
01192 
01193   return match_last;
01194 }
01195 
01196 /* Check NODE match the current context.  */
01197 
01198 static int
01199 internal_function
01200 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
01201 {
01202   re_token_type_t type = dfa->nodes[node].type;
01203   unsigned int constraint = dfa->nodes[node].constraint;
01204   if (type != END_OF_RE)
01205     return 0;
01206   if (!constraint)
01207     return 1;
01208   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
01209     return 0;
01210   return 1;
01211 }
01212 
01213 /* Check the halt state STATE match the current context.
01214    Return 0 if not match, if the node, STATE has, is a halt node and
01215    match the context, return the node.  */
01216 
01217 static int
01218 internal_function
01219 check_halt_state_context (const re_match_context_t *mctx,
01220                        const re_dfastate_t *state, int idx)
01221 {
01222   int i;
01223   unsigned int context;
01224 #ifdef DEBUG
01225   assert (state->halt);
01226 #endif
01227   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
01228   for (i = 0; i < state->nodes.nelem; ++i)
01229     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
01230       return state->nodes.elems[i];
01231   return 0;
01232 }
01233 
01234 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
01235    corresponding to the DFA).
01236    Return the destination node, and update EPS_VIA_NODES, return -1 in case
01237    of errors.  */
01238 
01239 static int
01240 internal_function
01241 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
01242                  int *pidx, int node, re_node_set *eps_via_nodes,
01243                  struct re_fail_stack_t *fs)
01244 {
01245   const re_dfa_t *const dfa = mctx->dfa;
01246   int i, err;
01247   if (IS_EPSILON_NODE (dfa->nodes[node].type))
01248     {
01249       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
01250       re_node_set *edests = &dfa->edests[node];
01251       int dest_node;
01252       err = re_node_set_insert (eps_via_nodes, node);
01253       if (BE (err < 0, 0))
01254        return -2;
01255       /* Pick up a valid destination, or return -1 if none is found.  */
01256       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
01257        {
01258          int candidate = edests->elems[i];
01259          if (!re_node_set_contains (cur_nodes, candidate))
01260            continue;
01261           if (dest_node == -1)
01262            dest_node = candidate;
01263 
01264           else
01265            {
01266              /* In order to avoid infinite loop like "(a*)*", return the second
01267                 epsilon-transition if the first was already considered.  */
01268              if (re_node_set_contains (eps_via_nodes, dest_node))
01269                return candidate;
01270 
01271              /* Otherwise, push the second epsilon-transition on the fail stack.  */
01272              else if (fs != NULL
01273                      && push_fail_stack (fs, *pidx, candidate, nregs, regs,
01274                                        eps_via_nodes))
01275               return -2;
01276 
01277              /* We know we are going to exit.  */
01278              break;
01279            }
01280        }
01281       return dest_node;
01282     }
01283   else
01284     {
01285       int naccepted = 0;
01286       re_token_type_t type = dfa->nodes[node].type;
01287 
01288 #ifdef RE_ENABLE_I18N
01289       if (dfa->nodes[node].accept_mb)
01290        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
01291       else
01292 #endif /* RE_ENABLE_I18N */
01293       if (type == OP_BACK_REF)
01294        {
01295          int subexp_idx = dfa->nodes[node].opr.idx + 1;
01296          naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
01297          if (fs != NULL)
01298            {
01299              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
01300               return -1;
01301              else if (naccepted)
01302               {
01303                 char *buf = (char *) re_string_get_buffer (&mctx->input);
01304                 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
01305                            naccepted) != 0)
01306                   return -1;
01307               }
01308            }
01309 
01310          if (naccepted == 0)
01311            {
01312              int dest_node;
01313              err = re_node_set_insert (eps_via_nodes, node);
01314              if (BE (err < 0, 0))
01315               return -2;
01316              dest_node = dfa->edests[node].elems[0];
01317              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
01318                                    dest_node))
01319               return dest_node;
01320            }
01321        }
01322 
01323       if (naccepted != 0
01324          || check_node_accept (mctx, dfa->nodes + node, *pidx))
01325        {
01326          int dest_node = dfa->nexts[node];
01327          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
01328          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
01329                    || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
01330                                           dest_node)))
01331            return -1;
01332          re_node_set_empty (eps_via_nodes);
01333          return dest_node;
01334        }
01335     }
01336   return -1;
01337 }
01338 
01339 static reg_errcode_t
01340 internal_function
01341 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
01342                int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
01343 {
01344   reg_errcode_t err;
01345   int num = fs->num++;
01346   if (fs->num == fs->alloc)
01347     {
01348       struct re_fail_stack_ent_t *new_array;
01349       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
01350                                    * fs->alloc * 2));
01351       if (new_array == NULL)
01352        return REG_ESPACE;
01353       fs->alloc *= 2;
01354       fs->stack = new_array;
01355     }
01356   fs->stack[num].idx = str_idx;
01357   fs->stack[num].node = dest_node;
01358   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
01359   if (fs->stack[num].regs == NULL)
01360     return REG_ESPACE;
01361   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
01362   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
01363   return err;
01364 }
01365 
01366 static int
01367 internal_function
01368 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
01369               regmatch_t *regs, re_node_set *eps_via_nodes)
01370 {
01371   int num = --fs->num;
01372   assert (num >= 0);
01373   *pidx = fs->stack[num].idx;
01374   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
01375   re_node_set_free (eps_via_nodes);
01376   re_free (fs->stack[num].regs);
01377   *eps_via_nodes = fs->stack[num].eps_via_nodes;
01378   return fs->stack[num].node;
01379 }
01380 
01381 /* Set the positions where the subexpressions are starts/ends to registers
01382    PMATCH.
01383    Note: We assume that pmatch[0] is already set, and
01384    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
01385 
01386 static reg_errcode_t
01387 internal_function
01388 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
01389          regmatch_t *pmatch, int fl_backtrack)
01390 {
01391   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
01392   int idx, cur_node;
01393   re_node_set eps_via_nodes;
01394   struct re_fail_stack_t *fs;
01395   struct re_fail_stack_t fs_body = { 0, 2, NULL };
01396   regmatch_t *prev_idx_match;
01397   int prev_idx_match_malloced = 0;
01398 
01399 #ifdef DEBUG
01400   assert (nmatch > 1);
01401   assert (mctx->state_log != NULL);
01402 #endif
01403   if (fl_backtrack)
01404     {
01405       fs = &fs_body;
01406       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
01407       if (fs->stack == NULL)
01408        return REG_ESPACE;
01409     }
01410   else
01411     fs = NULL;
01412 
01413   cur_node = dfa->init_node;
01414   re_node_set_init_empty (&eps_via_nodes);
01415 
01416   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
01417     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
01418   else
01419     {
01420       prev_idx_match = re_malloc (regmatch_t, nmatch);
01421       if (prev_idx_match == NULL)
01422        {
01423          free_fail_stack_return (fs);
01424          return REG_ESPACE;
01425        }
01426       prev_idx_match_malloced = 1;
01427     }
01428   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
01429 
01430   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
01431     {
01432       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
01433 
01434       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
01435        {
01436          int reg_idx;
01437          if (fs)
01438            {
01439              for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
01440               if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
01441                 break;
01442              if (reg_idx == nmatch)
01443               {
01444                 re_node_set_free (&eps_via_nodes);
01445                 if (prev_idx_match_malloced)
01446                   re_free (prev_idx_match);
01447                 return free_fail_stack_return (fs);
01448               }
01449              cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
01450                                     &eps_via_nodes);
01451            }
01452          else
01453            {
01454              re_node_set_free (&eps_via_nodes);
01455              if (prev_idx_match_malloced)
01456               re_free (prev_idx_match);
01457              return REG_NOERROR;
01458            }
01459        }
01460 
01461       /* Proceed to next node.  */
01462       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
01463                                 &eps_via_nodes, fs);
01464 
01465       if (BE (cur_node < 0, 0))
01466        {
01467          if (BE (cur_node == -2, 0))
01468            {
01469              re_node_set_free (&eps_via_nodes);
01470              if (prev_idx_match_malloced)
01471               re_free (prev_idx_match);
01472              free_fail_stack_return (fs);
01473              return REG_ESPACE;
01474            }
01475          if (fs)
01476            cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
01477                                    &eps_via_nodes);
01478          else
01479            {
01480              re_node_set_free (&eps_via_nodes);
01481              if (prev_idx_match_malloced)
01482               re_free (prev_idx_match);
01483              return REG_NOMATCH;
01484            }
01485        }
01486     }
01487   re_node_set_free (&eps_via_nodes);
01488   if (prev_idx_match_malloced)
01489     re_free (prev_idx_match);
01490   return free_fail_stack_return (fs);
01491 }
01492 
01493 static reg_errcode_t
01494 internal_function
01495 free_fail_stack_return (struct re_fail_stack_t *fs)
01496 {
01497   if (fs)
01498     {
01499       int fs_idx;
01500       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
01501        {
01502          re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
01503          re_free (fs->stack[fs_idx].regs);
01504        }
01505       re_free (fs->stack);
01506     }
01507   return REG_NOERROR;
01508 }
01509 
01510 static void
01511 internal_function
01512 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
01513             regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
01514 {
01515   int type = dfa->nodes[cur_node].type;
01516   if (type == OP_OPEN_SUBEXP)
01517     {
01518       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
01519 
01520       /* We are at the first node of this sub expression.  */
01521       if (reg_num < nmatch)
01522        {
01523          pmatch[reg_num].rm_so = cur_idx;
01524          pmatch[reg_num].rm_eo = -1;
01525        }
01526     }
01527   else if (type == OP_CLOSE_SUBEXP)
01528     {
01529       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
01530       if (reg_num < nmatch)
01531        {
01532          /* We are at the last node of this sub expression.  */
01533          if (pmatch[reg_num].rm_so < cur_idx)
01534            {
01535              pmatch[reg_num].rm_eo = cur_idx;
01536              /* This is a non-empty match or we are not inside an optional
01537                subexpression.  Accept this right away.  */
01538              memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
01539            }
01540          else
01541            {
01542              if (dfa->nodes[cur_node].opt_subexp
01543                 && prev_idx_match[reg_num].rm_so != -1)
01544               /* We transited through an empty match for an optional
01545                  subexpression, like (a?)*, and this is not the subexp's
01546                  first match.  Copy back the old content of the registers
01547                  so that matches of an inner subexpression are undone as
01548                  well, like in ((a?))*.  */
01549               memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
01550              else
01551               /* We completed a subexpression, but it may be part of
01552                  an optional one, so do not update PREV_IDX_MATCH.  */
01553               pmatch[reg_num].rm_eo = cur_idx;
01554            }
01555        }
01556     }
01557 }
01558 
01559 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
01560    and sift the nodes in each states according to the following rules.
01561    Updated state_log will be wrote to STATE_LOG.
01562 
01563    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
01564      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
01565        If `a' isn't the LAST_NODE and `a' can't epsilon transit to
01566        the LAST_NODE, we throw away the node `a'.
01567      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
01568        string `s' and transit to `b':
01569        i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
01570           away the node `a'.
01571        ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
01572            thrown away, we throw away the node `a'.
01573      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
01574        i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
01575           node `a'.
01576        ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
01577            we throw away the node `a'.  */
01578 
01579 #define STATE_NODE_CONTAINS(state,node) \
01580   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
01581 
01582 static reg_errcode_t
01583 internal_function
01584 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
01585 {
01586   reg_errcode_t err;
01587   int null_cnt = 0;
01588   int str_idx = sctx->last_str_idx;
01589   re_node_set cur_dest;
01590 
01591 #ifdef DEBUG
01592   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
01593 #endif
01594 
01595   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
01596      transit to the last_node and the last_node itself.  */
01597   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
01598   if (BE (err != REG_NOERROR, 0))
01599     return err;
01600   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
01601   if (BE (err != REG_NOERROR, 0))
01602     goto free_return;
01603 
01604   /* Then check each states in the state_log.  */
01605   while (str_idx > 0)
01606     {
01607       /* Update counters.  */
01608       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
01609       if (null_cnt > mctx->max_mb_elem_len)
01610        {
01611          memset (sctx->sifted_states, '\0',
01612                 sizeof (re_dfastate_t *) * str_idx);
01613          re_node_set_free (&cur_dest);
01614          return REG_NOERROR;
01615        }
01616       re_node_set_empty (&cur_dest);
01617       --str_idx;
01618 
01619       if (mctx->state_log[str_idx])
01620        {
01621          err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
01622           if (BE (err != REG_NOERROR, 0))
01623            goto free_return;
01624        }
01625 
01626       /* Add all the nodes which satisfy the following conditions:
01627         - It can epsilon transit to a node in CUR_DEST.
01628         - It is in CUR_SRC.
01629         And update state_log.  */
01630       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
01631       if (BE (err != REG_NOERROR, 0))
01632        goto free_return;
01633     }
01634   err = REG_NOERROR;
01635  free_return:
01636   re_node_set_free (&cur_dest);
01637   return err;
01638 }
01639 
01640 static reg_errcode_t
01641 internal_function
01642 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
01643                    int str_idx, re_node_set *cur_dest)
01644 {
01645   const re_dfa_t *const dfa = mctx->dfa;
01646   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
01647   int i;
01648 
01649   /* Then build the next sifted state.
01650      We build the next sifted state on `cur_dest', and update
01651      `sifted_states[str_idx]' with `cur_dest'.
01652      Note:
01653      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
01654      `cur_src' points the node_set of the old `state_log[str_idx]'
01655      (with the epsilon nodes pre-filtered out).  */
01656   for (i = 0; i < cur_src->nelem; i++)
01657     {
01658       int prev_node = cur_src->elems[i];
01659       int naccepted = 0;
01660       int ret;
01661 
01662 #ifdef DEBUG
01663       re_token_type_t type = dfa->nodes[prev_node].type;
01664       assert (!IS_EPSILON_NODE (type));
01665 #endif
01666 #ifdef RE_ENABLE_I18N
01667       /* If the node may accept `multi byte'.  */
01668       if (dfa->nodes[prev_node].accept_mb)
01669        naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
01670                                     str_idx, sctx->last_str_idx);
01671 #endif /* RE_ENABLE_I18N */
01672 
01673       /* We don't check backreferences here.
01674         See update_cur_sifted_state().  */
01675       if (!naccepted
01676          && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
01677          && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
01678                               dfa->nexts[prev_node]))
01679        naccepted = 1;
01680 
01681       if (naccepted == 0)
01682        continue;
01683 
01684       if (sctx->limits.nelem)
01685        {
01686          int to_idx = str_idx + naccepted;
01687          if (check_dst_limits (mctx, &sctx->limits,
01688                             dfa->nexts[prev_node], to_idx,
01689                             prev_node, str_idx))
01690            continue;
01691        }
01692       ret = re_node_set_insert (cur_dest, prev_node);
01693       if (BE (ret == -1, 0))
01694        return REG_ESPACE;
01695     }
01696 
01697   return REG_NOERROR;
01698 }
01699 
01700 /* Helper functions.  */
01701 
01702 static reg_errcode_t
01703 internal_function
01704 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
01705 {
01706   int top = mctx->state_log_top;
01707 
01708   if (next_state_log_idx >= mctx->input.bufs_len
01709       || (next_state_log_idx >= mctx->input.valid_len
01710          && mctx->input.valid_len < mctx->input.len))
01711     {
01712       reg_errcode_t err;
01713       err = extend_buffers (mctx);
01714       if (BE (err != REG_NOERROR, 0))
01715        return err;
01716     }
01717 
01718   if (top < next_state_log_idx)
01719     {
01720       memset (mctx->state_log + top + 1, '\0',
01721              sizeof (re_dfastate_t *) * (next_state_log_idx - top));
01722       mctx->state_log_top = next_state_log_idx;
01723     }
01724   return REG_NOERROR;
01725 }
01726 
01727 static reg_errcode_t
01728 internal_function
01729 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
01730                  re_dfastate_t **src, int num)
01731 {
01732   int st_idx;
01733   reg_errcode_t err;
01734   for (st_idx = 0; st_idx < num; ++st_idx)
01735     {
01736       if (dst[st_idx] == NULL)
01737        dst[st_idx] = src[st_idx];
01738       else if (src[st_idx] != NULL)
01739        {
01740          re_node_set merged_set;
01741          err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
01742                                    &src[st_idx]->nodes);
01743          if (BE (err != REG_NOERROR, 0))
01744            return err;
01745          dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
01746          re_node_set_free (&merged_set);
01747          if (BE (err != REG_NOERROR, 0))
01748            return err;
01749        }
01750     }
01751   return REG_NOERROR;
01752 }
01753 
01754 static reg_errcode_t
01755 internal_function
01756 update_cur_sifted_state (const re_match_context_t *mctx,
01757                       re_sift_context_t *sctx, int str_idx,
01758                       re_node_set *dest_nodes)
01759 {
01760   const re_dfa_t *const dfa = mctx->dfa;
01761   reg_errcode_t err = REG_NOERROR;
01762   const re_node_set *candidates;
01763   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
01764               : &mctx->state_log[str_idx]->nodes);
01765 
01766   if (dest_nodes->nelem == 0)
01767     sctx->sifted_states[str_idx] = NULL;
01768   else
01769     {
01770       if (candidates)
01771        {
01772          /* At first, add the nodes which can epsilon transit to a node in
01773             DEST_NODE.  */
01774          err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
01775          if (BE (err != REG_NOERROR, 0))
01776            return err;
01777 
01778          /* Then, check the limitations in the current sift_context.  */
01779          if (sctx->limits.nelem)
01780            {
01781              err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
01782                                     mctx->bkref_ents, str_idx);
01783              if (BE (err != REG_NOERROR, 0))
01784               return err;
01785            }
01786        }
01787 
01788       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
01789       if (BE (err != REG_NOERROR, 0))
01790        return err;
01791     }
01792 
01793   if (candidates && mctx->state_log[str_idx]->has_backref)
01794     {
01795       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
01796       if (BE (err != REG_NOERROR, 0))
01797        return err;
01798     }
01799   return REG_NOERROR;
01800 }
01801 
01802 static reg_errcode_t
01803 internal_function
01804 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
01805                      const re_node_set *candidates)
01806 {
01807   reg_errcode_t err = REG_NOERROR;
01808   int i;
01809 
01810   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
01811   if (BE (err != REG_NOERROR, 0))
01812     return err;
01813 
01814   if (!state->inveclosure.alloc)
01815     {
01816       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
01817       if (BE (err != REG_NOERROR, 0))
01818         return REG_ESPACE;
01819       for (i = 0; i < dest_nodes->nelem; i++)
01820         re_node_set_merge (&state->inveclosure,
01821                         dfa->inveclosures + dest_nodes->elems[i]);
01822     }
01823   return re_node_set_add_intersect (dest_nodes, candidates,
01824                                 &state->inveclosure);
01825 }
01826 
01827 static reg_errcode_t
01828 internal_function
01829 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
01830                      const re_node_set *candidates)
01831 {
01832     int ecl_idx;
01833     reg_errcode_t err;
01834     re_node_set *inv_eclosure = dfa->inveclosures + node;
01835     re_node_set except_nodes;
01836     re_node_set_init_empty (&except_nodes);
01837     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
01838       {
01839        int cur_node = inv_eclosure->elems[ecl_idx];
01840        if (cur_node == node)
01841          continue;
01842        if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
01843          {
01844            int edst1 = dfa->edests[cur_node].elems[0];
01845            int edst2 = ((dfa->edests[cur_node].nelem > 1)
01846                       ? dfa->edests[cur_node].elems[1] : -1);
01847            if ((!re_node_set_contains (inv_eclosure, edst1)
01848                && re_node_set_contains (dest_nodes, edst1))
01849               || (edst2 > 0
01850                   && !re_node_set_contains (inv_eclosure, edst2)
01851                   && re_node_set_contains (dest_nodes, edst2)))
01852              {
01853               err = re_node_set_add_intersect (&except_nodes, candidates,
01854                                            dfa->inveclosures + cur_node);
01855               if (BE (err != REG_NOERROR, 0))
01856                 {
01857                   re_node_set_free (&except_nodes);
01858                   return err;
01859                 }
01860              }
01861          }
01862       }
01863     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
01864       {
01865        int cur_node = inv_eclosure->elems[ecl_idx];
01866        if (!re_node_set_contains (&except_nodes, cur_node))
01867          {
01868            int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
01869            re_node_set_remove_at (dest_nodes, idx);
01870          }
01871       }
01872     re_node_set_free (&except_nodes);
01873     return REG_NOERROR;
01874 }
01875 
01876 static int
01877 internal_function
01878 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
01879                 int dst_node, int dst_idx, int src_node, int src_idx)
01880 {
01881   const re_dfa_t *const dfa = mctx->dfa;
01882   int lim_idx, src_pos, dst_pos;
01883 
01884   int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
01885   int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
01886   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
01887     {
01888       int subexp_idx;
01889       struct re_backref_cache_entry *ent;
01890       ent = mctx->bkref_ents + limits->elems[lim_idx];
01891       subexp_idx = dfa->nodes[ent->node].opr.idx;
01892 
01893       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
01894                                       subexp_idx, dst_node, dst_idx,
01895                                       dst_bkref_idx);
01896       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
01897                                       subexp_idx, src_node, src_idx,
01898                                       src_bkref_idx);
01899 
01900       /* In case of:
01901         <src> <dst> ( <subexp> )
01902         ( <subexp> ) <src> <dst>
01903         ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
01904       if (src_pos == dst_pos)
01905        continue; /* This is unrelated limitation.  */
01906       else
01907        return 1;
01908     }
01909   return 0;
01910 }
01911 
01912 static int
01913 internal_function
01914 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
01915                           int subexp_idx, int from_node, int bkref_idx)
01916 {
01917   const re_dfa_t *const dfa = mctx->dfa;
01918   const re_node_set *eclosures = dfa->eclosures + from_node;
01919   int node_idx;
01920 
01921   /* Else, we are on the boundary: examine the nodes on the epsilon
01922      closure.  */
01923   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
01924     {
01925       int node = eclosures->elems[node_idx];
01926       switch (dfa->nodes[node].type)
01927        {
01928        case OP_BACK_REF:
01929          if (bkref_idx != -1)
01930            {
01931              struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
01932              do
01933                {
01934                 int dst, cpos;
01935 
01936                 if (ent->node != node)
01937                   continue;
01938 
01939                 if (subexp_idx < BITSET_WORD_BITS
01940                     && !(ent->eps_reachable_subexps_map
01941                         & ((bitset_word_t) 1 << subexp_idx)))
01942                   continue;
01943 
01944                 /* Recurse trying to reach the OP_OPEN_SUBEXP and
01945                    OP_CLOSE_SUBEXP cases below.  But, if the
01946                    destination node is the same node as the source
01947                    node, don't recurse because it would cause an
01948                    infinite loop: a regex that exhibits this behavior
01949                    is ()\1*\1*  */
01950                 dst = dfa->edests[node].elems[0];
01951                 if (dst == from_node)
01952                   {
01953                     if (boundaries & 1)
01954                       return -1;
01955                     else /* if (boundaries & 2) */
01956                       return 0;
01957                   }
01958 
01959                 cpos =
01960                   check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
01961                                            dst, bkref_idx);
01962                 if (cpos == -1 /* && (boundaries & 1) */)
01963                   return -1;
01964                 if (cpos == 0 && (boundaries & 2))
01965                   return 0;
01966 
01967                 if (subexp_idx < BITSET_WORD_BITS)
01968                   ent->eps_reachable_subexps_map
01969                     &= ~((bitset_word_t) 1 << subexp_idx);
01970                }
01971              while (ent++->more);
01972            }
01973          break;
01974 
01975        case OP_OPEN_SUBEXP:
01976          if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
01977            return -1;
01978          break;
01979 
01980        case OP_CLOSE_SUBEXP:
01981          if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
01982            return 0;
01983          break;
01984 
01985        default:
01986            break;
01987        }
01988     }
01989 
01990   return (boundaries & 2) ? 1 : 0;
01991 }
01992 
01993 static int
01994 internal_function
01995 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
01996                         int subexp_idx, int from_node, int str_idx,
01997                         int bkref_idx)
01998 {
01999   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
02000   int boundaries;
02001 
02002   /* If we are outside the range of the subexpression, return -1 or 1.  */
02003   if (str_idx < lim->subexp_from)
02004     return -1;
02005 
02006   if (lim->subexp_to < str_idx)
02007     return 1;
02008 
02009   /* If we are within the subexpression, return 0.  */
02010   boundaries = (str_idx == lim->subexp_from);
02011   boundaries |= (str_idx == lim->subexp_to) << 1;
02012   if (boundaries == 0)
02013     return 0;
02014 
02015   /* Else, examine epsilon closure.  */
02016   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
02017                                   from_node, bkref_idx);
02018 }
02019 
02020 /* Check the limitations of sub expressions LIMITS, and remove the nodes
02021    which are against limitations from DEST_NODES. */
02022 
02023 static reg_errcode_t
02024 internal_function
02025 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
02026                    const re_node_set *candidates, re_node_set *limits,
02027                    struct re_backref_cache_entry *bkref_ents, int str_idx)
02028 {
02029   reg_errcode_t err;
02030   int node_idx, lim_idx;
02031 
02032   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
02033     {
02034       int subexp_idx;
02035       struct re_backref_cache_entry *ent;
02036       ent = bkref_ents + limits->elems[lim_idx];
02037 
02038       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
02039        continue; /* This is unrelated limitation.  */
02040 
02041       subexp_idx = dfa->nodes[ent->node].opr.idx;
02042       if (ent->subexp_to == str_idx)
02043        {
02044          int ops_node = -1;
02045          int cls_node = -1;
02046          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
02047            {
02048              int node = dest_nodes->elems[node_idx];
02049              re_token_type_t type = dfa->nodes[node].type;
02050              if (type == OP_OPEN_SUBEXP
02051                 && subexp_idx == dfa->nodes[node].opr.idx)
02052               ops_node = node;
02053              else if (type == OP_CLOSE_SUBEXP
02054                      && subexp_idx == dfa->nodes[node].opr.idx)
02055               cls_node = node;
02056            }
02057 
02058          /* Check the limitation of the open subexpression.  */
02059          /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
02060          if (ops_node >= 0)
02061            {
02062              err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
02063                                       candidates);
02064              if (BE (err != REG_NOERROR, 0))
02065               return err;
02066            }
02067 
02068          /* Check the limitation of the close subexpression.  */
02069          if (cls_node >= 0)
02070            for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
02071              {
02072               int node = dest_nodes->elems[node_idx];
02073               if (!re_node_set_contains (dfa->inveclosures + node,
02074                                       cls_node)
02075                   && !re_node_set_contains (dfa->eclosures + node,
02076                                          cls_node))
02077                 {
02078                   /* It is against this limitation.
02079                      Remove it form the current sifted state.  */
02080                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
02081                                            candidates);
02082                   if (BE (err != REG_NOERROR, 0))
02083                     return err;
02084                   --node_idx;
02085                 }
02086              }
02087        }
02088       else /* (ent->subexp_to != str_idx)  */
02089        {
02090          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
02091            {
02092              int node = dest_nodes->elems[node_idx];
02093              re_token_type_t type = dfa->nodes[node].type;
02094              if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
02095               {
02096                 if (subexp_idx != dfa->nodes[node].opr.idx)
02097                   continue;
02098                 /* It is against this limitation.
02099                    Remove it form the current sifted state.  */
02100                 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
02101                                           candidates);
02102                 if (BE (err != REG_NOERROR, 0))
02103                   return err;
02104               }
02105            }
02106        }
02107     }
02108   return REG_NOERROR;
02109 }
02110 
02111 static reg_errcode_t
02112 internal_function
02113 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
02114                  int str_idx, const re_node_set *candidates)
02115 {
02116   const re_dfa_t *const dfa = mctx->dfa;
02117   reg_errcode_t err;
02118   int node_idx, node;
02119   re_sift_context_t local_sctx;
02120   int first_idx = search_cur_bkref_entry (mctx, str_idx);
02121 
02122   if (first_idx == -1)
02123     return REG_NOERROR;
02124 
02125   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
02126 
02127   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
02128     {
02129       int enabled_idx;
02130       re_token_type_t type;
02131       struct re_backref_cache_entry *entry;
02132       node = candidates->elems[node_idx];
02133       type = dfa->nodes[node].type;
02134       /* Avoid infinite loop for the REs like "()\1+".  */
02135       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
02136        continue;
02137       if (type != OP_BACK_REF)
02138        continue;
02139 
02140       entry = mctx->bkref_ents + first_idx;
02141       enabled_idx = first_idx;
02142       do
02143        {
02144          int subexp_len;
02145          int to_idx;
02146          int dst_node;
02147          int ret;
02148          re_dfastate_t *cur_state;
02149 
02150          if (entry->node != node)
02151            continue;
02152          subexp_len = entry->subexp_to - entry->subexp_from;
02153          to_idx = str_idx + subexp_len;
02154          dst_node = (subexp_len ? dfa->nexts[node]
02155                     : dfa->edests[node].elems[0]);
02156 
02157          if (to_idx > sctx->last_str_idx
02158              || sctx->sifted_states[to_idx] == NULL
02159              || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
02160              || check_dst_limits (mctx, &sctx->limits, node,
02161                                str_idx, dst_node, to_idx))
02162            continue;
02163 
02164          if (local_sctx.sifted_states == NULL)
02165            {
02166              local_sctx = *sctx;
02167              err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
02168              if (BE (err != REG_NOERROR, 0))
02169               goto free_return;
02170            }
02171          local_sctx.last_node = node;
02172          local_sctx.last_str_idx = str_idx;
02173          ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
02174          if (BE (ret < 0, 0))
02175            {
02176              err = REG_ESPACE;
02177              goto free_return;
02178            }
02179          cur_state = local_sctx.sifted_states[str_idx];
02180          err = sift_states_backward (mctx, &local_sctx);
02181          if (BE (err != REG_NOERROR, 0))
02182            goto free_return;
02183          if (sctx->limited_states != NULL)
02184            {
02185              err = merge_state_array (dfa, sctx->limited_states,
02186                                    local_sctx.sifted_states,
02187                                    str_idx + 1);
02188              if (BE (err != REG_NOERROR, 0))
02189               goto free_return;
02190            }
02191          local_sctx.sifted_states[str_idx] = cur_state;
02192          re_node_set_remove (&local_sctx.limits, enabled_idx);
02193 
02194          /* mctx->bkref_ents may have changed, reload the pointer.  */
02195           entry = mctx->bkref_ents + enabled_idx;
02196        }
02197       while (enabled_idx++, entry++->more);
02198     }
02199   err = REG_NOERROR;
02200  free_return:
02201   if (local_sctx.sifted_states != NULL)
02202     {
02203       re_node_set_free (&local_sctx.limits);
02204     }
02205 
02206   return err;
02207 }
02208 
02209 
02210 #ifdef RE_ENABLE_I18N
02211 static int
02212 internal_function
02213 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
02214                    int node_idx, int str_idx, int max_str_idx)
02215 {
02216   const re_dfa_t *const dfa = mctx->dfa;
02217   int naccepted;
02218   /* Check the node can accept `multi byte'.  */
02219   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
02220   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
02221       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
02222                          dfa->nexts[node_idx]))
02223     /* The node can't accept the `multi byte', or the
02224        destination was already thrown away, then the node
02225        could't accept the current input `multi byte'.   */
02226     naccepted = 0;
02227   /* Otherwise, it is sure that the node could accept
02228      `naccepted' bytes input.  */
02229   return naccepted;
02230 }
02231 #endif /* RE_ENABLE_I18N */
02232 
02233 
02234 /* Functions for state transition.  */
02235 
02236 /* Return the next state to which the current state STATE will transit by
02237    accepting the current input byte, and update STATE_LOG if necessary.
02238    If STATE can accept a multibyte char/collating element/back reference
02239    update the destination of STATE_LOG.  */
02240 
02241 static re_dfastate_t *
02242 internal_function
02243 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
02244               re_dfastate_t *state)
02245 {
02246   re_dfastate_t **trtable;
02247   unsigned char ch;
02248 
02249 #ifdef RE_ENABLE_I18N
02250   /* If the current state can accept multibyte.  */
02251   if (BE (state->accept_mb, 0))
02252     {
02253       *err = transit_state_mb (mctx, state);
02254       if (BE (*err != REG_NOERROR, 0))
02255        return NULL;
02256     }
02257 #endif /* RE_ENABLE_I18N */
02258 
02259   /* Then decide the next state with the single byte.  */
02260 #if 0
02261   if (0)
02262     /* don't use transition table  */
02263     return transit_state_sb (err, mctx, state);
02264 #endif
02265 
02266   /* Use transition table  */
02267   ch = re_string_fetch_byte (&mctx->input);
02268   for (;;)
02269     {
02270       trtable = state->trtable;
02271       if (BE (trtable != NULL, 1))
02272        return trtable[ch];
02273 
02274       trtable = state->word_trtable;
02275       if (BE (trtable != NULL, 1))
02276         {
02277          unsigned int context;
02278          context
02279            = re_string_context_at (&mctx->input,
02280                                 re_string_cur_idx (&mctx->input) - 1,
02281                                 mctx->eflags);
02282          if (IS_WORD_CONTEXT (context))
02283            return trtable[ch + SBC_MAX];
02284          else
02285            return trtable[ch];
02286        }
02287 
02288       if (!build_trtable (mctx->dfa, state))
02289        {
02290          *err = REG_ESPACE;
02291          return NULL;
02292        }
02293 
02294       /* Retry, we now have a transition table.  */
02295     }
02296 }
02297 
02298 /* Update the state_log if we need */
02299 re_dfastate_t *
02300 internal_function
02301 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
02302                     re_dfastate_t *next_state)
02303 {
02304   const re_dfa_t *const dfa = mctx->dfa;
02305   int cur_idx = re_string_cur_idx (&mctx->input);
02306 
02307   if (cur_idx > mctx->state_log_top)
02308     {
02309       mctx->state_log[cur_idx] = next_state;
02310       mctx->state_log_top = cur_idx;
02311     }
02312   else if (mctx->state_log[cur_idx] == 0)
02313     {
02314       mctx->state_log[cur_idx] = next_state;
02315     }
02316   else
02317     {
02318       re_dfastate_t *pstate;
02319       unsigned int context;
02320       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
02321       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
02322          the destination of a multibyte char/collating element/
02323          back reference.  Then the next state is the union set of
02324          these destinations and the results of the transition table.  */
02325       pstate = mctx->state_log[cur_idx];
02326       log_nodes = pstate->entrance_nodes;
02327       if (next_state != NULL)
02328         {
02329           table_nodes = next_state->entrance_nodes;
02330           *err = re_node_set_init_union (&next_nodes, table_nodes,
02331                                         log_nodes);
02332           if (BE (*err != REG_NOERROR, 0))
02333            return NULL;
02334         }
02335       else
02336         next_nodes = *log_nodes;
02337       /* Note: We already add the nodes of the initial state,
02338         then we don't need to add them here.  */
02339 
02340       context = re_string_context_at (&mctx->input,
02341                                   re_string_cur_idx (&mctx->input) - 1,
02342                                   mctx->eflags);
02343       next_state = mctx->state_log[cur_idx]
02344         = re_acquire_state_context (err, dfa, &next_nodes, context);
02345       /* We don't need to check errors here, since the return value of
02346          this function is next_state and ERR is already set.  */
02347 
02348       if (table_nodes != NULL)
02349         re_node_set_free (&next_nodes);
02350     }
02351 
02352   if (BE (dfa->nbackref, 0) && next_state != NULL)
02353     {
02354       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
02355         later.  We must check them here, since the back references in the
02356         next state might use them.  */
02357       *err = check_subexp_matching_top (mctx, &next_state->nodes,
02358                                    cur_idx);
02359       if (BE (*err != REG_NOERROR, 0))
02360        return NULL;
02361 
02362       /* If the next state has back references.  */
02363       if (next_state->has_backref)
02364        {
02365          *err = transit_state_bkref (mctx, &next_state->nodes);
02366          if (BE (*err != REG_NOERROR, 0))
02367            return NULL;
02368          next_state = mctx->state_log[cur_idx];
02369        }
02370     }
02371 
02372   return next_state;
02373 }
02374 
02375 /* Skip bytes in the input that correspond to part of a
02376    multi-byte match, then look in the log for a state
02377    from which to restart matching.  */
02378 re_dfastate_t *
02379 internal_function
02380 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
02381 {
02382   re_dfastate_t *cur_state;
02383   do
02384     {
02385       int max = mctx->state_log_top;
02386       int cur_str_idx = re_string_cur_idx (&mctx->input);
02387 
02388       do
02389        {
02390           if (++cur_str_idx > max)
02391             return NULL;
02392           re_string_skip_bytes (&mctx->input, 1);
02393        }
02394       while (mctx->state_log[cur_str_idx] == NULL);
02395 
02396       cur_state = merge_state_with_log (err, mctx, NULL);
02397     }
02398   while (*err == REG_NOERROR && cur_state == NULL);
02399   return cur_state;
02400 }
02401 
02402 /* Helper functions for transit_state.  */
02403 
02404 /* From the node set CUR_NODES, pick up the nodes whose types are
02405    OP_OPEN_SUBEXP and which have corresponding back references in the regular
02406    expression. And register them to use them later for evaluating the
02407    correspoding back references.  */
02408 
02409 static reg_errcode_t
02410 internal_function
02411 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
02412                         int str_idx)
02413 {
02414   const re_dfa_t *const dfa = mctx->dfa;
02415   int node_idx;
02416   reg_errcode_t err;
02417 
02418   /* TODO: This isn't efficient.
02419           Because there might be more than one nodes whose types are
02420           OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
02421           nodes.
02422           E.g. RE: (a){2}  */
02423   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
02424     {
02425       int node = cur_nodes->elems[node_idx];
02426       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
02427          && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
02428          && (dfa->used_bkref_map
02429              & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
02430        {
02431          err = match_ctx_add_subtop (mctx, node, str_idx);
02432          if (BE (err != REG_NOERROR, 0))
02433            return err;
02434        }
02435     }
02436   return REG_NOERROR;
02437 }
02438 
02439 #if 0
02440 /* Return the next state to which the current state STATE will transit by
02441    accepting the current input byte.  */
02442 
02443 static re_dfastate_t *
02444 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
02445                 re_dfastate_t *state)
02446 {
02447   const re_dfa_t *const dfa = mctx->dfa;
02448   re_node_set next_nodes;
02449   re_dfastate_t *next_state;
02450   int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
02451   unsigned int context;
02452 
02453   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
02454   if (BE (*err != REG_NOERROR, 0))
02455     return NULL;
02456   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
02457     {
02458       int cur_node = state->nodes.elems[node_cnt];
02459       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
02460        {
02461          *err = re_node_set_merge (&next_nodes,
02462                                 dfa->eclosures + dfa->nexts[cur_node]);
02463          if (BE (*err != REG_NOERROR, 0))
02464            {
02465              re_node_set_free (&next_nodes);
02466              return NULL;
02467            }
02468        }
02469     }
02470   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
02471   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
02472   /* We don't need to check errors here, since the return value of
02473      this function is next_state and ERR is already set.  */
02474 
02475   re_node_set_free (&next_nodes);
02476   re_string_skip_bytes (&mctx->input, 1);
02477   return next_state;
02478 }
02479 #endif
02480 
02481 #ifdef RE_ENABLE_I18N
02482 static reg_errcode_t
02483 internal_function
02484 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
02485 {
02486   const re_dfa_t *const dfa = mctx->dfa;
02487   reg_errcode_t err;
02488   int i;
02489 
02490   for (i = 0; i < pstate->nodes.nelem; ++i)
02491     {
02492       re_node_set dest_nodes, *new_nodes;
02493       int cur_node_idx = pstate->nodes.elems[i];
02494       int naccepted, dest_idx;
02495       unsigned int context;
02496       re_dfastate_t *dest_state;
02497 
02498       if (!dfa->nodes[cur_node_idx].accept_mb)
02499         continue;
02500 
02501       if (dfa->nodes[cur_node_idx].constraint)
02502        {
02503          context = re_string_context_at (&mctx->input,
02504                                      re_string_cur_idx (&mctx->input),
02505                                      mctx->eflags);
02506          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
02507                                       context))
02508            continue;
02509        }
02510 
02511       /* How many bytes the node can accept?  */
02512       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
02513                                       re_string_cur_idx (&mctx->input));
02514       if (naccepted == 0)
02515        continue;
02516 
02517       /* The node can accepts `naccepted' bytes.  */
02518       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
02519       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
02520                             : mctx->max_mb_elem_len);
02521       err = clean_state_log_if_needed (mctx, dest_idx);
02522       if (BE (err != REG_NOERROR, 0))
02523        return err;
02524 #ifdef DEBUG
02525       assert (dfa->nexts[cur_node_idx] != -1);
02526 #endif
02527       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
02528 
02529       dest_state = mctx->state_log[dest_idx];
02530       if (dest_state == NULL)
02531        dest_nodes = *new_nodes;
02532       else
02533        {
02534          err = re_node_set_init_union (&dest_nodes,
02535                                    dest_state->entrance_nodes, new_nodes);
02536          if (BE (err != REG_NOERROR, 0))
02537            return err;
02538        }
02539       context = re_string_context_at (&mctx->input, dest_idx - 1,
02540                                   mctx->eflags);
02541       mctx->state_log[dest_idx]
02542        = re_acquire_state_context (&err, dfa, &dest_nodes, context);
02543       if (dest_state != NULL)
02544        re_node_set_free (&dest_nodes);
02545       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
02546        return err;
02547     }
02548   return REG_NOERROR;
02549 }
02550 #endif /* RE_ENABLE_I18N */
02551 
02552 static reg_errcode_t
02553 internal_function
02554 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
02555 {
02556   const re_dfa_t *const dfa = mctx->dfa;
02557   reg_errcode_t err;
02558   int i;
02559   int cur_str_idx = re_string_cur_idx (&mctx->input);
02560 
02561   for (i = 0; i < nodes->nelem; ++i)
02562     {
02563       int dest_str_idx, prev_nelem, bkc_idx;
02564       int node_idx = nodes->elems[i];
02565       unsigned int context;
02566       const re_token_t *node = dfa->nodes + node_idx;
02567       re_node_set *new_dest_nodes;
02568 
02569       /* Check whether `node' is a backreference or not.  */
02570       if (node->type != OP_BACK_REF)
02571        continue;
02572 
02573       if (node->constraint)
02574        {
02575          context = re_string_context_at (&mctx->input, cur_str_idx,
02576                                      mctx->eflags);
02577          if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
02578            continue;
02579        }
02580 
02581       /* `node' is a backreference.
02582         Check the substring which the substring matched.  */
02583       bkc_idx = mctx->nbkref_ents;
02584       err = get_subexp (mctx, node_idx, cur_str_idx);
02585       if (BE (err != REG_NOERROR, 0))
02586        goto free_return;
02587 
02588       /* And add the epsilon closures (which is `new_dest_nodes') of
02589         the backreference to appropriate state_log.  */
02590 #ifdef DEBUG
02591       assert (dfa->nexts[node_idx] != -1);
02592 #endif
02593       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
02594        {
02595          int subexp_len;
02596          re_dfastate_t *dest_state;
02597          struct re_backref_cache_entry *bkref_ent;
02598          bkref_ent = mctx->bkref_ents + bkc_idx;
02599          if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
02600            continue;
02601          subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
02602          new_dest_nodes = (subexp_len == 0
02603                          ? dfa->eclosures + dfa->edests[node_idx].elems[0]
02604                          : dfa->eclosures + dfa->nexts[node_idx]);
02605          dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
02606                        - bkref_ent->subexp_from);
02607          context = re_string_context_at (&mctx->input, dest_str_idx - 1,
02608                                      mctx->eflags);
02609          dest_state = mctx->state_log[dest_str_idx];
02610          prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
02611                      : mctx->state_log[cur_str_idx]->nodes.nelem);
02612          /* Add `new_dest_node' to state_log.  */
02613          if (dest_state == NULL)
02614            {
02615              mctx->state_log[dest_str_idx]
02616               = re_acquire_state_context (&err, dfa, new_dest_nodes,
02617                                        context);
02618              if (BE (mctx->state_log[dest_str_idx] == NULL
02619                     && err != REG_NOERROR, 0))
02620               goto free_return;
02621            }
02622          else
02623            {
02624              re_node_set dest_nodes;
02625              err = re_node_set_init_union (&dest_nodes,
02626                                        dest_state->entrance_nodes,
02627                                        new_dest_nodes);
02628              if (BE (err != REG_NOERROR, 0))
02629               {
02630                 re_node_set_free (&dest_nodes);
02631                 goto free_return;
02632               }
02633              mctx->state_log[dest_str_idx]
02634               = re_acquire_state_context (&err, dfa, &dest_nodes, context);
02635              re_node_set_free (&dest_nodes);
02636              if (BE (mctx->state_log[dest_str_idx] == NULL
02637                     && err != REG_NOERROR, 0))
02638               goto free_return;
02639            }
02640          /* We need to check recursively if the backreference can epsilon
02641             transit.  */
02642          if (subexp_len == 0
02643              && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
02644            {
02645              err = check_subexp_matching_top (mctx, new_dest_nodes,
02646                                           cur_str_idx);
02647              if (BE (err != REG_NOERROR, 0))
02648               goto free_return;
02649              err = transit_state_bkref (mctx, new_dest_nodes);
02650              if (BE (err != REG_NOERROR, 0))
02651               goto free_return;
02652            }
02653        }
02654     }
02655   err = REG_NOERROR;
02656  free_return:
02657   return err;
02658 }
02659 
02660 /* Enumerate all the candidates which the backreference BKREF_NODE can match
02661    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
02662    Note that we might collect inappropriate candidates here.
02663    However, the cost of checking them strictly here is too high, then we
02664    delay these checking for prune_impossible_nodes().  */
02665 
02666 static reg_errcode_t
02667 internal_function
02668 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
02669 {
02670   const re_dfa_t *const dfa = mctx->dfa;
02671   int subexp_num, sub_top_idx;
02672   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
02673   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
02674   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
02675   if (cache_idx != -1)
02676     {
02677       const struct re_backref_cache_entry *entry
02678        = mctx->bkref_ents + cache_idx;
02679       do
02680         if (entry->node == bkref_node)
02681          return REG_NOERROR; /* We already checked it.  */
02682       while (entry++->more);
02683     }
02684 
02685   subexp_num = dfa->nodes[bkref_node].opr.idx;
02686 
02687   /* For each sub expression  */
02688   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
02689     {
02690       reg_errcode_t err;
02691       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
02692       re_sub_match_last_t *sub_last;
02693       int sub_last_idx, sl_str, bkref_str_off;
02694 
02695       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
02696        continue; /* It isn't related.  */
02697 
02698       sl_str = sub_top->str_idx;
02699       bkref_str_off = bkref_str_idx;
02700       /* At first, check the last node of sub expressions we already
02701         evaluated.  */
02702       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
02703        {
02704          int sl_str_diff;
02705          sub_last = sub_top->lasts[sub_last_idx];
02706          sl_str_diff = sub_last->str_idx - sl_str;
02707          /* The matched string by the sub expression match with the substring
02708             at the back reference?  */
02709          if (sl_str_diff > 0)
02710            {
02711              if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
02712               {
02713                 /* Not enough chars for a successful match.  */
02714                 if (bkref_str_off + sl_str_diff > mctx->input.len)
02715                   break;
02716 
02717                 err = clean_state_log_if_needed (mctx,
02718                                              bkref_str_off
02719                                              + sl_str_diff);
02720                 if (BE (err != REG_NOERROR, 0))
02721                   return err;
02722                 buf = (const char *) re_string_get_buffer (&mctx->input);
02723               }
02724              if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
02725               /* We don't need to search this sub expression any more.  */
02726               break;
02727            }
02728          bkref_str_off += sl_str_diff;
02729          sl_str += sl_str_diff;
02730          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
02731                             bkref_str_idx);
02732 
02733          /* Reload buf, since the preceding call might have reallocated
02734             the buffer.  */
02735          buf = (const char *) re_string_get_buffer (&mctx->input);
02736 
02737          if (err == REG_NOMATCH)
02738            continue;
02739          if (BE (err != REG_NOERROR, 0))
02740            return err;
02741        }
02742 
02743       if (sub_last_idx < sub_top->nlasts)
02744        continue;
02745       if (sub_last_idx > 0)
02746        ++sl_str;
02747       /* Then, search for the other last nodes of the sub expression.  */
02748       for (; sl_str <= bkref_str_idx; ++sl_str)
02749        {
02750          int cls_node, sl_str_off;
02751          const re_node_set *nodes;
02752          sl_str_off = sl_str - sub_top->str_idx;
02753          /* The matched string by the sub expression match with the substring
02754             at the back reference?  */
02755          if (sl_str_off > 0)
02756            {
02757              if (BE (bkref_str_off >= mctx->input.valid_len, 0))
02758               {
02759                 /* If we are at the end of the input, we cannot match.  */
02760                 if (bkref_str_off >= mctx->input.len)
02761                   break;
02762 
02763                 err = extend_buffers (mctx);
02764                 if (BE (err != REG_NOERROR, 0))
02765                   return err;
02766 
02767                 buf = (const char *) re_string_get_buffer (&mctx->input);
02768               }
02769              if (buf [bkref_str_off++] != buf[sl_str - 1])
02770               break; /* We don't need to search this sub expression
02771                        any more.  */
02772            }
02773          if (mctx->state_log[sl_str] == NULL)
02774            continue;
02775          /* Does this state have a ')' of the sub expression?  */
02776          nodes = &mctx->state_log[sl_str]->nodes;
02777          cls_node = find_subexp_node (dfa, nodes, subexp_num,
02778                                    OP_CLOSE_SUBEXP);
02779          if (cls_node == -1)
02780            continue; /* No.  */
02781          if (sub_top->path == NULL)
02782            {
02783              sub_top->path = calloc (sizeof (state_array_t),
02784                                   sl_str - sub_top->str_idx + 1);
02785              if (sub_top->path == NULL)
02786               return REG_ESPACE;
02787            }
02788          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
02789             in the current context?  */
02790          err = check_arrival (mctx, sub_top->path, sub_top->node,
02791                             sub_top->str_idx, cls_node, sl_str,
02792                             OP_CLOSE_SUBEXP);
02793          if (err == REG_NOMATCH)
02794              continue;
02795          if (BE (err != REG_NOERROR, 0))
02796              return err;
02797          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
02798          if (BE (sub_last == NULL, 0))
02799            return REG_ESPACE;
02800          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
02801                             bkref_str_idx);
02802          if (err == REG_NOMATCH)
02803            continue;
02804        }
02805     }
02806   return REG_NOERROR;
02807 }
02808 
02809 /* Helper functions for get_subexp().  */
02810 
02811 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
02812    If it can arrive, register the sub expression expressed with SUB_TOP
02813    and SUB_LAST.  */
02814 
02815 static reg_errcode_t
02816 internal_function
02817 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
02818               re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
02819 {
02820   reg_errcode_t err;
02821   int to_idx;
02822   /* Can the subexpression arrive the back reference?  */
02823   err = check_arrival (mctx, &sub_last->path, sub_last->node,
02824                      sub_last->str_idx, bkref_node, bkref_str,
02825                      OP_OPEN_SUBEXP);
02826   if (err != REG_NOERROR)
02827     return err;
02828   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
02829                           sub_last->str_idx);
02830   if (BE (err != REG_NOERROR, 0))
02831     return err;
02832   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
02833   return clean_state_log_if_needed (mctx, to_idx);
02834 }
02835 
02836 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
02837    Search '(' if FL_OPEN, or search ')' otherwise.
02838    TODO: This function isn't efficient...
02839         Because there might be more than one nodes whose types are
02840         OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
02841         nodes.
02842         E.g. RE: (a){2}  */
02843 
02844 static int
02845 internal_function
02846 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
02847                 int subexp_idx, int type)
02848 {
02849   int cls_idx;
02850   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
02851     {
02852       int cls_node = nodes->elems[cls_idx];
02853       const re_token_t *node = dfa->nodes + cls_node;
02854       if (node->type == type
02855          && node->opr.idx == subexp_idx)
02856        return cls_node;
02857     }
02858   return -1;
02859 }
02860 
02861 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
02862    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
02863    heavily reused.
02864    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
02865 
02866 static reg_errcode_t
02867 internal_function
02868 check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
02869               int top_str, int last_node, int last_str, int type)
02870 {
02871   const re_dfa_t *const dfa = mctx->dfa;
02872   reg_errcode_t err = REG_NOERROR;
02873   int subexp_num, backup_cur_idx, str_idx, null_cnt;
02874   re_dfastate_t *cur_state = NULL;
02875   re_node_set *cur_nodes, next_nodes;
02876   re_dfastate_t **backup_state_log;
02877   unsigned int context;
02878 
02879   subexp_num = dfa->nodes[top_node].opr.idx;
02880   /* Extend the buffer if we need.  */
02881   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
02882     {
02883       re_dfastate_t **new_array;
02884       int old_alloc = path->alloc;
02885       path->alloc += last_str + mctx->max_mb_elem_len + 1;
02886       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
02887       if (BE (new_array == NULL, 0))
02888        {
02889          path->alloc = old_alloc;
02890          return REG_ESPACE;
02891        }
02892       path->array = new_array;
02893       memset (new_array + old_alloc, '\0',
02894              sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
02895     }
02896 
02897   str_idx = path->next_idx ?: top_str;
02898 
02899   /* Temporary modify MCTX.  */
02900   backup_state_log = mctx->state_log;
02901   backup_cur_idx = mctx->input.cur_idx;
02902   mctx->state_log = path->array;
02903   mctx->input.cur_idx = str_idx;
02904 
02905   /* Setup initial node set.  */
02906   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
02907   if (str_idx == top_str)
02908     {
02909       err = re_node_set_init_1 (&next_nodes, top_node);
02910       if (BE (err != REG_NOERROR, 0))
02911        return err;
02912       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
02913       if (BE (err != REG_NOERROR, 0))
02914        {
02915          re_node_set_free (&next_nodes);
02916          return err;
02917        }
02918     }
02919   else
02920     {
02921       cur_state = mctx->state_log[str_idx];
02922       if (cur_state && cur_state->has_backref)
02923        {
02924          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
02925          if (BE (err != REG_NOERROR, 0))
02926            return err;
02927        }
02928       else
02929        re_node_set_init_empty (&next_nodes);
02930     }
02931   if (str_idx == top_str || (cur_state && cur_state->has_backref))
02932     {
02933       if (next_nodes.nelem)
02934        {
02935          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
02936                                 subexp_num, type);
02937          if (BE (err != REG_NOERROR, 0))
02938            {
02939              re_node_set_free (&next_nodes);
02940              return err;
02941            }
02942        }
02943       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
02944       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
02945        {
02946          re_node_set_free (&next_nodes);
02947          return err;
02948        }
02949       mctx->state_log[str_idx] = cur_state;
02950     }
02951 
02952   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
02953     {
02954       re_node_set_empty (&next_nodes);
02955       if (mctx->state_log[str_idx + 1])
02956        {
02957          err = re_node_set_merge (&next_nodes,
02958                                &mctx->state_log[str_idx + 1]->nodes);
02959          if (BE (err != REG_NOERROR, 0))
02960            {
02961              re_node_set_free (&next_nodes);
02962              return err;
02963            }
02964        }
02965       if (cur_state)
02966        {
02967          err = check_arrival_add_next_nodes (mctx, str_idx,
02968                                          &cur_state->non_eps_nodes,
02969                                          &next_nodes);
02970          if (BE (err != REG_NOERROR, 0))
02971            {
02972              re_node_set_free (&next_nodes);
02973              return err;
02974            }
02975        }
02976       ++str_idx;
02977       if (next_nodes.nelem)
02978        {
02979          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
02980          if (BE (err != REG_NOERROR, 0))
02981            {
02982              re_node_set_free (&next_nodes);
02983              return err;
02984            }
02985          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
02986                                 subexp_num, type);
02987          if (BE (err != REG_NOERROR, 0))
02988            {
02989              re_node_set_free (&next_nodes);
02990              return err;
02991            }
02992        }
02993       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
02994       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
02995       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
02996        {
02997          re_node_set_free (&next_nodes);
02998          return err;
02999        }
03000       mctx->state_log[str_idx] = cur_state;
03001       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
03002     }
03003   re_node_set_free (&next_nodes);
03004   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
03005               : &mctx->state_log[last_str]->nodes);
03006   path->next_idx = str_idx;
03007 
03008   /* Fix MCTX.  */
03009   mctx->state_log = backup_state_log;
03010   mctx->input.cur_idx = backup_cur_idx;
03011 
03012   /* Then check the current node set has the node LAST_NODE.  */
03013   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
03014     return REG_NOERROR;
03015 
03016   return REG_NOMATCH;
03017 }
03018 
03019 /* Helper functions for check_arrival.  */
03020 
03021 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
03022    to NEXT_NODES.
03023    TODO: This function is similar to the functions transit_state*(),
03024         however this function has many additional works.
03025         Can't we unify them?  */
03026 
03027 static reg_errcode_t
03028 internal_function
03029 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
03030                            re_node_set *cur_nodes, re_node_set *next_nodes)
03031 {
03032   const re_dfa_t *const dfa = mctx->dfa;
03033   int result;
03034   int cur_idx;
03035   reg_errcode_t err = REG_NOERROR;
03036   re_node_set union_set;
03037   re_node_set_init_empty (&union_set);
03038   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
03039     {
03040       int naccepted = 0;
03041       int cur_node = cur_nodes->elems[cur_idx];
03042 #ifdef DEBUG
03043       re_token_type_t type = dfa->nodes[cur_node].type;
03044       assert (!IS_EPSILON_NODE (type));
03045 #endif
03046 #ifdef RE_ENABLE_I18N
03047       /* If the node may accept `multi byte'.  */
03048       if (dfa->nodes[cur_node].accept_mb)
03049        {
03050          naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
03051                                           str_idx);
03052          if (naccepted > 1)
03053            {
03054              re_dfastate_t *dest_state;
03055              int next_node = dfa->nexts[cur_node];
03056              int next_idx = str_idx + naccepted;
03057              dest_state = mctx->state_log[next_idx];
03058              re_node_set_empty (&union_set);
03059              if (dest_state)
03060               {
03061                 err = re_node_set_merge (&union_set, &dest_state->nodes);
03062                 if (BE (err != REG_NOERROR, 0))
03063                   {
03064                     re_node_set_free (&union_set);
03065                     return err;
03066                   }
03067               }
03068              result = re_node_set_insert (&union_set, next_node);
03069              if (BE (result < 0, 0))
03070               {
03071                 re_node_set_free (&union_set);
03072                 return REG_ESPACE;
03073               }
03074              mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
03075                                                      &union_set);
03076              if (BE (mctx->state_log[next_idx] == NULL
03077                     && err != REG_NOERROR, 0))
03078               {
03079                 re_node_set_free (&union_set);
03080                 return err;
03081               }
03082            }
03083        }
03084 #endif /* RE_ENABLE_I18N */
03085       if (naccepted
03086          || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
03087        {
03088          result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
03089          if (BE (result < 0, 0))
03090            {
03091              re_node_set_free (&union_set);
03092              return REG_ESPACE;
03093            }
03094        }
03095     }
03096   re_node_set_free (&union_set);
03097   return REG_NOERROR;
03098 }
03099 
03100 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
03101    CUR_NODES, however exclude the nodes which are:
03102     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
03103     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
03104 */
03105 
03106 static reg_errcode_t
03107 internal_function
03108 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
03109                        int ex_subexp, int type)
03110 {
03111   reg_errcode_t err;
03112   int idx, outside_node;
03113   re_node_set new_nodes;
03114 #ifdef DEBUG
03115   assert (cur_nodes->nelem);
03116 #endif
03117   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
03118   if (BE (err != REG_NOERROR, 0))
03119     return err;
03120   /* Create a new node set NEW_NODES with the nodes which are epsilon
03121      closures of the node in CUR_NODES.  */
03122 
03123   for (idx = 0; idx < cur_nodes->nelem; ++idx)
03124     {
03125       int cur_node = cur_nodes->elems[idx];
03126       const re_node_set *eclosure = dfa->eclosures + cur_node;
03127       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
03128       if (outside_node == -1)
03129        {
03130          /* There are no problematic nodes, just merge them.  */
03131          err = re_node_set_merge (&new_nodes, eclosure);
03132          if (BE (err != REG_NOERROR, 0))
03133            {
03134              re_node_set_free (&new_nodes);
03135              return err;
03136            }
03137        }
03138       else
03139        {
03140          /* There are problematic nodes, re-calculate incrementally.  */
03141          err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
03142                                          ex_subexp, type);
03143          if (BE (err != REG_NOERROR, 0))
03144            {
03145              re_node_set_free (&new_nodes);
03146              return err;
03147            }
03148        }
03149     }
03150   re_node_set_free (cur_nodes);
03151   *cur_nodes = new_nodes;
03152   return REG_NOERROR;
03153 }
03154 
03155 /* Helper function for check_arrival_expand_ecl.
03156    Check incrementally the epsilon closure of TARGET, and if it isn't
03157    problematic append it to DST_NODES.  */
03158 
03159 static reg_errcode_t
03160 internal_function
03161 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
03162                            int target, int ex_subexp, int type)
03163 {
03164   int cur_node;
03165   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
03166     {
03167       int err;
03168 
03169       if (dfa->nodes[cur_node].type == type
03170          && dfa->nodes[cur_node].opr.idx == ex_subexp)
03171        {
03172          if (type == OP_CLOSE_SUBEXP)
03173            {
03174              err = re_node_set_insert (dst_nodes, cur_node);
03175              if (BE (err == -1, 0))
03176               return REG_ESPACE;
03177            }
03178          break;
03179        }
03180       err = re_node_set_insert (dst_nodes, cur_node);
03181       if (BE (err == -1, 0))
03182        return REG_ESPACE;
03183       if (dfa->edests[cur_node].nelem == 0)
03184        break;
03185       if (dfa->edests[cur_node].nelem == 2)
03186        {
03187          err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
03188                                          dfa->edests[cur_node].elems[1],
03189                                          ex_subexp, type);
03190          if (BE (err != REG_NOERROR, 0))
03191            return err;
03192        }
03193       cur_node = dfa->edests[cur_node].elems[0];
03194     }
03195   return REG_NOERROR;
03196 }
03197 
03198 
03199 /* For all the back references in the current state, calculate the
03200    destination of the back references by the appropriate entry
03201    in MCTX->BKREF_ENTS.  */
03202 
03203 static reg_errcode_t
03204 internal_function
03205 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
03206                   int cur_str, int subexp_num, int type)
03207 {
03208   const re_dfa_t *const dfa = mctx->dfa;
03209   reg_errcode_t err;
03210   int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
03211   struct re_backref_cache_entry *ent;
03212 
03213   if (cache_idx_start == -1)
03214     return REG_NOERROR;
03215 
03216  restart:
03217   ent = mctx->bkref_ents + cache_idx_start;
03218   do
03219     {
03220       int to_idx, next_node;
03221 
03222       /* Is this entry ENT is appropriate?  */
03223       if (!re_node_set_contains (cur_nodes, ent->node))
03224        continue; /* No.  */
03225 
03226       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
03227       /* Calculate the destination of the back reference, and append it
03228         to MCTX->STATE_LOG.  */
03229       if (to_idx == cur_str)
03230        {
03231          /* The backreference did epsilon transit, we must re-check all the
03232             node in the current state.  */
03233          re_node_set new_dests;
03234          reg_errcode_t err2, err3;
03235          next_node = dfa->edests[ent->node].elems[0];
03236          if (re_node_set_contains (cur_nodes, next_node))
03237            continue;
03238          err = re_node_set_init_1 (&new_dests, next_node);
03239          err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
03240          err3 = re_node_set_merge (cur_nodes, &new_dests);
03241          re_node_set_free (&new_dests);
03242          if (BE (err != REG_NOERROR || err2 != REG_NOERROR
03243                 || err3 != REG_NOERROR, 0))
03244            {
03245              err = (err != REG_NOERROR ? err
03246                    : (err2 != REG_NOERROR ? err2 : err3));
03247              return err;
03248            }
03249          /* TODO: It is still inefficient...  */
03250          goto restart;
03251        }
03252       else
03253        {
03254          re_node_set union_set;
03255          next_node = dfa->nexts[ent->node];
03256          if (mctx->state_log[to_idx])
03257            {
03258              int ret;
03259              if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
03260                                    next_node))
03261               continue;
03262              err = re_node_set_init_copy (&union_set,
03263                                       &mctx->state_log[to_idx]->nodes);
03264              ret = re_node_set_insert (&union_set, next_node);
03265              if (BE (err != REG_NOERROR || ret < 0, 0))
03266               {
03267                 re_node_set_free (&union_set);
03268                 err = err != REG_NOERROR ? err : REG_ESPACE;
03269                 return err;
03270               }
03271            }
03272          else
03273            {
03274              err = re_node_set_init_1 (&union_set, next_node);
03275              if (BE (err != REG_NOERROR, 0))
03276               return err;
03277            }
03278          mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
03279          re_node_set_free (&union_set);
03280          if (BE (mctx->state_log[to_idx] == NULL
03281                 && err != REG_NOERROR, 0))
03282            return err;
03283        }
03284     }
03285   while (ent++->more);
03286   return REG_NOERROR;
03287 }
03288 
03289 /* Build transition table for the state.
03290    Return 1 if succeeded, otherwise return NULL.  */
03291 
03292 static int
03293 internal_function
03294 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
03295 {
03296   reg_errcode_t err;
03297   int i, j, ch, need_word_trtable = 0;
03298   bitset_word_t elem, mask;
03299   bool dests_node_malloced = false;
03300   bool dest_states_malloced = false;
03301   int ndests; /* Number of the destination states from `state'.  */
03302   re_dfastate_t **trtable;
03303   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
03304   re_node_set follows, *dests_node;
03305   bitset_t *dests_ch;
03306   bitset_t acceptable;
03307 
03308   struct dests_alloc
03309   {
03310     re_node_set dests_node[SBC_MAX];
03311     bitset_t dests_ch[SBC_MAX];
03312   } *dests_alloc;
03313 
03314   /* We build DFA states which corresponds to the destination nodes
03315      from `state'.  `dests_node[i]' represents the nodes which i-th
03316      destination state contains, and `dests_ch[i]' represents the
03317      characters which i-th destination state accepts.  */
03318   if (__libc_use_alloca (sizeof (struct dests_alloc)))
03319     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
03320   else
03321     {
03322       dests_alloc = re_malloc (struct dests_alloc, 1);
03323       if (BE (dests_alloc == NULL, 0))
03324        return 0;
03325       dests_node_malloced = true;
03326     }
03327   dests_node = dests_alloc->dests_node;
03328   dests_ch = dests_alloc->dests_ch;
03329 
03330   /* Initialize transiton table.  */
03331   state->word_trtable = state->trtable = NULL;
03332 
03333   /* At first, group all nodes belonging to `state' into several
03334      destinations.  */
03335   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
03336   if (BE (ndests <= 0, 0))
03337     {
03338       if (dests_node_malloced)
03339        free (dests_alloc);
03340       /* Return 0 in case of an error, 1 otherwise.  */
03341       if (ndests == 0)
03342        {
03343          state->trtable = (re_dfastate_t **)
03344            calloc (sizeof (re_dfastate_t *), SBC_MAX);
03345          return 1;
03346        }
03347       return 0;
03348     }
03349 
03350   err = re_node_set_alloc (&follows, ndests + 1);
03351   if (BE (err != REG_NOERROR, 0))
03352     goto out_free;
03353 
03354   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
03355                       + ndests * 3 * sizeof (re_dfastate_t *)))
03356     dest_states = (re_dfastate_t **)
03357       alloca (ndests * 3 * sizeof (re_dfastate_t *));
03358   else
03359     {
03360       dest_states = (re_dfastate_t **)
03361        malloc (ndests * 3 * sizeof (re_dfastate_t *));
03362       if (BE (dest_states == NULL, 0))
03363        {
03364 out_free:
03365          if (dest_states_malloced)
03366            free (dest_states);
03367          re_node_set_free (&follows);
03368          for (i = 0; i < ndests; ++i)
03369            re_node_set_free (dests_node + i);
03370          if (dests_node_malloced)
03371            free (dests_alloc);
03372          return 0;
03373        }
03374       dest_states_malloced = true;
03375     }
03376   dest_states_word = dest_states + ndests;
03377   dest_states_nl = dest_states_word + ndests;
03378   bitset_empty (acceptable);
03379 
03380   /* Then build the states for all destinations.  */
03381   for (i = 0; i < ndests; ++i)
03382     {
03383       int next_node;
03384       re_node_set_empty (&follows);
03385       /* Merge the follows of this destination states.  */
03386       for (j = 0; j < dests_node[i].nelem; ++j)
03387        {
03388          next_node = dfa->nexts[dests_node[i].elems[j]];
03389          if (next_node != -1)
03390            {
03391              err = re_node_set_merge (&follows, dfa->eclosures + next_node);
03392              if (BE (err != REG_NOERROR, 0))
03393               goto out_free;
03394            }
03395        }
03396       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
03397       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
03398        goto out_free;
03399       /* If the new state has context constraint,
03400         build appropriate states for these contexts.  */
03401       if (dest_states[i]->has_constraint)
03402        {
03403          dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
03404                                                    CONTEXT_WORD);
03405          if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
03406            goto out_free;
03407 
03408          if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
03409            need_word_trtable = 1;
03410 
03411          dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
03412                                                  CONTEXT_NEWLINE);
03413          if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
03414            goto out_free;
03415        }
03416       else
03417        {
03418          dest_states_word[i] = dest_states[i];
03419          dest_states_nl[i] = dest_states[i];
03420        }
03421       bitset_merge (acceptable, dests_ch[i]);
03422     }
03423 
03424   if (!BE (need_word_trtable, 0))
03425     {
03426       /* We don't care about whether the following character is a word
03427         character, or we are in a single-byte character set so we can
03428         discern by looking at the character code: allocate a
03429         256-entry transition table.  */
03430       trtable = state->trtable =
03431        (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
03432       if (BE (trtable == NULL, 0))
03433        goto out_free;
03434 
03435       /* For all characters ch...:  */
03436       for (i = 0; i < BITSET_WORDS; ++i)
03437        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
03438             elem;
03439             mask <<= 1, elem >>= 1, ++ch)
03440          if (BE (elem & 1, 0))
03441            {
03442              /* There must be exactly one destination which accepts
03443                character ch.  See group_nodes_into_DFAstates.  */
03444              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
03445               ;
03446 
03447              /* j-th destination accepts the word character ch.  */
03448              if (dfa->word_char[i] & mask)
03449               trtable[ch] = dest_states_word[j];
03450              else
03451               trtable[ch] = dest_states[j];
03452            }
03453     }
03454   else
03455     {
03456       /* We care about whether the following character is a word
03457         character, and we are in a multi-byte character set: discern
03458         by looking at the character code: build two 256-entry
03459         transition tables, one starting at trtable[0] and one
03460         starting at trtable[SBC_MAX].  */
03461       trtable = state->word_trtable =
03462        (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
03463       if (BE (trtable == NULL, 0))
03464        goto out_free;
03465 
03466       /* For all characters ch...:  */
03467       for (i = 0; i < BITSET_WORDS; ++i)
03468        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
03469             elem;
03470             mask <<= 1, elem >>= 1, ++ch)
03471          if (BE (elem & 1, 0))
03472            {
03473              /* There must be exactly one destination which accepts
03474                character ch.  See group_nodes_into_DFAstates.  */
03475              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
03476               ;
03477 
03478              /* j-th destination accepts the word character ch.  */
03479              trtable[ch] = dest_states[j];
03480              trtable[ch + SBC_MAX] = dest_states_word[j];
03481            }
03482     }
03483 
03484   /* new line */
03485   if (bitset_contain (acceptable, NEWLINE_CHAR))
03486     {
03487       /* The current state accepts newline character.  */
03488       for (j = 0; j < ndests; ++j)
03489        if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
03490          {
03491            /* k-th destination accepts newline character.  */
03492            trtable[NEWLINE_CHAR] = dest_states_nl[j];
03493            if (need_word_trtable)
03494              trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
03495            /* There must be only one destination which accepts
03496               newline.  See group_nodes_into_DFAstates.  */
03497            break;
03498          }
03499     }
03500 
03501   if (dest_states_malloced)
03502     free (dest_states);
03503 
03504   re_node_set_free (&follows);
03505   for (i = 0; i < ndests; ++i)
03506     re_node_set_free (dests_node + i);
03507 
03508   if (dests_node_malloced)
03509     free (dests_alloc);
03510 
03511   return 1;
03512 }
03513 
03514 /* Group all nodes belonging to STATE into several destinations.
03515    Then for all destinations, set the nodes belonging to the destination
03516    to DESTS_NODE[i] and set the characters accepted by the destination
03517    to DEST_CH[i].  This function return the number of destinations.  */
03518 
03519 static int
03520 internal_function
03521 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
03522                          re_node_set *dests_node, bitset_t *dests_ch)
03523 {
03524   reg_errcode_t err;
03525   int result;
03526   int i, j, k;
03527   int ndests; /* Number of the destinations from `state'.  */
03528   bitset_t accepts; /* Characters a node can accept.  */
03529   const re_node_set *cur_nodes = &state->nodes;
03530   bitset_empty (accepts);
03531   ndests = 0;
03532 
03533   /* For all the nodes belonging to `state',  */
03534   for (i = 0; i < cur_nodes->nelem; ++i)
03535     {
03536       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
03537       re_token_type_t type = node->type;
03538       unsigned int constraint = node->constraint;
03539 
03540       /* Enumerate all single byte character this node can accept.  */
03541       if (type == CHARACTER)
03542        bitset_set (accepts, node->opr.c);
03543       else if (type == SIMPLE_BRACKET)
03544        {
03545          bitset_merge (accepts, node->opr.sbcset);
03546        }
03547       else if (type == OP_PERIOD)
03548        {
03549 #ifdef RE_ENABLE_I18N
03550          if (dfa->mb_cur_max > 1)
03551            bitset_merge (accepts, dfa->sb_char);
03552          else
03553 #endif
03554            bitset_set_all (accepts);
03555          if (!(dfa->syntax & RE_DOT_NEWLINE))
03556            bitset_clear (accepts, '\n');
03557          if (dfa->syntax & RE_DOT_NOT_NULL)
03558            bitset_clear (accepts, '\0');
03559        }
03560 #ifdef RE_ENABLE_I18N
03561       else if (type == OP_UTF8_PERIOD)
03562         {
03563          memset (accepts, '\xff', sizeof (bitset_t) / 2);
03564          if (!(dfa->syntax & RE_DOT_NEWLINE))
03565            bitset_clear (accepts, '\n');
03566          if (dfa->syntax & RE_DOT_NOT_NULL)
03567            bitset_clear (accepts, '\0');
03568         }
03569 #endif
03570       else
03571        continue;
03572 
03573       /* Check the `accepts' and sift the characters which are not
03574         match it the context.  */
03575       if (constraint)
03576        {
03577          if (constraint & NEXT_NEWLINE_CONSTRAINT)
03578            {
03579              bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
03580              bitset_empty (accepts);
03581              if (accepts_newline)
03582               bitset_set (accepts, NEWLINE_CHAR);
03583              else
03584               continue;
03585            }
03586          if (constraint & NEXT_ENDBUF_CONSTRAINT)
03587            {
03588              bitset_empty (accepts);
03589              continue;
03590            }
03591 
03592          if (constraint & NEXT_WORD_CONSTRAINT)
03593            {
03594              bitset_word_t any_set = 0;
03595              if (type == CHARACTER && !node->word_char)
03596               {
03597                 bitset_empty (accepts);
03598                 continue;
03599               }
03600 #ifdef RE_ENABLE_I18N
03601              if (dfa->mb_cur_max > 1)
03602               for (j = 0; j < BITSET_WORDS; ++j)
03603                 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
03604              else
03605 #endif
03606               for (j = 0; j < BITSET_WORDS; ++j)
03607                 any_set |= (accepts[j] &= dfa->word_char[j]);
03608              if (!any_set)
03609               continue;
03610            }
03611          if (constraint & NEXT_NOTWORD_CONSTRAINT)
03612            {
03613              bitset_word_t any_set = 0;
03614              if (type == CHARACTER && node->word_char)
03615               {
03616                 bitset_empty (accepts);
03617                 continue;
03618               }
03619 #ifdef RE_ENABLE_I18N
03620              if (dfa->mb_cur_max > 1)
03621               for (j = 0; j < BITSET_WORDS; ++j)
03622                 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
03623              else
03624 #endif
03625               for (j = 0; j < BITSET_WORDS; ++j)
03626                 any_set |= (accepts[j] &= ~dfa->word_char[j]);
03627              if (!any_set)
03628               continue;
03629            }
03630        }
03631 
03632       /* Then divide `accepts' into DFA states, or create a new
03633         state.  Above, we make sure that accepts is not empty.  */
03634       for (j = 0; j < ndests; ++j)
03635        {
03636          bitset_t intersec; /* Intersection sets, see below.  */
03637          bitset_t remains;
03638          /* Flags, see below.  */
03639          bitset_word_t has_intersec, not_subset, not_consumed;
03640 
03641          /* Optimization, skip if this state doesn't accept the character.  */
03642          if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
03643            continue;
03644 
03645          /* Enumerate the intersection set of this state and `accepts'.  */
03646          has_intersec = 0;
03647          for (k = 0; k < BITSET_WORDS; ++k)
03648            has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
03649          /* And skip if the intersection set is empty.  */
03650          if (!has_intersec)
03651            continue;
03652 
03653          /* Then check if this state is a subset of `accepts'.  */
03654          not_subset = not_consumed = 0;
03655          for (k = 0; k < BITSET_WORDS; ++k)
03656            {
03657              not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
03658              not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
03659            }
03660 
03661          /* If this state isn't a subset of `accepts', create a
03662             new group state, which has the `remains'. */
03663          if (not_subset)
03664            {
03665              bitset_copy (dests_ch[ndests], remains);
03666              bitset_copy (dests_ch[j], intersec);
03667              err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
03668              if (BE (err != REG_NOERROR, 0))
03669               goto error_return;
03670              ++ndests;
03671            }
03672 
03673          /* Put the position in the current group. */
03674          result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
03675          if (BE (result < 0, 0))
03676            goto error_return;
03677 
03678          /* If all characters are consumed, go to next node. */
03679          if (!not_consumed)
03680            break;
03681        }
03682       /* Some characters remain, create a new group. */
03683       if (j == ndests)
03684        {
03685          bitset_copy (dests_ch[ndests], accepts);
03686          err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
03687          if (BE (err != REG_NOERROR, 0))
03688            goto error_return;
03689          ++ndests;
03690          bitset_empty (accepts);
03691        }
03692     }
03693   return ndests;
03694  error_return:
03695   for (j = 0; j < ndests; ++j)
03696     re_node_set_free (dests_node + j);
03697   return -1;
03698 }
03699 
03700 #ifdef RE_ENABLE_I18N
03701 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
03702    Return the number of the bytes the node accepts.
03703    STR_IDX is the current index of the input string.
03704 
03705    This function handles the nodes which can accept one character, or
03706    one collating element like '.', '[a-z]', opposite to the other nodes
03707    can only accept one byte.  */
03708 
03709 static int
03710 internal_function
03711 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
03712                       const re_string_t *input, int str_idx)
03713 {
03714   const re_token_t *node = dfa->nodes + node_idx;
03715   int char_len, elem_len;
03716   int i;
03717 
03718   if (BE (node->type == OP_UTF8_PERIOD, 0))
03719     {
03720       unsigned char c = re_string_byte_at (input, str_idx), d;
03721       if (BE (c < 0xc2, 1))
03722        return 0;
03723 
03724       if (str_idx + 2 > input->len)
03725        return 0;
03726 
03727       d = re_string_byte_at (input, str_idx + 1);
03728       if (c < 0xe0)
03729        return (d < 0x80 || d > 0xbf) ? 0 : 2;
03730       else if (c < 0xf0)
03731        {
03732          char_len = 3;
03733          if (c == 0xe0 && d < 0xa0)
03734            return 0;
03735        }
03736       else if (c < 0xf8)
03737        {
03738          char_len = 4;
03739          if (c == 0xf0 && d < 0x90)
03740            return 0;
03741        }
03742       else if (c < 0xfc)
03743        {
03744          char_len = 5;
03745          if (c == 0xf8 && d < 0x88)
03746            return 0;
03747        }
03748       else if (c < 0xfe)
03749        {
03750          char_len = 6;
03751          if (c == 0xfc && d < 0x84)
03752            return 0;
03753        }
03754       else
03755        return 0;
03756 
03757       if (str_idx + char_len > input->len)
03758        return 0;
03759 
03760       for (i = 1; i < char_len; ++i)
03761        {
03762          d = re_string_byte_at (input, str_idx + i);
03763          if (d < 0x80 || d > 0xbf)
03764            return 0;
03765        }
03766       return char_len;
03767     }
03768 
03769   char_len = re_string_char_size_at (input, str_idx);
03770   if (node->type == OP_PERIOD)
03771     {
03772       if (char_len <= 1)
03773         return 0;
03774       /* FIXME: I don't think this if is needed, as both '\n'
03775         and '\0' are char_len == 1.  */
03776       /* '.' accepts any one character except the following two cases.  */
03777       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
03778           re_string_byte_at (input, str_idx) == '\n') ||
03779          ((dfa->syntax & RE_DOT_NOT_NULL) &&
03780           re_string_byte_at (input, str_idx) == '\0'))
03781        return 0;
03782       return char_len;
03783     }
03784 
03785   elem_len = re_string_elem_size_at (input, str_idx);
03786   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
03787     return 0;
03788 
03789   if (node->type == COMPLEX_BRACKET)
03790     {
03791       const re_charset_t *cset = node->opr.mbcset;
03792 # ifdef _LIBC
03793       const unsigned char *pin
03794        = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
03795       int j;
03796       uint32_t nrules;
03797 # endif /* _LIBC */
03798       int match_len = 0;
03799       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
03800                   ? re_string_wchar_at (input, str_idx) : 0);
03801 
03802       /* match with multibyte character?  */
03803       for (i = 0; i < cset->nmbchars; ++i)
03804        if (wc == cset->mbchars[i])
03805          {
03806            match_len = char_len;
03807            goto check_node_accept_bytes_match;
03808          }
03809       /* match with character_class?  */
03810       for (i = 0; i < cset->nchar_classes; ++i)
03811        {
03812          wctype_t wt = cset->char_classes[i];
03813          if (__iswctype (wc, wt))
03814            {
03815              match_len = char_len;
03816              goto check_node_accept_bytes_match;
03817            }
03818        }
03819 
03820 # ifdef _LIBC
03821       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03822       if (nrules != 0)
03823        {
03824          unsigned int in_collseq = 0;
03825          const int32_t *table, *indirect;
03826          const unsigned char *weights, *extra;
03827          const char *collseqwc;
03828          /* This #include defines a local function!  */
03829 #  include <locale/weight.h>
03830 
03831          /* match with collating_symbol?  */
03832          if (cset->ncoll_syms)
03833            extra = (const unsigned char *)
03834              _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
03835          for (i = 0; i < cset->ncoll_syms; ++i)
03836            {
03837              const unsigned char *coll_sym = extra + cset->coll_syms[i];
03838              /* Compare the length of input collating element and
03839                the length of current collating element.  */
03840              if (*coll_sym != elem_len)
03841               continue;
03842              /* Compare each bytes.  */
03843              for (j = 0; j < *coll_sym; j++)
03844               if (pin[j] != coll_sym[1 + j])
03845                 break;
03846              if (j == *coll_sym)
03847               {
03848                 /* Match if every bytes is equal.  */
03849                 match_len = j;
03850                 goto check_node_accept_bytes_match;
03851               }
03852            }
03853 
03854          if (cset->nranges)
03855            {
03856              if (elem_len <= char_len)
03857               {
03858                 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
03859                 in_collseq = __collseq_table_lookup (collseqwc, wc);
03860               }
03861              else
03862               in_collseq = find_collation_sequence_value (pin, elem_len);
03863            }
03864          /* match with range expression?  */
03865          for (i = 0; i < cset->nranges; ++i)
03866            if (cset->range_starts[i] <= in_collseq
03867               && in_collseq <= cset->range_ends[i])
03868              {
03869               match_len = elem_len;
03870               goto check_node_accept_bytes_match;
03871              }
03872 
03873          /* match with equivalence_class?  */
03874          if (cset->nequiv_classes)
03875            {
03876              const unsigned char *cp = pin;
03877              table = (const int32_t *)
03878               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
03879              weights = (const unsigned char *)
03880               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
03881              extra = (const unsigned char *)
03882               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
03883              indirect = (const int32_t *)
03884               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
03885              int32_t idx = findidx (&cp);
03886              if (idx > 0)
03887               for (i = 0; i < cset->nequiv_classes; ++i)
03888                 {
03889                   int32_t equiv_class_idx = cset->equiv_classes[i];
03890                   size_t weight_len = weights[idx & 0xffffff];
03891                   if (weight_len == weights[equiv_class_idx & 0xffffff]
03892                      && (idx >> 24) == (equiv_class_idx >> 24))
03893                     {
03894                      int cnt = 0;
03895 
03896                      idx &= 0xffffff;
03897                      equiv_class_idx &= 0xffffff;
03898 
03899                      while (cnt <= weight_len
03900                             && (weights[equiv_class_idx + 1 + cnt]
03901                                == weights[idx + 1 + cnt]))
03902                        ++cnt;
03903                      if (cnt > weight_len)
03904                        {
03905                          match_len = elem_len;
03906                          goto check_node_accept_bytes_match;
03907                        }
03908                     }
03909                 }
03910            }
03911        }
03912       else
03913 # endif /* _LIBC */
03914        {
03915          /* match with range expression?  */
03916 #if __GNUC__ >= 2
03917          wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
03918 #else
03919          wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
03920          cmp_buf[2] = wc;
03921 #endif
03922          for (i = 0; i < cset->nranges; ++i)
03923            {
03924              cmp_buf[0] = cset->range_starts[i];
03925              cmp_buf[4] = cset->range_ends[i];
03926              if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
03927                 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
03928               {
03929                 match_len = char_len;
03930                 goto check_node_accept_bytes_match;
03931               }
03932            }
03933        }
03934     check_node_accept_bytes_match:
03935       if (!cset->non_match)
03936        return match_len;
03937       else
03938        {
03939          if (match_len > 0)
03940            return 0;
03941          else
03942            return (elem_len > char_len) ? elem_len : char_len;
03943        }
03944     }
03945   return 0;
03946 }
03947 
03948 # ifdef _LIBC
03949 static unsigned int
03950 internal_function
03951 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
03952 {
03953   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03954   if (nrules == 0)
03955     {
03956       if (mbs_len == 1)
03957        {
03958          /* No valid character.  Match it as a single byte character.  */
03959          const unsigned char *collseq = (const unsigned char *)
03960            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
03961          return collseq[mbs[0]];
03962        }
03963       return UINT_MAX;
03964     }
03965   else
03966     {
03967       int32_t idx;
03968       const unsigned char *extra = (const unsigned char *)
03969        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
03970       int32_t extrasize = (const unsigned char *)
03971        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
03972 
03973       for (idx = 0; idx < extrasize;)
03974        {
03975          int mbs_cnt, found = 0;
03976          int32_t elem_mbs_len;
03977          /* Skip the name of collating element name.  */
03978          idx = idx + extra[idx] + 1;
03979          elem_mbs_len = extra[idx++];
03980          if (mbs_len == elem_mbs_len)
03981            {
03982              for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
03983               if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
03984                 break;
03985              if (mbs_cnt == elem_mbs_len)
03986               /* Found the entry.  */
03987               found = 1;
03988            }
03989          /* Skip the byte sequence of the collating element.  */
03990          idx += elem_mbs_len;
03991          /* Adjust for the alignment.  */
03992          idx = (idx + 3) & ~3;
03993          /* Skip the collation sequence value.  */
03994          idx += sizeof (uint32_t);
03995          /* Skip the wide char sequence of the collating element.  */
03996          idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
03997          /* If we found the entry, return the sequence value.  */
03998          if (found)
03999            return *(uint32_t *) (extra + idx);
04000          /* Skip the collation sequence value.  */
04001          idx += sizeof (uint32_t);
04002        }
04003       return UINT_MAX;
04004     }
04005 }
04006 # endif /* _LIBC */
04007 #endif /* RE_ENABLE_I18N */
04008 
04009 /* Check whether the node accepts the byte which is IDX-th
04010    byte of the INPUT.  */
04011 
04012 static int
04013 internal_function
04014 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
04015                  int idx)
04016 {
04017   unsigned char ch;
04018   ch = re_string_byte_at (&mctx->input, idx);
04019   switch (node->type)
04020     {
04021     case CHARACTER:
04022       if (node->opr.c != ch)
04023         return 0;
04024       break;
04025 
04026     case SIMPLE_BRACKET:
04027       if (!bitset_contain (node->opr.sbcset, ch))
04028         return 0;
04029       break;
04030 
04031 #ifdef RE_ENABLE_I18N
04032     case OP_UTF8_PERIOD:
04033       if (ch >= 0x80)
04034         return 0;
04035       /* FALLTHROUGH */
04036 #endif
04037     case OP_PERIOD:
04038       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
04039          || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
04040        return 0;
04041       break;
04042 
04043     default:
04044       return 0;
04045     }
04046 
04047   if (node->constraint)
04048     {
04049       /* The node has constraints.  Check whether the current context
04050         satisfies the constraints.  */
04051       unsigned int context = re_string_context_at (&mctx->input, idx,
04052                                              mctx->eflags);
04053       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
04054        return 0;
04055     }
04056 
04057   return 1;
04058 }
04059 
04060 /* Extend the buffers, if the buffers have run out.  */
04061 
04062 static reg_errcode_t
04063 internal_function
04064 extend_buffers (re_match_context_t *mctx)
04065 {
04066   reg_errcode_t ret;
04067   re_string_t *pstr = &mctx->input;
04068 
04069   /* Double the lengthes of the buffers.  */
04070   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
04071   if (BE (ret != REG_NOERROR, 0))
04072     return ret;
04073 
04074   if (mctx->state_log != NULL)
04075     {
04076       /* And double the length of state_log.  */
04077       /* XXX We have no indication of the size of this buffer.  If this
04078         allocation fail we have no indication that the state_log array
04079         does not have the right size.  */
04080       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
04081                                          pstr->bufs_len + 1);
04082       if (BE (new_array == NULL, 0))
04083        return REG_ESPACE;
04084       mctx->state_log = new_array;
04085     }
04086 
04087   /* Then reconstruct the buffers.  */
04088   if (pstr->icase)
04089     {
04090 #ifdef RE_ENABLE_I18N
04091       if (pstr->mb_cur_max > 1)
04092        {
04093          ret = build_wcs_upper_buffer (pstr);
04094          if (BE (ret != REG_NOERROR, 0))
04095            return ret;
04096        }
04097       else
04098 #endif /* RE_ENABLE_I18N  */
04099        build_upper_buffer (pstr);
04100     }
04101   else
04102     {
04103 #ifdef RE_ENABLE_I18N
04104       if (pstr->mb_cur_max > 1)
04105        build_wcs_buffer (pstr);
04106       else
04107 #endif /* RE_ENABLE_I18N  */
04108        {
04109          if (pstr->trans != NULL)
04110            re_string_translate_buffer (pstr);
04111        }
04112     }
04113   return REG_NOERROR;
04114 }
04115 
04116 
04117 /* Functions for matching context.  */
04118 
04119 /* Initialize MCTX.  */
04120 
04121 static reg_errcode_t
04122 internal_function
04123 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
04124 {
04125   mctx->eflags = eflags;
04126   mctx->match_last = -1;
04127   if (n > 0)
04128     {
04129       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
04130       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
04131       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
04132        return REG_ESPACE;
04133     }
04134   /* Already zero-ed by the caller.
04135      else
04136        mctx->bkref_ents = NULL;
04137      mctx->nbkref_ents = 0;
04138      mctx->nsub_tops = 0;  */
04139   mctx->abkref_ents = n;
04140   mctx->max_mb_elem_len = 1;
04141   mctx->asub_tops = n;
04142   return REG_NOERROR;
04143 }
04144 
04145 /* Clean the entries which depend on the current input in MCTX.
04146    This function must be invoked when the matcher changes the start index
04147    of the input, or changes the input string.  */
04148 
04149 static void
04150 internal_function
04151 match_ctx_clean (re_match_context_t *mctx)
04152 {
04153   int st_idx;
04154   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
04155     {
04156       int sl_idx;
04157       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
04158       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
04159        {
04160          re_sub_match_last_t *last = top->lasts[sl_idx];
04161          re_free (last->path.array);
04162          re_free (last);
04163        }
04164       re_free (top->lasts);
04165       if (top->path)
04166        {
04167          re_free (top->path->array);
04168          re_free (top->path);
04169        }
04170       free (top);
04171     }
04172 
04173   mctx->nsub_tops = 0;
04174   mctx->nbkref_ents = 0;
04175 }
04176 
04177 /* Free all the memory associated with MCTX.  */
04178 
04179 static void
04180 internal_function
04181 match_ctx_free (re_match_context_t *mctx)
04182 {
04183   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
04184   match_ctx_clean (mctx);
04185   re_free (mctx->sub_tops);
04186   re_free (mctx->bkref_ents);
04187 }
04188 
04189 /* Add a new backreference entry to MCTX.
04190    Note that we assume that caller never call this function with duplicate
04191    entry, and call with STR_IDX which isn't smaller than any existing entry.
04192 */
04193 
04194 static reg_errcode_t
04195 internal_function
04196 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
04197                    int to)
04198 {
04199   if (mctx->nbkref_ents >= mctx->abkref_ents)
04200     {
04201       struct re_backref_cache_entry* new_entry;
04202       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
04203                            mctx->abkref_ents * 2);
04204       if (BE (new_entry == NULL, 0))
04205        {
04206          re_free (mctx->bkref_ents);
04207          return REG_ESPACE;
04208        }
04209       mctx->bkref_ents = new_entry;
04210       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
04211              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
04212       mctx->abkref_ents *= 2;
04213     }
04214   if (mctx->nbkref_ents > 0
04215       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
04216     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
04217 
04218   mctx->bkref_ents[mctx->nbkref_ents].node = node;
04219   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
04220   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
04221   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
04222 
04223   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
04224      If bit N is clear, means that this entry won't epsilon-transition to
04225      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
04226      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
04227      such node.
04228 
04229      A backreference does not epsilon-transition unless it is empty, so set
04230      to all zeros if FROM != TO.  */
04231   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
04232     = (from == to ? ~0 : 0);
04233 
04234   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
04235   if (mctx->max_mb_elem_len < to - from)
04236     mctx->max_mb_elem_len = to - from;
04237   return REG_NOERROR;
04238 }
04239 
04240 /* Search for the first entry which has the same str_idx, or -1 if none is
04241    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
04242 
04243 static int
04244 internal_function
04245 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
04246 {
04247   int left, right, mid, last;
04248   last = right = mctx->nbkref_ents;
04249   for (left = 0; left < right;)
04250     {
04251       mid = (left + right) / 2;
04252       if (mctx->bkref_ents[mid].str_idx < str_idx)
04253        left = mid + 1;
04254       else
04255        right = mid;
04256     }
04257   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
04258     return left;
04259   else
04260     return -1;
04261 }
04262 
04263 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
04264    at STR_IDX.  */
04265 
04266 static reg_errcode_t
04267 internal_function
04268 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
04269 {
04270 #ifdef DEBUG
04271   assert (mctx->sub_tops != NULL);
04272   assert (mctx->asub_tops > 0);
04273 #endif
04274   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
04275     {
04276       int new_asub_tops = mctx->asub_tops * 2;
04277       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
04278                                              re_sub_match_top_t *,
04279                                              new_asub_tops);
04280       if (BE (new_array == NULL, 0))
04281        return REG_ESPACE;
04282       mctx->sub_tops = new_array;
04283       mctx->asub_tops = new_asub_tops;
04284     }
04285   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
04286   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
04287     return REG_ESPACE;
04288   mctx->sub_tops[mctx->nsub_tops]->node = node;
04289   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
04290   return REG_NOERROR;
04291 }
04292 
04293 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
04294    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
04295 
04296 static re_sub_match_last_t *
04297 internal_function
04298 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
04299 {
04300   re_sub_match_last_t *new_entry;
04301   if (BE (subtop->nlasts == subtop->alasts, 0))
04302     {
04303       int new_alasts = 2 * subtop->alasts + 1;
04304       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
04305                                               re_sub_match_last_t *,
04306                                               new_alasts);
04307       if (BE (new_array == NULL, 0))
04308        return NULL;
04309       subtop->lasts = new_array;
04310       subtop->alasts = new_alasts;
04311     }
04312   new_entry = calloc (1, sizeof (re_sub_match_last_t));
04313   if (BE (new_entry != NULL, 1))
04314     {
04315       subtop->lasts[subtop->nlasts] = new_entry;
04316       new_entry->node = node;
04317       new_entry->str_idx = str_idx;
04318       ++subtop->nlasts;
04319     }
04320   return new_entry;
04321 }
04322 
04323 static void
04324 internal_function
04325 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
04326               re_dfastate_t **limited_sts, int last_node, int last_str_idx)
04327 {
04328   sctx->sifted_states = sifted_sts;
04329   sctx->limited_states = limited_sts;
04330   sctx->last_node = last_node;
04331   sctx->last_str_idx = last_str_idx;
04332   re_node_set_init_empty (&sctx->limits);
04333 }