Back to index

nagios-plugins  1.4.16
regexec.c
Go to the documentation of this file.
00001 /* Extended regular expression matching and search library.
00002    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
00003    Software Foundation, Inc.
00004    This file is part of the GNU C Library.
00005    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 3, or (at your option)
00010    any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License along
00018    with this program; if not, write to the Free Software Foundation,
00019    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
00020 
00021 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
00022                                  Idx n) internal_function;
00023 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
00024 static void match_ctx_free (re_match_context_t *cache) internal_function;
00025 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
00026                                      Idx str_idx, Idx from, Idx to)
00027      internal_function;
00028 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
00029      internal_function;
00030 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
00031                                       Idx str_idx) internal_function;
00032 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
00033                                               Idx node, Idx str_idx)
00034      internal_function;
00035 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
00036                         re_dfastate_t **limited_sts, Idx last_node,
00037                         Idx last_str_idx)
00038      internal_function;
00039 static reg_errcode_t re_search_internal (const regex_t *preg,
00040                                     const char *string, Idx length,
00041                                     Idx start, Idx last_start, Idx stop,
00042                                     size_t nmatch, regmatch_t pmatch[],
00043                                     int eflags) internal_function;
00044 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
00045                               const char *string1, Idx length1,
00046                               const char *string2, Idx length2,
00047                               Idx start, regoff_t range,
00048                               struct re_registers *regs,
00049                               Idx stop, bool ret_len) internal_function;
00050 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
00051                             const char *string, Idx length, Idx start,
00052                             regoff_t range, Idx stop,
00053                             struct re_registers *regs,
00054                             bool ret_len) internal_function;
00055 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
00056                               Idx nregs, int regs_allocated)
00057      internal_function;
00058 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
00059      internal_function;
00060 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
00061                         Idx *p_match_first) internal_function;
00062 static Idx check_halt_state_context (const re_match_context_t *mctx,
00063                                  const re_dfastate_t *state, Idx idx)
00064      internal_function;
00065 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
00066                       regmatch_t *prev_idx_match, Idx cur_node,
00067                       Idx cur_idx, Idx nmatch) internal_function;
00068 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
00069                                   Idx str_idx, Idx dest_node, Idx nregs,
00070                                   regmatch_t *regs,
00071                                   re_node_set *eps_via_nodes)
00072      internal_function;
00073 static reg_errcode_t set_regs (const regex_t *preg,
00074                             const re_match_context_t *mctx,
00075                             size_t nmatch, regmatch_t *pmatch,
00076                             bool fl_backtrack) internal_function;
00077 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
00078      internal_function;
00079 
00080 #ifdef RE_ENABLE_I18N
00081 static int sift_states_iter_mb (const re_match_context_t *mctx,
00082                             re_sift_context_t *sctx,
00083                             Idx node_idx, Idx str_idx, Idx max_str_idx)
00084      internal_function;
00085 #endif /* RE_ENABLE_I18N */
00086 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
00087                                       re_sift_context_t *sctx)
00088      internal_function;
00089 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
00090                                      re_sift_context_t *sctx, Idx str_idx,
00091                                      re_node_set *cur_dest)
00092      internal_function;
00093 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
00094                                          re_sift_context_t *sctx,
00095                                          Idx str_idx,
00096                                          re_node_set *dest_nodes)
00097      internal_function;
00098 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
00099                                        re_node_set *dest_nodes,
00100                                        const re_node_set *candidates)
00101      internal_function;
00102 static bool check_dst_limits (const re_match_context_t *mctx,
00103                            const re_node_set *limits,
00104                            Idx dst_node, Idx dst_idx, Idx src_node,
00105                            Idx src_idx) internal_function;
00106 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
00107                                    int boundaries, Idx subexp_idx,
00108                                    Idx from_node, Idx bkref_idx)
00109      internal_function;
00110 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
00111                                   Idx limit, Idx subexp_idx,
00112                                   Idx node, Idx str_idx,
00113                                   Idx bkref_idx) internal_function;
00114 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
00115                                      re_node_set *dest_nodes,
00116                                      const re_node_set *candidates,
00117                                      re_node_set *limits,
00118                                      struct re_backref_cache_entry *bkref_ents,
00119                                      Idx str_idx) internal_function;
00120 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
00121                                    re_sift_context_t *sctx,
00122                                    Idx str_idx, const re_node_set *candidates)
00123      internal_function;
00124 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
00125                                    re_dfastate_t **dst,
00126                                    re_dfastate_t **src, Idx num)
00127      internal_function;
00128 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
00129                                     re_match_context_t *mctx) internal_function;
00130 static re_dfastate_t *transit_state (reg_errcode_t *err,
00131                                  re_match_context_t *mctx,
00132                                  re_dfastate_t *state) internal_function;
00133 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
00134                                        re_match_context_t *mctx,
00135                                        re_dfastate_t *next_state)
00136      internal_function;
00137 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
00138                                           re_node_set *cur_nodes,
00139                                           Idx str_idx) internal_function;
00140 #if 0
00141 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
00142                                    re_match_context_t *mctx,
00143                                    re_dfastate_t *pstate)
00144      internal_function;
00145 #endif
00146 #ifdef RE_ENABLE_I18N
00147 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
00148                                    re_dfastate_t *pstate)
00149      internal_function;
00150 #endif /* RE_ENABLE_I18N */
00151 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
00152                                      const re_node_set *nodes)
00153      internal_function;
00154 static reg_errcode_t get_subexp (re_match_context_t *mctx,
00155                              Idx bkref_node, Idx bkref_str_idx)
00156      internal_function;
00157 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
00158                                  const re_sub_match_top_t *sub_top,
00159                                  re_sub_match_last_t *sub_last,
00160                                  Idx bkref_node, Idx bkref_str)
00161      internal_function;
00162 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
00163                           Idx subexp_idx, int type) internal_function;
00164 static reg_errcode_t check_arrival (re_match_context_t *mctx,
00165                                 state_array_t *path, Idx top_node,
00166                                 Idx top_str, Idx last_node, Idx last_str,
00167                                 int type) internal_function;
00168 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
00169                                              Idx str_idx,
00170                                              re_node_set *cur_nodes,
00171                                              re_node_set *next_nodes)
00172      internal_function;
00173 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
00174                                           re_node_set *cur_nodes,
00175                                           Idx ex_subexp, int type)
00176      internal_function;
00177 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
00178                                              re_node_set *dst_nodes,
00179                                              Idx target, Idx ex_subexp,
00180                                              int type) internal_function;
00181 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
00182                                     re_node_set *cur_nodes, Idx cur_str,
00183                                     Idx subexp_num, int type)
00184      internal_function;
00185 static bool build_trtable (const re_dfa_t *dfa,
00186                         re_dfastate_t *state) internal_function;
00187 #ifdef RE_ENABLE_I18N
00188 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
00189                                 const re_string_t *input, Idx idx)
00190      internal_function;
00191 # ifdef _LIBC
00192 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
00193                                              size_t name_len)
00194      internal_function;
00195 # endif /* _LIBC */
00196 #endif /* RE_ENABLE_I18N */
00197 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
00198                                    const re_dfastate_t *state,
00199                                    re_node_set *states_node,
00200                                    bitset_t *states_ch) internal_function;
00201 static bool check_node_accept (const re_match_context_t *mctx,
00202                             const re_token_t *node, Idx idx)
00203      internal_function;
00204 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
00205      internal_function;
00206 
00207 /* Entry point for POSIX code.  */
00208 
00209 /* regexec searches for a given pattern, specified by PREG, in the
00210    string STRING.
00211 
00212    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
00213    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
00214    least NMATCH elements, and we set them to the offsets of the
00215    corresponding matched substrings.
00216 
00217    EFLAGS specifies `execution flags' which affect matching: if
00218    REG_NOTBOL is set, then ^ does not match at the beginning of the
00219    string; if REG_NOTEOL is set, then $ does not match at the end.
00220 
00221    We return 0 if we find a match and REG_NOMATCH if not.  */
00222 
00223 int
00224 regexec (preg, string, nmatch, pmatch, eflags)
00225     const regex_t *_Restrict_ preg;
00226     const char *_Restrict_ string;
00227     size_t nmatch;
00228     regmatch_t pmatch[_Restrict_arr_];
00229     int eflags;
00230 {
00231   reg_errcode_t err;
00232   Idx start, length;
00233 #ifdef _LIBC
00234   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00235 #endif
00236 
00237   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
00238     return REG_BADPAT;
00239 
00240   if (eflags & REG_STARTEND)
00241     {
00242       start = pmatch[0].rm_so;
00243       length = pmatch[0].rm_eo;
00244     }
00245   else
00246     {
00247       start = 0;
00248       length = strlen (string);
00249     }
00250 
00251   __libc_lock_lock (dfa->lock);
00252   if (preg->no_sub)
00253     err = re_search_internal (preg, string, length, start, length,
00254                            length, 0, NULL, eflags);
00255   else
00256     err = re_search_internal (preg, string, length, start, length,
00257                            length, nmatch, pmatch, eflags);
00258   __libc_lock_unlock (dfa->lock);
00259   return err != REG_NOERROR;
00260 }
00261 
00262 #ifdef _LIBC
00263 # include <shlib-compat.h>
00264 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
00265 
00266 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
00267 __typeof__ (__regexec) __compat_regexec;
00268 
00269 int
00270 attribute_compat_text_section
00271 __compat_regexec (const regex_t *_Restrict_ preg,
00272                 const char *_Restrict_ string, size_t nmatch,
00273                 regmatch_t pmatch[], int eflags)
00274 {
00275   return regexec (preg, string, nmatch, pmatch,
00276                 eflags & (REG_NOTBOL | REG_NOTEOL));
00277 }
00278 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
00279 # endif
00280 #endif
00281 
00282 /* Entry points for GNU code.  */
00283 
00284 /* re_match, re_search, re_match_2, re_search_2
00285 
00286    The former two functions operate on STRING with length LENGTH,
00287    while the later two operate on concatenation of STRING1 and STRING2
00288    with lengths LENGTH1 and LENGTH2, respectively.
00289 
00290    re_match() matches the compiled pattern in BUFP against the string,
00291    starting at index START.
00292 
00293    re_search() first tries matching at index START, then it tries to match
00294    starting from index START + 1, and so on.  The last start position tried
00295    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
00296    way as re_match().)
00297 
00298    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
00299    the first STOP characters of the concatenation of the strings should be
00300    concerned.
00301 
00302    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
00303    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
00304    computed relative to the concatenation, not relative to the individual
00305    strings.)
00306 
00307    On success, re_match* functions return the length of the match, re_search*
00308    return the position of the start of the match.  Return value -1 means no
00309    match was found and -2 indicates an internal error.  */
00310 
00311 regoff_t
00312 re_match (bufp, string, length, start, regs)
00313     struct re_pattern_buffer *bufp;
00314     const char *string;
00315     Idx length, start;
00316     struct re_registers *regs;
00317 {
00318   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
00319 }
00320 #ifdef _LIBC
00321 weak_alias (__re_match, re_match)
00322 #endif
00323 
00324 regoff_t
00325 re_search (bufp, string, length, start, range, regs)
00326     struct re_pattern_buffer *bufp;
00327     const char *string;
00328     Idx length, start;
00329     regoff_t range;
00330     struct re_registers *regs;
00331 {
00332   return re_search_stub (bufp, string, length, start, range, length, regs,
00333                       false);
00334 }
00335 #ifdef _LIBC
00336 weak_alias (__re_search, re_search)
00337 #endif
00338 
00339 regoff_t
00340 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
00341     struct re_pattern_buffer *bufp;
00342     const char *string1, *string2;
00343     Idx length1, length2, start, stop;
00344     struct re_registers *regs;
00345 {
00346   return re_search_2_stub (bufp, string1, length1, string2, length2,
00347                         start, 0, regs, stop, true);
00348 }
00349 #ifdef _LIBC
00350 weak_alias (__re_match_2, re_match_2)
00351 #endif
00352 
00353 regoff_t
00354 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
00355     struct re_pattern_buffer *bufp;
00356     const char *string1, *string2;
00357     Idx length1, length2, start, stop;
00358     regoff_t range;
00359     struct re_registers *regs;
00360 {
00361   return re_search_2_stub (bufp, string1, length1, string2, length2,
00362                         start, range, regs, stop, false);
00363 }
00364 #ifdef _LIBC
00365 weak_alias (__re_search_2, re_search_2)
00366 #endif
00367 
00368 static regoff_t
00369 internal_function
00370 re_search_2_stub (struct re_pattern_buffer *bufp,
00371                 const char *string1, Idx length1,
00372                 const char *string2, Idx length2,
00373                 Idx start, regoff_t range, struct re_registers *regs,
00374                 Idx stop, bool ret_len)
00375 {
00376   const char *str;
00377   regoff_t rval;
00378   Idx len = length1 + length2;
00379   char *s = NULL;
00380 
00381   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
00382     return -2;
00383 
00384   /* Concatenate the strings.  */
00385   if (length2 > 0)
00386     if (length1 > 0)
00387       {
00388        s = re_malloc (char, len);
00389 
00390        if (BE (s == NULL, 0))
00391          return -2;
00392 #ifdef _LIBC
00393        memcpy (__mempcpy (s, string1, length1), string2, length2);
00394 #else
00395        memcpy (s, string1, length1);
00396        memcpy (s + length1, string2, length2);
00397 #endif
00398        str = s;
00399       }
00400     else
00401       str = string2;
00402   else
00403     str = string1;
00404 
00405   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
00406                       ret_len);
00407   re_free (s);
00408   return rval;
00409 }
00410 
00411 /* The parameters have the same meaning as those of re_search.
00412    Additional parameters:
00413    If RET_LEN is true the length of the match is returned (re_match style);
00414    otherwise the position of the match is returned.  */
00415 
00416 static regoff_t
00417 internal_function
00418 re_search_stub (struct re_pattern_buffer *bufp,
00419               const char *string, Idx length,
00420               Idx start, regoff_t range, Idx stop, struct re_registers *regs,
00421               bool ret_len)
00422 {
00423   reg_errcode_t result;
00424   regmatch_t *pmatch;
00425   Idx nregs;
00426   regoff_t rval;
00427   int eflags = 0;
00428 #ifdef _LIBC
00429   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
00430 #endif
00431   Idx last_start = start + range;
00432 
00433   /* Check for out-of-range.  */
00434   if (BE (start < 0 || start > length, 0))
00435     return -1;
00436   if (BE (length < last_start || (0 <= range && last_start < start), 0))
00437     last_start = length;
00438   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
00439     last_start = 0;
00440 
00441   __libc_lock_lock (dfa->lock);
00442 
00443   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
00444   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
00445 
00446   /* Compile fastmap if we haven't yet.  */
00447   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
00448     re_compile_fastmap (bufp);
00449 
00450   if (BE (bufp->no_sub, 0))
00451     regs = NULL;
00452 
00453   /* We need at least 1 register.  */
00454   if (regs == NULL)
00455     nregs = 1;
00456   else if (BE (bufp->regs_allocated == REGS_FIXED
00457               && regs->num_regs <= bufp->re_nsub, 0))
00458     {
00459       nregs = regs->num_regs;
00460       if (BE (nregs < 1, 0))
00461        {
00462          /* Nothing can be copied to regs.  */
00463          regs = NULL;
00464          nregs = 1;
00465        }
00466     }
00467   else
00468     nregs = bufp->re_nsub + 1;
00469   pmatch = re_malloc (regmatch_t, nregs);
00470   if (BE (pmatch == NULL, 0))
00471     {
00472       rval = -2;
00473       goto out;
00474     }
00475 
00476   result = re_search_internal (bufp, string, length, start, last_start, stop,
00477                             nregs, pmatch, eflags);
00478 
00479   rval = 0;
00480 
00481   /* I hope we needn't fill ther regs with -1's when no match was found.  */
00482   if (result != REG_NOERROR)
00483     rval = -1;
00484   else if (regs != NULL)
00485     {
00486       /* If caller wants register contents data back, copy them.  */
00487       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
00488                                       bufp->regs_allocated);
00489       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
00490        rval = -2;
00491     }
00492 
00493   if (BE (rval == 0, 1))
00494     {
00495       if (ret_len)
00496        {
00497          assert (pmatch[0].rm_so == start);
00498          rval = pmatch[0].rm_eo - start;
00499        }
00500       else
00501        rval = pmatch[0].rm_so;
00502     }
00503   re_free (pmatch);
00504  out:
00505   __libc_lock_unlock (dfa->lock);
00506   return rval;
00507 }
00508 
00509 static unsigned int
00510 internal_function
00511 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
00512              int regs_allocated)
00513 {
00514   int rval = REGS_REALLOCATE;
00515   Idx i;
00516   Idx need_regs = nregs + 1;
00517   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
00518      uses.  */
00519 
00520   /* Have the register data arrays been allocated?  */
00521   if (regs_allocated == REGS_UNALLOCATED)
00522     { /* No.  So allocate them with malloc.  */
00523       regs->start = re_malloc (regoff_t, need_regs);
00524       if (BE (regs->start == NULL, 0))
00525        return REGS_UNALLOCATED;
00526       regs->end = re_malloc (regoff_t, need_regs);
00527       if (BE (regs->end == NULL, 0))
00528        {
00529          re_free (regs->start);
00530          return REGS_UNALLOCATED;
00531        }
00532       regs->num_regs = need_regs;
00533     }
00534   else if (regs_allocated == REGS_REALLOCATE)
00535     { /* Yes.  If we need more elements than were already
00536         allocated, reallocate them.  If we need fewer, just
00537         leave it alone.  */
00538       if (BE (need_regs > regs->num_regs, 0))
00539        {
00540          regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
00541          regoff_t *new_end;
00542          if (BE (new_start == NULL, 0))
00543            return REGS_UNALLOCATED;
00544          new_end = re_realloc (regs->end, regoff_t, need_regs);
00545          if (BE (new_end == NULL, 0))
00546            {
00547              re_free (new_start);
00548              return REGS_UNALLOCATED;
00549            }
00550          regs->start = new_start;
00551          regs->end = new_end;
00552          regs->num_regs = need_regs;
00553        }
00554     }
00555   else
00556     {
00557       assert (regs_allocated == REGS_FIXED);
00558       /* This function may not be called with REGS_FIXED and nregs too big.  */
00559       assert (regs->num_regs >= nregs);
00560       rval = REGS_FIXED;
00561     }
00562 
00563   /* Copy the regs.  */
00564   for (i = 0; i < nregs; ++i)
00565     {
00566       regs->start[i] = pmatch[i].rm_so;
00567       regs->end[i] = pmatch[i].rm_eo;
00568     }
00569   for ( ; i < regs->num_regs; ++i)
00570     regs->start[i] = regs->end[i] = -1;
00571 
00572   return rval;
00573 }
00574 
00575 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
00576    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
00577    this memory for recording register information.  STARTS and ENDS
00578    must be allocated using the malloc library routine, and must each
00579    be at least NUM_REGS * sizeof (regoff_t) bytes long.
00580 
00581    If NUM_REGS == 0, then subsequent matches should allocate their own
00582    register data.
00583 
00584    Unless this function is called, the first search or match using
00585    PATTERN_BUFFER will allocate its own register data, without
00586    freeing the old data.  */
00587 
00588 void
00589 re_set_registers (bufp, regs, num_regs, starts, ends)
00590     struct re_pattern_buffer *bufp;
00591     struct re_registers *regs;
00592     __re_size_t num_regs;
00593     regoff_t *starts, *ends;
00594 {
00595   if (num_regs)
00596     {
00597       bufp->regs_allocated = REGS_REALLOCATE;
00598       regs->num_regs = num_regs;
00599       regs->start = starts;
00600       regs->end = ends;
00601     }
00602   else
00603     {
00604       bufp->regs_allocated = REGS_UNALLOCATED;
00605       regs->num_regs = 0;
00606       regs->start = regs->end = NULL;
00607     }
00608 }
00609 #ifdef _LIBC
00610 weak_alias (__re_set_registers, re_set_registers)
00611 #endif
00612 
00613 /* Entry points compatible with 4.2 BSD regex library.  We don't define
00614    them unless specifically requested.  */
00615 
00616 #if defined _REGEX_RE_COMP || defined _LIBC
00617 int
00618 # ifdef _LIBC
00619 weak_function
00620 # endif
00621 re_exec (s)
00622      const char *s;
00623 {
00624   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
00625 }
00626 #endif /* _REGEX_RE_COMP */
00627 
00628 /* Internal entry point.  */
00629 
00630 /* Searches for a compiled pattern PREG in the string STRING, whose
00631    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
00632    meaning as with regexec.  LAST_START is START + RANGE, where
00633    START and RANGE have the same meaning as with re_search.
00634    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
00635    otherwise return the error code.
00636    Note: We assume front end functions already check ranges.
00637    (0 <= LAST_START && LAST_START <= LENGTH)  */
00638 
00639 static reg_errcode_t
00640 internal_function __attribute_warn_unused_result__
00641 re_search_internal (const regex_t *preg,
00642                   const char *string, Idx length,
00643                   Idx start, Idx last_start, Idx stop,
00644                   size_t nmatch, regmatch_t pmatch[],
00645                   int eflags)
00646 {
00647   reg_errcode_t err;
00648   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
00649   Idx left_lim, right_lim;
00650   int incr;
00651   bool fl_longest_match;
00652   int match_kind;
00653   Idx match_first;
00654   Idx match_last = REG_MISSING;
00655   Idx extra_nmatch;
00656   bool sb;
00657   int ch;
00658 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
00659   re_match_context_t mctx = { .dfa = dfa };
00660 #else
00661   re_match_context_t mctx;
00662 #endif
00663   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
00664                   && start != last_start && !preg->can_be_null)
00665                  ? preg->fastmap : NULL);
00666   RE_TRANSLATE_TYPE t = preg->translate;
00667 
00668 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
00669   memset (&mctx, '\0', sizeof (re_match_context_t));
00670   mctx.dfa = dfa;
00671 #endif
00672 
00673   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
00674   nmatch -= extra_nmatch;
00675 
00676   /* Check if the DFA haven't been compiled.  */
00677   if (BE (preg->used == 0 || dfa->init_state == NULL
00678          || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
00679          || dfa->init_state_begbuf == NULL, 0))
00680     return REG_NOMATCH;
00681 
00682 #ifdef DEBUG
00683   /* We assume front-end functions already check them.  */
00684   assert (0 <= last_start && last_start <= length);
00685 #endif
00686 
00687   /* If initial states with non-begbuf contexts have no elements,
00688      the regex must be anchored.  If preg->newline_anchor is set,
00689      we'll never use init_state_nl, so do not check it.  */
00690   if (dfa->init_state->nodes.nelem == 0
00691       && dfa->init_state_word->nodes.nelem == 0
00692       && (dfa->init_state_nl->nodes.nelem == 0
00693          || !preg->newline_anchor))
00694     {
00695       if (start != 0 && last_start != 0)
00696         return REG_NOMATCH;
00697       start = last_start = 0;
00698     }
00699 
00700   /* We must check the longest matching, if nmatch > 0.  */
00701   fl_longest_match = (nmatch != 0 || dfa->nbackref);
00702 
00703   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
00704                          preg->translate, (preg->syntax & RE_ICASE) != 0,
00705                          dfa);
00706   if (BE (err != REG_NOERROR, 0))
00707     goto free_return;
00708   mctx.input.stop = stop;
00709   mctx.input.raw_stop = stop;
00710   mctx.input.newline_anchor = preg->newline_anchor;
00711 
00712   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
00713   if (BE (err != REG_NOERROR, 0))
00714     goto free_return;
00715 
00716   /* We will log all the DFA states through which the dfa pass,
00717      if nmatch > 1, or this dfa has "multibyte node", which is a
00718      back-reference or a node which can accept multibyte character or
00719      multi character collating element.  */
00720   if (nmatch > 1 || dfa->has_mb_node)
00721     {
00722       /* Avoid overflow.  */
00723       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
00724        {
00725          err = REG_ESPACE;
00726          goto free_return;
00727        }
00728 
00729       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
00730       if (BE (mctx.state_log == NULL, 0))
00731        {
00732          err = REG_ESPACE;
00733          goto free_return;
00734        }
00735     }
00736   else
00737     mctx.state_log = NULL;
00738 
00739   match_first = start;
00740   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
00741                         : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
00742 
00743   /* Check incrementally whether of not the input string match.  */
00744   incr = (last_start < start) ? -1 : 1;
00745   left_lim = (last_start < start) ? last_start : start;
00746   right_lim = (last_start < start) ? start : last_start;
00747   sb = dfa->mb_cur_max == 1;
00748   match_kind =
00749     (fastmap
00750      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
00751        | (start <= last_start ? 2 : 0)
00752        | (t != NULL ? 1 : 0))
00753      : 8);
00754 
00755   for (;; match_first += incr)
00756     {
00757       err = REG_NOMATCH;
00758       if (match_first < left_lim || right_lim < match_first)
00759        goto free_return;
00760 
00761       /* Advance as rapidly as possible through the string, until we
00762         find a plausible place to start matching.  This may be done
00763         with varying efficiency, so there are various possibilities:
00764         only the most common of them are specialized, in order to
00765         save on code size.  We use a switch statement for speed.  */
00766       switch (match_kind)
00767        {
00768        case 8:
00769          /* No fastmap.  */
00770          break;
00771 
00772        case 7:
00773          /* Fastmap with single-byte translation, match forward.  */
00774          while (BE (match_first < right_lim, 1)
00775                && !fastmap[t[(unsigned char) string[match_first]]])
00776            ++match_first;
00777          goto forward_match_found_start_or_reached_end;
00778 
00779        case 6:
00780          /* Fastmap without translation, match forward.  */
00781          while (BE (match_first < right_lim, 1)
00782                && !fastmap[(unsigned char) string[match_first]])
00783            ++match_first;
00784 
00785        forward_match_found_start_or_reached_end:
00786          if (BE (match_first == right_lim, 0))
00787            {
00788              ch = match_first >= length
00789                      ? 0 : (unsigned char) string[match_first];
00790              if (!fastmap[t ? t[ch] : ch])
00791               goto free_return;
00792            }
00793          break;
00794 
00795        case 4:
00796        case 5:
00797          /* Fastmap without multi-byte translation, match backwards.  */
00798          while (match_first >= left_lim)
00799            {
00800              ch = match_first >= length
00801                      ? 0 : (unsigned char) string[match_first];
00802              if (fastmap[t ? t[ch] : ch])
00803               break;
00804              --match_first;
00805            }
00806          if (match_first < left_lim)
00807            goto free_return;
00808          break;
00809 
00810        default:
00811          /* In this case, we can't determine easily the current byte,
00812             since it might be a component byte of a multibyte
00813             character.  Then we use the constructed buffer instead.  */
00814          for (;;)
00815            {
00816              /* If MATCH_FIRST is out of the valid range, reconstruct the
00817                buffers.  */
00818              __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
00819              if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
00820               {
00821                 err = re_string_reconstruct (&mctx.input, match_first,
00822                                           eflags);
00823                 if (BE (err != REG_NOERROR, 0))
00824                   goto free_return;
00825 
00826                 offset = match_first - mctx.input.raw_mbs_idx;
00827               }
00828              /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
00829                Note that MATCH_FIRST must not be smaller than 0.  */
00830              ch = (match_first >= length
00831                   ? 0 : re_string_byte_at (&mctx.input, offset));
00832              if (fastmap[ch])
00833               break;
00834              match_first += incr;
00835              if (match_first < left_lim || match_first > right_lim)
00836               {
00837                 err = REG_NOMATCH;
00838                 goto free_return;
00839               }
00840            }
00841          break;
00842        }
00843 
00844       /* Reconstruct the buffers so that the matcher can assume that
00845         the matching starts from the beginning of the buffer.  */
00846       err = re_string_reconstruct (&mctx.input, match_first, eflags);
00847       if (BE (err != REG_NOERROR, 0))
00848        goto free_return;
00849 
00850 #ifdef RE_ENABLE_I18N
00851      /* Don't consider this char as a possible match start if it part,
00852        yet isn't the head, of a multibyte character.  */
00853       if (!sb && !re_string_first_byte (&mctx.input, 0))
00854        continue;
00855 #endif
00856 
00857       /* It seems to be appropriate one, then use the matcher.  */
00858       /* We assume that the matching starts from 0.  */
00859       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
00860       match_last = check_matching (&mctx, fl_longest_match,
00861                                start <= last_start ? &match_first : NULL);
00862       if (match_last != REG_MISSING)
00863        {
00864          if (BE (match_last == REG_ERROR, 0))
00865            {
00866              err = REG_ESPACE;
00867              goto free_return;
00868            }
00869          else
00870            {
00871              mctx.match_last = match_last;
00872              if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
00873               {
00874                 re_dfastate_t *pstate = mctx.state_log[match_last];
00875                 mctx.last_node = check_halt_state_context (&mctx, pstate,
00876                                                       match_last);
00877               }
00878              if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
00879                 || dfa->nbackref)
00880               {
00881                 err = prune_impossible_nodes (&mctx);
00882                 if (err == REG_NOERROR)
00883                   break;
00884                 if (BE (err != REG_NOMATCH, 0))
00885                   goto free_return;
00886                 match_last = REG_MISSING;
00887               }
00888              else
00889               break; /* We found a match.  */
00890            }
00891        }
00892 
00893       match_ctx_clean (&mctx);
00894     }
00895 
00896 #ifdef DEBUG
00897   assert (match_last != REG_MISSING);
00898   assert (err == REG_NOERROR);
00899 #endif
00900 
00901   /* Set pmatch[] if we need.  */
00902   if (nmatch > 0)
00903     {
00904       Idx reg_idx;
00905 
00906       /* Initialize registers.  */
00907       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
00908        pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
00909 
00910       /* Set the points where matching start/end.  */
00911       pmatch[0].rm_so = 0;
00912       pmatch[0].rm_eo = mctx.match_last;
00913       /* FIXME: This function should fail if mctx.match_last exceeds
00914         the maximum possible regoff_t value.  We need a new error
00915         code REG_OVERFLOW.  */
00916 
00917       if (!preg->no_sub && nmatch > 1)
00918        {
00919          err = set_regs (preg, &mctx, nmatch, pmatch,
00920                        dfa->has_plural_match && dfa->nbackref > 0);
00921          if (BE (err != REG_NOERROR, 0))
00922            goto free_return;
00923        }
00924 
00925       /* At last, add the offset to the each registers, since we slided
00926         the buffers so that we could assume that the matching starts
00927         from 0.  */
00928       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
00929        if (pmatch[reg_idx].rm_so != -1)
00930          {
00931 #ifdef RE_ENABLE_I18N
00932            if (BE (mctx.input.offsets_needed != 0, 0))
00933              {
00934               pmatch[reg_idx].rm_so =
00935                 (pmatch[reg_idx].rm_so == mctx.input.valid_len
00936                  ? mctx.input.valid_raw_len
00937                  : mctx.input.offsets[pmatch[reg_idx].rm_so]);
00938               pmatch[reg_idx].rm_eo =
00939                 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
00940                  ? mctx.input.valid_raw_len
00941                  : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
00942              }
00943 #else
00944            assert (mctx.input.offsets_needed == 0);
00945 #endif
00946            pmatch[reg_idx].rm_so += match_first;
00947            pmatch[reg_idx].rm_eo += match_first;
00948          }
00949       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
00950        {
00951          pmatch[nmatch + reg_idx].rm_so = -1;
00952          pmatch[nmatch + reg_idx].rm_eo = -1;
00953        }
00954 
00955       if (dfa->subexp_map)
00956        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
00957          if (dfa->subexp_map[reg_idx] != reg_idx)
00958            {
00959              pmatch[reg_idx + 1].rm_so
00960               = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
00961              pmatch[reg_idx + 1].rm_eo
00962               = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
00963            }
00964     }
00965 
00966  free_return:
00967   re_free (mctx.state_log);
00968   if (dfa->nbackref)
00969     match_ctx_free (&mctx);
00970   re_string_destruct (&mctx.input);
00971   return err;
00972 }
00973 
00974 static reg_errcode_t
00975 internal_function __attribute_warn_unused_result__
00976 prune_impossible_nodes (re_match_context_t *mctx)
00977 {
00978   const re_dfa_t *const dfa = mctx->dfa;
00979   Idx halt_node, match_last;
00980   reg_errcode_t ret;
00981   re_dfastate_t **sifted_states;
00982   re_dfastate_t **lim_states = NULL;
00983   re_sift_context_t sctx;
00984 #ifdef DEBUG
00985   assert (mctx->state_log != NULL);
00986 #endif
00987   match_last = mctx->match_last;
00988   halt_node = mctx->last_node;
00989 
00990   /* Avoid overflow.  */
00991   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
00992     return REG_ESPACE;
00993 
00994   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
00995   if (BE (sifted_states == NULL, 0))
00996     {
00997       ret = REG_ESPACE;
00998       goto free_return;
00999     }
01000   if (dfa->nbackref)
01001     {
01002       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
01003       if (BE (lim_states == NULL, 0))
01004        {
01005          ret = REG_ESPACE;
01006          goto free_return;
01007        }
01008       while (1)
01009        {
01010          memset (lim_states, '\0',
01011                 sizeof (re_dfastate_t *) * (match_last + 1));
01012          sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
01013                       match_last);
01014          ret = sift_states_backward (mctx, &sctx);
01015          re_node_set_free (&sctx.limits);
01016          if (BE (ret != REG_NOERROR, 0))
01017              goto free_return;
01018          if (sifted_states[0] != NULL || lim_states[0] != NULL)
01019            break;
01020          do
01021            {
01022              --match_last;
01023              if (! REG_VALID_INDEX (match_last))
01024               {
01025                 ret = REG_NOMATCH;
01026                 goto free_return;
01027               }
01028            } while (mctx->state_log[match_last] == NULL
01029                    || !mctx->state_log[match_last]->halt);
01030          halt_node = check_halt_state_context (mctx,
01031                                           mctx->state_log[match_last],
01032                                           match_last);
01033        }
01034       ret = merge_state_array (dfa, sifted_states, lim_states,
01035                             match_last + 1);
01036       re_free (lim_states);
01037       lim_states = NULL;
01038       if (BE (ret != REG_NOERROR, 0))
01039        goto free_return;
01040     }
01041   else
01042     {
01043       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
01044       ret = sift_states_backward (mctx, &sctx);
01045       re_node_set_free (&sctx.limits);
01046       if (BE (ret != REG_NOERROR, 0))
01047        goto free_return;
01048       if (sifted_states[0] == NULL)
01049        {
01050          ret = REG_NOMATCH;
01051          goto free_return;
01052        }
01053     }
01054   re_free (mctx->state_log);
01055   mctx->state_log = sifted_states;
01056   sifted_states = NULL;
01057   mctx->last_node = halt_node;
01058   mctx->match_last = match_last;
01059   ret = REG_NOERROR;
01060  free_return:
01061   re_free (sifted_states);
01062   re_free (lim_states);
01063   return ret;
01064 }
01065 
01066 /* Acquire an initial state and return it.
01067    We must select appropriate initial state depending on the context,
01068    since initial states may have constraints like "<", "^", etc..  */
01069 
01070 static inline re_dfastate_t *
01071 __attribute ((always_inline)) internal_function
01072 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
01073                          Idx idx)
01074 {
01075   const re_dfa_t *const dfa = mctx->dfa;
01076   if (dfa->init_state->has_constraint)
01077     {
01078       unsigned int context;
01079       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
01080       if (IS_WORD_CONTEXT (context))
01081        return dfa->init_state_word;
01082       else if (IS_ORDINARY_CONTEXT (context))
01083        return dfa->init_state;
01084       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
01085        return dfa->init_state_begbuf;
01086       else if (IS_NEWLINE_CONTEXT (context))
01087        return dfa->init_state_nl;
01088       else if (IS_BEGBUF_CONTEXT (context))
01089        {
01090          /* It is relatively rare case, then calculate on demand.  */
01091          return re_acquire_state_context (err, dfa,
01092                                       dfa->init_state->entrance_nodes,
01093                                       context);
01094        }
01095       else
01096        /* Must not happen?  */
01097        return dfa->init_state;
01098     }
01099   else
01100     return dfa->init_state;
01101 }
01102 
01103 /* Check whether the regular expression match input string INPUT or not,
01104    and return the index where the matching end.  Return REG_MISSING if
01105    there is no match, and return REG_ERROR in case of an error.
01106    FL_LONGEST_MATCH means we want the POSIX longest matching.
01107    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
01108    next place where we may want to try matching.
01109    Note that the matcher assume that the maching starts from the current
01110    index of the buffer.  */
01111 
01112 static Idx
01113 internal_function __attribute_warn_unused_result__
01114 check_matching (re_match_context_t *mctx, bool fl_longest_match,
01115               Idx *p_match_first)
01116 {
01117   const re_dfa_t *const dfa = mctx->dfa;
01118   reg_errcode_t err;
01119   Idx match = 0;
01120   Idx match_last = REG_MISSING;
01121   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
01122   re_dfastate_t *cur_state;
01123   bool at_init_state = p_match_first != NULL;
01124   Idx next_start_idx = cur_str_idx;
01125 
01126   err = REG_NOERROR;
01127   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
01128   /* An initial state must not be NULL (invalid).  */
01129   if (BE (cur_state == NULL, 0))
01130     {
01131       assert (err == REG_ESPACE);
01132       return REG_ERROR;
01133     }
01134 
01135   if (mctx->state_log != NULL)
01136     {
01137       mctx->state_log[cur_str_idx] = cur_state;
01138 
01139       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
01140         later.  E.g. Processing back references.  */
01141       if (BE (dfa->nbackref, 0))
01142        {
01143          at_init_state = false;
01144          err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
01145          if (BE (err != REG_NOERROR, 0))
01146            return err;
01147 
01148          if (cur_state->has_backref)
01149            {
01150              err = transit_state_bkref (mctx, &cur_state->nodes);
01151              if (BE (err != REG_NOERROR, 0))
01152               return err;
01153            }
01154        }
01155     }
01156 
01157   /* If the RE accepts NULL string.  */
01158   if (BE (cur_state->halt, 0))
01159     {
01160       if (!cur_state->has_constraint
01161          || check_halt_state_context (mctx, cur_state, cur_str_idx))
01162        {
01163          if (!fl_longest_match)
01164            return cur_str_idx;
01165          else
01166            {
01167              match_last = cur_str_idx;
01168              match = 1;
01169            }
01170        }
01171     }
01172 
01173   while (!re_string_eoi (&mctx->input))
01174     {
01175       re_dfastate_t *old_state = cur_state;
01176       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
01177 
01178       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
01179          || (BE (next_char_idx >= mctx->input.valid_len, 0)
01180              && mctx->input.valid_len < mctx->input.len))
01181        {
01182          err = extend_buffers (mctx);
01183          if (BE (err != REG_NOERROR, 0))
01184            {
01185              assert (err == REG_ESPACE);
01186              return REG_ERROR;
01187            }
01188        }
01189 
01190       cur_state = transit_state (&err, mctx, cur_state);
01191       if (mctx->state_log != NULL)
01192        cur_state = merge_state_with_log (&err, mctx, cur_state);
01193 
01194       if (cur_state == NULL)
01195        {
01196          /* Reached the invalid state or an error.  Try to recover a valid
01197             state using the state log, if available and if we have not
01198             already found a valid (even if not the longest) match.  */
01199          if (BE (err != REG_NOERROR, 0))
01200            return REG_ERROR;
01201 
01202          if (mctx->state_log == NULL
01203              || (match && !fl_longest_match)
01204              || (cur_state = find_recover_state (&err, mctx)) == NULL)
01205            break;
01206        }
01207 
01208       if (BE (at_init_state, 0))
01209        {
01210          if (old_state == cur_state)
01211            next_start_idx = next_char_idx;
01212          else
01213            at_init_state = false;
01214        }
01215 
01216       if (cur_state->halt)
01217        {
01218          /* Reached a halt state.
01219             Check the halt state can satisfy the current context.  */
01220          if (!cur_state->has_constraint
01221              || check_halt_state_context (mctx, cur_state,
01222                                       re_string_cur_idx (&mctx->input)))
01223            {
01224              /* We found an appropriate halt state.  */
01225              match_last = re_string_cur_idx (&mctx->input);
01226              match = 1;
01227 
01228              /* We found a match, do not modify match_first below.  */
01229              p_match_first = NULL;
01230              if (!fl_longest_match)
01231               break;
01232            }
01233        }
01234     }
01235 
01236   if (p_match_first)
01237     *p_match_first += next_start_idx;
01238 
01239   return match_last;
01240 }
01241 
01242 /* Check NODE match the current context.  */
01243 
01244 static bool
01245 internal_function
01246 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
01247 {
01248   re_token_type_t type = dfa->nodes[node].type;
01249   unsigned int constraint = dfa->nodes[node].constraint;
01250   if (type != END_OF_RE)
01251     return false;
01252   if (!constraint)
01253     return true;
01254   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
01255     return false;
01256   return true;
01257 }
01258 
01259 /* Check the halt state STATE match the current context.
01260    Return 0 if not match, if the node, STATE has, is a halt node and
01261    match the context, return the node.  */
01262 
01263 static Idx
01264 internal_function
01265 check_halt_state_context (const re_match_context_t *mctx,
01266                        const re_dfastate_t *state, Idx idx)
01267 {
01268   Idx i;
01269   unsigned int context;
01270 #ifdef DEBUG
01271   assert (state->halt);
01272 #endif
01273   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
01274   for (i = 0; i < state->nodes.nelem; ++i)
01275     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
01276       return state->nodes.elems[i];
01277   return 0;
01278 }
01279 
01280 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
01281    corresponding to the DFA).
01282    Return the destination node, and update EPS_VIA_NODES;
01283    return REG_MISSING in case of errors.  */
01284 
01285 static Idx
01286 internal_function
01287 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
01288                  Idx *pidx, Idx node, re_node_set *eps_via_nodes,
01289                  struct re_fail_stack_t *fs)
01290 {
01291   const re_dfa_t *const dfa = mctx->dfa;
01292   Idx i;
01293   bool ok;
01294   if (IS_EPSILON_NODE (dfa->nodes[node].type))
01295     {
01296       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
01297       re_node_set *edests = &dfa->edests[node];
01298       Idx dest_node;
01299       ok = re_node_set_insert (eps_via_nodes, node);
01300       if (BE (! ok, 0))
01301        return REG_ERROR;
01302       /* Pick up a valid destination, or return REG_MISSING if none
01303         is found.  */
01304       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
01305        {
01306          Idx candidate = edests->elems[i];
01307          if (!re_node_set_contains (cur_nodes, candidate))
01308            continue;
01309           if (dest_node == REG_MISSING)
01310            dest_node = candidate;
01311 
01312          else
01313            {
01314              /* In order to avoid infinite loop like "(a*)*", return the second
01315                epsilon-transition if the first was already considered.  */
01316              if (re_node_set_contains (eps_via_nodes, dest_node))
01317               return candidate;
01318 
01319              /* Otherwise, push the second epsilon-transition on the fail stack.  */
01320              else if (fs != NULL
01321                      && push_fail_stack (fs, *pidx, candidate, nregs, regs,
01322                                       eps_via_nodes))
01323               return REG_ERROR;
01324 
01325              /* We know we are going to exit.  */
01326              break;
01327            }
01328        }
01329       return dest_node;
01330     }
01331   else
01332     {
01333       Idx naccepted = 0;
01334       re_token_type_t type = dfa->nodes[node].type;
01335 
01336 #ifdef RE_ENABLE_I18N
01337       if (dfa->nodes[node].accept_mb)
01338        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
01339       else
01340 #endif /* RE_ENABLE_I18N */
01341       if (type == OP_BACK_REF)
01342        {
01343          Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
01344          naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
01345          if (fs != NULL)
01346            {
01347              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
01348               return REG_MISSING;
01349              else if (naccepted)
01350               {
01351                 char *buf = (char *) re_string_get_buffer (&mctx->input);
01352                 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
01353                            naccepted) != 0)
01354                   return REG_MISSING;
01355               }
01356            }
01357 
01358          if (naccepted == 0)
01359            {
01360              Idx dest_node;
01361              ok = re_node_set_insert (eps_via_nodes, node);
01362              if (BE (! ok, 0))
01363               return REG_ERROR;
01364              dest_node = dfa->edests[node].elems[0];
01365              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
01366                                    dest_node))
01367               return dest_node;
01368            }
01369        }
01370 
01371       if (naccepted != 0
01372          || check_node_accept (mctx, dfa->nodes + node, *pidx))
01373        {
01374          Idx dest_node = dfa->nexts[node];
01375          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
01376          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
01377                    || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
01378                                           dest_node)))
01379            return REG_MISSING;
01380          re_node_set_empty (eps_via_nodes);
01381          return dest_node;
01382        }
01383     }
01384   return REG_MISSING;
01385 }
01386 
01387 static reg_errcode_t
01388 internal_function __attribute_warn_unused_result__
01389 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
01390                Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
01391 {
01392   reg_errcode_t err;
01393   Idx num = fs->num++;
01394   if (fs->num == fs->alloc)
01395     {
01396       struct re_fail_stack_ent_t *new_array;
01397       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
01398                                    * fs->alloc * 2));
01399       if (new_array == NULL)
01400        return REG_ESPACE;
01401       fs->alloc *= 2;
01402       fs->stack = new_array;
01403     }
01404   fs->stack[num].idx = str_idx;
01405   fs->stack[num].node = dest_node;
01406   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
01407   if (fs->stack[num].regs == NULL)
01408     return REG_ESPACE;
01409   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
01410   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
01411   return err;
01412 }
01413 
01414 static Idx
01415 internal_function
01416 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
01417               regmatch_t *regs, re_node_set *eps_via_nodes)
01418 {
01419   Idx num = --fs->num;
01420   assert (REG_VALID_INDEX (num));
01421   *pidx = fs->stack[num].idx;
01422   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
01423   re_node_set_free (eps_via_nodes);
01424   re_free (fs->stack[num].regs);
01425   *eps_via_nodes = fs->stack[num].eps_via_nodes;
01426   return fs->stack[num].node;
01427 }
01428 
01429 /* Set the positions where the subexpressions are starts/ends to registers
01430    PMATCH.
01431    Note: We assume that pmatch[0] is already set, and
01432    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
01433 
01434 static reg_errcode_t
01435 internal_function __attribute_warn_unused_result__
01436 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
01437          regmatch_t *pmatch, bool fl_backtrack)
01438 {
01439   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
01440   Idx idx, cur_node;
01441   re_node_set eps_via_nodes;
01442   struct re_fail_stack_t *fs;
01443   struct re_fail_stack_t fs_body = { 0, 2, NULL };
01444   regmatch_t *prev_idx_match;
01445   bool prev_idx_match_malloced = false;
01446 
01447 #ifdef DEBUG
01448   assert (nmatch > 1);
01449   assert (mctx->state_log != NULL);
01450 #endif
01451   if (fl_backtrack)
01452     {
01453       fs = &fs_body;
01454       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
01455       if (fs->stack == NULL)
01456        return REG_ESPACE;
01457     }
01458   else
01459     fs = NULL;
01460 
01461   cur_node = dfa->init_node;
01462   re_node_set_init_empty (&eps_via_nodes);
01463 
01464   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
01465     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
01466   else
01467     {
01468       prev_idx_match = re_malloc (regmatch_t, nmatch);
01469       if (prev_idx_match == NULL)
01470        {
01471          free_fail_stack_return (fs);
01472          return REG_ESPACE;
01473        }
01474       prev_idx_match_malloced = true;
01475     }
01476   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
01477 
01478   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
01479     {
01480       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
01481 
01482       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
01483        {
01484          Idx reg_idx;
01485          if (fs)
01486            {
01487              for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
01488               if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
01489                 break;
01490              if (reg_idx == nmatch)
01491               {
01492                 re_node_set_free (&eps_via_nodes);
01493                 if (prev_idx_match_malloced)
01494                   re_free (prev_idx_match);
01495                 return free_fail_stack_return (fs);
01496               }
01497              cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
01498                                     &eps_via_nodes);
01499            }
01500          else
01501            {
01502              re_node_set_free (&eps_via_nodes);
01503              if (prev_idx_match_malloced)
01504               re_free (prev_idx_match);
01505              return REG_NOERROR;
01506            }
01507        }
01508 
01509       /* Proceed to next node.  */
01510       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
01511                                 &eps_via_nodes, fs);
01512 
01513       if (BE (! REG_VALID_INDEX (cur_node), 0))
01514        {
01515          if (BE (cur_node == REG_ERROR, 0))
01516            {
01517              re_node_set_free (&eps_via_nodes);
01518              if (prev_idx_match_malloced)
01519               re_free (prev_idx_match);
01520              free_fail_stack_return (fs);
01521              return REG_ESPACE;
01522            }
01523          if (fs)
01524            cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
01525                                    &eps_via_nodes);
01526          else
01527            {
01528              re_node_set_free (&eps_via_nodes);
01529              if (prev_idx_match_malloced)
01530               re_free (prev_idx_match);
01531              return REG_NOMATCH;
01532            }
01533        }
01534     }
01535   re_node_set_free (&eps_via_nodes);
01536   if (prev_idx_match_malloced)
01537     re_free (prev_idx_match);
01538   return free_fail_stack_return (fs);
01539 }
01540 
01541 static reg_errcode_t
01542 internal_function
01543 free_fail_stack_return (struct re_fail_stack_t *fs)
01544 {
01545   if (fs)
01546     {
01547       Idx fs_idx;
01548       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
01549        {
01550          re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
01551          re_free (fs->stack[fs_idx].regs);
01552        }
01553       re_free (fs->stack);
01554     }
01555   return REG_NOERROR;
01556 }
01557 
01558 static void
01559 internal_function
01560 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
01561             regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
01562 {
01563   int type = dfa->nodes[cur_node].type;
01564   if (type == OP_OPEN_SUBEXP)
01565     {
01566       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
01567 
01568       /* We are at the first node of this sub expression.  */
01569       if (reg_num < nmatch)
01570        {
01571          pmatch[reg_num].rm_so = cur_idx;
01572          pmatch[reg_num].rm_eo = -1;
01573        }
01574     }
01575   else if (type == OP_CLOSE_SUBEXP)
01576     {
01577       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
01578       if (reg_num < nmatch)
01579        {
01580          /* We are at the last node of this sub expression.  */
01581          if (pmatch[reg_num].rm_so < cur_idx)
01582            {
01583              pmatch[reg_num].rm_eo = cur_idx;
01584              /* This is a non-empty match or we are not inside an optional
01585                subexpression.  Accept this right away.  */
01586              memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
01587            }
01588          else
01589            {
01590              if (dfa->nodes[cur_node].opt_subexp
01591                 && prev_idx_match[reg_num].rm_so != -1)
01592               /* We transited through an empty match for an optional
01593                  subexpression, like (a?)*, and this is not the subexp's
01594                  first match.  Copy back the old content of the registers
01595                  so that matches of an inner subexpression are undone as
01596                  well, like in ((a?))*.  */
01597               memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
01598              else
01599               /* We completed a subexpression, but it may be part of
01600                  an optional one, so do not update PREV_IDX_MATCH.  */
01601               pmatch[reg_num].rm_eo = cur_idx;
01602            }
01603        }
01604     }
01605 }
01606 
01607 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
01608    and sift the nodes in each states according to the following rules.
01609    Updated state_log will be wrote to STATE_LOG.
01610 
01611    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
01612      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
01613        If `a' isn't the LAST_NODE and `a' can't epsilon transit to
01614        the LAST_NODE, we throw away the node `a'.
01615      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
01616        string `s' and transit to `b':
01617        i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
01618           away the node `a'.
01619        ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
01620            thrown away, we throw away the node `a'.
01621      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
01622        i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
01623           node `a'.
01624        ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
01625            we throw away the node `a'.  */
01626 
01627 #define STATE_NODE_CONTAINS(state,node) \
01628   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
01629 
01630 static reg_errcode_t
01631 internal_function
01632 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
01633 {
01634   reg_errcode_t err;
01635   int null_cnt = 0;
01636   Idx str_idx = sctx->last_str_idx;
01637   re_node_set cur_dest;
01638 
01639 #ifdef DEBUG
01640   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
01641 #endif
01642 
01643   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
01644      transit to the last_node and the last_node itself.  */
01645   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
01646   if (BE (err != REG_NOERROR, 0))
01647     return err;
01648   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
01649   if (BE (err != REG_NOERROR, 0))
01650     goto free_return;
01651 
01652   /* Then check each states in the state_log.  */
01653   while (str_idx > 0)
01654     {
01655       /* Update counters.  */
01656       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
01657       if (null_cnt > mctx->max_mb_elem_len)
01658        {
01659          memset (sctx->sifted_states, '\0',
01660                 sizeof (re_dfastate_t *) * str_idx);
01661          re_node_set_free (&cur_dest);
01662          return REG_NOERROR;
01663        }
01664       re_node_set_empty (&cur_dest);
01665       --str_idx;
01666 
01667       if (mctx->state_log[str_idx])
01668        {
01669          err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
01670          if (BE (err != REG_NOERROR, 0))
01671            goto free_return;
01672        }
01673 
01674       /* Add all the nodes which satisfy the following conditions:
01675         - It can epsilon transit to a node in CUR_DEST.
01676         - It is in CUR_SRC.
01677         And update state_log.  */
01678       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
01679       if (BE (err != REG_NOERROR, 0))
01680        goto free_return;
01681     }
01682   err = REG_NOERROR;
01683  free_return:
01684   re_node_set_free (&cur_dest);
01685   return err;
01686 }
01687 
01688 static reg_errcode_t
01689 internal_function __attribute_warn_unused_result__
01690 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
01691                    Idx str_idx, re_node_set *cur_dest)
01692 {
01693   const re_dfa_t *const dfa = mctx->dfa;
01694   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
01695   Idx i;
01696 
01697   /* Then build the next sifted state.
01698      We build the next sifted state on `cur_dest', and update
01699      `sifted_states[str_idx]' with `cur_dest'.
01700      Note:
01701      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
01702      `cur_src' points the node_set of the old `state_log[str_idx]'
01703      (with the epsilon nodes pre-filtered out).  */
01704   for (i = 0; i < cur_src->nelem; i++)
01705     {
01706       Idx prev_node = cur_src->elems[i];
01707       int naccepted = 0;
01708       bool ok;
01709 
01710 #ifdef DEBUG
01711       re_token_type_t type = dfa->nodes[prev_node].type;
01712       assert (!IS_EPSILON_NODE (type));
01713 #endif
01714 #ifdef RE_ENABLE_I18N
01715       /* If the node may accept `multi byte'.  */
01716       if (dfa->nodes[prev_node].accept_mb)
01717        naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
01718                                     str_idx, sctx->last_str_idx);
01719 #endif /* RE_ENABLE_I18N */
01720 
01721       /* We don't check backreferences here.
01722         See update_cur_sifted_state().  */
01723       if (!naccepted
01724          && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
01725          && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
01726                               dfa->nexts[prev_node]))
01727        naccepted = 1;
01728 
01729       if (naccepted == 0)
01730        continue;
01731 
01732       if (sctx->limits.nelem)
01733        {
01734          Idx to_idx = str_idx + naccepted;
01735          if (check_dst_limits (mctx, &sctx->limits,
01736                             dfa->nexts[prev_node], to_idx,
01737                             prev_node, str_idx))
01738            continue;
01739        }
01740       ok = re_node_set_insert (cur_dest, prev_node);
01741       if (BE (! ok, 0))
01742        return REG_ESPACE;
01743     }
01744 
01745   return REG_NOERROR;
01746 }
01747 
01748 /* Helper functions.  */
01749 
01750 static reg_errcode_t
01751 internal_function
01752 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
01753 {
01754   Idx top = mctx->state_log_top;
01755 
01756   if (next_state_log_idx >= mctx->input.bufs_len
01757       || (next_state_log_idx >= mctx->input.valid_len
01758          && mctx->input.valid_len < mctx->input.len))
01759     {
01760       reg_errcode_t err;
01761       err = extend_buffers (mctx);
01762       if (BE (err != REG_NOERROR, 0))
01763        return err;
01764     }
01765 
01766   if (top < next_state_log_idx)
01767     {
01768       memset (mctx->state_log + top + 1, '\0',
01769              sizeof (re_dfastate_t *) * (next_state_log_idx - top));
01770       mctx->state_log_top = next_state_log_idx;
01771     }
01772   return REG_NOERROR;
01773 }
01774 
01775 static reg_errcode_t
01776 internal_function
01777 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
01778                  re_dfastate_t **src, Idx num)
01779 {
01780   Idx st_idx;
01781   reg_errcode_t err;
01782   for (st_idx = 0; st_idx < num; ++st_idx)
01783     {
01784       if (dst[st_idx] == NULL)
01785        dst[st_idx] = src[st_idx];
01786       else if (src[st_idx] != NULL)
01787        {
01788          re_node_set merged_set;
01789          err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
01790                                    &src[st_idx]->nodes);
01791          if (BE (err != REG_NOERROR, 0))
01792            return err;
01793          dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
01794          re_node_set_free (&merged_set);
01795          if (BE (err != REG_NOERROR, 0))
01796            return err;
01797        }
01798     }
01799   return REG_NOERROR;
01800 }
01801 
01802 static reg_errcode_t
01803 internal_function
01804 update_cur_sifted_state (const re_match_context_t *mctx,
01805                       re_sift_context_t *sctx, Idx str_idx,
01806                       re_node_set *dest_nodes)
01807 {
01808   const re_dfa_t *const dfa = mctx->dfa;
01809   reg_errcode_t err = REG_NOERROR;
01810   const re_node_set *candidates;
01811   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
01812               : &mctx->state_log[str_idx]->nodes);
01813 
01814   if (dest_nodes->nelem == 0)
01815     sctx->sifted_states[str_idx] = NULL;
01816   else
01817     {
01818       if (candidates)
01819        {
01820          /* At first, add the nodes which can epsilon transit to a node in
01821             DEST_NODE.  */
01822          err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
01823          if (BE (err != REG_NOERROR, 0))
01824            return err;
01825 
01826          /* Then, check the limitations in the current sift_context.  */
01827          if (sctx->limits.nelem)
01828            {
01829              err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
01830                                     mctx->bkref_ents, str_idx);
01831              if (BE (err != REG_NOERROR, 0))
01832               return err;
01833            }
01834        }
01835 
01836       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
01837       if (BE (err != REG_NOERROR, 0))
01838        return err;
01839     }
01840 
01841   if (candidates && mctx->state_log[str_idx]->has_backref)
01842     {
01843       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
01844       if (BE (err != REG_NOERROR, 0))
01845        return err;
01846     }
01847   return REG_NOERROR;
01848 }
01849 
01850 static reg_errcode_t
01851 internal_function __attribute_warn_unused_result__
01852 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
01853                      const re_node_set *candidates)
01854 {
01855   reg_errcode_t err = REG_NOERROR;
01856   Idx i;
01857 
01858   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
01859   if (BE (err != REG_NOERROR, 0))
01860     return err;
01861 
01862   if (!state->inveclosure.alloc)
01863     {
01864       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
01865       if (BE (err != REG_NOERROR, 0))
01866        return REG_ESPACE;
01867       for (i = 0; i < dest_nodes->nelem; i++)
01868        {
01869          err = re_node_set_merge (&state->inveclosure,
01870                                dfa->inveclosures + dest_nodes->elems[i]);
01871          if (BE (err != REG_NOERROR, 0))
01872            return REG_ESPACE;
01873        }
01874     }
01875   return re_node_set_add_intersect (dest_nodes, candidates,
01876                                 &state->inveclosure);
01877 }
01878 
01879 static reg_errcode_t
01880 internal_function
01881 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
01882                      const re_node_set *candidates)
01883 {
01884     Idx ecl_idx;
01885     reg_errcode_t err;
01886     re_node_set *inv_eclosure = dfa->inveclosures + node;
01887     re_node_set except_nodes;
01888     re_node_set_init_empty (&except_nodes);
01889     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
01890       {
01891        Idx cur_node = inv_eclosure->elems[ecl_idx];
01892        if (cur_node == node)
01893          continue;
01894        if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
01895          {
01896            Idx edst1 = dfa->edests[cur_node].elems[0];
01897            Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
01898                       ? dfa->edests[cur_node].elems[1] : REG_MISSING);
01899            if ((!re_node_set_contains (inv_eclosure, edst1)
01900                && re_node_set_contains (dest_nodes, edst1))
01901               || (REG_VALID_NONZERO_INDEX (edst2)
01902                   && !re_node_set_contains (inv_eclosure, edst2)
01903                   && re_node_set_contains (dest_nodes, edst2)))
01904              {
01905               err = re_node_set_add_intersect (&except_nodes, candidates,
01906                                            dfa->inveclosures + cur_node);
01907               if (BE (err != REG_NOERROR, 0))
01908                 {
01909                   re_node_set_free (&except_nodes);
01910                   return err;
01911                 }
01912              }
01913          }
01914       }
01915     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
01916       {
01917        Idx cur_node = inv_eclosure->elems[ecl_idx];
01918        if (!re_node_set_contains (&except_nodes, cur_node))
01919          {
01920            Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
01921            re_node_set_remove_at (dest_nodes, idx);
01922          }
01923       }
01924     re_node_set_free (&except_nodes);
01925     return REG_NOERROR;
01926 }
01927 
01928 static bool
01929 internal_function
01930 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
01931                 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
01932 {
01933   const re_dfa_t *const dfa = mctx->dfa;
01934   Idx lim_idx, src_pos, dst_pos;
01935 
01936   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
01937   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
01938   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
01939     {
01940       Idx subexp_idx;
01941       struct re_backref_cache_entry *ent;
01942       ent = mctx->bkref_ents + limits->elems[lim_idx];
01943       subexp_idx = dfa->nodes[ent->node].opr.idx;
01944 
01945       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
01946                                       subexp_idx, dst_node, dst_idx,
01947                                       dst_bkref_idx);
01948       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
01949                                       subexp_idx, src_node, src_idx,
01950                                       src_bkref_idx);
01951 
01952       /* In case of:
01953         <src> <dst> ( <subexp> )
01954         ( <subexp> ) <src> <dst>
01955         ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
01956       if (src_pos == dst_pos)
01957        continue; /* This is unrelated limitation.  */
01958       else
01959        return true;
01960     }
01961   return false;
01962 }
01963 
01964 static int
01965 internal_function
01966 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
01967                           Idx subexp_idx, Idx from_node, Idx bkref_idx)
01968 {
01969   const re_dfa_t *const dfa = mctx->dfa;
01970   const re_node_set *eclosures = dfa->eclosures + from_node;
01971   Idx node_idx;
01972 
01973   /* Else, we are on the boundary: examine the nodes on the epsilon
01974      closure.  */
01975   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
01976     {
01977       Idx node = eclosures->elems[node_idx];
01978       switch (dfa->nodes[node].type)
01979        {
01980        case OP_BACK_REF:
01981          if (bkref_idx != REG_MISSING)
01982            {
01983              struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
01984              do
01985               {
01986                 Idx dst;
01987                 int cpos;
01988 
01989                 if (ent->node != node)
01990                   continue;
01991 
01992                 if (subexp_idx < BITSET_WORD_BITS
01993                     && !(ent->eps_reachable_subexps_map
01994                         & ((bitset_word_t) 1 << subexp_idx)))
01995                   continue;
01996 
01997                 /* Recurse trying to reach the OP_OPEN_SUBEXP and
01998                    OP_CLOSE_SUBEXP cases below.  But, if the
01999                    destination node is the same node as the source
02000                    node, don't recurse because it would cause an
02001                    infinite loop: a regex that exhibits this behavior
02002                    is ()\1*\1*  */
02003                 dst = dfa->edests[node].elems[0];
02004                 if (dst == from_node)
02005                   {
02006                     if (boundaries & 1)
02007                      return -1;
02008                     else /* if (boundaries & 2) */
02009                      return 0;
02010                   }
02011 
02012                 cpos =
02013                   check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
02014                                            dst, bkref_idx);
02015                 if (cpos == -1 /* && (boundaries & 1) */)
02016                   return -1;
02017                 if (cpos == 0 && (boundaries & 2))
02018                   return 0;
02019 
02020                 if (subexp_idx < BITSET_WORD_BITS)
02021                   ent->eps_reachable_subexps_map
02022                     &= ~((bitset_word_t) 1 << subexp_idx);
02023               }
02024              while (ent++->more);
02025            }
02026          break;
02027 
02028        case OP_OPEN_SUBEXP:
02029          if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
02030            return -1;
02031          break;
02032 
02033        case OP_CLOSE_SUBEXP:
02034          if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
02035            return 0;
02036          break;
02037 
02038        default:
02039            break;
02040        }
02041     }
02042 
02043   return (boundaries & 2) ? 1 : 0;
02044 }
02045 
02046 static int
02047 internal_function
02048 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
02049                         Idx subexp_idx, Idx from_node, Idx str_idx,
02050                         Idx bkref_idx)
02051 {
02052   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
02053   int boundaries;
02054 
02055   /* If we are outside the range of the subexpression, return -1 or 1.  */
02056   if (str_idx < lim->subexp_from)
02057     return -1;
02058 
02059   if (lim->subexp_to < str_idx)
02060     return 1;
02061 
02062   /* If we are within the subexpression, return 0.  */
02063   boundaries = (str_idx == lim->subexp_from);
02064   boundaries |= (str_idx == lim->subexp_to) << 1;
02065   if (boundaries == 0)
02066     return 0;
02067 
02068   /* Else, examine epsilon closure.  */
02069   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
02070                                   from_node, bkref_idx);
02071 }
02072 
02073 /* Check the limitations of sub expressions LIMITS, and remove the nodes
02074    which are against limitations from DEST_NODES. */
02075 
02076 static reg_errcode_t
02077 internal_function
02078 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
02079                    const re_node_set *candidates, re_node_set *limits,
02080                    struct re_backref_cache_entry *bkref_ents, Idx str_idx)
02081 {
02082   reg_errcode_t err;
02083   Idx node_idx, lim_idx;
02084 
02085   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
02086     {
02087       Idx subexp_idx;
02088       struct re_backref_cache_entry *ent;
02089       ent = bkref_ents + limits->elems[lim_idx];
02090 
02091       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
02092        continue; /* This is unrelated limitation.  */
02093 
02094       subexp_idx = dfa->nodes[ent->node].opr.idx;
02095       if (ent->subexp_to == str_idx)
02096        {
02097          Idx ops_node = REG_MISSING;
02098          Idx cls_node = REG_MISSING;
02099          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
02100            {
02101              Idx node = dest_nodes->elems[node_idx];
02102              re_token_type_t type = dfa->nodes[node].type;
02103              if (type == OP_OPEN_SUBEXP
02104                 && subexp_idx == dfa->nodes[node].opr.idx)
02105               ops_node = node;
02106              else if (type == OP_CLOSE_SUBEXP
02107                      && subexp_idx == dfa->nodes[node].opr.idx)
02108               cls_node = node;
02109            }
02110 
02111          /* Check the limitation of the open subexpression.  */
02112          /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
02113          if (REG_VALID_INDEX (ops_node))
02114            {
02115              err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
02116                                       candidates);
02117              if (BE (err != REG_NOERROR, 0))
02118               return err;
02119            }
02120 
02121          /* Check the limitation of the close subexpression.  */
02122          if (REG_VALID_INDEX (cls_node))
02123            for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
02124              {
02125               Idx node = dest_nodes->elems[node_idx];
02126               if (!re_node_set_contains (dfa->inveclosures + node,
02127                                       cls_node)
02128                   && !re_node_set_contains (dfa->eclosures + node,
02129                                          cls_node))
02130                 {
02131                   /* It is against this limitation.
02132                      Remove it form the current sifted state.  */
02133                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
02134                                            candidates);
02135                   if (BE (err != REG_NOERROR, 0))
02136                     return err;
02137                   --node_idx;
02138                 }
02139              }
02140        }
02141       else /* (ent->subexp_to != str_idx)  */
02142        {
02143          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
02144            {
02145              Idx node = dest_nodes->elems[node_idx];
02146              re_token_type_t type = dfa->nodes[node].type;
02147              if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
02148               {
02149                 if (subexp_idx != dfa->nodes[node].opr.idx)
02150                   continue;
02151                 /* It is against this limitation.
02152                    Remove it form the current sifted state.  */
02153                 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
02154                                           candidates);
02155                 if (BE (err != REG_NOERROR, 0))
02156                   return err;
02157               }
02158            }
02159        }
02160     }
02161   return REG_NOERROR;
02162 }
02163 
02164 static reg_errcode_t
02165 internal_function __attribute_warn_unused_result__
02166 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
02167                  Idx str_idx, const re_node_set *candidates)
02168 {
02169   const re_dfa_t *const dfa = mctx->dfa;
02170   reg_errcode_t err;
02171   Idx node_idx, node;
02172   re_sift_context_t local_sctx;
02173   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
02174 
02175   if (first_idx == REG_MISSING)
02176     return REG_NOERROR;
02177 
02178   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
02179 
02180   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
02181     {
02182       Idx enabled_idx;
02183       re_token_type_t type;
02184       struct re_backref_cache_entry *entry;
02185       node = candidates->elems[node_idx];
02186       type = dfa->nodes[node].type;
02187       /* Avoid infinite loop for the REs like "()\1+".  */
02188       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
02189        continue;
02190       if (type != OP_BACK_REF)
02191        continue;
02192 
02193       entry = mctx->bkref_ents + first_idx;
02194       enabled_idx = first_idx;
02195       do
02196        {
02197          Idx subexp_len;
02198          Idx to_idx;
02199          Idx dst_node;
02200          bool ok;
02201          re_dfastate_t *cur_state;
02202 
02203          if (entry->node != node)
02204            continue;
02205          subexp_len = entry->subexp_to - entry->subexp_from;
02206          to_idx = str_idx + subexp_len;
02207          dst_node = (subexp_len ? dfa->nexts[node]
02208                     : dfa->edests[node].elems[0]);
02209 
02210          if (to_idx > sctx->last_str_idx
02211              || sctx->sifted_states[to_idx] == NULL
02212              || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
02213              || check_dst_limits (mctx, &sctx->limits, node,
02214                                str_idx, dst_node, to_idx))
02215            continue;
02216 
02217          if (local_sctx.sifted_states == NULL)
02218            {
02219              local_sctx = *sctx;
02220              err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
02221              if (BE (err != REG_NOERROR, 0))
02222               goto free_return;
02223            }
02224          local_sctx.last_node = node;
02225          local_sctx.last_str_idx = str_idx;
02226          ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
02227          if (BE (! ok, 0))
02228            {
02229              err = REG_ESPACE;
02230              goto free_return;
02231            }
02232          cur_state = local_sctx.sifted_states[str_idx];
02233          err = sift_states_backward (mctx, &local_sctx);
02234          if (BE (err != REG_NOERROR, 0))
02235            goto free_return;
02236          if (sctx->limited_states != NULL)
02237            {
02238              err = merge_state_array (dfa, sctx->limited_states,
02239                                    local_sctx.sifted_states,
02240                                    str_idx + 1);
02241              if (BE (err != REG_NOERROR, 0))
02242               goto free_return;
02243            }
02244          local_sctx.sifted_states[str_idx] = cur_state;
02245          re_node_set_remove (&local_sctx.limits, enabled_idx);
02246 
02247          /* mctx->bkref_ents may have changed, reload the pointer.  */
02248          entry = mctx->bkref_ents + enabled_idx;
02249        }
02250       while (enabled_idx++, entry++->more);
02251     }
02252   err = REG_NOERROR;
02253  free_return:
02254   if (local_sctx.sifted_states != NULL)
02255     {
02256       re_node_set_free (&local_sctx.limits);
02257     }
02258 
02259   return err;
02260 }
02261 
02262 
02263 #ifdef RE_ENABLE_I18N
02264 static int
02265 internal_function
02266 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
02267                    Idx node_idx, Idx str_idx, Idx max_str_idx)
02268 {
02269   const re_dfa_t *const dfa = mctx->dfa;
02270   int naccepted;
02271   /* Check the node can accept `multi byte'.  */
02272   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
02273   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
02274       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
02275                          dfa->nexts[node_idx]))
02276     /* The node can't accept the `multi byte', or the
02277        destination was already thrown away, then the node
02278        could't accept the current input `multi byte'.   */
02279     naccepted = 0;
02280   /* Otherwise, it is sure that the node could accept
02281      `naccepted' bytes input.  */
02282   return naccepted;
02283 }
02284 #endif /* RE_ENABLE_I18N */
02285 
02286 
02287 /* Functions for state transition.  */
02288 
02289 /* Return the next state to which the current state STATE will transit by
02290    accepting the current input byte, and update STATE_LOG if necessary.
02291    If STATE can accept a multibyte char/collating element/back reference
02292    update the destination of STATE_LOG.  */
02293 
02294 static re_dfastate_t *
02295 internal_function __attribute_warn_unused_result__
02296 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
02297               re_dfastate_t *state)
02298 {
02299   re_dfastate_t **trtable;
02300   unsigned char ch;
02301 
02302 #ifdef RE_ENABLE_I18N
02303   /* If the current state can accept multibyte.  */
02304   if (BE (state->accept_mb, 0))
02305     {
02306       *err = transit_state_mb (mctx, state);
02307       if (BE (*err != REG_NOERROR, 0))
02308        return NULL;
02309     }
02310 #endif /* RE_ENABLE_I18N */
02311 
02312   /* Then decide the next state with the single byte.  */
02313 #if 0
02314   if (0)
02315     /* don't use transition table  */
02316     return transit_state_sb (err, mctx, state);
02317 #endif
02318 
02319   /* Use transition table  */
02320   ch = re_string_fetch_byte (&mctx->input);
02321   for (;;)
02322     {
02323       trtable = state->trtable;
02324       if (BE (trtable != NULL, 1))
02325        return trtable[ch];
02326 
02327       trtable = state->word_trtable;
02328       if (BE (trtable != NULL, 1))
02329        {
02330          unsigned int context;
02331          context
02332            = re_string_context_at (&mctx->input,
02333                                 re_string_cur_idx (&mctx->input) - 1,
02334                                 mctx->eflags);
02335          if (IS_WORD_CONTEXT (context))
02336            return trtable[ch + SBC_MAX];
02337          else
02338            return trtable[ch];
02339        }
02340 
02341       if (!build_trtable (mctx->dfa, state))
02342        {
02343          *err = REG_ESPACE;
02344          return NULL;
02345        }
02346 
02347       /* Retry, we now have a transition table.  */
02348     }
02349 }
02350 
02351 /* Update the state_log if we need */
02352 static re_dfastate_t *
02353 internal_function
02354 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
02355                     re_dfastate_t *next_state)
02356 {
02357   const re_dfa_t *const dfa = mctx->dfa;
02358   Idx cur_idx = re_string_cur_idx (&mctx->input);
02359 
02360   if (cur_idx > mctx->state_log_top)
02361     {
02362       mctx->state_log[cur_idx] = next_state;
02363       mctx->state_log_top = cur_idx;
02364     }
02365   else if (mctx->state_log[cur_idx] == 0)
02366     {
02367       mctx->state_log[cur_idx] = next_state;
02368     }
02369   else
02370     {
02371       re_dfastate_t *pstate;
02372       unsigned int context;
02373       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
02374       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
02375         the destination of a multibyte char/collating element/
02376         back reference.  Then the next state is the union set of
02377         these destinations and the results of the transition table.  */
02378       pstate = mctx->state_log[cur_idx];
02379       log_nodes = pstate->entrance_nodes;
02380       if (next_state != NULL)
02381        {
02382          table_nodes = next_state->entrance_nodes;
02383          *err = re_node_set_init_union (&next_nodes, table_nodes,
02384                                         log_nodes);
02385          if (BE (*err != REG_NOERROR, 0))
02386            return NULL;
02387        }
02388       else
02389        next_nodes = *log_nodes;
02390       /* Note: We already add the nodes of the initial state,
02391         then we don't need to add them here.  */
02392 
02393       context = re_string_context_at (&mctx->input,
02394                                   re_string_cur_idx (&mctx->input) - 1,
02395                                   mctx->eflags);
02396       next_state = mctx->state_log[cur_idx]
02397        = re_acquire_state_context (err, dfa, &next_nodes, context);
02398       /* We don't need to check errors here, since the return value of
02399         this function is next_state and ERR is already set.  */
02400 
02401       if (table_nodes != NULL)
02402        re_node_set_free (&next_nodes);
02403     }
02404 
02405   if (BE (dfa->nbackref, 0) && next_state != NULL)
02406     {
02407       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
02408         later.  We must check them here, since the back references in the
02409         next state might use them.  */
02410       *err = check_subexp_matching_top (mctx, &next_state->nodes,
02411                                    cur_idx);
02412       if (BE (*err != REG_NOERROR, 0))
02413        return NULL;
02414 
02415       /* If the next state has back references.  */
02416       if (next_state->has_backref)
02417        {
02418          *err = transit_state_bkref (mctx, &next_state->nodes);
02419          if (BE (*err != REG_NOERROR, 0))
02420            return NULL;
02421          next_state = mctx->state_log[cur_idx];
02422        }
02423     }
02424 
02425   return next_state;
02426 }
02427 
02428 /* Skip bytes in the input that correspond to part of a
02429    multi-byte match, then look in the log for a state
02430    from which to restart matching.  */
02431 static re_dfastate_t *
02432 internal_function
02433 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
02434 {
02435   re_dfastate_t *cur_state;
02436   do
02437     {
02438       Idx max = mctx->state_log_top;
02439       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
02440 
02441       do
02442        {
02443          if (++cur_str_idx > max)
02444            return NULL;
02445          re_string_skip_bytes (&mctx->input, 1);
02446        }
02447       while (mctx->state_log[cur_str_idx] == NULL);
02448 
02449       cur_state = merge_state_with_log (err, mctx, NULL);
02450     }
02451   while (*err == REG_NOERROR && cur_state == NULL);
02452   return cur_state;
02453 }
02454 
02455 /* Helper functions for transit_state.  */
02456 
02457 /* From the node set CUR_NODES, pick up the nodes whose types are
02458    OP_OPEN_SUBEXP and which have corresponding back references in the regular
02459    expression. And register them to use them later for evaluating the
02460    correspoding back references.  */
02461 
02462 static reg_errcode_t
02463 internal_function
02464 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
02465                         Idx str_idx)
02466 {
02467   const re_dfa_t *const dfa = mctx->dfa;
02468   Idx node_idx;
02469   reg_errcode_t err;
02470 
02471   /* TODO: This isn't efficient.
02472           Because there might be more than one nodes whose types are
02473           OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
02474           nodes.
02475           E.g. RE: (a){2}  */
02476   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
02477     {
02478       Idx node = cur_nodes->elems[node_idx];
02479       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
02480          && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
02481          && (dfa->used_bkref_map
02482              & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
02483        {
02484          err = match_ctx_add_subtop (mctx, node, str_idx);
02485          if (BE (err != REG_NOERROR, 0))
02486            return err;
02487        }
02488     }
02489   return REG_NOERROR;
02490 }
02491 
02492 #if 0
02493 /* Return the next state to which the current state STATE will transit by
02494    accepting the current input byte.  */
02495 
02496 static re_dfastate_t *
02497 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
02498                 re_dfastate_t *state)
02499 {
02500   const re_dfa_t *const dfa = mctx->dfa;
02501   re_node_set next_nodes;
02502   re_dfastate_t *next_state;
02503   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
02504   unsigned int context;
02505 
02506   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
02507   if (BE (*err != REG_NOERROR, 0))
02508     return NULL;
02509   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
02510     {
02511       Idx cur_node = state->nodes.elems[node_cnt];
02512       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
02513        {
02514          *err = re_node_set_merge (&next_nodes,
02515                                 dfa->eclosures + dfa->nexts[cur_node]);
02516          if (BE (*err != REG_NOERROR, 0))
02517            {
02518              re_node_set_free (&next_nodes);
02519              return NULL;
02520            }
02521        }
02522     }
02523   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
02524   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
02525   /* We don't need to check errors here, since the return value of
02526      this function is next_state and ERR is already set.  */
02527 
02528   re_node_set_free (&next_nodes);
02529   re_string_skip_bytes (&mctx->input, 1);
02530   return next_state;
02531 }
02532 #endif
02533 
02534 #ifdef RE_ENABLE_I18N
02535 static reg_errcode_t
02536 internal_function
02537 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
02538 {
02539   const re_dfa_t *const dfa = mctx->dfa;
02540   reg_errcode_t err;
02541   Idx i;
02542 
02543   for (i = 0; i < pstate->nodes.nelem; ++i)
02544     {
02545       re_node_set dest_nodes, *new_nodes;
02546       Idx cur_node_idx = pstate->nodes.elems[i];
02547       int naccepted;
02548       Idx dest_idx;
02549       unsigned int context;
02550       re_dfastate_t *dest_state;
02551 
02552       if (!dfa->nodes[cur_node_idx].accept_mb)
02553        continue;
02554 
02555       if (dfa->nodes[cur_node_idx].constraint)
02556        {
02557          context = re_string_context_at (&mctx->input,
02558                                      re_string_cur_idx (&mctx->input),
02559                                      mctx->eflags);
02560          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
02561                                       context))
02562            continue;
02563        }
02564 
02565       /* How many bytes the node can accept?  */
02566       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
02567                                       re_string_cur_idx (&mctx->input));
02568       if (naccepted == 0)
02569        continue;
02570 
02571       /* The node can accepts `naccepted' bytes.  */
02572       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
02573       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
02574                             : mctx->max_mb_elem_len);
02575       err = clean_state_log_if_needed (mctx, dest_idx);
02576       if (BE (err != REG_NOERROR, 0))
02577        return err;
02578 #ifdef DEBUG
02579       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
02580 #endif
02581       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
02582 
02583       dest_state = mctx->state_log[dest_idx];
02584       if (dest_state == NULL)
02585        dest_nodes = *new_nodes;
02586       else
02587        {
02588          err = re_node_set_init_union (&dest_nodes,
02589                                    dest_state->entrance_nodes, new_nodes);
02590          if (BE (err != REG_NOERROR, 0))
02591            return err;
02592        }
02593       context = re_string_context_at (&mctx->input, dest_idx - 1,
02594                                   mctx->eflags);
02595       mctx->state_log[dest_idx]
02596        = re_acquire_state_context (&err, dfa, &dest_nodes, context);
02597       if (dest_state != NULL)
02598        re_node_set_free (&dest_nodes);
02599       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
02600        return err;
02601     }
02602   return REG_NOERROR;
02603 }
02604 #endif /* RE_ENABLE_I18N */
02605 
02606 static reg_errcode_t
02607 internal_function
02608 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
02609 {
02610   const re_dfa_t *const dfa = mctx->dfa;
02611   reg_errcode_t err;
02612   Idx i;
02613   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
02614 
02615   for (i = 0; i < nodes->nelem; ++i)
02616     {
02617       Idx dest_str_idx, prev_nelem, bkc_idx;
02618       Idx node_idx = nodes->elems[i];
02619       unsigned int context;
02620       const re_token_t *node = dfa->nodes + node_idx;
02621       re_node_set *new_dest_nodes;
02622 
02623       /* Check whether `node' is a backreference or not.  */
02624       if (node->type != OP_BACK_REF)
02625        continue;
02626 
02627       if (node->constraint)
02628        {
02629          context = re_string_context_at (&mctx->input, cur_str_idx,
02630                                      mctx->eflags);
02631          if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
02632            continue;
02633        }
02634 
02635       /* `node' is a backreference.
02636         Check the substring which the substring matched.  */
02637       bkc_idx = mctx->nbkref_ents;
02638       err = get_subexp (mctx, node_idx, cur_str_idx);
02639       if (BE (err != REG_NOERROR, 0))
02640        goto free_return;
02641 
02642       /* And add the epsilon closures (which is `new_dest_nodes') of
02643         the backreference to appropriate state_log.  */
02644 #ifdef DEBUG
02645       assert (dfa->nexts[node_idx] != REG_MISSING);
02646 #endif
02647       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
02648        {
02649          Idx subexp_len;
02650          re_dfastate_t *dest_state;
02651          struct re_backref_cache_entry *bkref_ent;
02652          bkref_ent = mctx->bkref_ents + bkc_idx;
02653          if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
02654            continue;
02655          subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
02656          new_dest_nodes = (subexp_len == 0
02657                          ? dfa->eclosures + dfa->edests[node_idx].elems[0]
02658                          : dfa->eclosures + dfa->nexts[node_idx]);
02659          dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
02660                        - bkref_ent->subexp_from);
02661          context = re_string_context_at (&mctx->input, dest_str_idx - 1,
02662                                      mctx->eflags);
02663          dest_state = mctx->state_log[dest_str_idx];
02664          prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
02665                      : mctx->state_log[cur_str_idx]->nodes.nelem);
02666          /* Add `new_dest_node' to state_log.  */
02667          if (dest_state == NULL)
02668            {
02669              mctx->state_log[dest_str_idx]
02670               = re_acquire_state_context (&err, dfa, new_dest_nodes,
02671                                        context);
02672              if (BE (mctx->state_log[dest_str_idx] == NULL
02673                     && err != REG_NOERROR, 0))
02674               goto free_return;
02675            }
02676          else
02677            {
02678              re_node_set dest_nodes;
02679              err = re_node_set_init_union (&dest_nodes,
02680                                        dest_state->entrance_nodes,
02681                                        new_dest_nodes);
02682              if (BE (err != REG_NOERROR, 0))
02683               {
02684                 re_node_set_free (&dest_nodes);
02685                 goto free_return;
02686               }
02687              mctx->state_log[dest_str_idx]
02688               = re_acquire_state_context (&err, dfa, &dest_nodes, context);
02689              re_node_set_free (&dest_nodes);
02690              if (BE (mctx->state_log[dest_str_idx] == NULL
02691                     && err != REG_NOERROR, 0))
02692               goto free_return;
02693            }
02694          /* We need to check recursively if the backreference can epsilon
02695             transit.  */
02696          if (subexp_len == 0
02697              && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
02698            {
02699              err = check_subexp_matching_top (mctx, new_dest_nodes,
02700                                           cur_str_idx);
02701              if (BE (err != REG_NOERROR, 0))
02702               goto free_return;
02703              err = transit_state_bkref (mctx, new_dest_nodes);
02704              if (BE (err != REG_NOERROR, 0))
02705               goto free_return;
02706            }
02707        }
02708     }
02709   err = REG_NOERROR;
02710  free_return:
02711   return err;
02712 }
02713 
02714 /* Enumerate all the candidates which the backreference BKREF_NODE can match
02715    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
02716    Note that we might collect inappropriate candidates here.
02717    However, the cost of checking them strictly here is too high, then we
02718    delay these checking for prune_impossible_nodes().  */
02719 
02720 static reg_errcode_t
02721 internal_function __attribute_warn_unused_result__
02722 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
02723 {
02724   const re_dfa_t *const dfa = mctx->dfa;
02725   Idx subexp_num, sub_top_idx;
02726   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
02727   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
02728   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
02729   if (cache_idx != REG_MISSING)
02730     {
02731       const struct re_backref_cache_entry *entry
02732        = mctx->bkref_ents + cache_idx;
02733       do
02734        if (entry->node == bkref_node)
02735          return REG_NOERROR; /* We already checked it.  */
02736       while (entry++->more);
02737     }
02738 
02739   subexp_num = dfa->nodes[bkref_node].opr.idx;
02740 
02741   /* For each sub expression  */
02742   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
02743     {
02744       reg_errcode_t err;
02745       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
02746       re_sub_match_last_t *sub_last;
02747       Idx sub_last_idx, sl_str, bkref_str_off;
02748 
02749       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
02750        continue; /* It isn't related.  */
02751 
02752       sl_str = sub_top->str_idx;
02753       bkref_str_off = bkref_str_idx;
02754       /* At first, check the last node of sub expressions we already
02755         evaluated.  */
02756       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
02757        {
02758          regoff_t sl_str_diff;
02759          sub_last = sub_top->lasts[sub_last_idx];
02760          sl_str_diff = sub_last->str_idx - sl_str;
02761          /* The matched string by the sub expression match with the substring
02762             at the back reference?  */
02763          if (sl_str_diff > 0)
02764            {
02765              if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
02766               {
02767                 /* Not enough chars for a successful match.  */
02768                 if (bkref_str_off + sl_str_diff > mctx->input.len)
02769                   break;
02770 
02771                 err = clean_state_log_if_needed (mctx,
02772                                              bkref_str_off
02773                                              + sl_str_diff);
02774                 if (BE (err != REG_NOERROR, 0))
02775                   return err;
02776                 buf = (const char *) re_string_get_buffer (&mctx->input);
02777               }
02778              if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
02779               /* We don't need to search this sub expression any more.  */
02780               break;
02781            }
02782          bkref_str_off += sl_str_diff;
02783          sl_str += sl_str_diff;
02784          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
02785                             bkref_str_idx);
02786 
02787          /* Reload buf, since the preceding call might have reallocated
02788             the buffer.  */
02789          buf = (const char *) re_string_get_buffer (&mctx->input);
02790 
02791          if (err == REG_NOMATCH)
02792            continue;
02793          if (BE (err != REG_NOERROR, 0))
02794            return err;
02795        }
02796 
02797       if (sub_last_idx < sub_top->nlasts)
02798        continue;
02799       if (sub_last_idx > 0)
02800        ++sl_str;
02801       /* Then, search for the other last nodes of the sub expression.  */
02802       for (; sl_str <= bkref_str_idx; ++sl_str)
02803        {
02804          Idx cls_node;
02805          regoff_t sl_str_off;
02806          const re_node_set *nodes;
02807          sl_str_off = sl_str - sub_top->str_idx;
02808          /* The matched string by the sub expression match with the substring
02809             at the back reference?  */
02810          if (sl_str_off > 0)
02811            {
02812              if (BE (bkref_str_off >= mctx->input.valid_len, 0))
02813               {
02814                 /* If we are at the end of the input, we cannot match.  */
02815                 if (bkref_str_off >= mctx->input.len)
02816                   break;
02817 
02818                 err = extend_buffers (mctx);
02819                 if (BE (err != REG_NOERROR, 0))
02820                   return err;
02821 
02822                 buf = (const char *) re_string_get_buffer (&mctx->input);
02823               }
02824              if (buf [bkref_str_off++] != buf[sl_str - 1])
02825               break; /* We don't need to search this sub expression
02826                        any more.  */
02827            }
02828          if (mctx->state_log[sl_str] == NULL)
02829            continue;
02830          /* Does this state have a ')' of the sub expression?  */
02831          nodes = &mctx->state_log[sl_str]->nodes;
02832          cls_node = find_subexp_node (dfa, nodes, subexp_num,
02833                                    OP_CLOSE_SUBEXP);
02834          if (cls_node == REG_MISSING)
02835            continue; /* No.  */
02836          if (sub_top->path == NULL)
02837            {
02838              sub_top->path = calloc (sizeof (state_array_t),
02839                                   sl_str - sub_top->str_idx + 1);
02840              if (sub_top->path == NULL)
02841               return REG_ESPACE;
02842            }
02843          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
02844             in the current context?  */
02845          err = check_arrival (mctx, sub_top->path, sub_top->node,
02846                             sub_top->str_idx, cls_node, sl_str,
02847                             OP_CLOSE_SUBEXP);
02848          if (err == REG_NOMATCH)
02849              continue;
02850          if (BE (err != REG_NOERROR, 0))
02851              return err;
02852          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
02853          if (BE (sub_last == NULL, 0))
02854            return REG_ESPACE;
02855          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
02856                             bkref_str_idx);
02857          if (err == REG_NOMATCH)
02858            continue;
02859        }
02860     }
02861   return REG_NOERROR;
02862 }
02863 
02864 /* Helper functions for get_subexp().  */
02865 
02866 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
02867    If it can arrive, register the sub expression expressed with SUB_TOP
02868    and SUB_LAST.  */
02869 
02870 static reg_errcode_t
02871 internal_function
02872 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
02873               re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
02874 {
02875   reg_errcode_t err;
02876   Idx to_idx;
02877   /* Can the subexpression arrive the back reference?  */
02878   err = check_arrival (mctx, &sub_last->path, sub_last->node,
02879                      sub_last->str_idx, bkref_node, bkref_str,
02880                      OP_OPEN_SUBEXP);
02881   if (err != REG_NOERROR)
02882     return err;
02883   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
02884                           sub_last->str_idx);
02885   if (BE (err != REG_NOERROR, 0))
02886     return err;
02887   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
02888   return clean_state_log_if_needed (mctx, to_idx);
02889 }
02890 
02891 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
02892    Search '(' if FL_OPEN, or search ')' otherwise.
02893    TODO: This function isn't efficient...
02894         Because there might be more than one nodes whose types are
02895         OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
02896         nodes.
02897         E.g. RE: (a){2}  */
02898 
02899 static Idx
02900 internal_function
02901 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
02902                 Idx subexp_idx, int type)
02903 {
02904   Idx cls_idx;
02905   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
02906     {
02907       Idx cls_node = nodes->elems[cls_idx];
02908       const re_token_t *node = dfa->nodes + cls_node;
02909       if (node->type == type
02910          && node->opr.idx == subexp_idx)
02911        return cls_node;
02912     }
02913   return REG_MISSING;
02914 }
02915 
02916 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
02917    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
02918    heavily reused.
02919    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
02920 
02921 static reg_errcode_t
02922 internal_function __attribute_warn_unused_result__
02923 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
02924               Idx top_str, Idx last_node, Idx last_str, int type)
02925 {
02926   const re_dfa_t *const dfa = mctx->dfa;
02927   reg_errcode_t err = REG_NOERROR;
02928   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
02929   re_dfastate_t *cur_state = NULL;
02930   re_node_set *cur_nodes, next_nodes;
02931   re_dfastate_t **backup_state_log;
02932   unsigned int context;
02933 
02934   subexp_num = dfa->nodes[top_node].opr.idx;
02935   /* Extend the buffer if we need.  */
02936   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
02937     {
02938       re_dfastate_t **new_array;
02939       Idx old_alloc = path->alloc;
02940       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
02941       if (BE (new_alloc < old_alloc, 0)
02942          || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
02943        return REG_ESPACE;
02944       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
02945       if (BE (new_array == NULL, 0))
02946        return REG_ESPACE;
02947       path->array = new_array;
02948       path->alloc = new_alloc;
02949       memset (new_array + old_alloc, '\0',
02950              sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
02951     }
02952 
02953   str_idx = path->next_idx ? path->next_idx : top_str;
02954 
02955   /* Temporary modify MCTX.  */
02956   backup_state_log = mctx->state_log;
02957   backup_cur_idx = mctx->input.cur_idx;
02958   mctx->state_log = path->array;
02959   mctx->input.cur_idx = str_idx;
02960 
02961   /* Setup initial node set.  */
02962   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
02963   if (str_idx == top_str)
02964     {
02965       err = re_node_set_init_1 (&next_nodes, top_node);
02966       if (BE (err != REG_NOERROR, 0))
02967        return err;
02968       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
02969       if (BE (err != REG_NOERROR, 0))
02970        {
02971          re_node_set_free (&next_nodes);
02972          return err;
02973        }
02974     }
02975   else
02976     {
02977       cur_state = mctx->state_log[str_idx];
02978       if (cur_state && cur_state->has_backref)
02979        {
02980          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
02981          if (BE (err != REG_NOERROR, 0))
02982            return err;
02983        }
02984       else
02985        re_node_set_init_empty (&next_nodes);
02986     }
02987   if (str_idx == top_str || (cur_state && cur_state->has_backref))
02988     {
02989       if (next_nodes.nelem)
02990        {
02991          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
02992                                 subexp_num, type);
02993          if (BE (err != REG_NOERROR, 0))
02994            {
02995              re_node_set_free (&next_nodes);
02996              return err;
02997            }
02998        }
02999       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
03000       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
03001        {
03002          re_node_set_free (&next_nodes);
03003          return err;
03004        }
03005       mctx->state_log[str_idx] = cur_state;
03006     }
03007 
03008   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
03009     {
03010       re_node_set_empty (&next_nodes);
03011       if (mctx->state_log[str_idx + 1])
03012        {
03013          err = re_node_set_merge (&next_nodes,
03014                                &mctx->state_log[str_idx + 1]->nodes);
03015          if (BE (err != REG_NOERROR, 0))
03016            {
03017              re_node_set_free (&next_nodes);
03018              return err;
03019            }
03020        }
03021       if (cur_state)
03022        {
03023          err = check_arrival_add_next_nodes (mctx, str_idx,
03024                                          &cur_state->non_eps_nodes,
03025                                          &next_nodes);
03026          if (BE (err != REG_NOERROR, 0))
03027            {
03028              re_node_set_free (&next_nodes);
03029              return err;
03030            }
03031        }
03032       ++str_idx;
03033       if (next_nodes.nelem)
03034        {
03035          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
03036          if (BE (err != REG_NOERROR, 0))
03037            {
03038              re_node_set_free (&next_nodes);
03039              return err;
03040            }
03041          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
03042                                 subexp_num, type);
03043          if (BE (err != REG_NOERROR, 0))
03044            {
03045              re_node_set_free (&next_nodes);
03046              return err;
03047            }
03048        }
03049       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
03050       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
03051       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
03052        {
03053          re_node_set_free (&next_nodes);
03054          return err;
03055        }
03056       mctx->state_log[str_idx] = cur_state;
03057       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
03058     }
03059   re_node_set_free (&next_nodes);
03060   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
03061               : &mctx->state_log[last_str]->nodes);
03062   path->next_idx = str_idx;
03063 
03064   /* Fix MCTX.  */
03065   mctx->state_log = backup_state_log;
03066   mctx->input.cur_idx = backup_cur_idx;
03067 
03068   /* Then check the current node set has the node LAST_NODE.  */
03069   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
03070     return REG_NOERROR;
03071 
03072   return REG_NOMATCH;
03073 }
03074 
03075 /* Helper functions for check_arrival.  */
03076 
03077 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
03078    to NEXT_NODES.
03079    TODO: This function is similar to the functions transit_state*(),
03080         however this function has many additional works.
03081         Can't we unify them?  */
03082 
03083 static reg_errcode_t
03084 internal_function __attribute_warn_unused_result__
03085 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
03086                            re_node_set *cur_nodes, re_node_set *next_nodes)
03087 {
03088   const re_dfa_t *const dfa = mctx->dfa;
03089   bool ok;
03090   Idx cur_idx;
03091 #ifdef RE_ENABLE_I18N
03092   reg_errcode_t err = REG_NOERROR;
03093 #endif
03094   re_node_set union_set;
03095   re_node_set_init_empty (&union_set);
03096   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
03097     {
03098       int naccepted = 0;
03099       Idx cur_node = cur_nodes->elems[cur_idx];
03100 #ifdef DEBUG
03101       re_token_type_t type = dfa->nodes[cur_node].type;
03102       assert (!IS_EPSILON_NODE (type));
03103 #endif
03104 #ifdef RE_ENABLE_I18N
03105       /* If the node may accept `multi byte'.  */
03106       if (dfa->nodes[cur_node].accept_mb)
03107        {
03108          naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
03109                                           str_idx);
03110          if (naccepted > 1)
03111            {
03112              re_dfastate_t *dest_state;
03113              Idx next_node = dfa->nexts[cur_node];
03114              Idx next_idx = str_idx + naccepted;
03115              dest_state = mctx->state_log[next_idx];
03116              re_node_set_empty (&union_set);
03117              if (dest_state)
03118               {
03119                 err = re_node_set_merge (&union_set, &dest_state->nodes);
03120                 if (BE (err != REG_NOERROR, 0))
03121                   {
03122                     re_node_set_free (&union_set);
03123                     return err;
03124                   }
03125               }
03126              ok = re_node_set_insert (&union_set, next_node);
03127              if (BE (! ok, 0))
03128               {
03129                 re_node_set_free (&union_set);
03130                 return REG_ESPACE;
03131               }
03132              mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
03133                                                      &union_set);
03134              if (BE (mctx->state_log[next_idx] == NULL
03135                     && err != REG_NOERROR, 0))
03136               {
03137                 re_node_set_free (&union_set);
03138                 return err;
03139               }
03140            }
03141        }
03142 #endif /* RE_ENABLE_I18N */
03143       if (naccepted
03144          || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
03145        {
03146          ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
03147          if (BE (! ok, 0))
03148            {
03149              re_node_set_free (&union_set);
03150              return REG_ESPACE;
03151            }
03152        }
03153     }
03154   re_node_set_free (&union_set);
03155   return REG_NOERROR;
03156 }
03157 
03158 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
03159    CUR_NODES, however exclude the nodes which are:
03160     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
03161     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
03162 */
03163 
03164 static reg_errcode_t
03165 internal_function
03166 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
03167                        Idx ex_subexp, int type)
03168 {
03169   reg_errcode_t err;
03170   Idx idx, outside_node;
03171   re_node_set new_nodes;
03172 #ifdef DEBUG
03173   assert (cur_nodes->nelem);
03174 #endif
03175   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
03176   if (BE (err != REG_NOERROR, 0))
03177     return err;
03178   /* Create a new node set NEW_NODES with the nodes which are epsilon
03179      closures of the node in CUR_NODES.  */
03180 
03181   for (idx = 0; idx < cur_nodes->nelem; ++idx)
03182     {
03183       Idx cur_node = cur_nodes->elems[idx];
03184       const re_node_set *eclosure = dfa->eclosures + cur_node;
03185       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
03186       if (outside_node == REG_MISSING)
03187        {
03188          /* There are no problematic nodes, just merge them.  */
03189          err = re_node_set_merge (&new_nodes, eclosure);
03190          if (BE (err != REG_NOERROR, 0))
03191            {
03192              re_node_set_free (&new_nodes);
03193              return err;
03194            }
03195        }
03196       else
03197        {
03198          /* There are problematic nodes, re-calculate incrementally.  */
03199          err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
03200                                          ex_subexp, type);
03201          if (BE (err != REG_NOERROR, 0))
03202            {
03203              re_node_set_free (&new_nodes);
03204              return err;
03205            }
03206        }
03207     }
03208   re_node_set_free (cur_nodes);
03209   *cur_nodes = new_nodes;
03210   return REG_NOERROR;
03211 }
03212 
03213 /* Helper function for check_arrival_expand_ecl.
03214    Check incrementally the epsilon closure of TARGET, and if it isn't
03215    problematic append it to DST_NODES.  */
03216 
03217 static reg_errcode_t
03218 internal_function __attribute_warn_unused_result__
03219 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
03220                            Idx target, Idx ex_subexp, int type)
03221 {
03222   Idx cur_node;
03223   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
03224     {
03225       bool ok;
03226 
03227       if (dfa->nodes[cur_node].type == type
03228          && dfa->nodes[cur_node].opr.idx == ex_subexp)
03229        {
03230          if (type == OP_CLOSE_SUBEXP)
03231            {
03232              ok = re_node_set_insert (dst_nodes, cur_node);
03233              if (BE (! ok, 0))
03234               return REG_ESPACE;
03235            }
03236          break;
03237        }
03238       ok = re_node_set_insert (dst_nodes, cur_node);
03239       if (BE (! ok, 0))
03240        return REG_ESPACE;
03241       if (dfa->edests[cur_node].nelem == 0)
03242        break;
03243       if (dfa->edests[cur_node].nelem == 2)
03244        {
03245          reg_errcode_t err;
03246          err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
03247                                          dfa->edests[cur_node].elems[1],
03248                                          ex_subexp, type);
03249          if (BE (err != REG_NOERROR, 0))
03250            return err;
03251        }
03252       cur_node = dfa->edests[cur_node].elems[0];
03253     }
03254   return REG_NOERROR;
03255 }
03256 
03257 
03258 /* For all the back references in the current state, calculate the
03259    destination of the back references by the appropriate entry
03260    in MCTX->BKREF_ENTS.  */
03261 
03262 static reg_errcode_t
03263 internal_function __attribute_warn_unused_result__
03264 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
03265                   Idx cur_str, Idx subexp_num, int type)
03266 {
03267   const re_dfa_t *const dfa = mctx->dfa;
03268   reg_errcode_t err;
03269   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
03270   struct re_backref_cache_entry *ent;
03271 
03272   if (cache_idx_start == REG_MISSING)
03273     return REG_NOERROR;
03274 
03275  restart:
03276   ent = mctx->bkref_ents + cache_idx_start;
03277   do
03278     {
03279       Idx to_idx, next_node;
03280 
03281       /* Is this entry ENT is appropriate?  */
03282       if (!re_node_set_contains (cur_nodes, ent->node))
03283        continue; /* No.  */
03284 
03285       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
03286       /* Calculate the destination of the back reference, and append it
03287         to MCTX->STATE_LOG.  */
03288       if (to_idx == cur_str)
03289        {
03290          /* The backreference did epsilon transit, we must re-check all the
03291             node in the current state.  */
03292          re_node_set new_dests;
03293          reg_errcode_t err2, err3;
03294          next_node = dfa->edests[ent->node].elems[0];
03295          if (re_node_set_contains (cur_nodes, next_node))
03296            continue;
03297          err = re_node_set_init_1 (&new_dests, next_node);
03298          err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
03299          err3 = re_node_set_merge (cur_nodes, &new_dests);
03300          re_node_set_free (&new_dests);
03301          if (BE (err != REG_NOERROR || err2 != REG_NOERROR
03302                 || err3 != REG_NOERROR, 0))
03303            {
03304              err = (err != REG_NOERROR ? err
03305                    : (err2 != REG_NOERROR ? err2 : err3));
03306              return err;
03307            }
03308          /* TODO: It is still inefficient...  */
03309          goto restart;
03310        }
03311       else
03312        {
03313          re_node_set union_set;
03314          next_node = dfa->nexts[ent->node];
03315          if (mctx->state_log[to_idx])
03316            {
03317              bool ok;
03318              if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
03319                                    next_node))
03320               continue;
03321              err = re_node_set_init_copy (&union_set,
03322                                       &mctx->state_log[to_idx]->nodes);
03323              ok = re_node_set_insert (&union_set, next_node);
03324              if (BE (err != REG_NOERROR || ! ok, 0))
03325               {
03326                 re_node_set_free (&union_set);
03327                 err = err != REG_NOERROR ? err : REG_ESPACE;
03328                 return err;
03329               }
03330            }
03331          else
03332            {
03333              err = re_node_set_init_1 (&union_set, next_node);
03334              if (BE (err != REG_NOERROR, 0))
03335               return err;
03336            }
03337          mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
03338          re_node_set_free (&union_set);
03339          if (BE (mctx->state_log[to_idx] == NULL
03340                 && err != REG_NOERROR, 0))
03341            return err;
03342        }
03343     }
03344   while (ent++->more);
03345   return REG_NOERROR;
03346 }
03347 
03348 /* Build transition table for the state.
03349    Return true if successful.  */
03350 
03351 static bool
03352 internal_function
03353 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
03354 {
03355   reg_errcode_t err;
03356   Idx i, j;
03357   int ch;
03358   bool need_word_trtable = false;
03359   bitset_word_t elem, mask;
03360   bool dests_node_malloced = false;
03361   bool dest_states_malloced = false;
03362   Idx ndests; /* Number of the destination states from `state'.  */
03363   re_dfastate_t **trtable;
03364   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
03365   re_node_set follows, *dests_node;
03366   bitset_t *dests_ch;
03367   bitset_t acceptable;
03368 
03369   struct dests_alloc
03370   {
03371     re_node_set dests_node[SBC_MAX];
03372     bitset_t dests_ch[SBC_MAX];
03373   } *dests_alloc;
03374 
03375   /* We build DFA states which corresponds to the destination nodes
03376      from `state'.  `dests_node[i]' represents the nodes which i-th
03377      destination state contains, and `dests_ch[i]' represents the
03378      characters which i-th destination state accepts.  */
03379   if (__libc_use_alloca (sizeof (struct dests_alloc)))
03380     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
03381   else
03382     {
03383       dests_alloc = re_malloc (struct dests_alloc, 1);
03384       if (BE (dests_alloc == NULL, 0))
03385        return false;
03386       dests_node_malloced = true;
03387     }
03388   dests_node = dests_alloc->dests_node;
03389   dests_ch = dests_alloc->dests_ch;
03390 
03391   /* Initialize transiton table.  */
03392   state->word_trtable = state->trtable = NULL;
03393 
03394   /* At first, group all nodes belonging to `state' into several
03395      destinations.  */
03396   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
03397   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
03398     {
03399       if (dests_node_malloced)
03400        free (dests_alloc);
03401       if (ndests == 0)
03402        {
03403          state->trtable = (re_dfastate_t **)
03404            calloc (sizeof (re_dfastate_t *), SBC_MAX);
03405          return true;
03406        }
03407       return false;
03408     }
03409 
03410   err = re_node_set_alloc (&follows, ndests + 1);
03411   if (BE (err != REG_NOERROR, 0))
03412     goto out_free;
03413 
03414   /* Avoid arithmetic overflow in size calculation.  */
03415   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
03416            / (3 * sizeof (re_dfastate_t *)))
03417           < ndests),
03418          0))
03419     goto out_free;
03420 
03421   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
03422                       + ndests * 3 * sizeof (re_dfastate_t *)))
03423     dest_states = (re_dfastate_t **)
03424       alloca (ndests * 3 * sizeof (re_dfastate_t *));
03425   else
03426     {
03427       dest_states = (re_dfastate_t **)
03428        malloc (ndests * 3 * sizeof (re_dfastate_t *));
03429       if (BE (dest_states == NULL, 0))
03430        {
03431 out_free:
03432          if (dest_states_malloced)
03433            free (dest_states);
03434          re_node_set_free (&follows);
03435          for (i = 0; i < ndests; ++i)
03436            re_node_set_free (dests_node + i);
03437          if (dests_node_malloced)
03438            free (dests_alloc);
03439          return false;
03440        }
03441       dest_states_malloced = true;
03442     }
03443   dest_states_word = dest_states + ndests;
03444   dest_states_nl = dest_states_word + ndests;
03445   bitset_empty (acceptable);
03446 
03447   /* Then build the states for all destinations.  */
03448   for (i = 0; i < ndests; ++i)
03449     {
03450       Idx next_node;
03451       re_node_set_empty (&follows);
03452       /* Merge the follows of this destination states.  */
03453       for (j = 0; j < dests_node[i].nelem; ++j)
03454        {
03455          next_node = dfa->nexts[dests_node[i].elems[j]];
03456          if (next_node != REG_MISSING)
03457            {
03458              err = re_node_set_merge (&follows, dfa->eclosures + next_node);
03459              if (BE (err != REG_NOERROR, 0))
03460               goto out_free;
03461            }
03462        }
03463       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
03464       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
03465        goto out_free;
03466       /* If the new state has context constraint,
03467         build appropriate states for these contexts.  */
03468       if (dest_states[i]->has_constraint)
03469        {
03470          dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
03471                                                    CONTEXT_WORD);
03472          if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
03473            goto out_free;
03474 
03475          if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
03476            need_word_trtable = true;
03477 
03478          dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
03479                                                  CONTEXT_NEWLINE);
03480          if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
03481            goto out_free;
03482        }
03483       else
03484        {
03485          dest_states_word[i] = dest_states[i];
03486          dest_states_nl[i] = dest_states[i];
03487        }
03488       bitset_merge (acceptable, dests_ch[i]);
03489     }
03490 
03491   if (!BE (need_word_trtable, 0))
03492     {
03493       /* We don't care about whether the following character is a word
03494         character, or we are in a single-byte character set so we can
03495         discern by looking at the character code: allocate a
03496         256-entry transition table.  */
03497       trtable = state->trtable =
03498        (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
03499       if (BE (trtable == NULL, 0))
03500        goto out_free;
03501 
03502       /* For all characters ch...:  */
03503       for (i = 0; i < BITSET_WORDS; ++i)
03504        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
03505             elem;
03506             mask <<= 1, elem >>= 1, ++ch)
03507          if (BE (elem & 1, 0))
03508            {
03509              /* There must be exactly one destination which accepts
03510                character ch.  See group_nodes_into_DFAstates.  */
03511              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
03512               ;
03513 
03514              /* j-th destination accepts the word character ch.  */
03515              if (dfa->word_char[i] & mask)
03516               trtable[ch] = dest_states_word[j];
03517              else
03518               trtable[ch] = dest_states[j];
03519            }
03520     }
03521   else
03522     {
03523       /* We care about whether the following character is a word
03524         character, and we are in a multi-byte character set: discern
03525         by looking at the character code: build two 256-entry
03526         transition tables, one starting at trtable[0] and one
03527         starting at trtable[SBC_MAX].  */
03528       trtable = state->word_trtable =
03529        (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
03530       if (BE (trtable == NULL, 0))
03531        goto out_free;
03532 
03533       /* For all characters ch...:  */
03534       for (i = 0; i < BITSET_WORDS; ++i)
03535        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
03536             elem;
03537             mask <<= 1, elem >>= 1, ++ch)
03538          if (BE (elem & 1, 0))
03539            {
03540              /* There must be exactly one destination which accepts
03541                character ch.  See group_nodes_into_DFAstates.  */
03542              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
03543               ;
03544 
03545              /* j-th destination accepts the word character ch.  */
03546              trtable[ch] = dest_states[j];
03547              trtable[ch + SBC_MAX] = dest_states_word[j];
03548            }
03549     }
03550 
03551   /* new line */
03552   if (bitset_contain (acceptable, NEWLINE_CHAR))
03553     {
03554       /* The current state accepts newline character.  */
03555       for (j = 0; j < ndests; ++j)
03556        if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
03557          {
03558            /* k-th destination accepts newline character.  */
03559            trtable[NEWLINE_CHAR] = dest_states_nl[j];
03560            if (need_word_trtable)
03561              trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
03562            /* There must be only one destination which accepts
03563               newline.  See group_nodes_into_DFAstates.  */
03564            break;
03565          }
03566     }
03567 
03568   if (dest_states_malloced)
03569     free (dest_states);
03570 
03571   re_node_set_free (&follows);
03572   for (i = 0; i < ndests; ++i)
03573     re_node_set_free (dests_node + i);
03574 
03575   if (dests_node_malloced)
03576     free (dests_alloc);
03577 
03578   return true;
03579 }
03580 
03581 /* Group all nodes belonging to STATE into several destinations.
03582    Then for all destinations, set the nodes belonging to the destination
03583    to DESTS_NODE[i] and set the characters accepted by the destination
03584    to DEST_CH[i].  This function return the number of destinations.  */
03585 
03586 static Idx
03587 internal_function
03588 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
03589                          re_node_set *dests_node, bitset_t *dests_ch)
03590 {
03591   reg_errcode_t err;
03592   bool ok;
03593   Idx i, j, k;
03594   Idx ndests; /* Number of the destinations from `state'.  */
03595   bitset_t accepts; /* Characters a node can accept.  */
03596   const re_node_set *cur_nodes = &state->nodes;
03597   bitset_empty (accepts);
03598   ndests = 0;
03599 
03600   /* For all the nodes belonging to `state',  */
03601   for (i = 0; i < cur_nodes->nelem; ++i)
03602     {
03603       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
03604       re_token_type_t type = node->type;
03605       unsigned int constraint = node->constraint;
03606 
03607       /* Enumerate all single byte character this node can accept.  */
03608       if (type == CHARACTER)
03609        bitset_set (accepts, node->opr.c);
03610       else if (type == SIMPLE_BRACKET)
03611        {
03612          bitset_merge (accepts, node->opr.sbcset);
03613        }
03614       else if (type == OP_PERIOD)
03615        {
03616 #ifdef RE_ENABLE_I18N
03617          if (dfa->mb_cur_max > 1)
03618            bitset_merge (accepts, dfa->sb_char);
03619          else
03620 #endif
03621            bitset_set_all (accepts);
03622          if (!(dfa->syntax & RE_DOT_NEWLINE))
03623            bitset_clear (accepts, '\n');
03624          if (dfa->syntax & RE_DOT_NOT_NULL)
03625            bitset_clear (accepts, '\0');
03626        }
03627 #ifdef RE_ENABLE_I18N
03628       else if (type == OP_UTF8_PERIOD)
03629        {
03630          if (ASCII_CHARS % BITSET_WORD_BITS == 0)
03631            memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
03632          else
03633            bitset_merge (accepts, utf8_sb_map);
03634          if (!(dfa->syntax & RE_DOT_NEWLINE))
03635            bitset_clear (accepts, '\n');
03636          if (dfa->syntax & RE_DOT_NOT_NULL)
03637            bitset_clear (accepts, '\0');
03638        }
03639 #endif
03640       else
03641        continue;
03642 
03643       /* Check the `accepts' and sift the characters which are not
03644         match it the context.  */
03645       if (constraint)
03646        {
03647          if (constraint & NEXT_NEWLINE_CONSTRAINT)
03648            {
03649              bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
03650              bitset_empty (accepts);
03651              if (accepts_newline)
03652               bitset_set (accepts, NEWLINE_CHAR);
03653              else
03654               continue;
03655            }
03656          if (constraint & NEXT_ENDBUF_CONSTRAINT)
03657            {
03658              bitset_empty (accepts);
03659              continue;
03660            }
03661 
03662          if (constraint & NEXT_WORD_CONSTRAINT)
03663            {
03664              bitset_word_t any_set = 0;
03665              if (type == CHARACTER && !node->word_char)
03666               {
03667                 bitset_empty (accepts);
03668                 continue;
03669               }
03670 #ifdef RE_ENABLE_I18N
03671              if (dfa->mb_cur_max > 1)
03672               for (j = 0; j < BITSET_WORDS; ++j)
03673                 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
03674              else
03675 #endif
03676               for (j = 0; j < BITSET_WORDS; ++j)
03677                 any_set |= (accepts[j] &= dfa->word_char[j]);
03678              if (!any_set)
03679               continue;
03680            }
03681          if (constraint & NEXT_NOTWORD_CONSTRAINT)
03682            {
03683              bitset_word_t any_set = 0;
03684              if (type == CHARACTER && node->word_char)
03685               {
03686                 bitset_empty (accepts);
03687                 continue;
03688               }
03689 #ifdef RE_ENABLE_I18N
03690              if (dfa->mb_cur_max > 1)
03691               for (j = 0; j < BITSET_WORDS; ++j)
03692                 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
03693              else
03694 #endif
03695               for (j = 0; j < BITSET_WORDS; ++j)
03696                 any_set |= (accepts[j] &= ~dfa->word_char[j]);
03697              if (!any_set)
03698               continue;
03699            }
03700        }
03701 
03702       /* Then divide `accepts' into DFA states, or create a new
03703         state.  Above, we make sure that accepts is not empty.  */
03704       for (j = 0; j < ndests; ++j)
03705        {
03706          bitset_t intersec; /* Intersection sets, see below.  */
03707          bitset_t remains;
03708          /* Flags, see below.  */
03709          bitset_word_t has_intersec, not_subset, not_consumed;
03710 
03711          /* Optimization, skip if this state doesn't accept the character.  */
03712          if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
03713            continue;
03714 
03715          /* Enumerate the intersection set of this state and `accepts'.  */
03716          has_intersec = 0;
03717          for (k = 0; k < BITSET_WORDS; ++k)
03718            has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
03719          /* And skip if the intersection set is empty.  */
03720          if (!has_intersec)
03721            continue;
03722 
03723          /* Then check if this state is a subset of `accepts'.  */
03724          not_subset = not_consumed = 0;
03725          for (k = 0; k < BITSET_WORDS; ++k)
03726            {
03727              not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
03728              not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
03729            }
03730 
03731          /* If this state isn't a subset of `accepts', create a
03732             new group state, which has the `remains'. */
03733          if (not_subset)
03734            {
03735              bitset_copy (dests_ch[ndests], remains);
03736              bitset_copy (dests_ch[j], intersec);
03737              err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
03738              if (BE (err != REG_NOERROR, 0))
03739               goto error_return;
03740              ++ndests;
03741            }
03742 
03743          /* Put the position in the current group. */
03744          ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
03745          if (BE (! ok, 0))
03746            goto error_return;
03747 
03748          /* If all characters are consumed, go to next node. */
03749          if (!not_consumed)
03750            break;
03751        }
03752       /* Some characters remain, create a new group. */
03753       if (j == ndests)
03754        {
03755          bitset_copy (dests_ch[ndests], accepts);
03756          err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
03757          if (BE (err != REG_NOERROR, 0))
03758            goto error_return;
03759          ++ndests;
03760          bitset_empty (accepts);
03761        }
03762     }
03763   return ndests;
03764  error_return:
03765   for (j = 0; j < ndests; ++j)
03766     re_node_set_free (dests_node + j);
03767   return REG_MISSING;
03768 }
03769 
03770 #ifdef RE_ENABLE_I18N
03771 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
03772    Return the number of the bytes the node accepts.
03773    STR_IDX is the current index of the input string.
03774 
03775    This function handles the nodes which can accept one character, or
03776    one collating element like '.', '[a-z]', opposite to the other nodes
03777    can only accept one byte.  */
03778 
03779 static int
03780 internal_function
03781 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
03782                       const re_string_t *input, Idx str_idx)
03783 {
03784   const re_token_t *node = dfa->nodes + node_idx;
03785   int char_len, elem_len;
03786   Idx i;
03787 
03788   if (BE (node->type == OP_UTF8_PERIOD, 0))
03789     {
03790       unsigned char c = re_string_byte_at (input, str_idx), d;
03791       if (BE (c < 0xc2, 1))
03792        return 0;
03793 
03794       if (str_idx + 2 > input->len)
03795        return 0;
03796 
03797       d = re_string_byte_at (input, str_idx + 1);
03798       if (c < 0xe0)
03799        return (d < 0x80 || d > 0xbf) ? 0 : 2;
03800       else if (c < 0xf0)
03801        {
03802          char_len = 3;
03803          if (c == 0xe0 && d < 0xa0)
03804            return 0;
03805        }
03806       else if (c < 0xf8)
03807        {
03808          char_len = 4;
03809          if (c == 0xf0 && d < 0x90)
03810            return 0;
03811        }
03812       else if (c < 0xfc)
03813        {
03814          char_len = 5;
03815          if (c == 0xf8 && d < 0x88)
03816            return 0;
03817        }
03818       else if (c < 0xfe)
03819        {
03820          char_len = 6;
03821          if (c == 0xfc && d < 0x84)
03822            return 0;
03823        }
03824       else
03825        return 0;
03826 
03827       if (str_idx + char_len > input->len)
03828        return 0;
03829 
03830       for (i = 1; i < char_len; ++i)
03831        {
03832          d = re_string_byte_at (input, str_idx + i);
03833          if (d < 0x80 || d > 0xbf)
03834            return 0;
03835        }
03836       return char_len;
03837     }
03838 
03839   char_len = re_string_char_size_at (input, str_idx);
03840   if (node->type == OP_PERIOD)
03841     {
03842       if (char_len <= 1)
03843        return 0;
03844       /* FIXME: I don't think this if is needed, as both '\n'
03845         and '\0' are char_len == 1.  */
03846       /* '.' accepts any one character except the following two cases.  */
03847       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
03848           re_string_byte_at (input, str_idx) == '\n') ||
03849          ((dfa->syntax & RE_DOT_NOT_NULL) &&
03850           re_string_byte_at (input, str_idx) == '\0'))
03851        return 0;
03852       return char_len;
03853     }
03854 
03855   elem_len = re_string_elem_size_at (input, str_idx);
03856   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
03857     return 0;
03858 
03859   if (node->type == COMPLEX_BRACKET)
03860     {
03861       const re_charset_t *cset = node->opr.mbcset;
03862 # ifdef _LIBC
03863       const unsigned char *pin
03864        = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
03865       Idx j;
03866       uint32_t nrules;
03867 # endif /* _LIBC */
03868       int match_len = 0;
03869       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
03870                   ? re_string_wchar_at (input, str_idx) : 0);
03871 
03872       /* match with multibyte character?  */
03873       for (i = 0; i < cset->nmbchars; ++i)
03874        if (wc == cset->mbchars[i])
03875          {
03876            match_len = char_len;
03877            goto check_node_accept_bytes_match;
03878          }
03879       /* match with character_class?  */
03880       for (i = 0; i < cset->nchar_classes; ++i)
03881        {
03882          wctype_t wt = cset->char_classes[i];
03883          if (__iswctype (wc, wt))
03884            {
03885              match_len = char_len;
03886              goto check_node_accept_bytes_match;
03887            }
03888        }
03889 
03890 # ifdef _LIBC
03891       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03892       if (nrules != 0)
03893        {
03894          unsigned int in_collseq = 0;
03895          const int32_t *table, *indirect;
03896          const unsigned char *weights, *extra;
03897          const char *collseqwc;
03898          int32_t idx;
03899          /* This #include defines a local function!  */
03900 #  include <locale/weight.h>
03901 
03902          /* match with collating_symbol?  */
03903          if (cset->ncoll_syms)
03904            extra = (const unsigned char *)
03905              _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
03906          for (i = 0; i < cset->ncoll_syms; ++i)
03907            {
03908              const unsigned char *coll_sym = extra + cset->coll_syms[i];
03909              /* Compare the length of input collating element and
03910                the length of current collating element.  */
03911              if (*coll_sym != elem_len)
03912               continue;
03913              /* Compare each bytes.  */
03914              for (j = 0; j < *coll_sym; j++)
03915               if (pin[j] != coll_sym[1 + j])
03916                 break;
03917              if (j == *coll_sym)
03918               {
03919                 /* Match if every bytes is equal.  */
03920                 match_len = j;
03921                 goto check_node_accept_bytes_match;
03922               }
03923            }
03924 
03925          if (cset->nranges)
03926            {
03927              if (elem_len <= char_len)
03928               {
03929                 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
03930                 in_collseq = __collseq_table_lookup (collseqwc, wc);
03931               }
03932              else
03933               in_collseq = find_collation_sequence_value (pin, elem_len);
03934            }
03935          /* match with range expression?  */
03936          for (i = 0; i < cset->nranges; ++i)
03937            if (cset->range_starts[i] <= in_collseq
03938               && in_collseq <= cset->range_ends[i])
03939              {
03940               match_len = elem_len;
03941               goto check_node_accept_bytes_match;
03942              }
03943 
03944          /* match with equivalence_class?  */
03945          if (cset->nequiv_classes)
03946            {
03947              const unsigned char *cp = pin;
03948              table = (const int32_t *)
03949               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
03950              weights = (const unsigned char *)
03951               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
03952              extra = (const unsigned char *)
03953               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
03954              indirect = (const int32_t *)
03955               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
03956              int32_t idx = findidx (&cp);
03957              if (idx > 0)
03958               for (i = 0; i < cset->nequiv_classes; ++i)
03959                 {
03960                   int32_t equiv_class_idx = cset->equiv_classes[i];
03961                   size_t weight_len = weights[idx & 0xffffff];
03962                   if (weight_len == weights[equiv_class_idx & 0xffffff]
03963                      && (idx >> 24) == (equiv_class_idx >> 24))
03964                     {
03965                      Idx cnt = 0;
03966 
03967                      idx &= 0xffffff;
03968                      equiv_class_idx &= 0xffffff;
03969 
03970                      while (cnt <= weight_len
03971                             && (weights[equiv_class_idx + 1 + cnt]
03972                                == weights[idx + 1 + cnt]))
03973                        ++cnt;
03974                      if (cnt > weight_len)
03975                        {
03976                          match_len = elem_len;
03977                          goto check_node_accept_bytes_match;
03978                        }
03979                     }
03980                 }
03981            }
03982        }
03983       else
03984 # endif /* _LIBC */
03985        {
03986          /* match with range expression?  */
03987 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
03988          wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
03989 #else
03990          wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
03991          cmp_buf[2] = wc;
03992 #endif
03993          for (i = 0; i < cset->nranges; ++i)
03994            {
03995              cmp_buf[0] = cset->range_starts[i];
03996              cmp_buf[4] = cset->range_ends[i];
03997              if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
03998                 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
03999               {
04000                 match_len = char_len;
04001                 goto check_node_accept_bytes_match;
04002               }
04003            }
04004        }
04005     check_node_accept_bytes_match:
04006       if (!cset->non_match)
04007        return match_len;
04008       else
04009        {
04010          if (match_len > 0)
04011            return 0;
04012          else
04013            return (elem_len > char_len) ? elem_len : char_len;
04014        }
04015     }
04016   return 0;
04017 }
04018 
04019 # ifdef _LIBC
04020 static unsigned int
04021 internal_function
04022 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
04023 {
04024   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
04025   if (nrules == 0)
04026     {
04027       if (mbs_len == 1)
04028        {
04029          /* No valid character.  Match it as a single byte character.  */
04030          const unsigned char *collseq = (const unsigned char *)
04031            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
04032          return collseq[mbs[0]];
04033        }
04034       return UINT_MAX;
04035     }
04036   else
04037     {
04038       int32_t idx;
04039       const unsigned char *extra = (const unsigned char *)
04040        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
04041       int32_t extrasize = (const unsigned char *)
04042        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
04043 
04044       for (idx = 0; idx < extrasize;)
04045        {
04046          int mbs_cnt;
04047          bool found = false;
04048          int32_t elem_mbs_len;
04049          /* Skip the name of collating element name.  */
04050          idx = idx + extra[idx] + 1;
04051          elem_mbs_len = extra[idx++];
04052          if (mbs_len == elem_mbs_len)
04053            {
04054              for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
04055               if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
04056                 break;
04057              if (mbs_cnt == elem_mbs_len)
04058               /* Found the entry.  */
04059               found = true;
04060            }
04061          /* Skip the byte sequence of the collating element.  */
04062          idx += elem_mbs_len;
04063          /* Adjust for the alignment.  */
04064          idx = (idx + 3) & ~3;
04065          /* Skip the collation sequence value.  */
04066          idx += sizeof (uint32_t);
04067          /* Skip the wide char sequence of the collating element.  */
04068          idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
04069          /* If we found the entry, return the sequence value.  */
04070          if (found)
04071            return *(uint32_t *) (extra + idx);
04072          /* Skip the collation sequence value.  */
04073          idx += sizeof (uint32_t);
04074        }
04075       return UINT_MAX;
04076     }
04077 }
04078 # endif /* _LIBC */
04079 #endif /* RE_ENABLE_I18N */
04080 
04081 /* Check whether the node accepts the byte which is IDX-th
04082    byte of the INPUT.  */
04083 
04084 static bool
04085 internal_function
04086 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
04087                  Idx idx)
04088 {
04089   unsigned char ch;
04090   ch = re_string_byte_at (&mctx->input, idx);
04091   switch (node->type)
04092     {
04093     case CHARACTER:
04094       if (node->opr.c != ch)
04095         return false;
04096       break;
04097 
04098     case SIMPLE_BRACKET:
04099       if (!bitset_contain (node->opr.sbcset, ch))
04100         return false;
04101       break;
04102 
04103 #ifdef RE_ENABLE_I18N
04104     case OP_UTF8_PERIOD:
04105       if (ch >= ASCII_CHARS)
04106         return false;
04107       /* FALLTHROUGH */
04108 #endif
04109     case OP_PERIOD:
04110       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
04111          || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
04112        return false;
04113       break;
04114 
04115     default:
04116       return false;
04117     }
04118 
04119   if (node->constraint)
04120     {
04121       /* The node has constraints.  Check whether the current context
04122         satisfies the constraints.  */
04123       unsigned int context = re_string_context_at (&mctx->input, idx,
04124                                              mctx->eflags);
04125       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
04126        return false;
04127     }
04128 
04129   return true;
04130 }
04131 
04132 /* Extend the buffers, if the buffers have run out.  */
04133 
04134 static reg_errcode_t
04135 internal_function __attribute_warn_unused_result__
04136 extend_buffers (re_match_context_t *mctx)
04137 {
04138   reg_errcode_t ret;
04139   re_string_t *pstr = &mctx->input;
04140 
04141   /* Avoid overflow.  */
04142   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
04143     return REG_ESPACE;
04144 
04145   /* Double the lengthes of the buffers.  */
04146   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
04147   if (BE (ret != REG_NOERROR, 0))
04148     return ret;
04149 
04150   if (mctx->state_log != NULL)
04151     {
04152       /* And double the length of state_log.  */
04153       /* XXX We have no indication of the size of this buffer.  If this
04154         allocation fail we have no indication that the state_log array
04155         does not have the right size.  */
04156       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
04157                                          pstr->bufs_len + 1);
04158       if (BE (new_array == NULL, 0))
04159        return REG_ESPACE;
04160       mctx->state_log = new_array;
04161     }
04162 
04163   /* Then reconstruct the buffers.  */
04164   if (pstr->icase)
04165     {
04166 #ifdef RE_ENABLE_I18N
04167       if (pstr->mb_cur_max > 1)
04168        {
04169          ret = build_wcs_upper_buffer (pstr);
04170          if (BE (ret != REG_NOERROR, 0))
04171            return ret;
04172        }
04173       else
04174 #endif /* RE_ENABLE_I18N  */
04175        build_upper_buffer (pstr);
04176     }
04177   else
04178     {
04179 #ifdef RE_ENABLE_I18N
04180       if (pstr->mb_cur_max > 1)
04181        build_wcs_buffer (pstr);
04182       else
04183 #endif /* RE_ENABLE_I18N  */
04184        {
04185          if (pstr->trans != NULL)
04186            re_string_translate_buffer (pstr);
04187        }
04188     }
04189   return REG_NOERROR;
04190 }
04191 
04192 
04193 /* Functions for matching context.  */
04194 
04195 /* Initialize MCTX.  */
04196 
04197 static reg_errcode_t
04198 internal_function __attribute_warn_unused_result__
04199 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
04200 {
04201   mctx->eflags = eflags;
04202   mctx->match_last = REG_MISSING;
04203   if (n > 0)
04204     {
04205       /* Avoid overflow.  */
04206       size_t max_object_size =
04207        MAX (sizeof (struct re_backref_cache_entry),
04208             sizeof (re_sub_match_top_t *));
04209       if (BE (SIZE_MAX / max_object_size < n, 0))
04210        return REG_ESPACE;
04211 
04212       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
04213       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
04214       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
04215        return REG_ESPACE;
04216     }
04217   /* Already zero-ed by the caller.
04218      else
04219        mctx->bkref_ents = NULL;
04220      mctx->nbkref_ents = 0;
04221      mctx->nsub_tops = 0;  */
04222   mctx->abkref_ents = n;
04223   mctx->max_mb_elem_len = 1;
04224   mctx->asub_tops = n;
04225   return REG_NOERROR;
04226 }
04227 
04228 /* Clean the entries which depend on the current input in MCTX.
04229    This function must be invoked when the matcher changes the start index
04230    of the input, or changes the input string.  */
04231 
04232 static void
04233 internal_function
04234 match_ctx_clean (re_match_context_t *mctx)
04235 {
04236   Idx st_idx;
04237   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
04238     {
04239       Idx sl_idx;
04240       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
04241       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
04242        {
04243          re_sub_match_last_t *last = top->lasts[sl_idx];
04244          re_free (last->path.array);
04245          re_free (last);
04246        }
04247       re_free (top->lasts);
04248       if (top->path)
04249        {
04250          re_free (top->path->array);
04251          re_free (top->path);
04252        }
04253       free (top);
04254     }
04255 
04256   mctx->nsub_tops = 0;
04257   mctx->nbkref_ents = 0;
04258 }
04259 
04260 /* Free all the memory associated with MCTX.  */
04261 
04262 static void
04263 internal_function
04264 match_ctx_free (re_match_context_t *mctx)
04265 {
04266   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
04267   match_ctx_clean (mctx);
04268   re_free (mctx->sub_tops);
04269   re_free (mctx->bkref_ents);
04270 }
04271 
04272 /* Add a new backreference entry to MCTX.
04273    Note that we assume that caller never call this function with duplicate
04274    entry, and call with STR_IDX which isn't smaller than any existing entry.
04275 */
04276 
04277 static reg_errcode_t
04278 internal_function __attribute_warn_unused_result__
04279 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
04280                    Idx to)
04281 {
04282   if (mctx->nbkref_ents >= mctx->abkref_ents)
04283     {
04284       struct re_backref_cache_entry* new_entry;
04285       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
04286                            mctx->abkref_ents * 2);
04287       if (BE (new_entry == NULL, 0))
04288        {
04289          re_free (mctx->bkref_ents);
04290          return REG_ESPACE;
04291        }
04292       mctx->bkref_ents = new_entry;
04293       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
04294              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
04295       mctx->abkref_ents *= 2;
04296     }
04297   if (mctx->nbkref_ents > 0
04298       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
04299     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
04300 
04301   mctx->bkref_ents[mctx->nbkref_ents].node = node;
04302   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
04303   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
04304   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
04305 
04306   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
04307      If bit N is clear, means that this entry won't epsilon-transition to
04308      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
04309      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
04310      such node.
04311 
04312      A backreference does not epsilon-transition unless it is empty, so set
04313      to all zeros if FROM != TO.  */
04314   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
04315     = (from == to ? -1 : 0);
04316 
04317   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
04318   if (mctx->max_mb_elem_len < to - from)
04319     mctx->max_mb_elem_len = to - from;
04320   return REG_NOERROR;
04321 }
04322 
04323 /* Return the first entry with the same str_idx, or REG_MISSING if none is
04324    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
04325 
04326 static Idx
04327 internal_function
04328 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
04329 {
04330   Idx left, right, mid, last;
04331   last = right = mctx->nbkref_ents;
04332   for (left = 0; left < right;)
04333     {
04334       mid = (left + right) / 2;
04335       if (mctx->bkref_ents[mid].str_idx < str_idx)
04336        left = mid + 1;
04337       else
04338        right = mid;
04339     }
04340   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
04341     return left;
04342   else
04343     return REG_MISSING;
04344 }
04345 
04346 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
04347    at STR_IDX.  */
04348 
04349 static reg_errcode_t
04350 internal_function __attribute_warn_unused_result__
04351 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
04352 {
04353 #ifdef DEBUG
04354   assert (mctx->sub_tops != NULL);
04355   assert (mctx->asub_tops > 0);
04356 #endif
04357   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
04358     {
04359       Idx new_asub_tops = mctx->asub_tops * 2;
04360       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
04361                                              re_sub_match_top_t *,
04362                                              new_asub_tops);
04363       if (BE (new_array == NULL, 0))
04364        return REG_ESPACE;
04365       mctx->sub_tops = new_array;
04366       mctx->asub_tops = new_asub_tops;
04367     }
04368   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
04369   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
04370     return REG_ESPACE;
04371   mctx->sub_tops[mctx->nsub_tops]->node = node;
04372   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
04373   return REG_NOERROR;
04374 }
04375 
04376 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
04377    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
04378 
04379 static re_sub_match_last_t *
04380 internal_function
04381 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
04382 {
04383   re_sub_match_last_t *new_entry;
04384   if (BE (subtop->nlasts == subtop->alasts, 0))
04385     {
04386       Idx new_alasts = 2 * subtop->alasts + 1;
04387       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
04388                                               re_sub_match_last_t *,
04389                                               new_alasts);
04390       if (BE (new_array == NULL, 0))
04391        return NULL;
04392       subtop->lasts = new_array;
04393       subtop->alasts = new_alasts;
04394     }
04395   new_entry = calloc (1, sizeof (re_sub_match_last_t));
04396   if (BE (new_entry != NULL, 1))
04397     {
04398       subtop->lasts[subtop->nlasts] = new_entry;
04399       new_entry->node = node;
04400       new_entry->str_idx = str_idx;
04401       ++subtop->nlasts;
04402     }
04403   return new_entry;
04404 }
04405 
04406 static void
04407 internal_function
04408 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
04409               re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
04410 {
04411   sctx->sifted_states = sifted_sts;
04412   sctx->limited_states = limited_sts;
04413   sctx->last_node = last_node;
04414   sctx->last_str_idx = last_str_idx;
04415   re_node_set_init_empty (&sctx->limits);
04416 }