Back to index

nagios-plugins  1.4.16
regex_internal.h
Go to the documentation of this file.
00001 /* Extended regular expression matching and search library.
00002    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
00003    Software Foundation, Inc.
00004    This file is part of the GNU C Library.
00005    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 3, or (at your option)
00010    any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License along
00018    with this program; if not, write to the Free Software Foundation,
00019    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
00020 
00021 #ifndef _REGEX_INTERNAL_H
00022 #define _REGEX_INTERNAL_H 1
00023 
00024 #include <assert.h>
00025 #include <ctype.h>
00026 #include <stdbool.h>
00027 #include <stdio.h>
00028 #include <stdlib.h>
00029 #include <string.h>
00030 
00031 #include <langinfo.h>
00032 #ifndef _LIBC
00033 # include "localcharset.h"
00034 #endif
00035 #if defined HAVE_LOCALE_H || defined _LIBC
00036 # include <locale.h>
00037 #endif
00038 
00039 #include <wchar.h>
00040 #include <wctype.h>
00041 #include <stdint.h>
/* Inside glibc the real lock primitives are used.  Elsewhere the three
   lock operations expand to an empty statement that ignores NAME.  */
#ifdef _LIBC
# include <bits/libc-lock.h>
#else
# define __libc_lock_init(NAME)   do { } while (0)
# define __libc_lock_lock(NAME)   do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif
00049 
/* Provide isblank() when building outside glibc on a host that neither
   defines it as a macro nor both has and declares it.  */
#if !defined _LIBC && !defined isblank && !(HAVE_ISBLANK && HAVE_DECL_ISBLANK)
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
#endif
00054 
00055 #ifdef _LIBC
00056 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
00057 #  define _RE_DEFINE_LOCALE_FUNCTIONS 1
00058 #   include <locale/localeinfo.h>
00059 #   include <locale/elem-hash.h>
00060 #   include <locale/coll-lookup.h>
00061 # endif
00062 #endif
00063 
/* Message translation.  Inside glibc, or when libintl.h is available
   and NLS is enabled, <libintl.h> is used (glibc redirects gettext to
   its internal __dcgettext).  Otherwise gettext() just yields its
   argument.  */
#if defined _LIBC || (HAVE_LIBINTL_H && ENABLE_NLS)
# include <libintl.h>
# ifdef _LIBC
#  undef gettext
#  define gettext(msgid) \
  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
#endif
00075 
/* gettext_noop marks a string for extraction by xgettext without
   translating it at the point of use; it expands to the string itself.  */
#ifndef gettext_noop
# define gettext_noop(String) String
#endif
00081 
/* Fallback for hosts whose headers do not define SIZE_MAX.  */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif
00086 
/* Enable the multibyte/locale-aware code paths inside glibc, or when
   the host provides every facility they rely on.  */
#if _LIBC || (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL)
# define RE_ENABLE_I18N
#endif
00090 
/* BE (EXPR, VAL) evaluates to EXPR.  With GCC 3 or later it also passes
   VAL to __builtin_expect as the expected value.  On other compilers
   inside glibc, `inline' is additionally defined away.  */
#if defined __GNUC__ && __GNUC__ >= 3
# define BE(expr, val) __builtin_expect (expr, val)
#else
# define BE(expr, val) (expr)
# ifdef _LIBC
#  define inline
# endif
#endif
00099 
/* How many characters are ASCII.  */
#define ASCII_CHARS 0x80

/* How many distinct single-byte characters exist.  */
#define SBC_MAX (UCHAR_MAX + 1)

#define COLL_ELEM_LEN_MAX 8

/* Newline, as a narrow and as a wide character.  */
#define NEWLINE_CHAR '\n'
#define WIDE_NEWLINE_CHAR L'\n'
00111 
/* Outside glibc, map the glibc-internal double-underscore names onto
   the standard public functions, and make attribute_hidden a no-op.  */
#ifndef _LIBC
# define __wctype   wctype
# define __iswctype iswctype
# define __btowc    btowc
# define __wcrtomb  wcrtomb
# define __mbrtowc  mbrtowc
# define __regfree  regfree
# define attribute_hidden
#endif /* not _LIBC */
00122 
/* __attribute (ARG) applies a GCC attribute on GCC 3.1 or later and
   expands to nothing elsewhere.  */
#if (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) || __GNUC__ >= 4
# define __attribute(arg) __attribute__ (arg)
#else
# define __attribute(arg)
#endif
00128 
00129 typedef __re_idx_t Idx;
00130 
00131 /* Special return value for failure to match.  */
00132 #define REG_MISSING ((Idx) -1)
00133 
00134 /* Special return value for internal error.  */
00135 #define REG_ERROR ((Idx) -2)
00136 
00137 /* Test whether N is a valid index, and is not one of the above.  */
00138 #ifdef _REGEX_LARGE_OFFSETS
00139 # define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)
00140 #else
00141 # define REG_VALID_INDEX(n) (0 <= (n))
00142 #endif
00143 
00144 /* Test whether N is a valid nonzero index.  */
00145 #ifdef _REGEX_LARGE_OFFSETS
00146 # define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))
00147 #else
00148 # define REG_VALID_NONZERO_INDEX(n) (0 < (n))
00149 #endif
00150 
00151 /* A hash value, suitable for computing hash tables.  */
00152 typedef __re_size_t re_hashval_t;
00153 
/* One word of a bitset.  The type has to be unsigned and no narrower
   than unsigned int.  */
typedef unsigned long int bitset_word_t;

/* A bitset_word_t with every bit set.  */
#define BITSET_WORD_MAX ULONG_MAX
00159 
00160 /* Number of bits in a bitset_word_t.  For portability to hosts with
00161    padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
00162    instead, deduce it directly from BITSET_WORD_MAX.  Avoid
00163    greater-than-32-bit integers and unconditional shifts by more than
00164    31 bits, as they're not portable.  */
00165 #if BITSET_WORD_MAX == 0xffffffffUL
00166 # define BITSET_WORD_BITS 32
00167 #elif BITSET_WORD_MAX >> 31 >> 4 == 1
00168 # define BITSET_WORD_BITS 36
00169 #elif BITSET_WORD_MAX >> 31 >> 16 == 1
00170 # define BITSET_WORD_BITS 48
00171 #elif BITSET_WORD_MAX >> 31 >> 28 == 1
00172 # define BITSET_WORD_BITS 60
00173 #elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
00174 # define BITSET_WORD_BITS 64
00175 #elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
00176 # define BITSET_WORD_BITS 72
00177 #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
00178 # define BITSET_WORD_BITS 128
00179 #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
00180 # define BITSET_WORD_BITS 256
00181 #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
00182 # define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
00183 # if BITSET_WORD_BITS <= SBC_MAX
00184 #  error "Invalid SBC_MAX"
00185 # endif
00186 #else
00187 # error "Add case for new bitset_word_t size"
00188 #endif
00189 
00190 /* Number of bitset_word_t values in a bitset_t.  */
00191 #define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
00192 
00193 typedef bitset_word_t bitset_t[BITSET_WORDS];
00194 typedef bitset_word_t *re_bitset_ptr_t;
00195 typedef const bitset_word_t *re_const_bitset_ptr_t;
00196 
/* Individual constraint bits.  PREV_* bits constrain the context before
   a position, NEXT_* bits the context after it.  */
#define PREV_WORD_CONSTRAINT      0x0001
#define PREV_NOTWORD_CONSTRAINT   0x0002
#define NEXT_WORD_CONSTRAINT      0x0004
#define NEXT_NOTWORD_CONSTRAINT   0x0008
#define PREV_NEWLINE_CONSTRAINT   0x0010
#define NEXT_NEWLINE_CONSTRAINT   0x0020
#define PREV_BEGBUF_CONSTRAINT    0x0040
#define NEXT_ENDBUF_CONSTRAINT    0x0080
#define WORD_DELIM_CONSTRAINT     0x0100
#define NOT_WORD_DELIM_CONSTRAINT 0x0200

/* Named combinations of the constraint bits above.  */
typedef enum
{
  INSIDE_WORD    = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
  WORD_FIRST     = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
  WORD_LAST      = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
  LINE_FIRST     = PREV_NEWLINE_CONSTRAINT,
  LINE_LAST      = NEXT_NEWLINE_CONSTRAINT,
  BUF_FIRST      = PREV_BEGBUF_CONSTRAINT,
  BUF_LAST       = NEXT_ENDBUF_CONSTRAINT,
  WORD_DELIM     = WORD_DELIM_CONSTRAINT,
  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
} re_context_type;
00221 
00222 typedef struct
00223 {
00224   Idx alloc;
00225   Idx nelem;
00226   Idx *elems;
00227 } re_node_set;
00228 
/* Kind of a token, a DFA node or a parse-tree node.  Values with
   EPSILON_BIT set are the ones IS_EPSILON_NODE recognizes.  */
typedef enum
{
  NON_TYPE = 0,

  /* Node types: shared by tokens, nodes and trees.  */
  CHARACTER = 1,
  END_OF_RE = 2,
  SIMPLE_BRACKET = 3,
  OP_BACK_REF = 4,
  OP_PERIOD = 5,
#ifdef RE_ENABLE_I18N
  COMPLEX_BRACKET = 6,
  OP_UTF8_PERIOD = 7,
#endif /* RE_ENABLE_I18N */

  /* EPSILON_BIT is a macro, not an enumerator, so that a debugger
     displaying the value 8 prints OP_OPEN_SUBEXP.  */
#define EPSILON_BIT 8
  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
  OP_ALT = EPSILON_BIT | 2,
  OP_DUP_ASTERISK = EPSILON_BIT | 3,
  ANCHOR = EPSILON_BIT | 4,

  /* Tree types: only parse trees use these.  */
  CONCAT = 16,
  SUBEXP = 17,

  /* Token types: only the tokenizer uses these.  */
  OP_DUP_PLUS = 18,
  OP_DUP_QUESTION = 19,
  OP_OPEN_BRACKET = 20,
  OP_CLOSE_BRACKET = 21,
  OP_CHARSET_RANGE = 22,
  OP_OPEN_DUP_NUM = 23,
  OP_CLOSE_DUP_NUM = 24,
  OP_NON_MATCH_LIST = 25,
  OP_OPEN_COLL_ELEM = 26,
  OP_CLOSE_COLL_ELEM = 27,
  OP_OPEN_EQUIV_CLASS = 28,
  OP_CLOSE_EQUIV_CLASS = 29,
  OP_OPEN_CHAR_CLASS = 30,
  OP_CLOSE_CHAR_CLASS = 31,
  OP_WORD = 32,
  OP_NOTWORD = 33,
  OP_SPACE = 34,
  OP_NOTSPACE = 35,
  BACK_SLASH = 36

} re_token_type_t;
00279 
#ifdef RE_ENABLE_I18N
/* Character-set data referenced by a COMPLEX_BRACKET token through
   re_token_t.opr.mbcset.  Each array has its element count in the
   corresponding n* member.  */
typedef struct
{
  wchar_t *mbchars;             /* multibyte characters */

# ifdef _LIBC
  int32_t *coll_syms;           /* collating symbols */
  int32_t *equiv_classes;       /* equivalence classes */
  uint32_t *range_starts;       /* range expressions: start points ...  */
  uint32_t *range_ends;         /* ... and end points */
# else /* not _LIBC */
  wchar_t *range_starts;        /* range expressions: start points ...  */
  wchar_t *range_ends;          /* ... and end points */
# endif /* not _LIBC */

  wctype_t *char_classes;       /* character classes */

  /* Set when this is a non-matching list.  */
  unsigned int non_match : 1;

  Idx nmbchars;                 /* # of multibyte characters */
  Idx ncoll_syms;               /* # of collating symbols */
  Idx nequiv_classes;           /* # of equivalence classes */
  Idx nranges;                  /* # of range expressions */
  Idx nchar_classes;            /* # of character classes */
} re_charset_t;
#endif /* RE_ENABLE_I18N */
00327 
00328 typedef struct
00329 {
00330   union
00331   {
00332     unsigned char c;        /* for CHARACTER */
00333     re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
00334 #ifdef RE_ENABLE_I18N
00335     re_charset_t *mbcset;   /* for COMPLEX_BRACKET */
00336 #endif /* RE_ENABLE_I18N */
00337     Idx idx;                /* for BACK_REF */
00338     re_context_type ctx_type;      /* for ANCHOR */
00339   } opr;
00340 #if __GNUC__ >= 2 && !__STRICT_ANSI__
00341   re_token_type_t type : 8;
00342 #else
00343   re_token_type_t type;
00344 #endif
00345   unsigned int constraint : 10;    /* context constraint */
00346   unsigned int duplicated : 1;
00347   unsigned int opt_subexp : 1;
00348 #ifdef RE_ENABLE_I18N
00349   unsigned int accept_mb : 1;
00350   /* These 2 bits can be moved into the union if needed (e.g. if running out
00351      of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
00352   unsigned int mb_partial : 1;
00353 #endif
00354   unsigned int word_char : 1;
00355 } re_token_t;
00356 
/* Nonzero if TYPE has EPSILON_BIT set.  */
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
00358 
00359 struct re_string_t
00360 {
00361   /* Indicate the raw buffer which is the original string passed as an
00362      argument of regexec(), re_search(), etc..  */
00363   const unsigned char *raw_mbs;
00364   /* Store the multibyte string.  In case of "case insensitive mode" like
00365      REG_ICASE, upper cases of the string are stored, otherwise MBS points
00366      the same address that RAW_MBS points.  */
00367   unsigned char *mbs;
00368 #ifdef RE_ENABLE_I18N
00369   /* Store the wide character string which is corresponding to MBS.  */
00370   wint_t *wcs;
00371   Idx *offsets;
00372   mbstate_t cur_state;
00373 #endif
00374   /* Index in RAW_MBS.  Each character mbs[i] corresponds to
00375      raw_mbs[raw_mbs_idx + i].  */
00376   Idx raw_mbs_idx;
00377   /* The length of the valid characters in the buffers.  */
00378   Idx valid_len;
00379   /* The corresponding number of bytes in raw_mbs array.  */
00380   Idx valid_raw_len;
00381   /* The length of the buffers MBS and WCS.  */
00382   Idx bufs_len;
00383   /* The index in MBS, which is updated by re_string_fetch_byte.  */
00384   Idx cur_idx;
00385   /* length of RAW_MBS array.  */
00386   Idx raw_len;
00387   /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
00388   Idx len;
00389   /* End of the buffer may be shorter than its length in the cases such
00390      as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
00391      instead of LEN.  */
00392   Idx raw_stop;
00393   /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
00394   Idx stop;
00395 
00396   /* The context of mbs[0].  We store the context independently, since
00397      the context of mbs[0] may be different from raw_mbs[0], which is
00398      the beginning of the input string.  */
00399   unsigned int tip_context;
00400   /* The translation passed as a part of an argument of re_compile_pattern.  */
00401   RE_TRANSLATE_TYPE trans;
00402   /* Copy of re_dfa_t's word_char.  */
00403   re_const_bitset_ptr_t word_char;
00404   /* true if REG_ICASE.  */
00405   unsigned char icase;
00406   unsigned char is_utf8;
00407   unsigned char map_notascii;
00408   unsigned char mbs_allocated;
00409   unsigned char offsets_needed;
00410   unsigned char newline_anchor;
00411   unsigned char word_ops_used;
00412   int mb_cur_max;
00413 };
00414 typedef struct re_string_t re_string_t;
00415 
00416 
/* Forward declaration; struct re_dfa_t is defined later in this file.  */
typedef struct re_dfa_t re_dfa_t;
00419 
/* Outside glibc, internal_function selects the regparm(3)/stdcall
   calling convention on i386 (except EMX) and is empty elsewhere.  */
#ifndef _LIBC
# if defined __i386__ && !defined __EMX__
#  define internal_function   __attribute ((regparm (3), stdcall))
# else
#  define internal_function
# endif
#endif
00427 
00428 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
00429                                           Idx new_buf_len)
00430      internal_function;
00431 #ifdef RE_ENABLE_I18N
00432 static void build_wcs_buffer (re_string_t *pstr) internal_function;
00433 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
00434      internal_function;
00435 #endif /* RE_ENABLE_I18N */
00436 static void build_upper_buffer (re_string_t *pstr) internal_function;
00437 static void re_string_translate_buffer (re_string_t *pstr) internal_function;
00438 static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
00439                                      int eflags)
00440      internal_function __attribute ((pure));
/* Accessor macros for a re_string_t *PSTR.  Every macro argument is
   parenthesized wherever operator precedence could otherwise change the
   meaning of the expansion.  */

/* Byte OFFSET positions after the current index; cur_idx is unchanged.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
/* Byte at the current index; cur_idx is then advanced by one.  */
#define re_string_fetch_byte(pstr) \
  ((pstr)->mbs[(pstr)->cur_idx++])
/* True if IDX equals valid_len or wcs[IDX] is not WEOF.  */
#define re_string_first_byte(pstr, idx) \
  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
/* True if wcs[IDX] is not WEOF and either IDX is the last valid
   position or wcs[IDX + 1] is not WEOF either.  */
#define re_string_is_single_byte_char(pstr, idx) \
  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
                                || (pstr)->wcs[(idx) + 1] != WEOF))
/* True once the current index has reached STOP.  */
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
/* Advance the current index by IDX, or set it to IDX.  */
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
00457 
00458 #include <alloca.h>
00459 
#ifndef _LIBC
# if HAVE_ALLOCA
/* Usually only a single guard page sits below the stack, and a page may
   be as small as 4096 bytes, so stack allocations must stay under that.
   The limit is a little lower still, to leave room for a few temporary
   stack slots the compiler may allocate.  */
#  define __libc_use_alloca(n) ((n) < 4032)
# else
/* No real alloca is available: never choose the alloca path, and route
   alloca itself to malloc.  */
#  define __libc_use_alloca(n) 0
#  undef alloca
#  define alloca(n) malloc (n)
# endif
#endif
00474 
/* Larger of A and B; yields A when they compare equal.  The selected
   argument is evaluated twice.  */
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
00478 
/* Typed wrappers around malloc/realloc/free for arrays of N objects of
   type T.  The product N * sizeof (T) is not checked for overflow here.  */
#define re_malloc(t,n)    ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p)        free (p)
00482 
00483 struct bin_tree_t
00484 {
00485   struct bin_tree_t *parent;
00486   struct bin_tree_t *left;
00487   struct bin_tree_t *right;
00488   struct bin_tree_t *first;
00489   struct bin_tree_t *next;
00490 
00491   re_token_t token;
00492 
00493   /* `node_idx' is the index in dfa->nodes, if `type' == 0.
00494      Otherwise `type' indicate the type of this node.  */
00495   Idx node_idx;
00496 };
00497 typedef struct bin_tree_t bin_tree_t;
00498 
00499 #define BIN_TREE_STORAGE_SIZE \
00500   ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
00501 
00502 struct bin_tree_storage_t
00503 {
00504   struct bin_tree_storage_t *next;
00505   bin_tree_t data[BIN_TREE_STORAGE_SIZE];
00506 };
00507 typedef struct bin_tree_storage_t bin_tree_storage_t;
00508 
/* Context flags; each one is a distinct bit.  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

/* Tests on a context value C.  The first four yield the masked bit
   (nonzero when set); IS_ORDINARY_CONTEXT is true when no bit is set.  */
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)

/* Character classification, narrow and wide.  A word character is an
   alphanumeric or an underscore.  */
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)

/* True if CONTEXT violates any PREV_* bit set in CONSTRAINT.
   CONSTRAINT is parenthesized in every clause so that an expression
   argument such as `a | b' is masked as a whole, exactly as in
   NOT_SATISFY_NEXT_CONSTRAINT.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))

/* True if CONTEXT violates any NEXT_* bit set in CONSTRAINT.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
00536 
00537 struct re_dfastate_t
00538 {
00539   re_hashval_t hash;
00540   re_node_set nodes;
00541   re_node_set non_eps_nodes;
00542   re_node_set inveclosure;
00543   re_node_set *entrance_nodes;
00544   struct re_dfastate_t **trtable, **word_trtable;
00545   unsigned int context : 4;
00546   unsigned int halt : 1;
00547   /* If this state can accept `multi byte'.
00548      Note that we refer to multibyte characters, and multi character
00549      collating elements as `multi byte'.  */
00550   unsigned int accept_mb : 1;
00551   /* If this state has backreference node(s).  */
00552   unsigned int has_backref : 1;
00553   unsigned int has_constraint : 1;
00554 };
00555 typedef struct re_dfastate_t re_dfastate_t;
00556 
00557 struct re_state_table_entry
00558 {
00559   Idx num;
00560   Idx alloc;
00561   re_dfastate_t **array;
00562 };
00563 
00564 /* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
00565 
00566 typedef struct
00567 {
00568   Idx next_idx;
00569   Idx alloc;
00570   re_dfastate_t **array;
00571 } state_array_t;
00572 
00573 /* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
00574 
00575 typedef struct
00576 {
00577   Idx node;
00578   Idx str_idx; /* The position NODE match at.  */
00579   state_array_t path;
00580 } re_sub_match_last_t;
00581 
00582 /* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
00583    And information about the node, whose type is OP_CLOSE_SUBEXP,
00584    corresponding to NODE is stored in LASTS.  */
00585 
00586 typedef struct
00587 {
00588   Idx str_idx;
00589   Idx node;
00590   state_array_t *path;
00591   Idx alasts; /* Allocation size of LASTS.  */
00592   Idx nlasts; /* The number of LASTS.  */
00593   re_sub_match_last_t **lasts;
00594 } re_sub_match_top_t;
00595 
00596 struct re_backref_cache_entry
00597 {
00598   Idx node;
00599   Idx str_idx;
00600   Idx subexp_from;
00601   Idx subexp_to;
00602   char more;
00603   char unused;
00604   unsigned short int eps_reachable_subexps_map;
00605 };
00606 
00607 typedef struct
00608 {
00609   /* The string object corresponding to the input string.  */
00610   re_string_t input;
00611 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
00612   const re_dfa_t *const dfa;
00613 #else
00614   const re_dfa_t *dfa;
00615 #endif
00616   /* EFLAGS of the argument of regexec.  */
00617   int eflags;
00618   /* Where the matching ends.  */
00619   Idx match_last;
00620   Idx last_node;
00621   /* The state log used by the matcher.  */
00622   re_dfastate_t **state_log;
00623   Idx state_log_top;
00624   /* Back reference cache.  */
00625   Idx nbkref_ents;
00626   Idx abkref_ents;
00627   struct re_backref_cache_entry *bkref_ents;
00628   int max_mb_elem_len;
00629   Idx nsub_tops;
00630   Idx asub_tops;
00631   re_sub_match_top_t **sub_tops;
00632 } re_match_context_t;
00633 
00634 typedef struct
00635 {
00636   re_dfastate_t **sifted_states;
00637   re_dfastate_t **limited_states;
00638   Idx last_node;
00639   Idx last_str_idx;
00640   re_node_set limits;
00641 } re_sift_context_t;
00642 
00643 struct re_fail_stack_ent_t
00644 {
00645   Idx idx;
00646   Idx node;
00647   regmatch_t *regs;
00648   re_node_set eps_via_nodes;
00649 };
00650 
00651 struct re_fail_stack_t
00652 {
00653   Idx num;
00654   Idx alloc;
00655   struct re_fail_stack_ent_t *stack;
00656 };
00657 
00658 struct re_dfa_t
00659 {
00660   re_token_t *nodes;
00661   size_t nodes_alloc;
00662   size_t nodes_len;
00663   Idx *nexts;
00664   Idx *org_indices;
00665   re_node_set *edests;
00666   re_node_set *eclosures;
00667   re_node_set *inveclosures;
00668   struct re_state_table_entry *state_table;
00669   re_dfastate_t *init_state;
00670   re_dfastate_t *init_state_word;
00671   re_dfastate_t *init_state_nl;
00672   re_dfastate_t *init_state_begbuf;
00673   bin_tree_t *str_tree;
00674   bin_tree_storage_t *str_tree_storage;
00675   re_bitset_ptr_t sb_char;
00676   int str_tree_storage_idx;
00677 
00678   /* number of subexpressions `re_nsub' is in regex_t.  */
00679   re_hashval_t state_hash_mask;
00680   Idx init_node;
00681   Idx nbackref; /* The number of backreference in this dfa.  */
00682 
00683   /* Bitmap expressing which backreference is used.  */
00684   bitset_word_t used_bkref_map;
00685   bitset_word_t completed_bkref_map;
00686 
00687   unsigned int has_plural_match : 1;
00688   /* If this dfa has "multibyte node", which is a backreference or
00689      a node which can accept multibyte character or multi character
00690      collating element.  */
00691   unsigned int has_mb_node : 1;
00692   unsigned int is_utf8 : 1;
00693   unsigned int map_notascii : 1;
00694   unsigned int word_ops_used : 1;
00695   int mb_cur_max;
00696   bitset_t word_char;
00697   reg_syntax_t syntax;
00698   Idx *subexp_map;
00699 #ifdef DEBUG
00700   char* re_str;
00701 #endif
00702 #ifdef _LIBC
00703   __libc_lock_define (, lock)
00704 #endif
00705 };
00706 
/* re_node_set convenience macros.
   - init_empty: zero-fill the whole re_node_set object.
   - remove: look ID up with re_node_set_contains and remove the entry at
     the returned value minus one.
   - empty: reset the element count to 0, leaving the storage in place.
   - free: release the element array.  */
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
#define re_node_set_remove(set,id) \
  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
00712 
00713 
/* Kind of an element parsed inside a bracket expression.  */
typedef enum
{
  SB_CHAR = 0,
  MB_CHAR = 1,
  EQUIV_CLASS = 2,
  COLL_SYM = 3,
  CHAR_CLASS = 4
} bracket_elem_type;
00722 
00723 typedef struct
00724 {
00725   bracket_elem_type type;
00726   union
00727   {
00728     unsigned char ch;
00729     unsigned char *name;
00730     wchar_t wch;
00731   } opr;
00732 } bracket_elem_t;
00733 
00734 
00735 /* Inline functions for bitset_t operation.  */
00736 
00737 static inline void
00738 bitset_set (bitset_t set, Idx i)
00739 {
00740   set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
00741 }
00742 
00743 static inline void
00744 bitset_clear (bitset_t set, Idx i)
00745 {
00746   set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
00747 }
00748 
00749 static inline bool
00750 bitset_contain (const bitset_t set, Idx i)
00751 {
00752   return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
00753 }
00754 
00755 static inline void
00756 bitset_empty (bitset_t set)
00757 {
00758   memset (set, '\0', sizeof (bitset_t));
00759 }
00760 
00761 static inline void
00762 bitset_set_all (bitset_t set)
00763 {
00764   memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
00765   if (SBC_MAX % BITSET_WORD_BITS != 0)
00766     set[BITSET_WORDS - 1] =
00767       ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
00768 }
00769 
00770 static inline void
00771 bitset_copy (bitset_t dest, const bitset_t src)
00772 {
00773   memcpy (dest, src, sizeof (bitset_t));
00774 }
00775 
00776 static inline void
00777 bitset_not (bitset_t set)
00778 {
00779   int bitset_i;
00780   for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i)
00781     set[bitset_i] = ~set[bitset_i];
00782   if (SBC_MAX % BITSET_WORD_BITS != 0)
00783     set[BITSET_WORDS - 1] =
00784       ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
00785        & ~set[BITSET_WORDS - 1]);
00786 }
00787 
00788 static inline void
00789 bitset_merge (bitset_t dest, const bitset_t src)
00790 {
00791   int bitset_i;
00792   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
00793     dest[bitset_i] |= src[bitset_i];
00794 }
00795 
00796 static inline void
00797 bitset_mask (bitset_t dest, const bitset_t src)
00798 {
00799   int bitset_i;
00800   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
00801     dest[bitset_i] &= src[bitset_i];
00802 }
00803 
00804 #ifdef RE_ENABLE_I18N
00805 /* Inline functions for re_string.  */
00806 static inline int
00807 internal_function __attribute ((pure))
00808 re_string_char_size_at (const re_string_t *pstr, Idx idx)
00809 {
00810   int byte_idx;
00811   if (pstr->mb_cur_max == 1)
00812     return 1;
00813   for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
00814     if (pstr->wcs[idx + byte_idx] != WEOF)
00815       break;
00816   return byte_idx;
00817 }
00818 
00819 static inline wint_t
00820 internal_function __attribute ((pure))
00821 re_string_wchar_at (const re_string_t *pstr, Idx idx)
00822 {
00823   if (pstr->mb_cur_max == 1)
00824     return (wint_t) pstr->mbs[idx];
00825   return (wint_t) pstr->wcs[idx];
00826 }
00827 
00828 static int
00829 internal_function __attribute ((pure))
00830 re_string_elem_size_at (const re_string_t *pstr, Idx idx)
00831 {
00832 # ifdef _LIBC
00833   const unsigned char *p, *extra;
00834   const int32_t *table, *indirect;
00835   int32_t tmp;
00836 #  include <locale/weight.h>
00837   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
00838 
00839   if (nrules != 0)
00840     {
00841       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
00842       extra = (const unsigned char *)
00843        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
00844       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
00845                                           _NL_COLLATE_INDIRECTMB);
00846       p = pstr->mbs + idx;
00847       tmp = findidx (&p);
00848       return p - pstr->mbs - idx;
00849     }
00850   else
00851 # endif /* _LIBC */
00852     return 1;
00853 }
00854 #endif /* RE_ENABLE_I18N */
00855 
/* __GNUC_PREREQ (MAJ, MIN): true when compiling with GCC MAJ.MIN or
   newer; always 0 when the GCC version macros are not defined.  */
#ifndef __GNUC_PREREQ
# if defined __GNUC__ && defined __GNUC_MINOR__
#  define __GNUC_PREREQ(maj, min) \
         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
# else
#  define __GNUC_PREREQ(maj, min) 0
# endif
#endif

/* With GCC 3.4 or newer, __attribute_warn_unused_result__ asks for a
   warning when a function's result is ignored; otherwise it is empty.  */
#if __GNUC_PREREQ (3,4)
# undef __attribute_warn_unused_result__
# define __attribute_warn_unused_result__ \
   __attribute__ ((__warn_unused_result__))
#else
# define __attribute_warn_unused_result__ /* empty */
#endif
00872 
00873 #endif /*  _REGEX_INTERNAL_H */