Back to index

cell-binutils  2.17cvs20070401
regex.c
Go to the documentation of this file.
00001 /* Extended regular expression matching and search library,
00002    version 0.12.
00003    (Implements POSIX draft P1003.2/D11.2, except for some of the
00004    internationalization features.)
00005 
00006    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
00007    2002, 2005 Free Software Foundation, Inc.
00008    This file is part of the GNU C Library.
00009 
00010    The GNU C Library is free software; you can redistribute it and/or
00011    modify it under the terms of the GNU Lesser General Public
00012    License as published by the Free Software Foundation; either
00013    version 2.1 of the License, or (at your option) any later version.
00014 
00015    The GNU C Library is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018    Lesser General Public License for more details.
00019 
00020    You should have received a copy of the GNU Lesser General Public
00021    License along with the GNU C Library; if not, write to the Free
00022    Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00023    02110-1301 USA.  */
00024 
00025 /* This file has been modified for usage in libiberty.  It includes "xregex.h"
00026    instead of <regex.h>.  The "xregex.h" header file renames all external
00027    routines with an "x" prefix so they do not collide with the native regex
00028    routines or with other components regex routines. */
00029 /* AIX requires this to be the first thing in the file. */
00030 #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
00031   #pragma alloca
00032 #endif
00033 
00034 #undef _GNU_SOURCE
00035 #define _GNU_SOURCE
00036 
00037 #ifndef INSIDE_RECURSION
00038 # ifdef HAVE_CONFIG_H
00039 #  include <config.h>
00040 # endif
00041 #endif
00042 
00043 #include <ansidecl.h>
00044 
00045 #ifndef INSIDE_RECURSION
00046 
00047 # if defined STDC_HEADERS && !defined emacs
00048 #  include <stddef.h>
00049 # else
00050 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
00051 #  include <sys/types.h>
00052 # endif
00053 
00054 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
00055 
00056 /* For platforms which support the ISO C Amendment 1 functionality we
00057    support user defined character classes.  */
00058 # if defined _LIBC || WIDE_CHAR_SUPPORT
00059 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
00060 #  include <wchar.h>
00061 #  include <wctype.h>
00062 # endif
00063 
00064 # ifdef _LIBC
00065 /* We have to keep the namespace clean.  */
00066 #  define regfree(preg) __regfree (preg)
00067 #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
00068 #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
00069 #  define regerror(errcode, preg, errbuf, errbuf_size) \
00070        __regerror(errcode, preg, errbuf, errbuf_size)
00071 #  define re_set_registers(bu, re, nu, st, en) \
00072        __re_set_registers (bu, re, nu, st, en)
00073 #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
00074        __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
00075 #  define re_match(bufp, string, size, pos, regs) \
00076        __re_match (bufp, string, size, pos, regs)
00077 #  define re_search(bufp, string, size, startpos, range, regs) \
00078        __re_search (bufp, string, size, startpos, range, regs)
00079 #  define re_compile_pattern(pattern, length, bufp) \
00080        __re_compile_pattern (pattern, length, bufp)
00081 #  define re_set_syntax(syntax) __re_set_syntax (syntax)
00082 #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
00083        __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
00084 #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
00085 
00086 #  define btowc __btowc
00087 
00088 /* We are also using some library internals.  */
00089 #  include <locale/localeinfo.h>
00090 #  include <locale/elem-hash.h>
00091 #  include <langinfo.h>
00092 #  include <locale/coll-lookup.h>
00093 # endif
00094 
00095 /* This is for other GNU distributions with internationalized messages.  */
00096 # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
00097 #  include <libintl.h>
00098 #  ifdef _LIBC
00099 #   undef gettext
00100 #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
00101 #  endif
00102 # else
00103 #  define gettext(msgid) (msgid)
00104 # endif
00105 
00106 # ifndef gettext_noop
00107 /* This define is so xgettext can find the internationalizable
00108    strings.  */
00109 #  define gettext_noop(String) String
00110 # endif
00111 
00112 /* The `emacs' switch turns on certain matching commands
00113    that make sense only in Emacs. */
00114 # ifdef emacs
00115 
00116 #  include "lisp.h"
00117 #  include "buffer.h"
00118 #  include "syntax.h"
00119 
00120 # else  /* not emacs */
00121 
00122 /* If we are not linking with Emacs proper,
00123    we can't use the relocating allocator
00124    even if config.h says that we can.  */
00125 #  undef REL_ALLOC
00126 
00127 #  if defined STDC_HEADERS || defined _LIBC
00128 #   include <stdlib.h>
00129 #  else
00130 char *malloc ();
00131 char *realloc ();
00132 #  endif
00133 
00134 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
00135    If nothing else has been done, use the method below.  */
00136 #  ifdef INHIBIT_STRING_HEADER
00137 #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
00138 #    if !defined bzero && !defined bcopy
00139 #     undef INHIBIT_STRING_HEADER
00140 #    endif
00141 #   endif
00142 #  endif
00143 
00144 /* This is the normal way of making sure we have a bcopy and a bzero.
00145    This is used in most programs--a few other programs avoid this
00146    by defining INHIBIT_STRING_HEADER.  */
00147 #  ifndef INHIBIT_STRING_HEADER
00148 #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
00149 #    include <string.h>
00150 #    ifndef bzero
00151 #     ifndef _LIBC
00152 #      define bzero(s, n)   (memset (s, '\0', n), (s))
00153 #     else
00154 #      define bzero(s, n)   __bzero (s, n)
00155 #     endif
00156 #    endif
00157 #   else
00158 #    include <strings.h>
00159 #    ifndef memcmp
00160 #     define memcmp(s1, s2, n)     bcmp (s1, s2, n)
00161 #    endif
00162 #    ifndef memcpy
00163 #     define memcpy(d, s, n)       (bcopy (s, d, n), (d))
00164 #    endif
00165 #   endif
00166 #  endif
00167 
00168 /* Define the syntax stuff for <, >, etc.  */
00169 
00170 /* This must be nonzero for the wordchar and notwordchar pattern
00171    commands in re_match_2.  */
00172 #  ifndef Sword
00173 #   define Sword 1
00174 #  endif
00175 
00176 #  ifdef SWITCH_ENUM_BUG
00177 #   define SWITCH_ENUM_CAST(x) ((int)(x))
00178 #  else
00179 #   define SWITCH_ENUM_CAST(x) (x)
00180 #  endif
00181 
00182 # endif /* not emacs */
00183 
00184 # if defined _LIBC || HAVE_LIMITS_H
00185 #  include <limits.h>
00186 # endif
00187 
00188 # ifndef MB_LEN_MAX
00189 #  define MB_LEN_MAX 1
00190 # endif
00191 
00192 /* Get the interface, including the syntax bits.  */
00193 # include "xregex.h"  /* change for libiberty */
00194 
00195 /* isalpha etc. are used for the character classes.  */
00196 # include <ctype.h>
00197 
00198 /* Jim Meyering writes:
00199 
00200    "... Some ctype macros are valid only for character codes that
00201    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
00202    using /bin/cc or gcc but without giving an ansi option).  So, all
00203    ctype uses should be through macros like ISPRINT...  If
00204    STDC_HEADERS is defined, then autoconf has verified that the ctype
00205    macros don't need to be guarded with references to isascii. ...
00206    Defining isascii to 1 should let any compiler worth its salt
00207    eliminate the && through constant folding."
00208    Solaris defines some of these symbols so we must undefine them first.  */
00209 
00210 # undef ISASCII
00211 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
00212 #  define ISASCII(c) 1
00213 # else
00214 #  define ISASCII(c) isascii(c)
00215 # endif
00216 
00217 # ifdef isblank
00218 #  define ISBLANK(c) (ISASCII (c) && isblank (c))
00219 # else
00220 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00221 # endif
00222 # ifdef isgraph
00223 #  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00224 # else
00225 #  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00226 # endif
00227 
00228 # undef ISPRINT
00229 # define ISPRINT(c) (ISASCII (c) && isprint (c))
00230 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00231 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
00232 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
00233 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00234 # define ISLOWER(c) (ISASCII (c) && islower (c))
00235 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00236 # define ISSPACE(c) (ISASCII (c) && isspace (c))
00237 # define ISUPPER(c) (ISASCII (c) && isupper (c))
00238 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00239 
00240 # ifdef _tolower
00241 #  define TOLOWER(c) _tolower(c)
00242 # else
00243 #  define TOLOWER(c) tolower(c)
00244 # endif
00245 
00246 # ifndef NULL
00247 #  define NULL (void *)0
00248 # endif
00249 
00250 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
00251    since ours (we hope) works properly with all combinations of
00252    machines, compilers, `char' and `unsigned char' argument types.
00253    (Per Bothner suggested the basic approach.)  */
00254 # undef SIGN_EXTEND_CHAR
00255 # if __STDC__
00256 #  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00257 # else  /* not __STDC__ */
00258 /* As in Harbison and Steele.  */
00259 #  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00260 # endif
00261 
00262 # ifndef emacs
00263 /* How many characters in the character set.  */
00264 #  define CHAR_SET_SIZE 256
00265 
00266 #  ifdef SYNTAX_TABLE
00267 
00268 extern char *re_syntax_table;
00269 
00270 #  else /* not SYNTAX_TABLE */
00271 
00272 static char re_syntax_table[CHAR_SET_SIZE];
00273 
00274 static void init_syntax_once (void);
00275 
00276 static void
00277 init_syntax_once (void)
00278 {
00279    register int c;
00280    static int done = 0;
00281 
00282    if (done)
00283      return;
00284    bzero (re_syntax_table, sizeof re_syntax_table);
00285 
00286    for (c = 0; c < CHAR_SET_SIZE; ++c)
00287      if (ISALNUM (c))
00288        re_syntax_table[c] = Sword;
00289 
00290    re_syntax_table['_'] = Sword;
00291 
00292    done = 1;
00293 }
00294 
00295 #  endif /* not SYNTAX_TABLE */
00296 
00297 #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
00298 
00299 # endif /* not emacs */
00300 
00301 /* Integer type for pointers.  */
00302 # if !defined _LIBC && !defined HAVE_UINTPTR_T
00303 typedef unsigned long int uintptr_t;
00304 # endif
00305 
00306 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
00307    use `alloca' instead of `malloc'.  This is because using malloc in
00308    re_search* or re_match* could cause memory leaks when C-g is used in
00309    Emacs; also, malloc is slower and causes storage fragmentation.  On
00310    the other hand, malloc is more portable, and easier to debug.
00311 
00312    Because we sometimes use alloca, some routines have to be macros,
00313    not functions -- `alloca'-allocated space disappears at the end of the
00314    function it is called in.  */
00315 
00316 # ifdef REGEX_MALLOC
00317 
00318 #  define REGEX_ALLOCATE malloc
00319 #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00320 #  define REGEX_FREE free
00321 
00322 # else /* not REGEX_MALLOC  */
00323 
00324 /* Emacs already defines alloca, sometimes.  */
00325 #  ifndef alloca
00326 
00327 /* Make alloca work the best possible way.  */
00328 #   ifdef __GNUC__
00329 #    define alloca __builtin_alloca
00330 #   else /* not __GNUC__ */
00331 #    if HAVE_ALLOCA_H
00332 #     include <alloca.h>
00333 #    endif /* HAVE_ALLOCA_H */
00334 #   endif /* not __GNUC__ */
00335 
00336 #  endif /* not alloca */
00337 
00338 #  define REGEX_ALLOCATE alloca
00339 
00340 /* Assumes a `char *destination' variable.  */
00341 #  define REGEX_REALLOCATE(source, osize, nsize)               \
00342   (destination = (char *) alloca (nsize),                      \
00343    memcpy (destination, source, osize))
00344 
00345 /* No need to do anything to free, after alloca.  */
00346 #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
00347 
00348 # endif /* not REGEX_MALLOC */
00349 
00350 /* Define how to allocate the failure stack.  */
00351 
00352 # if defined REL_ALLOC && defined REGEX_MALLOC
00353 
00354 #  define REGEX_ALLOCATE_STACK(size)                           \
00355   r_alloc (&failure_stack_ptr, (size))
00356 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)         \
00357   r_re_alloc (&failure_stack_ptr, (nsize))
00358 #  define REGEX_FREE_STACK(ptr)                                \
00359   r_alloc_free (&failure_stack_ptr)
00360 
00361 # else /* not using relocating allocator */
00362 
00363 #  ifdef REGEX_MALLOC
00364 
00365 #   define REGEX_ALLOCATE_STACK malloc
00366 #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00367 #   define REGEX_FREE_STACK free
00368 
00369 #  else /* not REGEX_MALLOC */
00370 
00371 #   define REGEX_ALLOCATE_STACK alloca
00372 
00373 #   define REGEX_REALLOCATE_STACK(source, osize, nsize)               \
00374    REGEX_REALLOCATE (source, osize, nsize)
00375 /* No need to explicitly free anything.  */
00376 #   define REGEX_FREE_STACK(arg)
00377 
00378 #  endif /* not REGEX_MALLOC */
00379 # endif /* not using relocating allocator */
00380 
00381 
00382 /* True if `size1' is nonzero and PTR is pointing anywhere inside
00383    `string1' or just past its end.  This works if PTR is NULL, which is
00384    a good thing.  */
00385 # define FIRST_STRING_P(ptr)                                   \
00386   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00387 
/* (Re)Allocate N items of type T using malloc, or fail.  TALLOC yields
   the new pointer; RETALLOC assigns the result of realloc back to ADDR.  */
# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* Resize ADDR to N items of type T if it is already allocated, otherwise
   allocate it afresh.  The body is wrapped in do { } while (0) so that
   `RETALLOC_IF (...);' is exactly one statement and cannot capture an
   `else' that belongs to an enclosing `if'.  */
# define RETALLOC_IF(addr, n, t) \
  do { \
    if (addr) \
      RETALLOC ((addr), (n), t); \
    else \
      (addr) = TALLOC ((n), t); \
  } while (0)
/* Like TALLOC, but obtains the storage through REGEX_ALLOCATE.  */
# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00394 
00395 # define BYTEWIDTH 8 /* In bits.  */
00396 
00397 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00398 
00399 # undef MAX
00400 # undef MIN
00401 # define MAX(a, b) ((a) > (b) ? (a) : (b))
00402 # define MIN(a, b) ((a) < (b) ? (a) : (b))
00403 
00404 typedef char boolean;
00405 # define false 0
00406 # define true 1
00407 
00408 static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
00409                                          reg_syntax_t syntax,
00410                                          struct re_pattern_buffer *bufp);
00411 
00412 static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
00413                                      const char *string1, int size1,
00414                                      const char *string2, int size2,
00415                                      int pos,
00416                                      struct re_registers *regs,
00417                                      int stop);
00418 static int byte_re_search_2 (struct re_pattern_buffer *bufp,
00419                              const char *string1, int size1,
00420                              const char *string2, int size2,
00421                              int startpos, int range,
00422                              struct re_registers *regs, int stop);
00423 static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
00424 
00425 #ifdef MBS_SUPPORT
00426 static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
00427                                         reg_syntax_t syntax,
00428                                         struct re_pattern_buffer *bufp);
00429 
00430 
00431 static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
00432                                     const char *cstring1, int csize1,
00433                                     const char *cstring2, int csize2,
00434                                     int pos,
00435                                     struct re_registers *regs,
00436                                     int stop,
00437                                     wchar_t *string1, int size1,
00438                                     wchar_t *string2, int size2,
00439                                     int *mbs_offset1, int *mbs_offset2);
00440 static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
00441                             const char *string1, int size1,
00442                             const char *string2, int size2,
00443                             int startpos, int range,
00444                             struct re_registers *regs, int stop);
00445 static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
00446 #endif
00447 
00448 /* These are the command codes that appear in compiled regular
00449    expressions.  Some opcodes are followed by argument bytes.  A
00450    command code can specify any interpretation whatsoever for its
00451    arguments.  Zero bytes may appear in the compiled regular expression.  */
00452 
typedef enum
{
  no_op = 0,

  /* Succeed immediately; no further backtracking is done.  */
  succeed,

  /* One byte holding a count n follows, and after it n literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Like exactn, except that the payload is binary data.  */
  exactn_bin,
# endif

  /* Match any character (more or less).  */
  anychar,

  /* Match one character that is a member of the given set.  The byte
     that follows is the number of bitmap bytes; the bitmap itself comes
     next and records which characters belong to the set.  Within each
     byte the bits run low-bit-first, and a 1 bit means the character is
     a member.  A character too large to have a bit in the map is never
     a member.

     With MBS_SUPPORT, the following element is the length of the
     character classes, of the collating symbols, of the equivalence
     classes, of the character ranges and of the characters.  After
     that come the character class element, the collating symbol
     elements, the equivalence class elements, the range elements and
     the character elements.  See the regex_compile function.  */
  charset,

  /* Takes the same parameters as charset, but matches any character
     that is NOT one of those specified.  */
  charset_not,

  /* Begin remembering matched text so it can be stored in a register.
     One byte with the register number follows (0 up to one less than
     the pattern buffer's re_nsub field), then one byte with the number
     of groups nested inside this one.  (The nested-group count lives
     in start_memory only because the on_failure_jump handling in
     re_match_2 needs it.)  */
  start_memory,

  /* Stop remembering matched text and store it in a memory register.
     As with `start_memory', one byte with the register number follows
     (0 up to one less than `re_nsub' in the pattern buffer) and then
     one byte with the number of inner groups.  (The count is repeated
     here because there is no easy way to locate the corresponding
     start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a copy of something remembered earlier.  One byte with the
     register number follows.  */
  duplicate,

  /* Fail unless positioned at the beginning of a line.  */
  begline,

  /* Fail unless positioned at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (when built for emacs) or
     at the beginning of the string being matched (otherwise).  */
  begbuf,

  /* The same, for the end of the buffer/string.  */
  endbuf,

  /* A two-byte relative address to jump to follows.  */
  jump,

  /* Like jump, but also marks the end of an alternative.  */
  jump_past_alt,

  /* A two-byte relative address follows: the place to resume at if
     matching fails.
     With MBS_SUPPORT the address occupies 1 element.  */
  on_failure_jump,

  /* Same as on_failure_jump, except that a placeholder is pushed
     instead of the current string position.  */
  on_failure_keep_string_jump,

  /* Discard the most recent failure point, then jump to the two-byte
     relative address that follows.
     With MBS_SUPPORT the address occupies 1 element.  */
  pop_failure_jump,

  /* Becomes pop_failure_jump when it is known that no backtracking
     will be needed to match, and jump otherwise.  It is used to jump
     back to the start of a repeat: if what follows this jump clearly
     cannot match what the repeat matches, backtracking out of the
     repetitions already matched is pointless, so the opcode is turned
     into a pop_failure_jump.  A two-byte address follows.
     With MBS_SUPPORT the address occupies 1 element.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy failure
     point, which is discarded if an attempt is made to use it for a
     failure.  A `+' construct emits this before the first repeat; it
     also serves as an intermediate kind of jump while an alternative
     is being compiled.
     With MBS_SUPPORT the address occupies 1 element.  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* A two-byte relative address and a two-byte number n follow.  Once
     N matches have been made, jump to the address upon failure.
     With MBS_SUPPORT the address occupies 1 element.  */
  succeed_n,

  /* A two-byte relative address and a two-byte number n follow.  Jump
     to the address N times, then fail.
     With MBS_SUPPORT the address occupies 1 element.  */
  jump_n,

  /* Store the two-byte number that comes second at the two-byte
     relative address that comes first.  The address *includes* the
     two bytes of the number.
     With MBS_SUPPORT the address occupies 1 element.  */
  set_number_at,

  /* Match any word-constituent character.  */
  wordchar,

  /* Match any character that is not a word-constituent.  */
  notwordchar,

  /* Succeed at the beginning of a word.  */
  wordbeg,

  /* Succeed at the end of a word.  */
  wordend,

  /* Succeed at a word boundary.  */
  wordbound,

  /* Succeed anywhere that is not a word boundary.  */
  notwordbound

# ifdef emacs
  /* Succeed before point.  */
  , before_dot,

  /* Succeed at point.  */
  at_dot,

  /* Succeed after point.  */
  after_dot,

  /* Match any character with the given syntax.  One byte holding a
     syntax code (e.g., Sword) follows.  */
  syntaxspec,

  /* Match any character whose syntax is not the given one.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
00605 #endif /* not INSIDE_RECURSION */
00606 
00607 
00608 #ifdef BYTE
00609 # define CHAR_T char
00610 # define UCHAR_T unsigned char
00611 # define COMPILED_BUFFER_VAR bufp->buffer
00612 # define OFFSET_ADDRESS_SIZE 2
00613 # define PREFIX(name) byte_##name
00614 # define ARG_PREFIX(name) name
00615 # define PUT_CHAR(c) putchar (c)
00616 #else
00617 # ifdef WCHAR
00618 #  define CHAR_T wchar_t
00619 #  define UCHAR_T wchar_t
00620 #  define COMPILED_BUFFER_VAR wc_buffer
00621 #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
00622 #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
00623 #  define PREFIX(name) wcs_##name
00624 #  define ARG_PREFIX(name) c##name
/* Should we use wide stream??  */
/* No trailing semicolon in the expansion: like the BYTE variant, this is
   a plain expression, so `PUT_CHAR (c);' remains a single statement and
   can be the body of an unbraced if/else.  */
#  define PUT_CHAR(c) printf ("%C", c)
00627 #  define TRUE 1
00628 #  define FALSE 0
00629 # else
00630 #  ifdef MBS_SUPPORT
00631 #   define WCHAR
00632 #   define INSIDE_RECURSION
00633 #   include "regex.c"
00634 #   undef INSIDE_RECURSION
00635 #  endif
00636 #  define BYTE
00637 #  define INSIDE_RECURSION
00638 #  include "regex.c"
00639 #  undef INSIDE_RECURSION
00640 # endif
00641 #endif
00642 
00643 #ifdef INSIDE_RECURSION
00644 /* Common operations on the compiled pattern.  */
00645 
00646 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
00647 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
00648 
00649 # ifdef WCHAR
00650 #  define STORE_NUMBER(destination, number)                           \
00651   do {                                                         \
00652     *(destination) = (UCHAR_T)(number);                        \
00653   } while (0)
00654 # else /* BYTE */
00655 #  define STORE_NUMBER(destination, number)                           \
00656   do {                                                         \
00657     (destination)[0] = (number) & 0377;                               \
00658     (destination)[1] = (number) >> 8;                                 \
00659   } while (0)
00660 # endif /* WCHAR */
00661 
00662 /* Same as STORE_NUMBER, except increment DESTINATION to
00663    the byte after where the number is stored.  Therefore, DESTINATION
00664    must be an lvalue.  */
00665 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
00666 
00667 # define STORE_NUMBER_AND_INCR(destination, number)                   \
00668   do {                                                         \
00669     STORE_NUMBER (destination, number);                               \
00670     (destination) += OFFSET_ADDRESS_SIZE;                      \
00671   } while (0)
00672 
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The first byte supplies the low-order eight bits; the
   second byte is sign-extended and supplies the high-order part, so the
   result may be negative.  */
/* ifdef MBS_SUPPORT, the number is read from 1 element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source)                         \
  do {                                                                \
    (destination) = *(source);                                        \
  } while (0)
# else /* BYTE */
/* The high byte is scaled by multiplying with 256 rather than shifting
   left by 8: left-shifting a negative value is undefined behavior in
   ISO C, whereas the multiplication is well defined and gives the
   intended value.  */
#  define EXTRACT_NUMBER(destination, source)                         \
  do {                                                                \
    (destination) = *(source) & 0377;                                 \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;        \
  } while (0)
# endif
00689 
00690 # ifdef DEBUG
00691 static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
00692 static void
00693 PREFIX(extract_number) (int *dest, UCHAR_T *source)
00694 {
00695 #  ifdef WCHAR
00696   *dest = *source;
00697 #  else /* BYTE */
00698   int temp = SIGN_EXTEND_CHAR (*(source + 1));
00699   *dest = *source & 0377;
00700   *dest += temp << 8;
00701 #  endif
00702 }
00703 
00704 #  ifndef EXTRACT_MACROS /* To debug the macros.  */
00705 #   undef EXTRACT_NUMBER
00706 #   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
00707 #  endif /* not EXTRACT_MACROS */
00708 
00709 # endif /* DEBUG */
00710 
00711 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
00712    SOURCE must be an lvalue.  */
00713 
00714 # define EXTRACT_NUMBER_AND_INCR(destination, source)                 \
00715   do {                                                         \
00716     EXTRACT_NUMBER (destination, source);                      \
00717     (source) += OFFSET_ADDRESS_SIZE;                                  \
00718   } while (0)
00719 
00720 # ifdef DEBUG
00721 static void PREFIX(extract_number_and_incr) (int *destination,
00722                                              UCHAR_T **source);
00723 static void
00724 PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
00725 {
00726   PREFIX(extract_number) (destination, *source);
00727   *source += OFFSET_ADDRESS_SIZE;
00728 }
00729 
00730 #  ifndef EXTRACT_MACROS
00731 #   undef EXTRACT_NUMBER_AND_INCR
00732 #   define EXTRACT_NUMBER_AND_INCR(dest, src) \
00733   PREFIX(extract_number_and_incr) (&dest, &src)
00734 #  endif /* not EXTRACT_MACROS */
00735 
00736 # endif /* DEBUG */
00737 
00738 
00739 
00740 /* If DEBUG is defined, Regex prints many voluminous messages about what
00741    it is doing (if the variable `debug' is nonzero).  If linked with the
00742    main program in `iregex.c', you can enter patterns and strings
00743    interactively.  And if linked with the main program in `main.c' and
00744    the other test files, you can run the already-written tests.  */
00745 
00746 # ifdef DEBUG
00747 
00748 #  ifndef DEFINED_ONCE
00749 
00750 /* We use standard I/O for debugging.  */
00751 #   include <stdio.h>
00752 
00753 /* It is useful to test things that ``must'' be true when debugging.  */
00754 #   include <assert.h>
00755 
00756 static int debug;
00757 
00758 #   define DEBUG_STATEMENT(e) e
00759 #   define DEBUG_PRINT1(x) if (debug) printf (x)
00760 #   define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00761 #   define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00762 #   define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00763 #  endif /* not DEFINED_ONCE */
00764 
00765 #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                \
00766   if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
00767 #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)              \
00768   if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
00769 
00770 
00771 /* Print the fastmap in human-readable form.  */
00772 
00773 #  ifndef DEFINED_ONCE
00774 void
00775 print_fastmap (char *fastmap)
00776 {
00777   unsigned was_a_range = 0;
00778   unsigned i = 0;
00779 
00780   while (i < (1 << BYTEWIDTH))
00781     {
00782       if (fastmap[i++])
00783        {
00784          was_a_range = 0;
00785           putchar (i - 1);
00786           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
00787             {
00788               was_a_range = 1;
00789               i++;
00790             }
00791          if (was_a_range)
00792             {
00793               printf ("-");
00794               putchar (i - 1);
00795             }
00796         }
00797     }
00798   putchar ('\n');
00799 }
00800 #  endif /* not DEFINED_ONCE */
00801 
00802 
00803 /* Debug aid: print to stdout, one opcode per line, the compiled pattern
00804    from START up to (but not including) END.  Each line is prefixed with
00805    the opcode's offset from START; a NULL START prints "(null)".  */
00806 void
00807 PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
00808 {
00809   int mcnt, mcnt2;      /* Operands decoded from the pattern.  */
00810   UCHAR_T *p1;          /* Location computed from a relative offset.  */
00811   UCHAR_T *p = start;
00812   UCHAR_T *pend = end;
00813 
00814   if (start == NULL)
00815     {
00816       printf ("(null)\n");
00817       return;
00818     }
00819 
00820   /* Loop over pattern commands.  */
00821   while (p < pend)
00822     {
00823 #  ifdef _LIBC
00824       printf ("%td:\t", p - start);
00825 #  else
00826       printf ("%ld:\t", (long int) (p - start));
00827 #  endif
00828 
00829       switch ((re_opcode_t) *p++)
00830        {
00831         case no_op:
00832           printf ("/no_op");
00833           break;
00834 
00835        case exactn:
00836          mcnt = *p++;
00837           printf ("/exactn/%d", mcnt);
00838           do
00839            {
00840               putchar ('/');
00841              PUT_CHAR (*p++);
00842             }
00843           while (--mcnt);
00844           break;
00845 
00846 #  ifdef MBS_SUPPORT
00847        case exactn_bin:
00848          mcnt = *p++;
00849          printf ("/exactn_bin/%d", mcnt);
00850           do
00851            {
00852              printf("/%lx", (long int) *p++);
00853             }
00854           while (--mcnt);
00855           break;
00856 #  endif /* MBS_SUPPORT */
00857 
00858        case start_memory:
00859           mcnt = *p++;
00860           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
00861           break;
00862 
00863        case stop_memory:
00864           mcnt = *p++;
00865          printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
00866           break;
00867 
00868        case duplicate:
00869          printf ("/duplicate/%ld", (long int) *p++);
00870          break;
00871 
00872        case anychar:
00873          printf ("/anychar");
00874          break;
00875 
00876        case charset:
00877         case charset_not:
00878           {
00879 #  ifdef WCHAR
00880            int i, length;
00881            wchar_t *workp = p;
00882            printf ("/charset [%s",
00883                    (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
00884            p += 5;
00885            length = *workp++; /* the length of char_classes */
00886            for (i=0 ; i<length ; i++)
00887              printf("[:%lx:]", (long int) *p++);
00888            length = *workp++; /* the length of collating_symbol */
00889            for (i=0 ; i<length ;)
00890              {
00891               printf("[.");
00892               while(*p != 0)
00893                 PUT_CHAR((i++,*p++));
00894               i++,p++;
00895               printf(".]");
00896              }
00897            length = *workp++; /* the length of equivalence_class */
00898            for (i=0 ; i<length ;)
00899              {
00900               printf("[=");
00901               while(*p != 0)
00902                 PUT_CHAR((i++,*p++));
00903               i++,p++;
00904               printf("=]");
00905              }
00906            length = *workp++; /* the length of char_range */
00907            for (i=0 ; i<length ; i++)
00908              {
00909               wchar_t range_start = *p++;
00910               wchar_t range_end = *p++;
00911               printf("%C-%C", range_start, range_end);
00912              }
00913            length = *workp++; /* the length of char */
00914            for (i=0 ; i<length ; i++)
00915              printf("%C", *p++);
00916            putchar (']');
00917 #  else
00918             register int c, last = -100;
00919            register int in_range = 0;
00920 
00921            printf ("/charset [%s",
00922                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00923 
00924             assert (p + *p < pend);
00925 
00926             for (c = 0; c < 256; c++)
00927              if (c / 8 < *p
00928                 && (p[1 + (c/8)] & (1 << (c % 8))))
00929               {
00930                 /* Are we starting a range?  */
00931                 if (last + 1 == c && ! in_range)
00932                   {
00933                     putchar ('-');
00934                     in_range = 1;
00935                   }
00936                 /* Have we broken a range?  */
00937                 else if (last + 1 != c && in_range)
00938               {
00939                     putchar (last);
00940                     in_range = 0;
00941                   }
00942 
00943                 if (! in_range)
00944                   putchar (c);
00945 
00946                 last = c;
00947               }
00948 
00949            if (in_range)
00950              putchar (last);
00951 
00952            putchar (']');
00953 
00954            p += 1 + *p;
00955 #  endif /* WCHAR */
00956          }
00957          break;
00958 
00959        case begline:
00960          printf ("/begline");
00961           break;
00962 
00963        case endline:
00964           printf ("/endline");
00965           break;
00966 
00967        case on_failure_jump:
00968           PREFIX(extract_number_and_incr) (&mcnt, &p);
00969 #  ifdef _LIBC
00970          printf ("/on_failure_jump to %td", p + mcnt - start);
00971 #  else
00972          printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
00973 #  endif
00974           break;
00975 
00976        case on_failure_keep_string_jump:
00977           PREFIX(extract_number_and_incr) (&mcnt, &p);
00978 #  ifdef _LIBC
00979          printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
00980 #  else
00981          printf ("/on_failure_keep_string_jump to %ld",
00982                 (long int) (p + mcnt - start));
00983 #  endif
00984           break;
00985 
00986        case dummy_failure_jump:
00987           PREFIX(extract_number_and_incr) (&mcnt, &p);
00988 #  ifdef _LIBC
00989          printf ("/dummy_failure_jump to %td", p + mcnt - start);
00990 #  else
00991          printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
00992 #  endif
00993           break;
00994 
00995        case push_dummy_failure:
00996           printf ("/push_dummy_failure");
00997           break;
00998 
00999         case maybe_pop_jump:
01000           PREFIX(extract_number_and_incr) (&mcnt, &p);
01001 #  ifdef _LIBC
01002          printf ("/maybe_pop_jump to %td", p + mcnt - start);
01003 #  else
01004          printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
01005 #  endif
01006          break;
01007 
01008         case pop_failure_jump:
01009          PREFIX(extract_number_and_incr) (&mcnt, &p);
01010 #  ifdef _LIBC
01011          printf ("/pop_failure_jump to %td", p + mcnt - start);
01012 #  else
01013          printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
01014 #  endif
01015          break;
01016 
01017         case jump_past_alt:
01018          PREFIX(extract_number_and_incr) (&mcnt, &p);
01019 #  ifdef _LIBC
01020          printf ("/jump_past_alt to %td", p + mcnt - start);
01021 #  else
01022          printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
01023 #  endif
01024          break;
01025 
01026         case jump:
01027          PREFIX(extract_number_and_incr) (&mcnt, &p);
01028 #  ifdef _LIBC
01029          printf ("/jump to %td", p + mcnt - start);
01030 #  else
01031          printf ("/jump to %ld", (long int) (p + mcnt - start));
01032 #  endif
01033          break;
01034 
01035         case succeed_n:
01036           PREFIX(extract_number_and_incr) (&mcnt, &p);
01037          p1 = p + mcnt;
01038           PREFIX(extract_number_and_incr) (&mcnt2, &p);
01039 #  ifdef _LIBC
01040          printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
01041 #  else
01042          printf ("/succeed_n to %ld, %d times",
01043                 (long int) (p1 - start), mcnt2);
01044 #  endif
01045           break;
01046 
01047         case jump_n:  /* The target is a pointer difference: print as long.  */
01048           PREFIX(extract_number_and_incr) (&mcnt, &p);
01049          p1 = p + mcnt;
01050           PREFIX(extract_number_and_incr) (&mcnt2, &p);
01051          printf ("/jump_n to %ld, %d times", (long int) (p1 - start), mcnt2);
01052           break;
01053 
01054         case set_number_at:
01055           PREFIX(extract_number_and_incr) (&mcnt, &p);
01056          p1 = p + mcnt;
01057           PREFIX(extract_number_and_incr) (&mcnt2, &p);
01058 #  ifdef _LIBC
01059          printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
01060 #  else
01061          printf ("/set_number_at location %ld to %d",
01062                 (long int) (p1 - start), mcnt2);
01063 #  endif
01064           break;
01065 
01066         case wordbound:
01067          printf ("/wordbound");
01068          break;
01069 
01070        case notwordbound:
01071          printf ("/notwordbound");
01072           break;
01073 
01074        case wordbeg:
01075          printf ("/wordbeg");
01076          break;
01077 
01078        case wordend:
01079          printf ("/wordend");
01080          break;
01081 
01082 #  ifdef emacs
01083        case before_dot:
01084          printf ("/before_dot");
01085           break;
01086 
01087        case at_dot:
01088          printf ("/at_dot");
01089           break;
01090 
01091        case after_dot:
01092          printf ("/after_dot");
01093           break;
01094 
01095        case syntaxspec:
01096           printf ("/syntaxspec");
01097          mcnt = *p++;
01098          printf ("/%d", mcnt);
01099           break;
01100 
01101        case notsyntaxspec:
01102           printf ("/notsyntaxspec");
01103          mcnt = *p++;
01104          printf ("/%d", mcnt);
01105          break;
01106 #  endif /* emacs */
01107 
01108        case wordchar:
01109          printf ("/wordchar");
01110           break;
01111 
01112        case notwordchar:
01113          printf ("/notwordchar");
01114           break;
01115 
01116        case begbuf:
01117          printf ("/begbuf");
01118           break;
01119 
01120        case endbuf:
01121          printf ("/endbuf");
01122           break;
01123 
01124         default:
01125           printf ("?%ld", (long int) *(p-1));
01126        }
01127 
01128       putchar ('\n');
01129     }
01130 
01131 #  ifdef _LIBC
01132   printf ("%td:\tend of pattern.\n", p - start);
01133 #  else
01134   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
01135 #  endif
01136 }
01137 /* Debug aid: dump the compiled pattern held in BUFP to stdout, followed
01138    by its size, its fastmap (when valid) and its flag fields.  */
01139 void
01140 PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
01141 {
01142   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
01143   /* `used' is divided by the element size to get an element count.  */
01144   PREFIX(print_partial_compiled_pattern) (buffer, buffer
01145                               + bufp->used / sizeof(UCHAR_T));
01146   printf ("%lu bytes used/%lu bytes allocated.\n",
01147          (unsigned long int) bufp->used, (unsigned long int) bufp->allocated);
01148 
01149   if (bufp->fastmap_accurate && bufp->fastmap)
01150     {
01151       printf ("fastmap: ");
01152       print_fastmap (bufp->fastmap);
01153     }
01154 
01155 #  ifdef _LIBC
01156   printf ("re_nsub: %Zd\t", bufp->re_nsub);
01157 #  else
01158   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
01159 #  endif
01160   printf ("regs_alloc: %d\t", bufp->regs_allocated);
01161   printf ("can_be_null: %d\t", bufp->can_be_null);
01162   printf ("newline_anchor: %d\n", bufp->newline_anchor);
01163   printf ("no_sub: %d\t", bufp->no_sub);
01164   printf ("not_bol: %d\t", bufp->not_bol);
01165   printf ("not_eol: %d\t", bufp->not_eol);
01166   printf ("syntax: %lx\n", bufp->syntax);
01167   /* Perhaps we should print the translate table?  */
01168 }
01169 /* Debug aid: output, with PUT_CHAR, the text from WHERE to the end of the
01170    two-part string STRING1/STRING2.  A NULL WHERE prints "(null)".  */
01171 void
01172 PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
01173                              int size1, const CHAR_T *string2, int size2)
01174 {
01175   int this_char;        /* Index into the string being printed.  */
01176 
01177   if (where == NULL)
01178     printf ("(null)");
01179   else
01180     {
01181       int cnt;          /* Characters of STRING2 printed so far.  */
01182 
01183       if (FIRST_STRING_P (where))
01184         {
01185           for (this_char = where - string1; this_char < size1; this_char++)
01186            PUT_CHAR (string1[this_char]);
01187           /* Continue from the beginning of the second string.  */
01188           where = string2;
01189         }
01190 
01191       cnt = 0;
01192       for (this_char = where - string2; this_char < size2; this_char++)
01193        {
01194          PUT_CHAR (string2[this_char]);
01195          if (++cnt > 100)      /* Stop after 101 characters of STRING2.  */
01196            {
01197              fputs ("...", stdout);
01198              break;
01199            }
01200        }
01201     }
01202 }
01203 /* Write the single character C to stderr.  */
01204 #  ifndef DEFINED_ONCE
01205 void
01206 printchar (int c)
01207 {
01208   putc (c, stderr);
01209 }
01210 #  endif
01211 
01212 # else /* not DEBUG */
01213 
01214 #  ifndef DEFINED_ONCE
01215 #   undef assert
01216 #   define assert(e)
01217 /* Without DEBUG, assert and every debugging macro expand to nothing.  */
01218 #   define DEBUG_STATEMENT(e)
01219 #   define DEBUG_PRINT1(x)
01220 #   define DEBUG_PRINT2(x1, x2)
01221 #   define DEBUG_PRINT3(x1, x2, x3)
01222 #   define DEBUG_PRINT4(x1, x2, x3, x4)
01223 #  endif /* not DEFINED_ONCE */
01224 #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
01225 #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
01226 
01227 # endif /* not DEBUG */
01228 
01229 
01230 
01231 # ifdef WCHAR
01232 /* This converts a multibyte string to a wide character string.
01233    It also writes their correspondences to offset_buffer (see below)
01234    and writes whether each wchar_t is binary data to is_binary.
01235    Invalid multibyte sequences are treated as binary data.
01236    We assume offset_buffer and is_binary have already been allocated
01237    with enough space.  */
01238 
01239 static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
01240                               size_t len, int *offset_buffer,
01241                               char *is_binary);
01242 static size_t
01243 convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
01244                     int *offset_buffer, char *is_binary)
01245      /* Convert the LEN bytes at SRC to wide characters stored in DEST and
01246        return how many were stored.  OFFSET_BUFFER holds the
01247        correspondences between src (char string) and dest (wchar_t string).
01248        e.g. src  = "xxxyzz", dest = {'X', 'Y', 'Z'}
01249              (each "xxx", "y" and "zz" represent one multibyte character
01250               corresponding to 'X', 'Y' and 'Z'.)
01251          offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
01252                       = {0, 3, 4, 6}
01253         IS_BINARY[i] is TRUE when DEST[i] is a byte copied unconverted.  */
01254 {
01255   wchar_t *pdest = dest;
01256   const unsigned char *psrc = src;
01257   size_t wc_count = 0;
01258 
01259   mbstate_t mbs;
01260   int i, consumed;
01261   size_t mb_remain = len;
01262   size_t mb_count = 0;
01263 
01264   /* Initialize the conversion state.  */
01265   memset (&mbs, 0, sizeof (mbstate_t));
01266 
01267   offset_buffer[0] = 0;
01268   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
01269         psrc += consumed)
01270     {
01271 #ifdef _LIBC
01272       consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
01273 #else
01274       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
01275 #endif
01276       /* Failure (or NUL): pass one byte through as binary data and reset
01277          MBS, which is unusable after an error or an incomplete sequence.  */
01278       if (consumed <= 0)
01279        {
01280          memset (&mbs, 0, sizeof (mbstate_t));
01281          *pdest = *psrc;
01282          consumed = 1;
01283          is_binary[wc_count] = TRUE;
01284        }
01285       else
01286        is_binary[wc_count] = FALSE;
01287       /* In sjis encoding, we use yen sign as escape character in
01288         place of reverse solidus. So we convert 0x5c(yen sign in
01289         sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
01290         solidus in UCS2).  */
01291       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
01292        *pdest = (wchar_t) *psrc;
01293 
01294       offset_buffer[wc_count + 1] = mb_count += consumed;
01295     }
01296 
01297   /* Fill the remainder of the buffer with a sentinel.  */
01298   for (i = wc_count + 1 ; i <= len ; i++)
01299     offset_buffer[i] = mb_count + 1;
01300 
01301   return wc_count;
01302 }
01303 
01304 # endif /* WCHAR */
01305 
01306 #else /* not INSIDE_RECURSION */
01307 
01308 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
01309    also be assigned to arbitrarily: each pattern buffer stores its own
01310    syntax, so it can be changed between regex compilations.  */
01311 /* This has no initializer because initialized variables in Emacs
01312    become read-only after dumping.  */
01313 reg_syntax_t re_syntax_options;
01314 
01315 
01316 /* Install SYNTAX, a bit mask built from the syntax bits defined in
01317    regex.h, as the current regexp syntax, and return the syntax that
01318    was in effect before the call.
01319 
01320    This provides for compatibility with various utilities which
01321    historically have different, incompatible syntaxes.  */
01322 
01323 reg_syntax_t
01324 re_set_syntax (reg_syntax_t syntax)
01325 {
01326   const reg_syntax_t previous = re_syntax_options;
01327 
01328 # ifdef DEBUG
01329   /* Keep the file-scope `debug' switch in step with the RE_DEBUG bit.  */
01330   debug = (syntax & RE_DEBUG) ? 1 : 0;
01331 # endif /* DEBUG */
01332 
01333   re_syntax_options = syntax;
01334 
01335   return previous;
01336 }
01337 # ifdef _LIBC
01338 weak_alias (__re_set_syntax, re_set_syntax)
01339 # endif
01340 
01341 /* This table gives an error message for each of the error codes listed
01342    in regex.h, so the order here has to be the same as there.  Each
01343    entry is wrapped in gettext_noop.  POSIX doesn't require that we do
01344    anything for REG_NOERROR, but why not be nice?  */
01345 
01346 static const char *re_error_msgid[] =
01347   {
01348     gettext_noop ("Success"),      /* REG_NOERROR */
01349     gettext_noop ("No match"),     /* REG_NOMATCH */
01350     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
01351     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
01352     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
01353     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
01354     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
01355     gettext_noop ("Unmatched [ or [^"),   /* REG_EBRACK */
01356     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
01357     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
01358     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
01359     gettext_noop ("Invalid range end"),   /* REG_ERANGE */
01360     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
01361     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
01362     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
01363     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
01364     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
01365   };
01366 
01367 #endif /* INSIDE_RECURSION */
01368 
01369 #ifndef DEFINED_ONCE
01370 /* Avoiding alloca during matching, to placate r_alloc.  */
01371 
01372 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
01373    searching and matching functions should not call alloca.  On some
01374    systems, alloca is implemented in terms of malloc, and if we're
01375    using the relocating allocator routines, then malloc could cause a
01376    relocation, which might (if the strings being searched are in the
01377    ralloc heap) shift the data out from underneath the regexp
01378    routines.
01379 
01380    Here's another reason to avoid allocation: Emacs
01381    processes input from X in a signal handler; processing X input may
01382    call malloc; if input arrives while a matching routine is calling
01383    malloc, then we're scrod.  But Emacs can't just block input while
01384    calling matching routines; then we don't notice interrupts when
01385    they come in.  So, Emacs blocks input around all regexp calls
01386    except the matching calls, which it leaves unprotected, in the
01387    faith that they will not malloc.  */
01388 
01389 /* Normally, this is fine: by default the match routines may allocate.  */
01390 # define MATCH_MAY_ALLOCATE
01391 
01392 /* When using GNU C, we are not REALLY using the C alloca, no matter
01393    what config.h may say.  So don't take precautions for it.  */
01394 # ifdef __GNUC__
01395 #  undef C_ALLOCA
01396 # endif
01397 
01398 /* The match routines may not allocate if (1) they would do it with malloc
01399    and (2) it's not safe for them to use malloc.
01400    Note that if REL_ALLOC is defined, matching would not use malloc for the
01401    failure stack, but we would still use it for the register vectors;
01402    so REL_ALLOC should not affect this.  */
01403 # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
01404 #  undef MATCH_MAY_ALLOCATE
01405 # endif /* (C_ALLOCA or REGEX_MALLOC) and emacs */
01406 #endif /* not DEFINED_ONCE */
01407 
01408 #ifdef INSIDE_RECURSION
01409 /* Failure stack declarations and macros; both re_compile_fastmap and
01410    re_match_2 use a failure stack.  These have to be macros because of
01411    REGEX_ALLOCATE_STACK.  */
01412 
01413 
01414 /* Number of failure points for which to initially allocate space
01415    when matching.  If this number is exceeded, we allocate more
01416    space, so it is not a hard limit.  */
01417 # ifndef INIT_FAILURE_ALLOC
01418 #  define INIT_FAILURE_ALLOC 5
01419 # endif
01420 
01421 /* Roughly the maximum number of failure points on the stack.  Would be
01422    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
01423    This is a variable only so users of regex can assign to it; we never
01424    change it ourselves.  */
01425 
01426 # ifdef INT_IS_16BIT
01427 /* With INT_IS_16BIT the limit, the integer slot and the counters are long.  */
01428 #  ifndef DEFINED_ONCE
01429 #   if defined MATCH_MAY_ALLOCATE
01430 /* 4400 was enough to cause a crash on Alpha OSF/1,
01431    whose default stack limit is 2mb.  */
01432 long int re_max_failures = 4000;
01433 #   else
01434 long int re_max_failures = 2000;
01435 #   endif
01436 #  endif
01437 /* One slot of the failure stack: either a pointer or an integer.  */
01438 union PREFIX(fail_stack_elt)
01439 {
01440   UCHAR_T *pointer;
01441   long int integer;
01442 };
01443 
01444 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
01445 /* The failure stack itself: an array of slots plus its bookkeeping.  */
01446 typedef struct
01447 {
01448   PREFIX(fail_stack_elt_t) *stack;
01449   unsigned long int size;          /* Number of slots in `stack'.  */
01450   unsigned long int avail;         /* Offset of next open position.  */
01451 } PREFIX(fail_stack_type);
01452 
01453 # else /* not INT_IS_16BIT */
01454 /* Same declarations as above, using plain int and unsigned.  */
01455 #  ifndef DEFINED_ONCE
01456 #   if defined MATCH_MAY_ALLOCATE
01457 /* 4400 was enough to cause a crash on Alpha OSF/1,
01458    whose default stack limit is 2mb.  */
01459 int re_max_failures = 4000;
01460 #   else
01461 int re_max_failures = 2000;
01462 #   endif
01463 #  endif
01464 /* One slot of the failure stack: either a pointer or an integer.  */
01465 union PREFIX(fail_stack_elt)
01466 {
01467   UCHAR_T *pointer;
01468   int integer;
01469 };
01470 
01471 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
01472 /* The failure stack itself: an array of slots plus its bookkeeping.  */
01473 typedef struct
01474 {
01475   PREFIX(fail_stack_elt_t) *stack;
01476   unsigned size;                   /* Number of slots in `stack'.  */
01477   unsigned avail;                  /* Offset of next open position.  */
01478 } PREFIX(fail_stack_type);
01479 
01480 # endif /* INT_IS_16BIT */
01481 /* State tests on the in-scope `fail_stack' (or `fail_stack_ptr').  */
01482 # ifndef DEFINED_ONCE
01483 #  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
01484 #  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01485 #  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
01486 # endif
01487 
01488 
01489 /* Define macros to initialize and free the failure stack `fail_stack'.
01490    With MATCH_MAY_ALLOCATE, INIT_FAIL_STACK allocates INIT_FAILURE_ALLOC
01491    slots and does `return -2' from the enclosing function on failure.  */
01492 # ifdef MATCH_MAY_ALLOCATE
01493 #  define INIT_FAIL_STACK()                                    \
01494   do {                                                         \
01495     fail_stack.stack = (PREFIX(fail_stack_elt_t) *)            \
01496       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
01497                                                                \
01498     if (fail_stack.stack == NULL)                       \
01499       return -2;                                               \
01500                                                                \
01501     fail_stack.size = INIT_FAILURE_ALLOC;               \
01502     fail_stack.avail = 0;                               \
01503   } while (0)
01504 /* Hand the stack obtained above to REGEX_FREE_STACK.  */
01505 #  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
01506 # else /* not MATCH_MAY_ALLOCATE */
01507 #  define INIT_FAIL_STACK()                                    \
01508   do {                                                         \
01509     fail_stack.avail = 0;                               \
01510   } while (0)
01511 /* Nothing is allocated by this variant, so there is nothing to free.  */
01512 #  define RESET_FAIL_STACK()
01513 # endif
01514 
01515 
01516 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
01517    The macro is an expression and evaluates FAIL_STACK several times.
01518    Return 1 if succeeds, and 0 if either ran out of memory
01519    allocating space for it or it was already too large.
01520    When the reallocation fails, (FAIL_STACK).stack is left NULL.
01521    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
01522 /* NOTE(review): a realloc-backed reallocator would leak here -- confirm.  */
01523 # define DOUBLE_FAIL_STACK(fail_stack)                                \
01524   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)      \
01525    ? 0                                                         \
01526    : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)               \
01527         REGEX_REALLOCATE_STACK ((fail_stack).stack,                   \
01528           (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),      \
01529           ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
01530                                                                \
01531       (fail_stack).stack == NULL                               \
01532       ? 0                                                      \
01533       : ((fail_stack).size <<= 1,                              \
01534          1)))
01535 
01536 
01537 /* Push pointer POINTER on FAIL_STACK.  Return 1 if was able to do so and
01538    0 if the stack was full and could not be doubled.  Fullness is tested
01539    with FAIL_STACK_FULL, i.e. on the variable named `fail_stack'.  */
01540 # define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                         \
01541   ((FAIL_STACK_FULL ()                                                \
01542     && !DOUBLE_FAIL_STACK (FAIL_STACK))                               \
01543    ? 0                                                         \
01544    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,     \
01545       1))
01546 
01547 /* Push a pointer value onto the failure stack, without checking for room.
01548    Assumes the variable `fail_stack'.  Probably should only
01549    be called from within `PUSH_FAILURE_POINT'.  */
01550 # define PUSH_FAILURE_POINTER(item)                                   \
01551   fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
01552 
01553 /* This pushes an integer-valued item onto the failure stack.
01554    Assumes the variable `fail_stack'.  Probably should only
01555    be called from within `PUSH_FAILURE_POINT'.  */
01556 # define PUSH_FAILURE_INT(item)                                \
01557   fail_stack.stack[fail_stack.avail++].integer = (item)
01558 
01559 /* Push a fail_stack_elt_t value onto the failure stack.
01560    Assumes the variable `fail_stack'.  Probably should only
01561    be called from within `PUSH_FAILURE_POINT'.  */
01562 # define PUSH_FAILURE_ELT(item)                                \
01563   fail_stack.stack[fail_stack.avail++] =  (item)
01564 
01565 /* These three POP... operations complement the three PUSH... operations.
01566    All assume that `fail_stack' is nonempty; each decrements `avail' first.  */
01567 # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01568 # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01569 # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01570 
01571 /* Push/pop failure point id's only under DEBUG; otherwise do nothing.  */
01572 # ifdef DEBUG
01573 #  define DEBUG_PUSH PUSH_FAILURE_INT
01574 #  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
01575 # else /* not DEBUG */
01576 #  define DEBUG_PUSH(item)
01577 #  define DEBUG_POP(item_addr)
01578 # endif
01579 
01580 
01581 /* Push the information about the state we will need
01582    if we ever fail back to it.
01583 
01584    Requires variables fail_stack, regstart, regend, reg_info, and
01585    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
01586    be declared.
01587 
01588    Does `return FAILURE_CODE' if runs out of memory.  */
01589 
01590 # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)       \
01591   do {                                                         \
01592     char *destination;                                                \
01593     /* Must be int, so when we don't save any registers, the arithmetic      \
01594        of 0 + -1 isn't done as unsigned.  */                          \
01595     /* Can't be int, since there is not a shred of a guarantee that int      \
01596        is wide enough to hold a value of something to which pointer can      \
01597        be assigned */                                                 \
01598     active_reg_t this_reg;                                     \
01599                                                                \
01600     DEBUG_STATEMENT (failure_id++);                                   \
01601     DEBUG_STATEMENT (nfailure_points_pushed++);                       \
01602     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);         \
01603     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
01604     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
01605                                                                \
01606     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);        \
01607     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);     \
01608                                                                \
01609     /* Ensure we have enough space allocated for what we will push.  */      \
01610     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                 \
01611       {                                                               \
01612         if (!DOUBLE_FAIL_STACK (fail_stack))                          \
01613           return failure_code;                                        \
01614                                                                \
01615         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",            \
01616                      (fail_stack).size);                       \
01617         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
01618       }                                                               \
01619                                                                \
01620     /* Push the info, starting with the registers.  */                \
01621     DEBUG_PRINT1 ("\n");                                       \
01622                                                                \
01623     if (1)                                                     \
01624       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
01625           this_reg++)                                                 \
01626        {                                                       \
01627          DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);             \
01628          DEBUG_STATEMENT (num_regs_pushed++);                         \
01629                                                                \
01630          DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);        \
01631          PUSH_FAILURE_POINTER (regstart[this_reg]);                   \
01632                                                                \
01633          DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);            \
01634          PUSH_FAILURE_POINTER (regend[this_reg]);                     \
01635                                                                \
01636          DEBUG_PRINT2 ("    info: %p\n      ",                        \
01637                      reg_info[this_reg].word.pointer);         \
01638          DEBUG_PRINT2 (" match_null=%d",                       \
01639                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));   \
01640          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
01641          DEBUG_PRINT2 (" matched_something=%d",                \
01642                      MATCHED_SOMETHING (reg_info[this_reg]));  \
01643          DEBUG_PRINT2 (" ever_matched=%d",                            \
01644                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));    \
01645          DEBUG_PRINT1 ("\n");                                         \
01646          PUSH_FAILURE_ELT (reg_info[this_reg].word);                  \
01647        }                                                       \
01648                                                                \
01649     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
01650     PUSH_FAILURE_INT (lowest_active_reg);                      \
01651                                                                \
01652     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
01653     PUSH_FAILURE_INT (highest_active_reg);                            \
01654                                                                \
01655     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);          \
01656     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);         \
01657     PUSH_FAILURE_POINTER (pattern_place);                      \
01658                                                                \
01659     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);            \
01660     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
01661                              size2);                           \
01662     DEBUG_PRINT1 ("'\n");                                      \
01663     PUSH_FAILURE_POINTER (string_place);                       \
01664                                                                \
01665     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);          \
01666     DEBUG_PUSH (failure_id);                                          \
01667   } while (0)
01668 
01669 # ifndef DEFINED_ONCE
01670 /* Each saved register takes this many failure-stack slots: its start
01671    pointer, its end pointer and its info word.  */
01672 #  define NUM_REG_ITEMS  3
01673 
01674 /* Slots pushed per failure point that do not belong to a register.  */
01675 #  ifndef DEBUG
01676 #   define NUM_NONREG_ITEMS 4
01677 #  else
01678 #   define NUM_NONREG_ITEMS 5 /* One more for the failure point id.  */
01679 #  endif
01680 
01681 /* Upper bound on the number of items one failure point pushes.  */
01682 /* This used to be computed from (num_regs - 1), the number of registers
01683    this regexp will save; the register count was replaced by the fixed
01684    value 5 to avoid stack overflow for a regexp with lots of parens.  */
01685 #  define MAX_FAILURE_ITEMS (NUM_NONREG_ITEMS + NUM_REG_ITEMS * 5)
01686 
01687 /* Number of items actually pushed for the currently active registers.  */
01688 #  define NUM_FAILURE_ITEMS                      \
01689   (NUM_NONREG_ITEMS                              \
01690    + (NUM_REG_ITEMS                              \
01691       * (highest_active_reg                      \
01692          - lowest_active_reg + 1)))
01693 
01694 /* Free slots left on the failure stack before it has to grow.  */
01695 #  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01696 # endif /* not DEFINED_ONCE */
01697 
01698 
01699 /* Pops what PUSH_FAILURE_POINT pushes, in the reverse order: the failure
01700    id (DEBUG only), the string position, the pattern position, the high
01701    and low active register numbers, then info/end/start of each register.
01702    We restore into the parameters, all of which should be lvalues:
01703      STR -- the saved data position (left unchanged if NULL was saved).
01704      PAT -- the saved pattern position.
01705      LOW_REG, HIGH_REG -- the lowest and highest active registers.
01706      REGSTART, REGEND -- arrays of string positions.
01707      REG_INFO -- array of information about each subexpression.
01708    Also assumes the variables `fail_stack', `set_regs_matched_done' and (if
01709    debugging), `bufp', `pend', `string1', `size1', `string2', and `size2'.  */
01710 # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
01711 {                                                              \
01712   DEBUG_STATEMENT (unsigned failure_id;)                       \
01713   active_reg_t this_reg;                                       \
01714   const UCHAR_T *string_temp;                                         \
01715                                                                \
01716   assert (!FAIL_STACK_EMPTY ());                               \
01717                                                                \
01718   /* Remove failure points and point to how many regs pushed.  */     \
01719   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                       \
01720   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);  \
01721   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);   \
01722                                                                \
01723   assert (fail_stack.avail >= NUM_NONREG_ITEMS);               \
01724                                                                \
01725   DEBUG_POP (&failure_id);                                     \
01726   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);            \
01727                                                                \
01728   /* If the saved string location is NULL, it came from an            \
01729      on_failure_keep_string_jump opcode, and we want to throw away the       \
01730      saved NULL, thus retaining our current position in the string.  */      \
01731   string_temp = POP_FAILURE_POINTER ();                               \
01732   if (string_temp != NULL)                                     \
01733     str = (const CHAR_T *) string_temp;                               \
01734                                                                \
01735   DEBUG_PRINT2 ("  Popping string %p: `", str);                       \
01736   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);    \
01737   DEBUG_PRINT1 ("'\n");                                               \
01738                                                                \
01739   pat = (UCHAR_T *) POP_FAILURE_POINTER ();                           \
01740   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);               \
01741   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                     \
01742                                                                \
01743   /* Restore register info.  */                                       \
01744   high_reg = (active_reg_t) POP_FAILURE_INT ();                       \
01745   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);        \
01746                                                                \
01747   low_reg = (active_reg_t) POP_FAILURE_INT ();                        \
01748   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);         \
01749   /* With the constant `if (1)' the `else' arm below never runs.  */  \
01750   if (1)                                                       \
01751     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)        \
01752       {                                                               \
01753        DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);             \
01754                                                                \
01755        reg_info[this_reg].word = POP_FAILURE_ELT ();                  \
01756        DEBUG_PRINT2 ("      info: %p\n",                       \
01757                     reg_info[this_reg].word.pointer);                 \
01758                                                                \
01759        regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();    \
01760        DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);            \
01761                                                                \
01762        regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();  \
01763        DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);        \
01764       }                                                               \
01765   else                                                         \
01766     {                                                          \
01767       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
01768        {                                                       \
01769          reg_info[this_reg].word.integer = 0;                         \
01770          regend[this_reg] = 0;                                        \
01771          regstart[this_reg] = 0;                               \
01772        }                                                       \
01773       highest_active_reg = high_reg;                                  \
01774     }                                                          \
01775   /* Let the next SET_REGS_MATCHED do its work again.  */             \
01776   set_regs_matched_done = 0;                                          \
01777   DEBUG_STATEMENT (nfailure_points_popped++);                         \
01778 } /* POP_FAILURE_POINT */
01779 
01780 /* Structure for per-register (a.k.a. per-group) information.
01781    Other register information, such as the
01782    starting and ending positions (which are addresses), and the list of
01783    inner groups (which is a bits list) are maintained in separate
01784    variables.  The union lets the flag bits be saved on and restored
01785    from the failure stack as the single element `word'.
01786    We are making a (strictly speaking) nonportable assumption here: that
01787    the compiler will pack our bit fields into something that fits into
01788    the type of `word', i.e., is something that fits into one item on the
01789    failure stack.  */
01790 
01791 
01792 /* Declarations and macros for re_match_2.  */
01793 
01794 typedef union
01795 {
01796   PREFIX(fail_stack_elt_t) word; /* The form pushed on the failure stack.  */
01797   struct
01798   {
01799       /* This field is one if this group can match the empty string,
01800          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
01801 # define MATCH_NULL_UNSET_VALUE 3
01802     unsigned match_null_string_p : 2;
01803     unsigned is_active : 1; /* Read through IS_ACTIVE.  */
01804     unsigned matched_something : 1; /* Set to 1 by SET_REGS_MATCHED.  */
01805     unsigned ever_matched_something : 1; /* Set to 1 by SET_REGS_MATCHED.  */
01806   } bits;
01807 } PREFIX(register_info_type);
01808 
01809 # ifndef DEFINED_ONCE
01810 #  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
01811 #  define IS_ACTIVE(R)  ((R).bits.is_active)
01812 #  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
01813 #  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
01814 
01815 
01816 /* Call this when have matched a real character; it sets `matched' flags
01817    for the subexpressions which we are currently inside.  Also records
01818    that those subexprs have matched.  */
01819 #  define SET_REGS_MATCHED()                                          \
01820   do                                                           \
01821     {                                                          \
01822       if (!set_regs_matched_done)                              \
01823        {                                                       \
01824          active_reg_t r;                                       \
01825          set_regs_matched_done = 1;                                   \
01826          for (r = lowest_active_reg; r <= highest_active_reg; r++)    \
01827            {                                                   \
01828              MATCHED_SOMETHING (reg_info[r])                          \
01829               = EVER_MATCHED_SOMETHING (reg_info[r])                  \
01830               = 1;                                             \
01831            }                                                   \
01832        }                                                       \
01833     }                                                          \
01834   while (0)
01835 # endif /* not DEFINED_ONCE */
01836 
01837 /* Registers are set to a sentinel when they haven't yet matched.  */
01838 static CHAR_T PREFIX(reg_unset_dummy); /* Its address is the sentinel.  */
01839 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
01840 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE) /* True if E is the sentinel.  */
01841 
01842 /* Subroutine declarations and macros for regex_compile.  These are forward
         declarations only.  store_op1/store_op2 and insert_op1/insert_op2 are
         what the STORE_JUMP, STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2 macros
         expand to; the compile_range helper is declared as wcs_compile_range
         under WCHAR and as byte_compile_range otherwise.  */
01843 static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
01844 static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
01845                                int arg1, int arg2);
01846 static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
01847                                 int arg, UCHAR_T *end);
01848 static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
01849                                 int arg1, int arg2, UCHAR_T *end);
01850 static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
01851                                          const CHAR_T *p,
01852                                          reg_syntax_t syntax);
01853 static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
01854                                          const CHAR_T *pend,
01855                                          reg_syntax_t syntax);
01856 # ifdef WCHAR
01857 static reg_errcode_t wcs_compile_range (CHAR_T range_start,
01858                                         const CHAR_T **p_ptr,
01859                                         const CHAR_T *pend,
01860                                         char *translate,
01861                                         reg_syntax_t syntax,
01862                                         UCHAR_T *b,
01863                                         CHAR_T *char_set);
01864 static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
01865 # else /* BYTE */
01866 static reg_errcode_t byte_compile_range (unsigned int range_start,
01867                                          const char **p_ptr,
01868                                          const char *pend,
01869                                          char *translate,
01870                                          reg_syntax_t syntax,
01871                                          unsigned char *b);
01872 # endif /* WCHAR */
01873 
01874 /* Fetch the next character of the uncompiled pattern into C, translating
01875    it if `translate' is set.  The character is cast to an unsigned type so
01876    that it can be used as an array index (in, e.g., `translate').  If `p'
01877    is already at `pend', the enclosing function returns REG_EEND.  */
01878 /* In the WCHAR variant we translate only if the character is <= 0xff,
01879    because it is impossible to allocate a 4GB array for some encodings
01880    which have a 4-byte character set like UCS4.  */
01881 # ifndef PATFETCH
01882 #  ifdef WCHAR
01883 #   define PATFETCH(c)                                                \
01884   do {if (p == pend) return REG_EEND;                                 \
01885     c = (UCHAR_T) *p++;                                               \
01886     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];         \
01887   } while (0)
01888 #  else /* BYTE */
01889 #   define PATFETCH(c)                                                \
01890   do {if (p == pend) return REG_EEND;                                 \
01891     c = (unsigned char) *p++;                                         \
01892     if (translate) c = (unsigned char) translate[c];                  \
01893   } while (0)
01894 #  endif /* WCHAR */
01895 # endif
01896 
01897 /* Fetch the next character in the uncompiled pattern, with no translation.
01898    Like PATFETCH, returns REG_EEND from the caller if `p' is at `pend'.  */
01899 # define PATFETCH_RAW(c)                                       \
01900   do {if (p == pend) return REG_EEND;                                 \
01901     c = (UCHAR_T) *p++;                                               \
01902   } while (0)
01903 
01904 /* Go backwards one character in the pattern (decrements `p').  */
01905 # define PATUNFETCH p--
01906 
01907 
01908 /* If `translate' is non-null, return translate[D], else just D.  We
01909    cast the subscript to translate because some data is declared as
01910    `char *', to avoid warnings when a string constant is passed.  But
01911    when we use a character as a subscript we must make it unsigned.  */
01912 /* In the WCHAR variant we translate only if the character is <= 0xff,
01913    because it is impossible to allocate a 4GB array for some encodings
01914    which have a 4-byte character set like UCS4.  */
01915 
01916 # ifndef TRANSLATE
01917 #  ifdef WCHAR
01918 #   define TRANSLATE(d) \
01919   ((translate && ((UCHAR_T) (d)) <= 0xff) \
01920    ? (char) translate[(unsigned char) (d)] : (d))
01921 # else /* BYTE */
01922 #   define TRANSLATE(d) \
01923   (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
01924 #  endif /* WCHAR */
01925 # endif
01926 
01927 
01928 /* Macros for outputting the compiled pattern into `buffer'.  */
01929 
01930 /* If the buffer isn't allocated when it comes in, use this.  */
01931 # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
01932 
01933 /* Make sure we have room for at least N more elements in the buffer,
         calling EXTEND_BUFFER as often as needed.  N is counted in bytes in
         the BYTE variant and in CHAR_T units in the WCHAR variant.  */
01934 # ifdef WCHAR
01935 #  define GET_BUFFER_SPACE(n)                                         \
01936     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR     \
01937             + (n)*sizeof(CHAR_T)) > bufp->allocated)                  \
01938       EXTEND_BUFFER ()
01939 # else /* BYTE */
01940 #  define GET_BUFFER_SPACE(n)                                         \
01941     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)       \
01942       EXTEND_BUFFER ()
01943 # endif /* WCHAR */
01944 
01945 /* Reserve room for one more element and append C to the pattern.  */
01946 # define BUF_PUSH(c)                                           \
01947   do {                                                         \
01948     GET_BUFFER_SPACE (1);                                      \
01949     b[0] = (UCHAR_T) (c);                                      \
01950     b += 1;                                                    \
01951   } while (0)
01952 
01953 /* Reserve room for two more elements, then append C1 followed by C2.  */
01954 # define BUF_PUSH_2(c1, c2)                                    \
01955   do {                                                         \
01956     GET_BUFFER_SPACE (2);                                      \
01957     b[0] = (UCHAR_T) (c1);                                     \
01958     b[1] = (UCHAR_T) (c2);                                     \
01959     b += 2;                                                    \
01960   } while (0)
01961 /* Same as BUF_PUSH_2, but appends the three elements C1, C2 and C3.  */
01962 # define BUF_PUSH_3(c1, c2, c3)                                       \
01963   do {                                                         \
01964     GET_BUFFER_SPACE (3);                                      \
01965     b[0] = (UCHAR_T) (c1);                                     \
01966     b[1] = (UCHAR_T) (c2);                                     \
01967     b[2] = (UCHAR_T) (c3);                                     \
01968     b += 3;                                                    \
01969   } while (0)
01970 
01971 /* Store a jump with opcode OP at LOC to location TO.  We store a relative
01972    address: TO - LOC less the (1 + OFFSET_ADDRESS_SIZE) the jump occupies.  */
01973 # define STORE_JUMP(op, loc, to) \
01974  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
01975 
01976 /* Likewise, for a two-argument jump.  */
01977 # define STORE_JUMP2(op, loc, to, arg) \
01978   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
01979 
01980 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
01981 # define INSERT_JUMP(op, loc, to) \
01982   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
01983 
01984 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
01985 # define INSERT_JUMP2(op, loc, to, arg) \
01986   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
01987              arg, b)
01988 
01989 /* This is not an arbitrary limit: the arguments which represent offsets
01990    into the pattern are two bytes long.  So if 2^16 bytes turns out to
01991    be too small, many things would have to change.  */
01992 /* Any other compiler which, like MSC, has an allocation limit below 2^16
01993    bytes will have to use an approach similar to what was done below for
01994    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
01995    reallocating to 0 bytes.  Such a thing is not going to work too well.
01996    You have been warned!!  */
01997 # ifndef DEFINED_ONCE
01998 #  if defined _MSC_VER  && !defined WIN32
01999 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
02000    The REALLOC define eliminates a flurry of conversion warnings,
02001    but is not required. */
02002 #   define MAX_BUF_SIZE  65500L
02003 #   define REALLOC(p,s) realloc ((p), (size_t) (s))
02004 #  else
02005 #   define MAX_BUF_SIZE (1L << 16)
02006 #   define REALLOC(p,s) realloc ((p), (s))
02007 #  endif
02008 
02009 /* EXTEND_BUFFER: grow the compiled-pattern buffer to twice its current
02010    size (capped at MAX_BUF_SIZE) via realloc and reset the pointers that
02011    pointed into the old block (`b', `begalt', `fixup_alt_jump', `laststart'
02012    and `pending_exact') to the correct places in the new one.  The
         enclosing function returns REG_ESIZE when the buffer cannot grow any
         further and REG_ESPACE when the reallocation yields NULL.
         NOTE(review): the buffer pointer is overwritten before the NULL check,
         so the old block looks unreachable after a failed realloc -- confirm.
         The helpers below adjust one pointer by `incr', the local that
         EXTEND_BUFFER declares when the block has moved.  */
02013 #  if __BOUNDED_POINTERS__
02014 #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
02015 #   define MOVE_BUFFER_POINTER(P) \
02016   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
02017 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND  \
02018   else                                    \
02019     {                                     \
02020       SET_HIGH_BOUND (b);                 \
02021       SET_HIGH_BOUND (begalt);                   \
02022       if (fixup_alt_jump)                 \
02023        SET_HIGH_BOUND (fixup_alt_jump);   \
02024       if (laststart)                      \
02025        SET_HIGH_BOUND (laststart);        \
02026       if (pending_exact)                  \
02027        SET_HIGH_BOUND (pending_exact);           \
02028     }
02029 #  else /* no bounded pointers: nothing to do when the block stays put */
02030 #   define MOVE_BUFFER_POINTER(P) (P) += incr
02031 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
02032 #  endif
02033 # endif /* not DEFINED_ONCE */
02034 
02035 # ifdef WCHAR
02036 #  define EXTEND_BUFFER()                                      \
02037   do {                                                         \
02038     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;                        \
02039     int wchar_count;                                           \
02040     if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)             \
02041       return REG_ESIZE;                                               \
02042     bufp->allocated <<= 1;                                     \
02043     if (bufp->allocated > MAX_BUF_SIZE)                               \
02044       bufp->allocated = MAX_BUF_SIZE;                                 \
02045     /* How many characters can the new buffer hold?  */               \
02046     wchar_count = bufp->allocated / sizeof(UCHAR_T);                  \
02047     if (wchar_count == 0) wchar_count = 1;                            \
02048     /* Truncate the buffer to CHAR_T align.  */                \
02049     bufp->allocated = wchar_count * sizeof(UCHAR_T);                  \
02050     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);             \
02051     bufp->buffer = (char*)COMPILED_BUFFER_VAR;                        \
02052     if (COMPILED_BUFFER_VAR == NULL)                                  \
02053       return REG_ESPACE;                                       \
02054     /* If the buffer moved, move all the pointers into it.  */        \
02055     if (old_buffer != COMPILED_BUFFER_VAR)                            \
02056       {                                                               \
02057        int incr = COMPILED_BUFFER_VAR - old_buffer;                   \
02058        MOVE_BUFFER_POINTER (b);                                \
02059        MOVE_BUFFER_POINTER (begalt);                                  \
02060        if (fixup_alt_jump)                                     \
02061          MOVE_BUFFER_POINTER (fixup_alt_jump);                        \
02062        if (laststart)                                                 \
02063          MOVE_BUFFER_POINTER (laststart);                      \
02064        if (pending_exact)                                      \
02065          MOVE_BUFFER_POINTER (pending_exact);                         \
02066       }                                                               \
02067     ELSE_EXTEND_BUFFER_HIGH_BOUND                              \
02068   } while (0)
02069 # else /* BYTE */
02070 #  define EXTEND_BUFFER()                                      \
02071   do {                                                         \
02072     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;                        \
02073     if (bufp->allocated == MAX_BUF_SIZE)                       \
02074       return REG_ESIZE;                                               \
02075     bufp->allocated <<= 1;                                     \
02076     if (bufp->allocated > MAX_BUF_SIZE)                               \
02077       bufp->allocated = MAX_BUF_SIZE;                                 \
02078     bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,          \
02079                                           bufp->allocated);    \
02080     if (COMPILED_BUFFER_VAR == NULL)                                  \
02081       return REG_ESPACE;                                       \
02082     /* If the buffer moved, move all the pointers into it.  */        \
02083     if (old_buffer != COMPILED_BUFFER_VAR)                            \
02084       {                                                               \
02085        int incr = COMPILED_BUFFER_VAR - old_buffer;                   \
02086        MOVE_BUFFER_POINTER (b);                                \
02087        MOVE_BUFFER_POINTER (begalt);                                  \
02088        if (fixup_alt_jump)                                     \
02089          MOVE_BUFFER_POINTER (fixup_alt_jump);                        \
02090        if (laststart)                                                 \
02091          MOVE_BUFFER_POINTER (laststart);                      \
02092        if (pending_exact)                                      \
02093          MOVE_BUFFER_POINTER (pending_exact);                         \
02094       }                                                               \
02095     ELSE_EXTEND_BUFFER_HIGH_BOUND                              \
02096   } while (0)
02097 # endif /* WCHAR */
02098 
02099 # ifndef DEFINED_ONCE
02100 /* Since we have one byte reserved for the register number argument to
02101    {start,stop}_memory, the maximum number of groups we can report
02102    things about is what fits in that byte.  */
02103 #  define MAX_REGNUM 255
02104 
02105 /* But patterns can have more than `MAX_REGNUM' registers.  We just
02106    ignore the excess.  */
02107 typedef unsigned regnum_t;
02108 
02109 
02110 /* Macros for the compile stack.  */
02111 
02112 /* Since offsets can go either forwards or backwards, this type needs to
02113    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
02114 /* int may not be enough when sizeof(int) == 2.  */
02115 typedef long pattern_offset_t;
02116 
02117 typedef struct /* One compile-stack entry: saved offsets plus a register number.  */
02118 {
02119   pattern_offset_t begalt_offset;
02120   pattern_offset_t fixup_alt_jump;
02121   pattern_offset_t inner_group_offset;
02122   pattern_offset_t laststart_offset;
02123   regnum_t regnum;
02124 } compile_stack_elt_t;
02125 
02126 
02127 typedef struct
02128 {
02129   compile_stack_elt_t *stack;
02130   unsigned size;                   /* Capacity of `stack', in elements.  */
02131   unsigned avail;                  /* Offset of next open position.  */
02132 } compile_stack_type;
02133 
02134 
02135 #  define INIT_COMPILE_STACK_SIZE 32
02136 
02137 #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
02138 #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
02139 
02140 /* The next available element, i.e. the slot at index `avail'.  */
02141 #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
02142 
02143 # endif /* not DEFINED_ONCE */
02144 
02145 /* Set the bit for character C in the bit list that `b' points to (BYTEWIDTH
         bits per element).  C is reduced to `unsigned char' before it is used
         as a bit number.  C is evaluated twice, so it must not have side
         effects; both uses are parenthesized so that an expression argument is
         cast as a whole.  */
02146 # ifndef DEFINED_ONCE
02147 #  define SET_LIST_BIT(c)                               \
02148   (b[((unsigned char) (c)) / BYTEWIDTH]               \
02149    |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
02150 # endif /* not DEFINED_ONCE */
02151 
02152 /* Read a run of decimal digits from the uncompiled pattern into NUM.
         Characters are fetched with PATFETCH into the caller's `c'; the loop
         ends at `pend' or at the first non-digit, which has then already been
         consumed and is left in `c'.  A negative NUM is reset to 0 before the
         first digit is added, and once NUM exceeds RE_DUP_MAX further digits
         are still consumed but no longer accumulated.  */
02153 # define GET_UNSIGNED_NUMBER(num) \
02154   {                                                            \
02155     while (p != pend)                                                 \
02156       {                                                               \
02157        PATFETCH (c);                                           \
02158        if (!(c >= '0' && c <= '9'))                                   \
02159          break;                                                \
02160        if (num > RE_DUP_MAX)                                          \
02161          continue;                                             \
02162        /* Still within range: fold this digit in.  */                 \
02163        if (num < 0)                                            \
02164          num = 0;                                              \
02165        num = num * 10 + c - '0';                                      \
02166       }                                                               \
02167   }
02168 
02169 # ifndef DEFINED_ONCE
02170 #  if defined _LIBC || WIDE_CHAR_SUPPORT
02171 /* The GNU C library provides support for user-defined character classes
02172    and the functions from ISO C amendment 1.  */
02173 #   ifdef CHARCLASS_NAME_MAX
02174 #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
02175 #   else
02176 /* This shouldn't happen but some implementations might still have this
02177    problem.  Use a reasonable default value.  */
02178 #    define CHAR_CLASS_MAX_LENGTH 256
02179 #   endif
02180 
02181 #   ifdef _LIBC
02182 #    define IS_CHAR_CLASS(string) __wctype (string)
02183 #   else
02184 #    define IS_CHAR_CLASS(string) wctype (string)
02185 #   endif
02186 #  else
02187 #   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
02188 /* Without wide-character support, only these twelve names are accepted.  */
02189 #   define IS_CHAR_CLASS(string)                               \
02190    (STREQ (string, "alpha") || STREQ (string, "upper")                \
02191     || STREQ (string, "lower") || STREQ (string, "digit")             \
02192     || STREQ (string, "alnum") || STREQ (string, "xdigit")            \
02193     || STREQ (string, "space") || STREQ (string, "print")             \
02194     || STREQ (string, "punct") || STREQ (string, "graph")             \
02195     || STREQ (string, "cntrl") || STREQ (string, "blank"))
02196 #  endif
02197 # endif /* not DEFINED_ONCE */
02198 
02199 # ifndef MATCH_MAY_ALLOCATE
02200 
02201 /* If we cannot allocate large objects within re_match_2_internal,
02202    we make the fail stack and register vectors global.
02203    The fail stack is grown to the maximum size when a regexp
02204    is compiled.
02205    The register vectors are adjusted in size each time we
02206    compile a regexp, according to the number of registers it needs.  */
02207 
02208 static PREFIX(fail_stack_type) fail_stack;
02209 
02210 /* Size with which the following vectors are currently allocated.
02211    That is so we can make them bigger as needed,
02212    but never make them smaller.  */
02213 #  ifdef DEFINED_ONCE
02214 static int regs_allocated_size;
02215 
02216 static const char **     regstart, **     regend;
02217 static const char ** old_regstart, ** old_regend;
02218 static const char **best_regstart, **best_regend;
02219 static const char **reg_dummy;
02220 #  endif /* DEFINED_ONCE */
02221 /* Per-register info vectors; resized together with the vectors above.  */
02222 static PREFIX(register_info_type) *PREFIX(reg_info);
02223 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
02224 
02225 /* Make the register vectors big enough for NUM_REGS registers,
02226    but don't make them smaller.  */
02227 
02228 static void
02229 PREFIX(regex_grow_registers) (int num_regs)
02230 {
02231   if (num_regs > regs_allocated_size)
02232     {
02233       RETALLOC_IF (regstart,        num_regs, const char *);
02234       RETALLOC_IF (regend,   num_regs, const char *);
02235       RETALLOC_IF (old_regstart, num_regs, const char *);
02236       RETALLOC_IF (old_regend,      num_regs, const char *);
02237       RETALLOC_IF (best_regstart, num_regs, const char *);
02238       RETALLOC_IF (best_regend,     num_regs, const char *);
02239       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
02240       RETALLOC_IF (reg_dummy,       num_regs, const char *);
02241       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
02242 
02243       regs_allocated_size = num_regs;
02244     }
02245 }
02246 
02247 # endif /* not MATCH_MAY_ALLOCATE */
02248 
02249 # ifndef DEFINED_ONCE
02250 static boolean group_in_compile_stack (compile_stack_type compile_stack,
02251                                        regnum_t regnum);
02252 # endif /* not DEFINED_ONCE */
02253 
02254 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
02255    Returns one of the error codes defined in `regex.h', or zero for success.
02256 
02257    Assumes the `allocated' (and perhaps `buffer') and `translate'
02258    fields are set in BUFP on entry.
02259 
02260    If it succeeds, results are put in BUFP (if it returns an error, the
02261    contents of BUFP are undefined):
02262      `buffer' is the compiled pattern;
02263      `syntax' is set to SYNTAX;
02264      `used' is set to the length of the compiled pattern;
02265      `fastmap_accurate' is zero;
02266      `re_nsub' is the number of subexpressions in PATTERN;
02267      `not_bol' and `not_eol' are zero;
02268 
02269    The `fastmap' and `newline_anchor' fields are neither
02270    examined nor set.  */
02271 
02272 /* Return VALUE from the enclosing function, first freeing the storage
   we allocated.  Both variants free compile_stack.stack; the WCHAR
   variant additionally frees pattern, mbs_offset and is_binary.  The
   free calls are chained with the comma operator, so the whole
   expansion is a single `return' statement whose result is VALUE.  */
02273 # ifdef WCHAR
02274 #  define FREE_STACK_RETURN(value)        \
02275   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
02276 # else
02277 #  define FREE_STACK_RETURN(value)        \
02278   return (free (compile_stack.stack), value)
02279 # endif /* WCHAR */
02280 
02281 static reg_errcode_t
02282 PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
02283                        size_t ARG_PREFIX(size), reg_syntax_t syntax,
02284                        struct re_pattern_buffer *bufp)
02285 {
02286   /* We fetch characters from PATTERN here.  Even though PATTERN is
02287      `char *' (i.e., signed), we declare these variables as unsigned, so
02288      they can be reliably used as array indices.  */
02289   register UCHAR_T c, c1;
02290 
02291 #ifdef WCHAR
02292   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
02293   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
02294   size_t size;
02295   /* offset buffer for optimization. See convert_mbs_to_wc.  */
02296   int *mbs_offset = NULL;
02297   /* It hold whether each wchar_t is binary data or not.  */
02298   char *is_binary = NULL;
02299   /* A flag whether exactn is handling binary data or not.  */
02300   char is_exactn_bin = FALSE;
02301 #endif /* WCHAR */
02302 
02303   /* A random temporary spot in PATTERN.  */
02304   const CHAR_T *p1;
02305 
02306   /* Points to the end of the buffer, where we should append.  */
02307   register UCHAR_T *b;
02308 
02309   /* Keeps track of unclosed groups.  */
02310   compile_stack_type compile_stack;
02311 
02312   /* Points to the current (ending) position in the pattern.  */
02313 #ifdef WCHAR
02314   const CHAR_T *p;
02315   const CHAR_T *pend;
02316 #else /* BYTE */
02317   const CHAR_T *p = pattern;
02318   const CHAR_T *pend = pattern + size;
02319 #endif /* WCHAR */
02320 
02321   /* How to translate the characters in the pattern.  */
02322   RE_TRANSLATE_TYPE translate = bufp->translate;
02323 
02324   /* Address of the count-byte of the most recently inserted `exactn'
02325      command.  This makes it possible to tell if a new exact-match
02326      character can be added to that command or if the character requires
02327      a new `exactn' command.  */
02328   UCHAR_T *pending_exact = 0;
02329 
02330   /* Address of start of the most recently finished expression.
02331      This tells, e.g., postfix * where to find the start of its
02332      operand.  Reset at the beginning of groups and alternatives.  */
02333   UCHAR_T *laststart = 0;
02334 
02335   /* Address of beginning of regexp, or inside of last group.  */
02336   UCHAR_T *begalt;
02337 
02338   /* Address of the place where a forward jump should go to the end of
02339      the containing expression.  Each alternative of an `or' -- except the
02340      last -- ends with a forward jump of this sort.  */
02341   UCHAR_T *fixup_alt_jump = 0;
02342 
02343   /* Counts open-groups as they are encountered.  Remembered for the
02344      matching close-group on the compile stack, so the same register
02345      number is put in the stop_memory as the start_memory.  */
02346   regnum_t regnum = 0;
02347 
02348 #ifdef WCHAR
02349   /* Initialize the wchar_t PATTERN and offset_buffer.  */
02350   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
02351   mbs_offset = TALLOC(csize + 1, int);
02352   is_binary = TALLOC(csize + 1, char);
02353   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
02354     {
02355       free(pattern);
02356       free(mbs_offset);
02357       free(is_binary);
02358       return REG_ESPACE;
02359     }
02360   pattern[csize] = L'\0';   /* sentinel */
02361   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
02362   pend = p + size;
02363   if (size < 0)
02364     {
02365       free(pattern);
02366       free(mbs_offset);
02367       free(is_binary);
02368       return REG_BADPAT;
02369     }
02370 #endif
02371 
02372 #ifdef DEBUG
02373   DEBUG_PRINT1 ("\nCompiling pattern: ");
02374   if (debug)
02375     {
02376       unsigned debug_count;
02377 
02378       for (debug_count = 0; debug_count < size; debug_count++)
02379         PUT_CHAR (pattern[debug_count]);
02380       putchar ('\n');
02381     }
02382 #endif /* DEBUG */
02383 
02384   /* Initialize the compile stack.  */
02385   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
02386   if (compile_stack.stack == NULL)
02387     {
02388 #ifdef WCHAR
02389       free(pattern);
02390       free(mbs_offset);
02391       free(is_binary);
02392 #endif
02393       return REG_ESPACE;
02394     }
02395 
02396   compile_stack.size = INIT_COMPILE_STACK_SIZE;
02397   compile_stack.avail = 0;
02398 
02399   /* Initialize the pattern buffer.  */
02400   bufp->syntax = syntax;
02401   bufp->fastmap_accurate = 0;
02402   bufp->not_bol = bufp->not_eol = 0;
02403 
02404   /* Set `used' to zero, so that if we return an error, the pattern
02405      printer (for debugging) will think there's no pattern.  We reset it
02406      at the end.  */
02407   bufp->used = 0;
02408 
02409   /* Always count groups, whether or not bufp->no_sub is set.  */
02410   bufp->re_nsub = 0;
02411 
02412 #if !defined emacs && !defined SYNTAX_TABLE
02413   /* Initialize the syntax table.  */
02414    init_syntax_once ();
02415 #endif
02416 
02417   if (bufp->allocated == 0)
02418     {
02419       if (bufp->buffer)
02420        { /* If zero allocated, but buffer is non-null, try to realloc
02421              enough space.  This loses if buffer's address is bogus, but
02422              that is the user's responsibility.  */
02423 #ifdef WCHAR
02424          /* Free bufp->buffer and allocate an array for wchar_t pattern
02425             buffer.  */
02426           free(bufp->buffer);
02427           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
02428                                    UCHAR_T);
02429 #else
02430           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
02431 #endif /* WCHAR */
02432         }
02433       else
02434         { /* Caller did not allocate a buffer.  Do it for them.  */
02435           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
02436                                    UCHAR_T);
02437         }
02438 
02439       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
02440 #ifdef WCHAR
02441       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
02442 #endif /* WCHAR */
02443       bufp->allocated = INIT_BUF_SIZE;
02444     }
02445 #ifdef WCHAR
02446   else
02447     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
02448 #endif
02449 
02450   begalt = b = COMPILED_BUFFER_VAR;
02451 
02452   /* Loop through the uncompiled pattern until we're at the end.  */
02453   while (p != pend)
02454     {
02455       PATFETCH (c);
02456 
02457       switch (c)
02458         {
02459         case '^':
02460           {
02461             if (   /* If at start of pattern, it's an operator.  */
02462                    p == pattern + 1
02463                    /* If context independent, it's an operator.  */
02464                 || syntax & RE_CONTEXT_INDEP_ANCHORS
02465                    /* Otherwise, depends on what's come before.  */
02466                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
02467               BUF_PUSH (begline);
02468             else
02469               goto normal_char;
02470           }
02471           break;
02472 
02473 
02474         case '$':
02475           {
02476             if (   /* If at end of pattern, it's an operator.  */
02477                    p == pend
02478                    /* If context independent, it's an operator.  */
02479                 || syntax & RE_CONTEXT_INDEP_ANCHORS
02480                    /* Otherwise, depends on what's next.  */
02481                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
02482                BUF_PUSH (endline);
02483              else
02484                goto normal_char;
02485            }
02486            break;
02487 
02488 
02489        case '+':
02490         case '?':
02491           if ((syntax & RE_BK_PLUS_QM)
02492               || (syntax & RE_LIMITED_OPS))
02493             goto normal_char;
02494         handle_plus:
02495         case '*':
02496           /* If there is no previous pattern... */
02497           if (!laststart)
02498             {
02499               if (syntax & RE_CONTEXT_INVALID_OPS)
02500                 FREE_STACK_RETURN (REG_BADRPT);
02501               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
02502                 goto normal_char;
02503             }
02504 
02505           {
02506             /* Are we optimizing this jump?  */
02507             boolean keep_string_p = false;
02508 
02509             /* 1 means zero (many) matches is allowed.  */
02510             char zero_times_ok = 0, many_times_ok = 0;
02511 
02512             /* If there is a sequence of repetition chars, collapse it
02513                down to just one (the right one).  We can't combine
02514                interval operators with these because of, e.g., `a{2}*',
02515                which should only match an even number of `a's.  */
02516 
02517             for (;;)
02518               {
02519                 zero_times_ok |= c != '+';
02520                 many_times_ok |= c != '?';
02521 
02522                 if (p == pend)
02523                   break;
02524 
02525                 PATFETCH (c);
02526 
02527                 if (c == '*'
02528                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
02529                   ;
02530 
02531                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
02532                   {
02533                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02534 
02535                     PATFETCH (c1);
02536                     if (!(c1 == '+' || c1 == '?'))
02537                       {
02538                         PATUNFETCH;
02539                         PATUNFETCH;
02540                         break;
02541                       }
02542 
02543                     c = c1;
02544                   }
02545                 else
02546                   {
02547                     PATUNFETCH;
02548                     break;
02549                   }
02550 
02551                 /* If we get here, we found another repeat character.  */
02552                }
02553 
02554             /* Star, etc. applied to an empty pattern is equivalent
02555                to an empty pattern.  */
02556             if (!laststart)
02557               break;
02558 
02559             /* Now we know whether or not zero matches is allowed
02560                and also whether or not two or more matches is allowed.  */
02561             if (many_times_ok)
02562               { /* More than one repetition is allowed, so put in at the
02563                    end a backward relative jump from `b' to before the next
02564                    jump we're going to put in below (which jumps from
02565                    laststart to after this jump).
02566 
02567                    But if we are at the `*' in the exact sequence `.*\n',
02568                    insert an unconditional jump backwards to the .,
02569                    instead of the beginning of the loop.  This way we only
02570                    push a failure point once, instead of every time
02571                    through the loop.  */
02572                 assert (p - 1 > pattern);
02573 
02574                 /* Allocate the space for the jump.  */
02575                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02576 
02577                 /* We know we are not at the first character of the pattern,
02578                    because laststart was nonzero.  And we've already
02579                    incremented `p', by the way, to be the character after
02580                    the `*'.  Do we have to do something analogous here
02581                    for null bytes, because of RE_DOT_NOT_NULL?  */
02582                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02583                   && zero_times_ok
02584                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02585                     && !(syntax & RE_DOT_NEWLINE))
02586                   { /* We have .*\n.  */
02587                     STORE_JUMP (jump, b, laststart);
02588                     keep_string_p = true;
02589                   }
02590                 else
02591                   /* Anything else.  */
02592                   STORE_JUMP (maybe_pop_jump, b, laststart -
02593                            (1 + OFFSET_ADDRESS_SIZE));
02594 
02595                 /* We've added more stuff to the buffer.  */
02596                 b += 1 + OFFSET_ADDRESS_SIZE;
02597               }
02598 
02599             /* On failure, jump from laststart to b + 3, which will be the
02600                end of the buffer after this jump is inserted.  */
02601            /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
02602               'b + 3'.  */
02603             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02604             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02605                                        : on_failure_jump,
02606                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
02607             pending_exact = 0;
02608             b += 1 + OFFSET_ADDRESS_SIZE;
02609 
02610             if (!zero_times_ok)
02611               {
02612                 /* At least one repetition is required, so insert a
02613                    `dummy_failure_jump' before the initial
02614                    `on_failure_jump' instruction of the loop. This
02615                    effects a skip over that instruction the first time
02616                    we hit that loop.  */
02617                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02618                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
02619                           2 + 2 * OFFSET_ADDRESS_SIZE);
02620                 b += 1 + OFFSET_ADDRESS_SIZE;
02621               }
02622             }
02623          break;
02624 
02625 
02626        case '.':
02627           laststart = b;
02628           BUF_PUSH (anychar);
02629           break;
02630 
02631 
02632         case '[':
02633           {
02634             boolean had_char_class = false;
02635 #ifdef WCHAR
02636            CHAR_T range_start = 0xffffffff;
02637 #else
02638            unsigned int range_start = 0xffffffff;
02639 #endif
02640             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02641 
02642 #ifdef WCHAR
02643            /* We assume a charset(_not) structure as a wchar_t array.
02644               charset[0] = (re_opcode_t) charset(_not)
02645                charset[1] = l (= length of char_classes)
02646                charset[2] = m (= length of collating_symbols)
02647                charset[3] = n (= length of equivalence_classes)
02648               charset[4] = o (= length of char_ranges)
02649               charset[5] = p (= length of chars)
02650 
02651                charset[6] = char_class (wctype_t)
02652                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
02653                          ...
02654                charset[l+5]  = char_class (wctype_t)
02655 
02656                charset[l+6]  = collating_symbol (wchar_t)
02657                             ...
02658                charset[l+m+5]  = collating_symbol (wchar_t)
02659                                    ifdef _LIBC we use the index if
02660                                    _NL_COLLATE_SYMB_EXTRAMB instead of
02661                                    wchar_t string.
02662 
02663                charset[l+m+6]  = equivalence_classes (wchar_t)
02664                               ...
02665                charset[l+m+n+5]  = equivalence_classes (wchar_t)
02666                                    ifdef _LIBC we use the index in
02667                                    _NL_COLLATE_WEIGHT instead of
02668                                    wchar_t string.
02669 
02670               charset[l+m+n+6] = range_start
02671               charset[l+m+n+7] = range_end
02672                               ...
02673               charset[l+m+n+2o+4] = range_start
02674               charset[l+m+n+2o+5] = range_end
02675                                    ifdef _LIBC we use the value looked up
02676                                    in _NL_COLLATE_COLLSEQ instead of
02677                                    wchar_t character.
02678 
02679               charset[l+m+n+2o+6] = char
02680                                  ...
02681               charset[l+m+n+2o+p+5] = char
02682 
02683             */
02684 
02685            /* We need at least 6 spaces: the opcode, the length of
02686                char_classes, the length of collating_symbols, the length of
02687                equivalence_classes, the length of char_ranges, the length of
02688                chars.  */
02689            GET_BUFFER_SPACE (6);
02690 
02691            /* Save b as laststart. And We use laststart as the pointer
02692               to the first element of the charset here.
02693               In other words, laststart[i] indicates charset[i].  */
02694             laststart = b;
02695 
02696             /* We test `*p == '^' twice, instead of using an if
02697                statement, so we only need one BUF_PUSH.  */
02698             BUF_PUSH (*p == '^' ? charset_not : charset);
02699             if (*p == '^')
02700               p++;
02701 
02702             /* Push the length of char_classes, the length of
02703                collating_symbols, the length of equivalence_classes, the
02704                length of char_ranges and the length of chars.  */
02705             BUF_PUSH_3 (0, 0, 0);
02706             BUF_PUSH_2 (0, 0);
02707 
02708             /* Remember the first position in the bracket expression.  */
02709             p1 = p;
02710 
02711             /* charset_not matches newline according to a syntax bit.  */
02712             if ((re_opcode_t) b[-6] == charset_not
02713                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02714              {
02715               BUF_PUSH('\n');
02716               laststart[5]++; /* Update the length of characters  */
02717              }
02718 
02719             /* Read in characters and ranges, setting map bits.  */
02720             for (;;)
02721               {
02722                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02723 
02724                 PATFETCH (c);
02725 
02726                 /* \ might escape characters inside [...] and [^...].  */
02727                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02728                   {
02729                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02730 
02731                     PATFETCH (c1);
02732                   BUF_PUSH(c1);
02733                   laststart[5]++; /* Update the length of chars  */
02734                   range_start = c1;
02735                     continue;
02736                   }
02737 
02738                 /* Could be the end of the bracket expression.  If it's
02739                    not (i.e., when the bracket expression is `[]' so
02740                    far), the ']' character bit gets set way below.  */
02741                 if (c == ']' && p != p1 + 1)
02742                   break;
02743 
02744                 /* Look ahead to see if it's a range when the last thing
02745                    was a character class.  */
02746                 if (had_char_class && c == '-' && *p != ']')
02747                   FREE_STACK_RETURN (REG_ERANGE);
02748 
02749                 /* Look ahead to see if it's a range when the last thing
02750                    was a character: if this is a hyphen not at the
02751                    beginning or the end of a list, then it's the range
02752                    operator.  */
02753                 if (c == '-'
02754                     && !(p - 2 >= pattern && p[-2] == '[')
02755                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02756                     && *p != ']')
02757                   {
02758                     reg_errcode_t ret;
02759                   /* Allocate the space for range_start and range_end.  */
02760                   GET_BUFFER_SPACE (2);
02761                   /* Update the pointer to indicate end of buffer.  */
02762                     b += 2;
02763                     ret = wcs_compile_range (range_start, &p, pend, translate,
02764                                          syntax, b, laststart);
02765                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02766                     range_start = 0xffffffff;
02767                   }
02768                 else if (p[0] == '-' && p[1] != ']')
02769                   { /* This handles ranges made up of characters only.  */
02770                     reg_errcode_t ret;
02771 
02772                   /* Move past the `-'.  */
02773                     PATFETCH (c1);
02774                   /* Allocate the space for range_start and range_end.  */
02775                   GET_BUFFER_SPACE (2);
02776                   /* Update the pointer to indicate end of buffer.  */
02777                     b += 2;
02778                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
02779                                          laststart);
02780                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02781                   range_start = 0xffffffff;
02782                   }
02783 
02784                 /* See if we're at the beginning of a possible character
02785                    class.  */
02786                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02787                   { /* Leave room for the null.  */
02788                     char str[CHAR_CLASS_MAX_LENGTH + 1];
02789 
02790                     PATFETCH (c);
02791                     c1 = 0;
02792 
02793                     /* If pattern is `[[:'.  */
02794                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02795 
02796                     for (;;)
02797                       {
02798                         PATFETCH (c);
02799                         if ((c == ':' && *p == ']') || p == pend)
02800                           break;
02801                      if (c1 < CHAR_CLASS_MAX_LENGTH)
02802                        str[c1++] = c;
02803                      else
02804                        /* This is in any case an invalid class name.  */
02805                        str[0] = '\0';
02806                       }
02807                     str[c1] = '\0';
02808 
02809                     /* If isn't a word bracketed by `[:' and `:]':
02810                        undo the ending character, the letters, and leave
02811                        the leading `:' and `[' (but store them as character).  */
02812                     if (c == ':' && *p == ']')
02813                       {
02814                      wctype_t wt;
02815                      uintptr_t alignedp;
02816 
02817                      /* Query the character class as wctype_t.  */
02818                      wt = IS_CHAR_CLASS (str);
02819                      if (wt == 0)
02820                        FREE_STACK_RETURN (REG_ECTYPE);
02821 
02822                         /* Throw away the ] at the end of the character
02823                            class.  */
02824                         PATFETCH (c);
02825 
02826                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02827 
02828                      /* Allocate the space for character class.  */
02829                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
02830                      /* Update the pointer to indicate end of buffer.  */
02831                         b += CHAR_CLASS_SIZE;
02832                      /* Move data which follow character classes
02833                          not to violate the data.  */
02834                         insert_space(CHAR_CLASS_SIZE,
02835                                  laststart + 6 + laststart[1],
02836                                  b - 1);
02837                      alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
02838                                 + __alignof__(wctype_t) - 1)
02839                                 & ~(uintptr_t)(__alignof__(wctype_t) - 1);
02840                      /* Store the character class.  */
02841                         *((wctype_t*)alignedp) = wt;
02842                         /* Update length of char_classes */
02843                         laststart[1] += CHAR_CLASS_SIZE;
02844 
02845                         had_char_class = true;
02846                       }
02847                     else
02848                       {
02849                         c1++;
02850                         while (c1--)
02851                           PATUNFETCH;
02852                         BUF_PUSH ('[');
02853                         BUF_PUSH (':');
02854                         laststart[5] += 2; /* Update the length of characters  */
02855                      range_start = ':';
02856                         had_char_class = false;
02857                       }
02858                   }
02859                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
02860                                                    || *p == '.'))
02861                 {
02862                   CHAR_T str[128]; /* Should be large enough.  */
02863                   CHAR_T delim = *p; /* '=' or '.'  */
02864 # ifdef _LIBC
02865                   uint32_t nrules =
02866                     _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
02867 # endif
02868                   PATFETCH (c);
02869                   c1 = 0;
02870 
02871                   /* If pattern is `[[=' or '[[.'.  */
02872                   if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02873 
02874                   for (;;)
02875                     {
02876                      PATFETCH (c);
02877                      if ((c == delim && *p == ']') || p == pend)
02878                        break;
02879                      if (c1 < sizeof (str) - 1)
02880                        str[c1++] = c;
02881                      else
02882                        /* This is in any case an invalid class name.  */
02883                        str[0] = '\0';
02884                       }
02885                   str[c1] = '\0';
02886 
02887                   if (c == delim && *p == ']' && str[0] != '\0')
02888                     {
02889                         unsigned int i, offset;
02890                      /* If we have no collation data we use the default
02891                         collation in which each character is in a class
02892                         by itself.  It also means that ASCII is the
02893                         character set and therefore we cannot have character
02894                         with more than one byte in the multibyte
02895                         representation.  */
02896 
02897                         /* If not defined _LIBC, we push the name and
02898                         `\0' for the sake of matching performance.  */
02899                      int datasize = c1 + 1;
02900 
02901 # ifdef _LIBC
02902                      int32_t idx = 0;
02903                      if (nrules == 0)
02904 # endif
02905                        {
02906                          if (c1 != 1)
02907                            FREE_STACK_RETURN (REG_ECOLLATE);
02908                        }
02909 # ifdef _LIBC
02910                      else
02911                        {
02912                          const int32_t *table;
02913                          const int32_t *weights;
02914                          const int32_t *extra;
02915                          const int32_t *indirect;
02916                          wint_t *cp;
02917 
02918                          /* This #include defines a local function!  */
02919 #  include <locale/weightwc.h>
02920 
02921                          if(delim == '=')
02922                            {
02923                             /* We push the index for equivalence class.  */
02924                             cp = (wint_t*)str;
02925 
02926                             table = (const int32_t *)
02927                               _NL_CURRENT (LC_COLLATE,
02928                                           _NL_COLLATE_TABLEWC);
02929                             weights = (const int32_t *)
02930                               _NL_CURRENT (LC_COLLATE,
02931                                           _NL_COLLATE_WEIGHTWC);
02932                             extra = (const int32_t *)
02933                               _NL_CURRENT (LC_COLLATE,
02934                                           _NL_COLLATE_EXTRAWC);
02935                             indirect = (const int32_t *)
02936                               _NL_CURRENT (LC_COLLATE,
02937                                           _NL_COLLATE_INDIRECTWC);
02938 
02939                             idx = findidx ((const wint_t**)&cp);
02940                             if (idx == 0 || cp < (wint_t*) str + c1)
02941                               /* This is no valid character.  */
02942                               FREE_STACK_RETURN (REG_ECOLLATE);
02943 
02944                             str[0] = (wchar_t)idx;
02945                            }
02946                          else /* delim == '.' */
02947                            {
02948                             /* We push collation sequence value
02949                                for collating symbol.  */
02950                             int32_t table_size;
02951                             const int32_t *symb_table;
02952                             const unsigned char *extra;
02953                             int32_t idx;
02954                             int32_t elem;
02955                             int32_t second;
02956                             int32_t hash;
02957                             char char_str[c1];
02958 
02959                             /* We have to convert the name to a single-byte
02960                                string.  This is possible since the names
02961                                consist of ASCII characters and the internal
02962                                representation is UCS4.  */
02963                             for (i = 0; i < c1; ++i)
02964                               char_str[i] = str[i];
02965 
02966                             table_size =
02967                               _NL_CURRENT_WORD (LC_COLLATE,
02968                                               _NL_COLLATE_SYMB_HASH_SIZEMB);
02969                             symb_table = (const int32_t *)
02970                               _NL_CURRENT (LC_COLLATE,
02971                                           _NL_COLLATE_SYMB_TABLEMB);
02972                             extra = (const unsigned char *)
02973                               _NL_CURRENT (LC_COLLATE,
02974                                           _NL_COLLATE_SYMB_EXTRAMB);
02975 
02976                             /* Locate the character in the hashing table.  */
02977                             hash = elem_hash (char_str, c1);
02978 
02979                             idx = 0;
02980                             elem = hash % table_size;
02981                             second = hash % (table_size - 2);
02982                             while (symb_table[2 * elem] != 0)
02983                               {
02984                                 /* First compare the hashing value.  */
02985                                 if (symb_table[2 * elem] == hash
02986                                    && c1 == extra[symb_table[2 * elem + 1]]
02987                                    && memcmp (char_str,
02988                                              &extra[symb_table[2 * elem + 1]
02989                                                   + 1], c1) == 0)
02990                                   {
02991                                    /* Yep, this is the entry.  */
02992                                    idx = symb_table[2 * elem + 1];
02993                                    idx += 1 + extra[idx];
02994                                    break;
02995                                   }
02996 
02997                                 /* Next entry.  */
02998                                 elem += second;
02999                               }
03000 
03001                             if (symb_table[2 * elem] != 0)
03002                               {
03003                                 /* Compute the index of the byte sequence
03004                                    in the table.  */
03005                                 idx += 1 + extra[idx];
03006                                 /* Adjust for the alignment.  */
03007                                 idx = (idx + 3) & ~3;
03008 
03009                                 str[0] = (wchar_t) idx + 4;
03010                               }
03011                             else if (symb_table[2 * elem] == 0 && c1 == 1)
03012                               {
03013                                 /* No valid character.  Match it as a
03014                                    single byte character.  */
03015                                 had_char_class = false;
03016                                 BUF_PUSH(str[0]);
03017                                 /* Update the length of characters  */
03018                                 laststart[5]++;
03019                                 range_start = str[0];
03020 
03021                                 /* Throw away the ] at the end of the
03022                                    collating symbol.  */
03023                                 PATFETCH (c);
03024                                 /* exit from the switch block.  */
03025                                 continue;
03026                               }
03027                             else
03028                               FREE_STACK_RETURN (REG_ECOLLATE);
03029                            }
03030                          datasize = 1;
03031                        }
03032 # endif
03033                         /* Throw away the ] at the end of the equivalence
03034                            class (or collating symbol).  */
03035                         PATFETCH (c);
03036 
03037                      /* Allocate the space for the equivalence class
03038                         (or collating symbol) (and '\0' if needed).  */
03039                         GET_BUFFER_SPACE(datasize);
03040                      /* Update the pointer to indicate end of buffer.  */
03041                         b += datasize;
03042 
03043                      if (delim == '=')
03044                        { /* equivalence class  */
03045                          /* Calculate the offset of char_ranges,
03046                             which is next to equivalence_classes.  */
03047                          offset = laststart[1] + laststart[2]
03048                            + laststart[3] +6;
03049                          /* Insert space.  */
03050                          insert_space(datasize, laststart + offset, b - 1);
03051 
03052                          /* Write the equivalence_class and \0.  */
03053                          for (i = 0 ; i < datasize ; i++)
03054                            laststart[offset + i] = str[i];
03055 
03056                          /* Update the length of equivalence_classes.  */
03057                          laststart[3] += datasize;
03058                          had_char_class = true;
03059                        }
03060                      else /* delim == '.' */
03061                        { /* collating symbol  */
03062                          /* Calculate the offset of the equivalence_classes,
03063                             which is next to collating_symbols.  */
03064                          offset = laststart[1] + laststart[2] + 6;
03065                          /* Insert space and write the collating_symbol
03066                             and \0.  */
03067                          insert_space(datasize, laststart + offset, b-1);
03068                          for (i = 0 ; i < datasize ; i++)
03069                            laststart[offset + i] = str[i];
03070 
03071                          /* In re_match_2_internal if range_start < -1, we
03072                             assume -range_start is the offset of the
03073                             collating symbol which is specified as
03074                             the character of the range start.  So we assign
03075                             -(laststart[1] + laststart[2] + 6) to
03076                             range_start.  */
03077                          range_start = -(laststart[1] + laststart[2] + 6);
03078                          /* Update the length of collating_symbol.  */
03079                          laststart[2] += datasize;
03080                          had_char_class = false;
03081                        }
03082                     }
03083                     else
03084                       {
03085                         c1++;
03086                         while (c1--)
03087                           PATUNFETCH;
03088                         BUF_PUSH ('[');
03089                         BUF_PUSH (delim);
03090                         laststart[5] += 2; /* Update the length of characters  */
03091                      range_start = delim;
03092                         had_char_class = false;
03093                       }
03094                 }
03095                 else
03096                   {
03097                     had_char_class = false;
03098                   BUF_PUSH(c);
03099                   laststart[5]++;  /* Update the length of characters  */
03100                   range_start = c;
03101                   }
03102              }
03103 
03104 #else /* BYTE */
03105             /* Ensure that we have enough space to push a charset: the
03106                opcode, the length count, and the bitset; 34 bytes in all.  */
03107            GET_BUFFER_SPACE (34);
03108 
03109             laststart = b;
03110 
03111             /* We test `*p == '^' twice, instead of using an if
03112                statement, so we only need one BUF_PUSH.  */
03113             BUF_PUSH (*p == '^' ? charset_not : charset);
03114             if (*p == '^')
03115               p++;
03116 
03117             /* Remember the first position in the bracket expression.  */
03118             p1 = p;
03119 
03120             /* Push the number of bytes in the bitmap.  */
03121             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
03122 
03123             /* Clear the whole map.  */
03124             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
03125 
03126             /* charset_not matches newline according to a syntax bit.  */
03127             if ((re_opcode_t) b[-2] == charset_not
03128                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
03129               SET_LIST_BIT ('\n');
03130 
03131             /* Read in characters and ranges, setting map bits.  */
03132             for (;;)
03133               {
03134                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03135 
03136                 PATFETCH (c);
03137 
03138                 /* \ might escape characters inside [...] and [^...].  */
03139                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
03140                   {
03141                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
03142 
03143                     PATFETCH (c1);
03144                     SET_LIST_BIT (c1);
03145                   range_start = c1;
03146                     continue;
03147                   }
03148 
03149                 /* Could be the end of the bracket expression.  If it's
03150                    not (i.e., when the bracket expression is `[]' so
03151                    far), the ']' character bit gets set way below.  */
03152                 if (c == ']' && p != p1 + 1)
03153                   break;
03154 
03155                 /* Look ahead to see if it's a range when the last thing
03156                    was a character class.  */
03157                 if (had_char_class && c == '-' && *p != ']')
03158                   FREE_STACK_RETURN (REG_ERANGE);
03159 
03160                 /* Look ahead to see if it's a range when the last thing
03161                    was a character: if this is a hyphen not at the
03162                    beginning or the end of a list, then it's the range
03163                    operator.  */
03164                 if (c == '-'
03165                     && !(p - 2 >= pattern && p[-2] == '[')
03166                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
03167                     && *p != ']')
03168                   {
03169                     reg_errcode_t ret
03170                       = byte_compile_range (range_start, &p, pend, translate,
03171                                        syntax, b);
03172                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
03173                   range_start = 0xffffffff;
03174                   }
03175 
03176                 else if (p[0] == '-' && p[1] != ']')
03177                   { /* This handles ranges made up of characters only.  */
03178                     reg_errcode_t ret;
03179 
03180                   /* Move past the `-'.  */
03181                     PATFETCH (c1);
03182 
03183                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
03184                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
03185                   range_start = 0xffffffff;
03186                   }
03187 
03188                 /* See if we're at the beginning of a possible character
03189                    class.  */
03190 
03191                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
03192                   { /* Leave room for the null.  */
03193                     char str[CHAR_CLASS_MAX_LENGTH + 1];
03194 
03195                     PATFETCH (c);
03196                     c1 = 0;
03197 
03198                     /* If pattern is `[[:'.  */
03199                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03200 
03201                     for (;;)
03202                       {
03203                         PATFETCH (c);
03204                         if ((c == ':' && *p == ']') || p == pend)
03205                           break;
03206                      if (c1 < CHAR_CLASS_MAX_LENGTH)
03207                        str[c1++] = c;
03208                      else
03209                        /* This is in any case an invalid class name.  */
03210                        str[0] = '\0';
03211                       }
03212                     str[c1] = '\0';
03213 
03214                     /* If isn't a word bracketed by `[:' and `:]':
03215                        undo the ending character, the letters, and leave
03216                        the leading `:' and `[' (but set bits for them).  */
03217                     if (c == ':' && *p == ']')
03218                       {
03219 # if defined _LIBC || WIDE_CHAR_SUPPORT
03220                         boolean is_lower = STREQ (str, "lower");
03221                         boolean is_upper = STREQ (str, "upper");
03222                      wctype_t wt;
03223                         int ch;
03224 
03225                      wt = IS_CHAR_CLASS (str);
03226                      if (wt == 0)
03227                        FREE_STACK_RETURN (REG_ECTYPE);
03228 
03229                         /* Throw away the ] at the end of the character
03230                            class.  */
03231                         PATFETCH (c);
03232 
03233                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03234 
03235                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
03236                        {
03237 #  ifdef _LIBC
03238                          if (__iswctype (__btowc (ch), wt))
03239                            SET_LIST_BIT (ch);
03240 #  else
03241                          if (iswctype (btowc (ch), wt))
03242                            SET_LIST_BIT (ch);
03243 #  endif
03244 
03245                          if (translate && (is_upper || is_lower)
03246                             && (ISUPPER (ch) || ISLOWER (ch)))
03247                            SET_LIST_BIT (ch);
03248                        }
03249 
03250                         had_char_class = true;
03251 # else
03252                         int ch;
03253                         boolean is_alnum = STREQ (str, "alnum");
03254                         boolean is_alpha = STREQ (str, "alpha");
03255                         boolean is_blank = STREQ (str, "blank");
03256                         boolean is_cntrl = STREQ (str, "cntrl");
03257                         boolean is_digit = STREQ (str, "digit");
03258                         boolean is_graph = STREQ (str, "graph");
03259                         boolean is_lower = STREQ (str, "lower");
03260                         boolean is_print = STREQ (str, "print");
03261                         boolean is_punct = STREQ (str, "punct");
03262                         boolean is_space = STREQ (str, "space");
03263                         boolean is_upper = STREQ (str, "upper");
03264                         boolean is_xdigit = STREQ (str, "xdigit");
03265 
03266                         if (!IS_CHAR_CLASS (str))
03267                        FREE_STACK_RETURN (REG_ECTYPE);
03268 
03269                         /* Throw away the ] at the end of the character
03270                            class.  */
03271                         PATFETCH (c);
03272 
03273                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03274 
03275                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
03276                           {
03277                          /* This was split into 3 if's to
03278                             avoid an arbitrary limit in some compiler.  */
03279                             if (   (is_alnum  && ISALNUM (ch))
03280                                 || (is_alpha  && ISALPHA (ch))
03281                                 || (is_blank  && ISBLANK (ch))
03282                                 || (is_cntrl  && ISCNTRL (ch)))
03283                            SET_LIST_BIT (ch);
03284                          if (   (is_digit  && ISDIGIT (ch))
03285                                 || (is_graph  && ISGRAPH (ch))
03286                                 || (is_lower  && ISLOWER (ch))
03287                                 || (is_print  && ISPRINT (ch)))
03288                            SET_LIST_BIT (ch);
03289                          if (   (is_punct  && ISPUNCT (ch))
03290                                 || (is_space  && ISSPACE (ch))
03291                                 || (is_upper  && ISUPPER (ch))
03292                                 || (is_xdigit && ISXDIGIT (ch)))
03293                            SET_LIST_BIT (ch);
03294                          if (   translate && (is_upper || is_lower)
03295                             && (ISUPPER (ch) || ISLOWER (ch)))
03296                            SET_LIST_BIT (ch);
03297                           }
03298                         had_char_class = true;
03299 # endif       /* libc || wctype.h */
03300                       }
03301                     else
03302                       {
03303                         c1++;
03304                         while (c1--)
03305                           PATUNFETCH;
03306                         SET_LIST_BIT ('[');
03307                         SET_LIST_BIT (':');
03308                      range_start = ':';
03309                         had_char_class = false;
03310                       }
03311                   }
03312                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
03313                 {
03314                   unsigned char str[MB_LEN_MAX + 1];
03315 # ifdef _LIBC
03316                   uint32_t nrules =
03317                     _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03318 # endif
03319 
03320                   PATFETCH (c);
03321                   c1 = 0;
03322 
03323                   /* If pattern is `[[='.  */
03324                   if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03325 
03326                   for (;;)
03327                     {
03328                      PATFETCH (c);
03329                      if ((c == '=' && *p == ']') || p == pend)
03330                        break;
03331                      if (c1 < MB_LEN_MAX)
03332                        str[c1++] = c;
03333                      else
03334                        /* This is in any case an invalid class name.  */
03335                        str[0] = '\0';
03336                       }
03337                   str[c1] = '\0';
03338 
03339                   if (c == '=' && *p == ']' && str[0] != '\0')
03340                     {
03341                      /* If we have no collation data we use the default
03342                         collation in which each character is in a class
03343                         by itself.  It also means that ASCII is the
03344                         character set and therefore we cannot have character
03345                         with more than one byte in the multibyte
03346                         representation.  */
03347 # ifdef _LIBC
03348                      if (nrules == 0)
03349 # endif
03350                        {
03351                          if (c1 != 1)
03352                            FREE_STACK_RETURN (REG_ECOLLATE);
03353 
03354                          /* Throw away the ] at the end of the equivalence
03355                             class.  */
03356                          PATFETCH (c);
03357 
03358                          /* Set the bit for the character.  */
03359                          SET_LIST_BIT (str[0]);
03360                        }
03361 # ifdef _LIBC
03362                      else
03363                        {
03364                          /* Try to match the byte sequence in `str' against
03365                             those known to the collate implementation.
03366                             First find out whether the bytes in `str' are
03367                             actually from exactly one character.  */
03368                          const int32_t *table;
03369                          const unsigned char *weights;
03370                          const unsigned char *extra;
03371                          const int32_t *indirect;
03372                          int32_t idx;
03373                          const unsigned char *cp = str;
03374                          int ch;
03375 
03376                          /* This #include defines a local function!  */
03377 #  include <locale/weight.h>
03378 
03379                          table = (const int32_t *)
03380                            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
03381                          weights = (const unsigned char *)
03382                            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
03383                          extra = (const unsigned char *)
03384                            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
03385                          indirect = (const int32_t *)
03386                            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
03387 
03388                          idx = findidx (&cp);
03389                          if (idx == 0 || cp < str + c1)
03390                            /* This is no valid character.  */
03391                            FREE_STACK_RETURN (REG_ECOLLATE);
03392 
03393                          /* Throw away the ] at the end of the equivalence
03394                             class.  */
03395                          PATFETCH (c);
03396 
03397                          /* Now we have to go through the whole table
03398                             and find all characters which have the same
03399                             first level weight.
03400 
03401                             XXX Note that this is not entirely correct.
03402                             we would have to match multibyte sequences
03403                             but this is not possible with the current
03404                             implementation.  */
03405                          for (ch = 1; ch < 256; ++ch)
03406                            /* XXX This test would have to be changed if we
03407                              would allow matching multibyte sequences.  */
03408                            if (table[ch] > 0)
03409                             {
03410                               int32_t idx2 = table[ch];
03411                               size_t len = weights[idx2];
03412 
03413                               /* Test whether the lengths match.  */
03414                               if (weights[idx] == len)
03415                                 {
03416                                   /* They do.  Now compare the bytes of
03417                                     the weight.  */
03418                                   size_t cnt = 0;
03419 
03420                                   while (cnt < len
03421                                         && (weights[idx + 1 + cnt]
03422                                            == weights[idx2 + 1 + cnt]))
03423                                    ++cnt;
03424 
03425                                   if (cnt == len)
03426                                    /* They match.  Mark the character as
03427                                       acceptable.  */
03428                                    SET_LIST_BIT (ch);
03429                                 }
03430                             }
03431                        }
03432 # endif
03433                      had_char_class = true;
03434                     }
03435                     else
03436                       {
03437                         c1++;
03438                         while (c1--)
03439                           PATUNFETCH;
03440                         SET_LIST_BIT ('[');
03441                         SET_LIST_BIT ('=');
03442                      range_start = '=';
03443                         had_char_class = false;
03444                       }
03445                 }
03446                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
03447                 {
03448                   unsigned char str[128]; /* Should be large enough.  */
03449 # ifdef _LIBC
03450                   uint32_t nrules =
03451                     _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03452 # endif
03453 
03454                   PATFETCH (c);
03455                   c1 = 0;
03456 
03457                   /* If pattern is `[[.'.  */
03458                   if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03459 
03460                   for (;;)
03461                     {
03462                      PATFETCH (c);
03463                      if ((c == '.' && *p == ']') || p == pend)
03464                        break;
03465                      if (c1 < sizeof (str))
03466                        str[c1++] = c;
03467                      else
03468                        /* This is in any case an invalid class name.  */
03469                        str[0] = '\0';
03470                       }
03471                   str[c1] = '\0';
03472 
03473                   if (c == '.' && *p == ']' && str[0] != '\0')
03474                     {
03475                      /* If we have no collation data we use the default
03476                         collation in which each character is the name
03477                         for its own class which contains only the one
03478                         character.  It also means that ASCII is the
03479                         character set and therefore we cannot have character
03480                         with more than one byte in the multibyte
03481                         representation.  */
03482 # ifdef _LIBC
03483                      if (nrules == 0)
03484 # endif
03485                        {
03486                          if (c1 != 1)
03487                            FREE_STACK_RETURN (REG_ECOLLATE);
03488 
03489                          /* Throw away the ] at the end of the collating
03490                             symbol.  */
03491                          PATFETCH (c);
03492 
03493                          /* Set the bit for the character.  */
03494                          SET_LIST_BIT (str[0]);
03495                          range_start = ((const unsigned char *) str)[0];
03496                        }
03497 # ifdef _LIBC
03498                      else
03499                        {
03500                          /* Try to match the byte sequence in `str' against
03501                             those known to the collate implementation.
03502                             First find out whether the bytes in `str' are
03503                             actually from exactly one character.  */
03504                          int32_t table_size;
03505                          const int32_t *symb_table;
03506                          const unsigned char *extra;
03507                          int32_t idx;
03508                          int32_t elem;
03509                          int32_t second;
03510                          int32_t hash;
03511 
03512                          table_size =
03513                            _NL_CURRENT_WORD (LC_COLLATE,
03514                                           _NL_COLLATE_SYMB_HASH_SIZEMB);
03515                          symb_table = (const int32_t *)
03516                            _NL_CURRENT (LC_COLLATE,
03517                                       _NL_COLLATE_SYMB_TABLEMB);
03518                          extra = (const unsigned char *)
03519                            _NL_CURRENT (LC_COLLATE,
03520                                       _NL_COLLATE_SYMB_EXTRAMB);
03521 
03522                          /* Locate the character in the hashing table.  */
03523                          hash = elem_hash (str, c1);
03524 
03525                          idx = 0;
03526                          elem = hash % table_size;
03527                          second = hash % (table_size - 2);
03528                          while (symb_table[2 * elem] != 0)
03529                            {
03530                             /* First compare the hashing value.  */
03531                             if (symb_table[2 * elem] == hash
03532                                 && c1 == extra[symb_table[2 * elem + 1]]
03533                                 && memcmp (str,
03534                                           &extra[symb_table[2 * elem + 1]
03535                                                + 1],
03536                                           c1) == 0)
03537                               {
03538                                 /* Yep, this is the entry.  */
03539                                 idx = symb_table[2 * elem + 1];
03540                                 idx += 1 + extra[idx];
03541                                 break;
03542                               }
03543 
03544                             /* Next entry.  */
03545                             elem += second;
03546                            }
03547 
03548                          if (symb_table[2 * elem] == 0)
03549                            /* This is no valid character.  */
03550                            FREE_STACK_RETURN (REG_ECOLLATE);
03551 
03552                          /* Throw away the ] at the end of the collating
03553                             symbol.  */
03554                          PATFETCH (c);
03555 
03556                          /* Now add the multibyte character(s) we found
03557                             to the accept list.
03558 
03559                             XXX Note that this is not entirely correct.
03560                             we would have to match multibyte sequences
03561                             but this is not possible with the current
03562                             implementation.  Also, we have to match
03563                             collating symbols, which expand to more than
03564                             one byte, as a whole and not allow the
03565                             individual bytes.  */
03566                          c1 = extra[idx++];
03567                          if (c1 == 1)
03568                            range_start = extra[idx];
03569                          while (c1-- > 0)
03570                            {
03571                             SET_LIST_BIT (extra[idx]);
03572                             ++idx;
03573                            }
03574                        }
03575 # endif
03576                      had_char_class = false;
03577                     }
03578                     else
03579                       {
03580                         c1++;
03581                         while (c1--)
03582                           PATUNFETCH;
03583                         SET_LIST_BIT ('[');
03584                         SET_LIST_BIT ('.');
03585                      range_start = '.';
03586                         had_char_class = false;
03587                       }
03588                 }
03589                 else
03590                   {
03591                     had_char_class = false;
03592                     SET_LIST_BIT (c);
03593                   range_start = c;
03594                   }
03595               }
03596 
03597             /* Discard any (non)matching list bytes that are all 0 at the
03598                end of the map.  Decrease the map-length byte too.  */
03599             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
03600               b[-1]--;
03601             b += b[-1];
03602 #endif /* WCHAR */
03603           }
03604           break;
03605 
03606 
03607        case '(':
03608           if (syntax & RE_NO_BK_PARENS)
03609             goto handle_open;
03610           else
03611             goto normal_char;
03612 
03613 
03614         case ')':
03615           if (syntax & RE_NO_BK_PARENS)
03616             goto handle_close;
03617           else
03618             goto normal_char;
03619 
03620 
03621         case '\n':
03622           if (syntax & RE_NEWLINE_ALT)
03623             goto handle_alt;
03624           else
03625             goto normal_char;
03626 
03627 
03628        case '|':
03629           if (syntax & RE_NO_BK_VBAR)
03630             goto handle_alt;
03631           else
03632             goto normal_char;
03633 
03634 
03635         case '{':
03636            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
03637              goto handle_interval;
03638            else
03639              goto normal_char;
03640 
03641 
03642         case '\\':
03643           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
03644 
03645           /* Do not translate the character after the \, so that we can
03646              distinguish, e.g., \B from \b, even if we normally would
03647              translate, e.g., B to b.  */
03648           PATFETCH_RAW (c);
03649 
03650           switch (c)
03651             {
03652             case '(':
03653               if (syntax & RE_NO_BK_PARENS)
03654                 goto normal_backslash;
03655 
03656             handle_open:
03657               bufp->re_nsub++;
03658               regnum++;
03659 
03660               if (COMPILE_STACK_FULL)
03661                 {
03662                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
03663                             compile_stack_elt_t);
03664                   if (compile_stack.stack == NULL) return REG_ESPACE;
03665 
03666                   compile_stack.size <<= 1;
03667                 }
03668 
03669               /* These are the values to restore when we hit end of this
03670                  group.  They are all relative offsets, so that if the
03671                  whole pattern moves because of realloc, they will still
03672                  be valid.  */
03673               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
03674               COMPILE_STACK_TOP.fixup_alt_jump
03675                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
03676               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
03677               COMPILE_STACK_TOP.regnum = regnum;
03678 
03679               /* We will eventually replace the 0 with the number of
03680                  groups inner to this one.  But do not push a
03681                  start_memory for groups beyond the last one we can
03682                  represent in the compiled pattern.  */
03683               if (regnum <= MAX_REGNUM)
03684                 {
03685                   COMPILE_STACK_TOP.inner_group_offset = b
03686                   - COMPILED_BUFFER_VAR + 2;
03687                   BUF_PUSH_3 (start_memory, regnum, 0);
03688                 }
03689 
03690               compile_stack.avail++;
03691 
03692               fixup_alt_jump = 0;
03693               laststart = 0;
03694               begalt = b;
03695              /* If we've reached MAX_REGNUM groups, then this open
03696                won't actually generate any code, so we'll have to
03697                clear pending_exact explicitly.  */
03698              pending_exact = 0;
03699               break;
03700 
03701 
03702             case ')':
03703               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
03704 
03705               if (COMPILE_STACK_EMPTY)
03706               {
03707                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
03708                   goto normal_backslash;
03709                 else
03710                   FREE_STACK_RETURN (REG_ERPAREN);
03711               }
03712 
03713             handle_close:
03714               if (fixup_alt_jump)
03715                 { /* Push a dummy failure point at the end of the
03716                      alternative for a possible future
03717                      `pop_failure_jump' to pop.  See comments at
03718                      `push_dummy_failure' in `re_match_2'.  */
03719                   BUF_PUSH (push_dummy_failure);
03720 
03721                   /* We allocated space for this jump when we assigned
03722                      to `fixup_alt_jump', in the `handle_alt' case below.  */
03723                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
03724                 }
03725 
03726               /* See similar code for backslashed left paren above.  */
03727               if (COMPILE_STACK_EMPTY)
03728               {
03729                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
03730                   goto normal_char;
03731                 else
03732                   FREE_STACK_RETURN (REG_ERPAREN);
03733               }
03734 
03735               /* Since we just checked for an empty stack above, this
03736                  ``can't happen''.  */
03737               assert (compile_stack.avail != 0);
03738               {
03739                 /* We don't just want to restore into `regnum', because
03740                    later groups should continue to be numbered higher,
03741                    as in `(ab)c(de)' -- the second group is #2.  */
03742                 regnum_t this_group_regnum;
03743 
03744                 compile_stack.avail--;
03745                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
03746                 fixup_alt_jump
03747                   = COMPILE_STACK_TOP.fixup_alt_jump
03748                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
03749                     : 0;
03750                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
03751                 this_group_regnum = COMPILE_STACK_TOP.regnum;
03752               /* If we've reached MAX_REGNUM groups, then this open
03753                  won't actually generate any code, so we'll have to
03754                  clear pending_exact explicitly.  */
03755               pending_exact = 0;
03756 
03757                 /* We're at the end of the group, so now we know how many
03758                    groups were inside this one.  */
03759                 if (this_group_regnum <= MAX_REGNUM)
03760                   {
03761                   UCHAR_T *inner_group_loc
03762                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
03763 
03764                     *inner_group_loc = regnum - this_group_regnum;
03765                     BUF_PUSH_3 (stop_memory, this_group_regnum,
03766                                 regnum - this_group_regnum);
03767                   }
03768               }
03769               break;
03770 
03771 
03772             case '|':                                   /* `\|'.  */
03773               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
03774                 goto normal_backslash;
03775             handle_alt:
03776               if (syntax & RE_LIMITED_OPS)
03777                 goto normal_char;
03778 
03779               /* Insert before the previous alternative a jump which
03780                  jumps to this alternative if the former fails.  */
03781               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03782               INSERT_JUMP (on_failure_jump, begalt,
03783                         b + 2 + 2 * OFFSET_ADDRESS_SIZE);
03784               pending_exact = 0;
03785               b += 1 + OFFSET_ADDRESS_SIZE;
03786 
03787               /* The alternative before this one has a jump after it
03788                  which gets executed if it gets matched.  Adjust that
03789                  jump so it will jump to this alternative's analogous
03790                  jump (put in below, which in turn will jump to the next
03791                  (if any) alternative's such jump, etc.).  The last such
03792                  jump jumps to the correct final destination.  A picture:
03793                           _____ _____
03794                           |   | |   |
03795                           |   v |   v
03796                          a | b   | c
03797 
03798                  If we are at `b', then fixup_alt_jump right now points to a
03799                  three-byte space after `a'.  We'll put in the jump, set
03800                  fixup_alt_jump to right after `b', and leave behind three
03801                  bytes which we'll fill in when we get to after `c'.  */
03802 
03803               if (fixup_alt_jump)
03804                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
03805 
03806               /* Mark and leave space for a jump after this alternative,
03807                  to be filled in later either by next alternative or
03808                  when know we're at the end of a series of alternatives.  */
03809               fixup_alt_jump = b;
03810               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03811               b += 1 + OFFSET_ADDRESS_SIZE;
03812 
03813               laststart = 0;
03814               begalt = b;
03815               break;
03816 
03817 
03818             case '{':
03819               /* If \{ is a literal.  */
03820               if (!(syntax & RE_INTERVALS)
03821                      /* If we're at `\{' and it's not the open-interval
03822                         operator.  */
03823                 || (syntax & RE_NO_BK_BRACES))
03824                 goto normal_backslash;
03825 
03826             handle_interval:
03827               {
03828                 /* If got here, then the syntax allows intervals.  */
03829 
03830                 /* At least (most) this many matches must be made.  */
03831                 int lower_bound = -1, upper_bound = -1;
03832 
03833               /* Place in the uncompiled pattern (i.e., just after
03834                  the '{') to go back to if the interval is invalid.  */
03835               const CHAR_T *beg_interval = p;
03836 
03837                 if (p == pend)
03838                 goto invalid_interval;
03839 
03840                 GET_UNSIGNED_NUMBER (lower_bound);
03841 
03842                 if (c == ',')
03843                   {
03844                     GET_UNSIGNED_NUMBER (upper_bound);
03845                   if (upper_bound < 0)
03846                     upper_bound = RE_DUP_MAX;
03847                   }
03848                 else
03849                   /* Interval such as `{1}' => match exactly once. */
03850                   upper_bound = lower_bound;
03851 
03852                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
03853                 goto invalid_interval;
03854 
03855                 if (!(syntax & RE_NO_BK_BRACES))
03856                   {
03857                   if (c != '\\' || p == pend)
03858                     goto invalid_interval;
03859                     PATFETCH (c);
03860                   }
03861 
03862                 if (c != '}')
03863                 goto invalid_interval;
03864 
03865                 /* If it's invalid to have no preceding re.  */
03866                 if (!laststart)
03867                   {
03868                   if (syntax & RE_CONTEXT_INVALID_OPS
03869                      && !(syntax & RE_INVALID_INTERVAL_ORD))
03870                       FREE_STACK_RETURN (REG_BADRPT);
03871                     else if (syntax & RE_CONTEXT_INDEP_OPS)
03872                       laststart = b;
03873                     else
03874                       goto unfetch_interval;
03875                   }
03876 
03877                 /* We just parsed a valid interval.  */
03878 
03879                 if (RE_DUP_MAX < upper_bound)
03880                 FREE_STACK_RETURN (REG_BADBR);
03881 
03882                 /* If the upper bound is zero, don't want to succeed at
03883                    all; jump from `laststart' to `b + 3', which will be
03884                  the end of the buffer after we insert the jump.  */
03885               /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
03886                  instead of 'b + 3'.  */
03887                  if (upper_bound == 0)
03888                    {
03889                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03890                      INSERT_JUMP (jump, laststart, b + 1
03891                               + OFFSET_ADDRESS_SIZE);
03892                      b += 1 + OFFSET_ADDRESS_SIZE;
03893                    }
03894 
03895                  /* Otherwise, we have a nontrivial interval.  When
03896                     we're all done, the pattern will look like:
03897                       set_number_at <jump count> <upper bound>
03898                       set_number_at <succeed_n count> <lower bound>
03899                       succeed_n <after jump addr> <succeed_n count>
03900                       <body of loop>
03901                       jump_n <succeed_n addr> <jump count>
03902                     (The upper bound and `jump_n' are omitted if
03903                     `upper_bound' is 1, though.)  */
03904                  else
03905                    { /* If the upper bound is > 1, we need to insert
03906                         more at the end of the loop.  */
03907                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
03908                      (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
03909 
03910                      GET_BUFFER_SPACE (nbytes);
03911 
03912                      /* Initialize lower bound of the `succeed_n', even
03913                         though it will be set during matching by its
03914                         attendant `set_number_at' (inserted next),
03915                         because `re_compile_fastmap' needs to know.
03916                         Jump to the `jump_n' we might insert below.  */
03917                      INSERT_JUMP2 (succeed_n, laststart,
03918                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
03919                                + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
03920                                , lower_bound);
03921                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03922 
03923                      /* Code to initialize the lower bound.  Insert
03924                         before the `succeed_n'.  The `5' is the last two
03925                         bytes of this `set_number_at', plus 3 bytes of
03926                         the following `succeed_n'.  */
03927                    /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
03928                      is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
03929                      of the following `succeed_n'.  */
03930                      PREFIX(insert_op2) (set_number_at, laststart, 1
03931                              + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
03932                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03933 
03934                      if (upper_bound > 1)
03935                        { /* More than one repetition is allowed, so
03936                             append a backward jump to the `succeed_n'
03937                             that starts this interval.
03938 
03939                             When we've reached this during matching,
03940                             we'll have matched the interval once, so
03941                             jump back only `upper_bound - 1' times.  */
03942                          STORE_JUMP2 (jump_n, b, laststart
03943                                   + 2 * OFFSET_ADDRESS_SIZE + 1,
03944                                       upper_bound - 1);
03945                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03946 
03947                          /* The location we want to set is the second
03948                             parameter of the `jump_n'; that is `b-2' as
03949                             an absolute address.  `laststart' will be
03950                             the `set_number_at' we're about to insert;
03951                             `laststart+3' the number to set, the source
03952                             for the relative address.  But we are
03953                             inserting into the middle of the pattern --
03954                             so everything is getting moved up by 5.
03955                             Conclusion: (b - 2) - (laststart + 3) + 5,
03956                             i.e., b - laststart.
03957 
03958                             We insert this at the beginning of the loop
03959                             so that if we fail during matching, we'll
03960                             reinitialize the bounds.  */
03961                          PREFIX(insert_op2) (set_number_at, laststart,
03962                                         b - laststart,
03963                                         upper_bound - 1, b);
03964                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03965                        }
03966                    }
03967                 pending_exact = 0;
03968               break;
03969 
03970              invalid_interval:
03971               if (!(syntax & RE_INVALID_INTERVAL_ORD))
03972                 FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
03973              unfetch_interval:
03974               /* Match the characters as literals.  */
03975               p = beg_interval;
03976               c = '{';
03977               if (syntax & RE_NO_BK_BRACES)
03978                 goto normal_char;
03979               else
03980                 goto normal_backslash;
03981              }
03982 
03983 #ifdef emacs
03984             /* There is no way to specify the before_dot and after_dot
03985                operators.  rms says this is ok.  --karl  */
03986             case '=':
03987               BUF_PUSH (at_dot);
03988               break;
03989 
03990             case 's':
03991               laststart = b;
03992               PATFETCH (c);
03993               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
03994               break;
03995 
03996             case 'S':
03997               laststart = b;
03998               PATFETCH (c);
03999               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
04000               break;
04001 #endif /* emacs */
04002 
04003 
04004             case 'w':
04005              if (syntax & RE_NO_GNU_OPS)
04006               goto normal_char;
04007               laststart = b;
04008               BUF_PUSH (wordchar);
04009               break;
04010 
04011 
04012             case 'W':
04013              if (syntax & RE_NO_GNU_OPS)
04014               goto normal_char;
04015               laststart = b;
04016               BUF_PUSH (notwordchar);
04017               break;
04018 
04019 
04020             case '<':
04021              if (syntax & RE_NO_GNU_OPS)
04022               goto normal_char;
04023               BUF_PUSH (wordbeg);
04024               break;
04025 
04026             case '>':
04027              if (syntax & RE_NO_GNU_OPS)
04028               goto normal_char;
04029               BUF_PUSH (wordend);
04030               break;
04031 
04032             case 'b':
04033              if (syntax & RE_NO_GNU_OPS)
04034               goto normal_char;
04035               BUF_PUSH (wordbound);
04036               break;
04037 
04038             case 'B':
04039              if (syntax & RE_NO_GNU_OPS)
04040               goto normal_char;
04041               BUF_PUSH (notwordbound);
04042               break;
04043 
04044             case '`':
04045              if (syntax & RE_NO_GNU_OPS)
04046               goto normal_char;
04047               BUF_PUSH (begbuf);
04048               break;
04049 
04050             case '\'':
04051              if (syntax & RE_NO_GNU_OPS)
04052               goto normal_char;
04053               BUF_PUSH (endbuf);
04054               break;
04055 
04056             case '1': case '2': case '3': case '4': case '5':
04057             case '6': case '7': case '8': case '9':
04058               if (syntax & RE_NO_BK_REFS)
04059                 goto normal_char;
04060 
04061               c1 = c - '0';
04062 
04063               if (c1 > regnum)
04064                 FREE_STACK_RETURN (REG_ESUBREG);
04065 
04066               /* Can't back reference to a subexpression if inside of it.  */
04067               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
04068                 goto normal_char;
04069 
04070               laststart = b;
04071               BUF_PUSH_2 (duplicate, c1);
04072               break;
04073 
04074 
04075             case '+':
04076             case '?':
04077               if (syntax & RE_BK_PLUS_QM)
04078                 goto handle_plus;
04079               else
04080                 goto normal_backslash;
04081 
04082             default:
04083             normal_backslash:
04084               /* You might think it would be useful for \ to mean
04085                  not to translate; but if we don't translate it
04086                  it will never match anything.  */
04087               c = TRANSLATE (c);
04088               goto normal_char;
04089             }
04090           break;
04091 
04092 
04093        default:
04094         /* Expects the character in `c'.  */
04095        normal_char:
04096              /* If no exactn currently being built.  */
04097           if (!pending_exact
04098 #ifdef WCHAR
04099              /* If last exactn handle binary(or character) and
04100                new exactn handle character(or binary).  */
04101              || is_exactn_bin != is_binary[p - 1 - pattern]
04102 #endif /* WCHAR */
04103 
04104               /* If last exactn not at current position.  */
04105               || pending_exact + *pending_exact + 1 != b
04106 
04107               /* We have only one byte following the exactn for the count.  */
04108              || *pending_exact == (1 << BYTEWIDTH) - 1
04109 
04110               /* If followed by a repetition operator.  */
04111               || *p == '*' || *p == '^'
04112              || ((syntax & RE_BK_PLUS_QM)
04113                 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
04114                 : (*p == '+' || *p == '?'))
04115              || ((syntax & RE_INTERVALS)
04116                   && ((syntax & RE_NO_BK_BRACES)
04117                     ? *p == '{'
04118                       : (p[0] == '\\' && p[1] == '{'))))
04119            {
04120              /* Start building a new exactn.  */
04121 
04122               laststart = b;
04123 
04124 #ifdef WCHAR
04125              /* Is this exactn binary data or character? */
04126              is_exactn_bin = is_binary[p - 1 - pattern];
04127              if (is_exactn_bin)
04128                 BUF_PUSH_2 (exactn_bin, 0);
04129              else
04130                 BUF_PUSH_2 (exactn, 0);
04131 #else
04132              BUF_PUSH_2 (exactn, 0);
04133 #endif /* WCHAR */
04134              pending_exact = b - 1;
04135             }
04136 
04137          BUF_PUSH (c);
04138           (*pending_exact)++;
04139          break;
04140         } /* switch (c) */
04141     } /* while p != pend */
04142 
04143 
04144   /* Through the pattern now.  */
04145 
04146   if (fixup_alt_jump)
04147     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
04148 
04149   if (!COMPILE_STACK_EMPTY)
04150     FREE_STACK_RETURN (REG_EPAREN);
04151 
04152   /* If we don't want backtracking, force success
04153      the first time we reach the end of the compiled pattern.  */
04154   if (syntax & RE_NO_POSIX_BACKTRACKING)
04155     BUF_PUSH (succeed);
04156 
04157 #ifdef WCHAR
04158   free (pattern);
04159   free (mbs_offset);
04160   free (is_binary);
04161 #endif
04162   free (compile_stack.stack);
04163 
04164   /* We have succeeded; set the length of the buffer.  */
04165 #ifdef WCHAR
04166   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
04167 #else
04168   bufp->used = b - bufp->buffer;
04169 #endif
04170 
04171 #ifdef DEBUG
04172   if (debug)
04173     {
04174       DEBUG_PRINT1 ("\nCompiled pattern: \n");
04175       PREFIX(print_compiled_pattern) (bufp);
04176     }
04177 #endif /* DEBUG */
04178 
04179 #ifndef MATCH_MAY_ALLOCATE
04180   /* Initialize the failure stack to the largest possible stack.  This
04181      isn't necessary unless we're trying to avoid calling alloca in
04182      the search and match routines.  */
04183   {
04184     int num_regs = bufp->re_nsub + 1;
04185 
04186     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
04187        is strictly greater than re_max_failures, the largest possible stack
04188        is 2 * re_max_failures failure points.  */
04189     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
04190       {
04191        fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
04192 
04193 # ifdef emacs
04194        if (! fail_stack.stack)
04195          fail_stack.stack
04196            = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
04197                                 * sizeof (PREFIX(fail_stack_elt_t)));
04198        else
04199          fail_stack.stack
04200            = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
04201                                  (fail_stack.size
04202                                   * sizeof (PREFIX(fail_stack_elt_t))));
04203 # else /* not emacs */
04204        if (! fail_stack.stack)
04205          fail_stack.stack
04206            = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
04207                                * sizeof (PREFIX(fail_stack_elt_t)));
04208        else
04209          fail_stack.stack
04210            = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
04211                                        (fail_stack.size
04212                                  * sizeof (PREFIX(fail_stack_elt_t))));
04213 # endif /* not emacs */
04214       }
04215 
04216    PREFIX(regex_grow_registers) (num_regs);
04217   }
04218 #endif /* not MATCH_MAY_ALLOCATE */
04219 
04220   return REG_NOERROR;
04221 } /* regex_compile */
04222 
04223 /* Subroutines for `regex_compile'.  */
04224 
04225 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
04226 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
04227 
04228 static void
04229 PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
04230 {
04231   *loc = (UCHAR_T) op;
04232   STORE_NUMBER (loc + 1, arg);
04233 }
04234 
04235 
04236 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
04237 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
04238 
04239 static void
04240 PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
04241 {
04242   *loc = (UCHAR_T) op;
04243   STORE_NUMBER (loc + 1, arg1);
04244   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
04245 }
04246 
04247 
04248 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
04249    for OP followed by two-byte integer parameter ARG.  */
04250 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
04251 
04252 static void
04253 PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
04254 {
04255   register UCHAR_T *pfrom = end;
04256   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
04257 
04258   while (pfrom != loc)
04259     *--pto = *--pfrom;
04260 
04261   PREFIX(store_op1) (op, loc, arg);
04262 }
04263 
04264 
04265 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
04266 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
04267 
04268 static void
04269 PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
04270                     int arg2, UCHAR_T *end)
04271 {
04272   register UCHAR_T *pfrom = end;
04273   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
04274 
04275   while (pfrom != loc)
04276     *--pto = *--pfrom;
04277 
04278   PREFIX(store_op2) (op, loc, arg1, arg2);
04279 }
04280 
04281 
04282 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
04283    after an alternative or a begin-subexpression.  We assume there is at
04284    least one character before the ^.  */
04285 
04286 static boolean
04287 PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
04288                           reg_syntax_t syntax)
04289 {
04290   const CHAR_T *prev = p - 2;
04291   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
04292 
04293   return
04294        /* After a subexpression?  */
04295        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
04296        /* After an alternative?  */
04297     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
04298 }
04299 
04300 
04301 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
04302    at least one character after the $, i.e., `P < PEND'.  */
04303 
04304 static boolean
04305 PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
04306                           reg_syntax_t syntax)
04307 {
04308   const CHAR_T *next = p;
04309   boolean next_backslash = *next == '\\';
04310   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
04311 
04312   return
04313        /* Before a subexpression?  */
04314        (syntax & RE_NO_BK_PARENS ? *next == ')'
04315         : next_backslash && next_next && *next_next == ')')
04316        /* Before an alternative?  */
04317     || (syntax & RE_NO_BK_VBAR ? *next == '|'
04318         : next_backslash && next_next && *next_next == '|');
04319 }
04320 
04321 #else /* not INSIDE_RECURSION */
04322 
04323 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
04324    false if it's not.  */
04325 
04326 static boolean
04327 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
04328 {
04329   int this_element;
04330 
04331   for (this_element = compile_stack.avail - 1;
04332        this_element >= 0;
04333        this_element--)
04334     if (compile_stack.stack[this_element].regnum == regnum)
04335       return true;
04336 
04337   return false;
04338 }
04339 #endif /* not INSIDE_RECURSION */
04340 
04341 #ifdef INSIDE_RECURSION
04342 
04343 #ifdef WCHAR
04344 /* This insert space, which size is "num", into the pattern at "loc".
04345    "end" must point the end of the allocated buffer.  */
04346 static void
04347 insert_space (int num, CHAR_T *loc, CHAR_T *end)
04348 {
04349   register CHAR_T *pto = end;
04350   register CHAR_T *pfrom = end - num;
04351 
04352   while (pfrom >= loc)
04353     *pto-- = *pfrom--;
04354 }
04355 #endif /* WCHAR */
04356 
04357 #ifdef WCHAR
/* Compile the range whose ending character is at *P_PTR (the pattern
   ends at PEND) into the wide-character bracket expression being built.
   RANGE_START_CHAR is the starting character; a value below -1 denotes a
   collating symbol, which is only decoded in the _LIBC collation branch.

   The start/end pair is inserted with insert_space at
   `b - char_set[5] - 2' and char_set[4] (the ranges index) is incremented.
   NOTE(review): char_set[5] is presumably the length of the data stored
   after the ranges section, and the caller has presumably already advanced
   B past two fresh elements -- confirm against regex_compile.

   Returns REG_ERANGE if the pattern ends before the ending character, or
   if SYNTAX has RE_NO_EMPTY_RANGES set and the start sorts after the end;
   REG_NOERROR otherwise.  In the empty-range case the pair is still stored
   and *P_PTR is still advanced; only the early `p == pend' return leaves
   everything untouched.  */
04358 static reg_errcode_t
04359 wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
04360                    const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
04361                    reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
04362 {
04363   const CHAR_T *p = *p_ptr;
04364   CHAR_T range_start, range_end;
04365   reg_errcode_t ret;
04366 # ifdef _LIBC
04367   uint32_t nrules;
04368   uint32_t start_val, end_val;
04369 # endif
04370   if (p == pend)
04371     return REG_ERANGE;
04372 
04373 # ifdef _LIBC
04374   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
04375   if (nrules != 0)
04376     {
04377       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
04378                                                  _NL_COLLATE_COLLSEQWC);
04379       const unsigned char *extra = (const unsigned char *)
04380        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
04381 
04382       if (range_start_char < -1)
04383        {
04384          /* range_start is a collating symbol.  */
04385          int32_t *wextra;
04386          /* Retrieve the index and get the collation sequence value.  */
04387          wextra = (int32_t*)(extra + char_set[-range_start_char]);
04388          start_val = wextra[1 + *wextra];
04389        }
04390       else
04391        start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
04392 
04393       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
04394 
04395       /* Report an error if the range is empty and the syntax prohibits
04396         this.  */
04397       ret = ((syntax & RE_NO_EMPTY_RANGES)
04398             && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
04399 
04400       /* Insert space at the end of the char_ranges.  */
04401       insert_space(2, b - char_set[5] - 2, b - 1);
04402       *(b - char_set[5] - 2) = (wchar_t)start_val;
04403       *(b - char_set[5] - 1) = (wchar_t)end_val;
04404       char_set[4]++; /* ranges_index */
04405     }
04406   else
04407 # endif
04408     {
04409       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
04410        range_start_char;
04411       range_end = TRANSLATE (p[0]);
04412       /* Report an error if the range is empty and the syntax prohibits
04413         this.  */
04414       ret = ((syntax & RE_NO_EMPTY_RANGES)
04415             && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
04416 
04417       /* Insert space at the end of the char_ranges.  */
04418       insert_space(2, b - char_set[5] - 2, b - 1);
04419       *(b - char_set[5] - 2) = range_start;
04420       *(b - char_set[5] - 1) = range_end;
04421       char_set[4]++; /* ranges_index */
04422     }
04423   /* Have to increment the pointer into the pattern string, so the
04424      caller isn't still at the ending character.  */
04425   (*p_ptr)++;
04426 
04427   return ret;
04428 }
04429 #else /* BYTE */
04430 /* Read the ending character of a range (in a bracket expression) from the
04431    uncompiled pattern *P_PTR (which ends at PEND).  The starting character
04432    is passed in as RANGE_START_CHAR.  Then we set the bits for the
04433    translation of every character between the starting and ending
04434    characters (inclusive) in the compiled pattern B.
04435 
04436    Return an error code: REG_ERANGE if the pattern ends before the ending
   character, or if SYNTAX has RE_NO_EMPTY_RANGES set and no bit was set
   (the range is empty); REG_NOERROR otherwise.  *P_PTR is advanced past
   the ending character unless the pattern ended early.
04437 
04438    We use these short variable names so we can use the same macros as
04439    `regex_compile' itself.  */
04440 
04441 static reg_errcode_t
04442 byte_compile_range (unsigned int range_start_char, const char **p_ptr,
04443                     const char *pend, RE_TRANSLATE_TYPE translate,
04444                     reg_syntax_t syntax, unsigned char *b)
04445 {
04446   unsigned this_char;
04447   const char *p = *p_ptr;
04448   reg_errcode_t ret;
04449 # if _LIBC
04450   const unsigned char *collseq;
04451   unsigned int start_colseq;
04452   unsigned int end_colseq;
04453 # else
04454   unsigned end_char;
04455 # endif
04456 
04457   if (p == pend)
04458     return REG_ERANGE;
04459 
04460   /* Have to increment the pointer into the pattern string, so the
04461      caller isn't still at the ending character.  The local copy P still
     points at the ending character and is what the code below reads.  */
04462   (*p_ptr)++;
04463 
04464   /* Report an error if the range is empty and the syntax prohibits this.
     RET starts out as the "empty range" answer and is reset to REG_NOERROR
     below as soon as one bit is set.  */
04465   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
04466 
04467 # if _LIBC
04468   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
04469                                            _NL_COLLATE_COLLSEQMB);
04470 
04471   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
04472   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
  /* Test every byte value: a character belongs to the range when its
     collation sequence value lies between those of the two endpoints.  */
04473   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
04474     {
04475       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
04476 
04477       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
04478        {
04479          SET_LIST_BIT (TRANSLATE (this_char));
04480          ret = REG_NOERROR;
04481        }
04482     }
04483 # else
04484   /* Here we see why `this_char' has to be larger than an `unsigned
04485      char' -- we would otherwise go into an infinite loop, since all
04486      characters <= 0xff.  */
04487   range_start_char = TRANSLATE (range_start_char);
04488   /* TRANSLATE(p[0]) is cast to char (not unsigned char) in TRANSLATE,
04489      and some compilers convert it to int implicitly, so the following
04490      for loop may fall into an (almost) infinite loop.
04491      e.g. If translate[p[0]] = 0xff, end_char may equal 0xffffffff.
04492      To avoid this, we cast the result to unsigned int and truncate it.  */
04493   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
04494 
04495   for (this_char = range_start_char; this_char <= end_char; ++this_char)
04496     {
04497       SET_LIST_BIT (TRANSLATE (this_char));
04498       ret = REG_NOERROR;
04499     }
04500 # endif
04501 
04502   return ret;
04503 }
04504 #endif /* WCHAR */
04505 
04506 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
04507    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
04508    characters can start a string that matches the pattern.  This fastmap
04509    is used by re_search to skip quickly over impossible starting points.
04510 
04511    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
04512    area as BUFP->fastmap.
04513 
04514    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
04515    the pattern buffer.
04516 
04517    Returns 0 on success, or -2 on an internal error.  */
04518 
04519 #ifdef WCHAR
04520 /* Local helper for re_compile_fastmap: reduce the wide character C to a
04521    single byte usable as a fastmap index.  C is converted to its
   multibyte form with wcrtomb (__wcrtomb under _LIBC), starting from a
   zeroed conversion state.  The first byte of that sequence is returned;
   if the conversion does not produce at least one byte, C cast to
   unsigned char is returned instead.  */
04522 static unsigned char truncate_wchar (CHAR_T c);
04523 
04524 static unsigned char
04525 truncate_wchar (CHAR_T c)
04526 {
04527   unsigned char buf[MB_CUR_MAX];
04528   mbstate_t state;
04529   int retval;
04530   memset (&state, '\0', sizeof (state));
04531 # ifdef _LIBC
04532   retval = __wcrtomb (buf, c, &state);
04533 # else
04534   retval = wcrtomb (buf, c, &state);
04535 # endif
04536   return retval > 0 ? buf[0] : (unsigned char) c;
04537 }
04538 #endif /* WCHAR */
04539 
/* Template body of re_compile_fastmap (see the description of the fastmap
   earlier in this file); PREFIX selects the byte or wide-character
   instance.

   The compiled pattern is walked opcode by opcode.  An opcode that must
   consume a character marks the possible first bytes in BUFP->fastmap and
   ends the current path; opcodes that can match the empty string are
   stepped over.  Alternatives recorded by on_failure_jump (and succeed_n
   with a zero count) are pushed on the failure stack and explored once the
   current path is finished.  `duplicate' in both builds, and the charset
   and wordchar opcodes in the WCHAR build, give up and just set
   BUFP->can_be_null.

   Sets BUFP->fastmap_accurate and BUFP->can_be_null.  Returns 0, or -2
   when PUSH_PATTERN_OP fails.  */
04540 static int
04541 PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
04542 {
04543   int j, k;
04544 #ifdef MATCH_MAY_ALLOCATE
04545   PREFIX(fail_stack_type) fail_stack;
04546 #endif
04547 #ifndef REGEX_MALLOC
04548   char *destination;
04549 #endif
04550 
04551   register char *fastmap = bufp->fastmap;
04552 
04553 #ifdef WCHAR
04554   /* We need to cast pattern to (wchar_t*), because we cast this compiled
04555      pattern to (char*) in regex_compile.  */
04556   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
04557   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
04558 #else /* BYTE */
04559   UCHAR_T *pattern = bufp->buffer;
04560   register UCHAR_T *pend = pattern + bufp->used;
04561 #endif /* WCHAR */
04562   UCHAR_T *p = pattern;
04563 
04564 #ifdef REL_ALLOC
04565   /* This holds the pointer to the failure stack, when
04566      it is allocated relocatably.  */
04567   fail_stack_elt_t *failure_stack_ptr;
04568 #endif
04569 
04570   /* Assume that each path through the pattern can be null until
04571      proven otherwise.  We set this false at the bottom of switch
04572      statement, to which we get only if a particular path doesn't
04573      match the empty string.  */
04574   boolean path_can_be_null = true;
04575 
04576   /* We aren't doing a `succeed_n' to begin with.  */
04577   boolean succeed_n_p = false;
04578 
04579   assert (fastmap != NULL && p != NULL);
04580 
04581   INIT_FAIL_STACK ();
04582   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
04583   bufp->fastmap_accurate = 1;          /* It will be when we're done.  */
04584   bufp->can_be_null = 0;
04585 
04586   while (1)
04587     {
04588       if (p == pend || *p == (UCHAR_T) succeed)
04589        {
04590          /* We have reached the (effective) end of pattern.  */
04591          if (!FAIL_STACK_EMPTY ())
04592            {
04593              bufp->can_be_null |= path_can_be_null;
04594 
04595              /* Reset for next path.  */
04596              path_can_be_null = true;
04597 
04598              p = fail_stack.stack[--fail_stack.avail].pointer;
04599 
04600              continue;
04601            }
04602          else
04603            break;
04604        }
04605 
04606       /* We should never be about to go beyond the end of the pattern.  */
04607       assert (p < pend);
04608 
04609       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04610        {
04611 
04612         /* I guess the idea here is to simply not bother with a fastmap
04613            if a backreference is used, since it's too hard to figure out
04614            the fastmap for the corresponding group.  Setting
04615            `can_be_null' stops `re_search_2' from using the fastmap, so
04616            that is all we do.  */
04617        case duplicate:
04618          bufp->can_be_null = 1;
04619           goto done;
04620 
04621 
04622       /* Following are the cases which match a character.  These end
04623          with `break'.  For exactn, p[1] is taken as the first character
         of the literal; p[0] is presumably its length -- confirm against
         the exactn encoding in regex_compile.  */
04624 
04625 #ifdef WCHAR
04626        case exactn:
04627           fastmap[truncate_wchar(p[1])] = 1;
04628          break;
04629 #else /* BYTE */
04630        case exactn:
04631           fastmap[p[1]] = 1;
04632          break;
04633 #endif /* WCHAR */
04634 #ifdef MBS_SUPPORT
04635        case exactn_bin:
04636          fastmap[p[1]] = 1;
04637          break;
04638 #endif
04639 
04640 #ifdef WCHAR
04641         /* It is hard to distinguish fastmap from (multi byte) characters
04642            which depends on current locale.  */
04643         case charset:
04644        case charset_not:
04645        case wordchar:
04646        case notwordchar:
04647           bufp->can_be_null = 1;
04648           goto done;
04649 #else /* BYTE */
04650         case charset:
04651           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
04652            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
04653               fastmap[j] = 1;
04654          break;
04655 
04656 
04657        case charset_not:
04658          /* Chars beyond end of map must be allowed.  */
04659          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
04660             fastmap[j] = 1;
04661 
04662          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
04663            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
04664               fastmap[j] = 1;
04665           break;
04666 
04667 
04668        case wordchar:
04669          for (j = 0; j < (1 << BYTEWIDTH); j++)
04670            if (SYNTAX (j) == Sword)
04671              fastmap[j] = 1;
04672          break;
04673 
04674 
04675        case notwordchar:
04676          for (j = 0; j < (1 << BYTEWIDTH); j++)
04677            if (SYNTAX (j) != Sword)
04678              fastmap[j] = 1;
04679          break;
04680 #endif /* WCHAR */
04681 
04682         case anychar:
04683          {
04684            int fastmap_newline = fastmap['\n'];
04685 
04686            /* `.' matches anything ...  */
04687            for (j = 0; j < (1 << BYTEWIDTH); j++)
04688              fastmap[j] = 1;
04689 
04690            /* ... except perhaps newline.  */
04691            if (!(bufp->syntax & RE_DOT_NEWLINE))
04692              fastmap['\n'] = fastmap_newline;
04693 
04694            /* Return if we have already set `can_be_null'; if we have,
04695               then the fastmap is irrelevant.  Something's wrong here.
               NOTE(review): this `else if' pairs with the RE_DOT_NEWLINE
               test above, so the early exit is taken only when
               RE_DOT_NEWLINE is set -- confirm that is intended.  */
04696            else if (bufp->can_be_null)
04697              goto done;
04698 
04699            /* Otherwise, have to check alternative paths.  */
04700            break;
04701          }
04702 
04703 #ifdef emacs
04704         case syntaxspec:
04705          k = *p++;
04706          for (j = 0; j < (1 << BYTEWIDTH); j++)
04707            if (SYNTAX (j) == (enum syntaxcode) k)
04708              fastmap[j] = 1;
04709          break;
04710 
04711 
04712        case notsyntaxspec:
04713          k = *p++;
04714          for (j = 0; j < (1 << BYTEWIDTH); j++)
04715            if (SYNTAX (j) != (enum syntaxcode) k)
04716              fastmap[j] = 1;
04717          break;
04718 
04719 
04720       /* All cases after this match the empty string.  These end with
04721          `continue'.  */
04722 
04723 
04724        case before_dot:
04725        case at_dot:
04726        case after_dot:
04727           continue;
04728 #endif /* emacs */
04729 
04730 
04731         case no_op:
04732         case begline:
04733         case endline:
04734        case begbuf:
04735        case endbuf:
04736        case wordbound:
04737        case notwordbound:
04738        case wordbeg:
04739        case wordend:
04740         case push_dummy_failure:
04741           continue;
04742 
04743 
04744        case jump_n:
04745         case pop_failure_jump:
04746        case maybe_pop_jump:
04747        case jump:
04748         case jump_past_alt:
04749        case dummy_failure_jump:
04750           EXTRACT_NUMBER_AND_INCR (j, p);
04751          p += j;
04752          if (j > 0)
04753            continue;
04754 
04755           /* Jump backward implies we just went through the body of a
04756              loop and matched nothing.  Opcode jumped to should be
04757              `on_failure_jump' or `succeed_n'.  Just treat it like an
04758              ordinary jump.  For a * loop, it has pushed its failure
04759              point already; if so, discard that as redundant.  */
04760           if ((re_opcode_t) *p != on_failure_jump
04761              && (re_opcode_t) *p != succeed_n)
04762            continue;
04763 
04764           p++;
04765           EXTRACT_NUMBER_AND_INCR (j, p);
04766           p += j;
04767 
04768           /* If what's on the stack is where we are now, pop it.  */
04769           if (!FAIL_STACK_EMPTY ()
04770              && fail_stack.stack[fail_stack.avail - 1].pointer == p)
04771             fail_stack.avail--;
04772 
04773           continue;
04774 
04775 
04776         case on_failure_jump:
04777         case on_failure_keep_string_jump:
04778        handle_on_failure_jump:
04779           EXTRACT_NUMBER_AND_INCR (j, p);
04780 
04781           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
04782              end of the pattern.  We don't want to push such a point,
04783              since when we restore it above, entering the switch will
04784              increment `p' past the end of the pattern.  We don't need
04785              to push such a point since we obviously won't find any more
04786              fastmap entries beyond `pend'.  Such a pattern can match
04787              the null string, though.  */
04788           if (p + j < pend)
04789             {
04790               if (!PUSH_PATTERN_OP (p + j, fail_stack))
04791               {
                 /* PUSH_PATTERN_OP reported failure (presumably the
                    failure stack could not be grown): internal error.  */
04792                 RESET_FAIL_STACK ();
04793                 return -2;
04794               }
04795             }
04796           else
04797             bufp->can_be_null = 1;
04798 
04799           if (succeed_n_p)
04800             {
04801               EXTRACT_NUMBER_AND_INCR (k, p);    /* Skip the n.  */
04802               succeed_n_p = false;
04803            }
04804 
04805           continue;
04806 
04807 
04808        case succeed_n:
04809           /* Get to the number of times to succeed.  */
04810           p += OFFSET_ADDRESS_SIZE;
04811 
04812           /* Increment p past the n for when k != 0.  */
04813           EXTRACT_NUMBER_AND_INCR (k, p);
04814           if (k == 0)
04815            {
              /* A count of zero is handled like on_failure_jump: back up
                 to the jump offset and reuse that handler, which skips
                 the count again because succeed_n_p is set.  */
04816               p -= 2 * OFFSET_ADDRESS_SIZE;
04817              succeed_n_p = true;  /* Spaghetti code alert.  */
04818               goto handle_on_failure_jump;
04819             }
04820           continue;
04821 
04822 
04823        case set_number_at:
04824           p += 2 * OFFSET_ADDRESS_SIZE;
04825           continue;
04826 
04827 
04828        case start_memory:
04829         case stop_memory:
04830          p += 2;
04831          continue;
04832 
04833 
04834        default:
04835           abort (); /* We have listed all the cases.  */
04836         } /* switch *p++ */
04837 
04838       /* Getting here means we have found the possible starting
04839          characters for one path of the pattern -- and that the empty
04840          string does not match.  We need not follow this path further.
04841          Instead, look at the next alternative (remembered on the
04842          stack), or quit if no more.  The test at the top of the loop
04843          does these things.  */
04844       path_can_be_null = false;
04845       p = pend;
04846     } /* while p */
04847 
04848   /* Set `can_be_null' for the last path (also the first path, if the
04849      pattern is empty).  */
04850   bufp->can_be_null |= path_can_be_null;
04851 
04852  done:
04853   RESET_FAIL_STACK ();
04854   return 0;
04855 }
04856 
04857 #else /* not INSIDE_RECURSION */
04858 
/* Public entry point: compute the fastmap for BUFP.  When MBS_SUPPORT is
   defined and MB_CUR_MAX is not 1, the wide-character implementation is
   used; otherwise the byte implementation.  The value returned by the
   chosen implementation is passed back unchanged.  */
04859 int
04860 re_compile_fastmap (struct re_pattern_buffer *bufp)
04861 {
04862 # ifdef MBS_SUPPORT
04863   if (MB_CUR_MAX != 1)
04864     return wcs_re_compile_fastmap(bufp);
04865   else
04866 # endif
04867     return byte_re_compile_fastmap(bufp);
04868 } /* re_compile_fastmap */
04869 #ifdef _LIBC
04870 weak_alias (__re_compile_fastmap, re_compile_fastmap)
04871 #endif
04872 
04873 
04874 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
04875    ENDS.  Subsequent matches using BUFP and REGS will use
04876    this memory for recording register information.  STARTS and ENDS
04877    must be allocated using the malloc library routine, and must each
04878    be at least NUM_REGS * sizeof (regoff_t) bytes long.
04879 
04880    If NUM_REGS == 0, then subsequent matches should allocate their own
04881    register data: REGS is cleared (no registers, null START and END) and
   BUFP->regs_allocated is set to REGS_UNALLOCATED.  Otherwise
   BUFP->regs_allocated is set to REGS_REALLOCATE.
04882 
04883    Unless this function is called, the first search or match using
04884    BUFP will allocate its own register data, without
04885    freeing the old data.  */
04886 
04887 void
04888 re_set_registers (struct re_pattern_buffer *bufp,
04889                   struct re_registers *regs, unsigned num_regs,
04890                   regoff_t *starts, regoff_t *ends)
04891 {
04892   if (num_regs)
04893     {
04894       bufp->regs_allocated = REGS_REALLOCATE;
04895       regs->num_regs = num_regs;
04896       regs->start = starts;
04897       regs->end = ends;
04898     }
04899   else
04900     {
04901       bufp->regs_allocated = REGS_UNALLOCATED;
04902       regs->num_regs = 0;
04903       regs->start = regs->end = (regoff_t *) 0;
04904     }
04905 }
04906 #ifdef _LIBC
04907 weak_alias (__re_set_registers, re_set_registers)
04908 #endif
04909 
04910 /* Searching routines.  */
04911 
04912 /* Like re_search_2, below, but only one string is specified, and
04913    doesn't let you say where to stop matching.  STRING (of length SIZE)
   is passed as the second string of re_search_2, with a null, zero-length
   first string and SIZE as the stop index; the result of re_search_2 is
   returned unchanged.  */
04914 
04915 int
04916 re_search (struct re_pattern_buffer *bufp, const char *string, int size,
04917            int startpos, int range, struct re_registers *regs)
04918 {
04919   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
04920                     regs, size);
04921 }
04922 #ifdef _LIBC
04923 weak_alias (__re_search, re_search)
04924 #endif
04925 
04926 
04927 /* Using the compiled pattern in BUFP->buffer, first tries to match the
04928    virtual concatenation of STRING1 and STRING2, starting first at index
04929    STARTPOS, then at STARTPOS + 1, and so on.
04930 
04931    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
04932 
04933    RANGE is how far to scan while trying to match.  RANGE = 0 means try
04934    only at STARTPOS; in general, the last start tried is STARTPOS +
04935    RANGE.
04936 
04937    In REGS, return the indices of the virtual concatenation of STRING1
04938    and STRING2 that matched the entire BUFP->buffer and its contained
04939    subexpressions.
04940 
04941    Do not consider matching one past the index STOP in the virtual
04942    concatenation of STRING1 and STRING2.
04943 
04944    We return either the position in the strings at which the match was
04945    found, -1 if no match, or -2 if error (such as failure
04946    stack overflow).
 
   This public entry point only dispatches: when MBS_SUPPORT is defined and
   MB_CUR_MAX is not 1 the wide-character implementation does the work,
   otherwise the byte implementation.  */
04947 
04948 int
04949 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
04950              const char *string2, int size2, int startpos, int range,
04951              struct re_registers *regs, int stop)
04952 {
04953 # ifdef MBS_SUPPORT
04954   if (MB_CUR_MAX != 1)
04955     return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
04956                          range, regs, stop);
04957   else
04958 # endif
04959     return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
04960                           range, regs, stop);
04961 } /* re_search_2 */
04962 #ifdef _LIBC
04963 weak_alias (__re_search_2, re_search_2)
04964 #endif
04965 
04966 #endif /* not INSIDE_RECURSION */
04967 
04968 #ifdef INSIDE_RECURSION
04969 
/* Release VAR if it is non-null (with REGEX_FREE when MATCH_MAY_ALLOCATE
   is defined, with free otherwise) and reset it to NULL.
   Caution: the expansion is two statements, an `if' followed by an
   assignment, and is not wrapped in do { } while (0).  When FREE_VAR is
   used as the un-braced body of an `if' or `else', only the release is
   governed by that condition; the assignment to NULL always executes.  */
04970 #ifdef MATCH_MAY_ALLOCATE
04971 # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
04972 #else
04973 # define FREE_VAR(var) if (var) free (var); var = NULL
04974 #endif
04975 
04976 #ifdef WCHAR
/* Input sizes (size1, size2) above this threshold get their conversion
   buffers from TALLOC and released with free; smaller inputs use
   REGEX_TALLOC and FREE_VAR.  */
04977 # define MAX_ALLOCA_SIZE    2000
04978 
/* Release the wide-character string and offset buffers of both input
   strings, choosing free or FREE_VAR for each pair by the same size test
   that was used when the buffers were allocated.  Relies on size1, size2,
   wcs_string1/2 and mbs_offset1/2 being in scope at the point of use.  */
04979 # define FREE_WCS_BUFFERS() \
04980   do {                                                               \
04981     if (size1 > MAX_ALLOCA_SIZE)                                     \
04982       {                                                                     \
04983        free (wcs_string1);                                           \
04984        free (mbs_offset1);                                           \
04985       }                                                                     \
04986     else                                                             \
04987       {                                                                     \
04988        FREE_VAR (wcs_string1);                                              \
04989        FREE_VAR (mbs_offset1);                                              \
04990       }                                                                     \
04991     if (size2 > MAX_ALLOCA_SIZE)                                     \
04992       {                                                                     \
04993        free (wcs_string2);                                           \
04994        free (mbs_offset2);                                           \
04995       }                                                                     \
04996     else                                                             \
04997       {                                                                     \
04998        FREE_VAR (wcs_string2);                                              \
04999        FREE_VAR (mbs_offset2);                                              \
05000       }                                                                     \
05001   } while (0)
05002 
05003 #endif
05004 
05005 
/* Template body of re_search_2; PREFIX selects the byte or wide-character
   instance.

   Searches the virtual concatenation of STRING1 (SIZE1 bytes) and STRING2
   (SIZE2 bytes) for a match of the compiled pattern in BUFP.  Start
   positions are tried from STARTPOS towards STARTPOS + RANGE (backwards
   when RANGE is negative) after RANGE has been clipped to the strings.
   REGS and STOP are handed on to the match routine.

   Returns the start position of the first match found, -1 if there is no
   match or STARTPOS is out of range, or -2 on an internal error: a failed
   fastmap computation, a failed buffer allocation in the WCHAR build, or
   a -2 from the matcher.  */
05006 static int
05007 PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
05008                      int size1, const char *string2, int size2,
05009                      int startpos, int range,
05010                      struct re_registers *regs, int stop)
05011 {
05012   int val;
05013   register char *fastmap = bufp->fastmap;
05014   register RE_TRANSLATE_TYPE translate = bufp->translate;
05015   int total_size = size1 + size2;
05016   int endpos = startpos + range;
05017 #ifdef WCHAR
05018   /* wchar_t buffers holding the converted contents of string1, string2.  */
05019   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
05020   /* Number of wchar_t elements stored in those buffers.  */
05021   int wcs_size1 = 0, wcs_size2 = 0;
05022   /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
05023   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
05024   /* They hold whether each wchar_t is binary data or not.  */
05025   char *is_binary = NULL;
05026 #endif /* WCHAR */
05027 
05028   /* Check for out-of-range STARTPOS.  */
05029   if (startpos < 0 || startpos > total_size)
05030     return -1;
05031 
05032   /* Fix up RANGE if it might eventually take us outside
05033      the virtual concatenation of STRING1 and STRING2.
05034      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
05035   if (endpos < 0)
05036     range = 0 - startpos;
05037   else if (endpos > total_size)
05038     range = total_size - startpos;
05039 
05040   /* If the search isn't to be a backwards one, don't waste time in a
05041      search for a pattern that must be anchored.  */
05042   if (bufp->used > 0 && range > 0
05043       && ((re_opcode_t) bufp->buffer[0] == begbuf
05044          /* `begline' is like `begbuf' if it cannot match at newlines.  */
05045          || ((re_opcode_t) bufp->buffer[0] == begline
05046              && !bufp->newline_anchor)))
05047     {
05048       if (startpos > 0)
05049        return -1;
05050       else
05051        range = 1;
05052     }
05053 
05054 #ifdef emacs
05055   /* In a forward search for something that starts with \=.
05056      don't keep searching past point.  */
05057   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
05058     {
05059       range = PT - startpos;
05060       if (range <= 0)
05061        return -1;
05062     }
05063 #endif /* emacs */
05064 
05065   /* Update the fastmap now if not correct already.  */
05066   if (fastmap && !bufp->fastmap_accurate)
05067     if (re_compile_fastmap (bufp) == -2)
05068       return -2;
05069 
05070 #ifdef WCHAR
05071   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
05072      fill them with converted string.  Inputs longer than MAX_ALLOCA_SIZE
     use TALLOC/free; shorter ones use REGEX_TALLOC/FREE_VAR.  */
05073   if (size1 != 0)
05074     {
05075       if (size1 > MAX_ALLOCA_SIZE)
05076        {
05077          wcs_string1 = TALLOC (size1 + 1, CHAR_T);
05078          mbs_offset1 = TALLOC (size1 + 1, int);
05079          is_binary = TALLOC (size1 + 1, char);
05080        }
05081       else
05082        {
05083          wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
05084          mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
05085          is_binary = REGEX_TALLOC (size1 + 1, char);
05086        }
05087       if (!wcs_string1 || !mbs_offset1 || !is_binary)
05088        {
05089          if (size1 > MAX_ALLOCA_SIZE)
05090            {
05091              free (wcs_string1);
05092              free (mbs_offset1);
05093              free (is_binary);
05094            }
05095          else
05096            {
05097              FREE_VAR (wcs_string1);
05098              FREE_VAR (mbs_offset1);
05099              FREE_VAR (is_binary);
05100            }
05101          return -2;
05102        }
05103       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
05104                                  mbs_offset1, is_binary);
05105       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
      /* is_binary is only needed during the conversion.  */
05106       if (size1 > MAX_ALLOCA_SIZE)
05107        free (is_binary);
05108       else
05109        FREE_VAR (is_binary);
05110     }
05111   if (size2 != 0)
05112     {
05113       if (size2 > MAX_ALLOCA_SIZE)
05114        {
05115          wcs_string2 = TALLOC (size2 + 1, CHAR_T);
05116          mbs_offset2 = TALLOC (size2 + 1, int);
05117          is_binary = TALLOC (size2 + 1, char);
05118        }
05119       else
05120        {
05121          wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
05122          mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
05123          is_binary = REGEX_TALLOC (size2 + 1, char);
05124        }
05125       if (!wcs_string2 || !mbs_offset2 || !is_binary)
05126        {
05127          FREE_WCS_BUFFERS ();
05128          if (size2 > MAX_ALLOCA_SIZE)
05129            free (is_binary);
05130          else
05131            FREE_VAR (is_binary);
05132          return -2;
05133        }
05134       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
05135                                  mbs_offset2, is_binary);
05136       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
05137       if (size2 > MAX_ALLOCA_SIZE)
05138        free (is_binary);
05139       else
05140        FREE_VAR (is_binary);
05141     }
05142 #endif /* WCHAR */
05143 
05144 
05145   /* Loop through the string, looking for a place to start matching.  */
05146   for (;;)
05147     {
05148       /* If a fastmap is supplied, skip quickly over characters that
05149          cannot be the start of a match.  If the pattern can match the
05150          null string, however, we don't need to skip characters; we want
05151          the first null string.  */
05152       if (fastmap && startpos < total_size && !bufp->can_be_null)
05153        {
05154          if (range > 0)     /* Searching forwards.  */
05155            {
05156              register const char *d;
05157              register int lim = 0;
05158              int irange = range;
05159 
              /* If the scan starts in string1 and could run past its
                 end, LIM is the part of RANGE that lies beyond string1,
                 so the loops below stop at the string1/string2 boundary
                 instead of reading past the end of string1.  */
05160               if (startpos < size1 && startpos + range >= size1)
05161                 lim = range - (size1 - startpos);
05162 
05163              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
05164 
05165               /* Written out as an if-else to avoid testing `translate'
05166                  inside the loop.  */
05167              if (translate)
05168                 while (range > lim
05169                        && !fastmap[(unsigned char)
05170                                translate[(unsigned char) *d++]])
05171                   range--;
05172              else
05173                 while (range > lim && !fastmap[(unsigned char) *d++])
05174                   range--;
05175 
05176              startpos += irange - range;
05177            }
05178          else                      /* Searching backwards.  */
05179            {
05180              register CHAR_T c = (size1 == 0 || startpos >= size1
05181                                   ? string2[startpos - size1]
05182                                   : string1[startpos]);
05183 
05184              if (!fastmap[(unsigned char) TRANSLATE (c)])
05185               goto advance;
05186            }
05187        }
05188 
05189       /* If can't match the null string, and that's all we have left, fail.  */
05190       if (range >= 0 && startpos == total_size && fastmap
05191           && !bufp->can_be_null)
05192        {
05193 #ifdef WCHAR
05194          FREE_WCS_BUFFERS ();
05195 #endif
05196          return -1;
05197        }
05198 
05199 #ifdef WCHAR
05200       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
05201                                  size2, startpos, regs, stop,
05202                                  wcs_string1, wcs_size1,
05203                                  wcs_string2, wcs_size2,
05204                                  mbs_offset1, mbs_offset2);
05205 #else /* BYTE */
05206       val = byte_re_match_2_internal (bufp, string1, size1, string2,
05207                                   size2, startpos, regs, stop);
05208 #endif /* BYTE */
05209 
05210 #ifndef REGEX_MALLOC
05211 # ifdef C_ALLOCA
      /* NOTE(review): presumably asks the C emulation of alloca to reclaim
         storage allocated by the match attempt -- confirm.  */
05212       alloca (0);
05213 # endif
05214 #endif
05215 
      /* A non-negative result from the matcher means a match at STARTPOS.  */
05216       if (val >= 0)
05217        {
05218 #ifdef WCHAR
05219          FREE_WCS_BUFFERS ();
05220 #endif
05221          return startpos;
05222        }
05223 
      /* -2 is the matcher's internal-error code; pass it on.  */
05224       if (val == -2)
05225        {
05226 #ifdef WCHAR
05227          FREE_WCS_BUFFERS ();
05228 #endif
05229          return -2;
05230        }
05231 
      /* Move one position in the search direction, or stop once RANGE
         has been used up.  */
05232     advance:
05233       if (!range)
05234         break;
05235       else if (range > 0)
05236         {
05237           range--;
05238           startpos++;
05239         }
05240       else
05241         {
05242           range++;
05243           startpos--;
05244         }
05245     }
05246 #ifdef WCHAR
05247   FREE_WCS_BUFFERS ();
05248 #endif
05249   return -1;
05250 }
05251 
05252 #ifdef WCHAR
05253 /* This converts PTR, a pointer into one of the search wchar_t strings
05254    `string1' and `string2', into a multibyte string offset from the
05255    beginning of that string.  We use mbs_offset to optimize.
05256    See convert_mbs_to_wcs.  A null offset table yields offset 0; for a
   pointer into `string2' the result is increased by `csize1'.  */
05257 # define POINTER_TO_OFFSET(ptr)                                       \
05258   (FIRST_STRING_P (ptr)                                               \
05259    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))       \
05260    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)       \
05261                + csize1)))
05262 #else /* BYTE */
05263 /* This converts PTR, a pointer into one of the search strings `string1'
05264    and `string2', into an offset from the beginning of that string; for
   a pointer into `string2' the result is increased by `size1'.  */
05265 # define POINTER_TO_OFFSET(ptr)                  \
05266   (FIRST_STRING_P (ptr)                          \
05267    ? ((regoff_t) ((ptr) - string1))              \
05268    : ((regoff_t) ((ptr) - string2 + size1)))
05269 #endif /* WCHAR */
05270 
05271 /* Macros for dealing with the split strings in re_match_2.  */
05272 
/* Nonzero exactly when the current segment end `dend' equals
   `end_match_1'.  Both names come from the enclosing matcher.  */
05273 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
05274 
05275 /* Call before fetching a character with *d.  This switches over to
05276    string2 if necessary.
   While d has reached dend: if dend is already end_match_2 there is no
   input left and control jumps to the `fail' label; otherwise d moves to
   the start of string2 and dend becomes end_match_2.  The expansion is a
   bare `while' statement that uses d, dend, string2, end_match_2 and the
   label `fail' from the enclosing function.  */
05277 #define PREFETCH()                                             \
05278   while (d == dend)                                            \
05279     {                                                          \
05280       /* End of string2 => fail.  */                                  \
05281       if (dend == end_match_2)                                        \
05282         goto fail;                                             \
05283       /* End of string1 => advance to string2.  */                    \
05284       d = string2;                                              \
05285       dend = end_match_2;                                      \
05286     }
05287 
05288 /* Test if at very beginning or at very end of the virtual concatenation
05289    of `string1' and `string2'.  If only one string, it's `string2'.
   AT_STRINGS_BEG compares D with string1 when size1 is nonzero and with
   string2 otherwise; it is also true whenever size2 is zero.
   AT_STRINGS_END compares D with end2.  Both rely on string1, string2,
   size1, size2 and end2 from the enclosing function.  */
05290 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
05291 #define AT_STRINGS_END(d) ((d) == end2)
05292 
05293 
05294 /* Test if D points to a character which is word-constituent.  We have
05295    two special cases to check for: if past the end of string1, look at
05296    the first character in string2; and if before the beginning of
05297    string2, look at the last character in string1.
   The WCHAR variant accepts a character when iswalnum is nonzero for it
   or when it equals L'_'; the BYTE variant accepts a character whose
   SYNTAX class is Sword.  D is evaluated several times, and string2 and
   end1 must be in scope at the point of use.  */
05298 #ifdef WCHAR
05299 /* Use internationalized API instead of SYNTAX.  */
05300 # define WORDCHAR_P(d)                                                \
05301   (iswalnum ((wint_t)((d) == end1 ? *string2                          \
05302            : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0           \
05303    || ((d) == end1 ? *string2                                         \
05304        : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
05305 #else /* BYTE */
05306 # define WORDCHAR_P(d)                                                \
05307   (SYNTAX ((d) == end1 ? *string2                              \
05308            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                 \
05309    == Sword)
05310 #endif /* WCHAR */
05311 
05312 /* Disabled due to a compiler bug -- see comment at case wordbound */
05313 #if 0
05314 /* Test if the character before D and the one at D differ with respect
05315    to being word-constituent.  */
05316 #define AT_WORD_BOUNDARY(d)                                    \
05317   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                           \
05318    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
05319 #endif
05320 
05321 /* Free everything we malloc.
   FREE_VARIABLES has four variants, selected by MATCH_MAY_ALLOCATE and
   WCHAR.  With MATCH_MAY_ALLOCATE it releases the failure stack and the
   per-match register vectors.  In the WCHAR variants it additionally
   releases string1, string2, mbs_offset1 and mbs_offset2, but only when
   cant_free_wcs_buf is zero.  Without MATCH_MAY_ALLOCATE and without
   WCHAR it expands to a no-op expression.  Every name it touches belongs
   to the enclosing matcher, so the macro is only usable there.  */
05322 #ifdef MATCH_MAY_ALLOCATE
05323 # ifdef WCHAR
05324 #  define FREE_VARIABLES()                                     \
05325   do {                                                         \
05326     REGEX_FREE_STACK (fail_stack.stack);                       \
05327     FREE_VAR (regstart);                                       \
05328     FREE_VAR (regend);                                                \
05329     FREE_VAR (old_regstart);                                          \
05330     FREE_VAR (old_regend);                                     \
05331     FREE_VAR (best_regstart);                                         \
05332     FREE_VAR (best_regend);                                    \
05333     FREE_VAR (reg_info);                                       \
05334     FREE_VAR (reg_dummy);                                      \
05335     FREE_VAR (reg_info_dummy);                                        \
05336     if (!cant_free_wcs_buf)                                    \
05337       {                                                               \
05338         FREE_VAR (string1);                                    \
05339         FREE_VAR (string2);                                    \
05340         FREE_VAR (mbs_offset1);                                       \
05341         FREE_VAR (mbs_offset2);                                       \
05342       }                                                               \
05343   } while (0)
05344 # else /* BYTE */
05345 #  define FREE_VARIABLES()                                     \
05346   do {                                                         \
05347     REGEX_FREE_STACK (fail_stack.stack);                       \
05348     FREE_VAR (regstart);                                       \
05349     FREE_VAR (regend);                                                \
05350     FREE_VAR (old_regstart);                                          \
05351     FREE_VAR (old_regend);                                     \
05352     FREE_VAR (best_regstart);                                         \
05353     FREE_VAR (best_regend);                                    \
05354     FREE_VAR (reg_info);                                       \
05355     FREE_VAR (reg_dummy);                                      \
05356     FREE_VAR (reg_info_dummy);                                        \
05357   } while (0)
05358 # endif /* WCHAR */
05359 #else
05360 # ifdef WCHAR
05361 #  define FREE_VARIABLES()                                     \
05362   do {                                                         \
05363     if (!cant_free_wcs_buf)                                    \
05364       {                                                               \
05365         FREE_VAR (string1);                                    \
05366         FREE_VAR (string2);                                    \
05367         FREE_VAR (mbs_offset1);                                       \
05368         FREE_VAR (mbs_offset2);                                       \
05369       }                                                               \
05370   } while (0)
05371 # else /* BYTE */
05372 #  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
05373 # endif /* WCHAR */
05374 #endif /* not MATCH_MAY_ALLOCATE */
05375 
05376 /* These values must meet several constraints.  They must not be valid
05377    register values; since we have a limit of 255 registers (because
05378    we use only one byte in the pattern for the register number), we can
05379    use numbers larger than 255.  They must differ by 1, because of
05380    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
05381    be larger than the value for the highest register, so we do not try
05382    to actually save any registers when none are active.
   The matcher uses them as the initial values of highest_active_reg and
   lowest_active_reg.  */
05383 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
05384 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
05385 
05386 #else /* not INSIDE_RECURSION */
05387 /* Matching routines.  */
05388 
05389 #ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except that it takes only a single string.
   STRING (SIZE bytes long) is handed to the internal matcher as the
   second string, with a null, zero-length first string, POS as the
   starting offset and SIZE as the stop position.  The internal matcher's
   result is returned unchanged.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher.  Its wchar_t
     buffers and offset tables are passed as null, which tells it to set
     them up itself.  */
  const int matched = (MB_CUR_MAX != 1)
    ? wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                               pos, regs, size,
                               NULL, 0, NULL, 0, NULL, NULL)
    : byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                pos, regs, size);
# else
  const int matched = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                                pos, regs, size);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force a cleanup of whatever the matcher obtained through the C
     emulation of alloca.  */
  alloca (0);
# endif
  return matched;
}
05413 # ifdef _LIBC
05414 weak_alias (__re_match, re_match)
05415 # endif
05416 #endif /* not emacs */
05417 
05418 #endif /* not INSIDE_RECURSION */
05419 
05420 #ifdef INSIDE_RECURSION
05421 static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
05422                                                   UCHAR_T *end,
05423                                    PREFIX(register_info_type) *reg_info);
05424 static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
05425                                                 UCHAR_T *end,
05426                                    PREFIX(register_info_type) *reg_info);
05427 static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
05428                                                       UCHAR_T *end,
05429                                    PREFIX(register_info_type) *reg_info);
05430 static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
05431                                    int len, char *translate);
05432 #else /* not INSIDE_RECURSION */
05433 
/* re_match_2 matches the compiled pattern in BUFP against the (virtual)
   concatenation of STRING1 and STRING2 (SIZE1 and SIZE2 bytes long,
   respectively).  Matching begins at offset POS and stops at offset
   STOP.

   When REGS is non-null and the `no_sub' field of BUFP is zero, the
   offsets of the substring each group matched are stored in REGS.  See
   the documentation for exactly how many groups get filled.

   The result is -1 if there is no match and -2 on an internal error
   (such as the failure stack overflowing).  Otherwise it is the length
   of the matched substring.  */

int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher.  Its wchar_t
     buffers and offset tables are passed as null, which tells it to set
     them up itself.  */
  const int matched = (MB_CUR_MAX != 1)
    ? wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
                               pos, regs, stop,
                               NULL, 0, NULL, 0, NULL, NULL)
    : byte_re_match_2_internal (bufp, string1, size1, string2, size2,
                                pos, regs, stop);
# else
  const int matched = byte_re_match_2_internal (bufp, string1, size1,
                                                string2, size2,
                                                pos, regs, stop);
# endif

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force a cleanup of whatever the matcher obtained through the C
     emulation of alloca.  */
  alloca (0);
#endif
  return matched;
}
05470 #ifdef _LIBC
05471 weak_alias (__re_match_2, re_match_2)
05472 #endif
05473 
05474 #endif /* not INSIDE_RECURSION */
05475 
05476 #ifdef INSIDE_RECURSION
05477 
05478 #ifdef WCHAR
05479 static int count_mbs_length (int *, int);
05480 
/* Translate a byte length in a multibyte string into a count of wchar_t
   characters.

   OFFSET_BUFFER is the offset table that belongs to the multibyte string
   (see convert_mbs_to_wcs): entry I is the byte offset at which the I-th
   wide character starts.  LENGTH is the byte length of the prefix
   [0, LENGTH) to measure.

   Returns:
     -1      if LENGTH is negative, or if no entry of the table equals
             LENGTH (the prefix does not end on a character boundary);
      0      if OFFSET_BUFFER is null;
      count  the index I with OFFSET_BUFFER[I] == LENGTH otherwise.

   Entry LENGTH of the table is read unconditionally, so the table must
   have at least LENGTH + 1 readable entries.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* Reject an invalid size.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* With no multibyte characters in the prefix, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Binary search in the open interval (lower, upper).  LENGTH is a
     valid upper bound because offset_buffer[i] >= i for every i.  */
  lower = 0;
  upper = length;
  while (upper - lower > 1)
    {
      /* Written this way so the midpoint cannot overflow `int', unlike
         (lower + upper) / 2.  */
      int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] == length)
        return middle;
      if (offset_buffer[middle] > length)
        upper = middle;
      else
        lower = middle;
    }

  /* LENGTH falls inside a multibyte character.  */
  return -1;
}
05522 #endif /* WCHAR */
05523 
05524 /* This is a separate function so that we can force an alloca cleanup
05525    afterwards.  */
05526 #ifdef WCHAR
05527 static int
05528 wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
05529                          const char *cstring1, int csize1,
05530                          const char *cstring2, int csize2,
05531                          int pos,
05532                       struct re_registers *regs,
05533                          int stop,
05534      /* string1 == string2 == NULL means string1/2, size1/2 and
05535        mbs_offset1/2 need setting up in this function.  */
05536      /* We need wchar_t* buffers corresponding to cstring1, cstring2.  */
05537                          wchar_t *string1, int size1,
05538                          wchar_t *string2, int size2,
05539      /* offset buffer for optimization. See convert_mbs_to_wcs.  */
05540                       int *mbs_offset1, int *mbs_offset2)
05541 #else /* BYTE */
05542 static int
05543 byte_re_match_2_internal (struct re_pattern_buffer *bufp,
05544                           const char *string1, int size1,
05545                           const char *string2, int size2,
05546                           int pos,
05547                        struct re_registers *regs, int stop)
05548 #endif /* BYTE */
05549 {
05550   /* General temporaries.  */
05551   int mcnt;
05552   UCHAR_T *p1;
05553 #ifdef WCHAR
05554   /* They hold whether each wchar_t is binary data or not.  */
05555   char *is_binary = NULL;
05556   /* If true, we can't free string1/2, mbs_offset1/2.  */
05557   int cant_free_wcs_buf = 1;
05558 #endif /* WCHAR */
05559 
05560   /* Just past the end of the corresponding string.  */
05561   const CHAR_T *end1, *end2;
05562 
05563   /* Pointers into string1 and string2, just past the last characters in
05564      each to consider matching.  */
05565   const CHAR_T *end_match_1, *end_match_2;
05566 
05567   /* Where we are in the data, and the end of the current string.  */
05568   const CHAR_T *d, *dend;
05569 
05570   /* Where we are in the pattern, and the end of the pattern.  */
05571 #ifdef WCHAR
05572   UCHAR_T *pattern, *p;
05573   register UCHAR_T *pend;
05574 #else /* BYTE */
05575   UCHAR_T *p = bufp->buffer;
05576   register UCHAR_T *pend = p + bufp->used;
05577 #endif /* WCHAR */
05578 
05579   /* Mark the opcode just after a start_memory, so we can test for an
05580      empty subpattern when we get to the stop_memory.  */
05581   UCHAR_T *just_past_start_mem = 0;
05582 
05583   /* We use this to map every character in the string.  */
05584   RE_TRANSLATE_TYPE translate = bufp->translate;
05585 
05586   /* Failure point stack.  Each place that can handle a failure further
05587      down the line pushes a failure point on this stack.  It consists of
05588      restart, regend, and reg_info for all registers corresponding to
05589      the subexpressions we're currently inside, plus the number of such
05590      registers, and, finally, two char *'s.  The first char * is where
05591      to resume scanning the pattern; the second one is where to resume
05592      scanning the strings.  If the latter is zero, the failure point is
05593      a ``dummy''; if a failure happens and the failure point is a dummy,
05594      it gets discarded and the next one is tried.  */
05595 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
05596   PREFIX(fail_stack_type) fail_stack;
05597 #endif
05598 #ifdef DEBUG
05599   static unsigned failure_id;
05600   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
05601 #endif
05602 
05603 #ifdef REL_ALLOC
05604   /* This holds the pointer to the failure stack, when
05605      it is allocated relocatably.  */
05606   fail_stack_elt_t *failure_stack_ptr;
05607 #endif
05608 
05609   /* We fill all the registers internally, independent of what we
05610      return, for use in backreferences.  The number here includes
05611      an element for register zero.  */
05612   size_t num_regs = bufp->re_nsub + 1;
05613 
05614   /* The currently active registers.  */
05615   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
05616   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
05617 
05618   /* Information on the contents of registers. These are pointers into
05619      the input strings; they record just what was matched (on this
05620      attempt) by a subexpression part of the pattern, that is, the
05621      regnum-th regstart pointer points to where in the pattern we began
05622      matching and the regnum-th regend points to right after where we
05623      stopped matching the regnum-th subexpression.  (The zeroth register
05624      keeps track of what the whole pattern matches.)  */
05625 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
05626   const CHAR_T **regstart, **regend;
05627 #endif
05628 
05629   /* If a group that's operated upon by a repetition operator fails to
05630      match anything, then the register for its start will need to be
05631      restored because it will have been set to wherever in the string we
05632      are when we last see its open-group operator.  Similarly for a
05633      register's end.  */
05634 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
05635   const CHAR_T **old_regstart, **old_regend;
05636 #endif
05637 
05638   /* The is_active field of reg_info helps us keep track of which (possibly
05639      nested) subexpressions we are currently in. The matched_something
05640      field of reg_info[reg_num] helps us tell whether or not we have
05641      matched any of the pattern so far this time through the reg_num-th
05642      subexpression.  These two fields get reset each time through any
05643      loop their register is in.  */
05644 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
05645   PREFIX(register_info_type) *reg_info;
05646 #endif
05647 
05648   /* The following record the register info as found in the above
05649      variables when we find a match better than any we've seen before.
05650      This happens as we backtrack through the failure points, which in
05651      turn happens only if we have not yet matched the entire string. */
05652   unsigned best_regs_set = false;
05653 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
05654   const CHAR_T **best_regstart, **best_regend;
05655 #endif
05656 
05657   /* Logically, this is `best_regend[0]'.  But we don't want to have to
05658      allocate space for that if we're not allocating space for anything
05659      else (see below).  Also, we never need info about register 0 for
05660      any of the other register vectors, and it seems rather a kludge to
05661      treat `best_regend' differently than the rest.  So we keep track of
05662      the end of the best match so far in a separate variable.  We
05663      initialize this to NULL so that when we backtrack the first time
05664      and need to test it, it's not garbage.  */
05665   const CHAR_T *match_end = NULL;
05666 
05667   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
05668   int set_regs_matched_done = 0;
05669 
05670   /* Used when we pop values we don't care about.  */
05671 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
05672   const CHAR_T **reg_dummy;
05673   PREFIX(register_info_type) *reg_info_dummy;
05674 #endif
05675 
05676 #ifdef DEBUG
05677   /* Counts the total number of registers pushed.  */
05678   unsigned num_regs_pushed = 0;
05679 #endif
05680 
05681   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
05682 
05683   INIT_FAIL_STACK ();
05684 
05685 #ifdef MATCH_MAY_ALLOCATE
05686   /* Do not bother to initialize all the register variables if there are
05687      no groups in the pattern, as it takes a fair amount of time.  If
05688      there are groups, we include space for register 0 (the whole
05689      pattern), even though we never use it, since it simplifies the
05690      array indexing.  We should fix this.  */
05691   if (bufp->re_nsub)
05692     {
05693       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05694       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05695       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05696       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05697       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05698       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05699       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
05700       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
05701       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
05702 
05703       if (!(regstart && regend && old_regstart && old_regend && reg_info
05704             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
05705         {
05706           FREE_VARIABLES ();
05707           return -2;
05708         }
05709     }
05710   else
05711     {
05712       /* We must initialize all our variables to NULL, so that
05713          `FREE_VARIABLES' doesn't try to free them.  */
05714       regstart = regend = old_regstart = old_regend = best_regstart
05715         = best_regend = reg_dummy = NULL;
05716       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
05717     }
05718 #endif /* MATCH_MAY_ALLOCATE */
05719 
05720   /* The starting position is bogus.  */
05721 #ifdef WCHAR
05722   if (pos < 0 || pos > csize1 + csize2)
05723 #else /* BYTE */
05724   if (pos < 0 || pos > size1 + size2)
05725 #endif
05726     {
05727       FREE_VARIABLES ();
05728       return -1;
05729     }
05730 
05731 #ifdef WCHAR
05732   /* Allocate wchar_t array for string1 and string2 and
05733      fill them with converted string.  */
05734   if (string1 == NULL && string2 == NULL)
05735     {
05736       /* We need to set up the buffers here.  */
05737 
05738       /* We must free wcs buffers in this function.  */
05739       cant_free_wcs_buf = 0;
05740 
05741       if (csize1 != 0)
05742        {
05743          string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
05744          mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
05745          is_binary = REGEX_TALLOC (csize1 + 1, char);
05746          if (!string1 || !mbs_offset1 || !is_binary)
05747            {
05748              FREE_VAR (string1);
05749              FREE_VAR (mbs_offset1);
05750              FREE_VAR (is_binary);
05751              return -2;
05752            }
05753        }
05754       if (csize2 != 0)
05755        {
05756          string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
05757          mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
05758          is_binary = REGEX_TALLOC (csize2 + 1, char);
05759          if (!string2 || !mbs_offset2 || !is_binary)
05760            {
05761              FREE_VAR (string1);
05762              FREE_VAR (mbs_offset1);
05763              FREE_VAR (string2);
05764              FREE_VAR (mbs_offset2);
05765              FREE_VAR (is_binary);
05766              return -2;
05767            }
05768          size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
05769                                  mbs_offset2, is_binary);
05770          string2[size2] = L'\0'; /* for a sentinel  */
05771          FREE_VAR (is_binary);
05772        }
05773     }
05774 
05775   /* We need to cast pattern to (wchar_t*), because we casted this compiled
05776      pattern to (char*) in regex_compile.  */
05777   p = pattern = (CHAR_T*)bufp->buffer;
05778   pend = (CHAR_T*)(bufp->buffer + bufp->used);
05779 
05780 #endif /* WCHAR */
05781 
05782   /* Initialize subexpression text positions to -1 to mark ones that no
05783      start_memory/stop_memory has been seen for. Also initialize the
05784      register information struct.  */
05785   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05786     {
05787       regstart[mcnt] = regend[mcnt]
05788         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
05789 
05790       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
05791       IS_ACTIVE (reg_info[mcnt]) = 0;
05792       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
05793       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
05794     }
05795 
05796   /* We move `string1' into `string2' if the latter's empty -- but not if
05797      `string1' is null.  */
05798   if (size2 == 0 && string1 != NULL)
05799     {
05800       string2 = string1;
05801       size2 = size1;
05802       string1 = 0;
05803       size1 = 0;
05804 #ifdef WCHAR
05805       mbs_offset2 = mbs_offset1;
05806       csize2 = csize1;
05807       mbs_offset1 = NULL;
05808       csize1 = 0;
05809 #endif
05810     }
05811   end1 = string1 + size1;
05812   end2 = string2 + size2;
05813 
05814   /* Compute where to stop matching, within the two strings.  */
05815 #ifdef WCHAR
05816   if (stop <= csize1)
05817     {
05818       mcnt = count_mbs_length(mbs_offset1, stop);
05819       end_match_1 = string1 + mcnt;
05820       end_match_2 = string2;
05821     }
05822   else
05823     {
05824       if (stop > csize1 + csize2)
05825        stop = csize1 + csize2;
05826       end_match_1 = end1;
05827       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
05828       end_match_2 = string2 + mcnt;
05829     }
05830   if (mcnt < 0)
05831     { /* count_mbs_length return error.  */
05832       FREE_VARIABLES ();
05833       return -1;
05834     }
05835 #else
05836   if (stop <= size1)
05837     {
05838       end_match_1 = string1 + stop;
05839       end_match_2 = string2;
05840     }
05841   else
05842     {
05843       end_match_1 = end1;
05844       end_match_2 = string2 + stop - size1;
05845     }
05846 #endif /* WCHAR */
05847 
05848   /* `p' scans through the pattern as `d' scans through the data.
05849      `dend' is the end of the input string that `d' points within.  `d'
05850      is advanced into the following input string whenever necessary, but
05851      this happens before fetching; therefore, at the beginning of the
05852      loop, `d' can be pointing at the end of a string, but it cannot
05853      equal `string2'.  */
05854 #ifdef WCHAR
05855   if (size1 > 0 && pos <= csize1)
05856     {
05857       mcnt = count_mbs_length(mbs_offset1, pos);
05858       d = string1 + mcnt;
05859       dend = end_match_1;
05860     }
05861   else
05862     {
05863       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
05864       d = string2 + mcnt;
05865       dend = end_match_2;
05866     }
05867 
05868   if (mcnt < 0)
05869     { /* count_mbs_length return error.  */
05870       FREE_VARIABLES ();
05871       return -1;
05872     }
05873 #else
05874   if (size1 > 0 && pos <= size1)
05875     {
05876       d = string1 + pos;
05877       dend = end_match_1;
05878     }
05879   else
05880     {
05881       d = string2 + pos - size1;
05882       dend = end_match_2;
05883     }
05884 #endif /* WCHAR */
05885 
05886   DEBUG_PRINT1 ("The compiled pattern is:\n");
05887   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
05888   DEBUG_PRINT1 ("The string to match is: `");
05889   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
05890   DEBUG_PRINT1 ("'\n");
05891 
05892   /* This loops over pattern commands.  It exits by returning from the
05893      function if the match is complete, or it drops through if the match
05894      fails at this starting point in the input data.  */
05895   for (;;)
05896     {
05897 #ifdef _LIBC
05898       DEBUG_PRINT2 ("\n%p: ", p);
05899 #else
05900       DEBUG_PRINT2 ("\n0x%x: ", p);
05901 #endif
05902 
05903       if (p == pend)
05904        { /* End of pattern means we might have succeeded.  */
05905           DEBUG_PRINT1 ("end of pattern ... ");
05906 
05907          /* If we haven't matched the entire string, and we want the
05908              longest match, try backtracking.  */
05909           if (d != end_match_2)
05910            {
05911              /* 1 if this match ends in the same string (string1 or string2)
05912                as the best previous match.  */
05913              boolean same_str_p = (FIRST_STRING_P (match_end)
05914                                 == MATCHING_IN_FIRST_STRING);
05915              /* 1 if this match is the best seen so far.  */
05916              boolean best_match_p;
05917 
05918              /* AIX compiler got confused when this was combined
05919                with the previous declaration.  */
05920              if (same_str_p)
05921               best_match_p = d > match_end;
05922              else
05923               best_match_p = !MATCHING_IN_FIRST_STRING;
05924 
05925               DEBUG_PRINT1 ("backtracking.\n");
05926 
05927               if (!FAIL_STACK_EMPTY ())
05928                 { /* More failure points to try.  */
05929 
05930                   /* If exceeds best match so far, save it.  */
05931                   if (!best_regs_set || best_match_p)
05932                     {
05933                       best_regs_set = true;
05934                       match_end = d;
05935 
05936                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
05937 
05938                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05939                         {
05940                           best_regstart[mcnt] = regstart[mcnt];
05941                           best_regend[mcnt] = regend[mcnt];
05942                         }
05943                     }
05944                   goto fail;
05945                 }
05946 
05947               /* If no failure points, don't restore garbage.  And if
05948                  last match is real best match, don't restore second
05949                  best one. */
05950               else if (best_regs_set && !best_match_p)
05951                 {
05952                restore_best_regs:
05953                   /* Restore best match.  It may happen that `dend ==
05954                      end_match_1' while the restored d is in string2.
05955                      For example, the pattern `x.*y.*z' against the
05956                      strings `x-' and `y-z-', if the two strings are
05957                      not consecutive in memory.  */
05958                   DEBUG_PRINT1 ("Restoring best registers.\n");
05959 
05960                   d = match_end;
05961                   dend = ((d >= string1 && d <= end1)
05962                          ? end_match_1 : end_match_2);
05963 
05964                 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05965                   {
05966                     regstart[mcnt] = best_regstart[mcnt];
05967                     regend[mcnt] = best_regend[mcnt];
05968                   }
05969                 }
05970             } /* d != end_match_2 */
05971 
05972        succeed_label:
05973           DEBUG_PRINT1 ("Accepting match.\n");
05974           /* If caller wants register contents data back, do it.  */
05975           if (regs && !bufp->no_sub)
05976            {
05977              /* Have the register data arrays been allocated?  */
05978               if (bufp->regs_allocated == REGS_UNALLOCATED)
05979                 { /* No.  So allocate them with malloc.  We need one
05980                      extra element beyond `num_regs' for the `-1' marker
05981                      GNU code uses.  */
05982                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
05983                   regs->start = TALLOC (regs->num_regs, regoff_t);
05984                   regs->end = TALLOC (regs->num_regs, regoff_t);
05985                   if (regs->start == NULL || regs->end == NULL)
05986                   {
05987                     FREE_VARIABLES ();
05988                     return -2;
05989                   }
05990                   bufp->regs_allocated = REGS_REALLOCATE;
05991                 }
05992               else if (bufp->regs_allocated == REGS_REALLOCATE)
05993                 { /* Yes.  If we need more elements than were already
05994                      allocated, reallocate them.  If we need fewer, just
05995                      leave it alone.  */
05996                   if (regs->num_regs < num_regs + 1)
05997                     {
05998                       regs->num_regs = num_regs + 1;
05999                       RETALLOC (regs->start, regs->num_regs, regoff_t);
06000                       RETALLOC (regs->end, regs->num_regs, regoff_t);
06001                       if (regs->start == NULL || regs->end == NULL)
06002                      {
06003                        FREE_VARIABLES ();
06004                        return -2;
06005                      }
06006                     }
06007                 }
06008               else
06009               {
06010                 /* These braces fend off an "empty body in an else-statement"
06011                    warning under GCC when assert expands to nothing.  */
06012                 assert (bufp->regs_allocated == REGS_FIXED);
06013               }
06014 
06015               /* Convert the pointer data in `regstart' and `regend' to
06016                  indices.  Register zero has to be set differently,
06017                  since we haven't kept track of any info for it.  */
06018               if (regs->num_regs > 0)
06019                 {
06020                   regs->start[0] = pos;
06021 #ifdef WCHAR
06022                 if (MATCHING_IN_FIRST_STRING)
06023                   regs->end[0] = mbs_offset1 != NULL ?
06024                                    mbs_offset1[d-string1] : 0;
06025                 else
06026                   regs->end[0] = csize1 + (mbs_offset2 != NULL ?
06027                                         mbs_offset2[d-string2] : 0);
06028 #else
06029                   regs->end[0] = (MATCHING_IN_FIRST_STRING
06030                               ? ((regoff_t) (d - string1))
06031                                : ((regoff_t) (d - string2 + size1)));
06032 #endif /* WCHAR */
06033                 }
06034 
06035               /* Go through the first `min (num_regs, regs->num_regs)'
06036                  registers, since that is all we initialized.  */
06037              for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
06038                  mcnt++)
06039               {
06040                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
06041                     regs->start[mcnt] = regs->end[mcnt] = -1;
06042                   else
06043                     {
06044                     regs->start[mcnt]
06045                      = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
06046                       regs->end[mcnt]
06047                      = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
06048                     }
06049               }
06050 
06051               /* If the regs structure we return has more elements than
06052                  were in the pattern, set the extra elements to -1.  If
06053                  we (re)allocated the registers, this is the case,
06054                  because we always allocate enough to have at least one
06055                  -1 at the end.  */
06056               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
06057                 regs->start[mcnt] = regs->end[mcnt] = -1;
06058            } /* regs && !bufp->no_sub */
06059 
06060           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
06061                         nfailure_points_pushed, nfailure_points_popped,
06062                         nfailure_points_pushed - nfailure_points_popped);
06063           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
06064 
06065 #ifdef WCHAR
06066          if (MATCHING_IN_FIRST_STRING)
06067            mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
06068          else
06069            mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
06070                      csize1;
06071           mcnt -= pos;
06072 #else
06073           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
06074                          ? string1
06075                          : string2 - size1);
06076 #endif /* WCHAR */
06077 
06078           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
06079 
06080           FREE_VARIABLES ();
06081           return mcnt;
06082         }
06083 
06084       /* Otherwise match next pattern command.  */
06085       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
06086        {
06087         /* Ignore these.  Used to ignore the n of succeed_n's which
06088            currently have n == 0.  */
06089         case no_op:
06090           DEBUG_PRINT1 ("EXECUTING no_op.\n");
06091           break;
06092 
06093        case succeed:
06094           DEBUG_PRINT1 ("EXECUTING succeed.\n");
06095          goto succeed_label;
06096 
06097         /* Match the next n pattern characters exactly.  The following
06098            byte in the pattern defines n, and the n bytes after that
06099            are the characters to match.  */
06100        case exactn:
06101 #ifdef MBS_SUPPORT
06102        case exactn_bin:
06103 #endif
06104          mcnt = *p++;
06105           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
06106 
06107           /* This is written out as an if-else so we don't waste time
06108              testing `translate' inside the loop.  */
06109           if (translate)
06110            {
06111              do
06112               {
06113                 PREFETCH ();
06114 #ifdef WCHAR
06115                 if (*d <= 0xff)
06116                   {
06117                     if ((UCHAR_T) translate[(unsigned char) *d++]
06118                        != (UCHAR_T) *p++)
06119                      goto fail;
06120                   }
06121                 else
06122                   {
06123                     if (*d++ != (CHAR_T) *p++)
06124                      goto fail;
06125                   }
06126 #else
06127                 if ((UCHAR_T) translate[(unsigned char) *d++]
06128                     != (UCHAR_T) *p++)
06129                     goto fail;
06130 #endif /* WCHAR */
06131               }
06132              while (--mcnt);
06133            }
06134          else
06135            {
06136              do
06137               {
06138                 PREFETCH ();
06139                 if (*d++ != (CHAR_T) *p++) goto fail;
06140               }
06141              while (--mcnt);
06142            }
06143          SET_REGS_MATCHED ();
06144           break;
06145 
06146 
06147         /* Match any character except possibly a newline or a null.  */
06148        case anychar:
06149           DEBUG_PRINT1 ("EXECUTING anychar.\n");
06150 
06151           PREFETCH ();
06152 
06153           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
06154               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
06155            goto fail;
06156 
06157           SET_REGS_MATCHED ();
06158           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
06159           d++;
06160          break;
06161 
06162 
06163        case charset:
06164        case charset_not:
06165          {
06166            register UCHAR_T c;
06167 #ifdef WCHAR
06168            unsigned int i, char_class_length, coll_symbol_length,
06169               equiv_class_length, ranges_length, chars_length, length;
06170            CHAR_T *workp, *workp2, *charset_top;
06171 #define WORK_BUFFER_SIZE 128
06172             CHAR_T str_buf[WORK_BUFFER_SIZE];
06173 # ifdef _LIBC
06174            uint32_t nrules;
06175 # endif /* _LIBC */
06176 #endif /* WCHAR */
06177            boolean negate = (re_opcode_t) *(p - 1) == charset_not;
06178 
06179             DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
06180            PREFETCH ();
06181            c = TRANSLATE (*d); /* The character to match.  */
06182 #ifdef WCHAR
06183 # ifdef _LIBC
06184            nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
06185 # endif /* _LIBC */
06186            charset_top = p - 1;
06187            char_class_length = *p++;
06188            coll_symbol_length = *p++;
06189            equiv_class_length = *p++;
06190            ranges_length = *p++;
06191            chars_length = *p++;
06192            /* p now points at charset[6], so the address of the next
06193               instruction (charset[6+l+m+n+2*o+p']) equals p[l+m+n+2*o+p'],
06194               where l=length of char_classes, m=length of collating_symbol,
06195               n=length of equivalence_class, o=length of char_range,
06196               p'=length of character.  */
06197            workp = p;
06198            /* Update p to indicate the next instruction.  */
06199            p += char_class_length + coll_symbol_length+ equiv_class_length +
06200               2*ranges_length + chars_length;
06201 
06202             /* match with char_class?  */
06203            for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
06204              {
06205               wctype_t wctype;
06206               uintptr_t alignedp = ((uintptr_t)workp
06207                                   + __alignof__(wctype_t) - 1)
06208                                   & ~(uintptr_t)(__alignof__(wctype_t) - 1);
06209               wctype = *((wctype_t*)alignedp);
06210               workp += CHAR_CLASS_SIZE;
06211 # ifdef _LIBC
06212               if (__iswctype((wint_t)c, wctype))
06213                 goto char_set_matched;
06214 # else
06215               if (iswctype((wint_t)c, wctype))
06216                 goto char_set_matched;
06217 # endif
06218              }
06219 
06220             /* match with collating_symbol?  */
06221 # ifdef _LIBC
06222            if (nrules != 0)
06223              {
06224               const unsigned char *extra = (const unsigned char *)
06225                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
06226 
06227               for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
06228                    workp++)
06229                 {
06230                   int32_t *wextra;
06231                   wextra = (int32_t*)(extra + *workp++);
06232                   for (i = 0; i < *wextra; ++i)
06233                     if (TRANSLATE(d[i]) != wextra[1 + i])
06234                      break;
06235 
06236                   if (i == *wextra)
06237                     {
06238                      /* Advance d by the match length minus one, because d
06239                         is incremented once more at char_set_matched.  */
06240                      d += i - 1;
06241                      goto char_set_matched;
06242                     }
06243                 }
06244              }
06245            else /* (nrules == 0) */
06246 # endif
06247              /* If we can't look up collation data, we use wcscoll
06248                instead.  */
06249              {
06250               for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
06251                 {
06252                   const CHAR_T *backup_d = d, *backup_dend = dend;
06253 # ifdef _LIBC
06254                   length = __wcslen (workp);
06255 # else
06256                   length = wcslen (workp);
06257 # endif
06258 
06259                   /* If wcscoll(the collating symbol, whole string) > 0,
06260                      no substring of the string can ever match the
06261                      collating symbol.  */
06262 # ifdef _LIBC
06263                   if (__wcscoll (workp, d) > 0)
06264 # else
06265                   if (wcscoll (workp, d) > 0)
06266 # endif
06267                     {
06268                      workp += length + 1;
06269                      continue;
06270                     }
06271 
06272                   /* First, we compare the collating symbol with
06273                      the first character of the string.
06274                      If it doesn't match, we add the next character to
06275                      the compare buffer in turn.  */
06276                   for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
06277                     {
06278                      int match;
06279                      if (d == dend)
06280                        {
06281                          if (dend == end_match_2)
06282                            break;
06283                          d = string2;
06284                          dend = end_match_2;
06285                        }
06286 
06287                      /* add next character to the compare buffer.  */
06288                      str_buf[i] = TRANSLATE(*d);
06289                      str_buf[i+1] = '\0';
06290 
06291 # ifdef _LIBC
06292                      match = __wcscoll (workp, str_buf);
06293 # else
06294                      match = wcscoll (workp, str_buf);
06295 # endif
06296                      if (match == 0)
06297                        goto char_set_matched;
06298 
06299                      if (match < 0)
06300                        /* (str_buf > workp) implies (str_buf + X > workp),
06301                           because for all X (str_buf + X > str_buf).
06302                           So we don't need to continue this loop.  */
06303                        break;
06304 
06305                      /* Otherwise (str_buf < workp),
06306                         (str_buf+next_character) may equal (workp).
06307                         So we continue this loop.  */
06308                     }
06309                   /* not matched */
06310                   d = backup_d;
06311                   dend = backup_dend;
06312                   workp += length + 1;
06313                 }
06314               }
06315             /* match with equivalence_class?  */
06316 # ifdef _LIBC
06317            if (nrules != 0)
06318              {
06319                 const CHAR_T *backup_d = d, *backup_dend = dend;
06320               /* Try to match the equivalence class against
06321                  those known to the collate implementation.  */
06322               const int32_t *table;
06323               const int32_t *weights;
06324               const int32_t *extra;
06325               const int32_t *indirect;
06326               int32_t idx, idx2;
06327               wint_t *cp;
06328               size_t len;
06329 
06330               /* This #include defines a local function!  */
06331 #  include <locale/weightwc.h>
06332 
06333               table = (const int32_t *)
06334                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
06335               weights = (const wint_t *)
06336                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
06337               extra = (const wint_t *)
06338                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
06339               indirect = (const int32_t *)
06340                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
06341 
06342               /* Write 1 collating element to str_buf, and
06343                  get its index.  */
06344               idx2 = 0;
06345 
06346               for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
06347                 {
06348                   cp = (wint_t*)str_buf;
06349                   if (d == dend)
06350                     {
06351                      if (dend == end_match_2)
06352                        break;
06353                      d = string2;
06354                      dend = end_match_2;
06355                     }
06356                   str_buf[i] = TRANSLATE(*(d+i));
06357                   str_buf[i+1] = '\0'; /* sentinel */
06358                   idx2 = findidx ((const wint_t**)&cp);
06359                 }
06360 
06361               /* Set d one short of the end of the text consumed into
06362                  str_buf, because d is incremented at char_set_matched.  */
06363               d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
06364               if (d >= dend)
06365                 {
06366                   if (dend == end_match_2)
06367                      d = dend;
06368                   else
06369                     {
06370                      d = string2;
06371                      dend = end_match_2;
06372                     }
06373                 }
06374 
06375               len = weights[idx2];
06376 
06377               for (workp2 = workp + equiv_class_length ; workp < workp2 ;
06378                    workp++)
06379                 {
06380                   idx = (int32_t)*workp;
06381                   /* We already checked idx != 0 in regex_compile. */
06382 
06383                   if (idx2 != 0 && len == weights[idx])
06384                     {
06385                      int cnt = 0;
06386                      while (cnt < len && (weights[idx + 1 + cnt]
06387                                         == weights[idx2 + 1 + cnt]))
06388                        ++cnt;
06389 
06390                      if (cnt == len)
06391                        goto char_set_matched;
06392                     }
06393                 }
06394               /* not matched */
06395                 d = backup_d;
06396                 dend = backup_dend;
06397              }
06398            else /* (nrules == 0) */
06399 # endif
06400              /* If we can't look up collation data, we use wcscoll
06401                instead.  */
06402              {
06403               for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
06404                 {
06405                   const CHAR_T *backup_d = d, *backup_dend = dend;
06406 # ifdef _LIBC
06407                   length = __wcslen (workp);
06408 # else
06409                   length = wcslen (workp);
06410 # endif
06411 
06412                   /* If wcscoll(the equivalence class, whole string) > 0,
06413                      no substring of the string can ever match the
06414                      equivalence class.  */
06415 # ifdef _LIBC
06416                   if (__wcscoll (workp, d) > 0)
06417 # else
06418                   if (wcscoll (workp, d) > 0)
06419 # endif
06420                     {
06421                      workp += length + 1;
06422                      break;
06423                     }
06424 
06425                   /* First, we compare the equivalence class with
06426                      the first character of the string.
06427                      If it doesn't match, we add the next character to
06428                      the compare buffer in turn.  */
06429                   for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
06430                     {
06431                      int match;
06432                      if (d == dend)
06433                        {
06434                          if (dend == end_match_2)
06435                            break;
06436                          d = string2;
06437                          dend = end_match_2;
06438                        }
06439 
06440                      /* add next character to the compare buffer.  */
06441                      str_buf[i] = TRANSLATE(*d);
06442                      str_buf[i+1] = '\0';
06443 
06444 # ifdef _LIBC
06445                      match = __wcscoll (workp, str_buf);
06446 # else
06447                      match = wcscoll (workp, str_buf);
06448 # endif
06449 
06450                      if (match == 0)
06451                        goto char_set_matched;
06452 
06453                      if (match < 0)
06454                      /* (str_buf > workp) implies (str_buf + X > workp),
06455                         because for all X (str_buf + X > str_buf).
06456                         So we don't need to continue this loop.  */
06457                        break;
06458 
06459                      /* Otherwise (str_buf < workp),
06460                         (str_buf+next_character) may equal (workp).
06461                         So we continue this loop.  */
06462                     }
06463                   /* not matched */
06464                   d = backup_d;
06465                   dend = backup_dend;
06466                   workp += length + 1;
06467                 }
06468              }
06469 
06470             /* match with char_range?  */
06471 # ifdef _LIBC
06472            if (nrules != 0)
06473              {
06474               uint32_t collseqval;
06475               const char *collseq = (const char *)
06476                 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
06477 
06478               collseqval = collseq_table_lookup (collseq, c);
06479 
06480               for (; workp < p - chars_length ;)
06481                 {
06482                   uint32_t start_val, end_val;
06483 
06484                   /* We already compute the collation sequence value
06485                      of the characters (or collating symbols).  */
06486                   start_val = (uint32_t) *workp++; /* range_start */
06487                   end_val = (uint32_t) *workp++; /* range_end */
06488 
06489                   if (start_val <= collseqval && collseqval <= end_val)
06490                     goto char_set_matched;
06491                 }
06492              }
06493            else
06494 # endif
06495              {
06496               /* We set range_start_char at str_buf[0], range_end_char
06497                  at str_buf[4], and compared char at str_buf[2].  */
06498               str_buf[1] = 0;
06499               str_buf[2] = c;
06500               str_buf[3] = 0;
06501               str_buf[5] = 0;
06502               for (; workp < p - chars_length ;)
06503                 {
06504                   wchar_t *range_start_char, *range_end_char;
06505 
06506                   /* match if (range_start_char <= c <= range_end_char).  */
06507 
06508                   /* If range_start(or end) < 0, we assume -range_start(end)
06509                      is the offset of the collating symbol which is specified
06510                      as the character of the range start(end).  */
06511 
06512                   /* range_start */
06513                   if (*workp < 0)
06514                     range_start_char = charset_top - (*workp++);
06515                   else
06516                     {
06517                      str_buf[0] = *workp++;
06518                      range_start_char = str_buf;
06519                     }
06520 
06521                   /* range_end */
06522                   if (*workp < 0)
06523                     range_end_char = charset_top - (*workp++);
06524                   else
06525                     {
06526                      str_buf[4] = *workp++;
06527                      range_end_char = str_buf + 4;
06528                     }
06529 
06530 # ifdef _LIBC
06531                   if (__wcscoll (range_start_char, str_buf+2) <= 0
06532                      && __wcscoll (str_buf+2, range_end_char) <= 0)
06533 # else
06534                   if (wcscoll (range_start_char, str_buf+2) <= 0
06535                      && wcscoll (str_buf+2, range_end_char) <= 0)
06536 # endif
06537                     goto char_set_matched;
06538                 }
06539              }
06540 
06541             /* match with char?  */
06542            for (; workp < p ; workp++)
06543              if (c == *workp)
06544               goto char_set_matched;
06545 
06546            negate = !negate;
06547 
06548          char_set_matched:
06549            if (negate) goto fail;
06550 #else
06551             /* Cast to `unsigned' instead of `unsigned char' in case the
06552                bit list is a full 32 bytes long.  */
06553            if (c < (unsigned) (*p * BYTEWIDTH)
06554               && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
06555              negate = !negate;
06556 
06557            p += 1 + *p;
06558 
06559            if (!negate) goto fail;
06560 #undef WORK_BUFFER_SIZE
06561 #endif /* WCHAR */
06562            SET_REGS_MATCHED ();
06563             d++;
06564            break;
06565          }
06566 
06567 
06568         /* The beginning of a group is represented by start_memory.
06569            The arguments are the register number in the next byte, and the
06570            number of groups inner to this one in the next.  The text
06571            matched within the group is recorded (in the internal
06572            registers data structure) under the register number.  */
06573         case start_memory:
06574          DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
06575                      (long int) *p, (long int) p[1]);
06576 
06577           /* Find out if this group can match the empty string.  */
06578          p1 = p;            /* To send to group_match_null_string_p.  */
06579 
06580           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
06581             REG_MATCH_NULL_STRING_P (reg_info[*p])
06582               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
06583 
06584           /* Save the position in the string where we were the last time
06585              we were at this open-group operator in case the group is
06586              operated upon by a repetition operator, e.g., with `(a*)*b'
06587              against `ab'; then we want to ignore where we are now in
06588              the string in case this attempt to match fails.  */
06589           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
06590                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
06591                              : regstart[*p];
06592          DEBUG_PRINT2 ("  old_regstart: %d\n",
06593                       POINTER_TO_OFFSET (old_regstart[*p]));
06594 
06595           regstart[*p] = d;
06596          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
06597 
06598           IS_ACTIVE (reg_info[*p]) = 1;
06599           MATCHED_SOMETHING (reg_info[*p]) = 0;
06600 
06601          /* Clear this whenever we change the register activity status.  */
06602          set_regs_matched_done = 0;
06603 
06604           /* This is the new highest active register.  */
06605           highest_active_reg = *p;
06606 
06607           /* If nothing was active before, this is the new lowest active
06608              register.  */
06609           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
06610             lowest_active_reg = *p;
06611 
06612           /* Move past the register number and inner group count.  */
06613           p += 2;
06614          just_past_start_mem = p;
06615 
06616           break;
06617 
06618 
06619         /* The stop_memory opcode represents the end of a group.  Its
06620            arguments are the same as start_memory's: the register
06621            number, and the number of inner groups.  */
06622        case stop_memory:
06623          DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
06624                      (long int) *p, (long int) p[1]);
06625 
06626           /* We need to save the string position the last time we were at
06627              this close-group operator in case the group is operated
06628              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
06629              against `aba'; then we want to ignore where we are now in
06630              the string in case this attempt to match fails.  */
06631           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
06632                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
06633                         : regend[*p];
06634          DEBUG_PRINT2 ("      old_regend: %d\n",
06635                       POINTER_TO_OFFSET (old_regend[*p]));
06636 
06637           regend[*p] = d;
06638          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
06639 
06640           /* This register isn't active anymore.  */
06641           IS_ACTIVE (reg_info[*p]) = 0;
06642 
06643          /* Clear this whenever we change the register activity status.  */
06644          set_regs_matched_done = 0;
06645 
06646           /* If this was the only register active, nothing is active
06647              anymore.  */
06648           if (lowest_active_reg == highest_active_reg)
06649             {
06650               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
06651               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
06652             }
06653           else
06654             { /* We must scan for the new highest active register, since
06655                  it isn't necessarily one less than now: consider
06656                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
06657                  new highest active register is 1.  */
06658               UCHAR_T r = *p - 1;
06659               while (r > 0 && !IS_ACTIVE (reg_info[r]))
06660                 r--;
06661 
06662               /* If we end up at register zero, that means that we saved
06663                  the registers as the result of an `on_failure_jump', not
06664                  a `start_memory', and we jumped to past the innermost
06665                  `stop_memory'.  For example, in ((.)*) we save
06666                  registers 1 and 2 as a result of the *, but when we pop
06667                  back to the second ), we are at the stop_memory 1.
06668                  Thus, nothing is active.  */
06669              if (r == 0)
06670                 {
06671                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
06672                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
06673                 }
06674               else
06675                 highest_active_reg = r;
06676             }
06677 
06678           /* If just failed to match something this time around with a
06679              group that's operated on by a repetition operator, try to
06680              force exit from the ``loop'', and restore the register
06681              information for this group that we had before trying this
06682              last match.  */
06683           if ((!MATCHED_SOMETHING (reg_info[*p])
06684                || just_past_start_mem == p - 1)
06685              && (p + 2) < pend)
06686             {
06687               boolean is_a_jump_n = false;
06688 
06689               p1 = p + 2;
06690               mcnt = 0;
06691               switch ((re_opcode_t) *p1++)
06692                 {
06693                   case jump_n:
06694                   is_a_jump_n = true;
06695                   case pop_failure_jump:
06696                 case maybe_pop_jump:
06697                 case jump:
06698                 case dummy_failure_jump:
06699                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
06700                   if (is_a_jump_n)
06701                     p1 += OFFSET_ADDRESS_SIZE;
06702                     break;
06703 
06704                   default:
06705                     /* do nothing */ ;
06706                 }
06707              p1 += mcnt;
06708 
06709               /* If the next operation is a jump backwards in the pattern
06710                 to an on_failure_jump right before the start_memory
06711                  corresponding to this stop_memory, exit from the loop
06712                  by forcing a failure after pushing on the stack the
06713                  on_failure_jump's jump in the pattern, and d.  */
06714               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
06715                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
06716                 && p1[2+OFFSET_ADDRESS_SIZE] == *p)
06717               {
06718                   /* If this group ever matched anything, then restore
06719                      what its registers were before trying this last
06720                      failed match, e.g., with `(a*)*b' against `ab' for
06721                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
06722                      against `aba' for regend[3].
06723 
06724                      Also restore the registers for inner groups for,
06725                      e.g., `((a*)(b*))*' against `aba' (register 3 would
06726                      otherwise get trashed).  */
06727 
06728                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
06729                   {
06730                     unsigned r;
06731 
06732                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
06733 
06734                     /* Restore this and inner groups' (if any) registers.  */
06735                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
06736                         r++)
06737                         {
06738                           regstart[r] = old_regstart[r];
06739 
06740                           /* xx why this test?  */
06741                           if (old_regend[r] >= regstart[r])
06742                             regend[r] = old_regend[r];
06743                         }
06744                     }
06745                 p1++;
06746                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
06747                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
06748 
06749                   goto fail;
06750                 }
06751             }
06752 
06753           /* Move past the register number and the inner group count.  */
06754           p += 2;
06755           break;
06756 
06757 
06758        /* \<digit> has been turned into a `duplicate' command which is
06759            followed by the numeric value of <digit> as the register number.  */
06760         case duplicate:
06761          {
06762            register const CHAR_T *d2, *dend2;
06763            int regno = *p++;   /* Get which register to match against.  */
06764            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
06765 
06766            /* Can't back reference a group which we've never matched.  */
06767             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
06768               goto fail;
06769 
06770             /* Where in input to try to start matching.  */
06771             d2 = regstart[regno];
06772 
06773             /* Where to stop matching; if both the place to start and
06774                the place to stop matching are in the same string, then
06775                set to the place to stop, otherwise, for now have to use
06776                the end of the first string.  */
06777 
06778             dend2 = ((FIRST_STRING_P (regstart[regno])
06779                     == FIRST_STRING_P (regend[regno]))
06780                    ? regend[regno] : end_match_1);
06781            for (;;)
06782              {
06783               /* If necessary, advance to next segment in register
06784                    contents.  */
06785               while (d2 == dend2)
06786                 {
06787                   if (dend2 == end_match_2) break;
06788                   if (dend2 == regend[regno]) break;
06789 
06790                     /* End of string1 => advance to string2. */
06791                     d2 = string2;
06792                     dend2 = regend[regno];
06793                 }
06794               /* At end of register contents => success */
06795               if (d2 == dend2) break;
06796 
06797               /* If necessary, advance to next segment in data.  */
06798               PREFETCH ();
06799 
06800               /* How many characters left in this segment to match.  */
06801               mcnt = dend - d;
06802 
06803               /* Want how many consecutive characters we can match in
06804                    one shot, so, if necessary, adjust the count.  */
06805                 if (mcnt > dend2 - d2)
06806                 mcnt = dend2 - d2;
06807 
06808               /* Compare that many; failure if mismatch, else move
06809                    past them.  */
06810               if (translate
06811                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
06812                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
06813                 goto fail;
06814               d += mcnt, d2 += mcnt;
06815 
06816               /* Do this because we've matched some characters.  */
06817               SET_REGS_MATCHED ();
06818              }
06819          }
06820          break;
06821 
06822 
06823         /* begline matches the empty string at the beginning of the string
06824            (unless `not_bol' is set in `bufp'), and, if
06825            `newline_anchor' is set, after newlines.  */
06826        case begline:
06827           DEBUG_PRINT1 ("EXECUTING begline.\n");
06828 
06829           if (AT_STRINGS_BEG (d))
06830             {
06831               if (!bufp->not_bol) break;
06832             }
06833           else if (d[-1] == '\n' && bufp->newline_anchor)
06834             {
06835               break;
06836             }
06837           /* In all other cases, we fail.  */
06838           goto fail;
06839 
06840 
06841         /* endline is the dual of begline.  */
06842        case endline:
06843           DEBUG_PRINT1 ("EXECUTING endline.\n");
06844 
06845           if (AT_STRINGS_END (d))
06846             {
06847               if (!bufp->not_eol) break;
06848             }
06849 
06850           /* We have to ``prefetch'' the next character.  */
06851           else if ((d == end1 ? *string2 : *d) == '\n'
06852                    && bufp->newline_anchor)
06853             {
06854               break;
06855             }
06856           goto fail;
06857 
06858 
06859        /* Match at the very beginning of the data.  */
06860         case begbuf:
06861           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
06862           if (AT_STRINGS_BEG (d))
06863             break;
06864           goto fail;
06865 
06866 
06867        /* Match at the very end of the data.  */
06868         case endbuf:
06869           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
06870          if (AT_STRINGS_END (d))
06871            break;
06872           goto fail;
06873 
06874 
06875         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
06876            pushes NULL as the value for the string on the stack.  Then
06877            `pop_failure_point' will keep the current value for the
06878            string, instead of restoring it.  To see why, consider
06879            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
06880            then the . fails against the \n.  But the next thing we want
06881            to do is match the \n against the \n; if we restored the
06882            string value, we would be back at the foo.
06883 
06884            Because this is used only in specific cases, we don't need to
06885            check all the things that `on_failure_jump' does, to make
06886            sure the right things get saved on the stack.  Hence we don't
06887            share its code.  The only reason to push anything on the
06888            stack at all is that otherwise we would have to change
06889            `anychar's code to do something besides goto fail in this
06890            case; that seems worse than this.  */
06891         case on_failure_keep_string_jump:
06892           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
06893 
06894           EXTRACT_NUMBER_AND_INCR (mcnt, p);
06895 #ifdef _LIBC
06896           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
06897 #else
06898           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
06899 #endif
06900 
06901           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
06902           break;
06903 
06904 
06905        /* Uses of on_failure_jump:
06906 
06907            Each alternative starts with an on_failure_jump that points
06908            to the beginning of the next alternative.  Each alternative
06909            except the last ends with a jump that in effect jumps past
06910            the rest of the alternatives.  (They really jump to the
06911            ending jump of the following alternative, because tensioning
06912            these jumps is a hassle.)
06913 
06914            Repeats start with an on_failure_jump that points past both
06915            the repetition text and either the following jump or
06916            pop_failure_jump back to this on_failure_jump.  */
06917        case on_failure_jump:
06918         on_failure:
06919           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
06920 
06921           EXTRACT_NUMBER_AND_INCR (mcnt, p);
06922 #ifdef _LIBC
06923           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
06924 #else
06925           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
06926 #endif
06927 
06928           /* If this on_failure_jump comes right before a group (i.e.,
06929              the original * applied to a group), save the information
06930              for that group and all inner ones, so that if we fail back
06931              to this point, the group's information will be correct.
06932              For example, in \(a*\)*\1, we need the preceding group,
06933              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
06934 
06935           /* We can't use `p' to check ahead because we push
06936              a failure point to `p + mcnt' after we do this.  */
06937           p1 = p;
06938 
06939           /* We need to skip no_op's before we look for the
06940              start_memory in case this on_failure_jump is happening as
06941              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
06942              against aba.  */
06943           while (p1 < pend && (re_opcode_t) *p1 == no_op)
06944             p1++;
06945 
06946           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
06947             {
06948               /* We have a new highest active register now.  This will
06949                  get reset at the start_memory we are about to get to,
06950                  but we will have saved all the registers relevant to
06951                  this repetition op, as described above.  */
06952               highest_active_reg = *(p1 + 1) + *(p1 + 2);
06953               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
06954                 lowest_active_reg = *(p1 + 1);
06955             }
06956 
06957           DEBUG_PRINT1 (":\n");
06958           PUSH_FAILURE_POINT (p + mcnt, d, -2);
06959           break;
06960 
06961 
06962         /* A smart repeat ends with `maybe_pop_jump'.
06963           We change it to either `pop_failure_jump' or `jump'.  */
06964         case maybe_pop_jump:
06965           EXTRACT_NUMBER_AND_INCR (mcnt, p);
06966           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
06967           {
06968            register UCHAR_T *p2 = p;
06969 
06970             /* Compare the beginning of the repeat with what in the
06971                pattern follows its end. If we can establish that there
06972                is nothing that they would both match, i.e., that we
06973                would have to backtrack because of (as in, e.g., `a*a')
06974                then we can change to pop_failure_jump, because we'll
06975                never have to backtrack.
06976 
06977                This is not true in the case of alternatives: in
06978                `(a|ab)*' we do need to backtrack to the `ab' alternative
06979                (e.g., if the string was `ab').  But instead of trying to
06980                detect that here, the alternative has put on a dummy
06981                failure point which is what we will end up popping.  */
06982 
06983            /* Skip over open/close-group commands.
06984               If what follows this loop is a ...+ construct,
06985               look at what begins its body, since we will have to
06986               match at least one of that.  */
06987            while (1)
06988              {
06989               if (p2 + 2 < pend
06990                   && ((re_opcode_t) *p2 == stop_memory
06991                      || (re_opcode_t) *p2 == start_memory))
06992                 p2 += 3;
06993               else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
06994                       && (re_opcode_t) *p2 == dummy_failure_jump)
06995                 p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
06996               else
06997                 break;
06998              }
06999 
07000            p1 = p + mcnt;
07001            /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
07002               to the `maybe_pop_jump' of this case.  Examine what
07003               follows.  */
07004 
07005             /* If we're at the end of the pattern, we can change.  */
07006             if (p2 == pend)
07007              {
07008               /* Consider what happens when matching ":\(.*\)"
07009                  against ":/".  I don't really understand this code
07010                  yet.  */
07011                p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
07012                 pop_failure_jump;
07013                 DEBUG_PRINT1
07014                   ("  End of pattern: change to `pop_failure_jump'.\n");
07015               }
07016 
07017             else if ((re_opcode_t) *p2 == exactn
07018 #ifdef MBS_SUPPORT
07019                    || (re_opcode_t) *p2 == exactn_bin
07020 #endif
07021                    || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
07022              {
07023               register UCHAR_T c
07024                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
07025 
07026                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
07027 #ifdef MBS_SUPPORT
07028                    || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
07029 #endif
07030                   ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
07031                   {
07032                   p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
07033                     pop_failure_jump;
07034 #ifdef WCHAR
07035                     DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
07036                                 (wint_t) c,
07037                                 (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
07038 #else
07039                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
07040                                 (char) c,
07041                                 (char) p1[3+OFFSET_ADDRESS_SIZE]);
07042 #endif
07043                   }
07044 
07045 #ifndef WCHAR
07046               else if ((re_opcode_t) p1[3] == charset
07047                       || (re_opcode_t) p1[3] == charset_not)
07048                 {
07049                   int negate = (re_opcode_t) p1[3] == charset_not;
07050 
07051                   if (c < (unsigned) (p1[4] * BYTEWIDTH)
07052                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
07053                     negate = !negate;
07054 
07055                     /* `negate' is equal to 1 if c would match, which means
07056                         that we can't change to pop_failure_jump.  */
07057                   if (!negate)
07058                       {
07059                       p[-3] = (unsigned char) pop_failure_jump;
07060                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
07061                       }
07062                 }
07063 #endif /* not WCHAR */
07064              }
07065 #ifndef WCHAR
07066             else if ((re_opcode_t) *p2 == charset)
07067              {
07068               /* We win if the first character of the loop is not part
07069                    of the charset.  */
07070                 if ((re_opcode_t) p1[3] == exactn
07071                   && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
07072                        && (p2[2 + p1[5] / BYTEWIDTH]
07073                            & (1 << (p1[5] % BYTEWIDTH)))))
07074                 {
07075                   p[-3] = (unsigned char) pop_failure_jump;
07076                   DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
07077                   }
07078 
07079               else if ((re_opcode_t) p1[3] == charset_not)
07080                 {
07081                   int idx;
07082                   /* We win if the charset_not inside the loop
07083                      lists every character listed in the charset after.  */
07084                   for (idx = 0; idx < (int) p2[1]; idx++)
07085                     if (! (p2[2 + idx] == 0
07086                           || (idx < (int) p1[4]
07087                              && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
07088                      break;
07089 
07090                   if (idx == p2[1])
07091                       {
07092                       p[-3] = (unsigned char) pop_failure_jump;
07093                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
07094                       }
07095                 }
07096               else if ((re_opcode_t) p1[3] == charset)
07097                 {
07098                   int idx;
07099                   /* We win if the charset inside the loop
07100                      has no overlap with the one after the loop.  */
07101                   for (idx = 0;
07102                       idx < (int) p2[1] && idx < (int) p1[4];
07103                       idx++)
07104                     if ((p2[2 + idx] & p1[5 + idx]) != 0)
07105                      break;
07106 
07107                   if (idx == p2[1] || idx == p1[4])
07108                       {
07109                       p[-3] = (unsigned char) pop_failure_jump;
07110                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
07111                       }
07112                 }
07113              }
07114 #endif /* not WCHAR */
07115          }
07116          p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again.  */
07117          if ((re_opcode_t) p[-1] != pop_failure_jump)
07118            {
07119              p[-1] = (UCHAR_T) jump;
07120               DEBUG_PRINT1 ("  Match => jump.\n");
07121              goto unconditional_jump;
07122            }
07123         /* Note fall through.  */
07124 
07125 
07126        /* The end of a simple repeat has a pop_failure_jump back to
07127            its matching on_failure_jump, where the latter will push a
07128            failure point.  The pop_failure_jump takes off failure
07129            points put on by this pop_failure_jump's matching
07130            on_failure_jump; we got through the pattern to here from the
07131            matching on_failure_jump, so didn't fail.  */
07132         case pop_failure_jump:
07133           {
07134             /* We need to pass separate storage for the lowest and
07135                highest registers, even though we don't care about the
07136                actual values.  Otherwise, we will restore only one
07137                register from the stack, since lowest will == highest in
07138                `pop_failure_point'.  */
07139             active_reg_t dummy_low_reg, dummy_high_reg;
07140             UCHAR_T *pdummy = NULL;
07141             const CHAR_T *sdummy = NULL;
07142 
07143             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
07144             POP_FAILURE_POINT (sdummy, pdummy,
07145                                dummy_low_reg, dummy_high_reg,
07146                                reg_dummy, reg_dummy, reg_info_dummy);
07147           }
07148          /* Note fall through.  */
07149 
07150        unconditional_jump:
07151 #ifdef _LIBC
07152          DEBUG_PRINT2 ("\n%p: ", p);
07153 #else
07154          DEBUG_PRINT2 ("\n0x%x: ", p);
07155 #endif
07156           /* Note fall through.  */
07157 
07158         /* Unconditionally jump (without popping any failure points).  */
07159         case jump:
07160          EXTRACT_NUMBER_AND_INCR (mcnt, p);      /* Get the amount to jump.  */
07161           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
07162          p += mcnt;                       /* Do the jump.  */
07163 #ifdef _LIBC
07164           DEBUG_PRINT2 ("(to %p).\n", p);
07165 #else
07166           DEBUG_PRINT2 ("(to 0x%x).\n", p);
07167 #endif
07168          break;
07169 
07170 
07171         /* We need this opcode so we can detect where alternatives end
07172            in `group_match_null_string_p' et al.  */
07173         case jump_past_alt:
07174           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
07175           goto unconditional_jump;
07176 
07177 
07178         /* Normally, the on_failure_jump pushes a failure point, which
07179            then gets popped at pop_failure_jump.  We will end up at
07180            pop_failure_jump, also, and with a pattern of, say, `a+', we
07181            are skipping over the on_failure_jump, so we have to push
07182            something meaningless for pop_failure_jump to pop.  */
07183         case dummy_failure_jump:
07184           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
07185           /* It doesn't matter what we push for the string here.  What
07186              the code at `fail' tests is the value for the pattern.  */
07187           PUSH_FAILURE_POINT (NULL, NULL, -2);
07188           goto unconditional_jump;
07189 
07190 
07191         /* At the end of an alternative, we need to push a dummy failure
07192            point in case we are followed by a `pop_failure_jump', because
07193            we don't want the failure point for the alternative to be
07194            popped.  For example, matching `(a|ab)*' against `aab'
07195            requires that we match the `ab' alternative.  */
07196         case push_dummy_failure:
07197           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
07198           /* See comments just above at `dummy_failure_jump' about the
07199              two NULL arguments.  */
07200           PUSH_FAILURE_POINT (NULL, NULL, -2);
07201           break;
07202 
07203         /* Have to succeed matching what follows at least n times.
07204            After that, handle like `on_failure_jump'.  */
07205         case succeed_n:
07206           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
07207           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
07208 
07209           assert (mcnt >= 0);
07210           /* Originally, this is how many times we HAVE to succeed.  */
07211           if (mcnt > 0)
07212             {
07213                mcnt--;
07214               p += OFFSET_ADDRESS_SIZE;
07215                STORE_NUMBER_AND_INCR (p, mcnt);
07216 #ifdef _LIBC
07217                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
07218                           , mcnt);
07219 #else
07220                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
07221                           , mcnt);
07222 #endif
07223             }
07224          else if (mcnt == 0)
07225             {
07226 #ifdef _LIBC
07227               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
07228                          p + OFFSET_ADDRESS_SIZE);
07229 #else
07230               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
07231                          p + OFFSET_ADDRESS_SIZE);
07232 #endif /* _LIBC */
07233 
07234 #ifdef WCHAR
07235              p[1] = (UCHAR_T) no_op;
07236 #else
07237              p[2] = (UCHAR_T) no_op;
07238               p[3] = (UCHAR_T) no_op;
07239 #endif /* WCHAR */
07240               goto on_failure;
07241             }
07242           break;
07243 
07244         case jump_n:
07245           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
07246           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
07247 
07248           /* Originally, this is how many times we CAN jump.  */
07249           if (mcnt)
07250             {
07251                mcnt--;
07252                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
07253 
07254 #ifdef _LIBC
07255                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
07256                           mcnt);
07257 #else
07258                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
07259                           mcnt);
07260 #endif /* _LIBC */
07261               goto unconditional_jump;
07262             }
07263           /* If we don't have to jump any more, skip over the rest of the command.  */
07264          else
07265            p += 2 * OFFSET_ADDRESS_SIZE;
07266           break;
07267 
07268        case set_number_at:
07269          {
07270             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
07271 
07272             EXTRACT_NUMBER_AND_INCR (mcnt, p);
07273             p1 = p + mcnt;
07274             EXTRACT_NUMBER_AND_INCR (mcnt, p);
07275 #ifdef _LIBC
07276             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
07277 #else
07278             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
07279 #endif
07280            STORE_NUMBER (p1, mcnt);
07281             break;
07282           }
07283 
07284 #if 0
07285        /* The DEC Alpha C compiler 3.x generates incorrect code for the
07286           test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
07287           AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
07288           macro and introducing temporary variables works around the bug.  */
07289 
07290        case wordbound:
07291          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
07292          if (AT_WORD_BOUNDARY (d))
07293            break;
07294          goto fail;
07295 
07296        case notwordbound:
07297          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
07298          if (AT_WORD_BOUNDARY (d))
07299            goto fail;
07300          break;
07301 #else
07302        case wordbound:
07303        {
07304          boolean prevchar, thischar;
07305 
07306          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
07307          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
07308            break;
07309 
07310          prevchar = WORDCHAR_P (d - 1);
07311          thischar = WORDCHAR_P (d);
07312          if (prevchar != thischar)
07313            break;
07314          goto fail;
07315        }
07316 
07317       case notwordbound:
07318        {
07319          boolean prevchar, thischar;
07320 
07321          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
07322          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
07323            goto fail;
07324 
07325          prevchar = WORDCHAR_P (d - 1);
07326          thischar = WORDCHAR_P (d);
07327          if (prevchar != thischar)
07328            goto fail;
07329          break;
07330        }
07331 #endif
07332 
07333        case wordbeg:
07334           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
07335          if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
07336              && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
07337            break;
07338           goto fail;
07339 
07340        case wordend:
07341           DEBUG_PRINT1 ("EXECUTING wordend.\n");
07342          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
07343               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
07344            break;
07345           goto fail;
07346 
07347 #ifdef emacs
07348        case before_dot:
07349           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
07350          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
07351            goto fail;
07352          break;
07353 
07354        case at_dot:
07355           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
07356          if (PTR_CHAR_POS ((unsigned char *) d) != point)
07357            goto fail;
07358          break;
07359 
07360        case after_dot:
07361           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
07362           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
07363            goto fail;
07364          break;
07365 
07366        case syntaxspec:
07367           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
07368          mcnt = *p++;
07369          goto matchsyntax;
07370 
07371         case wordchar:
07372           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
07373          mcnt = (int) Sword;
07374         matchsyntax:
07375          PREFETCH ();
07376          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
07377          d++;
07378          if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
07379            goto fail;
07380           SET_REGS_MATCHED ();
07381          break;
07382 
07383        case notsyntaxspec:
07384           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
07385          mcnt = *p++;
07386          goto matchnotsyntax;
07387 
07388         case notwordchar:
07389           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
07390          mcnt = (int) Sword;
07391         matchnotsyntax:
07392          PREFETCH ();
07393          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
07394          d++;
07395          if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
07396            goto fail;
07397          SET_REGS_MATCHED ();
07398           break;
07399 
07400 #else /* not emacs */
07401        case wordchar:
07402           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
07403          PREFETCH ();
07404           if (!WORDCHAR_P (d))
07405             goto fail;
07406          SET_REGS_MATCHED ();
07407           d++;
07408          break;
07409 
07410        case notwordchar:
07411           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
07412          PREFETCH ();
07413          if (WORDCHAR_P (d))
07414             goto fail;
07415           SET_REGS_MATCHED ();
07416           d++;
07417          break;
07418 #endif /* not emacs */
07419 
07420         default:
07421           abort ();
07422        }
07423       continue;  /* Successfully executed one pattern command; keep going.  */
07424 
07425 
07426     /* We goto here if a matching operation fails. */
07427     fail:
07428       if (!FAIL_STACK_EMPTY ())
07429        { /* A restart point is known.  Restore to that state.  */
07430           DEBUG_PRINT1 ("\nFAIL:\n");
07431           POP_FAILURE_POINT (d, p,
07432                              lowest_active_reg, highest_active_reg,
07433                              regstart, regend, reg_info);
07434 
07435           /* If this failure point is a dummy, try the next one.  */
07436           if (!p)
07437            goto fail;
07438 
07439           /* If we failed to the end of the pattern, don't examine *p.  */
07440          assert (p <= pend);
07441           if (p < pend)
07442             {
07443               boolean is_a_jump_n = false;
07444 
07445               /* If failed to a backwards jump that's part of a repetition
07446                  loop, need to pop this failure point and use the next one.  */
07447               switch ((re_opcode_t) *p)
07448                 {
07449                 case jump_n:
07450                   is_a_jump_n = true;
07451                 case maybe_pop_jump:
07452                 case pop_failure_jump:
07453                 case jump:
07454                   p1 = p + 1;
07455                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07456                   p1 += mcnt;
07457 
07458                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
07459                       || (!is_a_jump_n
07460                           && (re_opcode_t) *p1 == on_failure_jump))
07461                     goto fail;
07462                   break;
07463                 default:
07464                   /* do nothing */ ;
07465                 }
07466             }
07467 
07468           if (d >= string1 && d <= end1)
07469            dend = end_match_1;
07470         }
07471       else
07472         break;   /* Matching at this starting point really fails.  */
07473     } /* for (;;) */
07474 
07475   if (best_regs_set)
07476     goto restore_best_regs;
07477 
07478   FREE_VARIABLES ();
07479 
07480   return -1;                              /* Failure to match.  */
07481 } /* re_match_2 */
07482 
07483 /* Subroutine definitions for re_match_2.  */
07484 
07485 
07486 /* We are passed P pointing to a register number after a start_memory.
07487 
07488    Return true if the pattern up to the corresponding stop_memory can
07489    match the empty string, and false otherwise.
07490 
07491    If we find the matching stop_memory, sets P to point to one past its number.
07492    Otherwise, sets P to an undefined byte less than or equal to END.
07493 
07494    We don't handle duplicates properly (yet).  */
07495 
07496 static boolean
07497 PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
07498                                    PREFIX(register_info_type) *reg_info)
07499 {
07500   int mcnt;
07501   /* Point to after the args to the start_memory.  */
07502   UCHAR_T *p1 = *p + 2;
07503 
07504   while (p1 < end)
07505     {
07506       /* Skip over opcodes that can match nothing, and return true or
07507         false, as appropriate, when we get to one that can't, or to the
07508          matching stop_memory.  */
07509 
07510       switch ((re_opcode_t) *p1)
07511         {
07512         /* Could be either a loop or a series of alternatives.  */
07513         case on_failure_jump:
07514           p1++;
07515           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07516 
07517           /* If the next operation is not a jump backwards in the
07518             pattern.  */
07519 
07520          if (mcnt >= 0)
07521            {
07522               /* Go through the on_failure_jumps of the alternatives,
07523                  seeing if any of the alternatives cannot match nothing.
07524                  The last alternative starts with only a jump,
07525                  whereas the rest start with on_failure_jump and end
07526                  with a jump, e.g., here is the pattern for `a|b|c':
07527 
07528                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
07529                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
07530                  /exactn/1/c
07531 
07532                  So, we have to first go through the first (n-1)
07533                  alternatives and then deal with the last one separately.  */
07534 
07535 
07536               /* Deal with the first (n-1) alternatives, which start
07537                  with an on_failure_jump (see above) that jumps to right
07538                  past a jump_past_alt.  */
07539 
07540               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
07541                    jump_past_alt)
07542                 {
07543                   /* `mcnt' holds how many bytes long the alternative
07544                      is, including the ending `jump_past_alt' and
07545                      its number.  */
07546 
07547                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
07548                                           (1 + OFFSET_ADDRESS_SIZE),
07549                                           reg_info))
07550                     return false;
07551 
07552                   /* Move to right after this alternative, including the
07553                    jump_past_alt.  */
07554                   p1 += mcnt;
07555 
07556                   /* Break if it's the beginning of an n-th alternative
07557                      that doesn't begin with an on_failure_jump.  */
07558                   if ((re_opcode_t) *p1 != on_failure_jump)
07559                     break;
07560 
07561                 /* Still have to check that it's not an n-th
07562                    alternative that starts with an on_failure_jump.  */
07563                 p1++;
07564                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07565                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
07566                     jump_past_alt)
07567                     {
07568                     /* Get to the beginning of the n-th alternative.  */
07569                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
07570                       break;
07571                     }
07572                 }
07573 
07574               /* Deal with the last alternative: go back and get number
07575                  of the `jump_past_alt' just before it.  `mcnt' contains
07576                  the length of the alternative.  */
07577               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
07578 
07579               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
07580                 return false;
07581 
07582               p1 += mcnt;   /* Get past the n-th alternative.  */
07583             } /* if mcnt > 0 */
07584           break;
07585 
07586 
07587         case stop_memory:
07588          assert (p1[1] == **p);
07589           *p = p1 + 2;
07590           return true;
07591 
07592 
07593         default:
07594           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
07595             return false;
07596         }
07597     } /* while p1 < end */
07598 
07599   return false;
07600 } /* group_match_null_string_p */
07601 
07602 
07603 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
07604    It expects P to be the first byte of a single alternative and END one
07605    byte past the last. The alternative can contain groups.  */
07606 
07607 static boolean
07608 PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
07609                                  PREFIX(register_info_type) *reg_info)
07610 {
07611   int mcnt;
07612   UCHAR_T *p1 = p;
07613 
07614   while (p1 < end)
07615     {
07616       /* Skip over opcodes that can match nothing, and break when we get
07617          to one that can't.  */
07618 
07619       switch ((re_opcode_t) *p1)
07620         {
07621        /* It's a loop.  */
07622         case on_failure_jump:
07623           p1++;
07624           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07625           p1 += mcnt;
07626           break;
07627 
07628        default:
07629           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
07630             return false;
07631         }
07632     }  /* while p1 < end */
07633 
07634   return true;
07635 } /* alt_match_null_string_p */
07636 
07637 
07638 /* Deals with the ops common to group_match_null_string_p and
07639    alt_match_null_string_p.
07640 
07641    Sets P to one after the op and its arguments, if any.  */
07642 
07643 static boolean
07644 PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
07645                                        PREFIX(register_info_type) *reg_info)
07646 {
07647   int mcnt;
07648   boolean ret;
07649   int reg_no;
07650   UCHAR_T *p1 = *p;
07651 
07652   switch ((re_opcode_t) *p1++)
07653     {
07654     case no_op:
07655     case begline:
07656     case endline:
07657     case begbuf:
07658     case endbuf:
07659     case wordbeg:
07660     case wordend:
07661     case wordbound:
07662     case notwordbound:
07663 #ifdef emacs
07664     case before_dot:
07665     case at_dot:
07666     case after_dot:
07667 #endif
07668       break;
07669 
07670     case start_memory:
07671       reg_no = *p1;
07672       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
07673       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
07674 
07675       /* Have to set this here in case we're checking a group which
07676          contains a group and a back reference to it.  */
07677 
07678       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
07679         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
07680 
07681       if (!ret)
07682         return false;
07683       break;
07684 
07685     /* If this is an optimized succeed_n for zero times, make the jump.  */
07686     case jump:
07687       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07688       if (mcnt >= 0)
07689         p1 += mcnt;
07690       else
07691         return false;
07692       break;
07693 
07694     case succeed_n:
07695       /* Get to the number of times to succeed.  */
07696       p1 += OFFSET_ADDRESS_SIZE;
07697       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07698 
07699       if (mcnt == 0)
07700         {
07701           p1 -= 2 * OFFSET_ADDRESS_SIZE;
07702           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07703           p1 += mcnt;
07704         }
07705       else
07706         return false;
07707       break;
07708 
07709     case duplicate:
07710       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
07711         return false;
07712       break;
07713 
07714     case set_number_at:
07715       p1 += 2 * OFFSET_ADDRESS_SIZE;
07716 
07717     default:
07718       /* All other opcodes mean we cannot match the empty string.  */
07719       return false;
07720   }
07721 
07722   *p = p1;
07723   return true;
07724 } /* common_op_match_null_string_p */
07725 
07726 
07727 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
07728    bytes; nonzero otherwise.  */
07729 
07730 static int
07731 PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
07732                         RE_TRANSLATE_TYPE translate)
07733 {
07734   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
07735   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
07736   while (len)
07737     {
07738 #ifdef WCHAR
07739       if (((*p1<=0xff)?translate[*p1++]:*p1++)
07740          != ((*p2<=0xff)?translate[*p2++]:*p2++))
07741        return 1;
07742 #else /* BYTE */
07743       if (translate[*p1++] != translate[*p2++]) return 1;
07744 #endif /* WCHAR */
07745       len--;
07746     }
07747   return 0;
07748 }
07749 
07750 
07751 #else /* not INSIDE_RECURSION */
07752 
07753 /* Entry points for GNU code.  */
07754 
07755 /* re_compile_pattern is the GNU regular expression compiler: it
07756    compiles PATTERN (of length SIZE) and puts the result in BUFP.
07757    Returns 0 if the pattern was valid, otherwise an error string.
07758 
07759    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
07760    are set in BUFP on entry.
07761 
07762    We call regex_compile to do the actual compilation.  */
07763 
07764 const char *
07765 re_compile_pattern (const char *pattern, size_t length,
07766                     struct re_pattern_buffer *bufp)
07767 {
07768   reg_errcode_t ret;
07769 
07770   /* GNU code is written to assume at least RE_NREGS registers will be set
07771      (and at least one extra will be -1).  */
07772   bufp->regs_allocated = REGS_UNALLOCATED;
07773 
07774   /* And GNU code determines whether or not to get register information
07775      by passing null for the REGS argument to re_match, etc., not by
07776      setting no_sub.  */
07777   bufp->no_sub = 0;
07778 
07779   /* Match anchors at newline.  */
07780   bufp->newline_anchor = 1;
07781 
07782 # ifdef MBS_SUPPORT
07783   if (MB_CUR_MAX != 1)
07784     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
07785   else
07786 # endif
07787     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
07788 
07789   if (!ret)
07790     return NULL;
07791   return gettext (re_error_msgid[(int) ret]);
07792 }
07793 #ifdef _LIBC
07794 weak_alias (__re_compile_pattern, re_compile_pattern)
07795 #endif
07796 
07797 /* Entry points compatible with 4.2 BSD regex library.  We don't define
07798    them unless specifically requested.  */
07799 
07800 #if defined _REGEX_RE_COMP || defined _LIBC
07801 
07802 /* BSD has one and only one pattern buffer.  */
07803 static struct re_pattern_buffer re_comp_buf;
07804 
07805 char *
07806 #ifdef _LIBC
07807 /* Make these definitions weak in libc, so POSIX programs can redefine
07808    these names if they don't use our functions, and still use
07809    regcomp/regexec below without link errors.  */
07810 weak_function
07811 #endif
07812 re_comp (const char *s)
07813 {
07814   reg_errcode_t ret;
07815 
07816   if (!s)
07817     {
07818       if (!re_comp_buf.buffer)
07819        return (char *) gettext ("No previous regular expression");
07820       return 0;
07821     }
07822 
07823   if (!re_comp_buf.buffer)
07824     {
07825       re_comp_buf.buffer = (unsigned char *) malloc (200);
07826       if (re_comp_buf.buffer == NULL)
07827         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
07828       re_comp_buf.allocated = 200;
07829 
07830       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
07831       if (re_comp_buf.fastmap == NULL)
07832        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
07833     }
07834 
07835   /* Since `re_exec' always passes NULL for the `regs' argument, we
07836      don't need to initialize the pattern buffer fields which affect it.  */
07837 
07838   /* Match anchors at newlines.  */
07839   re_comp_buf.newline_anchor = 1;
07840 
07841 # ifdef MBS_SUPPORT
07842   if (MB_CUR_MAX != 1)
07843     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
07844   else
07845 # endif
07846     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
07847 
07848   if (!ret)
07849     return NULL;
07850 
07851   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
07852   return (char *) gettext (re_error_msgid[(int) ret]);
07853 }
07854 
07855 
07856 int
07857 #ifdef _LIBC
07858 weak_function
07859 #endif
07860 re_exec (const char *s)
07861 {
07862   const int len = strlen (s);
07863   return
07864     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
07865 }
07866 
07867 #endif /* _REGEX_RE_COMP */
07868 
07869 /* POSIX.2 functions.  Don't define these for Emacs.  */
07870 
07871 #ifndef emacs
07872 
07873 /* regcomp takes a regular expression as a string and compiles it.
07874 
07875    PREG is a regex_t *.  We do not expect any fields to be initialized,
07876    since POSIX says we shouldn't.  Thus, we set
07877 
07878      `buffer' to the compiled pattern;
07879      `used' to the length of the compiled pattern;
07880      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
07881        REG_EXTENDED bit in CFLAGS is set; otherwise, to
07882        RE_SYNTAX_POSIX_BASIC;
07883      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
07884      `fastmap' to an allocated space for the fastmap;
07885      `fastmap_accurate' to zero;
07886      `re_nsub' to the number of subexpressions in PATTERN.
07887 
07888    PATTERN is the address of the pattern string.
07889 
07890    CFLAGS is a series of bits which affect compilation.
07891 
07892      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
07893      use POSIX basic syntax.
07894 
07895      If REG_NEWLINE is set, then . and [^...] don't match newline.
07896      Also, regexec will try a match beginning after every newline.
07897 
07898      If REG_ICASE is set, then we considers upper- and lowercase
07899      versions of letters to be equivalent when matching.
07900 
07901      If REG_NOSUB is set, then when PREG is passed to regexec, that
07902      routine will report only success or failure, and nothing about the
07903      registers.
07904 
07905    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
07906    the return codes and their meanings.)  */
07907 
07908 int
07909 regcomp (regex_t *preg, const char *pattern, int cflags)
07910 {
07911   reg_errcode_t ret;
07912   reg_syntax_t syntax
07913     = (cflags & REG_EXTENDED) ?
07914       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
07915 
07916   /* regex_compile will allocate the space for the compiled pattern.  */
07917   preg->buffer = 0;
07918   preg->allocated = 0;
07919   preg->used = 0;
07920 
07921   /* Try to allocate space for the fastmap.  */
07922   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
07923 
07924   if (cflags & REG_ICASE)
07925     {
07926       int i;
07927 
07928       preg->translate
07929        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
07930                                   * sizeof (*(RE_TRANSLATE_TYPE)0));
07931       if (preg->translate == NULL)
07932         return (int) REG_ESPACE;
07933 
07934       /* Map uppercase characters to corresponding lowercase ones.  */
07935       for (i = 0; i < CHAR_SET_SIZE; i++)
07936         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
07937     }
07938   else
07939     preg->translate = NULL;
07940 
07941   /* If REG_NEWLINE is set, newlines are treated differently.  */
07942   if (cflags & REG_NEWLINE)
07943     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
07944       syntax &= ~RE_DOT_NEWLINE;
07945       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
07946       /* It also changes the matching behavior.  */
07947       preg->newline_anchor = 1;
07948     }
07949   else
07950     preg->newline_anchor = 0;
07951 
07952   preg->no_sub = !!(cflags & REG_NOSUB);
07953 
07954   /* POSIX says a null character in the pattern terminates it, so we
07955      can use strlen here in compiling the pattern.  */
07956 # ifdef MBS_SUPPORT
07957   if (MB_CUR_MAX != 1)
07958     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
07959   else
07960 # endif
07961     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
07962 
07963   /* POSIX doesn't distinguish between an unmatched open-group and an
07964      unmatched close-group: both are REG_EPAREN.  */
07965   if (ret == REG_ERPAREN) ret = REG_EPAREN;
07966 
07967   if (ret == REG_NOERROR && preg->fastmap)
07968     {
07969       /* Compute the fastmap now, since regexec cannot modify the pattern
07970         buffer.  */
07971       if (re_compile_fastmap (preg) == -2)
07972        {
07973          /* Some error occurred while computing the fastmap, just forget
07974             about it.  */
07975          free (preg->fastmap);
07976          preg->fastmap = NULL;
07977        }
07978     }
07979 
07980   return (int) ret;
07981 }
07982 #ifdef _LIBC
07983 weak_alias (__regcomp, regcomp)
07984 #endif
07985 
07986 
07987 /* regexec searches for a given pattern, specified by PREG, in the
07988    string STRING.
07989 
07990    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
07991    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
07992    least NMATCH elements, and we set them to the offsets of the
07993    corresponding matched substrings.
07994 
07995    EFLAGS specifies `execution flags' which affect matching: if
07996    REG_NOTBOL is set, then ^ does not match at the beginning of the
07997    string; if REG_NOTEOL is set, then $ does not match at the end.
07998 
07999    We return 0 if we find a match and REG_NOMATCH if not.  */
08000 
08001 int
08002 regexec (const regex_t *preg, const char *string, size_t nmatch,
08003          regmatch_t pmatch[], int eflags)
08004 {
08005   int ret;
08006   struct re_registers regs;
08007   regex_t private_preg;
08008   int len = strlen (string);
08009   boolean want_reg_info = !preg->no_sub && nmatch > 0;
08010 
08011   private_preg = *preg;
08012 
08013   private_preg.not_bol = !!(eflags & REG_NOTBOL);
08014   private_preg.not_eol = !!(eflags & REG_NOTEOL);
08015 
08016   /* The user has told us exactly how many registers to return
08017      information about, via `nmatch'.  We have to pass that on to the
08018      matching routines.  */
08019   private_preg.regs_allocated = REGS_FIXED;
08020 
08021   if (want_reg_info)
08022     {
08023       regs.num_regs = nmatch;
08024       regs.start = TALLOC (nmatch * 2, regoff_t);
08025       if (regs.start == NULL)
08026         return (int) REG_NOMATCH;
08027       regs.end = regs.start + nmatch;
08028     }
08029 
08030   /* Perform the searching operation.  */
08031   ret = re_search (&private_preg, string, len,
08032                    /* start: */ 0, /* range: */ len,
08033                    want_reg_info ? &regs : (struct re_registers *) 0);
08034 
08035   /* Copy the register information to the POSIX structure.  */
08036   if (want_reg_info)
08037     {
08038       if (ret >= 0)
08039         {
08040           unsigned r;
08041 
08042           for (r = 0; r < nmatch; r++)
08043             {
08044               pmatch[r].rm_so = regs.start[r];
08045               pmatch[r].rm_eo = regs.end[r];
08046             }
08047         }
08048 
08049       /* If we needed the temporary register info, free the space now.  */
08050       free (regs.start);
08051     }
08052 
08053   /* We want zero return to mean success, unlike `re_search'.  */
08054   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
08055 }
08056 #ifdef _LIBC
08057 weak_alias (__regexec, regexec)
08058 #endif
08059 
08060 
08061 /* Returns a message corresponding to an error code, ERRCODE, returned
08062    from either regcomp or regexec.   We don't use PREG here.  */
08063 
08064 size_t
08065 regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
08066           char *errbuf, size_t errbuf_size)
08067 {
08068   const char *msg;
08069   size_t msg_size;
08070 
08071   if (errcode < 0
08072       || errcode >= (int) (sizeof (re_error_msgid)
08073                         / sizeof (re_error_msgid[0])))
08074     /* Only error codes returned by the rest of the code should be passed
08075        to this routine.  If we are given anything else, or if other regex
08076        code generates an invalid error code, then the program has a bug.
08077        Dump core so we can fix it.  */
08078     abort ();
08079 
08080   msg = gettext (re_error_msgid[errcode]);
08081 
08082   msg_size = strlen (msg) + 1; /* Includes the null.  */
08083 
08084   if (errbuf_size != 0)
08085     {
08086       if (msg_size > errbuf_size)
08087         {
08088 #if defined HAVE_MEMPCPY || defined _LIBC
08089          *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
08090 #else
08091           memcpy (errbuf, msg, errbuf_size - 1);
08092           errbuf[errbuf_size - 1] = 0;
08093 #endif
08094         }
08095       else
08096         memcpy (errbuf, msg, msg_size);
08097     }
08098 
08099   return msg_size;
08100 }
08101 #ifdef _LIBC
08102 weak_alias (__regerror, regerror)
08103 #endif
08104 
08105 
08106 /* Free dynamically allocated space used by PREG.  */
08107 
08108 void
08109 regfree (regex_t *preg)
08110 {
08111   if (preg->buffer != NULL)
08112     free (preg->buffer);
08113   preg->buffer = NULL;
08114 
08115   preg->allocated = 0;
08116   preg->used = 0;
08117 
08118   if (preg->fastmap != NULL)
08119     free (preg->fastmap);
08120   preg->fastmap = NULL;
08121   preg->fastmap_accurate = 0;
08122 
08123   if (preg->translate != NULL)
08124     free (preg->translate);
08125   preg->translate = NULL;
08126 }
08127 #ifdef _LIBC
08128 weak_alias (__regfree, regfree)
08129 #endif
08130 
08131 #endif /* not emacs  */
08132 
08133 #endif /* not INSIDE_RECURSION */
08134 
08135 
/* Drop the helper macros used above.  Presumably this lets the file be
   included again with a different character configuration (see
   INSIDE_RECURSION) -- confirm against the including code.  */

#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR
08201 
08202 # define DEFINED_ONCE