Back to index

glibc  2.9
regexp.h
Go to the documentation of this file.
00001 /* Copyright (C) 1996, 1997, 1998, 1999, 2004, 2008
00002    Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #ifndef _REGEXP_H
00022 #define _REGEXP_H    1
00023 
00024 /* The contents of this header file were first standardized in X/Open
00025    System Interface and Headers Issue 2, originally coming from SysV.
00026    In issue 4, version 2, it is marked as TO BE WITHDRAWN, and it has
00027    been withdrawn in SUSv3.
00028 
00029    This code shouldn't be used in any newly written code.  It is
00030    included only for compatibility reasons.  Use the POSIX definition
00031    in <regex.h> for portable applications and a reasonable interface.  */
00032 
00033 #include <features.h>
00034 #include <alloca.h>
00035 #include <regex.h>
00036 #include <stdlib.h>
00037 #include <string.h>
00038 
00039 /* The implementation provided here emulates the needed functionality
00040    by mapping to the POSIX regular expression matcher.  The interface
00041    for the here included function is weird (this really is a harmless
00042    word).
00043 
00044    The user has to provide six macros before this header file can be
00045    included:
00046 
00047    INIT              Declarations for variables which can be used by the
00048               other macros.
00049 
00050    GETC()     Return the value of the next character in the regular
00051               expression pattern.  Successive calls should return
00052               successive characters.
00053 
00054    PEEKC()    Return the value of the next character in the regular
00055               expression pattern.  Immediately successive calls to
00056               PEEKC() should return the same character which should
00057               also be the next character returned by GETC().
00058 
00059    UNGETC(c)  Cause `c' to be returned by the next call to GETC() and
00060               PEEKC().
00061 
00062    RETURN(ptr)       Used for normal exit of the `compile' function.  `ptr'
00063               is a pointer to the character after the last character of
00064               the compiled regular expression.
00065 
00066    ERROR(val) Used for abnormal return from `compile'.  `val' is the
00067               error number.  The error codes are:
00068               11     Range endpoint too large.
00069               16     Bad number.
00070               25     \digit out of range.
00071               36     Illegal or missing delimiter.
00072               41     No remembered search string.
00073               42     \( \) imbalance.
00074               43     Too many \(.
00075               44     More than two numbers given in \{ \}.
00076               45     } expected after \.
00077               46     First number exceeds second in \{ \}.
00078               49     [ ] imbalance.
00079               50     Regular expression overflow.
00080 
00081   */
00082 
00083 __BEGIN_DECLS
00084 
00085 /* Interface variables.  They contain the results of the successful
00086    calls to `step' and `advance'.  */
00087 extern char *loc1;
00088 extern char *loc2;
00089 
00090 /* The use of this variable in the `advance' function is not
00091    supported.  */
00092 extern char *locs;
00093 
00094 
00095 #ifndef __DO_NOT_DEFINE_COMPILE
00096 /* Get and compile the user supplied pattern up to end of line or
00097    string or until EOF is seen, whatever happens first.  The result is
00098    placed in the buffer starting at EXPBUF and delimited by ENDBUF.
00099 
00100    This function cannot be defined in the libc itself since it depends
00101    on the macros.  */
00102 char *
00103 compile (char *__restrict instring, char *__restrict expbuf,
00104         __const char *__restrict endbuf, int eof)
00105 {
00106   char *__input_buffer = NULL;
00107   size_t __input_size = 0;
00108   size_t __current_size = 0;
00109   int __ch;
00110   int __error;
00111   INIT
00112 
00113   /* Align the expression buffer according to the needs for an object
00114      of type `regex_t'.  Then check for minimum size of the buffer for
00115      the compiled regular expression.  */
00116   regex_t *__expr_ptr;
00117 # if defined __GNUC__ && __GNUC__ >= 2
00118   const size_t __req = __alignof__ (regex_t *);
00119 # else
00120   /* How shall we find out?  We simply guess it and can change it is
00121      this really proofs to be wrong.  */
00122   const size_t __req = 8;
00123 # endif
00124   expbuf += __req;
00125   expbuf -= (expbuf - ((char *) 0)) % __req;
00126   if (endbuf < expbuf + sizeof (regex_t))
00127     {
00128       ERROR (50);
00129     }
00130   __expr_ptr = (regex_t *) expbuf;
00131   /* The remaining space in the buffer can be used for the compiled
00132      pattern.  */
00133   __expr_ptr->__REPB_PREFIX (buffer) = expbuf + sizeof (regex_t);
00134   __expr_ptr->__REPB_PREFIX (allocated)
00135     = endbuf - (char *) __expr_ptr->__REPB_PREFIX (buffer);
00136 
00137   while ((__ch = (GETC ())) != eof)
00138     {
00139       if (__ch == '\0' || __ch == '\n')
00140        {
00141          UNGETC (__ch);
00142          break;
00143        }
00144 
00145       if (__current_size + 1 >= __input_size)
00146        {
00147          size_t __new_size = __input_size ? 2 * __input_size : 128;
00148          char *__new_room = (char *) alloca (__new_size);
00149          /* See whether we can use the old buffer.  */
00150          if (__new_room + __new_size == __input_buffer)
00151            {
00152              __input_size += __new_size;
00153              __input_buffer = (char *) memcpy (__new_room, __input_buffer,
00154                                           __current_size);
00155            }
00156          else if (__input_buffer + __input_size == __new_room)
00157            __input_size += __new_size;
00158          else
00159            {
00160              __input_size = __new_size;
00161              __input_buffer = (char *) memcpy (__new_room, __input_buffer,
00162                                           __current_size);
00163            }
00164        }
00165       __input_buffer[__current_size++] = __ch;
00166     }
00167   if (__current_size)
00168     __input_buffer[__current_size++] = '\0';
00169   else
00170     __input_buffer = "";
00171 
00172   /* Now compile the pattern.  */
00173   __error = regcomp (__expr_ptr, __input_buffer, REG_NEWLINE);
00174   if (__error != 0)
00175     /* Oh well, we have to translate POSIX error codes.  */
00176     switch (__error)
00177       {
00178       case REG_BADPAT:
00179       case REG_ECOLLATE:
00180       case REG_ECTYPE:
00181       case REG_EESCAPE:
00182       case REG_BADRPT:
00183       case REG_EEND:
00184       case REG_ERPAREN:
00185       default:
00186        /* There is no matching error code.  */
00187        RETURN (36);
00188       case REG_ESUBREG:
00189        RETURN (25);
00190       case REG_EBRACK:
00191        RETURN (49);
00192       case REG_EPAREN:
00193        RETURN (42);
00194       case REG_EBRACE:
00195        RETURN (44);
00196       case REG_BADBR:
00197        RETURN (46);
00198       case REG_ERANGE:
00199        RETURN (11);
00200       case REG_ESPACE:
00201       case REG_ESIZE:
00202        ERROR (50);
00203       }
00204 
00205   /* Everything is ok.  */
00206   RETURN ((char *) (__expr_ptr->__REPB_PREFIX (buffer)
00207                   + __expr_ptr->__REPB_PREFIX (used)));
00208 }
00209 #endif
00210 
00211 
00212 /* Find the next match in STRING.  The compiled regular expression is
00213    found in the buffer starting at EXPBUF.  After a successful match
00214    `loc1' points to the first character matched and `loc2' points to
00215    the next unmatched character.
         NOTE(review): EXPBUF is presumably a buffer previously filled in
         by `compile'; the meaning of the return value is not visible in
         this header -- confirm against the implementation.  */
00216 extern int step (__const char *__restrict __string,
00217                __const char *__restrict __expbuf) __THROW;
00218 
00219 /* Match the beginning of STRING with the compiled regular expression
00220    in EXPBUF.  If the match is successful `loc2' will contain the
00221    position of the first unmatched character.
         NOTE(review): as with `step', EXPBUF is presumably the output of
         `compile' and the return convention is not shown here -- confirm
         against the implementation.  */
00222 extern int advance (__const char *__restrict __string,
00223                   __const char *__restrict __expbuf) __THROW;
00224 
00225 
00226 __END_DECLS
00227 
00228 #endif /* regexp.h */