Back to index

radiance  4R0+20100331
rexpr.c
Go to the documentation of this file.
00001 #ifndef lint
00002 static const char    RCSid[] = "$Id: rexpr.c,v 2.9 2003/07/17 09:21:29 schorsch Exp $";
00003 #endif
00004 /*
00005  * Regular expression parsing routines.
00006  *
00007  * External symbols declared in standard.h
00008  */
00009 
00010 #include "copyright.h"
00011 
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 #include <ctype.h>
00015 #include <string.h>
00016 
00017 #include "rtio.h"
00018 
00019 /*
00020  * rexpr.c - regular expression parser (ala grep)
00021  */
00022 
/*
 * Opcodes of the compiled expression stored in expbuf.
 * STAR is a flag bit that ecompile() ORs into the preceding opcode when
 * it sees '*'; the matcher handles it for CCHR, CDOT, CCL and NCCL.
 */
#define CCHR    2       /* literal character; the character follows */
#define CDOT    4       /* '.': any single character */
#define CCL     6       /* '[...]': count byte, then the set members */
#define NCCL    8       /* '[^...]': same layout as CCL, inverted sense */
#define CDOL    10      /* '$' as last pattern character: end of string */
#define CEOF    11      /* end of the compiled expression */
#define CBRC    14      /* "\<" (or wflag): beginning of a word */
#define CLET    15      /* "\>" (or wflag): end of a word */
#define STAR    01      /* repetition flag set by '*' */

#define ESIZE   255     /* size of the compiled expression buffer */

/*
 * Character comparison used by the matcher.  Two characters are the same
 * if they are equal or, when iflag is set, if they differ in exactly the
 * ' ' bit and the first one is alphabetic.  Arguments are parenthesized
 * and the isalpha() argument is converted to unsigned char, since passing
 * a negative plain char to a <ctype.h> function is undefined.
 * Both arguments are evaluated more than once: no side effects, please.
 */
#define same(a,b) ((a)==(b) || (iflag && ((a)^(b))==' ' && isalpha((unsigned char)(a))))


static int     advance(char *, char *);
static int     cclass(char *, int c, int af);

static char    expbuf[ESIZE];   /* compiled expression */
static int     iflag;           /* nonzero: ignore case in same() */
static int     circf;           /* nonzero: pattern started with '^' */

/*
 * Set by ecompile() to the length of the compiled expression and by the
 * matcher to the length of the text matched.
 */
int    explen;                 /* length of the last expression found */
00046 
00047 int
00048 ecompile(sp, iflg, wflag)               /* compile the expression */
00049 register char  *sp;
00050 int iflg, wflag;
00051 {
00052         register int c;
00053         register char *ep;
00054         char *lastep = NULL;
00055         int cclcnt;
00056         
00057         iflag = iflg;
00058         ep = expbuf;
00059        explen = 0;
00060         if (*sp == '^') {
00061                 circf = 1;
00062                 sp++;
00063         } else
00064               circf = 0;
00065         if (wflag)
00066                 *ep++ = CBRC;
00067         for (;;) {
00068                 if (ep >= &expbuf[ESIZE])
00069                         return(-1);
00070                 if ((c = *sp++) != '*')
00071                         lastep = ep;
00072                 switch (c) {
00073 
00074                 case '\0':
00075                         if (wflag)
00076                                 *ep++ = CLET;
00077                         *ep++ = CEOF;
00078                      explen = ep - expbuf;
00079                         return(0);
00080 
00081                 case '.':
00082                         *ep++ = CDOT;
00083                         continue;
00084 
00085                 case '*':
00086                         if (lastep==0)
00087                                 goto defchar;
00088                         *lastep |= STAR;
00089                         continue;
00090 
00091                 case '$':
00092                         if (*sp != '\0')
00093                                 goto defchar;
00094                         *ep++ = CDOL;
00095                         continue;
00096 
00097                 case '[':
00098                         *ep++ = CCL;
00099                         *ep++ = 0;
00100                         cclcnt = 1;
00101                         if ((c = *sp++) == '^') {
00102                                 c = *sp++;
00103                                 ep[-2] = NCCL;
00104                         }
00105                         do {
00106                                 *ep++ = c;
00107                                 cclcnt++;
00108                                 if (c=='\0' || ep >= &expbuf[ESIZE])
00109                                         return(-1);
00110                         } while ((c = *sp++) != ']');
00111                         lastep[1] = cclcnt;
00112                         continue;
00113 
00114                 case '\\':
00115                         if ((c = *sp++) == '\0')
00116                                 return(-1);
00117                         if (c == '<') {
00118                             if (ep == expbuf || ep[-1] != CBRC)
00119                                    *ep++ = CBRC;
00120                                 continue;
00121                         }
00122                         if (c == '>') {
00123                                 *ep++ = CLET;
00124                                 continue;
00125                         }
00126                 defchar:
00127                 default:
00128                         *ep++ = CCHR;
00129                         *ep++ = c;
00130                 }
00131         }
00132 }
00133 
00134 char *
00135 expsave()                  /* save compiled string */
00136 {
00137        register char  *ep;
00138 
00139        if (explen == 0)
00140               return(NULL);
00141        if ((ep = (char *)malloc(explen+3)) == NULL)
00142               return(NULL);
00143        ep[0] = iflag;
00144        ep[1] = circf;
00145        ep[2] = explen;
00146        (void)memcpy(ep+3, expbuf, explen);
00147        return(ep);
00148 }
00149 
00150 void
00151 expset(ep)                  /* install saved string */
00152 register char  *ep;
00153 {
00154        iflag = ep[0];
00155        circf = ep[1];
00156        (void)memcpy(expbuf, ep+3, ep[2]&0xff);
00157 }
00158 
00159 char *
00160 eindex(sp)                    /* find the expression in string sp */
00161 register char *sp;
00162 {
00163        /* check for match at beginning of line, watch CBRC */
00164        if (advance(sp, expbuf[0]==CBRC ? expbuf+1 : expbuf))
00165               return(sp);
00166        if (circf)
00167                 return(NULL);
00168         /* fast check for first character */
00169         if (expbuf[0]==CCHR) {
00170               register int c = expbuf[1];
00171               while (*++sp)
00172                      if (same(*sp, c) && advance(sp, expbuf))
00173                             return(sp);
00174                 return(NULL);
00175         }
00176         /* regular algorithm */
00177        while (*++sp)
00178                 if (advance(sp, expbuf))
00179                         return(sp);
00180         return(NULL);
00181 }
00182 
00183 static int
00184 advance(alp, ep)
00185         char *alp;
00186 register char *ep;
00187 {
00188         register char *lp;
00189        char *curlp;
00190 
00191         lp = alp;
00192         for (;;) switch (*ep++) {
00193 
00194         case CCHR:
00195                 if (!same(*ep, *lp))
00196                         return (0);
00197                 ep++, lp++;
00198                 continue;
00199 
00200         case CDOT:
00201                 if (*lp++)
00202                         continue;
00203                 return(0);
00204 
00205         case CDOL:
00206                 if (*lp==0)
00207                         continue;
00208                 return(0);
00209 
00210         case CEOF:
00211                 explen = lp - alp;
00212                 return(1);
00213 
00214         case CCL:
00215                 if (cclass(ep, *lp++, 1)) {
00216                         ep += *ep;
00217                         continue;
00218                 }
00219                 return(0);
00220 
00221         case NCCL:
00222                 if (cclass(ep, *lp++, 0)) {
00223                         ep += *ep;
00224                         continue;
00225                 }
00226                 return(0);
00227 
00228         case CDOT|STAR:
00229                 curlp = lp;
00230                 while (*lp++);
00231                 goto star;
00232 
00233         case CCHR|STAR:
00234                 curlp = lp;
00235                 while (same(*lp, *ep))
00236                         lp++;
00237                 lp++;
00238                 ep++;
00239                 goto star;
00240 
00241         case CCL|STAR:
00242         case NCCL|STAR:
00243                 curlp = lp;
00244                 while (cclass(ep, *lp++, ep[-1]==(CCL|STAR)));
00245                 ep += *ep;
00246         star:
00247                 do {
00248                         lp--;
00249                         if (advance(lp, ep)) {
00250                                 explen += lp - alp;
00251                                 return(1);
00252                         }
00253                 } while (lp > curlp);
00254                 return(0);
00255 
00256         case CBRC:
00257                 if ((isalnum(*lp) || *lp == '_') && !(isalnum(lp[-1]) || lp[-1] == '_'))
00258                         continue;
00259                 return (0);
00260 
00261         case CLET:
00262                 if (!isalnum(*lp) && *lp != '_')
00263                         continue;
00264                 return (0);
00265 
00266         default:
00267                 fprintf(stderr, "RE botch\n");
00268         }
00269 }
00270 
/*
 * Test character c against a compiled character class.
 *
 *   set  points at the count byte of the class (number of member bytes
 *        plus one); the member bytes follow it
 *   c    character to test; 0 (end of text) never matches
 *   af   value to return when c is in the set; !af is returned otherwise
 *
 * A member sequence "x-y" with at least three bytes left is a range.
 */
static int
cclass(char *set, int c, int af)
{
        const char *mp = set + 1;       /* next member to examine */
        int left = set[0];              /* becomes the bytes left, below */

        if (c == 0)
                return(0);
        while (--left != 0) {
                if (left > 2 && mp[1] == '-') {
                        /* range: lower bound, '-', upper bound */
                        if (mp[0] <= c && c <= mp[2])
                                return(af);
                        mp += 3;
                        left -= 2;
                        continue;
                }
                if (*mp++ == c)
                        return(af);
        }
        return(!af);
}