Back to index

wims  3.65+svn20090927
extract.c
Go to the documentation of this file.
00001 /*    Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
/* Extraction options and state shared by the routines of this file. */
int nopart = 0;     /* when set, extrout() drops pieces shorter than the line */
int nonesting = 0;  /* when set, _extract() does not descend into sub-expressions */
int nocomma = 0;    /* when set, extrout() does not insert commas around operators */
int nocoord = 0;    /* when set, extrout() omits the leading coordinate triple */
int _estart;        /* offset of the expression inside the current object line */
00019 
00020 void extrout(char *pb, char *pe, int dist, int commas[], int *commacnt)
00021 {
00022     int i;
00023     if(pe-pb<thislinelen && nopart) return;
00024     while(pe>pb && isspace(*(pe-1))) pe--;
00025     if(pe<=pb) return;
00026     if(regexcnt>0 && pe-pb<MAX_LINELEN) {
00027        char buf[MAX_LINELEN+1];
00028        memmove(buf,pb,pe-pb); buf[pe-pb]=0;
00029        if(!checkregex(buf)) return;
00030     }
00031     if(!nocoord)
00032       printf("%d %d %d, ",thisobjline,dist+_estart,pe-pb+dist+_estart);
00033     while(*commacnt>0 && commas[*commacnt-1]>pe-pb) (*commacnt)--;
00034     if(*commacnt>0 && !nocomma) {
00035        fwrite(pb,1,commas[0],stdout); putchar(',');
00036        for(i=1;i<*commacnt;i++) {
00037            fwrite(pb+commas[i-1],1,commas[i]-commas[i-1],stdout); putchar(',');
00038        }
00039        pb+=commas[*commacnt-1];
00040     }
00041     fwrite(pb,1,pe-pb,stdout); printf("\n");
00042     *commacnt=0;
00043 }
00044 
00045 void _extract(char *p, int dist)
00046 {
00047     int i,l,got,lt;
00048     char *p1, *p2, *p3, *p4;
00049     char buf[MAX_LINELEN+1];
00050     int commas[MAX_COMMAS];
00051     int commacnt;
00052 
00053     if(dist>0 && (nonesting || nopart)) return;
00054     commacnt=0;
00055     if(expl1<=0) got=1; else got=0;
00056     for(p1=find_word_start(p), p2=p1; *p2; p2=find_word_start(p3)) {
00057        if(*p2=='.' || isdigit(*p2)) {
00058            strtod(p2,&p3); continue;
00059        }
00060        if(*p2=='(') {
00061            p3=find_matching(p2+1,')'); lt=exp_paren;
00062            paren: if(p3==NULL) error("Unmatched parentheses.");
00063            p2++; memmove(buf,p2,p3-p2); buf[p3-p2]=0;
00064            _extract(buf,p2-p+dist);
00065            if(expl2==lt) extrout(p2,p3,dist+p2-p,commas,&commacnt);
00066            p3++; continue;
00067        }
00068        if(*p2=='[') {
00069            p3=find_matching(p2+1,']'); lt=exp_matrix; goto paren;
00070        }
00071        if(*p2=='{') {
00072            p3=find_matching(p2+1,'}'); lt=exp_set; goto paren;
00073        }
00074        if(isalpha(*p2)) {
00075            for(p3=p2; *p3=='_' || isalnum(*p3); p3++);
00076            if(p3-p2>=16) goto notdefined;
00077            memmove(buf,p2,p3-p2); buf[p3-p2]=0;
00078            for(i=0;i<opalphano && strcmp(buf,opalpha[i].name)!=0; i++);
00079            if(i<opalphano) {
00080               l=opalpha[i].lvl; if(l>expl2) {
00081                   if(got) extrout(p1,p2,dist+p1-p,commas,&commacnt);
00082                   if(expl1>0) got=0; p1=find_word_start(p3);
00083               }
00084               if(l>=expl1 && l<=expl2) {
00085                   got=1;
00086                   if(l>0 && commacnt<MAX_COMMAS-2) {
00087                      commas[commacnt++]=p2-p1;
00088                      commas[commacnt++]=p3-p1;
00089                   }
00090               }
00091               continue;
00092            }
00093            notdefined: p4=find_word_start(p3);
00094            if(*p4=='(') {
00095               p3=find_matching(p4+1,')');
00096               if(p3==NULL) error("Unmatched parentheses.");
00097               p4++; memmove(buf,p4,p3-p2); buf[p3-p4]=0;
00098               _extract(buf,p2-p+dist);
00099               p3++;
00100               if(expl2==exp_fn) extrout(p2,p3,dist+p2-p,commas,&commacnt);
00101            }
00102            else if(expl2==exp_variable) extrout(p2,p3,dist+p2-p,commas,&commacnt);
00103            continue;
00104        }
00105        for(i=0;i<oppunctno && strncmp(p2,oppunct[i].name,strlen(oppunct[i].name))!=0; i++);
00106        if(i>=oppunctno) error("Unknown operator.");
00107        p3=p2+strlen(oppunct[i].name); l=oppunct[i].lvl;
00108        if(l>expl2) {
00109            if(got) extrout(p1,p2,dist+p1-p,commas,&commacnt);
00110            if(expl1>0) got=0; p1=find_word_start(p3);
00111        }
00112        if(l>=expl1 && l<=expl2) {
00113            got=1;
00114            if(l>0 && commacnt<MAX_COMMAS-2) {
00115               commas[commacnt++]=p2-p1;
00116               commas[commacnt++]=p3-p1;
00117            }
00118        }
00119     }
00120     if(got) extrout(p1,p1+strlen(p1),dist+p1-p,commas,&commacnt);
00121 }
00122 
00123 void req_extract(void)
00124 {
00125     int i;
00126     char *p;
00127 
00128     if(objlinecnt<2) return;
00129     if(*reqtype==0) error("Missing extraction type.");
00130     for(i=0;i<exptypeno && strcmp(reqtype,exptype[i].name)!=0; i++);
00131     if(i>=exptypeno) error("Bad extraction type.");
00132     if(logdir!=0) nopart=1;
00133     while((p=wordchr(objline[0],"nopart"))!=NULL) {
00134        nopart=1; strcpy(p,p+strlen("nopart"));
00135     }
00136     while((p=wordchr(objline[0],"nonesting"))!=NULL) {
00137        nonesting=1; strcpy(p,p+strlen("nonesting"));
00138     }
00139     while((p=wordchr(objline[0],"nocomma"))!=NULL) {
00140        nocomma=1; strcpy(p,p+strlen("nocomma"));
00141     }
00142     getregex(objline[0]);
00143     expl1=exptype[i].lvl1; expl2=exptype[i].lvl2;
00144     for(i=1;i<objlinecnt;i++) {
00145        thisobjline=i; p=find_word_start(objline[i]);
00146        linelogdir=0;
00147        if(*p=='>') {
00148            if(logdir<0) continue;
00149            p=find_word_start(p+1); linelogdir=1;
00150        }
00151        else if(*p=='<') {
00152            if(logdir>0) continue;
00153            p=find_word_start(p+1); linelogdir=-1;
00154        }
00155        thislinelen=strlen(p); if(thislinelen<=0) continue;
00156        _estart=p-objline[i]; _extract(p, 0);
00157     }
00158 }
00159 
00160 void req_cut(void)
00161 {
00162     nocoord=1; req_extract();
00163 }
00164