Back to index

wims  3.65+svn20090927
symtext.c
Go to the documentation of this file.
00001 /*    Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
00018        /* This program compares two text strings
00019         * according to the symtext syntax. */
00020 
00021 /* Input data: via environment variables.
00022  * wims_exec_parm: line 1 = command (comp,expand,wordlist,random,1,2,3,...)
00023  * line 2 = text to examine (for comp).
00024  * line 3 and up = symtext syntax.
00025  * w_symtext: dictionary style.
00026  * w_symtext_option: option words.
00027  * 
00028  * Output: two lines.
00029  * Line 1: ERROR or OK
00030  * Line 2: result depending on command.
00031  */
00032 
00033 
/* Alphabet of 63 code characters: underscore, the ten digits, then the
 * upper-case and lower-case ASCII letters, in that order. */
const char *codechar =
    "_"
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz";
00035 
00036 #include "symtext.h"
00037 
00038 struct block blockbuf[MAX_BLOCKS];
00039 int nextblock;
00040 listtype listbuf[MAX_LISTS];
00041 int nextlist;
00042 listtype tagbuf[MAX_BLOCKS];
00043 int nexttag;
00044 
00045 struct poolstruct poolbuf[MAX_POOLS];
00046 int nextpool;
00047 
/* Bit mask of active option flags; getparms() sets one bit for each option
 * word it finds in the option string. */
int options;

#define op_nocase    (1<<0)     /* option word "nocase" */
#define op_deaccent  (1<<1)     /* option word "deaccent" */
#define op_reaccent  (1<<2)     /* option word "reaccent" */
#define op_nopunct   (1<<3)     /* option word "nopunct" */
#define op_nomath    (1<<4)     /* option word "nomath" */
#define op_noparenth (1<<5)     /* "noparenthesis" or "noparentheses" */
#define op_nocs      (1<<6)     /* option word "nocs" */
#define op_noquote   (1<<7)     /* option word "noquote" */
#define op_matchall  (1<<8)     /* option word "matchall" */
#define op_alphaonly (1<<9)     /* "abconly" or "onlyabc" */
#define op_alnumonly (1<<10)    /* "alnumonly" or "onlyalnum" */
00060 
00061 char cmdbuf[256], stbuf[MAX_LINELEN+1], textbuf[MAX_LINELEN+1];
00062 char wbuf[MAX_LINELEN+1];
00063 char cmdparm[1024];
00064 char defbuf[MAX_LINELEN+1];
00065 char style[MAX_NAMELEN+1];
00066 char styledir[MAX_FNAME+1];
00067 char optionbuf[1024];
00068 char outbuf[4096];
00069 char *outptr, *wptr;
00070 int debug;
00071 
/* Internal command codes. */
enum {
    cmd_none,
    cmd_comp,
    cmd_debug,
    cmd_random,
    cmd_1,
    cmd_wordlist
};

/* Command names accepted on the first input line, with their codes.
 * Entries are kept in ascending strcmp() order: the table is looked up with
 * search_list() and checked by verify_order() in verify_tables(). */
struct {
    char *name;
    int value;
} cmdlist[] = {
    { "1",        cmd_1 },
    { "comp",     cmd_comp },
    { "compare",  cmd_comp },
    { "debug",    cmd_debug },
    { "match",    cmd_comp },
    { "rand",     cmd_random },
    { "random",   cmd_random },
    { "wordlist", cmd_wordlist },
    { "words",    cmd_wordlist }
};
#define cmdcnt (sizeof(cmdlist)/sizeof(cmdlist[0]))

/* Code of the command selected by getparms(). */
int cmd;
00090 
/* Report a fatal error and terminate.
 * msg is a printf-style format followed by its arguments; the formatted
 * text is cut to fit a 1024-byte buffer. Prints "ERROR" on the first output
 * line and the message on the second, then exits with status 1. */
void error(char *msg,...)
{
    char text[1024];
    va_list args;

    va_start(args,msg);
    vsnprintf(text,sizeof(text),msg,args);
    va_end(args);

    printf("ERROR\n%s\n",text);
    exit(1);
}
00102 
/* Fixed-argument wrapper around error(); main() installs it as the
 * error1/error2 callback.
 * msg is printed verbatim: it is passed as data to a "%s" format so that
 * any '%' characters in it are never interpreted as conversions. */
void _error(char *msg)
{
    error("%s",msg);
}
00107 
00108        /* read-in a file into buffer. Use open() and read().
00109         * Return buffer address which will be malloc'ed if buf=NULL. */
00110 char *readfile(char *fname, char buf[], long int buflen)
00111 {
00112     int fd, t;
00113     struct stat st;
00114     long int l, lc;
00115     char *bf;
00116     t=0; if(buf) buf[0]=0;
00117     if(stat(fname,&st)) return NULL;
00118     l=st.st_size; if(l<0) return NULL;
00119     if(l>=buflen) {
00120        if(buflen<MAX_LINELEN) l=buflen-1;
00121        else error("file_too_long %s",fname);
00122     }
00123     fd=open(fname,O_RDONLY); if(fd==-1) return NULL;
00124     if(buf==NULL) bf=xmalloc(l+8); else {bf=buf;if(l==0) {t=1; l=buflen-1;}}
00125     lc=read(fd,bf,l); close(fd);
00126     if(lc<0 || lc>l || (lc!=l && t==0)) 
00127        {if(buf==NULL) free(bf); else buf[0]=0; return NULL;}
00128     bf[lc]=0; _tolinux(bf); return bf;
00129 }
00130 
00131        /* get option word in a string */
00132 void _getopt(char *name, char *p)
00133 {
00134     char *p1, *p2, *p3, *p4;
00135     char buf[MAX_LINELEN+1];
00136     
00137     snprintf(buf,sizeof(buf),"%s",p);
00138     p1=find_word_start(name);
00139     for(p2=buf;*p2;p2++) {
00140        if(myisspace(*p2)) *p2=' ';
00141        if(*p2=='=') *p2='   ';
00142     }
00143     *p=0;
00144     p2=wordchr(buf,p1); if(p2==NULL) return;
00145     for(p3=find_word_end(p2);myisspace(*p3);p3++) {
00146        if(*p3=='     ') {
00147            p3=find_word_start(p3);
00148            switch(*p3) {
00149               case '"': {
00150                   p4=strchr(p3+1,'"');
00151                   goto tested;
00152               }
00153               case '(': {
00154                   p4=find_matching(p3+1,')');
00155                   goto tested;
00156               }
00157               case '[': {
00158                   p4=find_matching(p3+1,']');
00159                   goto tested;
00160               }
00161               case '{': {
00162                   p4=find_matching(p3+1,'}');
00163                   tested:
00164                   if(p4) {
00165                      p3++; *p4=0; break;
00166                   }
00167                   else goto nomatch;
00168               }
00169               default: {
00170                   nomatch: 
00171                   *find_word_end(p3)=0;
00172               }
00173            }
00174            mystrncpy(p,p3,MAX_LINELEN);
00175            return;
00176        }
00177     }
00178     *find_word_end(p2)=0;
00179     memmove(p,p2,strlen(p2)+1);
00180 }
00181 
00182 void _getdef(char buf[], char *name, char value[])
00183 {
00184     char *p1, *p2, *p3, *p4;
00185 
00186     if(*name==0) goto nothing;     /* this would create segfault. */
00187     for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
00188        p2=find_word_start(p1+strlen(name));
00189        if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
00190        p3=p1; while(p3>buf && *(p3-1)!='\n') p3--;
00191        p3=find_word_start(p3);
00192        if(p3<p1 && *p3!='!') continue;
00193        if(p3<p1) {
00194            p3++; p4=find_word_end(p3);
00195            if(find_word_start(p4)!=p1) continue;
00196            if(p4-p3!=3 || (strncmp(p3,"set",3)!=0 &&
00197               strncmp(p3,"let",3)!=0 &&
00198               strncmp(p3,"def",3)!=0)) {
00199               if(p4-p3!=6 || strncmp(p3,"define",6)!=0) continue;
00200            }
00201        }
00202        p2++;p3=strchr(p2,'\n'); if(p3==NULL) p3=p2+strlen(p2);
00203        p2=find_word_start(p2);
00204        if(p2>p3) goto nothing;
00205        if(p3-p2>=MAX_LINELEN) error("string_too_long def %s",name);
00206        memmove(value,p2,p3-p2); value[p3-p2]=0;
00207        strip_trailing_spaces(value); return;
00208     }
00209 nothing:
00210     value[0]=0; return;
00211 }
00212 
00213 char fnbuf[MAX_FNAME+1];
00214 
00215        /* make a filename and check length */
00216 char *mkfname(char buf[], char *s,...)
00217 {
00218     va_list vp;
00219     char *p;
00220 
00221     if(buf==NULL) p=fnbuf; else p=buf;
00222     va_start(vp,s);
00223     vsnprintf(p,MAX_FNAME,s,vp);
00224     va_end(vp);
00225     if(strlen(p)>=MAX_FNAME-1) error("name_too_long %.20s",p);
00226     return p;
00227 }
00228 
00229 #include "lines.c"
00230 #include "translate.c"
00231 #include "match.c"
00232 #include "compile.c"
00233 
00234 void getparms(void)
00235 {
00236     char *p, *p2, *p3, lbuf[8];
00237     char buf[MAX_LINELEN+1], pbuf[MAX_LINELEN+1];
00238     struct stat st;
00239     int i;
00240     
00241     cmd=0;
00242     p=getenv("wims_exec_parm");
00243     if(p==NULL) return;
00244     snprintf(pbuf,sizeof(pbuf),"%s",p);
00245     rows2lines(pbuf);
00246     p2=strchr(pbuf,'\n'); if(p2==NULL) return; else *p2++=0;
00247     p=find_word_start(pbuf); 
00248     p3=find_word_end(p); if(p3-p>=sizeof(cmdbuf)) return;
00249     if(*p==0) return; else *p3++=0;
00250     memmove(cmdbuf,p,p3-p); cmdbuf[p3-p]=0;
00251     p=p2; p2=strchr(p,'\n'); if(p2==NULL) p2=p+strlen(p); else *p2++=0;
00252     if(p2<=find_word_start(p)) return;
00253     if(p2-p<sizeof(textbuf)) {
00254        memmove(textbuf,p,p2-p); textbuf[p2-p]=0;
00255     }
00256     p=p2; p2=p+strlen(p);
00257     if(p2>p && p2-p<sizeof(stbuf)) {
00258        memmove(stbuf,p,p2-p); stbuf[p2-p]=0;
00259     }
00260     i=search_list(cmdlist,cmdcnt,sizeof(cmdlist[0]),cmdbuf);
00261     if(i>=0) cmd=cmdlist[i].value;
00262     else error("bad_command %.20s",cmdbuf);
00263     snprintf(cmdparm,sizeof(cmdparm),"%s",p2);
00264     
00265     options=0;
00266     p=getenv("w_module_language"); if(p==NULL) p="";
00267     snprintf(lbuf,sizeof(lbuf),"%2s",p);
00268     if(*p3) {
00269        snprintf(buf,sizeof(buf),"%s",p3);
00270        _getopt("style",buf);
00271        snprintf(style,sizeof(style),"%s",find_word_start(buf));
00272        *find_word_end(style)=0;
00273        snprintf(buf,sizeof(buf),"%s",p3);
00274        _getopt("language",buf);
00275        if(buf[0]) snprintf(lbuf,sizeof(lbuf),"%2s",buf);
00276     }
00277     lbuf[2]=0;
00278     if(!myisalpha(lbuf[0]) || !myisalpha(lbuf[1])) strcpy(lbuf,"en");
00279     styledir[0]=defbuf[0]=optionbuf[0]=buf[0]=0;
00280     if(*style) {
00281        p=getenv("module_dir");
00282        if(p==NULL) {               /* non-wims operation */
00283            snprintf(styledir,sizeof(styledir),"%s",style);
00284        }
00285        else {
00286            for(i=0;i<MAX_NAMELEN && myisalnum(style[i]);i++);
00287            style[i]=0;
00288            if(style[0]) {          /* style defined */
00289               if(*p!='/' && strstr(p,"..")==NULL) {     /* check module dir */
00290                   snprintf(styledir,sizeof(styledir),"%s/symtext/%s/%s/def",p,lbuf,style);
00291                   if(stat(styledir,&st)) styledir[0]=0;
00292               }
00293               if(styledir[0]==0) { /* check default */
00294                   snprintf(styledir,sizeof(styledir),"%s/symtext/%s/%s/def",defaultdir,lbuf,style);
00295                   if(stat(styledir,&st)) error("style_not_found %s",style);
00296               }
00297            }
00298        }
00299        if(styledir[0]) {           /* get def */
00300            readfile(styledir,defbuf,sizeof(defbuf));
00301            styledir[strlen(styledir)-4]=0;
00302            suffix_dic(mkfname(NULL,"%s/suffix",styledir));
00303            transdic=diccnt;
00304            if(prepare_dic("trans")==NULL) transdic=-1;
00305            dic[transdic].unknown_type=unk_leave;
00306            macrodic=diccnt;
00307            if(prepare_dic("macros")==NULL) macrodic=-1;
00308            dic[macrodic].unknown_type=unk_delete;
00309        }
00310     }
00311     _getdef(defbuf,"option",buf);
00312     snprintf(optionbuf,sizeof(optionbuf),"%s %s",p3,buf);
00313     if(wordchr(optionbuf,"nocase")!=NULL) options|=op_nocase;
00314     if(wordchr(optionbuf,"deaccent")!=NULL) options|=op_deaccent;
00315     if(wordchr(optionbuf,"reaccent")!=NULL) options|=op_reaccent;
00316     if(wordchr(optionbuf,"nopunct")!=NULL) options|=op_nopunct;
00317     if(wordchr(optionbuf,"nomath")!=NULL) options|=op_nomath;
00318     if(wordchr(optionbuf,"noparenthesis")!=NULL) options|=op_noparenth;
00319     if(wordchr(optionbuf,"noparentheses")!=NULL) options|=op_noparenth;
00320     if(wordchr(optionbuf,"nocs")!=NULL) options|=op_nocs;
00321     if(wordchr(optionbuf,"noquote")!=NULL) options|=op_noquote;
00322     if(wordchr(optionbuf,"matchall")!=NULL) options|=op_matchall;
00323     if(wordchr(optionbuf,"abconly")!=NULL) options|=op_alphaonly;
00324     if(wordchr(optionbuf,"onlyabc")!=NULL) options|=op_alphaonly;
00325     if(wordchr(optionbuf,"alnumonly")!=NULL) options|=op_alnumonly;
00326     if(wordchr(optionbuf,"onlyalnum")!=NULL) options|=op_alnumonly;
00327     
00328     if(cmd==cmd_comp || cmd==cmd_debug) {
00329        _getopt("debug",optionbuf);
00330        if(optionbuf[0]) {
00331            i=atoi(optionbuf);
00332            if(i>0 || strcmp(optionbuf,"0")==0) debug=i; else debug=1;
00333            if(debug>0) cmd=cmd_debug;
00334        }
00335     }
00336     strip_enclosing_par(textbuf);
00337     strfold(textbuf);
00338 }
00339 
00340 int verify_tables(void)
00341 {
00342     if(verify_order(builtin,builtincnt,sizeof(builtin[0]))) return -1;
00343     if(verify_order(cmdlist,cmdcnt,sizeof(cmdlist[0]))) return -1;
00344     
00345     return 0;
00346 }
00347 
00348 int main(int argc, char *argv[])
00349 {
00350     int i, n, mat;
00351     char *p1, *p2;
00352     char lbuf[MAX_LINELEN+1];
00353 
00354     if(argc>1 && strcmp(argv[1],"-t")==0) {
00355        if(verify_tables()==0) {
00356            printf("Table orders OK.\n");
00357            return 0;
00358        }
00359        else return 1;
00360     }
00361     error1=error2=_error; debug=0;
00362     wptr=wbuf; wbuf[0]=0;
00363     getparms();
00364     Mnext=Mbuf; Mcnt=0;
00365     switch(cmd) {
00366        case cmd_comp: {
00367            comp:
00368            n=linenum(stbuf);
00369            for(mat=0,i=1,p1=stbuf;i<=n;i++,p1=p2) {
00370               p2=find_line_end(p1); if(*p2) *p2++=0;
00371               p1=find_word_start(p1);
00372               if(*p1==0) continue;
00373               snprintf(lbuf,sizeof(lbuf),"%s",p1);
00374               compile(lbuf);
00375               mat=match(textbuf);
00376               if(mat) {
00377                   printf("MATCH %d %s\n",i,outbuf);
00378                   if((options&op_matchall)==0) break;
00379               }
00380            }
00381            if(debug) fprintf(stderr,"word list: %s\n",wbuf);
00382            break;
00383        }
00384        case cmd_debug: {
00385            if(debug==0) debug=1;
00386            fprintf(stderr,"debug=%d.\n",debug);
00387            for(i=0;i<diccnt;i++)
00388              fprintf(stderr,"Dictionary %d: %s, %d entries.\n",
00389                     i+1,dic[i].name,dic[i].len);
00390            goto comp;
00391        }
00392        case cmd_random: {
00393            
00394            break;
00395        }
00396        case cmd_wordlist: {
00397            
00398            break;
00399        }
00400        case cmd_1: {
00401            
00402            break;
00403        }
00404         
00405        case cmd_none:
00406        default: return 1;
00407     }
00408     return 0;
00409 }
00410