Back to index

wims  3.65+svn20090927
text.c
Go to the documentation of this file.
00001 /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
00018 unsigned char t_buf[4][MAX_LINELEN+1];
00019 char maskbuf[MAX_LINELEN+1];
00020 
00021        /* internal routine. */
00022 void _text_cut(char *p, char *w)
00023 {
00024     char *p1, *p2;
00025     p1=wordchr(p,w); if(p1==NULL) error2("syntax_error");
00026     *p1=0; p2=find_word_start(p1+strlen(w));
00027     strcpy(t_buf[0],p); strcpy(t_buf[1],p2);
00028     strip_trailing_spaces(t_buf[0]);
00029     substitute(t_buf[0]); substitute(t_buf[1]);
00030 }
00031 
00032        /* Extract characters in buf[0] which are identical to
00033         * corresponding characters in buf[1]. */
00034 void text_common(char *p)
00035 {
00036     int i,j,n1,n2;
00037     _text_cut(p,"and");
00038     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
00039     if(n2<n1) n1=n2;
00040     for(i=j=0;i<n1;i++) {
00041        if(t_buf[0][i]==t_buf[1][i] && maskbuf[i]!='0') p[j++]=t_buf[0][i];
00042     }
00043     p[j]=0;
00044 }
00045 
00046        /* Returns a mask string composed of '0's and '1's, where
00047         * '0' means corresponding positions of buf[0] and buf[1] are 
00048         * equal. */
00049 void text_compare(char *p)
00050 {
00051     int min,max, i;
00052     _text_cut(p,"and");
00053     min=strlen(t_buf[0]); max=strlen(t_buf[1]);
00054     if(min>max) {
00055        i=min; min=max; max=i;
00056     }
00057     for(i=0; i<min; i++) {
00058        if(t_buf[0][i]==t_buf[1][i]) p[i]='0'; else p[i]='1';
00059     }
00060     for(; i<max; i++) p[i]='1';
00061     p[max]=0;
00062 }
00063 
00064        /* copy text according to mask. */
00065 void text_copy(char *p)
00066 {
00067     int i, j, n;
00068     
00069     snprintf(t_buf[0],MAX_LINELEN,"%s",p);
00070     strip_trailing_spaces(t_buf[0]); substitute(t_buf[0]);
00071     n=strlen(t_buf[0]);
00072     for(i=j=0;i<n;i++) {
00073        if(maskbuf[i]!='0') p[j++]=t_buf[0][i];
00074     }
00075     p[j]=0;
00076 }
00077 
00078        /* returns count of characters in buf[1] which appear in buf[0]. */
00079 void text_count(char *p)
00080 {
00081     int i, n, c;
00082     _text_cut(p,"in");
00083     n=strlen(t_buf[1]);
00084     for(i=c=0;i<n;i++) {
00085        if(strchr(t_buf[0],t_buf[1][i])!=NULL && maskbuf[i]!='0') c++;
00086     }
00087     snprintf(p,MAX_LINELEN,"%d",c);
00088 }
00089 
00090        /* Extract characters in buf[0] which are different than
00091         * corresponding characters in buf[1]. */
00092 void text_diff(char *p)
00093 {
00094     int i,j,n1,n2;
00095     _text_cut(p,"from");
00096     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
00097     if(n2<n1) n1=n2;
00098     for(i=j=0;i<n1;i++) {
00099        if(t_buf[0][i]!=t_buf[1][i] && maskbuf[i]!='0') p[j++]=t_buf[0][i];
00100     }
00101     p[j]=0;
00102 }
00103 
00104        /* put chars in buf[0] in a new string, into positions 
00105         * corresponding to '1's in the mask buf[1]. 
00106         * Positions corresponding to '0's are filled by space.
00107         * Fill stops at the end of buf[0]. If buf[1] is 
00108         * too short, it is reused from the start. */
00109 void text_expand(char *p)
00110 {
00111     int i,j,k,n1,n2;
00112     _text_cut(p,"using");
00113     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
00114     if(n2==0) {p[0]=0; return;}
00115     for(i=j=k=0;i<n1 && j<MAX_LINELEN;j++,k=j%n2) {
00116        if(t_buf[1][k]=='0') p[j]=' ';
00117        else p[j]=t_buf[0][i++];
00118     }
00119     p[j]=0;
00120 }
00121 
00122        /* character by character replacement of buf[1] by buf[0], 
00123         * replacing only mask-effective chars.
00124         * The resulting string is as long as buf[1], and the replacement
00125         * stops when chars buf[0] has run out. */
00126 void text_insert(char *p)
00127 {
00128     int i,j,n1,n2;
00129     _text_cut(p,"into");
00130     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
00131     for(i=j=0; i<n2 && j<n1; i++) {
00132        if(maskbuf[i]!='0') t_buf[1][i]=t_buf[0][j++];
00133     }
00134     snprintf(p,MAX_LINELEN,"%s",t_buf[1]);
00135 }
00136 
00137 #define MAX_TLEN 96
00138 
00139        /* interact of two strings according to rules
00140         * defined a table. */
00141 void text_interact(char *p)
00142 {
00143     char *table, *dline, *tline[MAX_TLEN];
00144     char *p1, *p2;
00145     int i,j1,j2,k,l,l2,n;
00146     
00147     table=wordchr(p,"table");
00148     if(table==NULL) error2("syntax_error");
00149     *table=0; strip_trailing_spaces(p);
00150     table=find_word_start(table+strlen("table"));
00151     snprintf(t_buf[2],MAX_LINELEN,"%s",table);
00152     _text_cut(p,"and");
00153     strip_trailing_spaces(t_buf[2]); substitute(t_buf[2]);
00154     n=linenum(t_buf[2])-1;
00155     if(n>=MAX_TLEN) error2("text_bad_table");
00156     p2=strchr(t_buf[2],'\n'); if(p2!=NULL) *p2++=0;
00157     if(strlen(t_buf[2])!=n) error2("text_bad_table");
00158     dline=t_buf[2];
00159     for(i=0,p1=p2;i<n;i++,p1=p2) {
00160        if(p1==NULL) error2("text_bad_table");
00161        p2=strchr(p1,'\n'); 
00162        if(p2!=NULL) *p2++=0;
00163        if(strlen(p1)!=n) error2("text_bad_table");
00164        tline[i]=p1;
00165     }
00166     l=strlen(t_buf[0]); l2=strlen(t_buf[1]); if(l2<l) l=l2;
00167     for(i=k=0;i<l;i++) {
00168        if(maskbuf[i]!='0') {
00169            p1=strchr(dline,t_buf[0][i]);
00170            p2=strchr(dline,t_buf[1][i]);
00171            if(p1==NULL || p2==NULL) continue;
00172            j1=p1-dline; j2=p2-dline;
00173            if(j1>=n || j2>=n) continue; /* should not occur */
00174            p[k++]=tline[j1][j2];
00175        }
00176     }
00177     p[k]=0;
00178 }
00179 
00180        /* returns a mask string composed of '0's and '1's, where
00181         * '0' means corresponding char in buf[1] appears in buf[0]. */
00182 void text_mark(char *p)
00183 {
00184     int i, n;
00185     _text_cut(p,"in");
00186     n=strlen(t_buf[1]);
00187     for(i=0;i<n;i++) {
00188        if(strchr(t_buf[0],t_buf[1][i])!=NULL) p[i]='1';
00189        else p[i]='0';
00190     }
00191     p[i]=0;
00192 }
00193 
00194        /* Returns a string whose characters are the maximum 
00195         * of the two corresponding chars in buf[0] and buf[1].
00196         * Length of the string is the longuest one. */
00197 void text_max(char *p)
00198 {
00199     int min,max, i, j, k;
00200     _text_cut(p,"and");
00201     min=strlen(t_buf[0]); max=strlen(t_buf[1]);
00202     if(min>max) {
00203        i=min; min=max; max=i; j=0;
00204     }
00205     else j=1;
00206     for(i=k=0; i<min; i++) {
00207        if(maskbuf[i]=='0') continue;
00208        if(t_buf[0][i]>t_buf[1][i]) p[k++]=t_buf[0][i];
00209        else p[k++]=t_buf[1][i];
00210     }
00211     for(;i<max;i++) {
00212        if(maskbuf[i]!='0') p[k++]=t_buf[j][i];
00213     }
00214     p[k]=0;
00215 }
00216 
00217        /* Returns a string whose characters are the minimum
00218         * of the two corresponding chars in buf[0] and buf[1].
00219         * Length of the string is the shortest one. */
00220 void text_min(char *p)
00221 {
00222     int min,max, i,k;
00223     _text_cut(p,"and");
00224     min=strlen(t_buf[0]); max=strlen(t_buf[1]);
00225     if(min>max) {
00226        i=min; min=max; max=i;
00227     }
00228     for(i=k=0; i<min; i++) {
00229        if(maskbuf[i]=='0') continue;
00230        if(t_buf[0][i]<t_buf[1][i]) p[k++]=t_buf[0][i];
00231        else p[k++]=t_buf[1][i];
00232     }
00233     p[k]=0;
00234 }
00235 
00236        /* extract chars in buf[0] which occur in buf[1]. */
00237 void text_occur(char *p)
00238 {
00239     int i,j,n;
00240     unsigned char *pp;
00241     char buf[MAX_LINELEN+1];
00242     memset(buf,0,sizeof(buf));
00243     _text_cut(p,"in");
00244     n=strlen(t_buf[1]);
00245     for(i=0;i<n;i++) {
00246        if(maskbuf[i]=='0') continue;
00247        pp=strchr(t_buf[0],t_buf[1][i]);
00248        if(pp!=NULL) buf[pp - t_buf[0]]=1;
00249     }
00250     n=strlen(t_buf[0]);
00251     for(i=j=0;i<n;i++) {
00252        if(buf[i]) p[j++]=t_buf[0][i];
00253     }
00254     p[j]=0;
00255 }
00256 
00257        /* remove characters of buf[1] in buf[0]. */
00258 void text_remove(char *p)
00259 {
00260     int i, j, n;
00261     _text_cut(p,"in");
00262     n=strlen(t_buf[1]);
00263     for(i=j=0;i<n;i++) {
00264        if(strchr(t_buf[0],t_buf[1][i])==NULL
00265           && maskbuf[i]!='0') p[j++]=t_buf[1][i];
00266     }
00267     p[j]=0;
00268 }
00269 
00270        /* Cyclic reordering of text. */
00271 void text_reorder(char *p)
00272 {
00273     int i,j,k,l,n,t;
00274     int list[10240];
00275     char buf[MAX_LINELEN+1];
00276     _text_cut(p,"by"); *p=0;
00277     n=itemnum(t_buf[1]); if(n<=0 || n>=10240) return;
00278     for(i=0;i<n;i++) {
00279         buf[0]=0; fnd_item(t_buf[1],i+1,buf);
00280        j=atoi(buf); if(j<=0 || j>n) return;
00281        list[i]=j;
00282     }
00283     t=strlen(t_buf[0]);
00284     for(i=l=0;l<t && i<t+n;i++) {
00285        j=i/n; k=j*n+list[i%n];
00286        if(k>t || k<=0) continue;
00287        p[l++]=t_buf[0][k-1];
00288     }
00289     p[l]=0;
00290 }
00291 
00292        /* repeat a string to a given length. */
00293 void text_repeat(char *p)
00294 {
00295     int n,i,k;
00296     _text_cut(p,"to");
00297     n=strevalue(t_buf[1]); if(n>MAX_LINELEN) n=MAX_LINELEN;
00298     if(n<0) n=0;
00299     k=strlen(t_buf[0]); if(k<=0) {*p=0; return;}
00300     for(i=0;i<n;i++) {
00301        p[i]=t_buf[0][i%k];
00302     }
00303     p[i]=0;
00304 }
00305 
00306        /* reverse a string */
00307 void text_reverse(char *p)
00308 {
00309     int i,n;
00310     char buf[MAX_LINELEN+1];
00311     snprintf(t_buf[0],sizeof(t_buf[0]),"%s",p);
00312     substitute(t_buf[0]);
00313     n=strlen(t_buf[0]); if(n>MAX_LINELEN) n=MAX_LINELEN;
00314     for(i=0;i<n;i++) buf[i]=t_buf[0][n-1-i];
00315     buf[n]=0;
00316     strcpy(p,buf);
00317 }
00318 
00319        /* remove characters of buf[1] not in buf[0]. */
00320 void text_select(char *p)
00321 {
00322     int i, j, n;
00323     _text_cut(p,"in");
00324     n=strlen(t_buf[1]);
00325     for(i=j=0;i<n;i++) {
00326        if(strchr(t_buf[0],t_buf[1][i])!=NULL
00327           && maskbuf[i]!='0') p[j++]=t_buf[1][i];
00328     }
00329     p[j]=0;
00330 }
00331 
00332        /* tag: bit 0 is mask. */
00333 struct {
00334     char *name;
00335     int tag;
00336     void (*routine) (char *p);
00337 } text_proc[]={
00338       {"appear",     1,     text_occur},
00339       {"common",     1,     text_common},
00340       {"compare",    0,     text_compare},
00341       {"copy",              1,     text_copy},
00342       {"count",             1,     text_count},
00343       {"delete",     1,     text_remove},
00344       {"diff",              1,     text_diff},
00345       {"differ",     1,     text_diff},
00346       {"drop",              1,     text_remove},
00347       {"expand",     0,     text_expand},
00348       {"extract",    1,     text_select},
00349       {"insert",     1,     text_insert},
00350       {"interact",   1,     text_interact},
00351       {"mark",              0,     text_mark},
00352       {"max",        1,     text_max},
00353       {"min",        1,     text_min},
00354       {"occur",             1,     text_occur},
00355       {"occurrence", 1,     text_occur},
00356       {"pick",              1,     text_select},
00357       {"pickup",     1,     text_select},
00358       {"remove",     1,     text_remove},
00359       {"reorder",    0,     text_reorder},
00360       {"repeat",     0,     text_repeat},
00361       {"reverse",    0,     text_reverse},
00362       {"select",     1,     text_select}
00363 };
00364 #define TEXT_PROC_NO (sizeof(text_proc)/sizeof(text_proc[0]))
00365 
00366 int textab_verify(void) {
00367     return verify_order(text_proc,TEXT_PROC_NO,sizeof(text_proc[0]));
00368 }
00369 
00370        /* main entry point for text routines */
00371 void text(char *p)
00372 {
00373     int i,j,n;
00374     char *p1, *p2;
00375     char c,cc;
00376     char buf[MAX_LINELEN+1];
00377     p1=find_word_start(p); p2=find_word_end(p1);
00378     if(p2<=p1 || *p2==0) error2("syntax_error");
00379     *p2=0;
00380     i=search_list(text_proc,TEXT_PROC_NO,sizeof(text_proc[0]),p1);
00381     if(i<0) error2("syntax_error");
00382     snprintf(buf,sizeof(buf),"%s",find_word_start(p2+1));
00383     if((text_proc[i].tag&1)!=0 && (p1=wordchr(buf,"mask"))!=NULL) {
00384        *p1=0; strip_trailing_spaces(buf);
00385        p2=find_word_start(p1+strlen("mask"));
00386        strip_trailing_spaces(p2);
00387        snprintf(maskbuf,sizeof(maskbuf),"%s",p2);
00388        substitute(maskbuf);
00389        n=strlen(maskbuf); if(n==0) goto zeromask;
00390        c=maskbuf[n-1]; cc=0;
00391        if(c=='+') cc='1'; if(c=='-') cc='0';
00392        if(cc!=0) memset(maskbuf+n-1,cc,sizeof(maskbuf)-n);
00393        else for(j=n;j<MAX_LINELEN;j++) maskbuf[j]=maskbuf[j%n];
00394        maskbuf[sizeof(maskbuf)-1]=0;
00395     }
00396     else zeromask: memset(maskbuf,0,sizeof(maskbuf));
00397     text_proc[i].routine(buf);
00398     buf[MAX_LINELEN]=0;strcpy(p,buf);
00399 }
00400