Back to index

wims  3.65+svn20090927
html2msg.c
Go to the documentation of this file.
00001 /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
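00018        /* Convert an HTML page back to message source: the text of the
00018         * body is copied to the output file, and formulas found in
00018         * "MATH" comments or in image ALT texts are written as \( ... \) */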
00019 
00020 /*************** Customization: change values hereafter ****************/
00021 
/* Limit, in bytes, on the size of the input file (16 MiB).
 * The value is parenthesized so that the macro behaves as a single
 * operand in any expression (e.g. x / buflim or x % buflim). */
#define buflim (1024*1024*16)
00024 
00025 /***************** Nothing should need change hereafter *****************/
00026 
00027 #include "../wims.h"
00028 #include "../hmname.c"
00029 
00030 char fn1[1024]="", fn2[1024]="";
00031 char mathbuf[MAX_LINELEN+1];
00032 char *filebuf;
00033 int filelen=0;
00034 int latex2html=0;
00035 FILE *outf;
00036 
/* LaTeX commands that getmath() can turn back into plain ASCII.
 * `name` is the command followed by a trailing backslash (the form in
 * which getmath() builds its lookup key); `trans` is the replacement. */
struct {
    char *name;
    char *trans;
} backtrans[] = {
    { .name = "\\ge\\",                 .trans = " >= "   },
    { .name = "\\geq\\",                .trans = " >= "   },
    { .name = "\\le\\",                 .trans = " <= "   },
    { .name = "\\leq\\",                .trans = " <= "   },
    { .name = "\\to\\",                 .trans = " -> "   },
    { .name = "\\rightarrow\\",         .trans = " -> "   },
    { .name = "\\longrightarrow\\",     .trans = " --> "  },
    { .name = "\\Rightarrow\\",         .trans = " => "   },
    { .name = "\\Longrightarrow\\",     .trans = " ==> "  },
    { .name = "\\Leftrightarrow\\",     .trans = " <=> "  },
    { .name = "\\Longleftrightarrow\\", .trans = " <==> " },
    { .name = "\\Longleftarrow\\",      .trans = " <== "  },
};

/* Number of entries in backtrans[]. */
#define backtransno (sizeof(backtrans)/sizeof(backtrans[0]))
00055 
/* Allocate n bytes with malloc(). If the allocation fails the process
 * is terminated with exit status 1, so a returned pointer is never NULL. */
void *xmalloc(size_t n)
{
    void *mem = malloc(n);

    if (!mem) {
        exit(1);
    }
    return mem;
}
00063 
00064        /* Points to the end of the word */
00065 char *find_word_end(char *p)
00066 {
00067     int i;
00068     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
00069     return p;
00070 }
00071 
00072        /* Strips leading spaces */
00073 char *find_word_start(char *p)
00074 {
00075     int i;
00076     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
00077     return p;
00078 }
00079 
/* Return a pointer just past the tag that starts at p.
 *
 * p may or may not point at the opening '<'. The tag ends at the first
 * '>' that is not inside a double-quoted string; the returned pointer is
 * the character after that '>'. If no such '>' exists the end of the
 * string is returned, except when that end lies more than 2048 bytes
 * away: then a fallback position is chosen (see below). */
char *find_tag_end(char *p)
{
    char *cur, *body;

    cur = p;
    if (*cur == '<') cur++;
    while (*cur != 0 && *cur != '>') {
        if (*cur == '"') {
            /* jump to the closing quote; an unterminated quote runs
             * to the end of the string */
            cur = strchr(cur + 1, '"');
            if (cur == NULL) {
                cur = p + strlen(p);
                break;
            }
        }
        cur++;
    }
    /* Reaching the end of a long string without closing the tag is
     * probably a syntax error of the page: fall back to the first '>'
     * (quoted or not), else the next '<', else the end of the first word. */
    if (*cur == 0 && cur > p + 2048) {
        body = (*p == '<') ? p + 1 : p;
        cur = strchr(body, '>');
        if (cur == NULL) cur = strchr(body, '<');
        if (cur == NULL) cur = find_word_end(find_word_start(body));
    }
    if (*cur == '>') cur++;
    return cur;
}
00099 
/* Find the first tag named `tag` in the string p.
 *
 * A match is a '<' immediately followed by `tag` (compared without regard
 * to case) and then by a character that is not alphanumeric, so that
 * "body" does not match "<bodyx". Returns a pointer to the '<' of the
 * match, or a pointer to the terminating NUL of p when there is none. */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len;

    len=strlen(tag);
    for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
        /* cast: <ctype.h> functions need an unsigned char value */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
00110 
00111        /* modify a string. Bufferlen must be ast least MAX_LINELEN */
00112 void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
00113 {
00114     char buf[MAX_LINELEN+1];
00115     va_list vp;
00116     
00117     va_start(vp,good);
00118     vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
00119     if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) {
00120        return;
00121     }
00122     strcat(buf,bad_end);
00123     strcpy(bad_beg,buf);
00124 }
00125 
/* Decode, in place, the HTML entities "&amp;", "&lt;" and "&gt;" found in
 * the string p into '&', '<' and '>'.
 *
 * Each entity is decoded once: scanning resumes after the character just
 * produced, so "&amp;lt;" becomes "&lt;" and is not decoded again.
 * Any other '&' is left untouched. */
void cutamp(char *p)
{
    char *pp;
    for(pp=strchr(p,'&'); pp; pp=strchr(pp+1,'&')) {
        /* The tail is shifted left over the entity. Source and
         * destination overlap, so memmove() is required; strcpy() has
         * undefined behaviour on overlapping objects. */
        if(strncmp(pp,"&amp;",5)==0) {
            memmove(pp+1,pp+5,strlen(pp+5)+1); continue;
        }
        if(strncmp(pp,"&lt;",4)==0) {
            *pp='<'; memmove(pp+1,pp+4,strlen(pp+4)+1); continue;
        }
        if(strncmp(pp,"&gt;",4)==0) {
            *pp='>'; memmove(pp+1,pp+4,strlen(pp+4)+1); continue;
        }
    }
}
00142 
00143        /* get the file */
00144 void prepare_file(void)
00145 {
00146     FILE *f;
00147     long int flen;
00148 
00149     filelen=0;
00150     f=fopen(fn1,"r"); if(f==NULL) return;
00151     fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
00152     if(flen>buflim) return;
00153     filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
00154     fclose(f);
00155     if(flen<0 || flen>=buflim) flen=0; filebuf[flen]=0;
00156     filelen=flen;
00157     outf=fopen(fn2,"w"); if(outf==NULL) return;
00158 }
00159 
00160 void getmath(char *p)
00161 {
00162     char *pt, *pv;
00163     char *p1, *p2, buf[256];
00164 
00165     mathbuf[0]=0;
00166     pt=find_word_start(p);
00167     if(strncmp(pt,"\\begin{displaymath}",
00168                  strlen("\\begin{displaymath}"))==0) {
00169        pt=strchr(pt,'}')+1;
00170        pv=strstr(pt,"\\end{displaymath}");
00171        if(pv==NULL) return;
00172        goto insmath;
00173     }
00174     if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return;
00175     if(*pt!='$') return; do pt++; while(*pt=='$');
00176     pv=strchr(pt,'$'); if(pv==NULL) return;
00177     insmath: if(pv-pt>=MAX_LINELEN-256) return;
00178     memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0;
00179     if(strstr(mathbuf,"...\n...")!=NULL) {
00180        strcpy(mathbuf,"......"); return;
00181     }
00182     cutamp(mathbuf); latex2html=1;
00183     for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) {
00184        char c,*d;
00185        p2=find_word_start(p1+strlen("\\mathbb")); c=0;
00186        if(strchr("NZQRC",*p2)!=NULL) c=*p2;
00187        else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) {
00188               c=*(p2+1); p2+=2;
00189        }
00190        if(c) {
00191            p2=find_word_start(++p2);
00192            if(isalnum(*p2)) d=" "; else d="";
00193            string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d);
00194        }
00195     }
00196     for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) {
00197        if(p1>mathbuf && isalpha(*(p1-1))) continue;
00198        for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++);
00199        if(*p2!='}' || isalnum(*(p2+1))) continue;
00200        memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0;
00201        if(strstr(hmsame,buf)==NULL) continue;
00202        strcpy(p2,p2+1); strcpy(p1,p1+1);
00203     }
00204     if(strstr(mathbuf,"\\begin{")!=NULL) return;
00205     for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) {
00206        if((p1>mathbuf && isalpha(*(p1-1))) ||
00207           !isalnum(*(p1+1)) || *(p1+2)!='}') continue;
00208        *p1=*(p1+1); strcpy(p1+1,p1+3);
00209     }
00210     if(strchr(mathbuf,'[')!=NULL) {
00211         char mbuf[MAX_LINELEN+1];
00212        snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf);
00213        strcpy(mathbuf,mbuf);
00214     }
00215        /* try to simplify */
00216     if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) {
00217        int i, tt;
00218        tt=0;
00219        for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
00220            for(p2=p1+1;isalpha(*p2);p2++);
00221            if(p2==p1+1 || p2>p1+24) {tt=1; break;}
00222            memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
00223            for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
00224            if(i>=backtransno && strstr(hmsame,buf)==NULL) {
00225               tt=1; break;
00226            }
00227        }
00228        if(tt==0) {
00229            for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
00230               for(p2=p1+1;isalpha(*p2);p2++);
00231               if(p2==p1+1 || p2>p1+24) break;
00232               memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
00233               for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
00234               if(i<backtransno) 
00235                 string_modify(buf,p1,p2,backtrans[i].trans);
00236               else *p1=' ';
00237            }
00238        }
00239     }
00240 }
00241 
00242 void output(void)
00243 {
00244     char *p, *pp, *p2, *pt;
00245     char buf[MAX_LINELEN+1];
00246     p=filebuf;
00247     restart:
00248     pp=find_tag(p,"body"); if(*pp!=0) {
00249        p=find_tag_end(pp); goto restart;
00250     }
00251     pp=find_tag(p,"html"); if(*pp!=0) {
00252        p=find_tag_end(pp); goto restart;
00253     }
00254     *find_tag(p,"/body")=0; *find_tag(p,"/html")=0;
00255     for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' ';
00256     for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) {
00257        if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;}
00258        if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 &&
00259           *find_word_start(pp+8)==0) break;
00260        if(strncasecmp(pp+1,"!-- MATH",8)==0) {
00261            p2=strstr(pp+8,"-->"); if(p2==NULL) continue;
00262            *p2=0; getmath(pp+9); *p2='-';
00263            p=p2+3; pt=find_word_start(p);
00264            if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) {
00265               p=find_tag_end(pt); pp=pt;
00266               fprintf(outf,"\\(%s\\)",mathbuf);
00267            }
00268            continue;
00269        }
00270        if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) {
00271 
00272            
00273            
00274            continue;
00275        }
00276        if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) {
00277            p2=find_tag_end(pp);
00278            if(p2-pp>=MAX_LINELEN-256) continue;
00279            memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0;
00280            pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\"");
00281            if(pt!=NULL) {
00282               pt+=strlen("ALT=\"");
00283               getmath(pt); if(mathbuf[0]) {
00284                   fprintf(outf,"\\(%s\\)",mathbuf); p=p2;
00285               }
00286            }
00287        }
00288     }
00289     if(pp==NULL) fprintf(outf,"%s",p);
00290 }
00291 
00292 int main(int argc, char *argv[])
00293 {
00294     char *p, *pp;
00295     char *mod;
00296 
00297     mod=getenv("w_module");
00298     if(mod!=NULL && strncmp(mod,"adm/",4)!=0 && strcmp(mod,"home")!=0) return 1;
00299     if(mod==NULL) p=argv[1]; else p=getenv("wims_exec_parm");
00300     if(p==NULL || *p==0) return 1;
00301     p=find_word_start(p); pp=find_word_end(p);
00302     if(pp<=p || pp-p>sizeof(fn1)-1) return 1;
00303     memmove(fn1,p,pp-p); fn1[pp-p]=0;
00304     p=find_word_start(pp); pp=find_word_end(p);
00305     if(pp<=p || pp-p>sizeof(fn2)-1) strcpy(fn2,fn1); 
00306     else {memmove(fn2,p,pp-p); fn2[pp-p]=0;}
00307     prepare_file();
00308     output();
00309     fclose(outf);
00310     return 0;
00311 }
00312