Back to index

wims  3.65+svn20090927
phtmltext.c
Go to the documentation of this file.
00001 /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
00018        /* Extract text from phtml file */
00019 
00020 /*************** Customization: change values hereafter ****************/
00021 
00022        /* limit of data buffers */
00023 #define buflim 1024*1024*16
00024 
00025 /***************** Nothing should need change hereafter *****************/
00026 
00027 #include "../wims.h"
00028 #include "../Lib/libwims.h"
/* Path of the file to process; filled in by main() from argv[1]. */
char filename[1024] = {0};

/* Buffer holding the file contents; allocated by prepare_file(). */
char *filebuf = NULL;

/* Number of bytes loaded into filebuf; 0 when nothing was loaded. */
int filelen = 0;
00032 
00033        /* get the file */
00034 void prepare_file(void)
00035 {
00036     FILE *f;
00037     long int flen;
00038 
00039     filelen=0;
00040     f=fopen(filename,"r"); if(f==NULL) return;
00041     fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
00042     if(flen>buflim) return;
00043     filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
00044     fclose(f);
00045     if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
00046     filelen=flen;
00047 }
00048 
00049 void processbuf(void)
00050 {
00051     char *p;
00052     deaccent(filebuf);
00053     for(p=filebuf; *p; p++) *p=tolower(*p);
00054     for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
00055     for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
00056        *p=' '; strcpy(p+1,p+6);
00057     }
00058 }
00059 
00060 void output(void)
00061 {
00062     char *p, *pp, lastc;
00063     p=find_word_start(filebuf); lastc=0;
00064     if(*p=='!' || *p==':') goto cont1;
00065     for(;*p;p++) {
00066        if(*p=='\n') {
00067            if(!isspace(lastc)) {printf(" "); lastc=' ';}
00068            cont2: p=find_word_start(p);
00069            if(*p=='!' || *p==':') {
00070               if(lastc!='   ') printf(". "); lastc='    ';
00071               cont1: p=strchr(p,'\n');
00072               if(p==NULL) return;
00073               if(*(p-1)=='\\') {p++; goto cont1;}
00074               goto cont2;
00075            }
00076            for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
00077            pp=find_word_start(pp);
00078            if(*pp=='=') goto cont1;
00079        }
00080        if(*p=='\\' && *(p+1)=='\n') {
00081            printf("\n"); p++; continue;
00082        }
00083        if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
00084            p=strchr(p,'>'); goto nextp;
00085        }
00086        if(*p=='$') {
00087            if(lastc != '    ') {
00088               if(!isspace(lastc)) printf(" ");
00089               printf(". "); lastc='       ';
00090            }
00091            p++; 
00092            if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
00093            if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
00094            while(isalnum(*p) || *p=='_') p++;
00095            p--; continue;
00096        }
00097        if(*p=='&') {
00098            char *p2;
00099            for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
00100            if(*p2==';') {
00101               p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
00102               p=p2; continue;
00103            }
00104        }
00105        if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
00106            {printf("%c",*p); lastc=*p;}
00107        else {
00108            if(isspace(*p) && !isspace(lastc)) {
00109               printf(" "); lastc=' ';
00110            }
00111            if(!isspace(*p)) {
00112               switch(lastc) {
00113                   case ' ': printf(". "); lastc='       '; break;
00114                   case '    ': break;
00115                   default: printf(" . "); lastc='       '; break;
00116               }
00117            }
00118        }
00119        nextp: if(p==NULL || *p==0) break;
00120     }
00121 }
00122 
00123 int main(int argc, char *argv[])
00124 {
00125     if(argc<=1) return 0;
00126     snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
00127     prepare_file();
00128     processbuf();
00129     output();
00130     return 0;
00131 }
00132