Back to index

wims  3.65+svn20090927
modind.c
Go to the documentation of this file.
00001 /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
00018        /* This is an internal program,
00019         * used to index modules for search engine. */
00020 
00021 #include "../wims.h"
00022 
00023 #define MAX_LANGS    MAX_LANGUAGES
00024 #define MAX_MODULES  65536
00025 char *moduledir=     "public_html/modules";
00026 char *sheetdir=             "public_html/bases/sheet";
00027 char *dicdir=        "public_html/bases";
00028 char *outdir=        "public_html/bases/site2";
00029 char *maindic=              "sys/words";
00030 char *groupdic=             "sys/wgrp/wgrp";
00031 char *suffixdic=     "sys/suffix";
00032 char *ignoredic=     "sys/indignore";
00033 char *conffile=             "log/wims.conf";
00034 char *mlistbase=     "list";
00035 
00036 char lang[MAX_LANGS][4]={
00037     "en","fr","cn","es","it","nl","si","ca"
00038 };
00039 #define DEFAULT_LANGCNT     6
00040 char allang[MAX_LANGS][4]={
00041     "en","fr","cn","es","it","nl","tw","de","si","ca"
00042 };
00043 #define allangcnt 8
00044 char ignore[MAX_LANGS][MAX_LINELEN+1];
00045 char mlistfile[MAX_LANGS][256];
00046 int langcnt;
00047 FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf;
00048 
/* Module categories recognised in the "category" field of an INDEX
 * file, each paired with the one-letter code used in output file names. */
struct cat {
    char *name;   /* keyword searched for in the category field */
    char typ;     /* one-character code standing for the category */
} cat[] = {
    { "all_types",  'A' },
    { "exercise",   'X' },
    { "oef",        'O' },
    { "tool",       'T' },
    { "recreation", 'R' },
    { "reference",  'Y' },
    { "document",   'D' },
    { "popup",      'P' },
    { "datamodule", 'M' }
};
/* Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
00064 
00065 struct mod {
00066     char *name;
00067     unsigned char langs[MAX_LANGS];
00068     int counts[MAX_LANGS];
00069     int  langcnt;
00070 } mod[MAX_MODULES];
00071 int modcnt;
00072 
00073 char *mlist;
00074 
/* malloc() wrapper that never returns NULL: when the allocation fails
 * it prints a message on standard output and terminates with status 1. */
void *xmalloc(size_t n)
{
    void *mem = malloc(n);

    if (!mem) {
        fputs("Malloc failure.\n", stdout);
        exit(1);
    }
    return mem;
}
00085 
/* Accent folding tables: the character found at position k of acctab is
 * replaced by the character at position k of deatab.
 * NOTE(review): acctab is empty in this copy of the file, so no folding
 * takes place; it presumably held the accented 8-bit letters matching
 * deatab -- confirm against the upstream source. */
char *acctab="",
     *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";

/* Normalise a string in place for indexing:
 *  - bytes with the high bit set that appear in acctab are folded to
 *    the matching plain letter of deatab;
 *  - every character that is neither alphanumeric nor one of ",.&$+*"
 *    becomes a space;
 *  - everything else is lower-cased.
 * Characters are handed to the <ctype.h> functions as unsigned char,
 * since passing a negative char value to them is undefined. */
void deaccent(char *p)
{
    char *cur, *hit;
    unsigned char c;

    for (cur = p; *cur; cur++) {
        c = (unsigned char)*cur;
        if (c >= 0x80 && (hit = strchr(acctab, *cur)) != NULL) {
            *cur = deatab[hit - acctab];
            c = (unsigned char)*cur;
        }
        if (!isalnum(c) && strchr(",.&$+*", c) == NULL) *cur = ' ';
        else *cur = (char)tolower(c);
    }
}
00101 
/* Replace, in place, every character that is neither alphanumeric nor
 * one of "&$+*" by a space.  The character is converted to unsigned
 * char before the <ctype.h> test, which is undefined for negative values. */
void towords(char *p)
{
    unsigned char c;

    for (; *p; p++) {
        c = (unsigned char)*p;
        if (!isalnum(c) && strchr("&$+*", c) == NULL) *p = ' ';
    }
}
00108 
00109        /* Points to the end of the word */
00110 char *find_word_end(char *p)
00111 {
00112     int i;
00113     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
00114     return p;
00115 }
00116 
00117        /* Strips leading spaces */
00118 char *find_word_start(char *p)
00119 {
00120     int i;
00121     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
00122     return p;
00123 }
00124 
/* Find the first occurrence of w in p that stands as a whole word,
 * i.e. that is preceded by whitespace or the start of p and followed
 * by whitespace or the end of the string.
 * Returns a pointer to the occurrence, or NULL when there is none.
 * w is expected to be non-empty. */
char *wordchr(char *p, char *w)
{
    size_t wlen = strlen(w);   /* computed once instead of per iteration */
    char *r;

    for (r = strstr(p, w); r != NULL; r = strstr(r + 1, w)) {
        /* unsigned char casts: isspace() is undefined for negative values */
        int left_ok  = (r == p) || isspace((unsigned char)r[-1]);
        int right_ok = (r[wlen] == 0) || isspace((unsigned char)r[wlen]);

        if (left_ok && right_ok) break;
    }
    return r;
}
00135 
/* Find a variable name inside a string (math expression): the first
 * occurrence of v in p that is neither preceded nor followed by an
 * alphanumeric character.
 * Returns a pointer to the occurrence, or NULL when there is none. */
char *varchr(char *p, char *v)
{
    size_t n = strlen(v);
    char *hit;

    for (hit = strstr(p, v); hit != NULL; hit = strstr(hit + 1, v)) {
        /* unsigned char casts: isalnum() is undefined for negative values */
        if (hit != p && isalnum((unsigned char)hit[-1])) continue;
        if (hit[n] == 0 || !isalnum((unsigned char)hit[n])) break;
    }
    return hit;
}
00147 
/* Remove trailing whitespace from p in place.
 * Returns a pointer to the last character that remains.  When nothing
 * remains (p was empty, or consisted of whitespace only) p itself is
 * returned, which then points at the terminator; the pointer is never
 * moved in front of the buffer. */
char *strip_trailing_spaces(char *p)
{
    char *last;

    if (*p == 0) return p;
    last = p + strlen(p) - 1;
    /* unsigned char cast: isspace() is undefined for negative values */
    while (isspace((unsigned char)*last)) {
        *last = 0;
        if (last == p) return p;
        last--;
    }
    return last;
}
00156 
/* Locate the end of the HTML tag starting at p.
 * A leading '<' is skipped; nested '<...>' groups are stepped over by
 * recursion, and single- or double-quoted stretches are skipped whole.
 * Returns a pointer just past the closing '>', or to the string
 * terminator when the tag or one of its quotes is not closed. */
char *find_tag_end(char *p)
{
    char *cur = p;
    char *closing;

    if (*cur == '<') cur++;
    while (*cur != 0 && *cur != '>') {
        switch (*cur) {
        case '<':
            /* step over the nested tag; the increment below lands just after it */
            cur = find_tag_end(cur) - 1;
            break;
        case '"':
        case '\'':
            /* jump to the matching quote of the same kind */
            closing = strchr(cur + 1, *cur);
            if (closing == NULL) return p + strlen(p);
            cur = closing;
            break;
        default:
            break;
        }
        cur++;
    }
    if (*cur == '>') cur++;
    return cur;
}
00176 
/* Find the first tag named `tag' in p.  The name is compared without
 * regard to case and must not be followed by an alphanumeric character.
 * Returns a pointer to the tag's '<', or to the string terminator when
 * no such tag is present. */
char *find_tag(char *p, char *tag)
{
    size_t len = strlen(tag);
    char *lt;

    for (lt = strchr(p, '<'); lt != NULL; lt = strchr(lt + 1, '<')) {
        /* unsigned char cast: isalnum() is undefined for negative values */
        if (strncasecmp(lt + 1, tag, len) == 0 &&
            !isalnum((unsigned char)lt[1 + len])) return lt;
    }
    return p + strlen(p);
}
00187 
/* Remove all HTML tags from p in place.
 * Each tag is deleted by shifting the remainder of the string over it;
 * a tag that is never closed truncates the string at its '<'. */
void detag(char *p)
{
    char *open, *after;

    for (open = strchr(p, '<'); open != NULL; open = strchr(open, '<')) {
        after = find_tag_end(open);
        if (*after == 0) {
            *open = 0;
            return;
        }
        /* source and destination overlap, so strcpy() must not be used */
        memmove(open, after, strlen(after) + 1);
    }
}
00198 
00199        /* modify a string. Bufferlen must be ast least MAX_LINELEN */
00200 void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
00201 {
00202     char buf[MAX_LINELEN+1];
00203     va_list vp;
00204     
00205     va_start(vp,good);
00206     vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
00207     if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
00208       return;
00209     strcat(buf,bad_end);
00210     strcpy(bad_beg,buf);
00211 }
00212 
00213 void _getdef(char buf[], char *name, char value[])
00214 {
00215     char *p1, *p2, *p3;
00216 
00217     value[0]=0;
00218     for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
00219        p2=find_word_start(p1+strlen(name));
00220        if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
00221        p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
00222        if(p3>buf && *(p3-1)!='\n') continue;
00223        p2=find_word_start(p2+1);
00224        p3=strchr(p2,'\n');
00225        snprintf(value,MAX_LINELEN,"%s",p2);
00226        if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
00227        strip_trailing_spaces(value);
00228        break;
00229     }
00230 }
00231 
/* Get a variable definition from a file.
 * The whole file fname is read into memory and searched with _getdef().
 * The result is stored in value, a buffer of length MAX_LINELEN; value
 * is left empty when the file cannot be read or holds no definition. */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long size;
    size_t got;

    value[0] = 0;
    f = fopen(fname, "r");
    if (f == NULL) return;
    /* a failing ftell() returns -1, which must never reach fread() */
    if (fseek(f, 0, SEEK_END) != 0 || (size = ftell(f)) <= 0 ||
        fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return;
    }
    buf = xmalloc((size_t)size + 256);
    got = fread(buf, 1, (size_t)size, f);
    fclose(f);
    if (got > 0) {
        buf[got] = 0;
        _getdef(buf, name, value);
    }
    free(buf);   /* released on every path, including an empty read */
}
00249 
00250 #include "translator_.c"
00251 
00252 char *mdicbuf, *gdicbuf;
00253 char gentry[sizeof(entry)], mentry[sizeof(entry)];
00254 int gentrycount, mentrycount;
00255 
00256        /* Preparation of data */
00257 void prep(void)
00258 {
00259     char buf[MAX_LINELEN+1];
00260     char *p1,*p2,*s,*old;
00261     int i,l,thislang,t;
00262     FILE *f;
00263     
00264     s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
00265     s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
00266     snprintf(buf,sizeof(buf),"%s/addr",outdir);
00267     addrf=fopen(buf,"w");
00268     snprintf(buf,sizeof(buf),"%s/serial",outdir);
00269     serialf=fopen(buf,"w");
00270     modcnt=langcnt=0;
00271     getdef(conffile,"site_languages",buf);
00272     for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
00273     for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
00274        p2=find_word_end(p1);
00275        if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
00276        memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
00277     }
00278     if(langcnt==0) { /* default languages */
00279        langcnt=DEFAULT_LANGCNT;
00280     }
00281     s=getenv("mlist"); if(s==NULL) exit(1);
00282     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
00283     mlist=xmalloc(l+16); strcpy(mlist,s); old="";
00284     for(i=0;i<langcnt;i++) {
00285        snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
00286        f=fopen(buf,"r"); if(f==NULL) continue;
00287        l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
00288        if(l<0 || l>=MAX_LINELEN) l=0;
00289        ignore[i][l]=0;
00290     }
00291     for(t=0, p1=find_word_start(mlist);
00292        *p1 && modcnt<MAX_MODULES;
00293        p1=find_word_start(p2), t++) {
00294        p2=find_word_end(p1);
00295        l=p2-p1; if(*p2) *p2++=0;
00296        fprintf(addrf,"%d:%s\n",t,p1);
00297        fprintf(serialf,"%s:%d\n",p1,t);
00298        thislang=-1;
00299        if(l>3 && p1[l-3]=='.') {
00300            for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
00301            if(i<langcnt) {p1[l-3]=0; thislang=i;}
00302            else {    /* unknown language, not referenced */
00303               continue;
00304            }
00305        }
00306        if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
00307            if(mod[modcnt-1].langcnt<langcnt) {
00308               mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
00309               mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
00310               (mod[modcnt-1].langcnt)++;
00311            }
00312        }
00313        else {
00314            mod[modcnt].name=old=p1;
00315            if(thislang>=0) {
00316               mod[modcnt].langs[0]=thislang;
00317               mod[modcnt].langcnt=1;
00318            }
00319            else mod[modcnt].langcnt=0;
00320            mod[modcnt].counts[0]=t;
00321            modcnt++;
00322        }
00323     }
00324     snprintf(buf,sizeof(buf),"%s/language",outdir);
00325     langf=fopen(buf,"w");
00326     snprintf(buf,sizeof(buf),"%s/title",outdir);
00327     titf=fopen(buf,"w");
00328     snprintf(buf,sizeof(buf),"%s/description",outdir);
00329     descf=fopen(buf,"w");
00330     snprintf(buf,sizeof(buf),"%s/author",outdir);
00331     authorf=fopen(buf,"w");
00332     snprintf(buf,sizeof(buf),"%s/version",outdir);
00333     versionf=fopen(buf,"w");
00334     snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
00335     robotf=fopen(buf,"w");
00336     fclose(addrf); fclose(serialf);
00337     if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
00338        fprintf(stderr,"modind: error creating output files.\n");
00339        exit(1);
00340     }
00341 }
00342 
00343 void sprep(void)
00344 {
00345     char *p1,*p2,*s;
00346     int i,l,thislang;
00347     
00348     modcnt=0;
00349     s=getenv("slist"); if(s==NULL) return;
00350     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
00351     mlist=xmalloc(l+16); strcpy(mlist,s);
00352     for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
00353        p2=find_word_end(p1);
00354        l=p2-p1; if(*p2) *p2++=0;
00355        for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
00356        if(i<langcnt) thislang=i; else continue;
00357        mod[modcnt].name=p1;
00358        mod[modcnt].langs[0]=thislang;
00359        mod[modcnt].langcnt=1;
00360        modcnt++;
00361     }
00362 }
00363 
00364 void clean(void)
00365 {
00366     fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
00367     fclose(authorf); fclose(versionf);
00368 }
00369 
00370 char *sheetindex[]={
00371       "title", "description", 
00372       "duration", "severity",
00373       "level", "domain",
00374       "keywords", "reserved1", "reserved2", "remark"
00375 };
00376 #define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
00377 char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
00378 enum{s_title, s_description,
00379       s_duration, s_severity,
00380       s_level, s_domain,
00381       s_keywords, s_reserved1, s_reserved2,
00382       s_remark
00383 };
00384 
00385 char *modindex[]={
00386       "title", "description", 
00387       "author", "address", "copyright",
00388       "version", "wims_version", "language",
00389       "category", "level", "domain", "keywords",
00390       "require"
00391 };
00392 #define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
00393 char indbuf[MODINDEX_NO][MAX_LINELEN+1];
00394 enum{i_title, i_description,
00395       i_author,i_address,i_copyright,
00396       i_version,i_wims_version,i_language,
00397       i_category,i_level,i_domain,i_keywords,
00398       i_require
00399 };
00400 
00401 char *module_special_file[]={
00402     "intro","help","about"
00403 };
00404 #define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
00405 char module_language[4];
00406 
00407        /* read and treat module's INDEX file */
00408 int module_index(const char *name)
00409 {
00410     char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
00411     FILE *indf;
00412     int i,l;
00413 
00414     snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
00415     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
00416     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
00417     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
00418     for(i=0;i<MODINDEX_NO;i++) {
00419        _getdef(ibuf,modindex[i],indbuf[i]);
00420               /* compatibility precaution */
00421        if(indbuf[i][0]==':') indbuf[i][0]='.';
00422     }
00423     p=find_word_start(indbuf[i_language]);
00424     if(isalpha(*p) && isalpha(*(p+1))) {
00425        memmove(module_language,p,2); module_language[2]=0;
00426     }
00427     else strcpy(module_language,"en");
00428     return 0;
00429 }
00430 
00431 int sheet_index(int serial)
00432 {
00433     char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
00434     FILE *indf;
00435     int i,l;
00436 
00437     snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
00438     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
00439     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
00440     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
00441     for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
00442     for(i=0,p1=find_word_start(ibuf);
00443        i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
00444        i++,p1=p2) {
00445        p2=strchr(p1,'\n');
00446        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
00447        p1=find_word_start(p1); strip_trailing_spaces(p1);
00448        snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
00449     }
00450     p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
00451     else *p2=0;
00452     p1=find_word_start(p1); strip_trailing_spaces(p1);
00453     for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
00454     strcpy(sindbuf[s_remark],p1);
00455     return 0;
00456 }
00457 
00458 unsigned char categories[16];
00459 char taken[MAX_LINELEN+1];
00460 int catcnt, takenlen, tweight;
00461 
00462 void appenditem(char *word, int lind, int serial, int weight, char *l)
00463 {
00464     char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
00465     int i, ll;
00466     char *p;
00467     FILE *f;
00468     
00469     if(!isalnum(*word) || (ll=strlen(word))<2 ||
00470        wordchr(taken,word)!=NULL ||
00471        wordchr(ignore[lind],word)!=NULL ||
00472        takenlen>=MAX_LINELEN-ll-16)
00473       return;
00474     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
00475     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
00476     taken[takenlen++]=' '; taken[takenlen++]=' ';
00477     strcpy(taken+takenlen,word);
00478     takenlen+=ll; tweight+=weight;
00479     snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
00480     for(i=0;i<catcnt;i++) {
00481        snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
00482                outdir,categories[i],lang[lind]);
00483        f=fopen(nbuf,"a");
00484        if(f!=NULL) {fputs(buf,f); fclose(f);}
00485     }
00486 }
00487 
00488 void onemodule(const char *name, int serial, int lind)
00489 {
00490     int i;
00491     unsigned char trlist[]={
00492        i_title,i_description,i_category,i_domain,i_keywords,
00493          i_require,i_author
00494     };
00495     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
00496     char *p1, *p2, *pp, buf[MAX_LINELEN+1], lbuf[16];
00497     FILE *f;
00498     
00499     if(module_index(name)) return;
00500     towords(indbuf[i_category]);
00501     for(i=catcnt=0;i<catno && catcnt<16;i++) {
00502        if(wordchr(indbuf[i_category],cat[i].name)!=NULL) 
00503          categories[catcnt++]=cat[i].typ;
00504     }
00505     if(catcnt==0) return;
00506     if(categories[0]!=cat[0].typ)
00507       categories[catcnt++]=cat[0].typ;
00508     for(i=0;i<catcnt;i++) {
00509        snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
00510                outdir,categories[i],lang[lind]);
00511        f=fopen(buf,"a");
00512        if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
00513     }
00514     fprintf(langf,"%d:%s\n",serial,module_language);
00515     fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
00516     fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
00517     fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
00518     fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
00519     snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
00520     for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
00521       string_modify(buf,pp,pp+1,"&#44;");
00522     if(strcmp(module_language,lang[lind])==0)
00523       fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
00524              indbuf[i_title], buf);
00525     entrycount=mentrycount; dicbuf=mdicbuf;
00526     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
00527     unknown_type=unk_leave;
00528     for(i=0;i<trcnt;i++) {
00529        detag(indbuf[trlist[i]]);
00530        deaccent(indbuf[trlist[i]]);
00531        singlespace(indbuf[trlist[i]]);
00532        suffix_translate(indbuf[trlist[i]]);
00533        translate(indbuf[trlist[i]]);
00534     }
00535     taken[0]=0; takenlen=tweight=0;
00536     strcpy(buf,indbuf[i_title]); towords(buf);
00537     for(p1=find_word_start(buf);*p1;
00538        p1=find_word_start(p2)) {
00539        p2=find_word_end(p1); if(*p2) *p2++=0;
00540        appenditem(p1,lind,serial,4,module_language);
00541     }
00542     snprintf(buf,sizeof(buf),"%s %s %s %s %s",
00543             indbuf[i_description],indbuf[i_keywords],
00544             indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
00545     towords(buf);
00546     for(p1=find_word_start(buf);*p1;
00547        p1=find_word_start(p2)) {
00548        p2=find_word_end(p1); if(*p2) *p2++=0;
00549        appenditem(p1,lind,serial,2,module_language);
00550     }
00551     entrycount=gentrycount; dicbuf=gdicbuf;
00552     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
00553     unknown_type=unk_delete;
00554     strcpy(buf,indbuf[i_title]); translate(buf);
00555     for(p1=find_word_start(buf); *p1;
00556        p1=find_word_start(p2)) {
00557        p2=strchr(p1,',');
00558        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
00559        if(strlen(p1)<=0) continue;
00560        appenditem(p1,lind,serial,4,module_language);
00561     }
00562     snprintf(buf,sizeof(buf),"%s, %s, %s",
00563             indbuf[i_description],indbuf[i_keywords],
00564             indbuf[i_domain]);
00565     translate(buf);
00566     for(p1=find_word_start(buf); *p1;
00567        p1=find_word_start(p2)) {
00568        p2=strchr(p1,','); 
00569        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
00570        if(strlen(p1)<=0) continue;
00571        appenditem(p1,lind,serial,2,module_language);
00572     }
00573     snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
00574     strcpy(lbuf,"level");
00575     for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
00576     for(p1=find_word_start(buf); *p1;
00577        p1=find_word_start(p2)) {
00578        p2=find_word_end(p1); 
00579        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
00580        if(!isalpha(*p1) || 
00581           (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
00582           (*(p1+1)!=0 && *(p1+2)!=0))
00583          continue;
00584        *p1=tolower(*p1);
00585        strcpy(lbuf+strlen("level"),p1);
00586        appenditem(lbuf,lind,serial,2,module_language);
00587     }
00588     fprintf(weightf,"%d:%d\n",serial,tweight);
00589 }
00590 
00591 void modules(void)
00592 {
00593     int i,j,k,d;
00594     char namebuf[MAX_LINELEN+1];
00595     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
00596 
00597     for(j=0;j<langcnt;j++) {
00598        snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
00599        weightf=fopen(namebuf,"w");
00600        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
00601        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
00602        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
00603        suffix_dic(sdic); prepare_dic(gdic);
00604        gdicbuf=dicbuf; gentrycount=entrycount;
00605        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
00606        prepare_dic(mdic);
00607        mdicbuf=dicbuf; mentrycount=entrycount;
00608        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
00609        unknown_type=unk_leave; translate(ignore[j]);
00610        for(i=0;i<modcnt;i++) {
00611            if(mod[i].langcnt>0) {
00612               for(d=k=0;k<mod[i].langcnt;k++)
00613                 if(mod[i].langs[k]<mod[i].langs[d]) d=k;
00614               for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
00615               if(k>=mod[i].langcnt) k=d;
00616               snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
00617                       lang[mod[i].langs[k]]);
00618               onemodule(namebuf,mod[i].counts[k],j);
00619            }
00620            else {
00621               onemodule(mod[i].name,mod[i].counts[0],j);
00622            }
00623        }
00624        if(mentrycount>0) free(mdicbuf);
00625        if(gentrycount>0) free(gdicbuf);
00626        if(suffixcnt>0) free(sufbuf);
00627        if(weightf) fclose(weightf);
00628     }
00629 }
00630 
00631 void sappenditem(char *word, int lind, int serial, int weight)
00632 {
00633     int ll;
00634     char *p;
00635     
00636     if(!isalnum(*word) || (ll=strlen(word))<2 ||
00637        wordchr(taken,word)!=NULL ||
00638        wordchr(ignore[lind],word)!=NULL ||
00639        takenlen>=MAX_LINELEN-ll-16)
00640       return;
00641     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
00642     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
00643     taken[takenlen++]=' ';taken[takenlen++]=' ';
00644     strcpy(taken+takenlen,word);
00645     takenlen+=ll; tweight+=weight;
00646     fprintf(indf,"%s:%d?%d\n",word,serial,weight);
00647 }
00648 
00649 void onesheet(int serial, int lind)
00650 {
00651     int i;
00652     unsigned char trlist[]={
00653        s_title,s_description,s_domain,s_keywords,s_remark
00654     };
00655     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
00656     char *p1, *p2, buf[MAX_LINELEN+1];
00657     
00658     if(sheet_index(serial)) return;
00659     fprintf(listf,"%s\n",mod[serial].name+3);
00660     fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
00661     fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
00662     entrycount=mentrycount; dicbuf=mdicbuf;
00663     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
00664     unknown_type=unk_leave;
00665     for(i=0;i<trcnt;i++) {
00666        detag(sindbuf[trlist[i]]);
00667        deaccent(sindbuf[trlist[i]]);
00668        singlespace(sindbuf[trlist[i]]);
00669        suffix_translate(sindbuf[trlist[i]]);
00670        translate(sindbuf[trlist[i]]);
00671     }
00672     taken[0]=0; takenlen=tweight=0;
00673     strcpy(buf,sindbuf[s_title]); towords(buf);
00674     for(p1=find_word_start(buf);*p1;
00675        p1=find_word_start(p2)) {
00676        p2=find_word_end(p1); if(*p2) *p2++=0;
00677        sappenditem(p1,lind,serial,4);
00678     }
00679     snprintf(buf,sizeof(buf),"%s %s %s %s",
00680             sindbuf[s_description],sindbuf[s_keywords],
00681             sindbuf[s_domain],sindbuf[s_remark]);
00682     towords(buf);
00683     for(p1=find_word_start(buf);*p1;
00684        p1=find_word_start(p2)) {
00685        p2=find_word_end(p1); if(*p2) *p2++=0;
00686        sappenditem(p1,lind,serial,2);
00687     }
00688     entrycount=gentrycount; dicbuf=gdicbuf;
00689     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
00690     unknown_type=unk_delete;
00691     strcpy(buf,sindbuf[s_title]); translate(buf);
00692     for(p1=find_word_start(buf); *p1;
00693        p1=find_word_start(p2)) {
00694        p2=strchr(p1,',');
00695        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
00696        if(strlen(p1)<=0) continue;
00697        sappenditem(p1,lind,serial,4);
00698     }
00699     snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
00700             sindbuf[s_description],sindbuf[s_keywords],
00701             sindbuf[s_domain],sindbuf[s_remark]);
00702     translate(buf);
00703     for(p1=find_word_start(buf); *p1;
00704        p1=find_word_start(p2)) {
00705        p2=strchr(p1,','); 
00706        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
00707        if(strlen(p1)<=0) continue;
00708        sappenditem(p1,lind,serial,2);
00709     }
00710     fprintf(weightf,"%d:%d\n",serial,tweight);
00711 }
00712 
00713 void sheets(void)
00714 {
00715     int i,j;
00716     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
00717     char buf[MAX_LINELEN+1];
00718     
00719     for(j=0;j<langcnt;j++) {
00720        snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
00721        titf=fopen(buf,"w");
00722        snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
00723        descf=fopen(buf,"w");
00724        snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
00725        indf=fopen(buf,"w");
00726        snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
00727        listf=fopen(buf,"w");
00728        snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
00729        weightf=fopen(buf,"w");
00730        snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
00731        addrf=fopen(buf,"w");
00732        snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
00733        serialf=fopen(buf,"w");
00734        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
00735        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
00736        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
00737        suffix_dic(sdic); prepare_dic(gdic);
00738        gdicbuf=dicbuf; gentrycount=entrycount;
00739        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
00740        prepare_dic(mdic);
00741        mdicbuf=dicbuf; mentrycount=entrycount;
00742        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
00743        unknown_type=unk_leave; translate(ignore[j]);
00744        for(i=0;i<modcnt;i++) {
00745            if(mod[i].langs[0]!=j) continue;
00746            fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
00747            fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
00748            onesheet(i,j);
00749        }
00750        if(mentrycount>0) free(mdicbuf);
00751        if(gentrycount>0) free(gdicbuf);
00752        if(suffixcnt>0) free(sufbuf);
00753        fclose(titf); fclose(descf); fclose(indf); fclose(listf);
00754        fclose(weightf); fclose(addrf); fclose(serialf);
00755     }
00756 }
00757 
00758 int main()
00759 {
00760     prep();
00761     if(modcnt>0) modules();
00762     clean();
00763     sprep();
00764     if(modcnt>0) sheets();
00765     return 0;
00766 }
00767