Back to index

wims  3.65+svn20090927
Classes | Defines | Enumerations | Functions | Variables
translator_.c File Reference
#include "suffix.c"
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  entry

Defines

#define entrylim   32768
#define diclim   1024*1024

Enumerations

enum  { unk_delete, unk_leave, unk_replace }

Functions

int compare (int i1, const char *s2)
int search_list (struct entry *list, int items, size_t item_size, const char *str)
void singlespace (char *p)
void prepare_dic (char *fname)
void translate (char *p)

Variables

char inpbuf [MAX_LINELEN+1]
char outbuf [2 *MAX_LINELEN+2]
char * dicbuf
struct entry entry [entrylim]
int entrycount
int has_digits = 0
int unknown_type = unk_delete
int nocase = 0
int leaveline = 0
char * unknown
char unkbuf [1024]

Class Documentation

struct entry

Definition at line 37 of file dicsort.c.

Class Members
int earlier
int olen
unsigned char * original
char * original
unsigned char * replace
char * replace

Define Documentation

#define diclim   1024*1024

Definition at line 25 of file translator_.c.

#define entrylim   32768

Definition at line 23 of file translator_.c.


Enumeration Type Documentation

anonymous enum
Enumerator:
unk_delete 
unk_leave 
unk_replace 

Definition at line 37 of file translator_.c.


Function Documentation

int compare ( int  i1,
const char *  s2 
)

Definition at line 46 of file translator_.c.

{
    int k;
    if(nocase) k=strncasecmp(entry[i1].original,s2,entry[i1].olen);
    else k=strncmp(entry[i1].original,s2,entry[i1].olen);
    if(k==0 && (isalnum(*(s2+entry[i1].olen)) || (*(s2+entry[i1].olen)&128)!=0)) return -1;
    else return k;
}
void prepare_dic ( char *  fname)

Definition at line 116 of file translator_.c.

{
    int i,l;
    FILE *dicf;
    char *p1, *p2, *pp;
    long int flen;
    
    entrycount=0;
    dicf=fopen(fname,"r"); if(dicf==NULL) return;
    fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
    if(flen>diclim) return;
    dicbuf=xmalloc(flen+16);flen=fread(dicbuf,1,flen,dicf);
    fclose(dicf);
    if(flen>0 && flen<diclim) dicbuf[flen]=0;
    else return;
    for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) {
       p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
       pp=strchr(p1,':'); if(pp==NULL) continue;
       *pp++=0;
       strip_trailing_spaces(p1); strip_trailing_spaces(pp);
       singlespace(p1);
       p1=find_word_start(p1); pp=find_word_start(pp);
       if(*p1==0) continue;
       if(has_digits==0) {
           char *p;
           for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++);
           if(isdigit(*p)) has_digits=1;
       }
       entry[i].original=p1; entry[i].replace=pp; 
       entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
       if(i>0) {
           int l1,l2;
           l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
           else {l2=entry[i-1].olen;l1=i-1;}
           if(l>l2 && isspace(p1[l2])
              && strncmp(entry[l1].original,p1,l2)==0) 
             entry[i].earlier=entry[i-1].earlier=l1;
       }
       i++;
    }
    entrycount=i;
}

Here is the call graph for this function:

int search_list ( struct entry list,
int  items,
size_t  item_size,
const char *  str 
)

Definition at line 57 of file translator_.c.

{
    int i1,i2,j,k,t,t1;
    unsigned char c;

    if(items<=0) return -1;
    j=0; c=str[0];
    k=list[0].original[0]-c; if(k==0) k=compare(0,str);
    if(k==0) goto more; if(k>0) return -1;
    j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(j,str);
    if(k==0) return j;
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
       j=i1+(i2-i1)/2;
       k=list[j].original[0]-c; if(k==0) k=compare(j,str);
       if(k==0) goto more;
       if(k>0) {i2=j; continue;}
       if(k<0) {i1=j; continue;}   
    }
    if(k>0) {j--;k=compare(j,str);}
    more:
    if((t=list[j].earlier)<0) {
       if(k==0) return j; else return -1;
    }
    if(compare(t,str)!=0) return -1;
    for(j=t1=t,k=0;j<items && list[j].earlier==t1 && (k=compare(j,str))<=0; j++)
      if(k==0) t=j;
    return t;
}

Here is the call graph for this function:

void singlespace ( char *  p)

Definition at line 87 of file translator_.c.

{
    char *pp, *p2;
    for(pp=p;*pp;pp++) {
       if(!isspace(*pp)) continue;
       if(leaveline) {
           if(*pp==13) strcpy(pp,pp+1);
           if(*pp=='\n') {
              pp++;
              gopt: for(p2=pp; isspace(*p2) && *p2!='\n'; p2++);
              if(p2>pp) strcpy(pp,p2); pp--;
           }
           else {
              pp++; if(!isspace(*pp) || *pp=='\n') continue;
              goto gopt;
           }
       }
       else {
           if(*pp!=' ') *pp=' ';
           if(!isspace(*(pp+1))) continue;
           for(pp++,p2=pp;isspace(*p2);p2++);
           strcpy(pp,p2); pp--;
       }
    }
}

Here is the caller graph for this function:

void translate ( char *  p)

Definition at line 160 of file translator_.c.

{
    char *p1, *p2, *pp;
    int t;

    if(entrycount<=0 && suffixcnt<=0) return;
    snprintf(outbuf,sizeof(outbuf),"%s",p);
    for(p1=find_word_start(outbuf);
       p1!=NULL && p1-outbuf<MAX_LINELEN && *p1!=0;
       p1=p2) {
       p2=find_word_end(p1);
       for(pp=p1;pp<p2 && 
           ((!has_digits && isalpha(*pp)) ||
            (has_digits && isalnum(*pp)) || (*pp&128)!=0 ||
            strchr("_",*pp)!=NULL);pp++);
       p2=find_word_start(p2);
       if(pp==p1 || 
          (has_digits==0 && isdigit(*pp)) || 
          (*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue;
       t=search_list(entry,entrycount,sizeof(entry[0]),p1);
       if(t<0) {
           switch(unknown_type) {
              case unk_leave: break;
              case unk_delete: {
                  strcpy(p1,find_word_start(pp)); p2=p1;
                  break;
              }
              case unk_replace: {
                  string_modify(outbuf,p1,pp,unkbuf);
                  p2=find_word_start(p1+strlen(unkbuf));
              }
           }
           continue;
       }
       string_modify(outbuf,p1,p1+strlen(entry[t].original),
                    entry[t].replace);
       p2=find_word_start(p1+strlen(entry[t].replace));
    }
    snprintf(p,MAX_LINELEN,"%s",outbuf);
}

Here is the call graph for this function:


Variable Documentation

char* dicbuf

Definition at line 30 of file translator_.c.

struct entry entry[entrylim]

Definition at line 35 of file translator_.c.

int has_digits = 0

Definition at line 41 of file translator_.c.

Definition at line 29 of file translator_.c.

int leaveline = 0

Definition at line 43 of file translator_.c.

int nocase = 0

Definition at line 43 of file translator_.c.

char outbuf[2 *MAX_LINELEN+2]

Definition at line 29 of file translator_.c.

char unkbuf[1024]

Definition at line 44 of file translator_.c.

char* unknown

Definition at line 44 of file translator_.c.

Definition at line 42 of file translator_.c.