Back to index

wims  3.65+svn20090927
Classes | Defines | Functions | Variables
dicsort.c File Reference
#include "../wims.h"
#include "suffix.c"

Go to the source code of this file.

Classes

struct  entry

Defines

#define entrylim   512*1024
#define diclim   32*1024*1024

Functions

void * xmalloc (size_t n)
char * find_word_end (char *p)
char * find_word_start (char *p)
char * strip_trailing_spaces (char *p)
int compare (const void *s1, const void *s2)
void sortdic (void)
void string_modify (char *start, char *bad_beg, char *bad_end, char *good,...)
void singlespace (char *p)
void prepare_dic (void)
void output (void)
int main (int argc, char *argv[])

Variables

char sepchar = ':'
char grpchar = 0
char inpbuf [MAX_LINELEN+1]
char outbuf [2 *MAX_LINELEN+2]
char * dicbuf
char dicname [1024]
char suffixname [1024]
struct entry entry [entrylim]
int entrycount
int nocase = 0
int hassuffix = 0
int leaveline = 0
int ocount

Class Documentation

struct entry

Definition at line 37 of file dicsort.c.

Class Members
int earlier
int olen
unsigned char * original
char * original
unsigned char * replace
char * replace

Define Documentation

#define diclim   32*1024*1024

Definition at line 25 of file dicsort.c.

#define entrylim   512*1024

Definition at line 23 of file dicsort.c.


Function Documentation

int compare ( const void *  s1,
const void *  s2 
)

Definition at line 79 of file dicsort.c.

{
    const struct entry *p1, *p2;
    p1=s1; p2=s2;
    if(nocase) return strcasecmp(p1->original,p2->original);
    else return strcmp(p1->original,p2->original);
}
char* find_word_end ( char *  p)

Definition at line 55 of file dicsort.c.

{
    int i;
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
    return p;
}
char* find_word_start ( char *  p)

Definition at line 63 of file dicsort.c.

{
    int i;
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
    return p;
}
int main ( int  argc,
char *  argv[] 
)

Definition at line 195 of file dicsort.c.

{
    char *ss, *gr;
    if(argc<2) return -1;
    
    ss=getenv("dicsort_separator");
    if(ss!=NULL && *ss!=0) sepchar=*ss;
    gr=getenv("dicsort_grouping");
    if(gr!=NULL && *gr!=0) grpchar=*gr;
    snprintf(dicname,sizeof(dicname)-128,"%s",argv[1]); prepare_dic();
    if(argc>2) {
       snprintf(suffixname,sizeof(suffixname),"%s",argv[2]);
       suffix_dic(suffixname); hassuffix=1;
    }
    else suffixname[0]=hassuffix=0;
    sortdic(); output();
    printf("%s: sorted %d entries.\n",dicname, ocount);
    return 0;
}

Here is the call graph for this function:

void output ( void  )

Definition at line 164 of file dicsort.c.

{
    int i;
    FILE *f;

    ocount=0;
    strcat(dicname,".sorted");
    f=fopen(dicname,"w"); if(f==NULL) return;
    for(i=0;i<entrycount;i++) {
       if(i>0 && strcmp(entry[i].original,entry[i-1].original)==0
          && strcmp(entry[i].replace,entry[i-1].replace)==0)
         continue;
       if(grpchar!=0) {
           if(i>0 && strcmp(entry[i].original,entry[i-1].original)==0)
             fprintf(f,"%c%s",grpchar, entry[i].replace);
           else {
              if(i>0) fprintf(f,"\n");
              fprintf(f,"%s%c%s",entry[i].original,sepchar,entry[i].replace);
              ocount++;
           }
           
       }
       else {
           fprintf(f,"%s%c%s\n",entry[i].original,sepchar,entry[i].replace);
           ocount++;
       }
    }
    if(grpchar!=0) fprintf(f,"\n");
    fclose(f);
}
void prepare_dic ( void  )

Definition at line 134 of file dicsort.c.

{
    int i;
    FILE *dicf;
    char *p1, *p2, *pp;
    long int flen;

    entrycount=0;
    dicf=fopen(dicname,"r"); if(dicf==NULL) return;
    fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
    if(flen>diclim) return;
    dicbuf=xmalloc(2*flen+1024);flen=fread(dicbuf,1,flen,dicf);
    fclose(dicf);
    if(flen>0 && flen<diclim) dicbuf[flen]=0;
    else return;
    for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) {
       p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
       pp=strchr(p1,sepchar); if(pp==NULL) continue;
       *pp++=0;
       strip_trailing_spaces(p1); strip_trailing_spaces(pp);
       singlespace(p1);
       p1=find_word_start(p1); pp=find_word_start(pp);
       if(*p1==0) continue;
       entry[i].original=p1; entry[i].replace=pp; i++;
    }
    entrycount=i;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void singlespace ( char *  p)

Definition at line 107 of file dicsort.c.

{
    char *pp, *p2;
    for(pp=p;*pp;pp++) {
       if(!isspace(*pp)) continue;
       if(leaveline) {
           if(*pp==13) strcpy(pp,pp+1);
           if(*pp=='\n') {
              pp++;
              gopt: for(p2=pp; isspace(*p2) && *p2!='\n'; p2++);
              if(p2>pp) strcpy(pp,p2); pp--;
           }
           else {
              pp++; if(!isspace(*pp) || *pp=='\n') continue;
              goto gopt;
           }
       }
       else {
           if(*pp!=' ') *pp=' ';
           pp++; if(!isspace(*pp)) continue;
           for(p2=pp;isspace(*p2);p2++);
           strcpy(pp,p2); pp--;
       }
    }
}
void sortdic ( void  )

Definition at line 87 of file dicsort.c.

{
    qsort(entry,entrycount,sizeof(entry[0]),compare);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void string_modify ( char *  start,
char *  bad_beg,
char *  bad_end,
char *  good,
  ... 
)

Definition at line 93 of file dicsort.c.

{
    char buf[MAX_LINELEN+1];
    va_list vp;
    
    va_start(vp,good);
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
      return; /* this is an error situation. */
    strcat(buf,bad_end);
    strcpy(bad_beg,buf);
}
/*
 * Remove trailing whitespace from the string p, in place.
 *
 * Definition at line 71 of file dicsort.c.
 *
 * p: NUL-terminated string to trim.
 * Returns a pointer to the last remaining character, or p itself when the
 * string is empty or consisted only of whitespace.
 */
char *strip_trailing_spaces(char *p)
{
    char *end;

    if (*p == 0) return p;

    /* `end` always points one past the last kept character, so the scan
     * never forms a pointer before the start of the buffer. The cast keeps
     * isspace() defined for bytes >= 0x80 where char is signed. */
    end = p + strlen(p);
    while (end > p && isspace((unsigned char)end[-1])) {
        *--end = 0;
    }
    return end > p ? end - 1 : p;
}
/*
 * Allocate n bytes, terminating the program with exit status 1 when the
 * allocation fails.
 *
 * Definition at line 46 of file dicsort.c.
 *
 * n: number of bytes requested.
 * Returns a non-NULL pointer the caller releases with free().
 */
void *xmalloc(size_t n)
{
    void *p;

    /* malloc(0) is allowed to return NULL without being out of memory;
     * ask for one byte so a zero-size request is not mistaken for failure. */
    p = malloc(n != 0 ? n : 1);
    if (p == NULL) exit(1);
    return p;
}

Variable Documentation

char* dicbuf

Definition at line 34 of file dicsort.c.

char dicname[1024]

Definition at line 35 of file dicsort.c.

struct entry entry[entrylim]

Definition at line 41 of file dicsort.c.

char grpchar = 0

Definition at line 27 of file dicsort.c.

int hassuffix = 0

Definition at line 43 of file dicsort.c.

char inpbuf[MAX_LINELEN+1]

Definition at line 33 of file dicsort.c.

int leaveline = 0

Definition at line 43 of file dicsort.c.

int nocase = 0

Definition at line 43 of file dicsort.c.

int ocount

Definition at line 44 of file dicsort.c.

char outbuf[2 *MAX_LINELEN+2]

Definition at line 33 of file dicsort.c.

char sepchar = ':'

Definition at line 27 of file dicsort.c.

char suffixname[1024]

Definition at line 35 of file dicsort.c.