Back to index

lightning-sunbird  0.9+nobinonly
hashmgr.cpp
Go to the documentation of this file.
00001 #include "license.readme"
00002 
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <stdio.h>
00006 
00007 #include "hashmgr.hxx"
00008 
00009 extern void mychomp(char * s);
00010 extern char * mystrdup(const char *);
00011 
00012 #ifdef __SUNPRO_CC // for SunONE Studio compiler
00013 using namespace std;
00014 #endif
00015 
00016 // build a hash table from a munched word list
00017 
00018 HashMgr::HashMgr(const char * tpath)
00019 {
00020   tablesize = 0;
00021   tableptr = NULL;
00022   int ec = load_tables(tpath);
00023   if (ec) {
00024     /* error condition - what should we do here */
00025     fprintf(stderr,"Hash Manager Error : %d\n",ec);
00026     fflush(stderr);
00027     if (tableptr) {
00028       free(tableptr);
00029     }
00030     tablesize = 0;
00031   }
00032 }
00033 
00034 
00035 HashMgr::~HashMgr()
00036 {
00037   if (tableptr) {
00038     // now pass through hash table freeing up everything
00039     // go through column by column of the table
00040     for (int i=0; i < tablesize; i++) {
00041       struct hentry * pt = &tableptr[i];
00042       struct hentry * nt = NULL;
00043       if (pt) {
00044        if (pt->word) free(pt->word);
00045         if (pt->astr) free(pt->astr);
00046         pt = pt->next;
00047       }
00048       while(pt) {
00049         nt = pt->next;
00050        if (pt->word) free(pt->word);
00051         if (pt->astr) free(pt->astr);
00052         free(pt);
00053        pt = nt;
00054       }
00055     }
00056     free(tableptr);
00057   }
00058   tablesize = 0;
00059 }
00060 
00061 
00062 
00063 // lookup a root word in the hashtable
00064 
00065 struct hentry * HashMgr::lookup(const char *word) const
00066 {
00067     struct hentry * dp;
00068     if (tableptr) {
00069        dp = &tableptr[hash(word)];
00070        if (dp->word == NULL) return NULL;
00071        for (  ;  dp != NULL;  dp = dp->next) {
00072           if (strcmp(word,dp->word) == 0) return dp;
00073        }
00074     }
00075     return NULL;
00076 }
00077 
00078 
00079 
00080 // add a word to the hash table (private)
00081 
00082 int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
00083 {
00084     int i = hash(word);
00085     struct hentry * dp = &tableptr[i];
00086     struct hentry* hp;
00087     if (dp->word == NULL) {
00088        dp->wlen = wl;
00089        dp->alen = al;
00090        dp->word = mystrdup(word);
00091        dp->astr = mystrdup(aff);
00092        dp->next = NULL;
00093        if ((wl) && (dp->word == NULL)) return 1;
00094        if ((al) && (dp->astr == NULL)) return 1;
00095     } else {
00096        hp = (struct hentry *) malloc (sizeof(struct hentry));
00097        if (hp == NULL) return 1;
00098        hp->wlen = wl;
00099        hp->alen = al;
00100        hp->word = mystrdup(word);
00101        hp->astr = mystrdup(aff);
00102        hp->next = NULL;      
00103        while (dp->next != NULL) dp=dp->next; 
00104        dp->next = hp;
00105        if ((wl) && (hp->word == NULL)) return 1;
00106        if ((al) && (hp->astr == NULL)) return 1;
00107     }
00108     return 0;
00109 }     
00110 
00111 
00112 
00113 // walk the hash table entry by entry - null at end
00114 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
00115 {
00116   //reset to start
00117   if ((col < 0) || (hp == NULL)) {
00118     col = -1;
00119     hp = NULL;
00120   }
00121 
00122   if (hp && hp->next != NULL) {
00123     hp = hp->next;
00124   } else {
00125     col++;
00126     hp = (col < tablesize) ? &tableptr[col] : NULL;
00127     // search for next non-blank column entry
00128     while (hp && (hp->word == NULL)) {
00129         col ++;
00130         hp = (col < tablesize) ? &tableptr[col] : NULL;
00131     }
00132     if (col < tablesize) return hp;
00133     hp = NULL;
00134     col = -1;
00135   }
00136   return hp;
00137 }
00138 
00139 
00140 
00141 // load a munched word list and build a hash table on the fly
00142 
00143 int HashMgr::load_tables(const char * tpath)
00144 {
00145   int wl, al;
00146   char * ap;
00147 
00148   // raw dictionary - munched file
00149   FILE * rawdict = fopen(tpath, "r");
00150   if (rawdict == NULL) return 1;
00151 
00152   // first read the first line of file to get hash table size */
00153   char ts[MAXDELEN];
00154   if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
00155   mychomp(ts);
00156   tablesize = atoi(ts);
00157   if (!tablesize) return 4; 
00158   tablesize = tablesize + 5;
00159   if ((tablesize %2) == 0) tablesize++;
00160 
00161   // allocate the hash table
00162   tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
00163   if (! tableptr) return 3;
00164 
00165   // loop through all words on much list and add to hash
00166   // table and create word and affix strings
00167 
00168   while (fgets(ts,MAXDELEN-1,rawdict)) {
00169     mychomp(ts);
00170     // split each line into word and affix char strings
00171     ap = strchr(ts,'/');
00172     if (ap) {
00173       *ap = '\0';
00174       ap++;
00175       al = strlen(ap);
00176     } else {
00177       al = 0;
00178       ap = NULL;
00179     }
00180 
00181     wl = strlen(ts);
00182 
00183     // add the word and its index
00184     if (add_word(ts,wl,ap,al)) 
00185       return 5;;
00186 
00187   }
00188 
00189   fclose(rawdict);
00190   return 0;
00191 }
00192 
00193 
00194 // the hash function is a simple load and rotate
00195 // algorithm borrowed
00196 
00197 int HashMgr::hash(const char * word) const
00198 {
00199     long  hv = 0;
00200     for (int i=0; i < 4  &&  *word != 0; i++)
00201        hv = (hv << 8) | (*word++);
00202     while (*word != 0) {
00203       ROTATE(hv,ROTATE_LEN);
00204       hv ^= (*word++);
00205     }
00206     return (unsigned long) hv % tablesize;
00207 }
00208