Back to index

courier  0.68.2
duphash.c
Go to the documentation of this file.
00001 /*
00002 ** Copyright 1998 - 2000 Double Precision, Inc.
00003 ** See COPYING for distribution information.
00004 */
00005 
00006 #include      "config.h"
00007 #include      <stdio.h>
00008 #include      <string.h>
00009 #include      <stdlib.h>
00010 #include      "msghash.h"
00011 #include      "duphash.h"
00012 
00013 
00014 /*
00015 
00016 This module implements a rotating FIFO of last X message hashes
00017 seen.  When more than Y hashes have been seen, we have a dupe sign,
00018 and they are cancelled.
00019 
00020 */
00021 
00022 void duphash_init(struct duphash *h,
00023        unsigned n,          /* Size of the FIFO queue */
00024        unsigned d)          /* How many dupes to trigger cancels */
00025 {
00026 struct duphashinfo *di;
00027 int    i;
00028 
00029        h->duphashbufsize=n;
00030        h->duplevel=d;
00031 
00032        if ((h->msgsbuf=(struct duphashmsginfo *)
00033               malloc( sizeof(struct duphashmsginfo)*n)) == 0 ||
00034               (h->hashesbuf=(struct duphashinfo *)
00035                      malloc(sizeof(struct duphashinfo)*n)) == 0)
00036        {
00037               perror("malloc");
00038               exit(1);
00039        }
00040 
00041        h->head=h->tail=0;
00042        h->freelist=0;
00043        for (i=0; i<n; i++)
00044        {
00045               di= h->hashesbuf + i;
00046               di->next=h->freelist;
00047               h->freelist=di;
00048        }
00049 
00050        for (i=0; i < 256; i++)
00051               h->hashbuckets[i]=0;
00052 }
00053 
00054 /* Remove the oldest msg from the circular queue */
00055 
00056 static void dup_poptail(struct duphash *p)
00057 {
00058 struct duphashmsginfo *msgp=p->msgsbuf + p->tail;
00059 struct duphashinfo *hashp=msgp->hash;
00060 
00061        /* Unlink msg from its hash */
00062 
00063        if (msgp->prev)      msgp->prev->next=msgp->next;
00064        else   hashp->firstmsg=msgp->next;
00065 
00066        if (msgp->next)      msgp->next->prev=msgp->prev;
00067        else   hashp->lastmsg=msgp->prev;
00068 
00069        /* If this msg was an intentional dupe, account for it no longer
00070        ** being in the FIFO
00071        */
00072 
00073        if (msgp->dupmsg)
00074               --hashp->ndupmsgs;
00075 
00076        if ( --hashp->nmsgs == 0 )  /* No other msgs with same hash */
00077        {
00078               /* Remove the hash from its hashbucket */
00079 
00080               if (hashp->prev)
00081                      hashp->prev->next=hashp->next;
00082               else
00083                      p->hashbuckets[hashp->md5[0] & 255]=hashp->next;
00084               if (hashp->next)
00085                      hashp->next->prev=hashp->prev;
00086 
00087               /* Return the unused hash to the freelist */
00088               hashp->next=p->freelist;
00089               p->freelist=hashp;
00090        }
00091        p->tail = (p->tail+1) % p->duphashbufsize;
00092 }
00093 
00094 /* Add a new md5 hash to the circular queue */
00095 
00096 static struct duphashmsginfo *dup_addhead(struct duphash *p, MD5_DIGEST *d,
00097        int is_submit_dupe)  /* This is an intentional dupe */
00098 {
00099 unsigned bucket=(*d)[0] & 255;
00100 struct duphashmsginfo *msgp;
00101 struct duphashinfo *hashp;
00102 unsigned nexthead= (p->head+1) % p->duphashbufsize;
00103 
00104        /* If queue is full, pop one msg */
00105 
00106        if (nexthead == p->tail)
00107               dup_poptail(p);
00108 
00109        /* Search the hashbucket for this hash */
00110 
00111        for (hashp= p->hashbuckets[bucket]; hashp; hashp=hashp->next)
00112               if (memcmp(&hashp->md5, d, sizeof(hashp->md5)) == 0)
00113                      break;
00114        if (!hashp)
00115        {
00116               /* Not found, remove an unused hash from the freelist */
00117 
00118               hashp=p->freelist;
00119               p->freelist=hashp->next;
00120 
00121               /* Attach the new hash to its hashbucket */
00122 
00123               hashp->prev=0;
00124               hashp->next= p->hashbuckets[bucket];
00125               p->hashbuckets[bucket]=hashp;
00126 
00127               if (hashp->next)
00128                      hashp->next->prev=hashp;
00129 
00130               /* Initialize empty hash */
00131               memcpy(&hashp->md5, (*d), sizeof(hashp->md5));
00132               hashp->firstmsg=0;
00133               hashp->lastmsg=0;
00134               hashp->nmsgs=0;
00135               hashp->ndupmsgs=0;
00136        }
00137 
00138        msgp=p->msgsbuf + p->head;
00139 
00140        /* attach msg to its hash */
00141 
00142        msgp->hash=hashp;
00143        ++hashp->nmsgs;
00144        msgp->dupmsg=0;
00145        if (is_submit_dupe)
00146        {
00147               msgp->dupmsg=1;
00148               ++hashp->ndupmsgs;
00149        }
00150        msgp->next=hashp->firstmsg;
00151        msgp->prev=0;
00152 
00153        if (hashp->firstmsg)
00154               hashp->firstmsg->prev=msgp;
00155        else
00156               hashp->lastmsg=msgp;
00157        hashp->firstmsg=msgp;
00158        p->head=nexthead;
00159        return (msgp);
00160 }
00161 
00162 int duphash_check(struct duphash *p, MD5_DIGEST *d, const char *msgid,
00163        int is_submit_dupe, void (*cancelfunc)(const char *))
00164 {
00165 struct duphashmsginfo *msgp=dup_addhead(p, d, is_submit_dupe);
00166 int    cancelled=0;
00167 
00168        /* Finish initialized msgp */
00169 
00170        msgp->cancelled=0;
00171        msgp->msgid[0]=0;
00172        strncat(msgp->msgid, msgid, sizeof(msgp->msgid)-1);
00173 
00174        if ((msgp->next && msgp->next->cancelled) ||
00175               (msgp->prev && msgp->prev->cancelled))
00176        {
00177               msgp->cancelled=cancelled=1;       /* Message being cancelled */
00178        }
00179        else if (msgp->hash->nmsgs -
00180               msgp->hash->ndupmsgs >= p->duplevel) /* New dupe detected */
00181        {
00182               msgp->cancelled=cancelled=1;
00183               for (msgp=msgp->hash->firstmsg; msgp; msgp=msgp->next)
00184               {
00185                      if (!msgp->cancelled)
00186                             (*cancelfunc)(msgp->msgid);
00187               }
00188        }
00189        return (cancelled);
00190 }