Back to index

courier  0.68.2
msghash.c
Go to the documentation of this file.
00001 /*
00002 ** Copyright 1998 - 2000 Double Precision, Inc.
00003 ** See COPYING for distribution information.
00004 */
00005 
00006 #include      "config.h"
00007 #include      "msghash.h"
00008 #include      <ctype.h>
00009 #include      <string.h>
00010 
00011 
00012 /*
00013               Calculate a hash of a message
00014 
00015 Duplicate message detection works by calculating a hash on the message's
00016 contents.  We attempt to calculate a hash in such a way as to detect some
00017 simple attempts at hashbusting.
00018 
00019 Obviously, this is not going to work for very long.  If this filter becomes
00020 popular, it would become trivially easy to write a hashbuster around it.
00021 Consider this just as an example.
00022 
00023 msghash_init initializes the msghashinfo structure.  Then, msghash_line
00024 gets called to feed the message contents into the hash.  msghash_line()
00025 is called to process each individual line in the message.
00026 
00027 msghash_line will ignore all headers except subject, and will ignore
00028 everything except letters, which will all be converted to lowercase.
00029 
00030 Finally, msghash_finish is called to calculate the final hashes.  Two
00031 hashes are calculated: the "top" and the "bottom" hash.  The first one
00032 is a hash of everything except the last couple of lines.  The last one
00033 is a hash of everything except the first couple of lines.  If the message
00034 is too small, both hashes are identical, and the entire message is hashed.
00035 
00036 */
00037 
00038 void msghash_init(struct msghashinfo *p)
00039 {
00040        p->numlines=0;
00041        p->inheader=1;
00042        p->cur_header[0]=0;
00043        p->headerlinebuf_size=0;
00044        md5_context_init(&p->c_entire);
00045        md5_context_init(&p->c_top);
00046        md5_context_init(&p->c_bot);
00047        p->c_entire_cnt=0;
00048        p->c_top_cnt=0;
00049        p->c_bot_cnt=0;
00050        p->linebuf_head=0;
00051        p->linebuf_tail=0;
00052 }
00053 
00054 static void do_msghash_line(struct msghashinfo *p, const char *l)
00055 {
00056 unsigned long ll;
00057 
00058        if (*l == 0)  return;
00059        ll=strlen(l);
00060 
00061        if (p->numlines <= MSGHASH_HIMARGIN)
00062                      /* No need to calc all anymore */
00063        {
00064               md5_context_hashstream(&p->c_entire, l, ll);
00065               p->c_entire_cnt += ll;
00066        }
00067 
00068        if (p->numlines++ < MSGHASH_MARGIN)
00069        {
00070               strcpy(p->linebuf[p->linebuf_head], l);
00071               p->linebuf_head= (p->linebuf_head+1) % MSGHASH_MARGIN;
00072        }
00073        else
00074        {
00075        const char *s=p->linebuf[p->linebuf_tail];
00076        unsigned long sl=strlen(s);
00077 
00078               md5_context_hashstream(&p->c_top, s, sl);
00079               p->c_top_cnt += sl;
00080 
00081               strcpy(p->linebuf[p->linebuf_head], l);
00082               p->linebuf_head= (p->linebuf_head+1) % MSGHASH_MARGIN;
00083               p->linebuf_tail= (p->linebuf_tail+1) % MSGHASH_MARGIN;
00084 
00085               md5_context_hashstream(&p->c_bot, l, ll);
00086               p->c_bot_cnt += ll;
00087        }
00088 }
00089 
00090 void msghash_line(struct msghashinfo *p, const char *l)
00091 {
00092 unsigned long ll;
00093 
00094        if (!p->inheader)
00095        {
00096               p->headerlinebuf_size=0;
00097               ll=0;
00098               while (*l)
00099               {
00100                      if ( isalpha((int)(unsigned char)*l) &&
00101                             ll < sizeof(p->headerlinebuf)-1)
00102                             p->headerlinebuf[ll++] = *l;
00103                      ++l;
00104               }
00105               p->headerlinebuf[ll]=0;
00106 
00107               do_msghash_line(p, p->headerlinebuf);
00108               return;
00109        }
00110 
00111        if (*l == 0)
00112        {
00113               p->inheader=0;
00114               if (strcmp(p->cur_header, "subject") == 0)
00115                      do_msghash_line(p, p->headerlinebuf);
00116                      /* Count the subject header */
00117               return;
00118        }
00119        if (!isspace(*l))
00120        {
00121               if (strcmp(p->cur_header, "subject") == 0)
00122                      do_msghash_line(p, p->headerlinebuf);
00123               for (ll=0; ll<sizeof(p->cur_header)-1; ll++)
00124               {
00125                      if (l[ll] == ':')    break;
00126                      p->cur_header[ll]=tolower( (int)(unsigned char)l[ll]);
00127               }
00128               p->cur_header[ll]=0;
00129               p->headerlinebuf_size=0;
00130        }
00131        while (*l)
00132        {
00133               if ( isalpha((int)(unsigned char)*l) &&
00134                      p->headerlinebuf_size <
00135                             sizeof(p->headerlinebuf)-1)
00136                      p->headerlinebuf[p->headerlinebuf_size
00137                                    ++] = *l;
00138               ++l;
00139        }
00140        p->headerlinebuf[p->headerlinebuf_size]=0;
00141 }
00142 
00143 void msghash_finish(struct msghashinfo *p)
00144 {
00145        if (p->numlines < MSGHASH_HIMARGIN)
00146        {
00147               memcpy(&p->c_top, &p->c_entire, sizeof(p->c_top));
00148               p->c_top_cnt=p->c_entire_cnt;
00149 
00150               memcpy(&p->c_bot, &p->c_entire, sizeof(p->c_bot));
00151               p->c_bot_cnt=p->c_entire_cnt;
00152        }
00153 
00154        md5_context_endstream(&p->c_top, p->c_top_cnt);
00155        md5_context_endstream(&p->c_bot, p->c_bot_cnt);
00156 
00157        md5_context_digest(&p->c_top, p->md1);
00158        md5_context_digest(&p->c_bot, p->md2);
00159 }