Back to index

courier  0.68.2
rfc2045.c
Go to the documentation of this file.
00001 /*
00002 ** Copyright 1998 - 2004 Double Precision, Inc.  See COPYING for
00003 ** distribution information.
00004 */
00005 
00006 /*
00007 */
00008 #if    HAVE_CONFIG_H
00009 #include       "rfc2045_config.h"
00010 #endif
00011 #include       <stdlib.h>
00012 #include       <stdio.h>
00013 #include       <string.h>
00014 #if    HAVE_STRINGS_H
00015 #include       <strings.h>
00016 #endif
00017 #include      <ctype.h>
00018 #include      "rfc2045.h"
00019 #include      "rfc822/rfc822.h"
00020 #include      "rfc2045charset.h"
00021 
/* NOTE(review): not referenced in the visible part of this file --
** presumably the default character set; confirm against the rest of
** the file.
*/
static char   *rfc2045_defcharset=0;

/* When non-zero, parse_content_header() cuts a parameter value short
** right after a leading quoted string (see the comment there).
*/
int rfc2045_in_reformime=0;

/* Called when a memory allocation fails.  Not defined in the visible
** part of this file.
*/
extern void rfc2045_enomem();

/* NOTE(review): MAXLEVELS is not used in the visible part of this file. */
#define       MAXLEVELS     20
/* doline() flags RFC2045_ERR2COMPLEX once numparts exceeds this. */
#define       MAXPARTS      300
00030 
00031 /*
00032        New RFC2045 structure.
00033 */
00034 
00035 struct rfc2045 *rfc2045_alloc()
00036 {
00037 struct rfc2045 *p=(struct rfc2045 *)malloc(sizeof(struct rfc2045));
00038 
00039        if (!p)
00040        {
00041               rfc2045_enomem();
00042               return (0);
00043        }
00044 
00045        /* Initialize everything to nulls, except for one thing */
00046 
00047        memset(p, '\0', sizeof(*p));
00048 
00049        p->pindex=1;  /* Start with part #1 */
00050        p->workinheader=1;
00051        /* Most of the time, we're about to read a header */
00052 
00053        return (p);
00054 }
00055 
00056 const char *rfc2045_getattr(const struct rfc2045attr *p, const char *name)
00057 {
00058        while (p)
00059        {
00060               if (p->name && strcmp(p->name, name) == 0)
00061                      return (p->value);
00062               p=p->next;
00063        }
00064        return (0);
00065 }
00066 
00067 int rfc2045_attrset(struct rfc2045attr **p, const char *name, const char *val)
00068 {
00069 char   *v;
00070 
00071        while (*p)
00072        {
00073               if (strcmp( (*p)->name, name) == 0)       break;
00074               p=&(*p)->next;
00075        }
00076        if (val == 0)
00077        {
00078        struct rfc2045attr *q= *p;
00079 
00080               if (q)
00081               {
00082                      *p=q->next;
00083                      if (q->name)  free(q->name);
00084                      if (q->value) free(q->value);
00085                      free(q);
00086               }
00087               return 0;
00088        }
00089 
00090        v=strdup(val);
00091        if (!v)
00092               return -1;
00093 
00094        if (!*p)
00095        {
00096               if (((*p)=(struct rfc2045attr *)malloc(sizeof(**p))) == 0)
00097               {
00098                      free(v);
00099                      return -1;
00100               }
00101               memset( (*p), 0, sizeof(**p));
00102               if ( ((*p)->name=strdup(name)) == 0)
00103               {
00104                      free( *p );
00105                      *p=0;
00106                      free(v);
00107                      return -1;
00108               }
00109        }
00110        if ( (*p)->value )   free ( (*p)->value );
00111        (*p)->value=v;
00112        return 0;
00113 }
00114 
/* static const char cb_name[]="boundary"; */

/* #define    ContentBoundary(p)   (rfc2045_getattr( (p)->content_type_attr, cb_name)) */

/* The boundary string saved in section p, a null pointer when there is
** none.
*/
#define       ContentBoundary(p)   ( (p)->boundary )
00120 
00121 /*
00122        Unallocate the RFC2045 structure.  Recursively unallocate
00123        all sub-structures.  Unallocate all associated buffers.
00124 */
00125 
00126 static void rfc2045_freeattr(struct rfc2045attr *p)
00127 {
00128        while (p)
00129        {
00130        struct rfc2045attr *q=p->next;
00131 
00132               if (p->name)  free(p->name);
00133               if (p->value) free(p->value);
00134               free(p);
00135               p=q;
00136        }
00137 }
00138 
00139 void rfc2045_free(struct rfc2045 *p)
00140 {
00141 struct rfc2045 *q, *r;
00142 
00143        for (q=p->firstpart; q; )
00144        {
00145               r=q->next;
00146               rfc2045_free(q);
00147               q=r;
00148        }
00149        rfc2045_freeattr(p->content_type_attr);
00150        rfc2045_freeattr(p->content_disposition_attr);
00151 
00152        if (p->header)              free(p->header);
00153        if (p->content_md5)  free(p->content_md5);
00154        if (p->content_base) free(p->content_base);
00155        if (p->content_location)    free(p->content_location);
00156        if (p->content_language)    free(p->content_language);
00157        if (p->content_id)   free(p->content_id);
00158        if (p->content_description) free(p->content_description);
00159        if (p->content_transfer_encoding) free(p->content_transfer_encoding);
00160        if (p->boundary) free(p->boundary);
00161        if (p->content_type) free(p->content_type);
00162        if (p->mime_version) free(p->mime_version);
00163        if (p->workbuf)             free(p->workbuf);
00164        if (p->content_disposition) free(p->content_disposition);
00165        if (p->rw_transfer_encoding) free(p->rw_transfer_encoding);
00166        free(p);
00167 }
00168 
00169 /*
00170        Generic dynamic buffer append.
00171 */
00172 
/*
** Append len bytes at p to a growable buffer.
**
** The buffer is enlarged (with 256 bytes of slack) when the new data
** does not fit.  If it cannot be enlarged, rfc2045_enomem() is called
** and the buffer is left exactly as it was.  Appending zero bytes does
** nothing, and never touches a buffer that is not allocated yet.
*/
void rfc2045_add_buf(
	char **bufptr,		/* Buffer */
	size_t *bufsize,	/* Buffer's maximum size */
	size_t *buflen,		/* Buffer's current size */

	const char *p, size_t len)	/* Append this data */
{
	const size_t slack=256;
	const size_t maxsize=(size_t)-1;

	/*
	** Nothing to do.  This also keeps memcpy() away from a null
	** buffer, which is undefined even for a zero length.
	*/
	if (len == 0)
		return;

	/* Same test as len + *buflen > *bufsize, but it cannot wrap */
	if (*buflen > *bufsize || len > *bufsize - *buflen)
	{
		size_t	newsize;
		char	*newbuf;

		/* Refuse sizes that do not fit in a size_t */
		if (*buflen > maxsize - slack ||
			len > maxsize - slack - *buflen)
		{
			rfc2045_enomem();
			return;
		}

		newsize=len + *buflen + slack;
		newbuf= *bufptr ? (char *)realloc(*bufptr, newsize):
				(char *)malloc(newsize);

		if (!newbuf)
		{
			/* The old buffer, if any, is still valid */
			rfc2045_enomem();
			return;
		}
		*bufptr=newbuf;
		*bufsize=newsize;
	}

	memcpy(*bufptr + *buflen, p, len);
	*buflen += len;
}
00198 
00199 /* Append to the work buffer */
00200 
00201 void rfc2045_add_workbuf(struct rfc2045 *h, const char *p, size_t len)
00202 {
00203        rfc2045_add_buf( &h->workbuf, &h->workbufsize, &h->workbuflen, p, len);
00204 }
00205 
00206 /* Append one character to the work buffer */
00207 
/*
** Append a single character, c converted to char, to the work buffer
** of h.
*/
void rfc2045_add_workbufch(struct rfc2045 *h, int c)
{
	char	one[1];

	one[0]=(char)c;
	rfc2045_add_workbuf(h, one, sizeof(one));
}
00214 
00215 /*
00216        Generic function to duplicate contents of a string.
00217        The destination string may already be previously allocated,
00218        so unallocate it.
00219 */
00220 
/*
** Replace the string at *p with a private copy of q.
**
** p - address of the destination; whatever it points to is released.
** q - string to copy, or a null pointer to just clear the destination.
**
** The copy is made before the old string is released, so q may point
** into the old string (or be the old string itself).  When memory for
** the copy cannot be obtained the destination ends up null and
** rfc2045_enomem() is called.
*/
static void set_string(char **p,
	const char *q)
{
	char	*copy=0;
	int	failed=0;

	if (q)
	{
		size_t	n=strlen(q)+1;

		copy=(char *)malloc(n);
		if (copy)
			memcpy(copy, q, n);
		else
			failed=1;
	}

	/* Only now is it safe to drop the old value */
	free(*p);
	*p=copy;

	if (failed)
		rfc2045_enomem();
}
00237 
00238 /* Update byte counts for this structure, and all the superstructures */
00239 
00240 static void update_counts(struct rfc2045 *p, size_t newcnt, size_t newendcnt,
00241        unsigned nlines)
00242 {
00243        while (p)
00244        {
00245               p->endpos = newcnt;
00246               p->endbody = newendcnt;
00247               p->nlines += nlines;
00248               if (!p->workinheader)
00249                      p->nbodylines += nlines;
00250               p=p->parent;
00251        }
00252 }
00253 
00254 /*
00255        Main entry point for RFC2045 parsing.  External data is fed
00256        by repetitively calling rfc2045_parse().
00257 
00258        rfc2045_parse() breaks up input into lines, and calls doline()
00259        to process each line.
00260 */
00261 
00262 static void doline(struct rfc2045 *);
00263 
00264 void rfc2045_parse_partial(struct rfc2045 *h);
00265 
00266 void rfc2045_parse(struct rfc2045 *h, const char *buf, size_t s)
00267 {
00268        size_t l;
00269 
00270        while (s)
00271        {
00272               for (l=0; l<s; l++)
00273                      if (buf[l] == '\n')  break;
00274               if (l < s && buf[l] == '\n')
00275               {
00276                      ++l;
00277                      rfc2045_add_workbuf(h, buf, l);
00278                      doline(h);
00279                      h->workbuflen=0;
00280               }
00281               else
00282                      rfc2045_add_workbuf(h, buf, l);
00283               buf += l;
00284               s -= l;
00285        }
00286 
00287        if (h->workbuflen > 1024)
00288               rfc2045_parse_partial(h);
00289 }
00290 
00291 void rfc2045_parse_partial(struct rfc2045 *h)
00292 {
00293        /*
00294        ** Our buffer's getting pretty big.  Let's see if we can
00295        ** partially handle it.
00296        */
00297 
00298        if (h->workbuflen > 0)
00299        {
00300        struct rfc2045 *p;
00301        int    l, i;
00302 
00303               for (p=h; p->lastpart && !p->lastpart->workclosed;
00304                             p=p->lastpart)
00305                      ;
00306 
00307               /* If p->workinheader, we've got a mother of all headers
00308               ** here.  Well, that's just too bad, we'll end up garbling
00309               ** it.
00310               */
00311 
00312               l=h->workbuflen;
00313 
00314               /* We do need to make sure that the final \r\n gets
00315               ** stripped off, so don't gobble up everything if
00316               ** the last character we see is a \r
00317               */
00318 
00319               if (h->workbuf[l-1] == '\r')
00320                      --l;
00321 
00322               /* If we'll be rewriting, make sure rwprep knows about
00323               ** stuff that was skipped just now. */
00324 
00325               if (h->rfc2045acptr && !p->workinheader &&
00326                      (!p->lastpart || !p->lastpart->workclosed))
00327                      (*h->rfc2045acptr->section_contents)(h->workbuf, l);
00328 
00329               update_counts(p, p->endpos+l, p->endpos+l, 0);
00330               p->informdata=1;
00331               for (i=0; l<h->workbuflen; l++)
00332                      h->workbuf[i++]=h->workbuf[l];
00333               h->workbuflen=i;
00334        }
00335 }
00336 
00337 /*
00338        Append a new RFC2045 subpart.  Adds new RFC2045 structure to the
00339        end of the list of existing RFC2045 substructures.
00340 */
00341 
00342 static struct rfc2045 *append_part_noinherit(struct rfc2045 *p, size_t startpos){
00343 struct rfc2045 *newp;
00344 
00345        newp=rfc2045_alloc();
00346        if (p->lastpart)
00347        {
00348               p->lastpart->next=newp;
00349               newp->pindex=p->lastpart->pindex+1;
00350        }
00351        else
00352        {
00353               p->firstpart=newp;
00354               newp->pindex=0;
00355        }
00356        p->lastpart=newp;
00357        newp->parent=p;
00358 
00359        /* Initialize source pointers */
00360        newp->startpos=newp->endpos=newp->startbody=newp->endbody=startpos;
00361 
00362        while (p->parent)
00363               p=p->parent;
00364        ++p->numparts;
00365 
00366        return (newp);
00367 }
00368 
00369 static struct rfc2045 *append_part(struct rfc2045 *p, size_t startpos)
00370 {
00371 struct rfc2045 *newp=append_part_noinherit(p, startpos);
00372 
00373        /* Substructures inherit content transfer encoding and character set */
00374 
00375        set_string(&newp->content_transfer_encoding,
00376                      p->content_transfer_encoding);
00377 
00378        if (rfc2045_attrset(&newp->content_type_attr, "charset",
00379                          rfc2045_getattr(p->content_type_attr, "charset"))
00380            < 0)
00381               rfc2045_enomem();
00382 
00383        return (newp);
00384 }
00385 
00386 /*
00387        doline() processes next line in the RFC2045 message.
00388 
00389        Drills down the list of all the multipart messages currently open,
00390        and checks if the line is a boundary line for the given multipart.
00391        In theory the boundary line, if there is one, should be the boundary
00392        line only for the inner multipart only, but, this takes into account
00393        broken MIME messages.
00394 */
00395 
00396 static void do_header(struct rfc2045 *);
00397 
/*
** Process the line that is currently in p's work buffer.
**
** p is the structure that owns the work buffer (rfc2045_parse() passes
** the one it was given).  The line is workbuflen bytes long and ends
** with a newline.
**
** In order, this function:
**  - gives up, flagging RFC2045_ERR2COMPLEX, once numparts exceeds
**    MAXPARTS;
**  - replaces NUL bytes in the line with spaces, and notes whether any
**    byte has its high bit set;
**  - walks down the chain of open subparts, looking for a multipart
**    boundary that this line matches.  A boundary line either closes
**    the multipart (trailing "--") or starts a new subpart;
**  - otherwise treats the line as body or header text of the section it
**    ended up at: body lines only update the counts, a blank line ends
**    the header, any other header line is saved in the header buffer.
*/
static void doline(struct rfc2045 *p)
{
size_t cnt=p->workbuflen;
char *c=p->workbuf;
size_t n=cnt-1;      /* Strip \n (we always get at least a \n here) */
struct rfc2045 *newp;
struct rfc2045ac *rwp=p->rfc2045acptr;
unsigned num_levels=0;      /* Counted below, but not otherwise used here */

size_t k;
int    bit8=0;

       if (p->numparts > MAXPARTS)
       {
              p->rfcviolation |= RFC2045_ERR2COMPLEX;
              return;
       }

       /* Replace NULs with spaces, note any 8-bit characters */

       for (k=0; k<cnt; k++)
       {
              if (c[k] == 0)
                     c[k]=' ';
              if (c[k] & 0x80)     bit8=1;
       }

       if (n && c[n-1] == '\r')    /* Strip trailing \r */
              --n;

       /* Before the main drill down loop below, look ahead and see if we're
       ** in the middle of a form-data section.  */

       for (newp=p; newp->lastpart &&
                     !newp->lastpart->workclosed; newp=newp->lastpart,
                     ++num_levels)
       {
              if (ContentBoundary(newp) == 0 || newp->workinheader)
                     continue;

              /* informdata is set by rfc2045_parse_partial().  Start
              ** the drill down at that subpart, and clear the flag.
              */

              if (newp->lastpart->informdata)
              {
                     p=newp->lastpart;
                     p->informdata=0;
                     break;
              }
       }

       /* Drill down until we match a boundary, or until we've reached
       the last RFC2045 section that has been opened.
       */

       while (p->lastpart)
       {
       size_t l;
       const char *cb;

              if (p->lastpart->workclosed)
              {
                     update_counts(p, p->endpos+cnt, p->endpos+n, 1);
                     return;
              }
              /* Leftover trash -- workclosed is set when the final
              ** terminating boundary has been seen */

              /* content_boundary may be set before the entire header
              ** has been seen, so continue drilling down in that case
              */

              cb=ContentBoundary(p);

              if (cb == 0 || p->workinheader)
              {
                     p=p->lastpart;
                     ++num_levels;
                     continue;
              }

              l=strlen(cb);

              /* A boundary line is "--" followed by the boundary string.
              ** The boundary string is compared without regard to case.
              */

              if (c[0] == '-' && c[1] == '-' && n >= 2+l &&
                     strncasecmp(cb, c+2, l) == 0)
              {

                     /* Report the end of the current section, unless
                     ** it is the dummy section (isdummy).
                     */

                     if (rwp && (!p->lastpart || !p->lastpart->isdummy))
                            (*rwp->end_section)();

              /* Ok, we've found a boundary */

                     if (n >= 4+l && strncmp(c+2+l, "--", 2) == 0)
                     {
                     /* Last boundary */

                            p->lastpart->workclosed=1;
                            update_counts(p, p->endpos+cnt, p->endpos+cnt,
                                   1);
                            return;
                     }

              /* Create new RFC2045 section */

                     newp=append_part(p, p->endpos+cnt);
                     update_counts(p, p->endpos+cnt, p->endpos+n, 1);

                     /* The new RFC2045 section is MIME compliant */

                     if ((newp->mime_version=strdup(p->mime_version)) == 0)
                            rfc2045_enomem();
                     return;
              }
              p=p->lastpart;
              ++num_levels;
       }

       /* Ok, we've found the RFC2045 section that we're working with.
       ** Now what?
       */

       if (! p->workinheader)
       {
              /* Processing body, just update the counts. */

       size_t cnt_update=cnt;

              /* Flag 8-bit content that was not declared as 8bit, in
              ** this section and all of its parents.
              */

              if (bit8 && !p->content_8bit &&
                     (p->rfcviolation & RFC2045_ERR8BITCONTENT) == 0)
              {
              struct rfc2045 *q;

                     for (q=p; q; q=q->parent)
                            q->rfcviolation |= RFC2045_ERR8BITCONTENT;
              }

              /*
              ** In multiparts, the final newline in a part belongs to the
              ** boundary, otherwise, include it in the text.
              */
              if (p->parent && p->parent->content_type &&
                            strncasecmp(p->parent->content_type,
                                          "multipart/", 10) == 0)
                     cnt_update=n;

              if (!p->lastpart || !p->lastpart->workclosed)
              {
                     if (rwp && !p->isdummy)
                            (*rwp->section_contents)(c, cnt);

                     update_counts(p, p->endpos+cnt, p->endpos+cnt_update,
                            1);
              }
              return;
       }

       /* Flag 8-bit characters in a header, in this section and all of
       ** its parents.
       */

       if (bit8 && (p->rfcviolation & RFC2045_ERR8BITHEADER) == 0)
       {
       struct rfc2045 *q;

              for (q=p; q; q=q->parent)
                     q->rfcviolation |= RFC2045_ERR8BITHEADER;
       }

       /* In the header */

       if ( n == 0 ) /* End of header, body begins.  Parse header. */
       {
              do_header(p); /* Clean up any left over header line */
              p->workinheader=0;

              /* Message body starts right here */

              p->startbody=p->endpos+cnt;
              update_counts(p, p->startbody, p->startbody, 1);
              --p->nbodylines;     /* Don't count the blank line */

              /* Discard content type and boundary if I don't understand
              ** this MIME flavor.
              */

              if (!RFC2045_ISMIME1(p->mime_version))
              {
                     set_string(&p->content_type, 0);

                     rfc2045_freeattr(p->content_type_attr);
                     p->content_type_attr=0;
                     set_string(&p->content_disposition, 0);
                     rfc2045_freeattr(p->content_disposition_attr);
                     p->content_disposition_attr=0;
                     if (p->boundary)
                     {
                            free(p->boundary);
                            p->boundary=0;
                     }
              }

              /* Normally, if we don't have a content_type, default it
              ** to text/plain.  However, if the multipart type is
              ** multipart/digest, it is message/rfc822.
              */

              if (RFC2045_ISMIME1(p->mime_version) && !p->content_type)
              {
              char   *q="text/plain";

                     if (p->parent && p->parent->content_type &&
                            strcmp(p->parent->content_type,
                                   "multipart/digest") == 0)
                            q="message/rfc822";
                     set_string(&p->content_type, q);
              }

              /* If this is not a multipart section, we don't want to
              ** hear about any boundaries
              */

              if (!p->content_type ||
                     strncmp(p->content_type, "multipart/", 10))
              {
                     if (p->boundary)
                            free(p->boundary);
                     p->boundary=0;
              }

              /* If this section's a message, we will expect to see
              ** more RFC2045 stuff, so create a nested RFC2045 structure,
              ** and indicate that we expect to see headers.
              */

              if (p->content_type &&
                     strcmp(p->content_type, "message/rfc822") == 0)
              {
                     newp=append_part_noinherit(p, p->startbody);
                     newp->workinheader=1;
                     return;
              }

              /*
              ** If this is a multipart message (boundary defined),
              ** create a RFC2045 structure for the pseudo-section
              ** that precedes the first boundary line.
              */

              if (ContentBoundary(p))
              {
                     newp=append_part(p, p->startbody);
                     newp->workinheader=0;
                     newp->isdummy=1;
                            /* It's easier just to create it. */
                     return;
              }

              if (rwp)
                     (*rwp->start_section)(p);
              return;
       }

       /* RFC822 header continues */

       update_counts(p, p->endpos + cnt, p->endpos+n, 1);

       /* If this header line starts with a space, append one space
       ** to the saved contents of the previous line, and append this
       ** line to it.
       */

       if (isspace((int)(unsigned char)*c))
       {
              rfc2045_add_buf(&p->header, &p->headersize, &p->headerlen, " ", 1);
       }
       else
       {
       /* Otherwise the previous header line is complete, so process it */

              do_header(p);
              p->headerlen=0;
       }

       /* Save this line in the header buffer, because the next line
       ** could be a continuation.
       */

       rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, c, n);
}
00678 
00679 /***********************************************************************/
00680 
00681 /*
00682 ** paste_tokens() - recombine an array of RFC822 tokens back as a string.
00683 ** (Comments) are ignored.
00684 */
00685 
00686 static char *paste_tokens(struct rfc822t *h, int start, int cnt)
00687 {
00688 int    l;
00689 int    i;
00690 char   *p;
00691 
00692        /* Calculate string size */
00693 
00694        l=1;
00695        for (i=0; i<cnt; i++)
00696        {
00697               if (h->tokens[start+i].token == '(')
00698                      continue;
00699 
00700               if (rfc822_is_atom(h->tokens[start+i].token))
00701                      l += h->tokens[start+i].len;
00702               else
00703                      l++;
00704        }
00705 
00706        /* Do it */
00707 
00708        p=( char *)malloc(l);
00709        if (!p)
00710        {
00711               rfc2045_enomem();
00712               return (0);
00713        }
00714        l=0;
00715 
00716        for (i=0; i<cnt; i++)
00717        {
00718               if (h->tokens[start+i].token == '(')
00719                      continue;
00720 
00721               if (rfc822_is_atom(h->tokens[start+i].token))
00722               {
00723               int l2=h->tokens[start+i].len;
00724 
00725                      memcpy(p+l, h->tokens[start+i].ptr, l2);
00726                      l += l2;
00727               }
00728               else   p[l++]=h->tokens[start+i].token;
00729        }
00730        p[l]=0;
00731        return (p);
00732 }
00733 
00734 /* Various permutations of the above, including forcing the string to
00735 ** lowercase
00736 */
00737 
/*
** Same as paste_tokens(), with the resulting string converted to
** lowercase.
**
** Returns a malloc()ed string that the caller must free, or a null
** pointer when paste_tokens() failed.
*/
static char *lower_paste_tokens(struct rfc822t *h, int start, int cnt)
{
char   *p=paste_tokens(h, start, cnt);
char   *q;

       /* tolower() is only defined for unsigned char values (and EOF),
       ** so 8-bit characters must not reach it as negative numbers.
       */

       for (q=p; q && *q; q++)
              *q=(char)tolower((int)(unsigned char)*q);
       return (p);
}
00747 
00748 static char *paste_token(struct rfc822t *h, int i)
00749 {
00750        if (i >= h->ntokens) return (0);
00751        return (paste_tokens(h, i, 1));
00752 }
00753 
/*
** Same as paste_token(), with the resulting string converted to
** lowercase.
**
** Returns a malloc()ed string that the caller must free, or a null
** pointer when paste_token() returned one.
*/
static char *lower_paste_token(struct rfc822t *h, int i)
{
char *p=paste_token(h, i);
char *q;

       /* tolower() is only defined for unsigned char values (and EOF),
       ** so 8-bit characters must not reach it as negative numbers.
       */

       for (q=p; q && *q; q++)
              *q=(char)tolower((int)(unsigned char)*q);
       return (p);
}
00763 
00764 /*
00765        do_header() - process completed RFC822 header.
00766 */
00767 
00768 static void mime_version(struct rfc2045 *, struct rfc822t *);
00769 static void content_type(struct rfc2045 *, struct rfc822t *);
00770 static void content_transfer_encoding(struct rfc2045 *, struct rfc822t *);
00771 static void content_disposition(struct rfc2045 *, struct rfc822t *);
00772 static void content_id(struct rfc2045 *, struct rfc822t *);
00773 static void content_description(struct rfc2045 *, const char *);
00774 static void content_language(struct rfc2045 *, const char *);
00775 static void content_md5(struct rfc2045 *, const char *);
00776 static void content_base(struct rfc2045 *, struct rfc822t *);
00777 static void content_location(struct rfc2045 *, struct rfc822t *);
00778 
00779 static void do_header(struct rfc2045 *p)
00780 {
00781 struct rfc822t *header;
00782 char   *t;
00783 
00784        if (p->headerlen == 0)      return;
00785        rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, "", 1);
00786                             /* 0 terminate */
00787 
00788        /* Parse the header line according to RFC822 */
00789 
00790        header=rfc822t_alloc_new(p->header, NULL, NULL);
00791 
00792        if (!header)  return;       /* Broken header */
00793 
00794        if (header->ntokens < 2 ||
00795               header->tokens[0].token ||
00796               header->tokens[1].token != ':')
00797        {
00798               rfc822t_free(header);
00799               return;       /* Broken header */
00800        }
00801 
00802        t=lower_paste_token(header, 0);
00803 
00804        if (t == 0)
00805               ;
00806        else if (strcmp(t, "mime-version") == 0)
00807        {
00808               free(t);
00809               mime_version(p, header);
00810        }
00811        else if (strcmp(t, "content-type") == 0)
00812        {
00813               free(t);
00814               content_type(p, header);
00815        } else if (strcmp(t, "content-transfer-encoding") == 0)
00816        {
00817               free(t);
00818               content_transfer_encoding(p, header);
00819        } else if (strcmp(t, "content-disposition") == 0)
00820        {
00821               free(t);
00822               content_disposition(p, header);
00823        } else if (strcmp(t, "content-id") == 0)
00824        {
00825               free(t);
00826               content_id(p, header);
00827        } else if (strcmp(t, "content-description") == 0)
00828        {
00829               free(t);
00830               t=strchr(p->header, ':');
00831               if (t) ++t;
00832               while (t && isspace((int)(unsigned char)*t))
00833                      ++t;
00834               content_description(p, t);
00835        } else if (strcmp(t, "content-language") == 0)
00836        {
00837               free(t);
00838               t=strchr(p->header, ':');
00839               if (t) ++t;
00840               while (t && isspace((int)(unsigned char)*t))
00841                      ++t;
00842               content_language(p, t);
00843        } else if (strcmp(t, "content-base") == 0)
00844        {
00845               free(t);
00846               content_base(p, header);
00847        } else if (strcmp(t, "content-location") == 0)
00848        {
00849               free(t);
00850               content_location(p, header);
00851        } else if (strcmp(t, "content-md5") == 0)
00852        {
00853               free(t);
00854               t=strchr(p->header, ':');
00855               if (t) ++t;
00856               while (t && isspace((int)(unsigned char)*t))
00857                      ++t;
00858               content_md5(p, t);
00859        }
00860        else   free(t);
00861        rfc822t_free(header);
00862 }
00863 
00864 /* Mime-Version: and Content-Transfer-Encoding: headers are easy */
00865 
00866 static void mime_version(struct rfc2045 *p, struct rfc822t *header)
00867 {
00868 char   *vers=paste_tokens(header, 2, header->ntokens-2);
00869 
00870        if (!vers)    return;
00871 
00872        if (p->mime_version) free(p->mime_version);
00873        p->mime_version=vers;
00874 }
00875 
00876 static void content_transfer_encoding(struct rfc2045 *r,
00877                             struct rfc822t *header)
00878 {
00879 char   *p;
00880 
00881        p=lower_paste_tokens(header, 2, header->ntokens-2);
00882        if (!p)       return;
00883 
00884        if (r->content_transfer_encoding)
00885               free(r->content_transfer_encoding);
00886        r->content_transfer_encoding=p;
00887 
00888        if (strcmp(p, "8bit") == 0)
00889               r->content_8bit=1;
00890 }
00891 
00892 /* Dig into the content_type header */
00893 
00894 static void parse_content_header(struct rfc822t *header,
00895                              int init_start,
00896                              void (*init_token)(char *, void *),
00897                              void (*init_parameter)(const char *,
00898                                                  struct rfc822t *,
00899                                                  int, int,
00900                                                  void *),
00901                              void *void_arg)
00902 {
00903 int    start;
00904 int    i, j;
00905 char   *p;
00906 
00907        /* Look for the 1st ; */
00908 
00909        for (start=init_start; start < header->ntokens; start++)
00910               if (header->tokens[start].token == ';')
00911                      break;
00912 
00913        /* Everything up to the 1st ; is the content type */
00914 
00915        p=lower_paste_tokens(header, init_start, start-init_start);
00916        if (!p)       return;
00917 
00918        (*init_token)(p, void_arg);
00919        if (start < header->ntokens) start++;
00920 
00921        /* Handle the remainder of the Content-Type: header */
00922 
00923        while (start < header->ntokens)
00924        {
00925               /* Look for next ; */
00926 
00927               for (i=start; i<header->ntokens; i++)
00928                      if (header->tokens[i].token == ';')
00929                             break;
00930               j=start;
00931               if (j < i)
00932               {
00933                      ++j;
00934 
00935                      /* We only understand <atom>= */
00936 
00937                      while (j < i && header->tokens[j].token == '(')
00938                             ++j;
00939                      if (j < i && header->tokens[j].token == '=')
00940                      {
00941                             ++j;
00942 
00943                             /*
00944                             ** reformime: loose parsing due to loose
00945                             ** parsing in MSOE, leading to viruses slipping
00946                             ** through virus scanners if we strictly
00947                             ** parsed the content-type header.
00948                             */
00949                             if (rfc2045_in_reformime && j < i
00950                                 && header->tokens[j].token == '"')
00951                                    i=j+1;
00952 
00953                             p=lower_paste_token(header, start);
00954                             if (!p)       return;
00955                             (*init_parameter)(p, header, j, i-j, void_arg);
00956                             free(p);
00957                      }
00958               }
00959               if ( i<header->ntokens ) ++i;      /* Skip over ; */
00960               start=i;
00961        }
00962 }
00963 
00964 /* Dig into the content_type header */
00965 
static void save_content_type(char *content_type, void *void_arg);
static void save_content_type_parameter(const char *name,
					struct rfc822t *header,
					int start, int len,
					void *void_arg);

/*
** Parse a tokenized Content-Type: header into r, by running the generic
** header parser with the two Content-Type callbacks.  Parsing starts at
** token index 2 (presumably just past the header name and the colon -
** TODO confirm against the tokenizer).
*/

static void content_type(struct rfc2045 *r, struct rfc822t *header)
{
	parse_content_header(header, 2,
			     save_content_type,
			     save_content_type_parameter,
			     r);
}
00975 
00976 static void save_content_type(char *content_type, void *void_arg)
00977 {
00978        struct rfc2045 *r=(struct rfc2045 *)void_arg;
00979 
00980        if (r->content_type) free(r->content_type);
00981        r->content_type=content_type;
00982 }
00983 
00984 static void save_content_type_parameter(const char *name,
00985                                    struct rfc822t *header, int start,
00986                                    int len, void *void_arg)
00987 {
00988        struct rfc2045 *r=(struct rfc2045 *)void_arg;
00989        char   *p;
00990 
00991        p=strcmp(name, "charset") == 0 ?
00992                      lower_paste_tokens(header, start, len):
00993                      paste_tokens(header, start, len);
00994        if (!p)       return;
00995 
00996        if (rfc2045_attrset(&r->content_type_attr, name, p) < 0)
00997        {
00998               free(p);
00999               rfc2045_enomem();
01000        }
01001 
01002        free(p);
01003 
01004        if (strcmp(name, "boundary") == 0)
01005        {
01006               struct rfc2045 *q;
01007 
01008               if (r->boundary)
01009                      free(r->boundary);
01010               p=lower_paste_tokens(header, start, len);
01011               r->boundary=p;
01012 
01013               /*
01014               ** Check all the outer MIME boundaries.  If this is a
01015               ** substring of an outer MIME boundary, or the outer
01016               ** boundary is a substring of the inner boundary, we
01017               ** have an ambiguity - see "IMPLEMENTOR'S NOTE" in
01018               ** section 5.1.1 of RFC 2046.
01019               */
01020 
01021               for (q=r->parent; q; q=q->parent)
01022               {
01023                      const char *a, *b;
01024 
01025                      if (!q->boundary)
01026                             continue;
01027 
01028                      for (a=q->boundary, b=p; *a && *b; a++, b++)
01029                             if (*a != *b)
01030                                    break;
01031 
01032                      if (!*a || !*b)
01033                      {
01034                             while (q->parent)
01035                                    q=q->parent;
01036                             q->rfcviolation |= RFC2045_ERRBADBOUNDARY;
01037                             break;
01038                      }
01039               }
01040        }
01041 }
01042 
01043 /* Dig into content-disposition */
01044 
static void save_content_disposition(char *content_disposition,
				     void *void_arg);
static void save_content_disposition_parameter(const char *name,
					       struct rfc822t *header,
					       int start, int len,
					       void *void_arg);

/*
** Parse a tokenized Content-Disposition: header into r, by running the
** generic header parser with the two Content-Disposition callbacks.
** Parsing starts at token index 2.
*/

static void content_disposition(struct rfc2045 *r, struct rfc822t *header)
{
	parse_content_header(header, 2,
			     save_content_disposition,
			     save_content_disposition_parameter,
			     r);
}
01055 
01056 static void save_content_disposition(char *content_disposition, void *void_arg)
01057 {
01058        struct rfc2045 *r=(struct rfc2045 *)void_arg;
01059 
01060        if (r->content_disposition) free(r->content_disposition);
01061        r->content_disposition=content_disposition;
01062 }
01063 
01064 static void save_content_disposition_parameter(const char *name,
01065                                           struct rfc822t *header,
01066                                           int start, int len,
01067                                           void *void_arg)
01068 {
01069        struct rfc2045 *r=(struct rfc2045 *)void_arg;
01070        char   *p;
01071 
01072        p=paste_tokens(header, start, len);
01073        if (!p)       return;
01074 
01075        if (rfc2045_attrset(&r->content_disposition_attr, name, p) < 0)
01076        {
01077               free(p);
01078               rfc2045_enomem();
01079        }
01080        free(p);
01081 }
01082 
01083 char *rfc2045_related_start(const struct rfc2045 *p)
01084 {
01085 const char *cb=rfc2045_getattr( p->content_type_attr, "start");
01086 struct rfc822t *t;
01087 struct rfc822a       *a;
01088 int    i;
01089 
01090        if (!cb || !*cb)     return (0);
01091 
01092        t=rfc822t_alloc_new(cb, 0, NULL);
01093        if (!t)
01094        {
01095               rfc2045_enomem();
01096               return(0);
01097        }
01098 
01099        a=rfc822a_alloc(t);
01100        if (!a)
01101        {
01102               rfc822t_free(t);
01103               rfc2045_enomem();
01104               return (0);
01105        }
01106        for (i=0; i<a->naddrs; i++)
01107               if (a->addrs[i].tokens)
01108               {
01109               char   *s=rfc822_getaddr(a, i);
01110 
01111                      rfc822a_free(a);
01112                      rfc822t_free(t);
01113                      if (!s)
01114                             rfc2045_enomem();
01115                      return (s);
01116               }
01117 
01118        rfc822a_free(a);
01119        rfc822t_free(t);
01120        return (0);
01121 }
01122 
01123 static void content_id(struct rfc2045 *p, struct rfc822t *t)
01124 {
01125 struct rfc822a       *a=rfc822a_alloc(t);
01126 int    i;
01127 
01128        if (!a)
01129        {
01130               rfc2045_enomem();
01131               return;
01132        }
01133 
01134        for (i=0; i<a->naddrs; i++)
01135               if (a->addrs[i].tokens)
01136               {
01137               char   *s=rfc822_getaddr(a, i);
01138 
01139                      if (!s)
01140                      {
01141                             rfc822a_free(a);
01142                             rfc2045_enomem();
01143                             return;
01144                      }
01145                      if (p->content_id)
01146                             free(p->content_id);
01147                      p->content_id=s;
01148                      break;
01149               }
01150 
01151        rfc822a_free(a);
01152 }
01153 
01154 static void content_description(struct rfc2045 *p, const char *s)
01155 {
01156        if (s && *s)
01157               set_string(&p->content_description, s);
01158 }
01159 
01160 static void content_language(struct rfc2045 *p, const char *s)
01161 {
01162        if (s && *s)
01163               set_string(&p->content_language, s);
01164 }
01165 
01166 static void content_md5(struct rfc2045 *p, const char *s)
01167 {
01168        if (s && *s)
01169               set_string(&p->content_md5, s);
01170 }
01171 
01172 static void content_base(struct rfc2045 *p, struct rfc822t *t)
01173 {
01174 char   *s;
01175 int    i;
01176 
01177        for (i=0; i<t->ntokens; i++)
01178               if (t->tokens[i].token == '"')
01179                      t->tokens[i].token=0;
01180 
01181        s=paste_tokens(t, 2, t->ntokens-2);
01182        set_string(&p->content_base, s);
01183 }
01184 
01185 static void content_location(struct rfc2045 *p, struct rfc822t *t)
01186 {
01187 char   *s;
01188 int    i;
01189 
01190        for (i=0; i<t->ntokens; i++)
01191               if (t->tokens[i].token == '"')
01192                      t->tokens[i].token=0;
01193 
01194        s=paste_tokens(t, 2, t->ntokens-2);
01195        set_string(&p->content_location, s);
01196        free(s);
01197 }
01198 
01199 /* -------------------- */
01200 
01201 #define       GETINFO(s, def) ( (s) && (*s) ? (s):def)
01202 
01203 void rfc2045_mimeinfo(const struct rfc2045 *p,
01204        const char **content_type_s,
01205        const char **content_transfer_encoding_s,
01206        const char **charset_s)
01207 {
01208 const char *c;
01209 
01210        *content_type_s=GETINFO(p->content_type, "text/plain");
01211        *content_transfer_encoding_s=GETINFO(p->content_transfer_encoding,
01212                                           "8bit");
01213 
01214        c=rfc2045_getattr(p->content_type_attr, "charset");
01215        if (!c)       c=rfc2045_getdefaultcharset();
01216 
01217        *charset_s=c;
01218 }
01219 
01220 const char *rfc2045_getdefaultcharset()
01221 {
01222 const char *p=rfc2045_defcharset;
01223 
01224        if (!p)       p=RFC2045CHARSET;
01225        return (p);
01226 }
01227 
01228 void rfc2045_setdefaultcharset(const char *charset)
01229 {
01230 char   *p=strdup(charset);
01231 
01232        if (!p)
01233        {
01234               rfc2045_enomem();
01235               return;
01236        }
01237 
01238        if (rfc2045_defcharset)     free(rfc2045_defcharset);
01239        rfc2045_defcharset=p;
01240 }
01241 
01242 const char *rfc2045_boundary(const struct rfc2045 *p)
01243 {
01244 const char *cb=rfc2045_getattr( p->content_type_attr, "boundary");
01245 
01246        if (!cb)      cb="";
01247        return (cb);
01248 }
01249 
01250 int rfc2045_isflowed(const struct rfc2045 *p)
01251 {
01252        const char *cb=rfc2045_getattr(p->content_type_attr, "format");
01253 
01254        return (cb && strcmp(cb, "flowed") == 0);
01255 }
01256 
01257 int rfc2045_isdelsp(const struct rfc2045 *p)
01258 {
01259        const char *cb=rfc2045_getattr(p->content_type_attr, "delsp");
01260 
01261        return (cb && strcmp(cb, "yes") == 0);
01262 }
01263 
01264 const char *rfc2045_content_id(const struct rfc2045 *p)
01265 {
01266        return (p->content_id ? p->content_id:"");
01267 }
01268 
01269 const char *rfc2045_content_description(const struct rfc2045 *p)
01270 {
01271        return (p->content_description ? p->content_description:"");
01272 }
01273 
01274 const char *rfc2045_content_language(const struct rfc2045 *p)
01275 {
01276        return (p->content_language ? p->content_language:"");
01277 }
01278 
01279 const char *rfc2045_content_md5(const struct rfc2045 *p)
01280 {
01281        return (p->content_md5 ? p->content_md5:"");
01282 }
01283 
01284 void rfc2045_mimepos(const struct rfc2045 *p,
01285        off_t *start_pos, off_t *end_pos, off_t *start_body,
01286        off_t *nlines, off_t *nbodylines)
01287 {
01288        *start_pos=p->startpos;
01289        *end_pos=p->endpos;
01290 
01291        *nlines=p->nlines;
01292        *nbodylines=p->nbodylines;
01293        if (p->parent)       /* MIME parts do not have the trailing CRLF */
01294        {
01295               *end_pos=p->endbody;
01296               if (*nlines)  --*nlines;
01297               if (*nbodylines) --*nbodylines;
01298        }
01299        *start_body=p->startbody;
01300 
01301        if (*start_body == *start_pos)     /* No header */
01302        {
01303               *start_body= *end_pos;
01304        }
01305 }
01306 
01307 unsigned rfc2045_mimepartcount(const struct rfc2045 *p)
01308 {
01309 const struct rfc2045 *q;
01310 unsigned n=0;
01311 
01312        for (q=p->firstpart; q; q=q->next) ++n;
01313        return (n);
01314 }
01315 
01316 /*
01317 ** Generic interface into parse_content_header
01318 */
01319 
/*
** Carries the caller's callbacks of rfc2045_parse_mime_header() through
** parse_content_header()'s void pointer argument.
*/

struct rfc2045_parse_mime_info {

	/* Receives the header's value, and the caller's void_arg */
	void (*header_type_cb)(const char *type, void *void_arg);

	/* Receives a parameter's name and value, and the caller's void_arg */
	void (*header_param_cb)(const char *name, const char *value,
				void *void_arg);

	/* Passed to both callbacks unchanged */
	void *void_arg;
};
01325 
01326 static void parse_mime_cb(char *, void *);
01327 static void parse_param_cb(const char *, struct rfc822t *,
01328                         int, int, void *);
01329 
01330 int rfc2045_parse_mime_header(const char *header,
01331                            void (*header_type_cb)(const char *, void *),
01332                            void (*header_param_cb)(const char *,
01333                                                 const char *,
01334                                                 void *),
01335                            void *void_arg)
01336 {
01337        struct rfc2045_parse_mime_info mi;
01338        struct rfc822t *h=rfc822t_alloc_new(header, NULL, NULL);
01339 
01340        mi.header_type_cb=header_type_cb;
01341        mi.header_param_cb=header_param_cb;
01342        mi.void_arg=void_arg;
01343 
01344        if (!h)
01345               return -1;
01346 
01347        parse_content_header(h, 0, parse_mime_cb, parse_param_cb, &mi);
01348        rfc822t_free(h);
01349        return 0;
01350 }
01351 
01352 static void parse_mime_cb(char *t, void *void_arg)
01353 {
01354        struct rfc2045_parse_mime_info *mi=
01355               (struct rfc2045_parse_mime_info *)void_arg;
01356 
01357        (*mi->header_type_cb)(t, mi->void_arg);
01358        free(t);
01359 }
01360 
01361 
01362 static void parse_param_cb(const char *name,
01363                         struct rfc822t *header, int start,
01364                         int len, void *void_arg)
01365 {
01366        struct rfc2045_parse_mime_info *mi=
01367               (struct rfc2045_parse_mime_info *)void_arg;
01368        char *p=paste_tokens(header, start, len);
01369 
01370        if (!p)
01371               return;
01372 
01373        (*mi->header_param_cb)(name, p, mi->void_arg);
01374        free(p);
01375 }