Back to index

courier  0.68.2
rfc2045.h
Go to the documentation of this file.
00001 /*
00002 ** Copyright 1998 - 2011 Double Precision, Inc.  See COPYING for
00003 ** distribution information.
00004 */
00005 
00006 /*
00007 */
00008 #ifndef       rfc2045_h
00009 #define       rfc2045_h
00010 
00011 #include      "../rfc2045/rfc2045_config.h" /* VPATH build */
00012 #include      "../numlib/numlib.h"
00013 #include      <sys/types.h>
00014 #include      <string.h>
00015 #include      <stdio.h>
00016 
00017 #ifdef  __cplusplus
00018 extern "C" {
00019 #endif
00020 
00021 #if 0
00022 }
00023 #endif
00024 
/*
** Tests on a numeric string p (by name, a MIME-Version value -- confirm
** against callers).  Both macros may evaluate p twice, so p should be an
** expression without side effects.
** NOTE(review): atoi() is declared in <stdlib.h>, which is not among the
** headers this file includes directly; presumably it arrives through one
** of the project headers or the including source file -- confirm.
*/
00025 #define       RFC2045_ISMIME1(p)   ((p) && atoi(p) == 1)   /* p is non-NULL and atoi(p) is 1 */
00026 #define       RFC2045_ISMIME1DEF(p)       (!(p) || atoi(p) == 1)   /* same test, but a NULL p also passes */
00027 
/*
** One node of a parsed MIME structure: offsets and line counts of the
** section, MIME header fields, links to related nodes, and working state
** used while parsing, rewriting and decoding.
*/
00028 struct rfc2045 {
00029        struct rfc2045 *parent;   /* Presumably the enclosing section -- confirm */
00030        unsigned pindex;   /* NOTE(review): looks like this section's index within parent -- confirm */
00031        struct rfc2045 *next;   /* Presumably the next sibling in parent's firstpart..lastpart list -- confirm */
00032 
00033        off_t  startpos,     /* At which offset in msg this section starts */
00034               endpos,              /* Where it ends */
00035               startbody,    /* Where the body of the msg starts */
00036               endbody;      /* endpos - trailing CRLF terminator */
00037        off_t  nlines;              /* Number of lines in message */
00038        off_t  nbodylines;   /* Number of lines only in the body */
00039        char *mime_version;   /* This and the content_* strings below are named after MIME headers; presumably each holds that header's value -- confirm */
00040        char *content_type;
00041        struct rfc2045attr *content_type_attr;    /* Content-Type: attributes */
00042 
00043        char *content_disposition;
00044        char *boundary;   /* Presumably the multipart boundary string -- confirm */
00045        struct rfc2045attr *content_disposition_attr;   /* By analogy with content_type_attr: Content-Disposition: attributes */
00046        char *content_transfer_encoding;
00047        int content_8bit;           /*
00048                                    ** Set if content_transfer_encoding is
00049                                    ** 8bit
00050                                    */
00051        char *content_id;
00052        char *content_description;
00053        char *content_language;
00054        char *content_md5;
00055        char *content_base;
00056        char *content_location;
00057        struct  rfc2045ac *rfc2045acptr;   /* Callback table; struct rfc2045ac is declared later in this header */
00058        int    has8bitchars; /* For rewriting */
00059        int    haslongline;  /* For rewriting */
00060        unsigned rfcviolation;      /* Violations found; presumably an OR of the RFC2045_ERR* values below -- confirm */
00061 
00062 #define       RFC2045_ERR8BITHEADER       1      /* 8 bit characters in headers */
00063 #define       RFC2045_ERR8BITCONTENT      2      /* 8 bit contents, but no 8bit
00064                                    content-transfer-encoding */
00065 #define       RFC2045_ERR2COMPLEX  4      /* Too many nested contents */
00066 #define RFC2045_ERRBADBOUNDARY     8      /* Overlapping MIME boundaries */
00067 
00068        unsigned numparts;   /* # of parts allocated */
00069 
00070        char   *rw_transfer_encoding;      /* For rewriting */
00071 
00072 #define       RFC2045_RW_7BIT      1   /* NOTE(review): presumably a rewrite-mode selector (rewrite to 7bit) -- confirm where it is consumed */
00073 #define       RFC2045_RW_8BIT      2   /* NOTE(review): presumably a rewrite-mode selector (rewrite to 8bit) -- confirm where it is consumed */
00074 
00075        /* Subsections */
00076 
00077        struct rfc2045 *firstpart, *lastpart;
00078 
00079        /* Working area */
00080 
00081        char *workbuf;   /* Presumably a scratch buffer, with workbufsize/workbuflen as its capacity and used length -- confirm */
00082        size_t workbufsize;
00083        size_t workbuflen;
00084        int    workinheader;
00085        int    workclosed;
00086        int    isdummy;
00087        int    informdata;   /* In a middle of a long form-data part */
00088        char *header;   /* Presumably a header accumulation buffer, with headersize/headerlen as capacity and used length -- confirm */
00089        size_t headersize;
00090        size_t headerlen;
00091 
00092        int    (*decode_func)(struct rfc2045 *, const char *, size_t);   /* Decoder state; same parameter list as rfc2045_cdecode() */
00093        void   *misc_decode_ptr;   /* Presumably the void * given to rfc2045_cdecode_start() -- confirm */
00094        int    (*udecode_func)(const char *, size_t, void *);   /* Same type as the callback argument of rfc2045_cdecode_start() */
00095 } ;
00096 
/*
** Node of a singly linked list of name/value string pairs.  struct rfc2045
** keeps such lists for header attributes (content_type_attr,
** content_disposition_attr); rfc2045_getattr() and rfc2045_attrset()
** take them as arguments.
*/
00097 struct rfc2045attr {
00098        struct rfc2045attr *next;   /* Next node in the list */
00099        char *name;   /* Attribute name */
00100        char *value;   /* Attribute value */
00101        } ;
00102 
/*
** Core entry points: obtain a struct rfc2045, feed it message data, and
** release it, followed by read-only accessors that take a
** const struct rfc2045 *.
** NOTE(review): rfc2045_alloc() is declared with an empty parameter list;
** in C (before C23) that leaves its arguments unchecked, unlike
** rfc2045_alloc(void).
*/
00103 struct rfc2045 *rfc2045_alloc();
00104 void rfc2045_parse(struct rfc2045 *, const char *, size_t);   /* Takes a buffer pointer and its size; presumably may be called repeatedly with successive chunks -- confirm */
00105 void rfc2045_parse_partial(struct rfc2045 *);
00106 void rfc2045_free(struct rfc2045 *);
00107 
00108 void rfc2045_mimeinfo(const struct rfc2045 *,
00109        const char **,
00110        const char **,
00111        const char **);   /* Three string results returned through pointers; which is which is not visible here -- see the implementation */
00112 
00113 const char *rfc2045_boundary(const struct rfc2045 *);
00114 int rfc2045_isflowed(const struct rfc2045 *);
00115 int rfc2045_isdelsp(const struct rfc2045 *);
00116 char *rfc2045_related_start(const struct rfc2045 *);   /* NOTE(review): returns non-const char * unlike its neighbours; presumably a string the caller must free -- confirm */
00117 const char *rfc2045_content_id(const struct rfc2045 *);
00118 const char *rfc2045_content_description(const struct rfc2045 *);
00119 const char *rfc2045_content_language(const struct rfc2045 *);
00120 const char *rfc2045_content_md5(const struct rfc2045 *);
00121 
00122 void rfc2045_mimepos(const struct rfc2045 *, off_t *, off_t *, off_t *,
00123        off_t *, off_t *);   /* Five off_t results; presumably the position and line-count fields of struct rfc2045 -- confirm order in the implementation */
00124 unsigned rfc2045_mimepartcount(const struct rfc2045 *);
00125 
00126 void rfc2045_xdump(struct rfc2045 *);
00127 
/*
** Node of a singly linked list of integers.  The callback given to
** rfc2045_decode() receives a pointer to one of these alongside the
** struct rfc2045; presumably the list identifies that section's position
** in the MIME tree -- confirm.
*/
00128 struct rfc2045id {
00129        struct rfc2045id *next;   /* Next node in the list */
00130        int idnum;   /* This node's number */
00131 } ;
00132 
00133 void rfc2045_decode(struct rfc2045 *,
00134                   void (*)(struct rfc2045 *, struct rfc2045id *, void *),
00135                   void *);
00136 
00137 struct rfc2045 *rfc2045_find(struct rfc2045 *, const char *);
00138 
00139 
00140 /*
00141 ** Source of an rfc2045-formatted content (internal)
00142 */
00143 
/*
** Abstract message source: a set of function pointers plus an opaque
** argument.  rfc2045src_init_fd() returns one for a file descriptor and
** rfc2045src_deinit() destroys it.
** NOTE(review): the trailing void * parameter of each function is
** presumably the arg member -- confirm.
*/
00144 struct rfc2045src {
00145        void (*deinit_func)(void *);   /* Presumably invoked by rfc2045src_deinit() -- confirm */
00146 
00147        int (*seek_func)(off_t pos, void *);   /* Seek to offset pos */
00148        ssize_t (*read_func)(char *buf, size_t cnt, void *);   /* Read up to cnt bytes into buf */
00149 
00150        void *arg;   /* Opaque data for the functions above */
00151 };
00152 /* Read from a filedesc, returns a malloced buffer */
00153 
00154 struct rfc2045src *rfc2045src_init_fd(int fd);
00155 
00156 /* Destroy a rfc2045src */
00157 
00158 void rfc2045src_deinit(struct rfc2045src *);
00159 
00160 /************************/
00161 
/*
** Content decoding (start / feed / end), default charset get/set, and
** construction of a struct rfc2045 from a file descriptor or FILE *.
*/
00162 void rfc2045_cdecode_start(struct rfc2045 *,
00163        int (*)(const char *, size_t, void *), void *);   /* Callback plus an opaque void *; presumably passed back as the callback's third argument -- confirm */
00164 int rfc2045_cdecode(struct rfc2045 *, const char *, size_t);
00165 int rfc2045_cdecode_end(struct rfc2045 *);
00166 
00167 const char *rfc2045_getdefaultcharset();   /* NOTE(review): empty parameter list, arguments unchecked in C (before C23) */
00168 void rfc2045_setdefaultcharset(const char *);
00169 struct rfc2045 *rfc2045_fromfd(int);
00170 #define       rfc2045_fromfp(f)    (rfc2045_fromfd(fileno((f))))   /* FILE * convenience wrapper: passes fileno(f) to rfc2045_fromfd() */
00171 struct rfc2045 *rfc2045header_fromfd(int);
00172 #define        rfc2045header_fromfp(f)        (rfc2045header_fromfd(fileno((f))))   /* FILE * convenience wrapper: passes fileno(f) to rfc2045header_fromfd() */
00173 
00174 extern void rfc2045_error(const char *);   /* Presumably an error hook taking a message string; where it is defined is not visible here -- confirm */
00175 
00176 
/*
** Table of callbacks; struct rfc2045 holds a pointer to one of these in
** its rfc2045acptr member.
** NOTE(review): by name, the functions are invoked at the start of a
** section, for its contents, and at its end -- confirm against
** rfc2045_alloc_ac() and its callers.
*/
00177 struct  rfc2045ac {
00178        void (*start_section)(struct rfc2045 *);   /* Receives the section's struct rfc2045 */
00179        void (*section_contents)(const char *, size_t);   /* Receives a buffer pointer and its size */
00180        void (*end_section)();   /* Empty parameter list: arguments are unchecked in C (before C23) */
00181        } ;
00182 
00183 struct rfc2045 *rfc2045_alloc_ac();
00184 int rfc2045_ac_check(struct rfc2045 *, int);
00185 int rfc2045_rewrite(struct rfc2045 *p, struct rfc2045src *src, int fdout_arg,
00186                   const char *appname);
00187 int rfc2045_rewrite_func(struct rfc2045 *p, struct rfc2045src *src,
00188                       int (*funcarg)(const char *, int, void *),
00189                       void *funcargarg,
00190                       const char *appname);
00191 
00192 /* Internal functions */
00193 
00194 int rfc2045_try_boundary(struct rfc2045 *, struct rfc2045src *, const char *);
00195 char *rfc2045_mk_boundary(struct rfc2045 *, struct rfc2045src *);
00196 const char *rfc2045_getattr(const struct rfc2045attr *, const char *);
00197 int rfc2045_attrset(struct rfc2045attr **, const char *, const char *);
00198 
00199 /* MIME content base/location */
00200 
00201 char *rfc2045_content_base(struct rfc2045 *p);
00202        /* This joins Content-Base: and Content-Location:, as best as I
00203        ** can figure it out.
00204        */
00205 
00206 char *rfc2045_append_url(const char *, const char *);
00207        /* Do this with two arbitrary URLs */
00208 
00209 /* MISC mime functions */
00210 
00211 struct rfc2045 *rfc2045_searchcontenttype(struct rfc2045 *, const char *);
00212        /* Assume that the "real" message text is the first MIME section here
00213        ** with the given content type.
00214        */
00215 
00216 int rfc2045_decodemimesection(struct rfc2045src *, /* Message to decode */
00217                            struct rfc2045 *,     /* MIME section to decode */
00218                            int (*)(const char *, size_t, void *),
00219                            /*
00220                            ** Callback function that receives decoded
00221                            ** content.
00222                            */
00223                            void *  /* 3rd arg to the callback function */
00224                            );
00225 /*
00226 ** Decode a given MIME section.
00227 */
00228 
00229 int rfc2045_decodetextmimesection(struct rfc2045src *, /* Message to decode */
00230                               struct rfc2045 *, /* MIME section */
00231                               const char *,      /* Convert to this character set */
00232                               int *, /* Set to non-0 if MIME section contained chars that could not be converted to the requested charset */
00233                               int (*)(const char *, size_t, void *),
00234                               /*
00235                               ** Callback function that receives decoded
00236                               ** content.
00237                               */
00238                               void * /* 3rd arg to the callback function */
00239                            );
00240        /*
00241        ** Like decodemimesection(), except that the text is automatically
00242        ** converted to the specified character set (this function falls back
00243        ** to decodemimesection() if libunicode.a is not available, or if
00244        ** either the specified character set, or the MIME character set
00245        ** is not supported by libunicode.a).
00246        */
00247 
00248 
00249        /*
00250        ** READ HEADERS FROM A MIME SECTION.
00251        **
00252        ** Call rfc2045header_start() to allocate a structure for the given
00253        ** MIME section.
00254        **
00255        ** Call rfc2045header_get() to repeatedly get the next header.
00256        ** Function returns < 0 for a failure (out of memory, or something
00257        ** like that).  Function returns 0 for a success.  Example:
00258        **
00259        ** rfc2045header_get(ptr, &header, &value, 0);
00260        **
00261        ** If success: check if header is NULL - end of headers, else
00262        ** "header" and "value" will contain the RFC 822 header.
00263        **
00264        ** Last argument is flags:
00265        */
00266 
00267 #define RFC2045H_NOLC 1            /* Do not convert header to lowercase */
00268 #define RFC2045H_KEEPNL 2   /* Preserve newlines in the value string
00269                             ** of multiline headers.
00270                             */
00271 
00272 struct rfc2045headerinfo *
00273        rfc2045header_start(struct rfc2045src *,/* Readonly source */
00274                          struct rfc2045 * /* MIME section to read */
00275                          );
00276 
00277 int rfc2045header_get(struct rfc2045headerinfo *,
00278                     char **,       /* Header return */
00279                     char **,       /* Value return */
00280                     int);   /* Flags */
00281 
00282 void rfc2045header_end(struct rfc2045headerinfo *);
00283 
00284 
00285 /*
00286 ** Generic MIME header parsing code.
00287 **
00288 ** header - something like "text/plain; charset=us-ascii; format=flowed".
00289 **
00290 ** header_type_cb - callback function, receives the "text/plain" parameter.
00291 **
00292 ** header_param_cb - callback function, repeatedly invoked to process the
00293 ** additional parameters.  In this example, receives "charset" and "us-ascii".
00294 ** Note - the first parameter will always be in lowercase.
00295 **
00296 ** void_arg - passthrough parameter to the callback functions.
00297 */
00298 
00299 int rfc2045_parse_mime_header(const char *header,
00300                            void (*header_type_cb)(const char *, void *),
00301                            void (*header_param_cb)(const char *,
00302                                                 const char *,
00303                                                 void *),
00304                            void *void_arg);
00305 
00306 /*
00307 ** The rfc2045_makereply function is used to generate an initial
00308 ** reply to a MIME message.  rfc2045_makereply takes the following
00309 ** structure:
00310 */
00311 
00312 struct rfc2045_mkreplyinfo {
00313 
00314        struct rfc2045src *src; /* Original message source */
00315 
00316        struct rfc2045 *rfc2045partp;
00317        /*
00318        ** rfc2045 structure for the message to reply.  This may actually
00319        ** represent a single message/rfc822 section within a larger MIME
00320        ** message digest, in which case we format a reply to this message.
00321        */
00322 
00323        void *voidarg;       /* Transparent argument passed to the callback
00324                      ** functions.
00325                      */
00326 
00327        /*
00328        ** The following callback functions are called to generate the reply
00329        ** message.  They must be initialized.
00330        */
00331 
00332        void (*write_func)(const char *, size_t, void *);
00333        /* Called to write out the content of the message */
00334 
00335        void (*writesig_func)(void *);
00336        /* Called to write out the sender's signature */
00337 
00338        int (*myaddr_func)(const char *, void *);
00339        /* myaddr_func receives a pointer to an RFC 822 address, and it
00340        ** should return non-zero if the address is the sender's address
00341        */
00342 
00343        const char *replymode;
00344        /*
00345        ** replymode must be initialized to one of the following.  It sets
00346        ** the actual template for the generated response.
00347        **
00348        ** "forward" - forward original message.
00349        ** "forwardatt" - forward original message as an RFC822 attachment
00350        ** "reply" - a standard reply to the original message's sender
00351        ** "replydsn" - a DSN reply to the original message's sender
00352        ** "feedback" - generate a feedback report (RFC 5965)
00353        ** "replyfeedback" - "feedback" to the sender's address.
00354        ** "replyall" - a "reply to all" response.
00355        ** "replylist" - "reply to mailing list" response.  This is a reply
00356        ** that's addressed to the mailing list the original message was sent
00357        ** to.
00358        */
00359 
00360        int replytoenvelope;
00361        /*
00362        ** If non-zero, the "reply" or "replydsn" message gets addressed to the
00363        ** "Return-Path" or "Errors-To" address, if available.
00364        */
00365 
00366        int donotquote;
00367 
00368        /*
00369        ** If donotquote is set, the contents of the original message are not
00370        ** quoted by any of the "reply" modes, and replysalut (below) does not
00371        ** get emitted.
00372        */
00373 
00374        int fullmsg;
00375        /*
00376        ** For replydsn, feedback, replyfeedback, attach the entire message
00377        ** instead of just its headers.
00378        */
00379 
00380        const char *replysalut;
00381        /*
00382        ** This should be set to the salutation to be used for the reply.
00383        ** The following %-formats may appear in this string:
00384        **
00385        ** %% - an explicit % character
00386        **
00387        ** %n - a newline character
00388        **
00389        ** %C - the X-Newsgroup: header from the original message
00390        **
00391        ** %N - the Newsgroups: header from the original message
00392        **
00393        ** %i - the Message-ID: header from the original message
00394        **
00395        ** %f - the original message's sender's address
00396        **
00397        ** %F - the original message's sender's name
00398        **
00399        ** %S - the Subject: header from the original message
00400        **
00401        ** %d - the original message's date, in the local timezone
00402        **
00403        ** %{...}d - use strftime() to format the original message's date.
00404        **           A plain %d is equivalent to %{%a, %d %b %Y %H:%M:%S %z}d.
00405        **
00406        ** Example:  "%F writes:"
00407        */
00408 
00409        const char *forwarddescr;
00410        /*
00411        ** For forwardatt, this is the Content-Description: header,
00412        ** (typically "Forwarded message").
00413        */
00414 
00415        /*
00416        ** If not NULL, overrides the Subject: header
00417        */
00418 
00419        const char *subject;
00420 
00421        /*
00422        ** When reply mode is 'replydsn', dsnfrom must be set to a valid
00423        ** email address that's specified as the address that's generating
00424        ** the DSN.
00425        */
00426        const char *dsnfrom;
00427 
00428        /*
00429        ** When reply mode is 'replyfeedback', feedbacktype must be set to
00430        ** one of the registered feedback types:
00431        ** "abuse", "fraud", "other", "virus".
00432        */
00433        const char *feedbacktype;
00434 
00435        /*
00436        ** Feedback report headers.
00437        **
00438        ** NOTE: rfc2045_makereply() automatically inserts the
00439        ** Feedback-Type: (from feedbacktype), User-Agent:, Version:, and
00440        ** Arrival-Date: headers.
00441        **
00442        ** This is an array of alternating header name and header value
00443        ** strings. The header name string does not contain a colon,
00444        ** rfc2045_makereply supplies one. And, basically, generates
00445        ** "name: value" from this list.
00446        **
00447        ** For convenience's sake, the capitalization of the headers gets
00448        ** adjusted to match the convention in RFC 5965.
00449        **
00450        ** The list, which must contain an even number of strings, is terminated
00451        ** by a NULL pointer.
00452        */
00453        const char * const *feedbackheaders;
00454 
00455        /*
00456        ** Set the reply/fwd MIME headers. If this is a NULL pointer,
00457        ** write_func() receives ``Content-Type: text/plain; format=flowed;
00458        ** delsp=yes; charset="charset" '' with the charset specified below,
00459        ** and "Content-Transfer-Encoding: 8bit".
00460        **
00461        ** If this is not a NULL pointer, the effect of
00462        ** this function should be invocation of write_func() to perform the
00463        ** analogous purpose.
00464        **
00465        ** The output of content_set_charset() should be consistent with the
00466        ** contents of the charset field.
00467        */
00468 
00469        void (*content_set_charset)(void *);
00470 
00471        /*
00472        ** Set the reply/fwd content.
00473        **
00474        ** This function gets called at the point where the additional contents
00475        ** of the reply/fwd should go.
00476        **
00477        ** If this is not a NULL pointer, the effect of this function should
00478        ** be invocation of write_func() with the additional contents of the
00479        ** reply/fwd. The added content should be consistent with the
00480        ** charset field.
00481        **
00482        ** Note -- this content is likely to end up in a multipart MIME
00483        ** message, as such it should not contain any lines that look like
00484        ** MIME boundaries.
00485        */
00486 
00487        void (*content_specify)(void *);
00488 
00489        const char *mailinglists;
00490        /*
00491        ** This should be set to a whitespace-delimited list of mailing list
00492        ** RFC 822 addresses that the respondent is subscribed to.  It is used
00493        ** to figure out which mailing list the original message was sent to
00494        ** (all addresses in the original message are compared against this
00495        ** list).  In the event that we can't find a mailing list address on
00496        ** the original message, "replylist" will fall back to "replyall".
00497        */
00498 
00499        const char *charset;
00500        /* The respondent's local charset */
00501 
00502        const char *forwardsep;
00503        /* This is used instead of replysalut for forwards. */
00504 } ;
00505 
00506 int rfc2045_makereply(struct rfc2045_mkreplyinfo *);
00507 
00508 /********** Search message content **********/
00509 
00510 /*
00511 ** Callback passed rfc2045_decodemsgtoutf8()
00512 */
00513 
/*
** Callback set handed to rfc2045_decodemsgtoutf8(): option flags, an
** optional header filter, the output function, an optional per-header
** completion hook, and the opaque argument passed to all of them.
*/
00514 struct rfc2045_decodemsgtoutf8_cb {
00515 
00516        int flags; /* Optional flags: the RFC2045_DECODEMSG_* values defined below */
00517 
00518        /* Optional header filter. When non-null, it gets the name of a header,
00519        ** and the raw, unformatted, header contents.
00520        ** If it returns non-0, the header gets converted and sent to output.
00521        ** If null, all headers are sent.
00522        */
00523 
00524        int (*headerfilter_func)(const char *name, const char *raw, void *arg);
00525 
00526        /* The output function: receives cnt bytes at data */
00527        int (*output_func)(const char *data, size_t cnt, void *arg);
00528 
00529        /* If not null, gets invoked after decoding a single header */
00530        int (*headerdone_func)(const char *headername, void *arg);
00531 
00532        void *arg; /* Passthrough arg to _funcs */
00533 };
00534 
00535 #define RFC2045_DECODEMSG_NOBODY 0x01
00536 /* Do not decode MIME content, headers only */
00537 
00538 #define RFC2045_DECODEMSG_NOHEADERS 0x02
00539 /*
00540 ** Do not decode MIME headers, only body. This is the same as using a
00541 ** headerfilter_func that always returns 0
00542 */
00543 
00544 #define RFC2045_DECODEMSG_NOHEADERNAME 0x04
00545 /*
00546 ** Do not prepend name: to converted header content.
00547 */
00548 
00549 /*
00550 ** Convert a message into a utf8 bytestream. The output produced by this
00551 ** function is a concatenation of decoded header and text content data,
00552 ** converted to utf8.
00553 **
00554 ** This is fed into an output function. The output function takes a
00555 ** buffer of octets and its length, and returns 0 if the data was
00556 ** processed, or a negative value if the output was aborted.
00557 */
00558 
00559 int rfc2045_decodemsgtoutf8(struct rfc2045src *src, /* The message */
00560                          struct rfc2045 *p, /* The parsed message */
00561 
00562                          /* The callback */
00563                          struct rfc2045_decodemsgtoutf8_cb *callback);
00564 
00565 
00566 /********** Decode RFC 2231 attributes ***********/
00567 
00568 /*
00569 ** rfc2231_decodeType() decodes an RFC 2231-encoded Content-Type: header
00570 ** attribute, and rfc2231_decodeDisposition() decodes the attribute in the
00571 ** Content-Disposition: header.
00572 **
00573 ** chsetPtr, langPtr, and textPtr should point to a char ptr.  These
00574 ** functions automatically allocate the memory, the caller's responsible for
00575 ** freeing it.  A NULL argument may be provided if the corresponding
00576 ** information is not wanted.
00577 */
00578 
00579 int rfc2231_decodeType(struct rfc2045 *rfc, const char *name,
00580                      char **chsetPtr,
00581                      char **langPtr,
00582                      char **textPtr);
00583 
00584 int rfc2231_decodeDisposition(struct rfc2045 *rfc, const char *name,
00585                            char **chsetPtr,
00586                            char **langPtr,
00587                            char **textPtr);
00588 
00589 /*
00590 ** The following two functions convert the decoded string to the local
00591 ** charset via unicodelib.  textPtr cannot be null, this time, because this
00592 ** is the only return value.   A NULL myChset is an alias for the default
00593 ** charset.
00594 */
00595 
00596 int rfc2231_udecodeType(struct rfc2045 *rfc, const char *name,
00597                      const char *myChset,
00598                      char **textPtr);
00599 
00600 int rfc2231_udecodeDisposition(struct rfc2045 *rfc, const char *name,
00601                             const char *myChset,
00602                             char **textPtr);
00603 
00604 /*
00605 ** Build an RFC 2231-encoded name*=value.
00606 **
00607 ** name, value, charset, language: see RFC 2231.
00608 **
00609 ** (*cb_func) gets invoked 1 or more times, receives a "name=value" pair
00610 ** each time.
00611 **
00612 ** cb_func must return 0; a non-0 return terminates rfc2231_attrCreate, which
00613 ** passes through the return code.
00614 **
00615 */
00616 int rfc2231_attrCreate(const char *name, const char *value,
00617                      const char *charset,
00618                      const char *language,
00619                      int (*cb_func)(const char *param,
00620                                   const char *value,
00621                                   void *void_arg),
00622                      void *cb_arg);
00623 
/*
** Node of a singly linked list of RFC 2231 parameter pieces.  The list is
** passed by address to rfc2231_buildAttrList() and by value to
** rfc2231_paramDecode() and rfc2231_paramDestroy().
*/
00626 struct rfc2231param {
00627        struct rfc2231param *next;   /* Next node in the list */
00628 
00629        int paramnum;   /* NOTE(review): presumably the RFC 2231 continuation number of this piece -- confirm */
00630        int encoded;   /* NOTE(review): presumably non-zero when this piece uses RFC 2231 encoding -- confirm */
00631 
00632        const char *value;   /* The piece's text; declared const, so not modified through this struct */
00633 };
00634 
/*
** Helpers operating on a struct rfc2231param list.
** NOTE(review): the contracts below are inferred from the signatures only;
** confirm against the implementation before relying on them.
*/
00635 void rfc2231_paramDestroy(struct rfc2231param *paramList);   /* Presumably releases a list made by rfc2231_buildAttrList() */
00636 int rfc2231_buildAttrList(struct rfc2231param **paramList,
00637                        const char *name,
00638 
00639                        const char *attrName,
00640                        const char *attrValue);   /* Takes the list head by address, so it can update the caller's list */
00641 
00642 void rfc2231_paramDecode(struct rfc2231param *paramList,
00643                       char *charsetPtr,
00644                       char *langPtr,
00645                       char *textPtr,
00646                       int *charsetLen,
00647                       int *langLen,
00648                       int *textLen);   /* Three caller-supplied char buffers and three int length outputs; whether NULL buffers are allowed is not visible here */
00649 
00650 #if 0
00651 {
00652 #endif
00653 
00654 #ifdef  __cplusplus
00655 }
00656 #endif
00657 
00658 #endif