Back to index

courier  0.68.2
rfc2047.c
Go to the documentation of this file.
00001 /*
00002 ** Copyright 1998 - 2011 Double Precision, Inc.  See COPYING for
00003 ** distribution information.
00004 */
00005 
00006 #include      "rfc822.h"
00007 #include      <stdio.h>
00008 #include      <ctype.h>
00009 #include      <string.h>
00010 #include      <stdlib.h>
00011 #include      <errno.h>
00012 
00013 #include      "rfc822hdr.h"
00014 #include      "rfc2047.h"
00015 #include      "../unicode/unicode.h"
00016 #if LIBIDN
00017 #include <idna.h>
00018 #include <stringprep.h>
00019 #endif
00020 
00021 
/*
** Line length budget. do_encode_words_method() derives the size of each
** encoded chunk from this value.
*/
#define       RFC2047_ENCODE_FOLDLENGTH   76

/* Upper-case hexadecimal digits; encodeqp() uses them to build =XX escapes. */
static const char xdigit[]="0123456789ABCDEF";

/*
** Base64 alphabet, indexed by 6-bit value in encodebase64(). It is also
** consulted by rfc2047_qp_allow_word() as a set of permitted characters.
*/
static const char base64tab[]=
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
** Defined further down in this file; declared here because
** rfc2047_encode_header_do() takes its address.
*/
static char *a_rfc2047_encode_str(const char *str, const char *charset,
                              int isaddress);
00030 
00031 static void rfc2047_encode_header_do(const struct rfc822a *a,
00032                                  const char *charset,
00033                                  void (*print_func)(char, void *),
00034                                  void (*print_separator)(const char *,
00035                                                       void *), void *ptr)
00036 {
00037        rfc822_print_common(a, &a_rfc2047_encode_str, charset,
00038                          print_func, print_separator, ptr);
00039 }
00040 
/*
** Build a newly allocated string consisting of the first pfix_len bytes of
** pfix followed by the encoded form of domain.
**
** When built with LIBIDN the domain goes through idna_to_ascii_8z(); a
** failure there sets errno to EINVAL. Without LIBIDN the domain is copied
** unchanged.
**
** Returns NULL on failure, otherwise a buffer the caller must free().
*/
static char *rfc822_encode_domain_int(const char *pfix,
				      size_t pfix_len,
				      const char *domain)
{
	char *ascii_domain;
	char *result;

#if LIBIDN
	if (idna_to_ascii_8z(domain, &ascii_domain, 0) != IDNA_SUCCESS)
	{
		errno=EINVAL;
		return NULL;
	}
#else
	ascii_domain=strdup(domain);

	if (ascii_domain == NULL)
		return NULL;
#endif

	/* prefix + domain + terminating NUL */
	result=malloc(pfix_len+strlen(ascii_domain)+1);

	if (result != NULL)
	{
		if (pfix_len)
			memcpy(result, pfix, pfix_len);

		strcpy(result+pfix_len, ascii_domain);
	}

	/* The intermediate copy is released on success and failure alike */
	free(ascii_domain);
	return result;
}
00082 
/*
** Encode the domain portion of an address.
**
** The address is first run through libmail_u_convert_tobuf() with "utf-8"
** as the destination. If the result contains an '@', everything up to and
** including the first '@' is kept verbatim and only the remainder is passed
** through the domain encoder; otherwise the whole string is treated as the
** domain.
**
** Returns a malloc()ed string the caller must free(), or NULL on failure.
*/
char *rfc822_encode_domain(const char *address,
			   const char *charset)
{
	char *utf8=libmail_u_convert_tobuf(address, charset, "utf-8", NULL);
	char *at;
	char *encoded;

	if (utf8 == NULL)
		return NULL;

	at=strchr(utf8, '@');

	if (at != NULL)
	{
		/* The domain starts right after the '@' */
		const char *domain=at+1;

		encoded=rfc822_encode_domain_int(utf8, domain-utf8, domain);
	}
	else
	{
		encoded=rfc822_encode_domain_int("", 0, utf8);
	}

	free(utf8);
	return encoded;
}
00106 
00107 static char *a_rfc2047_encode_str(const char *str, const char *charset,
00108                               int isaddress)
00109 {
00110        size_t l;
00111        char   *p;
00112 
00113        if (isaddress)
00114               return rfc822_encode_domain(str, charset);
00115 
00116        for (l=0; str[l]; l++)
00117               if (str[l] & 0x80)
00118                      break;
00119 
00120        if (str[l] == 0)
00121        {
00122               size_t n;
00123 
00124               for (l=0; str[l]; l++)
00125                      if (strchr(RFC822_SPECIALS, str[l]))
00126                             break;
00127 
00128               if (str[l] == 0)
00129                      return (strdup(str));
00130 
00131               for (n=3, l=0; str[l]; l++)
00132               {
00133                      switch (str[l]) {
00134                      case '"':
00135                      case '\\':
00136                             ++n;
00137                      break;
00138                      }
00139 
00140                      ++n;
00141               }
00142 
00143               p=malloc(n);
00144 
00145               if (!p)
00146                      return NULL;
00147 
00148               p[0]='"';
00149 
00150               for (n=1, l=0; str[l]; l++)
00151               {
00152                      switch (str[l]) {
00153                      case '"':
00154                      case '\\':
00155                             p[n++]='\\';
00156                      break;
00157                      }
00158 
00159                      p[n++]=str[l];
00160               }
00161               p[n++]='"';
00162               p[n]=0;
00163 
00164               return (p);
00165        }
00166 
00167        return rfc2047_encode_str(str, charset, rfc2047_qp_allow_word);
00168 }
00169 
/*
** Output callbacks for rfc2047_encode_header_addr(), defined below.
** count()/counts2() add up the number of characters that would be written;
** save()/saves2() write the same characters into a buffer.
*/
static void count(char c, void *p);
static void counts2(const char *c, void *p);
static void save(char c, void *p);
static void saves2(const char *c, void *p);
00174 
00175 char *rfc2047_encode_header_addr(const struct rfc822a *a,
00176                          const char *charset)
00177 {
00178 size_t l;
00179 char   *s, *p;
00180 
00181        l=1;
00182        rfc2047_encode_header_do(a, charset, &count, &counts2, &l);
00183        if ((s=malloc(l)) == 0)     return (0);
00184        p=s;
00185        rfc2047_encode_header_do(a, charset, &save, &saves2, &p);
00186        *p=0;
00187        return (s);
00188 }
00189 
00190 
00191 char *rfc2047_encode_header_tobuf(const char *name, /* Header name */
00192                               const char *header, /* Header's contents */
00193                               const char *charset)
00194 {
00195        if (rfc822hdr_is_addr(name))
00196        {
00197               char *s=0;
00198 
00199               struct rfc822t *t;
00200               struct rfc822a *a;
00201 
00202               if ((t=rfc822t_alloc_new(header, NULL, NULL)) != 0)
00203               {
00204                      if ((a=rfc822a_alloc(t)) != 0)
00205                      {
00206                             s=rfc2047_encode_header_addr(a, charset);
00207                             rfc822a_free(a);
00208                      }
00209                      rfc822t_free(t);
00210               }
00211               return s;
00212        }
00213 
00214        return rfc2047_encode_str(header, charset, rfc2047_qp_allow_word);
00215 }
00216 
/*
** Character callback for the counting pass: p points to a size_t tally,
** which is incremented by one. The character itself is not examined.
*/
static void count(char c, void *p)
{
	size_t *total=p;

	(void)c;
	*total += 1;
}
00221 
/*
** Separator callback for the counting pass.
**
** Counts a leading ',' (if the separator starts with one), then a newline
** and a space, then every remaining character of the separator.
*/
static void counts2(const char *c, void *p)
{
	const char *s=c;

	if (*s == ',')
	{
		count(*s, p);
		++s;
	}

	count('\n', p);
	count(' ', p);

	for (; *s; ++s)
		count(*s, p);
}
00232 
/*
** Character callback for the writing pass: p points to a char * write
** cursor. The character is stored at the cursor, which then advances by one.
*/
static void save(char c, void *p)
{
	char **cursor=p;

	*(*cursor)++=c;
}
00238 
/*
** Separator callback for the writing pass.
**
** Writes a leading ',' (if the separator starts with one), then a newline
** and a space, then every remaining character of the separator. This
** mirrors what counts2() counts.
*/
static void saves2(const char *c, void *p)
{
	const char *s=c;

	if (*s == ',')
	{
		save(*s, p);
		++s;
	}

	save('\n', p);
	save(' ', p);

	for (; *s; ++s)
		save(*s, p);
}
00249 
00250 static int encodebase64(const char *ptr, size_t len, const char *charset,
00251                      int (*qp_allow)(char),
00252                      int (*func)(const char *, size_t, void *), void *arg)
00253 {
00254        unsigned char ibuf[3];
00255        char obuf[4];
00256        int    rc;
00257 
00258        if ((rc=(*func)("=?", 2, arg)) ||
00259            (rc=(*func)(charset, strlen(charset), arg))||
00260            (rc=(*func)("?B?", 3, arg)))
00261               return rc;
00262 
00263        while (len)
00264        {
00265               size_t n=len > 3 ? 3:len;
00266 
00267               ibuf[0]= ptr[0];
00268               if (n>1)
00269                      ibuf[1]=ptr[1];
00270               else
00271                      ibuf[1]=0;
00272               if (n>2)
00273                      ibuf[2]=ptr[2];
00274               else
00275                      ibuf[2]=0;
00276               ptr += n;
00277               len -= n;
00278 
00279               obuf[0] = base64tab[ ibuf[0]        >>2 ];
00280               obuf[1] = base64tab[(ibuf[0] & 0x03)<<4|ibuf[1]>>4];
00281               obuf[2] = base64tab[(ibuf[1] & 0x0F)<<2|ibuf[2]>>6];
00282               obuf[3] = base64tab[ ibuf[2] & 0x3F ];
00283               if (n < 2)
00284                      obuf[2] = '=';
00285               if (n < 3)
00286                      obuf[3] = '=';
00287 
00288               if ((rc=(*func)(obuf, 4, arg)))
00289                      return rc;
00290        }
00291 
00292        if ((rc=(*func)("?=", 2, arg)))
00293               return rc;
00294        return 0;
00295 }
00296 
/*
** ISSPACE(i): true for tab, carriage return, newline and space.
**
** DOENCODEWORD(c): true when c must not appear literally in a header word:
** anything below 0x20 or above 0x7F, the characters '"', '_', '=' and '?',
** and anything the qp_allow callback rejects.
**
** NOTE: DOENCODEWORD() calls through a function pointer named qp_allow,
** which must be in scope wherever the macro is expanded. Both macros
** evaluate their argument more than once, so the argument must not have
** side effects.
*/
#define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')
#define DOENCODEWORD(c) \
	((c) < 0x20 || (c) > 0x7F || (c) == '"' || \
	 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)((char)(c)))
00301 
00302 /*
00303 ** Encode a character stream using quoted-printable encoding.
00304 */
00305 static int encodeqp(const char *ptr, size_t len,
00306                   const char *charset,
00307                   int (*qp_allow)(char),
00308                   int (*func)(const char *, size_t, void *), void *arg)
00309 {
00310        size_t i;
00311        int rc;
00312        char buf[3];
00313 
00314        if ((rc=(*func)("=?", 2, arg)) ||
00315            (rc=(*func)(charset, strlen(charset), arg))||
00316            (rc=(*func)("?Q?", 3, arg)))
00317               return rc;
00318 
00319        for (i=0; i<len; ++i)
00320        {
00321               size_t j;
00322 
00323               for (j=i; j<len; ++j)
00324               {
00325                      if (ptr[j] == ' ' || DOENCODEWORD(ptr[j]))
00326                             break;
00327               }
00328 
00329               if (j > i)
00330               {
00331                      rc=(*func)(ptr+i, j-i, arg);
00332 
00333                      if (rc)
00334                             return rc;
00335                      if (j >= len)
00336                             break;
00337               }
00338               i=j;
00339 
00340               if (ptr[i] == ' ')
00341                      rc=(*func)("_", 1, arg);
00342               else
00343               {
00344                      buf[0]='=';
00345                      buf[1]=xdigit[ ( ptr[i] >> 4) & 0x0F ];
00346                      buf[2]=xdigit[ ptr[i] & 0x0F ];
00347 
00348                      rc=(*func)(buf, 3, arg);
00349               }
00350 
00351               if (rc)
00352                      return rc;
00353        }
00354 
00355        return (*func)("?=", 2, arg);
00356 }
00357 
00358 /*
00359 ** Calculate whether the next word should be RFC2047-encoded.
00360 **
00361 ** Returns 0 if not, 1 if any character in the next word is flagged by
00362 ** DOENCODEWORD().
00363 */
00364 
00365 static int encode_word(const unicode_char *uc,
00366                      size_t ucsize,
00367                      int (*qp_allow)(char),
00368 
00369                      /*
00370                      ** Points to the starting offset of word in uc.
00371                      ** At exit, points to the end of the word in uc.
00372                      */
00373                      size_t *word_ptr)
00374 {
00375        size_t i;
00376        int encode=0;
00377 
00378        for (i=*word_ptr; i<ucsize; ++i)
00379        {
00380               if (ISSPACE(uc[i]))
00381                      break;
00382 
00383               if (DOENCODEWORD(uc[i]))
00384                      encode=1;
00385        }
00386 
00387        *word_ptr=i;
00388        return encode;
00389 }
00390 
00391 /*
00392 ** Calculate whether the next sequence of words should be RFC2047-encoded.
00393 **
00394 ** Whatever encode_word() returns for the first word, look at the next word
00395 ** and keep going as long as encode_word() keeps returning the same value.
00396 */
00397 
00398 static int encode_words(const unicode_char *uc,
00399                      size_t ucsize,
00400                      int (*qp_allow)(char),
00401 
00402                      /*
00403                      ** Points to the starting offset of words in uc.
00404                      ** At exit, points to the end of the words in uc.
00405                      */
00406 
00407                      size_t *word_ptr)
00408 {
00409        size_t i= *word_ptr, j, k;
00410 
00411        int flag=encode_word(uc, ucsize, qp_allow, &i);
00412 
00413        if (!flag)
00414        {
00415               *word_ptr=i;
00416               return flag;
00417        }
00418 
00419        j=i;
00420 
00421        while (j < ucsize)
00422        {
00423               if (ISSPACE(uc[j]))
00424               {
00425                      ++j;
00426                      continue;
00427               }
00428 
00429               k=j;
00430 
00431               if (!encode_word(uc, ucsize, qp_allow, &k))
00432                      break;
00433               i=j=k;
00434        }
00435 
00436        *word_ptr=i;
00437        return flag;
00438 }
00439 
00440 /*
00441 ** Encode a sequence of words.
00442 */
00443 static int do_encode_words_method(const unicode_char *uc,
00444                               size_t ucsize,
00445                               const char *charset,
00446                               int (*qp_allow)(char),
00447                               size_t offset,
00448                               int (*encoder)(const char *ptr, size_t len,
00449                                            const char *charset,
00450                                            int (*qp_allow)(char),
00451                                            int (*func)(const char *,
00452                                                       size_t, void *),
00453                                            void *arg),
00454                               int (*func)(const char *, size_t, void *),
00455                               void *arg)
00456 {
00457        char    *p;
00458        size_t  psize;
00459        int rc;
00460        int first=1;
00461 
00462        while (ucsize)
00463        {
00464               size_t j;
00465               size_t i;
00466 
00467               if (!first)
00468               {
00469                      rc=(*func)(" ", 1, arg);
00470 
00471                      if (rc)
00472                             return rc;
00473               }
00474               first=0;
00475 
00476               j=(RFC2047_ENCODE_FOLDLENGTH-offset)/2;
00477 
00478               if (j >= ucsize)
00479                      j=ucsize;
00480               else
00481               {
00482                      /*
00483                      ** Do not split rfc2047-encoded works across a
00484                      ** grapheme break.
00485                      */
00486 
00487                      for (i=j; i > 0; --i)
00488                             if (unicode_grapheme_break(uc[i-1], uc[i]))
00489                             {
00490                                    j=i;
00491                                    break;
00492                             }
00493               }
00494 
00495               if ((rc=libmail_u_convert_fromu_tobuf(uc, j, charset,
00496                                                 &p, &psize,
00497                                                 NULL)) != 0)
00498                      return rc;
00499 
00500 
00501               if (psize && p[psize-1] == 0)
00502                      --psize;
00503 
00504               rc=(*encoder)(p, psize, charset, qp_allow,
00505                            func, arg);
00506               free(p);
00507               if (rc)
00508                      return rc;
00509               offset=0;
00510               ucsize -= j;
00511               uc += j;
00512        }
00513        return 0;
00514 }
00515 
/*
** Output callback that only measures: adds n to the size_t that arg points
** to and ignores the data. Always returns 0.
*/
static int cnt_conv(const char *dummy, size_t n, void *arg)
{
	size_t *total=arg;

	(void)dummy;
	*total += n;
	return 0;
}
00521 
00522 /*
00523 ** Encode, or not encode, words.
00524 */
00525 
00526 static int do_encode_words(const unicode_char *uc,
00527                         size_t ucsize,
00528                         const char *charset,
00529                         int flag,
00530                         int (*qp_allow)(char),
00531                         size_t offset,
00532                         int (*func)(const char *, size_t, void *),
00533                         void *arg)
00534 {
00535        char    *p;
00536        size_t  psize;
00537        int rc;
00538        size_t b64len, qlen;
00539 
00540        /*
00541        ** Convert from unicode
00542        */
00543 
00544        if ((rc=libmail_u_convert_fromu_tobuf(uc, ucsize, charset,
00545                                          &p, &psize,
00546                                          NULL)) != 0)
00547               return rc;
00548 
00549        if (psize && p[psize-1] == 0)
00550               --psize;
00551 
00552        if (!flag) /* If not converting, then the job is done */
00553        {
00554               rc=(*func)(p, psize, arg);
00555               free(p);
00556               return rc;
00557        }
00558        free(p);
00559 
00560        /*
00561        ** Try first quoted-printable, then base64, then pick whichever
00562        ** one gives the shortest results.
00563        */
00564        qlen=0;
00565        b64len=0;
00566 
00567        rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
00568                               &encodeqp, cnt_conv, &qlen);
00569        if (rc)
00570               return rc;
00571 
00572        rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
00573                               &encodebase64, cnt_conv, &b64len);
00574        if (rc)
00575               return rc;
00576 
00577        return do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
00578                                   qlen < b64len ? encodeqp:encodebase64,
00579                                   func, arg);
00580 }
00581 
00582 /*
00583 ** RFC2047-encoding pass.
00584 */
00585 static int rfc2047_encode_callback(const unicode_char *uc,
00586                                size_t ucsize,
00587                                const char *charset,
00588                                int (*qp_allow)(char),
00589                                int (*func)(const char *, size_t, void *),
00590                                void *arg)
00591 {
00592        int    rc;
00593        size_t i;
00594        int    flag;
00595 
00596        size_t offset=27; /* FIXME: initial offset for line length */
00597 
00598        while (ucsize)
00599        {
00600               /* Pass along all the whitespace */
00601 
00602               if (ISSPACE(*uc))
00603               {
00604                      char c= *uc++;
00605                      --ucsize;
00606 
00607                      if ((rc=(*func)(&c, 1, arg)) != 0)
00608                             return rc;
00609                      continue;
00610               }
00611 
00612               i=0;
00613 
00614               /* Check if the next word needs to be encoded, or not. */
00615 
00616               flag=encode_words(uc, ucsize, qp_allow, &i);
00617 
00618               /*
00619               ** Then proceed to encode, or not encode, the following words.
00620               */
00621 
00622               if ((rc=do_encode_words(uc, i, charset, flag,
00623                                    qp_allow, offset,
00624                                    func, arg)) != 0)
00625                      return rc;
00626 
00627               offset=0;
00628               uc += i;
00629               ucsize -= i;
00630        }
00631 
00632        return 0;
00633 }
00634 
00635 
/*
** Output callback for the sizing pass of rfc2047_encode_str(): adds l to
** the size_t that p points to. The data is ignored. Always returns 0.
*/
static int count_char(const char *c, size_t l, void *p)
{
	size_t *total=p;

	(void)c;
	*total += l;
	return 0;
}
00643 
/*
** Output callback for the writing pass of rfc2047_encode_str(): p points
** to a char * write cursor. Copies l bytes from c to the cursor and
** advances the cursor by l. Always returns 0.
*/
static int save_char(const char *c, size_t l, void *p)
{
	char **cursor=p;
	char *dest=*cursor;

	memcpy(dest, c, l);
	*cursor=dest+l;
	return 0;
}
00652 
00653 char *rfc2047_encode_str(const char *str, const char *charset,
00654                       int (*qp_allow)(char c))
00655 {
00656        size_t i=1;
00657        char   *s, *p;
00658        unicode_char *uc;
00659        size_t ucsize;
00660        int err;
00661 
00662        /* Convert string to unicode */
00663 
00664        if (libmail_u_convert_tou_tobuf(str, strlen(str), charset,
00665                                    &uc, &ucsize, &err))
00666               return NULL;
00667 
00668        /*
00669        ** Perform two passes: calculate size of the buffer where the
00670        ** encoded string gets saved into, then allocate the buffer and
00671        ** do a second pass to actually do it.
00672        */
00673 
00674        if (rfc2047_encode_callback(uc, ucsize,
00675                                 charset,
00676                                 qp_allow,
00677                                 &count_char, &i))
00678        {
00679               free(uc);
00680               return NULL;
00681        }
00682 
00683        if ((s=malloc(i)) == 0)
00684        {
00685               free(uc);
00686               return NULL;
00687        }
00688 
00689        p=s;
00690        (void)rfc2047_encode_callback(uc, ucsize,
00691                                   charset,
00692                                   qp_allow,
00693                                   &save_char, &p);
00694        *p=0;
00695        free(uc);
00696        return (s);
00697 }
00698 
/*
** qp_allow callback that accepts every character: always returns 1.
*/
int rfc2047_qp_allow_any(char c)
{
	(void)c;
	return 1;
}
00703 
/*
** qp_allow callback that rejects '(', ')' and '"' and accepts everything
** else. Returns 0 for a rejected character, 1 otherwise.
*/
int rfc2047_qp_allow_comment(char c)
{
	switch (c) {
	case '(':
	case ')':
	case '"':
		return 0;
	default:
		return 1;
	}
}
00710 
00711 int rfc2047_qp_allow_word(char c)
00712 {
00713        return strchr(base64tab, c) != NULL ||
00714               strchr("*-=_", c) != NULL;
00715 }