Back to index

webcit  8.12-dfsg
decode.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 1996-2012 by the citadel.org team
00003  *
00004  * This program is open source software.  You can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License, version 3.
00006  *
00007  * This program is distributed in the hope that it will be useful,
00008  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00009  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00010  * GNU General Public License for more details.
00011  */
00012 
00013 #include "webcit.h"
00014 #ifdef HAVE_ICONV
00015 
00016 /*
00017  * Wrapper around iconv_open()
00018  * Our version adds aliases for non-standard Microsoft charsets
00019  * such as 'MS950', aliasing them to names like 'CP950'
00020  *
00021  * tocode     Target encoding
00022  * fromcode   Source encoding
00023  * /
00024 iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
00025 {
00026        iconv_t ic = (iconv_t)(-1) ;
00027        ic = iconv_open(tocode, fromcode);
00028        if (ic == (iconv_t)(-1) ) {
00029               char alias_fromcode[64];
00030               if ( (strlen(fromcode) == 5) && (!strncasecmp(fromcode, "MS", 2)) ) {
00031                      safestrncpy(alias_fromcode, fromcode, sizeof alias_fromcode);
00032                      alias_fromcode[0] = 'C';
00033                      alias_fromcode[1] = 'P';
00034                      ic = iconv_open(tocode, alias_fromcode);
00035               }
00036        }
00037        return(ic);
00038 }
00039 */
00040 
00041 
/*
 * Locate the "?=" that closes the RFC2047 encoded word starting at bptr.
 *
 * bptr   points at the "=?" that opens the encoded word
 *
 * Returns a pointer to the closing "?=", or NULL if none can be found.
 *
 * The encoding selector is matched case-insensitively ("?B?", "?b?", "?Q?",
 * "?q?"), as RFC2047 requires.  When the selector is recognised the search
 * for "?=" begins after it, so that a payload starting with '=' (typical
 * for quoted-printable, e.g. "?q?=41") is not mistaken for the terminator.
 */
static inline char *FindNextEnd (char *bptr)
{
	char *end;

	/* Too short to hold even the "=?" lead-in; the bptr + 2 below
	 * would otherwise step past the string terminator. */
	if ((bptr == NULL) || (bptr[0] == '\0') || (bptr[1] == '\0'))
		return NULL;

	/* Find the '?' that ends the charset name */
	end = strchr(bptr + 2, '?');
	if (end == NULL)
		return NULL;

	if (((end[1] == 'B') || (end[1] == 'b') ||
	     (end[1] == 'Q') || (end[1] == 'q')) &&
	    (end[2] == '?')) {
		/* skip on to the end of the cluster, the next ?= */
		end = strstr(end + 3, "?=");
	}
	else {
		/* sort of half valid encoding, try to find an end. */
		end = strstr(bptr, "?=");
	}
	return end;
}
00058 
00059 /*
00060  * Handle subjects with RFC2047 encoding such as:
00061  * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
00062  */
00063 void utf8ify_rfc822_string(char **buf) {
00064        char *start, *end, *next, *nextend, *ptr;
00065        char newbuf[1024];
00066        char charset[128];
00067        char encoding[16];
00068        char istr[1024];
00069        iconv_t ic = (iconv_t)(-1) ;
00070        char *ibuf;                 
00071        char *obuf;                 
00072        size_t ibuflen;                    
00073        size_t obuflen;                    
00074        char *isav;                 
00075        char *osav;                 
00076        int passes = 0;
00077        int i, len, delta;
00078        int illegal_non_rfc2047_encoding = 0;
00079 
00080        /* Sometimes, badly formed messages contain strings which were simply
00081         *  written out directly in some foreign character set instead of
00082         *  using RFC2047 encoding.  This is illegal but we will attempt to
00083         *  handle it anyway by converting from a user-specified default
00084         *  charset to UTF-8 if we see any nonprintable characters.
00085         */
00086        len = strlen(*buf);
00087        for (i=0; i<len; ++i) {
00088               if (((*buf)[i] < 32) || ((*buf)[i] > 126)) {
00089                      illegal_non_rfc2047_encoding = 1;
00090                      i = len; /*< take a shortcut, it won't be more than one. */
00091               }
00092        }
00093        if (illegal_non_rfc2047_encoding) {
00094               StrBuf *default_header_charset;
00095               get_preference("default_header_charset", &default_header_charset);
00096               if ( (strcasecmp(ChrPtr(default_header_charset), "UTF-8")) && 
00097                    (strcasecmp(ChrPtr(default_header_charset), "us-ascii")) ) {
00098                      ctdl_iconv_open("UTF-8", ChrPtr(default_header_charset), &ic);
00099                      if (ic != (iconv_t)(-1) ) {
00100                             ibuf = malloc(1024);
00101                             isav = ibuf;
00102                             safestrncpy(ibuf, *buf, 1023);
00103                             ibuflen = strlen(ibuf);
00104                             obuflen = 1024;
00105                             obuf = (char *) malloc(obuflen);
00106                             osav = obuf;
00107                             iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
00108                             osav[1023-obuflen] = 0;
00109                             free(*buf);
00110                             *buf = osav;
00111                             iconv_close(ic);
00112                             free(isav);
00113                      }
00114               }
00115        }
00116 
00117        /* pre evaluate the first pair */
00118        nextend = end = NULL;
00119        len = strlen(*buf);
00120        start = strstr(*buf, "=?");
00121        if (start != NULL) 
00122               end = FindNextEnd (start);
00123 
00124        while ((start != NULL) && (end != NULL))
00125        {
00126               next = strstr(end, "=?");
00127               if (next != NULL)
00128                      nextend = FindNextEnd(next);
00129               if (nextend == NULL)
00130                      next = NULL;
00131 
00132               /* did we find two partitions */
00133               if ((next != NULL) && 
00134                   ((next - end) > 2))
00135               {
00136                      ptr = end + 2;
00137                      while ((ptr < next) && 
00138                             (isspace(*ptr) ||
00139                             (*ptr == '\r') ||
00140                             (*ptr == '\n') || 
00141                             (*ptr == '\t')))
00142                             ptr ++;
00143                      /* did we find a gab just filled with blanks? */
00144                      if (ptr == next)
00145                      {
00146                             memmove (end + 2,
00147                                     next,
00148                                     len - (next - start));
00149 
00150                             /* now terminate the gab at the end */
00151                             delta = (next - end) - 2;
00152                             len -= delta;
00153                             (*buf)[len] = '\0';
00154 
00155                             /* move next to its new location. */
00156                             next -= delta;
00157                             nextend -= delta;
00158                      }
00159               }
00160               /* our next-pair is our new first pair now. */
00161               start = next;
00162               end = nextend;
00163        }
00164 
00165        /* Now we handle foreign character sets properly encoded
00166         * in RFC2047 format.
00167         */
00168        while (start=strstr((*buf), "=?"), end=FindNextEnd((start != NULL)? start : (*buf)),
00169               ((start != NULL) && (end != NULL) && (end > start)) )
00170        {
00171               extract_token(charset, start, 1, '?', sizeof charset);
00172               extract_token(encoding, start, 2, '?', sizeof encoding);
00173               extract_token(istr, start, 3, '?', sizeof istr);
00174 
00175               ibuf = malloc(1024);
00176               isav = ibuf;
00177               if (!strcasecmp(encoding, "B")) {  
00178                      ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
00179               }
00180               else if (!strcasecmp(encoding, "Q")) {    
00181                      size_t len;
00182                      long pos;
00183                      
00184                      len = strlen(istr);
00185                      pos = 0;
00186                      while (pos < len)
00187                      {
00188                             if (istr[pos] == '_') istr[pos] = ' ';
00189                             pos++;
00190                      }
00191 
00192                      ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
00193               }
00194               else {
00195                      strcpy(ibuf, istr);         
00196                      ibuflen = strlen(istr);
00197               }
00198 
00199               ctdl_iconv_open("UTF-8", charset, &ic);
00200               if (ic != (iconv_t)(-1) ) {
00201                      obuflen = 1024;
00202                      obuf = (char *) malloc(obuflen);
00203                      osav = obuf;
00204                      iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
00205                      osav[1024-obuflen] = 0;
00206 
00207                      end = start;
00208                      end++;
00209                      strcpy(start, "");
00210                      remove_token(end, 0, '?');
00211                      remove_token(end, 0, '?');
00212                      remove_token(end, 0, '?');
00213                      remove_token(end, 0, '?');
00214                      strcpy(end, &end[1]);
00215 
00216                      snprintf(newbuf, sizeof newbuf, "%s%s%s", *buf, osav, end);
00217                      strcpy(*buf, newbuf);
00218                      
00219                      free(osav);
00220                      iconv_close(ic);
00221               }
00222               else {
00223                      end = start;
00224                      end++;
00225                      strcpy(start, "");
00226                      remove_token(end, 0, '?');
00227                      remove_token(end, 0, '?');
00228                      remove_token(end, 0, '?');
00229                      remove_token(end, 0, '?');
00230                      strcpy(end, &end[1]);
00231 
00232                      snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", *buf, end);
00233                      strcpy(*buf, newbuf);
00234               }
00235 
00236               free(isav);
00237 
00238               /*
00239                * Since spammers will go to all sorts of absurd lengths to get their
00240                * messages through, there are LOTS of corrupt headers out there.
00241                * So, prevent a really badly formed RFC2047 header from throwing
00242                * this function into an infinite loop.
00243                */
00244               ++passes;
00245               if (passes > 20) return;
00246        }
00247 
00248 }
00249 #else
/*
 * Built without iconv: no charset conversion is available, so the header
 * string is left untouched.  This is an ordinary external definition so
 * that callers in other translation units always find the symbol.
 */
void utf8ify_rfc822_string(char **a)
{
	(void) a;
}
00251 
00252 #endif
00253 
00254 
00255 
00256 
/*
 * RFC2047-encode a header field if necessary.
 * If no non-ASCII characters are found, the string is copied unchanged.
 *
 * target     buffer that receives the NUL terminated result
 * maxlen     size of the target buffer in bytes, including the terminator
 * source     text to encode
 * SourceLen  number of bytes of source to process
 *
 * Returns the length of the string written to target (without the
 * terminator), or -1 if an argument is invalid or the target is too small.
 *
 * Encoding is only triggered by bytes outside the printable ASCII range
 * (32..126).  Inside an encoded word '=', '?' and '_' are written as =XX
 * and a blank as '_', because RFC2047 reserves those characters.  If many
 * bytes need escaping the output may stop early once the target is nearly
 * full; the encoded word is still closed properly when there is room.
 */
int webcit_rfc2047encode(char *target, int maxlen, char *source, long SourceLen)
{
	static const char headerStr[] = "=?UTF-8?Q?";
	static const char hexDigits[] = "0123456789ABCDEF";
	const size_t headerLen = sizeof headerStr - 1;
	int need_to_encode = 0;
	long i;
	int len;
	unsigned char ch;

	/* SourceLen must leave room for the terminator in target */
	if ((source == NULL) ||
	    (target == NULL) ||
	    (maxlen <= 0) ||
	    (SourceLen < 0) ||
	    (SourceLen >= maxlen)) return -1;

	/* Check the bound first so source is never read past SourceLen */
	for (i = 0; (i < SourceLen) && (source[i] != '\0'); i++) {
		ch = (unsigned char) source[i];
		if ((ch < 32) || (ch > 126)) {
			need_to_encode = 1;
			break;
		}
	}

	if (!need_to_encode) {
		memcpy (target, source, (size_t) SourceLen);
		target[SourceLen] = '\0';
		return (int) SourceLen;
	}

	/* smallest possible result: header + source + "?=" + terminator */
	if (headerLen + (size_t) SourceLen + 3 > (size_t) maxlen)
		return -1;

	memcpy (target, headerStr, sizeof headerStr);
	len = (int) headerLen;

	/* each step may emit up to three bytes, so keep that much headroom */
	for (i = 0; (i < SourceLen) && (len + 3 < maxlen); ++i) {
		ch = (unsigned char) source[i];
		if (ch == ' ') {
			target[len++] = '_';
		}
		else if ((ch < 32) || (ch > 126) ||
			 (ch == '=') || (ch == '?') || (ch == '_')) {
			target[len++] = '=';
			target[len++] = hexDigits[ch >> 4];
			target[len++] = hexDigits[ch & 0x0F];
		}
		else {
			target[len++] = (char) ch;
		}
	}

	if (len + 2 >= maxlen)
		return -1;

	/* closing delimiter plus terminator */
	memcpy (&target[len], "?=", 3);
	return len + 2;
}
00320