Back to index

lightning-sunbird  0.9+nobinonly
mimetric.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 #include "mimetric.h"
00038 #include "mimebuf.h"
00039 #include "prmem.h"
00040 #include "plstr.h"
00041 #include "nsCRT.h"
00042 #include "prlog.h"
00043 #include "msgCore.h"
00044 
00045 #define MIME_SUPERCLASS mimeInlineTextClass
00046 MimeDefClass(MimeInlineTextRichtext, MimeInlineTextRichtextClass,
00047                       mimeInlineTextRichtextClass, &MIME_SUPERCLASS);
00048 
00049 static int MimeInlineTextRichtext_parse_line (char *, PRInt32, MimeObject *);
00050 static int MimeInlineTextRichtext_parse_begin (MimeObject *);
00051 static int MimeInlineTextRichtext_parse_eof (MimeObject *, PRBool);
00052 
00053 static int
00054 MimeInlineTextRichtextClassInitialize(MimeInlineTextRichtextClass *clazz)
00055 {
00056   MimeObjectClass *oclass = (MimeObjectClass *) clazz;
00057   PR_ASSERT(!oclass->class_initialized);
00058   oclass->parse_begin = MimeInlineTextRichtext_parse_begin;
00059   oclass->parse_line  = MimeInlineTextRichtext_parse_line;
00060   oclass->parse_eof   = MimeInlineTextRichtext_parse_eof;
00061   return 0;
00062 }
00063 
00064 /* This function has this clunky interface because it needs to be called
00065    from outside this module (no MimeObject, etc.)
00066  */
00067 int
00068 MimeRichtextConvert (char *line, PRInt32 length,
00069                                     int (*output_fn) (const char *buf, PRInt32 size, void *closure),
00070                                     void *closure,
00071                                     char **obufferP,
00072                                     PRInt32 *obuffer_sizeP,
00073                                     PRBool enriched_p)
00074 {
00075   /* RFC 1341 (the original MIME spec) defined text/richtext.
00076         RFC 1563 superceded text/richtext with text/enriched.
00077         The changes from text/richtext to text/enriched are:
00078          - CRLF semantics are different
00079          - << maps to <
00080          - These tags were added:
00081             <VERBATIM>, <NOFILL>, <PARAM>, <FLUSHBOTH>
00082          - These tags were removed:
00083             <COMMENT>, <OUTDENT>, <OUTDENTRIGHT>, <SAMEPAGE>, <SUBSCRIPT>,
00084          <SUPERSCRIPT>, <HEADING>, <FOOTING>, <PARAGRAPH>, <SIGNATURE>,
00085             <LT>, <NL>, <NP>
00086         This method implements them both.
00087 
00088         draft-resnick-text-enriched-03.txt is a proposed update to 1563.
00089          - These tags were added:
00090            <FONTFAMILY>, <COLOR>, <PARAINDENT>, <LANG>.
00091               However, all of these rely on the magic <PARAM> tag, which we
00092               don't implement, so we're ignoring all of these.
00093         Interesting fact: it's by Peter W. Resnick from Qualcomm (Eudora).
00094         And it also says "It is fully expected that other text formatting
00095         standards like HTML and SGML will supplant text/enriched in
00096         Internet mail."
00097    */
00098   int status = 0;
00099   char *out;
00100   const char *data_end;
00101   const char *last_end;
00102   const char *this_start;
00103   const char *this_end;
00104   unsigned int desired_size;
00105 
00106   // The code below must never expand the input by more than 5x;
00107   // if it does, the desired_size multiplier (5) below must be changed too
00108 #define BGROWTH 5
00109   if ( (PRUint32)length >= ( (PRUint32) 0xfffffffe)/BGROWTH )
00110       return -1;
00111   desired_size = (length * BGROWTH) + 1;
00112 #undef BGROWTH  
00113   if (desired_size >= (PRUint32) *obuffer_sizeP)
00114        status = mime_GrowBuffer (desired_size, sizeof(char), 1024,
00115                                                   obufferP, obuffer_sizeP);
00116   if (status < 0) return status;
00117 
00118   if (enriched_p)
00119        {
00120          for (this_start = line; this_start < line + length; this_start++)
00121               if (!nsCRT::IsAsciiSpace (*this_start)) break;
00122          if (this_start >= line + length) /* blank line */
00123               {
00124                 PL_strncpyz (*obufferP, "<BR>", *obuffer_sizeP);
00125                 return output_fn (*obufferP, strlen(*obufferP), closure);
00126               }
00127        }
00128 
00129   PRUint32 outlen = (PRUint32) *obuffer_sizeP;
00130   out = *obufferP;
00131   *out = 0;
00132 
00133   data_end = line + length;
00134   last_end = line;
00135   this_start = last_end;
00136   this_end = this_start;
00137   PRUint32 addedlen = 0;
00138   while (this_end < data_end)
00139        {
00140          /* Skip forward to next special character. */
00141          while (this_start < data_end &&
00142                       *this_start != '<' && *this_start != '>' &&
00143                       *this_start != '&')
00144               this_start++;
00145 
00146          this_end = this_start;
00147 
00148          /* Skip to the end of the tag. */
00149          if (this_start < data_end && *this_start == '<')
00150               {
00151                 this_end++;
00152                 while (this_end < data_end &&
00153                              !nsCRT::IsAsciiSpace (*this_end) &&
00154                              *this_end != '<' && *this_end != '>' &&
00155                              *this_end != '&')
00156                      this_end++;
00157               }
00158 
00159          this_end++;
00160 
00161          /* Push out the text preceeding the tag. */
00162          if (last_end && last_end != this_start)
00163               {
00164                 memcpy (out, last_end, this_start - last_end);
00165                 out += this_start - last_end;
00166                 *out = 0;
00167                 outlen -= (this_start - last_end);
00168               }
00169 
00170          if (this_start >= data_end)
00171               break;
00172          else if (*this_start == '&')
00173               {
00174                 PL_strncpyz (out, "&amp;", outlen);
00175                 addedlen = strlen(out);
00176                 outlen -= addedlen;
00177                 out += addedlen;
00178               }
00179          else if (*this_start == '>')
00180               {
00181                 PL_strncpyz (out, "&gt;", outlen);
00182                 addedlen = strlen(out);
00183                 outlen -= addedlen;
00184                 out += addedlen;
00185               }
00186          else if (enriched_p &&
00187                         this_start < data_end + 1 &&
00188                         this_start[0] == '<' &&
00189                         this_start[1] == '<')
00190               {
00191                 PL_strncpyz (out, "&lt;", outlen);
00192                 addedlen = strlen(out);
00193                 outlen -= addedlen;
00194                 out += addedlen;
00195               }
00196          else if (this_start != this_end)
00197               {
00198                 /* Push out this ID. */
00199                 const char *old = this_start + 1;
00200                 char *tag_open  = 0;
00201                 char *tag_close = 0;
00202                 if (*old == '/')
00203                      {
00204                        /* This is </tag> */
00205                        old++;
00206                      }
00207 
00208                 switch (*old)
00209                      {
00210                      case 'b': case 'B':
00211                        if (!nsCRT::strncasecmp ("BIGGER>", old, 7))
00212                             tag_open = "<FONT SIZE=\"+1\">", tag_close = "</FONT>";
00213                        else if (!nsCRT::strncasecmp ("BLINK>", old, 5))
00214                             /* Of course, both text/richtext and text/enriched must be
00215                                enhanced *somehow*...  Or else what would people think. */
00216                             tag_open = "<BLINK>", tag_close = "</BLINK>";
00217                        else if (!nsCRT::strncasecmp ("BOLD>", old, 5))
00218                             tag_open = "<B>", tag_close = "</B>";
00219                        break;
00220                      case 'c': case 'C':
00221                        if (!nsCRT::strncasecmp ("CENTER>", old, 7))
00222                             tag_open = "<CENTER>", tag_close = "</CENTER>";
00223                        else if (!enriched_p &&
00224                                       !nsCRT::strncasecmp ("COMMENT>", old, 8))
00225                             tag_open = "<!-- ", tag_close = " -->";
00226                        break;
00227                      case 'e': case 'E':
00228                        if (!nsCRT::strncasecmp ("EXCERPT>", old, 8))
00229                             tag_open = "<BLOCKQUOTE>", tag_close = "</BLOCKQUOTE>";
00230                        break;
00231                      case 'f': case 'F':
00232                        if (!nsCRT::strncasecmp ("FIXED>", old, 6))
00233                             tag_open = "<TT>", tag_close = "</TT>";
00234                        else if (enriched_p &&
00235                                       !nsCRT::strncasecmp ("FLUSHBOTH>", old, 10))
00236                             tag_open = "<P ALIGN=LEFT>", tag_close = "</P>";
00237                        else if (!nsCRT::strncasecmp ("FLUSHLEFT>", old, 10))
00238                             tag_open = "<P ALIGN=LEFT>", tag_close = "</P>";
00239                        else if (!nsCRT::strncasecmp ("FLUSHRIGHT>", old, 11))
00240                             tag_open = "<P ALIGN=RIGHT>", tag_close = "</P>";
00241                        else if (!enriched_p &&
00242                                       !nsCRT::strncasecmp ("FOOTING>", old, 8))
00243                             tag_open = "<H6>", tag_close = "</H6>";
00244                        break;
00245                      case 'h': case 'H':
00246                        if (!enriched_p &&
00247                               !nsCRT::strncasecmp ("HEADING>", old, 8))
00248                             tag_open = "<H6>", tag_close = "</H6>";
00249                        break;
00250                      case 'i': case 'I':
00251                        if (!nsCRT::strncasecmp ("INDENT>", old, 7))
00252                             tag_open = "<UL>", tag_close = "</UL>";
00253                        else if (!nsCRT::strncasecmp ("INDENTRIGHT>", old, 12))
00254                             tag_open = 0, tag_close = 0;
00255 /*                     else if (!enriched_p &&
00256                                    !nsCRT::strncasecmp ("ISO-8859-", old, 9))
00257                             tag_open = 0, tag_close = 0; */
00258                        else if (!nsCRT::strncasecmp ("ITALIC>", old, 7))
00259                             tag_open = "<I>", tag_close = "</I>";
00260                        break;
00261                      case 'l': case 'L':
00262                        if (!enriched_p &&
00263                               !nsCRT::strncasecmp ("LT>", old, 3))
00264                             tag_open = "&lt;", tag_close = 0;
00265                        break;
00266                      case 'n': case 'N':
00267                        if (!enriched_p &&
00268                               !nsCRT::strncasecmp ("NL>", old, 3))
00269                             tag_open = "<BR>", tag_close = 0;
00270                        if (enriched_p &&
00271                               !nsCRT::strncasecmp ("NOFILL>", old, 7))
00272                             tag_open = "<NOBR>", tag_close = "</NOBR>";
00273 /*                     else if (!enriched_p &&
00274                                    !nsCRT::strncasecmp ("NO-OP>", old, 6))
00275                             tag_open = 0, tag_close = 0; */
00276 /*                     else if (!enriched_p &&
00277                                    !nsCRT::strncasecmp ("NP>", old, 3))
00278                             tag_open = 0, tag_close = 0; */
00279                        break;
00280                      case 'o': case 'O':
00281                        if (!enriched_p &&
00282                               !nsCRT::strncasecmp ("OUTDENT>", old, 8))
00283                             tag_open = 0, tag_close = 0;
00284                        else if (!enriched_p &&
00285                                       !nsCRT::strncasecmp ("OUTDENTRIGHT>", old, 13))
00286                             tag_open = 0, tag_close = 0;
00287                        break;
00288                      case 'p': case 'P':
00289                        if (enriched_p &&
00290                               !nsCRT::strncasecmp ("PARAM>", old, 6))
00291                             tag_open = "<!-- ", tag_close = " -->";
00292                        else if (!enriched_p &&
00293                                       !nsCRT::strncasecmp ("PARAGRAPH>", old, 10))
00294                             tag_open = "<P>", tag_close = 0;
00295                        break;
00296                      case 's': case 'S':
00297                        if (!enriched_p &&
00298                               !nsCRT::strncasecmp ("SAMEPAGE>", old, 9))
00299                             tag_open = 0, tag_close = 0;
00300                        else if (!enriched_p &&
00301                                       !nsCRT::strncasecmp ("SIGNATURE>", old, 10))
00302                             tag_open = "<I><FONT SIZE=\"-1\">", tag_close = "</FONT></I>";
00303                        else if (!nsCRT::strncasecmp ("SMALLER>", old, 8))
00304                             tag_open = "<FONT SIZE=\"-1\">", tag_close = "</FONT>";
00305                        else if (!enriched_p &&
00306                                       !nsCRT::strncasecmp ("SUBSCRIPT>", old, 10))
00307                             tag_open = "<SUB>", tag_close = "</SUB>";
00308                        else if (!enriched_p &&
00309                                       !nsCRT::strncasecmp ("SUPERSCRIPT>", old, 12))
00310                             tag_open = "<SUP>", tag_close = "</SUP>";
00311                        break;
00312                      case 'u': case 'U':
00313                        if (!nsCRT::strncasecmp ("UNDERLINE>", old, 10))
00314                             tag_open = "<U>", tag_close = "</U>";
00315 /*                     else if (!enriched_p &&
00316                                       !nsCRT::strncasecmp ("US-ASCII>", old, 10))
00317                             tag_open = 0, tag_close = 0; */
00318                        break;
00319                      case 'v': case 'V':
00320                        if (enriched_p &&
00321                               !nsCRT::strncasecmp ("VERBATIM>", old, 9))
00322                             tag_open = "<PRE>", tag_close = "</PRE>";
00323                        break;
00324                      }
00325 
00326                 if (this_start[1] == '/')
00327                      {
00328                        if (tag_close) PL_strncpyz (out, tag_close, outlen);
00329                        addedlen = strlen (out);
00330                        outlen -= addedlen;
00331                        out += addedlen;
00332                      }
00333                 else
00334                      {
00335                        if (tag_open) PL_strncpyz (out, tag_open, outlen);
00336                        addedlen = strlen (out);
00337                        outlen -= addedlen;
00338                        out += addedlen;
00339                      }
00340               }
00341 
00342          /* now go around again */
00343          last_end = this_end;
00344          this_start = last_end;
00345        }
00346   *out = 0;
00347 
00348   return output_fn (*obufferP, out - *obufferP, closure);
00349 }
00350 
00351 
00352 static int
00353 MimeInlineTextRichtext_parse_line (char *line, PRInt32 length, MimeObject *obj)
00354 {
00355   PRBool enriched_p = (((MimeInlineTextRichtextClass *) obj->clazz)
00356                                           ->enriched_p);
00357 
00358   return MimeRichtextConvert (line, length,
00359                                                    obj->options->output_fn,
00360                                                    obj->options->stream_closure,
00361                                                    &obj->obuffer, &obj->obuffer_size,
00362                                                    enriched_p);
00363 }
00364 
00365 
00366 static int
00367 MimeInlineTextRichtext_parse_begin (MimeObject *obj)
00368 {
00369   int status = ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_begin(obj);
00370   char s[] = "";
00371   if (status < 0) return status;
00372   return MimeObject_write(obj, s, 0, PR_TRUE); /* force out any separators... */
00373 }
00374 
00375 
00376 static int
00377 MimeInlineTextRichtext_parse_eof (MimeObject *obj, PRBool abort_p)
00378 {
00379   int status;
00380   if (obj->closed_p) return 0;
00381 
00382   /* Run parent method first, to flush out any buffered data. */
00383   status = ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_eof(obj, abort_p);
00384   if (status < 0) return status;
00385 
00386   return 0;
00387 }