Back to index

courier  0.68.2
rfc3676parser.c
Go to the documentation of this file.
00001 /*
00002 ** Copyright 2011 Double Precision, Inc.  See COPYING for
00003 ** distribution information.
00004 */
00005 
00006 #include "rfc2045_config.h"
00007 #include      "rfc3676parser.h"
00008 #include      <stdlib.h>
00009 #include      <string.h>
00010 
00011 #define NONFLOWED_WRAP_REDUCE      74
00012 
00013 #define NONFLOWED_THRESHOLD_EXCEEDED      30
00014 
00015 
00016 static void emit_line_begin(rfc3676_parser_t handle);
00017 
00018 static void emit_line_contents(rfc3676_parser_t handle,
00019                             const unicode_char *uc,
00020                             size_t cnt);
00021 
00022 static void emit_line_flowed_wrap(rfc3676_parser_t handle);
00023 
00024 static void emit_line_end(rfc3676_parser_t handle);
00025 
00026 
00027 static void nonflowed_line_begin(rfc3676_parser_t handle);
00028 
00029 static void nonflowed_line_contents(rfc3676_parser_t handle,
00030                                 const unicode_char *uc,
00031                                 size_t cnt);
00032 
00033 static void nonflowed_line_end(rfc3676_parser_t handle);
00034 
00035 static int nonflowed_line_process(int linebreak_opportunity,
00036                               unicode_char ch, void *dummy);
00037 
/*
** Dispatch helpers: each one calls the handler currently installed in the
** parser handle, passing the handle itself as the first argument.
** The handle argument is evaluated twice, so it must have no side effects.
*/
#define EMIT_LINE_BEGIN(h) \
	((*(h)->line_begin_handler)(h))

#define EMIT_LINE_CONTENTS(h, uc, cnt) \
	((*(h)->line_content_handler)((h), (uc), (cnt)))

#define EMIT_LINE_END(h) \
	((*(h)->line_end_handler)(h))
00049 
00050 struct rfc3676_parser_struct {
00051 
00052        struct rfc3676_parser_info info;
00053        libmail_u_convert_handle_t uhandle;
00054 
00055        int errflag;
00056 
00057        /* Receive raw text stream, converted to unicode */
00058        size_t (*line_handler)(rfc3676_parser_t,
00059                             const unicode_char *ptr, size_t cnt);
00060 
00061        /*
00062        ** Receive mostly raw text stream: CRs that precede an LF
00063        ** are removed from the stream received by content_handler.
00064        */
00065        size_t (*content_handler)(rfc3676_parser_t,
00066                               const unicode_char *ptr, size_t cnt);
00067 
00068        size_t quote_level;
00069        size_t sig_block_index;
00070 
00071        /*
00072        ** Flag: previous line ended in a flowed space, and the previous
00073        ** line's quoting level was this.
00074        */
00075        int has_previous_quote_level;
00076        size_t previous_quote_level;
00077 
00078        /*
00079        ** Flag: current line was flowed into from a previous line with the
00080        ** same quoting level.
00081        */
00082        int was_previous_quote_level;
00083 
00084        /* A line has begun */
00085        void (*line_begin_handler)(rfc3676_parser_t handle);
00086 
00087        /* Content of this line */
00088        void (*line_content_handler)(rfc3676_parser_t handle,
00089                                  const unicode_char *uc,
00090                                  size_t cnt);
00091 
00092        /* End of this line */
00093        void (*line_end_handler)(rfc3676_parser_t handle);
00094 
00095 
00096        /*
00097        ** When non-flowed text is getting rewrapped, we utilize the services
00098        ** of the unicode_lbc_info API.
00099        */
00100 
00101        unicode_lbc_info_t lb;
00102 
00103        struct unicode_buf nonflowed_line;
00104        /* Collect unflowed line until it reaches the given size */
00105 
00106        struct unicode_buf nonflowed_next_word;
00107        /* Collects unicode stream until a linebreaking opportunity */
00108 
00109        size_t nonflowed_line_target_width;
00110        /* Targeted width of nonflowed lines */
00111 
00112        size_t nonflowed_line_width; /* Width of nonflowed_line */
00113 
00114        size_t nonflowed_next_word_width; /* Width of nonflowed_next_word */
00115 
00116        /* Current handle of non-flowd content. */
00117        void (*nonflowed_line_process)(struct rfc3676_parser_struct *handle,
00118                                    int linebreak_opportunity,
00119                                    unicode_char ch,
00120                                    size_t ch_width);
00121 
00122        void (*nonflowed_line_end)(struct rfc3676_parser_struct *handle);
00123 };
00124 
00125 static int parse_unicode(const char *, size_t, void *);
00126 
00127 static size_t scan_crlf(rfc3676_parser_t handle,
00128                      const unicode_char *ptr, size_t cnt);
00129 
00130 static size_t scan_crlf_seen_cr(rfc3676_parser_t handle,
00131                             const unicode_char *ptr, size_t cnt);
00132 
00133 static size_t start_of_line(rfc3676_parser_t handle,
00134                          const unicode_char *ptr, size_t cnt);
00135 
00136 static size_t count_quote_level(rfc3676_parser_t handle,
00137                             const unicode_char *ptr, size_t cnt);
00138 
00139 static size_t counted_quote_level(rfc3676_parser_t handle,
00140                               const unicode_char *ptr, size_t cnt);
00141 
00142 static size_t check_signature_block(rfc3676_parser_t handle,
00143                                 const unicode_char *ptr, size_t cnt);
00144 
00145 static size_t start_content_line(rfc3676_parser_t handle,
00146                             const unicode_char *ptr, size_t cnt);
00147 
00148 static size_t scan_content_line(rfc3676_parser_t handle,
00149                             const unicode_char *ptr, size_t cnt);
00150 
00151 static size_t seen_sig_block(rfc3676_parser_t handle,
00152                           const unicode_char *ptr, size_t cnt);
00153 
00154 static size_t seen_notsig_block(rfc3676_parser_t handle,
00155                             const unicode_char *ptr, size_t cnt);
00156 
00157 static size_t seen_content_sp(rfc3676_parser_t handle,
00158                            const unicode_char *ptr, size_t cnt);
00159 
00160 
00161 /*
00162 ** The top layer initializes the conversion to unicode.
00163 */
00164 
00165 rfc3676_parser_t rfc3676parser_init(const struct rfc3676_parser_info *info)
00166 {
00167        rfc3676_parser_t handle=
00168               (rfc3676_parser_t)calloc(1,
00169                                     sizeof(struct rfc3676_parser_struct));
00170 
00171        if (!handle)
00172               return NULL;
00173 
00174        handle->info=*info;
00175        if ((handle->uhandle=libmail_u_convert_init(info->charset,
00176                                               libmail_u_ucs4_native,
00177                                               parse_unicode,
00178                                               handle)) == NULL)
00179        {
00180               free(handle);
00181               return NULL;
00182        }
00183 
00184        if (!handle->info.isflowed)
00185               handle->info.isdelsp=0; /* Sanity check */
00186 
00187        handle->line_handler=scan_crlf;
00188        handle->content_handler=start_of_line;
00189        handle->has_previous_quote_level=0;
00190        handle->previous_quote_level=0;
00191 
00192        handle->line_begin_handler=emit_line_begin;
00193        handle->line_content_handler=emit_line_contents;
00194        handle->line_end_handler=emit_line_end;
00195 
00196        unicode_buf_init(&handle->nonflowed_line, (size_t)-1);
00197        unicode_buf_init(&handle->nonflowed_next_word, (size_t)-1);
00198 
00199        if (!handle->info.isflowed)
00200        {
00201               handle->line_begin_handler=nonflowed_line_begin;
00202               handle->line_content_handler=nonflowed_line_contents;
00203               handle->line_end_handler=nonflowed_line_end;
00204        }
00205        return handle;
00206 }
00207 
00208 int rfc3676parser(rfc3676_parser_t handle,
00209                 const char *txt,
00210                 size_t txt_cnt)
00211 {
00212        if (handle->errflag)
00213               return handle->errflag; /* Error occured previously */
00214 
00215        /* Convert to unicode and invoke parse_unicode() */
00216 
00217        return libmail_u_convert(handle->uhandle, txt, txt_cnt);
00218 }
00219 
00220 /*
00221 ** Convert char stream from iconv into unicode_chars, then pass them to the
00222 ** current handler, until all converted unicode_chars are consumed.
00223 */
00224 
00225 static int parse_unicode(const char *ucs4, size_t nbytes, void *arg)
00226 {
00227        rfc3676_parser_t handle=(rfc3676_parser_t)arg;
00228        unicode_char ucs4buf[128];
00229        const unicode_char *p;
00230 
00231        /* Keep going until there's an error, or everything is consumed. */
00232 
00233        while (handle->errflag == 0 && nbytes)
00234        {
00235               /* Do it in pieces, using the temporary unicode_char buffer */
00236 
00237               size_t cnt=nbytes;
00238 
00239               if (cnt > sizeof(ucs4buf))
00240                      cnt=sizeof(ucs4buf);
00241 
00242               memcpy(ucs4buf, ucs4, cnt);
00243 
00244               ucs4 += cnt;
00245               nbytes -= cnt;
00246 
00247               cnt /= sizeof(unicode_char);
00248               p=ucs4buf;
00249 
00250               /* Keep feeding it to the current handler */
00251 
00252               while (handle->errflag == 0 && cnt)
00253               {
00254                      size_t n=(*handle->line_handler)(handle, p, cnt);
00255 
00256                      if (handle->errflag == 0)
00257                      {
00258                             cnt -= n;
00259                             p += n;
00260                      }
00261               }
00262        }
00263 
00264        return handle->errflag;
00265 }
00266 
00267 int rfc3676parser_deinit(rfc3676_parser_t handle, int *errptr)
00268 {
00269        /* Finish unicode conversion */
00270 
00271        int rc=libmail_u_convert_deinit(handle->uhandle, errptr);
00272 
00273        if (rc == 0)
00274               rc=handle->errflag;
00275 
00276        if (rc == 0)
00277        {
00278               (*handle->line_handler)(handle, NULL, 0);
00279               rc=handle->errflag;
00280        }
00281 
00282        if (handle->lb)
00283        {
00284               int rc2=unicode_lbc_end(handle->lb);
00285 
00286               if (rc2 && rc == 0)
00287                      rc=rc2;
00288        }
00289 
00290        unicode_buf_deinit(&handle->nonflowed_line);
00291        unicode_buf_deinit(&handle->nonflowed_next_word);
00292 
00293        free(handle);
00294        return rc;
00295 }
00296 
00297 /*
00298 ** Look for a CR that might precede an LF.
00299 */
00300 
00301 static size_t scan_crlf(rfc3676_parser_t handle,
00302                      const unicode_char *ptr, size_t cnt)
00303 {
00304        size_t i;
00305 
00306        if (ptr == NULL)
00307        {
00308               if (handle->errflag == 0)
00309                      (*handle->content_handler)(handle, NULL, 0);
00310               return 0;
00311        }
00312 
00313        for (i=0; ptr && i<cnt; ++i)
00314        {
00315               if (ptr[i] == '\r')
00316                      break;
00317        }
00318 
00319        if (i)
00320        {
00321               size_t consumed=0;
00322 
00323               while (i && handle->errflag == 0)
00324               {
00325                      size_t n=(*handle->content_handler)(handle, ptr, i);
00326 
00327                      ptr += n;
00328                      consumed += n;
00329                      i -= n;
00330               }
00331               return consumed;
00332        }
00333 
00334        /* Consume the first character, the CR */
00335 
00336        handle->line_handler=scan_crlf_seen_cr;
00337        return 1;
00338 }
00339 
00340 /*
00341 ** Check the first character after a CR.
00342 */
00343 
00344 static size_t scan_crlf_seen_cr(rfc3676_parser_t handle,
00345                             const unicode_char *ptr, size_t cnt)
00346 {
00347        unicode_char cr='\r';
00348 
00349        handle->line_handler=scan_crlf;
00350 
00351        if (ptr == NULL || *ptr != '\n')
00352        {
00353               /*
00354               ** CR was not followed by a NL.
00355               ** Restore it in the char stream.
00356               */
00357 
00358               while (handle->errflag == 0)
00359                      if ((*handle->content_handler)(handle, &cr, 1))
00360                             break;
00361        }
00362 
00363        return scan_crlf(handle, ptr, cnt);
00364 }
00365 
00366 /*
00367 ** From this point on, CRLF are collapsed into NLs, so don't need to worry
00368 ** about them.
00369 */
00370 
00371 
00372 /*
00373 ** Check for an EOF indication at the start of the line.
00374 */
00375 
00376 static size_t start_of_line(rfc3676_parser_t handle,
00377                          const unicode_char *ptr, size_t cnt)
00378 {
00379        if (ptr == NULL)
00380        {
00381               if (handle->has_previous_quote_level)
00382                      EMIT_LINE_END(handle); /* Last line was flowed */
00383 
00384               return cnt; /* EOF */
00385        }
00386 
00387        /* Begin counting the quote level */
00388 
00389        handle->content_handler=count_quote_level;
00390        handle->quote_level=0;
00391        return count_quote_level(handle, ptr, cnt);
00392 }
00393 
00394 /*
00395 ** Count leading > in flowed content.
00396 */
00397 
00398 static size_t count_quote_level(rfc3676_parser_t handle,
00399                             const unicode_char *ptr, size_t cnt)
00400 {
00401        size_t i;
00402 
00403        if (ptr == NULL) /* EOF, pretend that the quote level was counted */
00404               return (handle->content_handler=counted_quote_level)
00405                      (handle, ptr, cnt);
00406 
00407        for (i=0; i<cnt; ++i)
00408        {
00409               if (ptr[i] != '>' || !handle->info.isflowed)
00410               {
00411                      handle->content_handler=counted_quote_level;
00412 
00413                      if (i == 0)
00414                             return counted_quote_level(handle, ptr, cnt);
00415                      break;
00416               }
00417               ++handle->quote_level;
00418        }
00419 
00420        return i;
00421 }
00422 
00423 /*
00424 ** This line's quote level has now been counted.
00425 */
00426 
00427 static size_t counted_quote_level(rfc3676_parser_t handle,
00428                               const unicode_char *ptr, size_t cnt)
00429 {
00430        handle->was_previous_quote_level=0;
00431 
00432        /*
00433        ** If the previous line was flowed and this line has the same
00434        ** quote level, make the flow official.
00435        */
00436 
00437        if (handle->has_previous_quote_level &&
00438            handle->quote_level == handle->previous_quote_level)
00439        {
00440               /* Remember that this line was flowed into */
00441               handle->was_previous_quote_level=1;
00442        }
00443        else
00444        {
00445               /*
00446               ** If the previous line was flowed, but this line carries
00447               ** a different quote level, force-terminate the previous
00448               ** line, before beginning this line.
00449               */
00450               if (handle->has_previous_quote_level)
00451                      EMIT_LINE_END(handle);
00452 
00453               EMIT_LINE_BEGIN(handle);
00454        }
00455 
00456        handle->has_previous_quote_level=0;
00457        /* Assume this line won't be flowed, until shown otherwise */
00458 
00459 
00460        if (!handle->info.isflowed)
00461        {
00462               /*
00463               ** No space-stuffing, or sig block checking, if this is not
00464               ** flowed content.
00465               */
00466               handle->content_handler=scan_content_line;
00467               return scan_content_line(handle, ptr, cnt);
00468        }
00469 
00470 
00471        handle->content_handler=start_content_line;
00472 
00473        if (ptr != NULL && *ptr == ' ')
00474               return 1; /* Remove stuffed space */
00475 
00476        return start_content_line(handle, ptr, cnt);
00477 }
00478 
00479 /*
00480 ** Minor deviation from RFC3676, but this fixes a lot of broken text.
00481 **
00482 ** If the previous line was flowed, but this is an empty line (optionally
00483 ** space-stuffed), unflow the last line (make it fixed), and this becomes
00484 ** a fixed line too. Example:
00485 **
00486 ** this is the last end of a paragraph[SPACE]
00487 ** [SPACE]
00488 ** This is the first line of the next paragraph.
00489 **
00490 ** Strict RFC3676 rules will parse this as a flowed line, then a fixed line,
00491 ** resulting in no paragraph breaks.
00492 */
00493 
00494 static size_t start_content_line(rfc3676_parser_t handle,
00495                             const unicode_char *ptr, size_t cnt)
00496 {
00497        /*
00498        ** We'll start scanning for the signature block, as soon as
00499        ** this check is done.
00500        */
00501        handle->content_handler=check_signature_block;
00502        handle->sig_block_index=0;
00503        
00504        if (ptr && *ptr == '\n' && handle->was_previous_quote_level)
00505        {
00506               EMIT_LINE_END(handle);
00507               EMIT_LINE_BEGIN(handle);
00508               handle->was_previous_quote_level=0;
00509        }
00510 
00511        return check_signature_block(handle, ptr, cnt);
00512 }
00513 
00514 
00515 static const unicode_char sig_block[]={'-', '-', ' '};
00516 
00517 /* Checking for a magical sig block */
00518 
00519 static size_t check_signature_block(rfc3676_parser_t handle,
00520                                 const unicode_char *ptr, size_t cnt)
00521 {
00522        if (ptr && *ptr == sig_block[handle->sig_block_index])
00523        {
00524               if (++handle->sig_block_index == sizeof(sig_block)
00525                   /sizeof(sig_block[0]))
00526 
00527                      /* Well, it's there, but does a NL follow? */
00528                      handle->content_handler=seen_sig_block;
00529               return 1;
00530        }
00531 
00532        return seen_notsig_block(handle, ptr, cnt);
00533 }
00534 
00535 static size_t seen_sig_block(rfc3676_parser_t handle,
00536                           const unicode_char *ptr, size_t cnt)
00537 {
00538        if (ptr == NULL || *ptr == '\n')
00539        {
00540               /*
00541               ** If the previous line was flowed, the sig block is not
00542               ** considered to be flowable-into content, so terminate
00543               ** the previous line before emitting the sig block.
00544               */
00545 
00546               if (handle->was_previous_quote_level)
00547               {
00548                      EMIT_LINE_END(handle);
00549                      EMIT_LINE_BEGIN(handle);
00550                      handle->was_previous_quote_level=0;
00551               }
00552 
00553               /* Pass through the sig block */
00554 
00555               handle->content_handler=start_of_line;
00556 
00557               EMIT_LINE_CONTENTS(handle, sig_block,
00558                                sizeof(sig_block)/sizeof(sig_block[0]));
00559               EMIT_LINE_END(handle);
00560               return ptr ? 1:0;
00561        }
00562 
00563        return seen_notsig_block(handle, ptr, cnt);
00564 }
00565 
00566 /* This is not a sig block line */
00567 
00568 static size_t seen_notsig_block(rfc3676_parser_t handle,
00569                              const unicode_char *newptr, size_t newcnt)
00570 {
00571        const unicode_char *ptr;
00572        size_t i;
00573 
00574        if (handle->was_previous_quote_level)
00575               emit_line_flowed_wrap(handle);
00576 
00577        handle->content_handler=scan_content_line;
00578 
00579        ptr=sig_block;
00580        i=handle->sig_block_index;
00581 
00582        while (i && handle->errflag == 0)
00583        {
00584               size_t n=(*handle->content_handler)(handle, ptr, i);
00585 
00586               ptr += n;
00587               i -= n;
00588        }
00589 
00590        return (*handle->content_handler)(handle, newptr, newcnt);
00591 }
00592 
00593 /*
00594 ** Pass through the line, until encountering an NL, or a space in flowable
00595 ** content.
00596 */
00597 
00598 static size_t scan_content_line(rfc3676_parser_t handle,
00599                             const unicode_char *ptr, size_t cnt)
00600 {
00601        size_t i;
00602 
00603        for (i=0; ptr && i<cnt && ptr[i] != '\n' &&
00604                    (ptr[i] != ' ' || !handle->info.isflowed); ++i)
00605               ;
00606 
00607        /* Pass through anything before the NL or potentially flowable SP */
00608 
00609        if (i)
00610               EMIT_LINE_CONTENTS(handle, ptr, i);
00611 
00612        if (i)
00613               return i;
00614 
00615        if (ptr && ptr[i] == ' ')
00616        {
00617               handle->content_handler=seen_content_sp;
00618               return 1;
00619        }
00620 
00621        /* NL. This line does not flow */
00622        EMIT_LINE_END(handle);
00623 
00624        handle->content_handler=start_of_line;
00625 
00626        return ptr ? 1:0;
00627 }
00628 
00629 static size_t seen_content_sp(rfc3676_parser_t handle,
00630                            const unicode_char *ptr, size_t cnt)
00631 {
00632        unicode_char sp=' ';
00633 
00634        handle->content_handler=scan_content_line;
00635 
00636        if (ptr == NULL || *ptr != '\n')
00637        {
00638               /*
00639               ** SP was not followed by the NL. Pass through the space,
00640               ** then resume scanning.
00641               */
00642               EMIT_LINE_CONTENTS(handle, &sp, 1);
00643               return scan_content_line(handle, ptr, cnt);
00644        }
00645 
00646        /* NL after a SP -- flowed line */
00647 
00648        if (!handle->info.isdelsp)
00649               EMIT_LINE_CONTENTS(handle, &sp, 1);
00650 
00651        handle->has_previous_quote_level=1;
00652        handle->previous_quote_level=handle->quote_level;
00653        handle->content_handler=start_of_line;
00654        return ptr ? 1:0;
00655 }
00656 
00657 /**************************************************************************/
00658 
00659 /*
00660 ** At this point, the processing has reduced to the following API:
00661 **
00662 ** + begin logical line
00663 **
00664 ** + contents of the logical line (multiple consecutive invocations)
00665 **
00666 ** + the logical line has flowed onto the next physical line
00667 **
00668 ** + end of logical line
00669 **
00670 ** The third one, logical line flowed, is normally used for flowed text,
00671 ** by definition. But it may also get used if non-flowed text gets
00672 ** rewrapped when broken formatting is detected.
00673 **
00674 ** Provide default implementations of the other three API calls that
00675 ** simply invoke the corresponding user callback.
00676 */
00677 
00678 static void emit_line_begin(rfc3676_parser_t handle)
00679 {
00680        if (handle->errflag == 0)
00681               handle->errflag=(*handle->info.line_begin)(handle->quote_level,
00682                                                     handle->info.arg);
00683 }
00684 
00685 static void emit_line_flowed_wrap(rfc3676_parser_t handle)
00686 {
00687        if (handle->errflag == 0 && handle->info.line_flowed_notify)
00688               handle->errflag=(*handle->info.line_flowed_notify)
00689                      (handle->info.arg);
00690 }
00691 
00692 static void emit_line_contents(rfc3676_parser_t handle,
00693                             const unicode_char *uc,
00694                             size_t cnt)
00695 {
00696        if (handle->errflag == 0 && cnt > 0)
00697               handle->errflag=(*handle->info.line_contents)
00698                      (uc, cnt, handle->info.arg);
00699 }
00700 
00701 static void emit_line_end(rfc3676_parser_t handle)
00702 {
00703        if (handle->errflag == 0)
00704               handle->errflag=(*handle->info.line_end)(handle->info.arg);
00705 }
00706 
00707 /*
00708 ** When processing a non-flowed text, handle broken mail formatters (I'm
00709 ** looking at you, Apple Mail) that spew out quoted-printable content with
00710 ** each decoded line forming a single paragraph. This is heuristically
00711 ** detected by looking for lines that exceed a wrapping threshold, then
00712 ** rewrapping them.
00713 **
00714 ** Redefine the three line API calls to launder the logical line via
00715 ** the linebreak API.
00716 */
00717 
00718 static void initial_nonflowed_line(rfc3676_parser_t handle,
00719                                int linebreak_opportunity,
00720                                unicode_char ch,
00721                                size_t ch_width);
00722 
00723 static void initial_nonflowed_end(rfc3676_parser_t handle);
00724 
00725 static void begin_forced_rewrap(rfc3676_parser_t handle);
00726 
00727 /*
00728 ** A non-flowed line begins. Initialize the linebreaking module.
00729 */
00730 static void nonflowed_line_begin(rfc3676_parser_t handle)
00731 {
00732        if (handle->lb)
00733        {
00734               /* Just in case */
00735 
00736               int rc=unicode_lbc_end(handle->lb);
00737 
00738               if (rc && handle->errflag == 0)
00739                      handle->errflag=rc;
00740        }
00741 
00742        if ((handle->lb=unicode_lbc_init(nonflowed_line_process, handle))
00743            == NULL)
00744        {
00745               if (handle->errflag == 0)
00746                      handle->errflag=-1;
00747        }
00748 
00749        if (handle->lb)
00750               unicode_lbc_set_opts(handle->lb,
00751                                  UNICODE_LB_OPT_PRBREAK
00752                                  | UNICODE_LB_OPT_SYBREAK);
00753 
00754        unicode_buf_clear(&handle->nonflowed_line);
00755        unicode_buf_clear(&handle->nonflowed_next_word);
00756 
00757        handle->nonflowed_line_width=0;
00758        handle->nonflowed_next_word_width=0;
00759 
00760        handle->nonflowed_line_process=initial_nonflowed_line;
00761        handle->nonflowed_line_end=initial_nonflowed_end;
00762        emit_line_begin(handle); /* Fallthru - user callback */
00763 
00764        handle->nonflowed_line_target_width=
00765               handle->quote_level < NONFLOWED_WRAP_REDUCE - 20 ?
00766               NONFLOWED_WRAP_REDUCE - handle->quote_level:20;
00767 }
00768 
00769 /*
00770 ** Process contents of non-flowed lines. The contents are submitted to the
00771 ** linebreaking API.
00772 */
00773 
00774 static void nonflowed_line_contents(rfc3676_parser_t handle,
00775                                 const unicode_char *uc,
00776                                 size_t cnt)
00777 {
00778        if (!handle->lb)
00779               return;
00780 
00781        while (cnt)
00782        {
00783               if (handle->errflag == 0)
00784                      handle->errflag=unicode_lbc_next(handle->lb, *uc);
00785 
00786               ++uc;
00787               --cnt;
00788        }
00789 }
00790 
00791 /*
00792 ** End of non-flowed content. Terminate the linebreaking API, then invoke
00793 ** the current end-of-line handler.
00794 */
00795 static void nonflowed_line_end(rfc3676_parser_t handle)
00796 {
00797        if (handle->lb)
00798        {
00799               int rc=unicode_lbc_end(handle->lb);
00800 
00801               if (rc && handle->errflag == 0)
00802                      handle->errflag=rc;
00803 
00804               handle->lb=NULL;
00805        }
00806 
00807        (*handle->nonflowed_line_end)(handle);
00808        emit_line_end(handle); /* FALLTHRU */
00809 }
00810 
00811 /*
00812 ** Callback from the linebreaking API, gives us the next unicode character
00813 ** and its linebreak property. Look up the unicode character's width, then
00814 ** invoke the current handler.
00815 */
00816 static int nonflowed_line_process(int linebreak_opportunity,
00817                               unicode_char ch, void *dummy)
00818 {
00819        rfc3676_parser_t handle=(rfc3676_parser_t)dummy;
00820 
00821        (*handle->nonflowed_line_process)(handle, linebreak_opportunity, ch,
00822                                      unicode_wcwidth(ch));
00823 
00824        return 0;
00825 }
00826 
00827 /*
00828 ** Collecting initial nonflowed line.
00829 */
00830 
00831 static void initial_nonflowed_line(rfc3676_parser_t handle,
00832                                int linebreak_opportunity,
00833                                unicode_char ch,
00834                                size_t ch_width)
00835 {
00836        /*
00837        ** Collect words into nonflowed_line as long as it fits within the
00838        ** targeted width.
00839        */
00840        if (linebreak_opportunity != UNICODE_LB_NONE &&
00841            handle->nonflowed_line_width + handle->nonflowed_next_word_width
00842            <= handle->nonflowed_line_target_width)
00843        {
00844               unicode_buf_append_buf(&handle->nonflowed_line,
00845                                    &handle->nonflowed_next_word);
00846               handle->nonflowed_line_width +=
00847                      handle->nonflowed_next_word_width;
00848 
00849               unicode_buf_clear(&handle->nonflowed_next_word);
00850               handle->nonflowed_next_word_width=0;
00851        }
00852 
00853        /*
00854        ** Add the character to the growing word.
00855        **
00856        ** If the line's size now exceeds the target width by quite a bit,
00857        ** we've had enough!
00858        */
00859 
00860        unicode_buf_append(&handle->nonflowed_next_word, &ch, 1);
00861        handle->nonflowed_next_word_width += ch_width;
00862 
00863        if (handle->nonflowed_line_width + handle->nonflowed_next_word_width
00864            > handle->nonflowed_line_target_width
00865            + NONFLOWED_THRESHOLD_EXCEEDED)
00866               begin_forced_rewrap(handle);
00867 }
00868 
00869 /*
00870 ** End of line handler. The line did not reach its threshold, so output it.
00871 */
00872 static void initial_nonflowed_end(rfc3676_parser_t handle)
00873 {
00874        emit_line_contents(handle,
00875                         unicode_buf_ptr(&handle->nonflowed_line),
00876                         unicode_buf_len(&handle->nonflowed_line));
00877 
00878        emit_line_contents(handle,
00879                         unicode_buf_ptr(&handle->nonflowed_next_word),
00880                         unicode_buf_len(&handle->nonflowed_next_word));
00881 }
00882 
00883 /*
00884 ** Check for the abnormal situation where we're ready to wrap something but
00885 ** nonflowed_line is empty because all this text did not have a linebreaking
00886 ** opportunity.
00887 */
00888 
00889 static void check_abnormal_line(rfc3676_parser_t handle)
00890 {
00891        size_t n, i;
00892        const unicode_char *p;
00893 
00894        if (unicode_buf_len(&handle->nonflowed_line) > 0)
00895               return;
00896 
00897        /* Extreme times call for extreme measures */
00898 
00899        n=unicode_buf_len(&handle->nonflowed_next_word);
00900        p=unicode_buf_ptr(&handle->nonflowed_next_word);
00901 
00902        for (i=n; i>0; --i)
00903        {
00904               if (i < n && unicode_grapheme_break(p[i-1], p[i]))
00905               {
00906                      n=i;
00907                      break;
00908               }
00909        }
00910 
00911        unicode_buf_append(&handle->nonflowed_line, p, n);
00912        unicode_buf_remove(&handle->nonflowed_next_word, 0, n);
00913 
00914        /*
00915        ** Recalculate the width of the growing word, now.
00916        */
00917 
00918        handle->nonflowed_next_word_width=0;
00919        p=unicode_buf_ptr(&handle->nonflowed_next_word);
00920 
00921        for (i=0; i<unicode_buf_len(&handle->nonflowed_next_word); ++i)
00922               handle->nonflowed_next_word_width +=
00923                      unicode_wcwidth(p[i]);
00924 }
00925 
00926 /*
00927 ** We've decided that the line is too long, so begin rewrapping it.
00928 */
00929 
00930 static void forced_rewrap_line(rfc3676_parser_t handle,
00931                             int linebreak_opportunity,
00932                             unicode_char ch,
00933                             size_t ch_width);
00934 
00935 static void forced_rewrap_end(rfc3676_parser_t handle);
00936 
00937 /*
00938 ** Emit nonflowed_line as the rewrapped line. Clear the buffer.
00939 */
00940 static void emit_rewrapped_line(rfc3676_parser_t handle)
00941 {
00942        check_abnormal_line(handle);
00943        emit_line_contents(handle, unicode_buf_ptr(&handle->nonflowed_line),
00944                         unicode_buf_len(&handle->nonflowed_line));
00945 
00946        emit_line_flowed_wrap(handle);
00947 
00948        /* nonflowed_line is now empty */
00949        unicode_buf_clear(&handle->nonflowed_line);
00950        handle->nonflowed_line_width=0;
00951 }
00952 
00953 static void begin_forced_rewrap(rfc3676_parser_t handle)
00954 {
00955        handle->nonflowed_line_process=forced_rewrap_line;
00956        handle->nonflowed_line_end=forced_rewrap_end;
00957        emit_rewrapped_line(handle);
00958 }
00959 
00960 static void forced_rewrap_line(rfc3676_parser_t handle,
00961                             int linebreak_opportunity,
00962                             unicode_char ch,
00963                             size_t ch_width)
00964 {
00965        if (linebreak_opportunity != UNICODE_LB_NONE)
00966        {
00967               /* Found a linebreaking opportunity */
00968 
00969               if (handle->nonflowed_line_width
00970                   + handle->nonflowed_next_word_width
00971                   > handle->nonflowed_line_target_width)
00972               {
00973                      /* Accumulated word is too long */
00974                      emit_rewrapped_line(handle);
00975               }
00976 
00977               unicode_buf_append_buf(&handle->nonflowed_line,
00978                                    &handle->nonflowed_next_word);
00979 
00980               handle->nonflowed_line_width +=
00981                      handle->nonflowed_next_word_width;
00982               unicode_buf_clear(&handle->nonflowed_next_word);
00983               handle->nonflowed_next_word_width=0;
00984        }
00985 
00986        /*
00987        ** Check for another excessively long line.
00988        */
00989 
00990        if (handle->nonflowed_line_width == 0 &&
00991            handle->nonflowed_next_word_width + ch_width
00992            > handle->nonflowed_line_target_width)
00993        {
00994               emit_rewrapped_line(handle);
00995        }
00996 
00997        unicode_buf_append(&handle->nonflowed_next_word, &ch, 1);
00998        handle->nonflowed_next_word_width += ch_width;
00999 }
01000 
01001 static void forced_rewrap_end(rfc3676_parser_t handle)
01002 {
01003        initial_nonflowed_end(handle); /* Same logic, for now */
01004 }
01005