Back to index

courier  0.68.2
Classes | Defines | Functions
unicode_linebreak.c File Reference
#include "unicode_config.h"
#include "unicode.h"
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "linebreaktab_internal.h"
#include "linebreaktab.h"

Go to the source code of this file.

Classes

struct  unicode_lb_info
struct  unicode_lbc_info

Defines

#define UNICODE_LB_SOT   0xFF
#define RESULT(x)   (*i->cb_func)((x), i->cb_arg)

Functions

static int next_def (unicode_lb_info_t, uint8_t)
static int end_def (unicode_lb_info_t)
static int next_lb25_seenophy (unicode_lb_info_t, uint8_t)
static int end_lb25_seenophy (unicode_lb_info_t)
static int next_lb25_seennu (unicode_lb_info_t, uint8_t)
static int next_lb25_seennuclcp (unicode_lb_info_t, uint8_t)
static void unicode_lb_reset (unicode_lb_info_t i)
unicode_lb_info_t unicode_lb_init (int(*cb_func)(int, void *), void *cb_arg)
int unicode_lb_end (unicode_lb_info_t i)
void unicode_lb_set_opts (unicode_lb_info_t i, int opts)
int unicode_lb_next_cnt (unicode_lb_info_t i, const unicode_char *chars, size_t cnt)
int unicode_lb_lookup (unicode_char ch)
int unicode_lb_next (unicode_lb_info_t i, unicode_char ch)
static int next_def_nolb25 (unicode_lb_info_t i, uint8_t uclass, int nolb25)
static int unwind_lb25_seenophy (unicode_lb_info_t i)
static int unicode_lbc_callback (int value, void *ptr)
unicode_lbc_info_t unicode_lbc_init (int(*cb_func)(int, unicode_char, void *), void *cb_arg)
void unicode_lbc_set_opts (unicode_lbc_info_t i, int opts)
int unicode_lbc_next (unicode_lbc_info_t i, unicode_char ch)
int unicode_lbc_end (unicode_lbc_info_t i)

Define Documentation

#define RESULT (   x)    (*i->cb_func)((x), i->cb_arg)

Definition at line 90 of file unicode_linebreak.c.

#define UNICODE_LB_SOT   0xFF

Definition at line 20 of file unicode_linebreak.c.


Function Documentation

static int end_def ( unicode_lb_info_t  i) [static]

Definition at line 85 of file unicode_linebreak.c.

{
       /*
        * Default end-of-input handler. The default state holds no deferred
        * characters, so there is nothing to flush (LB3 needs no action);
        * always reports success.
        */
       return 0;
}

Here is the caller graph for this function:

static int end_lb25_seenophy ( unicode_lb_info_t  i) [static]

Definition at line 495 of file unicode_linebreak.c.

{
       /*
        * End-of-input handler used while an OP/HY (plus trailing CMs) is
        * being held back: replay the held characters first, and only if
        * that succeeded run the default end handler.
        */
       int status=unwind_lb25_seenophy(i);

       return status ? status : end_def(i);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int next_def ( unicode_lb_info_t  i,
uint8_t  uclass 
) [static]

Definition at line 135 of file unicode_linebreak.c.

{
       /*
        * Default per-character handler: the generic rule cascade with the
        * LB25 prefix lookahead left enabled (nolb25 == 0).
        */
       const int nolb25=0;

       return next_def_nolb25(i, uclass, nolb25);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int next_def_nolb25 ( unicode_lb_info_t  i,
uint8_t  uclass,
int  nolb25 
) [static]

Definition at line 141 of file unicode_linebreak.c.

{
       /*
        * Core rule cascade: decides the break status between the previously
        * seen character and the current one.
        *
        * i       - line breaking context; i->prevclass and i->prevclass_nsp
        *           are read and updated here.
        * uclass  - linebreak class of the current character.
        * nolb25  - when non-zero, the PR/PO lookahead part of LB25 below is
        *           skipped (used when replaying a character that was
        *           already deferred once).
        *
        * The verdict (UNICODE_LB_NONE, UNICODE_LB_ALLOWED or
        * UNICODE_LB_MANDATORY) is handed to the callback through RESULT(),
        * and the callback's return value is returned. The one exception is
        * the LB25 deferral path, which returns 0 without invoking the
        * callback. Rules are tested in order; the first match wins.
        */

       /* Retrieve the previous unicode character's linebreak class. */

       uint8_t prevclass=i->prevclass;
       uint8_t prevclass_nsp=i->prevclass_nsp;

       /* Save this unicode char's linebreak class, for the next goaround */
       i->prevclass=uclass;

       /* prevclass_nsp tracks the most recent class that was not SP */
       if (uclass != UNICODE_LB_SP)
              i->prevclass_nsp=uclass;

       /* A digit switches the state machine to the numeric-context handler */
       if (uclass == UNICODE_LB_NU)
              i->next_handler=next_lb25_seennu; /* LB25 */

       /* prevclass still holds the UNICODE_LB_SOT sentinel: first character */
       if (prevclass == UNICODE_LB_SOT)
       {
              /* A leading CM has no base character; it is recorded as AL */
              if (uclass == UNICODE_LB_CM) /* LB9 */
                     i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;

              return RESULT(UNICODE_LB_NONE); /* LB2 */
       }

       if (prevclass == UNICODE_LB_CR && uclass == UNICODE_LB_LF)
              return RESULT(UNICODE_LB_NONE); /* LB5 */

       switch (prevclass) {
       case UNICODE_LB_BK:
       case UNICODE_LB_CR:
       case UNICODE_LB_LF:
       case UNICODE_LB_NL:

              /* A CM right after a hard break is recorded as AL */
              if (uclass == UNICODE_LB_CM)
              {
                     i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
                     /* LB9 */
              }

              return RESULT(UNICODE_LB_MANDATORY); /* LB4, LB5 */

       case UNICODE_LB_SP:
       case UNICODE_LB_ZW:
              /* A CM after SP/ZW is treated as AL for the rules below */
              if (uclass == UNICODE_LB_CM)
                     i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
              /* LB10 */
              break;
       default:
              break;
       }

       switch (uclass) {

              /* LB6: */
       case UNICODE_LB_BK:
       case UNICODE_LB_CR:
       case UNICODE_LB_LF:
       case UNICODE_LB_NL:

              /* LB7: */
       case UNICODE_LB_SP:
       case UNICODE_LB_ZW:

              return RESULT(UNICODE_LB_NONE);
       default:
              break;
       }

       if (prevclass_nsp == UNICODE_LB_ZW)
              return RESULT(UNICODE_LB_ALLOWED); /* LB8 */

       if (uclass == UNICODE_LB_CM)
       {
              /*
               * The CM is transparent: undo the updates made at the top so
               * that the next character is still compared with the base.
               */
              i->prevclass=prevclass;
              i->prevclass_nsp=prevclass_nsp;
              return RESULT(UNICODE_LB_NONE); /* LB9 */
       }

       if (prevclass == UNICODE_LB_WJ || uclass == UNICODE_LB_WJ)
              return RESULT(UNICODE_LB_NONE); /* LB11 */

       if (prevclass == UNICODE_LB_GL)
              return RESULT(UNICODE_LB_NONE); /* LB12 */

       if (uclass == UNICODE_LB_GL &&
           prevclass != UNICODE_LB_SP &&
           prevclass != UNICODE_LB_BA &&
           prevclass != UNICODE_LB_HY)
              return RESULT(UNICODE_LB_NONE); /* LB12a */


       switch (uclass) {
       case UNICODE_LB_SY:
              /*
               * With UNICODE_LB_OPT_SYBREAK set, a break is allowed between
               * a space and SY; otherwise SY is handled by LB13 below.
               */
              if (i->opts & UNICODE_LB_OPT_SYBREAK)
              {
                     if (prevclass == UNICODE_LB_SP)
                            return RESULT(UNICODE_LB_ALLOWED);
              }

              /* fallthrough */
       case UNICODE_LB_CL:
       case UNICODE_LB_CP:
       case UNICODE_LB_EX:
       case UNICODE_LB_IS:
              return RESULT(UNICODE_LB_NONE); /* LB13 */
       default:
              break;
       }

       /* UNICODE_LB_OPT_SYBREAK: no break between SY and EX, AL or ID */
       if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass == UNICODE_LB_SY)
              switch (uclass) {
              case UNICODE_LB_EX:
              case UNICODE_LB_AL:
              case UNICODE_LB_ID:
                     return RESULT(UNICODE_LB_NONE);
              }

       if (prevclass_nsp == UNICODE_LB_OP)
              return RESULT(UNICODE_LB_NONE); /* LB14 */

       if (prevclass_nsp == UNICODE_LB_QU && uclass == UNICODE_LB_OP)
              return RESULT(UNICODE_LB_NONE); /* LB15 */

       if ((prevclass_nsp == UNICODE_LB_CL || prevclass_nsp == UNICODE_LB_CP)
           && uclass == UNICODE_LB_NS)
              return RESULT(UNICODE_LB_NONE); /* LB16 */

       if (prevclass_nsp == UNICODE_LB_B2 && uclass == UNICODE_LB_B2)
              return RESULT(UNICODE_LB_NONE); /* LB17 */

       if (prevclass == UNICODE_LB_SP)
              return RESULT(UNICODE_LB_ALLOWED); /* LB18 */

       if (uclass == UNICODE_LB_QU || prevclass == UNICODE_LB_QU)
              return RESULT(UNICODE_LB_NONE); /* LB19 */

       if (uclass == UNICODE_LB_CB || prevclass == UNICODE_LB_CB)
              return RESULT(UNICODE_LB_ALLOWED); /* LB20 */

       /* LB21: */

       switch (uclass) {
       case UNICODE_LB_BA:
       case UNICODE_LB_HY:
       case UNICODE_LB_NS:
              return RESULT(UNICODE_LB_NONE);
       default:
              break;
       }

       /* LB21, second half: no break after BB */
       if (prevclass == UNICODE_LB_BB)
              return RESULT(UNICODE_LB_NONE);

       if (uclass == UNICODE_LB_IN)
              switch (prevclass) {
              case UNICODE_LB_AL:
              case UNICODE_LB_ID:
              case UNICODE_LB_IN:
              case UNICODE_LB_NU:
                     return RESULT(UNICODE_LB_NONE); /* LB22 */
              default:
                     break;
              }


       if (prevclass == UNICODE_LB_ID && uclass == UNICODE_LB_PO)
              return RESULT(UNICODE_LB_NONE); /* LB23 */
       if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU)
              return RESULT(UNICODE_LB_NONE); /* LB23 */

       if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL)
              return RESULT(UNICODE_LB_NONE); /* LB23 */


       if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID)
              return RESULT(UNICODE_LB_NONE); /* LB24 */
       if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL)
              return RESULT(UNICODE_LB_NONE); /* LB24 */
       if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL)
              return RESULT(UNICODE_LB_NONE); /* LB24 */

       /* UNICODE_LB_OPT_PRBREAK: no break between PR, AL or ID and PR */
       if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR)
              switch (prevclass) {
              case UNICODE_LB_PR:
              case UNICODE_LB_AL:
              case UNICODE_LB_ID:
                     return RESULT(UNICODE_LB_NONE);
              }
              
       if (!nolb25 &&
           (prevclass == UNICODE_LB_PR || prevclass == UNICODE_LB_PO))
       {
              if (uclass == UNICODE_LB_NU)
                     return RESULT(UNICODE_LB_NONE); /* LB25 */

              if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY)
              {
                     /*
                      * (PR|PO) followed by (OP|HY): the verdict depends on
                      * whether a digit comes next. Put the previous classes
                      * back, remember this class, and let the *_seenophy
                      * handlers decide later. No result is reported for this
                      * character yet, hence the plain "return 0".
                      */
                     i->prevclass=prevclass;
                     i->prevclass_nsp=prevclass_nsp;

                     i->savedclass=uclass;
                     i->savedcmcnt=0;
                     i->next_handler=next_lb25_seenophy;
                     i->end_handler=end_lb25_seenophy;
                     return 0;
              }
       }

       if ((prevclass == UNICODE_LB_OP || prevclass == UNICODE_LB_HY) &&
           uclass == UNICODE_LB_NU)
              return RESULT(UNICODE_LB_NONE); /* LB25 */

       /*****/

       if (prevclass == UNICODE_LB_JL)
              switch (uclass) {
              case UNICODE_LB_JL:
              case UNICODE_LB_JV:
              case UNICODE_LB_H2:
              case UNICODE_LB_H3:
                     return RESULT(UNICODE_LB_NONE); /* LB26 */
              default:
                     break;
              }

       if ((prevclass == UNICODE_LB_JV ||
            prevclass == UNICODE_LB_H2) &&
           (uclass == UNICODE_LB_JV ||
            uclass == UNICODE_LB_JT))
              return RESULT(UNICODE_LB_NONE); /* LB26 */

       if ((prevclass == UNICODE_LB_JT ||
            prevclass == UNICODE_LB_H3) &&
           uclass == UNICODE_LB_JT)
              return RESULT(UNICODE_LB_NONE); /* LB26 */


       switch (prevclass) {
       case UNICODE_LB_JL:
       case UNICODE_LB_JV:
       case UNICODE_LB_JT:
       case UNICODE_LB_H2:
       case UNICODE_LB_H3:
              if (uclass == UNICODE_LB_IN || uclass == UNICODE_LB_PO)
                     return RESULT(UNICODE_LB_NONE); /* LB27 */
              /* fallthrough: no match, continue with the later rules */
       default:
              break;
       }

       switch (uclass) {
       case UNICODE_LB_JL:
       case UNICODE_LB_JV:
       case UNICODE_LB_JT:
       case UNICODE_LB_H2:
       case UNICODE_LB_H3:
              if (prevclass == UNICODE_LB_PR)
                     return RESULT(UNICODE_LB_NONE); /* LB27 */
              /* fallthrough: no match, continue with the later rules */
       default:
              break;
       }

       if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_AL)
              return RESULT(UNICODE_LB_NONE); /* LB28 */

       if (prevclass == UNICODE_LB_IS && uclass == UNICODE_LB_AL)
              return RESULT(UNICODE_LB_NONE); /* LB29 */

       if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_NU) &&
           uclass == UNICODE_LB_OP)
              return RESULT(UNICODE_LB_NONE); /* LB30 */

       if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_NU) &&
           prevclass == UNICODE_LB_CP)
              return RESULT(UNICODE_LB_NONE); /* LB30 */

       /* Nothing above matched: a break is permitted */
       return RESULT(UNICODE_LB_ALLOWED); /* LB31 */
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int next_lb25_seennu ( unicode_lb_info_t  i,
uint8_t  uclass 
) [static]

Definition at line 507 of file unicode_linebreak.c.

{
       /*
        * Handler installed after a digit (NU) was seen. While the numeric
        * expression continues no break is reported; the first character
        * that cannot continue it is handed back to the default handler.
        */
       switch (uclass) {
       case UNICODE_LB_NU:
       case UNICODE_LB_SY:
       case UNICODE_LB_IS:
              /* Still inside the number: stay in this state */
              i->prevclass=i->prevclass_nsp=uclass;
              return RESULT(UNICODE_LB_NONE);

       case UNICODE_LB_CM:
              /* LB9: combining mark, state and previous classes untouched */
              return RESULT(UNICODE_LB_NONE);

       case UNICODE_LB_CL:
       case UNICODE_LB_CP:
              /* Closing punctuation: only a PR/PO postfix may still follow */
              i->prevclass=i->prevclass_nsp=uclass;
              i->next_handler=next_lb25_seennuclcp;
              i->end_handler=end_def;
              return RESULT(UNICODE_LB_NONE);

       case UNICODE_LB_PR:
       case UNICODE_LB_PO:
              /* Postfix ends the numeric expression; resume default rules */
              i->next_handler=next_def;
              i->end_handler=end_def;
              i->prevclass=i->prevclass_nsp=uclass;
              return RESULT(UNICODE_LB_NONE);

       default:
              break;
       }

       /* Not part of the number: back to the default state, process normally */
       i->next_handler=next_def;
       i->end_handler=end_def;
       return next_def(i, uclass);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int next_lb25_seennuclcp ( unicode_lb_info_t  i,
uint8_t  uclass 
) [static]

Definition at line 542 of file unicode_linebreak.c.

{
       /*
        * Handler installed after "number, then CL or CP". A combining mark
        * keeps this state; anything else returns to the default handler,
        * with a PR/PO postfix glued to the number (no break before it).
        */
       int is_postfix;

       if (uclass == UNICODE_LB_CM)
              return RESULT(UNICODE_LB_NONE); /* LB9 */

       is_postfix=(uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO);

       i->next_handler=next_def;
       i->end_handler=end_def;

       if (!is_postfix)
              return next_def(i, uclass);

       i->prevclass=i->prevclass_nsp=uclass;
       return RESULT(UNICODE_LB_NONE);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int next_lb25_seenophy ( unicode_lb_info_t  i,
uint8_t  uclass 
) [static]

Definition at line 456 of file unicode_linebreak.c.

{
       /*
        * Handler installed by next_def_nolb25() after (PR|PO) was followed
        * by (OP|HY). The verdict for the held OP/HY has not been reported
        * yet; it depends on whether a digit (NU) follows.
        *
        * Returns 0 or the first non-zero value returned by the callback.
        */
       int rc;

       if (uclass == UNICODE_LB_CM)
       {
              ++i->savedcmcnt; /* Keep track of CMs, and try again */
              return 0;
       }

       if (uclass != UNICODE_LB_NU)
       {
              /*
               * No digit after all: replay the held characters through the
               * default rules, then process this character normally.
               */
              rc=unwind_lb25_seenophy(i);

              if (rc)
                     return rc;

              return next_def_nolb25(i, uclass, 0);
       }

       /*
        * A digit follows: report "no break" once for the held OP/HY and
        * once more for every CM counted since (savedcmcnt + 1 callbacks).
        */
       do
       {
              rc=RESULT(UNICODE_LB_NONE); /* (OP|HY) feedback */

              if (rc)
                     return rc;
       } while (i->savedcmcnt--);

       /* Continue in the numeric state; the digit itself gets "no break" */
       i->next_handler=next_lb25_seennu;
       i->end_handler=end_def;
       i->prevclass=i->prevclass_nsp=uclass;
       return RESULT(UNICODE_LB_NONE);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_lb_end ( unicode_lb_info_t  i)

Definition at line 70 of file unicode_linebreak.c.

{
       /*
        * Finish line breaking: run the end handler of the current state
        * (which reports anything still held back), then release the
        * context. The handle must not be used after this call. Returns the
        * end handler's result.
        */
       int rc;

       rc=(*i->end_handler)(i);
       free(i);

       return rc;
}

Here is the caller graph for this function:

unicode_lb_info_t unicode_lb_init ( int(*)(int, void *)  cb_func,
void *  cb_arg 
)

Definition at line 58 of file unicode_linebreak.c.

{
       /*
        * Allocate a zero-filled line breaking context, record the result
        * callback and its opaque argument, and put the state machine into
        * its initial state.
        *
        * Returns the new handle, or NULL when the allocation fails;
        * unicode_lbc_init() already checks for a NULL return.
        */
       unicode_lb_info_t i=calloc(1, sizeof(struct unicode_lb_info));

       if (!i)
              return NULL; /* Out of memory: nothing to initialize */

       i->cb_func=cb_func;
       i->cb_arg=cb_arg;

       unicode_lb_reset(i);
       return i;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_lb_lookup ( unicode_char  ch)

Definition at line 109 of file unicode_linebreak.c.

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_lb_next ( unicode_lb_info_t  i,
unicode_char  ch 
)

Definition at line 120 of file unicode_linebreak.c.

{
       /*
        * Feed one character to the state machine. Its linebreak class comes
        * from unicode_lb_lookup(), except that with UNICODE_LB_OPT_DASHWJ
        * set, U+2012 and U+2013 are classified as WJ. Returns whatever the
        * current handler returns.
        */
       int lbclass;

       if ((i->opts & UNICODE_LB_OPT_DASHWJ) &&
           (ch == 0x2012 || ch == 0x2013))
              lbclass=UNICODE_LB_WJ;
       else
              lbclass=unicode_lb_lookup(ch);

       return (*i->next_handler)(i, lbclass);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_lb_next_cnt ( unicode_lb_info_t  i,
const unicode_char *  chars,
size_t  cnt 
)

Definition at line 92 of file unicode_linebreak.c.

{
       /*
        * Feed cnt characters, in order, through unicode_lb_next(). Stops at
        * the first non-zero result and returns it; returns 0 once all
        * characters were accepted (including when cnt is 0).
        */
       size_t n;

       for (n=0; n < cnt; ++n)
       {
              int rc=unicode_lb_next(i, chars[n]);

              if (rc != 0)
                     return rc;
       }

       return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void unicode_lb_reset ( unicode_lb_info_t  i) [static]

Definition at line 51 of file unicode_linebreak.c.

Here is the call graph for this function:

Here is the caller graph for this function:

void unicode_lb_set_opts ( unicode_lb_info_t  i,
int  opts 
)

Definition at line 78 of file unicode_linebreak.c.

{
       /* Replace (not merge) the UNICODE_LB_OPT_* option flags of the context */
       i->opts = opts;
}

Here is the caller graph for this function:

static int unicode_lbc_callback ( int  value,
void *  ptr 
) [static]

Definition at line 573 of file unicode_linebreak.c.

{
       /*
        * Adapter between the two callback styles: pairs each break value
        * with the next not-yet-reported character from the buffer and
        * forwards both to the user's callback. buf_ptr is advanced before
        * the user's callback runs.
        */
       unicode_lbc_info_t ctx=(unicode_lbc_info_t)ptr;

       if (ctx->buf_ptr < unicode_buf_len(&ctx->buf))
       {
              unicode_char pending=unicode_buf_ptr(&ctx->buf)[ctx->buf_ptr];

              ++ctx->buf_ptr;
              return (*ctx->cb_func)(value, pending, ctx->cb_arg);
       }

       /* More results than buffered characters. Shouldn't happen */
       errno=EINVAL;
       return -1;
}

Here is the caller graph for this function:

int unicode_lbc_end ( unicode_lbc_info_t  i)

Definition at line 625 of file unicode_linebreak.c.

{
       /*
        * Finish the wrapped line breaking handle first (this may still
        * invoke the callback for held-back characters), then release the
        * character buffer and the context itself. Returns the result of
        * unicode_lb_end().
        */
       int rc;

       rc=unicode_lb_end(i->handle);

       unicode_buf_deinit(&i->buf);
       free(i);

       return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

unicode_lbc_info_t unicode_lbc_init ( int(*)(int, unicode_char, void *)  cb_func,
void *  cb_arg 
)

Definition at line 587 of file unicode_linebreak.c.

{
       /*
        * Create a line breaking context whose callback also receives the
        * character each result belongs to. Wraps a plain unicode_lb handle
        * and routes its results through unicode_lbc_callback().
        *
        * Returns the new context, or NULL if either allocation failed.
        */
       unicode_lbc_info_t ctx=calloc(1, sizeof(struct unicode_lbc_info));

       if (ctx == NULL)
              return NULL;

       ctx->cb_func=cb_func;
       ctx->cb_arg=cb_arg;

       ctx->handle=unicode_lb_init(unicode_lbc_callback, ctx);

       if (ctx->handle == NULL)
       {
              /* Inner handle could not be created: undo our own allocation */
              free(ctx);
              return NULL;
       }

       unicode_buf_init(&ctx->buf, (size_t)-1);
       return ctx;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_lbc_next ( unicode_lbc_info_t  i,
unicode_char  ch 
)

Definition at line 613 of file unicode_linebreak.c.

{
       /*
        * Buffer the character, then feed it to the wrapped handle. The
        * buffer is needed because the handle may hold back its verdict for
        * a while; unicode_lbc_callback() consumes buffered characters as
        * results arrive.
        */

       /* Every buffered character has been reported: recycle the buffer */
       if (unicode_buf_len(&i->buf) <= i->buf_ptr)
       {
              i->buf_ptr=0;
              unicode_buf_clear(&i->buf);
       }

       /*
        * NOTE(review): the result of unicode_buf_append() is ignored here;
        * confirm whether it can fail and whether that should be reported.
        */
       unicode_buf_append(&i->buf, &ch, 1);

       return unicode_lb_next(i->handle, ch);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void unicode_lbc_set_opts ( unicode_lbc_info_t  i,
int  opts 
)

Definition at line 608 of file unicode_linebreak.c.

{
       /* Options live in the wrapped line breaking handle; forward them */
       unicode_lb_set_opts(i->handle, opts);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int unwind_lb25_seenophy ( unicode_lb_info_t  i) [static]

Definition at line 428 of file unicode_linebreak.c.

{
       /*
        * Replay the OP/HY that next_def_nolb25() held back, plus one
        * further pass for every combining mark counted since, through the
        * default rules. Called when the character after them turned out
        * not to be a digit, or when the input ended.
        *
        * Returns 0, or the first non-zero value from next_def_nolb25().
        */
       int rc;

       /*
        * The first pass runs with the LB25 lookahead disabled, so that the
        * held character is not deferred a second time.
        */
       int nolb25_flag=1;

       /* Back to the default state before replaying */
       i->next_handler=next_def;
       i->end_handler=end_def;

       do
       {
              /* Every pass, including those for saved CMs, uses savedclass */
              rc=next_def_nolb25(i, i->savedclass, nolb25_flag);

              if (rc)
                     return rc;

              /* Later passes run with the lookahead enabled again */
              nolb25_flag=0;
       } while (i->savedcmcnt--);
       return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function: