Back to index

courier  0.68.2
Classes | Defines | Functions
unicode_wordbreak.c File Reference
#include "unicode_config.h"
#include "unicode.h"
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "wordbreaktab_internal.h"
#include "wordbreaktab.h"

Go to the source code of this file.

Classes

struct  unicode_wb_info
struct  unicode_wbscan_info

Defines

#define SET_HANDLER(next, end)   (i->next_handler=next, i->end_handler=end)

Functions

static int sot (unicode_wb_info_t i, uint8_t cl)
static int wb4 (unicode_wb_info_t i)
static int wb1and2_done (unicode_wb_info_t i, uint8_t cl)
static int seen_wb67_handler (unicode_wb_info_t i, uint8_t cl)
static int seen_wb67_end_handler (unicode_wb_info_t i)
static int wb67_done (unicode_wb_info_t i, uint8_t prevclass, uint8_t cl)
static int seen_wb1112_handler (unicode_wb_info_t i, uint8_t cl)
static int seen_wb1112_end_handler (unicode_wb_info_t i)
static int wb1112_done (unicode_wb_info_t i, uint8_t prevclass, uint8_t cl)
unicode_wb_info_t unicode_wb_init (int(*cb_func)(int, void *), void *cb_arg)
int unicode_wb_end (unicode_wb_info_t i)
int unicode_wb_next_cnt (unicode_wb_info_t i, const unicode_char *chars, size_t cnt)
int unicode_wb_next (unicode_wb_info_t i, unicode_char ch)
static int result (unicode_wb_info_t i, int flag)
static int unicode_wbscan_callback (int, void *)
unicode_wbscan_info_t unicode_wbscan_init ()
int unicode_wbscan_next (unicode_wbscan_info_t i, unicode_char ch)
size_t unicode_wbscan_end (unicode_wbscan_info_t i)

Class Documentation

struct unicode_wbscan_info

Definition at line 393 of file unicode_wordbreak.c.

Collaboration diagram for unicode_wbscan_info:
Class Members
size_t cnt
int found
unicode_wb_info_t wb_handle

Define Documentation

#define SET_HANDLER (   next,
  end 
)    (i->next_handler=next, i->end_handler=end)

Definition at line 123 of file unicode_wordbreak.c.


Function Documentation

static int result ( unicode_wb_info_t  i,
int  flag 
) [static]

Definition at line 113 of file unicode_wordbreak.c.

{
       /*
       ** Report the characters held back under WB4 first. This
       ** character's own flag is passed to the callback only when
       ** none of those earlier callback invocations failed.
       */
       int status=wb4(i);

       if (status)
              return status;

       return (*i->cb_func)(flag, i->cb_arg);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int seen_wb1112_end_handler ( unicode_wb_info_t  i) [static]

Definition at line 349 of file unicode_wordbreak.c.

{
       /*
       ** End handler used while the character that followed a Numeric
       ** is still undecided. Capture the number of characters that
       ** were skipped since then before anything else runs.
       */
       const size_t skipped=i->wb4_extra_cnt;

       /* Evaluate the held character through wb1112_done(). */
       int status=wb1112_done(i, UNICODE_WB_Numeric, i->prevclass);

       /* The skipped characters become the pending run for wb4(). */
       i->wb4_cnt=skipped;

       return status ? status : wb4(i);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int seen_wb1112_handler ( unicode_wb_info_t  i,
uint8_t  cl 
) [static]

Definition at line 293 of file unicode_wordbreak.c.

{
       /*
       ** Handler installed by wb67_done() after a Numeric was followed
       ** by MidNum or MidNumLet. The verdict for that second character
       ** is held back until the next character that is neither Extend
       ** nor Format shows up; "cl" is the class of the incoming
       ** character. Returns 0, or whatever non-zero value came back
       ** from the callback chain.
       */
       int rc;
       uint8_t prevclass;
       size_t extra_cnt;

       /*
       ** Extend/Format characters seen while the verdict is pending are
       ** only counted; they get reported later through wb4_cnt.
       */
       if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format)
       {
              ++i->wb4_extra_cnt;
              return 0;
       }

       extra_cnt=i->wb4_extra_cnt;

       /*
       ** Reset the handler to the default, then check WB11/WB12
       */

       SET_HANDLER(wb1and2_done, NULL);

       if (cl == UNICODE_WB_Numeric)
       {
              /*
              ** result() first reports whatever is pending in wb4_cnt,
              ** then reports flag 0 for the held second character.
              */
              rc=result(i, 0); /* WB11 */

              /*
              ** The characters counted while waiting are now pending;
              ** the next result() call reports them ahead of the
              ** current character.
              */
              i->wb4_cnt=extra_cnt;

              if (rc == 0)
                     rc=result(i, 0); /* WB12 */

              i->prevclass=cl;
                     
              return rc;
       }

       prevclass=i->prevclass; /* This was the second character */

       /*
       ** WB11/WB12 did not apply. Process the second character with the
       ** rules that follow them: wb1112_done() starts at WB13.
       */

       rc=wb1112_done(i, UNICODE_WB_Numeric, prevclass);

       /*
       ** Put back the context for the current character: the second
       ** character is its predecessor, and the characters counted while
       ** waiting are reported before it.
       */
       i->prevclass=prevclass;
       i->wb4_cnt=extra_cnt;

       if (rc == 0)
              rc=(*i->next_handler)(i, cl);
       /* Process the current char now */

       return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int seen_wb67_end_handler ( unicode_wb_info_t  i) [static]

Definition at line 244 of file unicode_wordbreak.c.

{
       /*
       ** End handler used while the character that followed an ALetter
       ** is still undecided. Capture the number of characters that
       ** were skipped since then before anything else runs.
       */
       const size_t skipped=i->wb4_extra_cnt;

       /* Evaluate the held character through wb67_done(). */
       int status=wb67_done(i, UNICODE_WB_ALetter, i->prevclass);

       /* The skipped characters become the pending run for wb4(). */
       i->wb4_cnt=skipped;

       return status ? status : wb4(i);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int seen_wb67_handler ( unicode_wb_info_t  i,
uint8_t  cl 
) [static]

Definition at line 188 of file unicode_wordbreak.c.

{
       /*
       ** Handler installed by wb1and2_done() after an ALetter was
       ** followed by MidLetter or MidNumLet. The verdict for that
       ** second character is held back until the next character that is
       ** neither Extend nor Format shows up; "cl" is the class of the
       ** incoming character. Returns 0, or whatever non-zero value came
       ** back from the callback chain.
       */
       int rc;
       uint8_t prevclass;
       size_t extra_cnt;

       /*
       ** Extend/Format characters seen while the verdict is pending are
       ** only counted; they get reported later through wb4_cnt.
       */
       if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format)
       {
              ++i->wb4_extra_cnt;
              return 0;
       }

       extra_cnt=i->wb4_extra_cnt;

       /*
       ** Reset the handler to the default, then check WB6
       */

       SET_HANDLER(wb1and2_done, NULL);

       if (cl == UNICODE_WB_ALetter)
       {
              /*
              ** result() first reports whatever is pending in wb4_cnt,
              ** then reports flag 0 for the held second character.
              */
              rc=result(i, 0); /* WB6 */

              /*
              ** The characters counted while waiting are now pending;
              ** the next result() call reports them ahead of the
              ** current character.
              */
              i->wb4_cnt=extra_cnt;

              if (rc == 0)
                     rc=result(i, 0); /* WB7 */

              i->prevclass=cl;
                     
              return rc;
       }

       prevclass=i->prevclass; /* This was the second character */

       /*
       ** WB6/WB7 did not apply. Process the second character with the
       ** rules that follow them: wb67_done() starts at WB8.
       */

       rc=wb67_done(i, UNICODE_WB_ALetter, prevclass);

       /*
       ** Put back the context for the current character: the second
       ** character is its predecessor, and the characters counted while
       ** waiting are reported before it.
       */
       i->prevclass=prevclass;
       i->wb4_cnt=extra_cnt;

       if (rc == 0)
              rc=(*i->next_handler)(i, cl);
       /* Process the current char now */

       return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int sot ( unicode_wb_info_t  i,
uint8_t  cl 
) [static]

Definition at line 125 of file unicode_wordbreak.c.

{
       /*
       ** Handler for the very first character: every later character
       ** goes through wb1and2_done(), with no end handler installed.
       */
       SET_HANDLER(wb1and2_done, NULL);

       /* Remember this character's class as context for the next one. */
       i->prevclass=cl;

       return result(i, 1); /* WB1 */
}

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_wb_end ( unicode_wb_info_t  i)

Definition at line 58 of file unicode_wordbreak.c.

{
       /*
       ** A handler that is holding back a decision supplies its own
       ** end-of-input routine; otherwise only the pending WB4 run
       ** needs to be reported.
       */
       int status=i->end_handler ? (*i->end_handler)(i) : wb4(i);

       /* The handle is released regardless of the outcome. */
       free(i);
       return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

unicode_wb_info_t unicode_wb_init ( int(*)(int, void *)  cb_func,
void *  cb_arg 
)

Definition at line 44 of file unicode_wordbreak.c.

{
       /*
       ** Zero-filled allocation; only the fields below receive
       ** explicit values.
       */
       unicode_wb_info_t handle=calloc(1, sizeof(struct unicode_wb_info));

       if (handle)
       {
              /* The first character is handled by sot(). */
              handle->next_handler=sot;
              handle->cb_func=cb_func;
              handle->cb_arg=cb_arg;
       }

       /* NULL when the allocation failed. */
       return handle;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_wb_next ( unicode_wb_info_t  i,
unicode_char  ch 
)

Definition at line 87 of file unicode_wordbreak.c.

Here is the call graph for this function:

Here is the caller graph for this function:

int unicode_wb_next_cnt ( unicode_wb_info_t  i,
const unicode_char *  chars,
size_t  cnt 
)

Definition at line 71 of file unicode_wordbreak.c.

{
       size_t n;

       /*
       ** Feed the characters one at a time, stopping at the first
       ** non-zero return value, which is passed back to the caller.
       */
       for (n=0; n < cnt; ++n)
       {
              int rc=unicode_wb_next(i, chars[n]);

              if (rc)
                     return rc;
       }

       return 0;
}

Here is the call graph for this function:

static int unicode_wbscan_callback ( int  flag,
void *  arg 
) [static]

Definition at line 437 of file unicode_wordbreak.c.

{
       unicode_wbscan_info_t scan=arg;

       /*
       ** A non-zero flag counts as a hit only after at least one
       ** character has already been counted.
       */
       if (flag && scan->cnt > 0)
              scan->found=1;

       /* Counting stops for good once the hit has been recorded. */
       if (scan->found)
              return 0;

       ++scan->cnt;
       return 0;
}

Here is the caller graph for this function:

size_t unicode_wbscan_end ( unicode_wbscan_info_t  i)

Definition at line 426 of file unicode_wordbreak.c.

{
       size_t counted;

       /*
       ** Finish the underlying handle first: that can still invoke the
       ** callback, so the count is read only afterwards.
       */
       unicode_wb_end(i->wb_handle);

       counted=i->cnt;
       free(i);

       return counted;
}

Here is the call graph for this function:

unicode_wbscan_info_t unicode_wbscan_init ( )

Definition at line 402 of file unicode_wordbreak.c.

{
       unicode_wbscan_info_t scan=calloc(1, sizeof(struct unicode_wbscan_info));

       if (scan)
       {
              /*
              ** The scan object is handed to the word-break handle as
              ** the callback argument.
              */
              scan->wb_handle=unicode_wb_init(unicode_wbscan_callback, scan);

              if (scan->wb_handle == NULL)
              {
                     /* Undo the first allocation; report failure. */
                     free(scan);
                     scan=NULL;
              }
       }

       return scan;
}

Here is the call graph for this function:

int unicode_wbscan_next ( unicode_wbscan_info_t  i,
unicode_char  ch 
)

Definition at line 418 of file unicode_wordbreak.c.

{
       /* After a hit, further characters are not passed on. */
       if (i->found)
              return i->found;

       unicode_wb_next(i->wb_handle, ch);

       /* The callback may have recorded a hit during the call above. */
       return i->found;
}

Here is the call graph for this function:

static int wb1112_done ( unicode_wb_info_t  i,
uint8_t  prevclass,
uint8_t  cl 
) [static]

Definition at line 365 of file unicode_wordbreak.c.

{
       /* Classes that may precede ExtendNumLet under WB13a. */
       const int prev_joins_numlet=
              prevclass == UNICODE_WB_ALetter ||
              prevclass == UNICODE_WB_Numeric ||
              prevclass == UNICODE_WB_Katakana ||
              prevclass == UNICODE_WB_ExtendNumLet;

       /* Classes that may follow ExtendNumLet under WB13b. */
       const int cl_joins_numlet=
              cl == UNICODE_WB_ALetter ||
              cl == UNICODE_WB_Numeric ||
              cl == UNICODE_WB_Katakana;

       if (prevclass == UNICODE_WB_Katakana && cl == UNICODE_WB_Katakana)
              return result(i, 0); /* WB13 */

       if (prev_joins_numlet && cl == UNICODE_WB_ExtendNumLet)
              return result(i, 0); /* WB13a */

       if (prevclass == UNICODE_WB_ExtendNumLet && cl_joins_numlet)
              return result(i, 0); /* WB13b */

       /* Nothing above matched: report flag 1. */
       return result(i, 1); /* WB14 */
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int wb1and2_done ( unicode_wb_info_t  i,
uint8_t  cl 
) [static]

Definition at line 133 of file unicode_wordbreak.c.

{
       const uint8_t before=i->prevclass;

       const int before_is_eol=
              before == UNICODE_WB_CR ||
              before == UNICODE_WB_LF ||
              before == UNICODE_WB_Newline;

       const int cl_is_eol=
              cl == UNICODE_WB_CR ||
              cl == UNICODE_WB_LF ||
              cl == UNICODE_WB_Newline;

       /*
       ** The current class becomes the context for the next character;
       ** the WB4 branch below is the only one that undoes this.
       */
       i->prevclass=cl;

       if (before == UNICODE_WB_CR && cl == UNICODE_WB_LF)
              return result(i, 0); /* WB3 */

       if (before_is_eol)
              return result(i, 1); /* WB3a */

       if (cl_is_eol)
              return result(i, 1); /* WB3b */

       if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format)
       {
              /*
              ** WB4: the character is only counted for now and the
              ** previous class stays in effect.
              */
              i->prevclass=before;
              ++i->wb4_cnt;
              return 0;
       }

       if (before == UNICODE_WB_ALetter)
       {
              if (cl == UNICODE_WB_ALetter)
                     return result(i, 0); /* WB5 */

              if (cl == UNICODE_WB_MidLetter || cl == UNICODE_WB_MidNumLet)
              {
                     /*
                     ** Possible WB6/WB7: hold this character back and
                     ** let the dedicated handlers decide once the next
                     ** character (or end of input) is known.
                     */
                     i->wb4_extra_cnt=0;
                     SET_HANDLER(seen_wb67_handler, seen_wb67_end_handler);
                     return 0;
              }
       }

       /* Continue with the rules that follow WB7. */
       return wb67_done(i, before, cl);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int wb4 ( unicode_wb_info_t  i) [static]

Definition at line 99 of file unicode_wordbreak.c.

{
       int status=0;

       /*
       ** Invoke the callback with flag 0 once per pending character.
       ** The counter is always drained to zero, but once a callback
       ** returns non-zero no further callbacks are made and that
       ** value is what gets returned.
       */
       while (i->wb4_cnt > 0)
       {
              --i->wb4_cnt;

              if (status)
                     continue;

              status=(*i->cb_func)(0, i->cb_arg);
       }

       return status;
}

Here is the caller graph for this function:

static int wb67_done ( unicode_wb_info_t  i,
uint8_t  prevclass,
uint8_t  cl 
) [static]

Definition at line 261 of file unicode_wordbreak.c.

{
       if (prevclass == UNICODE_WB_Numeric)
       {
              if (cl == UNICODE_WB_Numeric)
                     return result(i, 0); /* WB8 */

              if (cl == UNICODE_WB_ALetter)
                     return result(i, 0); /* WB10 */

              if (cl == UNICODE_WB_MidNum || cl == UNICODE_WB_MidNumLet)
              {
                     /*
                     ** Possible WB11/WB12: hold this character back and
                     ** let the dedicated handlers decide once the next
                     ** character (or end of input) is known.
                     */
                     i->wb4_extra_cnt=0;
                     SET_HANDLER(seen_wb1112_handler,
                                 seen_wb1112_end_handler);
                     return 0;
              }
       }
       else if (prevclass == UNICODE_WB_ALetter &&
                cl == UNICODE_WB_Numeric)
       {
              return result(i, 0); /* WB9 */
       }

       /* Continue with the rules that follow WB12. */
       return wb1112_done(i, prevclass, cl);
}

Here is the call graph for this function:

Here is the caller graph for this function: