Back to index

lightning-sunbird  0.9+nobinonly
Defines | Typedefs | Functions | Variables
rulebrk.c File Reference
#include <stdlib.h>
#include <assert.h>
#include "th_char.h"

Go to the source code of this file.

Defines

#define TH_UNICODE
#define th_isalpha(c)   (((c)>='a'&&(c)<='z')||((c)>='A'&&(c)<='Z'))
#define th_isspace(c)   ((c)==' '||(c)=='\t')
#define VRS   0x0001
#define VRE   0x0002
#define VRX   0x0004
#define VRA   0x0008
#define VLA   0x0010
#define VLO   0x0020
#define VLI   0x0040
#define VC   0x0080
#define CC   0x0100
#define CS   0x0200
#define C2   0x0400
#define CHB   0x0800
#define CHE   0x1000
#define MT   0x2000
#define M   0x4000
#define T   0x8000
#define VL   (VLA|VLO|VLI)
#define VR   (VRS|VRE|VRX)
#define NE   (VL|VRS)
#define NB   (VR|M)
#define V   (VL|VR)
#define CX   (CC|CS)
#define C   (CX|VC)
#define A   (C|V|M)
#define twbtype(c)   (_TwbType[th_zcode(c)])
#define TRUE   1
#define FALSE   0
#define RETURN(b)   return (b)
#define c(i)   (_c[(i)+3])
#define t(i)   (_t[(i)+3])

Typedefs

typedef unsigned short twb_t

Functions

int TrbWordBreakPos (const th_char *pstr, int left, const th_char *rstr, int right)
int TrbFollowing (const th_char *begin, int length, int offset)

Variables

const twb_t _TwbType [0x100-0xa0]

Define Documentation

#define A   (C|V|M)

Definition at line 91 of file rulebrk.c.

#define C   (CX|VC)

Definition at line 90 of file rulebrk.c.

#define c (   i)    (_c[(i)+3])
#define C2   0x0400

Definition at line 72 of file rulebrk.c.

#define CC   0x0100

Definition at line 69 of file rulebrk.c.

#define CHB   0x0800

Definition at line 73 of file rulebrk.c.

#define CHE   0x1000

Definition at line 74 of file rulebrk.c.

#define CS   0x0200

Definition at line 70 of file rulebrk.c.

#define CX   (CC|CS)

Definition at line 89 of file rulebrk.c.

#define FALSE   0

Definition at line 97 of file rulebrk.c.

#define M   0x4000

Definition at line 80 of file rulebrk.c.

#define MT   0x2000

Definition at line 76 of file rulebrk.c.

#define NB   (VR|M)

Definition at line 87 of file rulebrk.c.

#define NE   (VL|VRS)

Definition at line 86 of file rulebrk.c.

#define RETURN (   b)    return (b)

Definition at line 99 of file rulebrk.c.

#define T   0x8000

Definition at line 82 of file rulebrk.c.

#define t (   i)    (_t[(i)+3])
#define th_isalpha (   c)    (((c)>='a'&&(c)<='z')||((c)>='A'&&(c)<='Z'))

Definition at line 41 of file rulebrk.c.

#define th_isspace (   c)    ((c)==' '||(c)=='\t')

Definition at line 42 of file rulebrk.c.

#define TH_UNICODE

Definition at line 36 of file rulebrk.c.

#define TRUE   1

Definition at line 96 of file rulebrk.c.

#define twbtype (   c)    (_TwbType[th_zcode(c)])

Definition at line 93 of file rulebrk.c.

#define V   (VL|VR)

Definition at line 88 of file rulebrk.c.

#define VC   0x0080

Definition at line 67 of file rulebrk.c.

#define VL   (VLA|VLO|VLI)

Definition at line 84 of file rulebrk.c.

#define VLA   0x0010

Definition at line 63 of file rulebrk.c.

#define VLI   0x0040

Definition at line 65 of file rulebrk.c.

#define VLO   0x0020

Definition at line 64 of file rulebrk.c.

#define VR   (VRS|VRE|VRX)

Definition at line 85 of file rulebrk.c.

#define VRA   0x0008

Definition at line 61 of file rulebrk.c.

#define VRE   0x0002

Definition at line 58 of file rulebrk.c.

#define VRS   0x0001

Definition at line 57 of file rulebrk.c.

#define VRX   0x0004

Definition at line 59 of file rulebrk.c.


Typedef Documentation

typedef unsigned short twb_t

Definition at line 50 of file rulebrk.c.


Function Documentation

int TrbFollowing ( const th_char * begin,
int  length,
int  offset 
)

Definition at line 264 of file rulebrk.c.

{
    /*
     * Scan forward from begin[offset] and return the index (relative to
     * begin) of the next boundary.  Scanning never goes past begin[length]
     * and also stops at a NUL character.
     *
     * Non-Thai text is handled here directly; for a run of Thai characters
     * the position is obtained from TrbWordBreakPos().
     */
    const th_char *const limit = begin + length;
    const th_char *cur = begin + offset;

    /* Step over leading blanks (space / tab). */
    while (cur < limit && *cur && !th_isthai(*cur) && th_isspace(*cur)) {
        cur++;
    }

    /* A run of non-Thai, non-blank characters. */
    if (cur < limit && *cur && !th_isthai(*cur)) {
        int saw_letter = FALSE;

        for (; cur < limit && *cur && !th_isthai(*cur) && !th_isspace(*cur); cur++) {
            if (th_isalpha(*cur)) {
                saw_letter = TRUE;
            }
        }

        /*
         * The run is a boundary of its own when it contained an ASCII
         * letter, reached the end of the buffer, or is followed by a blank.
         */
        if (saw_letter || cur == limit ||
            (!th_isthai(*cur) && th_isspace(*cur))) {
            return cur - begin;
        }
    }

    /* Nothing Thai at the cursor: report the current position. */
    if (cur == limit || *cur == 0 || !th_isthai(*cur)) {
        return cur - begin;
    }

    /* The cursor is on a Thai character; test the positions after it. */
    cur++;
    if (cur < limit && *cur && th_isthai(*cur)) {
        int brk = TrbWordBreakPos(begin, cur - begin, cur, limit - cur);

        /* Advance one character at a time until a break is reported. */
        while (brk < 0) {
            cur++;
            if (cur == limit || *cur == 0 || !th_isthai(*cur)) {
                break;
            }
            brk = TrbWordBreakPos(begin, cur - begin, cur, limit - cur);
        }

        /* A positive result is an extra offset past the tested position. */
        if (brk > 0) {
            cur += brk;
        }
    }

    /*
     * Absorb trailing characters that are neither Thai, ASCII letters
     * nor blanks.
     */
    while (cur < limit && *cur && !th_isthai(*cur) &&
           !th_isalpha(*cur) && !th_isspace(*cur)) {
        cur++;
    }

    return cur - begin;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int TrbWordBreakPos ( const th_char * pstr,
int  left,
const th_char * rstr,
int  right 
)

Definition at line 106 of file rulebrk.c.

{
    /*
     * Rule-based test for a Thai word break at, or shortly after, rstr.
     *
     * pstr  - start of the text; pstr + left is the anchor from which the
     *         characters preceding the candidate position are read.
     *         NOTE(review): the visible caller passes rstr == pstr + left;
     *         confirm that every caller does.
     * left  - number of characters that may be read before that anchor.
     * rstr  - candidate position; rstr[0] would start the next word.
     * right - number of characters that may be read starting at rstr.
     *
     * Returns -1 when no break is reported for this position (invalid
     * arguments, rstr[0] not a Thai character of a class in A, a
     * prohibiting rule, or no rule matching).  Otherwise returns 0, 1 or 2:
     * the number of characters past rstr at which the break falls
     * (0 means immediately before rstr[0]).
     *
     * The rules work on a six-character window: c(i) / t(i) are the
     * character and its class bits for i = -3..2, where index 0 is rstr[0]
     * and negative indices are the preceding characters.
     */
    const th_char *lstr;
    th_char _c[6];
    twb_t _t[6];
    #define c(i) (_c[(i)+3])
    #define t(i) (_t[(i)+3])
    int i, j;

    if(left < 0) return -1;
    if(right < 1) return -1;

    /* Form the left anchor only once `left` is known to be non-negative. */
    lstr = pstr + left;

    /*
    // get c(0), t(0)
    */
    c(0) = rstr[0]; /* may be '\0' */
    if(!th_isthai(c(0))) return -1;
    t(0) = twbtype(c(0));
    if(!(t(0) & A)) return -1;

    /*
    // get c(-1), t(-1)
    */
    if(left >= 1) {
        c(-1) = lstr[-1];
        if(!th_isthai(c(-1))) return 0;
        t(-1) = twbtype(c(-1));
        if(!(t(-1) & A)) return 0;  /* handle punctuation marks here */
    } else { c(-1) = 0; t(-1) = 0; }

    /*
    // get c(1..2), t(1..2)
    // A character that is not Thai, or not of a class in A, truncates
    // `right` at that index; the slot is then revisited and zero-filled.
    */
    for(i = 1; i <= 2; i++) {
        if(i >= right) { c(i) = 0; t(i) = 0; }
        else {
            c(i) = rstr[i]; /* may be '\0'; */
            if(!th_isthai(c(i))) right = i--;
            else {
                t(i) = twbtype(c(i));
                if(!(t(i) & A)) right = i--;
            }
        }
    }

    /*
    // get c(-2..-3), t(-2..-3)
    // j walks the text backwards while i is the window slot being filled.
    // Setting left = 0 makes the next pass zero-fill the current slot.
    */
    for(i = -2, j = -2; i >= -3 ; j--) {
        if(j < -left) { c(i) = 0; t(i) = 0; i--; }
        else {
            c(i) = lstr[j];
            if(!th_isthai(c(i))) left = 0;
            else {
                /* c(i) is already known to be Thai in this branch. */
                t(i) = twbtype(c(i));
                if(!(t(i) & A)) left = 0;
                else {
                    /*
                     * When the slot to the right holds an MT-class
                     * character next to a VR-class one, overwrite that
                     * slot with the current character and refill slot i
                     * from the next character to the left.
                     */
                    if((t(i+1) & MT) && ((t(i) & VR) || (t(i+2) & VR))) {
                        c(i+1) = c(i); t(i+1) = t(i);
                    } else i--;
                }
            }
        }
    }

    /*
    // prohibit the unlikely
    */
    if((t(-1) & C) && (t(0) & C)) {
        if((t(-1) & CHE) || (t(0) & CHB)) return -1;
    }

    /*
    // special case : vlao, C/ sara_a|aa, !sara_a
    // c(-1) must be SARA A or SARA AA.  The second test previously read
    // c(-0), i.e. c(0), which inspected the character *after* the break.
    */
    if((t(-3) & (VLA|VLO)) && (t(-2) & C) && (c(0) != TH_SARA_A) &&
        (c(-1) == TH_SARA_A || c(-1) == TH_SARA_AA)) return 0;

    /*
    // prohibit break
    */
    if(t(0) & NB) return -1;
    if(t(-1) & NE) return -1;

    /*
    // apply 100% rules
    */
    if(t(-1) & VRE) {
        if(c(-2) == TH_SARA_AA && c(-1) == TH_SARA_A) return 0;
        return -1; /* usually too short syllable, part of word */
    }

    if(t(-2) & VRE) return -1;

    if((t(0) & C) && (t(1) & (VR|MT)) && (c(2) != TH_THANTHAKHAT)) { /*?C, NB */
        if((t(-1) & (VRS|VRX))  && c(1) == TH_SARA_I) return -1; /* exception */
        if(t(-1) & (V|M)) return 0; /* !C/ C, NB */
        if(t(-2) & VRS) return 0;   /* VRS, C / C, NB */
        if(!(t(0) & C2) && c(1) == TH_SARA_I) {   /*     / !C2 or /c, sara_i */
            if(t(-2) & VRX) return 0; /* VRX, C / C, NB ? 100%? */
            if(t(-2) & VC) return 0;  /* VC, C / C, NB ? 100% */
        }
    }
    if((t(-1) & VRX) && (t(0) & CC)) return 0;                   /* VRX/ CC */
    if((t(-2) & VRS) && (t(-1) & C) && (t(0) & (V|M))) return 0; /* VRS, C/ !C */

    if((t(0) & CX) && (t(1) & C2) && (c(2) != TH_THANTHAKHAT)) {
        if((t(-2) & A) && (t(-1) & CX)) return 0;  /* A, CX / CX, C2 */
        if((t(-2) & CX) && (t(-1) & MT)) return 0; /* CX, MT / CX, C2 */
    }

    /*
    // apply 90% rules
    */
    if(t(0) & VL) return 0;
    if(t(1) & VL) return -1;
    if(c(-1) == TH_THANTHAKHAT && c(-2) != TH_RORUA && c(-2) != TH_LOLING) return 0;

    /*
    // apply 80% rules
    */
    if(t(0) & CHE) {
        if((t(-2) & VRS) && (t(-1) & C)) return 0; /* VRS, C/ CHE */
        /*if(t(-1) & VRX) return 0;                // VRX/ CHE */
        if(t(-1) & VC) return 0;                   /* VC/ CHE */
    }
    if(t(-1) & CHB) {
        if((t(0) & C) && (t(1) & VR)) return 0;    /* CHB/ CC, VR */
        if(t(0) & VC) return 0;                    /* CHB/ VC */
    }

    if((t(-2) & VL) && (t(1) & VR)) { /* VL, C? C, VR */
        if(t(-2) & VLI) return 0;  /* VLI,C/C,VR .*/
        else { /* vlao, C ? C , VR */
            if(c(1) == TH_SARA_A) return 2; /* vlao, C, C, sara_a/ */
            if(t(-2) & VLO) return 0;       /* VLO, C/ C, !sara_a */
            if(!(t(1) & VRA)) return 0;     /* VLA, C/ C, !vca */
        }
    }
    /* C,MT,C */
    if((t(-2) & C) && (t(-1) & MT) && (t(0) & CX)) return 1;

    return -1;
}

Here is the caller graph for this function:


Variable Documentation

const twb_t _TwbType[0x100-0xa0]

Definition at line 304 of file rulebrk.c.