Back to index

courier  0.68.2
unicode_graphemebreak.c
Go to the documentation of this file.
00001 /*
00002 ** Copyright 2011 Double Precision, Inc.
00003 ** See COPYING for distribution information.
00004 **
00005 */
00006 
00007 #include      "unicode_config.h"
00008 #include      "unicode.h"
00009 #include      <unistd.h>
00010 #include      <stdint.h>
00011 #include      <stdlib.h>
00012 
/*
** Grapheme break class codes.
**
** unicode_grapheme_break() compares the result of its table lookups
** against these values. UNICODE_GRAPHEMEBREAK_ANY is what it passes to
** the lookup as the final argument (presumably the class reported for
** characters the tables do not list -- confirm against
** unicode_tab_lookup()).
*/

/* Catch-all class */
#define UNICODE_GRAPHEMEBREAK_ANY		0x00

/* Line terminators and control characters */
#define UNICODE_GRAPHEMEBREAK_CR		0x01
#define UNICODE_GRAPHEMEBREAK_LF		0x02
#define UNICODE_GRAPHEMEBREAK_Control		0x03

/* Characters that attach to a neighbouring character */
#define UNICODE_GRAPHEMEBREAK_Extend		0x04
#define UNICODE_GRAPHEMEBREAK_Prepend		0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark	0x06

/* Classes used by the L/V/T sequence rules (GB6 through GB8) */
#define UNICODE_GRAPHEMEBREAK_L			0x07
#define UNICODE_GRAPHEMEBREAK_V			0x08
#define UNICODE_GRAPHEMEBREAK_T			0x09
#define UNICODE_GRAPHEMEBREAK_LV		0x0A
#define UNICODE_GRAPHEMEBREAK_LVT		0x0B
00025 
00026 #include "graphemebreaktab.h"
00027 
00028 int unicode_grapheme_break(unicode_char a, unicode_char b)
00029 {
00030        uint8_t ac=unicode_tab_lookup(a, unicode_indextab,
00031                       sizeof(unicode_indextab)/sizeof(unicode_indextab[0]),
00032                       unicode_rangetab,
00033                       unicode_classtab,
00034                       UNICODE_GRAPHEMEBREAK_ANY),
00035               bc=unicode_tab_lookup(b, unicode_indextab,
00036                       sizeof(unicode_indextab)/sizeof(unicode_indextab[0]),
00037                       unicode_rangetab,
00038                       unicode_classtab,
00039                       UNICODE_GRAPHEMEBREAK_ANY);
00040 
00041        /* GB1 and GB2 are implied */
00042 
00043        if (ac == UNICODE_GRAPHEMEBREAK_CR && bc == UNICODE_GRAPHEMEBREAK_LF)
00044               return 0; /* GB3 */
00045 
00046 
00047        switch (ac) {
00048        case UNICODE_GRAPHEMEBREAK_CR:
00049        case UNICODE_GRAPHEMEBREAK_LF:
00050        case UNICODE_GRAPHEMEBREAK_Control:
00051               return 1; /* GB4 */
00052        default:
00053               break;
00054        }
00055 
00056        switch (bc) {
00057        case UNICODE_GRAPHEMEBREAK_CR:
00058        case UNICODE_GRAPHEMEBREAK_LF:
00059        case UNICODE_GRAPHEMEBREAK_Control:
00060               return 1; /* GB5 */
00061        default:
00062               break;
00063        }
00064 
00065        if (ac == UNICODE_GRAPHEMEBREAK_L)
00066               switch (bc) {
00067               case UNICODE_GRAPHEMEBREAK_L:
00068               case UNICODE_GRAPHEMEBREAK_V:
00069               case UNICODE_GRAPHEMEBREAK_LV:
00070               case UNICODE_GRAPHEMEBREAK_LVT:
00071                      return 0; /* GB6 */
00072               }
00073 
00074        if ((ac == UNICODE_GRAPHEMEBREAK_LV ||
00075             ac == UNICODE_GRAPHEMEBREAK_V) &&
00076            (bc == UNICODE_GRAPHEMEBREAK_V ||
00077             bc == UNICODE_GRAPHEMEBREAK_T))
00078               return 0; /* GB7 */
00079 
00080        if ((ac == UNICODE_GRAPHEMEBREAK_LVT ||
00081             ac == UNICODE_GRAPHEMEBREAK_T) &&
00082            bc == UNICODE_GRAPHEMEBREAK_T)
00083               return 0; /* GB8 */
00084 
00085        if (bc == UNICODE_GRAPHEMEBREAK_Extend)
00086               return 0; /* GB9 */
00087 
00088        if (bc == UNICODE_GRAPHEMEBREAK_SpacingMark)
00089               return 0; /* GB9a */
00090 
00091        if (ac == UNICODE_GRAPHEMEBREAK_Prepend)
00092               return 0; /* GB9b */
00093 
00094        return 1; /* GB10 */
00095 }