Back to index

openldap  2.4.31
Classes | Defines | Functions | Variables
ucdata.c File Reference
#include "portable.h"
#include "ldap_config.h"
#include <stdio.h>
#include <ac/stdlib.h>
#include <ac/string.h>
#include <ac/unistd.h>
#include <ac/bytes.h>
#include "lber_pvt.h"
#include "ucdata.h"
#include "uctable.h"

Go to the source code of this file.

Classes

struct  _ucheader_t
union  _ucheader_t.size

Defines

#define HARDCODE_DATA   1
#define endian_short(cc)   (((cc) >> 8) | (((cc) & 0xff) << 8))
#define endian_long(cc)

Functions

static int _ucprop_lookup (ac_uint4 code, ac_uint4 n)
int ucisprop (ac_uint4 code, ac_uint4 mask1, ac_uint4 mask2)
static ac_uint4 _uccase_lookup (ac_uint4 code, long l, long r, int field)
ac_uint4 uctoupper (ac_uint4 code)
ac_uint4 uctolower (ac_uint4 code)
ac_uint4 uctotitle (ac_uint4 code)
int uccomp (ac_uint4 node1, ac_uint4 node2, ac_uint4 *comp)
int uccomp_hangul (ac_uint4 *str, int len)
int uccanoncomp (ac_uint4 *str, int len)
int ucdecomp (ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
int uckdecomp (ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
int ucdecomp_hangul (ac_uint4 code, ac_uint4 *num, ac_uint4 decomp[])
static int uccanoncompatdecomp (const ac_uint4 *in, int inlen, ac_uint4 **out, int *outlen, short mode, void *ctx)
int uccanondecomp (const ac_uint4 *in, int inlen, ac_uint4 **out, int *outlen, void *ctx)
int uccompatdecomp (const ac_uint4 *in, int inlen, ac_uint4 **out, int *outlen, void *ctx)
ac_uint4 uccombining_class (ac_uint4 code)
int ucnumber_lookup (ac_uint4 code, struct ucnumber *num)
int ucdigit_lookup (ac_uint4 code, int *digit)
struct ucnumber ucgetnumber (ac_uint4 code)
int ucgetdigit (ac_uint4 code)
int ucdata_load (char *paths, int masks)
void ucdata_unload (int masks)
int ucdata_reload (char *paths, int masks)

Variables

static ac_uint4 masks32 [32]

Class Documentation

struct _ucheader_t

Definition at line 64 of file ucdata.c.

Class Members
ac_uint2 bom
ac_uint2 cnt
union _ucheader_t size
union _ucheader_t.size

Definition at line 67 of file ucdata.c.

Class Members
ac_uint4 bytes
ac_uint2 len

Define Documentation

#define endian_long (   cc)
Value:
((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\
                        ((((cc) >> 16) & 0xff) << 8)|((cc) >> 24))

Definition at line 88 of file ucdata.c.

#define endian_short (   cc)    (((cc) >> 8) | (((cc) & 0xff) << 8))

Definition at line 87 of file ucdata.c.

#define HARDCODE_DATA   1

Definition at line 51 of file ucdata.c.


Function Documentation

static ac_uint4 _uccase_lookup ( ac_uint4  code,
long  l,
long  r,
int  field 
) [static]

Definition at line 397 of file ucdata.c.

{
    /*
     * Binary search of _uccase_map between entries l and r (inclusive).
     * The map is read as consecutive triples; element 0 of a triple is the
     * key compared against `code`, and `field` selects which element of the
     * matching triple is returned.
     */
    while (l <= r) {
        long mid = (l + r) >> 1;
        const ac_uint4 *entry = _uccase_map + mid * 3;
        ac_uint4 key = entry[0];

        if (code == key)
          return entry[field];

        if (code > key)
          l = mid + 1;
        else
          r = mid - 1;
    }

    /*
     * No triple matched: hand the code back unchanged.
     */
    return code;
}

Here is the caller graph for this function:

static int _ucprop_lookup ( ac_uint4  code,
ac_uint4  n 
) [static]

Definition at line 246 of file ucdata.c.

{
    long lo, hi, mid, step;

    /*
     * With no property data there is nothing to search.
     */
    if (_ucprop_size == 0)
      return 0;

    /*
     * An offset of 0xffff marks a property that has no ranges at all.
     */
    lo = _ucprop_offsets[n];
    if (lo == 0xffff)
      return 0;

    /*
     * Skip forward over following 0xffff offsets; the first offset that is
     * not 0xffff (or the entry reached when the size limit stops the scan)
     * marks the end of this property's ranges.
     */
    step = 1;
    while (n + step < _ucprop_size && _ucprop_offsets[n + step] == 0xffff)
      step++;

    hi = _ucprop_offsets[n + step] - 1;

    /*
     * Binary search over (low, high) range pairs.  The midpoint is forced
     * down to an even index so that it always addresses the low end of a
     * pair.
     */
    while (lo <= hi) {
        mid = (lo + hi) >> 1;
        mid -= (mid & 1);

        if (code > _ucprop_ranges[mid + 1])
          lo = mid + 2;
        else if (code < _ucprop_ranges[mid])
          hi = mid - 2;
        else
          return 1;     /* low <= code <= high */
    }

    return 0;
}

Here is the caller graph for this function:

int uccanoncomp ( ac_uint4 *  str,
int  len 
)

Definition at line 672 of file ucdata.c.

{
    int in_pos, starter_pos, out_len;
    ac_uint4 starter, cur, cur_class, last_class, composite;

    /*
     * The first element is the initial candidate to compose onto.  If it
     * has a non-zero combining class, last_class starts at 256.
     * NOTE(review): 256 is presumably larger than any real combining class,
     * which would block composition until a class-0 element is seen -
     * confirm.
     */
    starter = str[0];
    starter_pos = 0;
    out_len = 1;
    if (uccombining_class(starter) == 0)
      last_class = 0;
    else
      last_class = 256;

    for (in_pos = 1; in_pos < len; in_pos++) {
        cur = str[in_pos];
        cur_class = uccombining_class(cur);

        /*
         * Compose in place when a composite exists for (starter, cur) and
         * the previously kept element does not get in the way.
         */
        if (uccomp(starter, cur, &composite)
            && (last_class == 0 || last_class < cur_class)) {
            str[starter_pos] = composite;
            starter = composite;
            continue;
        }

        /*
         * Otherwise keep the element; a class-0 element becomes the new
         * candidate to compose onto.
         */
        if (cur_class == 0) {
            starter_pos = out_len;
            starter = cur;
        }
        last_class = cur_class;
        str[out_len] = cur;
        out_len++;
    }

    /*
     * Finish with the Hangul pass over what was kept; its return value is
     * the final length.
     */
    return uccomp_hangul(str, out_len);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int uccanoncompatdecomp ( const ac_uint4 *  in,
int  inlen,
ac_uint4 **  out,
int *  outlen,
short  mode,
void *  ctx 
) [static]

Definition at line 936 of file ucdata.c.

{
    /*
     * Decompose the inlen code points of `in` into a newly allocated buffer.
     *
     * mode != 0 uses uckdecomp() for each code point, mode == 0 uses
     * ucdecomp().  Code points with no table decomposition are tried with
     * ucdecomp_hangul(); anything else is copied through unchanged.
     *
     * On success *out points at the buffer (allocated through
     * ber_memalloc_x/ber_memrealloc_x with `ctx`), and *outlen and the return
     * value hold the number of code points written.  On allocation failure
     * *out is NULL and both *outlen and the return value are -1.
     */
    int l, size, reorder;
    unsigned i, j, k;
    ac_uint4 num, class, *decomp, hangdecomp[3];
    const ac_uint4 *src;
    ac_uint4 *grown;

    size = inlen * 2;
    *out = (ac_uint4 *) ber_memalloc_x(size * sizeof(**out), ctx);
    if (*out == NULL)
        return *outlen = -1;

    i = 0;
    for (j = 0; j < (unsigned) inlen; j++) {
        /*
         * Pick the source of the code points to emit for in[j].  Only the
         * Hangul expansion is appended without looking at combining
         * classes.
         */
        if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) {
            src = decomp;
            reorder = 1;
        } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
            src = hangdecomp;
            reorder = 0;
        } else {
            src = &in[j];
            num = 1;
            reorder = 1;
        }

        /*
         * Grow the buffer when fewer than num slots remain: room for what
         * has been written (i), this expansion (num) and one slot for each
         * input element still to come (inlen - j - 1).
         */
        if (size - i < num) {
            size = inlen + i - j + num - 1;
            grown = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx);
            if (grown == NULL) {
                /*
                 * Keep the old pointer until the result is known so the
                 * buffer can be released instead of leaked.
                 */
                ber_memfree_x(*out, ctx);
                *out = NULL;
                return *outlen = -1;
            }
            *out = grown;
        }

        for (k = 0; k < num; k++) {
            class = reorder ? uccombining_class(src[k]) : 0;
            if (class == 0) {
                (*out)[i] = src[k];
            } else {
                /*
                 * Walk back to the first earlier element whose class is not
                 * greater than this one and insert after it, keeping runs of
                 * non-zero classes in ascending order.
                 */
                for (l = i; l > 0; l--)
                    if (class >= uccombining_class((*out)[l-1]))
                        break;
                /*
                 * NOTE(review): source and destination overlap here; this
                 * relies on AC_MEMCPY being overlap-safe - confirm.
                 */
                AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
                (*out)[l] = src[k];
            }
            i++;
        }
    }
    return *outlen = i;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int uccanondecomp ( const ac_uint4 *  in,
int  inlen,
ac_uint4 **  out,
int *  outlen,
void *  ctx 
)

Definition at line 1005 of file ucdata.c.

{
    /*
     * Mode 0 makes the shared worker call ucdecomp() for each code point.
     */
    const short mode = 0;

    return uccanoncompatdecomp(in, inlen, out, outlen, mode, ctx);
}

Here is the call graph for this function:

ac_uint4 uccombining_class ( ac_uint4  code)

Definition at line 1093 of file ucdata.c.

{
    long lo = 0;
    long hi = _uccmcl_size - 1;

    /*
     * Binary search of _uccmcl_nodes, read as triples of (range start,
     * range end, value).  The midpoint is pulled back to a multiple of 3 so
     * that it addresses the start of a triple.
     */
    while (lo <= hi) {
        long mid = (lo + hi) >> 1;

        mid -= (mid % 3);
        if (code > _uccmcl_nodes[mid + 1])
          lo = mid + 3;
        else if (code < _uccmcl_nodes[mid])
          hi = mid - 3;
        else
          return _uccmcl_nodes[mid + 2];
    }

    /*
     * Codes not covered by any range get class 0.
     */
    return 0;
}

Here is the caller graph for this function:

int uccomp ( ac_uint4  node1,
ac_uint4  node2,
ac_uint4 *  comp 
)

Definition at line 594 of file ucdata.c.

{
    int lo = 0;
    int hi = _uccomp_size - 1;

    /*
     * Binary search of _uccomp_data in groups of four entries.  Within a
     * group, entry 2 is compared with node1, entry 3 with node2, and entry 0
     * is the value stored through comp on a match.
     */
    while (lo <= hi) {
        int base = ((hi + lo) >> 1);

        base -= base & 3;       /* align to the start of a group */

        if (node1 != _uccomp_data[base + 2]) {
            if (node1 > _uccomp_data[base + 2])
              lo = base + 4;
            else
              hi = base - 4;
        } else if (node2 != _uccomp_data[base + 3]) {
            if (node2 > _uccomp_data[base + 3])
              lo = base + 4;
            else
              hi = base - 4;
        } else {
            *comp = _uccomp_data[base];
            return 1;
        }
    }

    /*
     * No composition for this pair; *comp is left untouched.
     */
    return 0;
}

Here is the caller graph for this function:

int uccomp_hangul ( ac_uint4 *  str,
int  len 
)

Definition at line 621 of file ucdata.c.

{
    /*
     * Compose Hangul jamo sequences in str[0..len-1] in place and return
     * the new length.
     *
     * Two adjacent elements are merged when they form a leading consonant
     * followed by a vowel (giving an LV syllable), or an LV syllable
     * followed by a trailing consonant (giving an LVT syllable).  Everything
     * else is copied through unchanged.
     */
    const int SBase = 0xAC00, LBase = 0x1100,
        VBase = 0x1161, TBase = 0x11A7,
        LCount = 19, VCount = 21, TCount = 28,
        NCount = VCount * TCount,   /* 588 */
        SCount = LCount * NCount;   /* 11172 */

    int i, rlen;
    ac_uint4 ch, last, lindex, sindex;

    /*
     * Nothing to compose; also avoids reading str[0] of an empty buffer.
     */
    if (len <= 0)
      return 0;

    last = str[0];
    rlen = 1;
    for ( i = 1; i < len; i++ ) {
        ch = str[i];

        /* check if two current characters are L and V */
        lindex = last - LBase;
        if (lindex < (ac_uint4) LCount) {
            ac_uint4 vindex = ch - VBase;
            if (vindex < (ac_uint4) VCount) {
                /* make syllable of form LV */
                last = SBase + (lindex * VCount + vindex) * TCount;
                str[rlen-1] = last; /* reset last */
                continue;
            }
        }

        /* check if two current characters are LV and T */
        sindex = last - SBase;
        if (sindex < (ac_uint4) SCount
            && (sindex % TCount) == 0)
        {
            ac_uint4 tindex = ch - TBase;
            /*
             * TBase is one below the first trailing consonant, so index 0
             * is not a trailing consonant, and index TCount would step into
             * the next LV syllable.  Only 1 .. TCount-1 may be folded in.
             */
            if (tindex > 0 && tindex < (ac_uint4) TCount) {
                /* make syllable of form LVT */
                last += tindex;
                str[rlen-1] = last; /* reset last */
                continue;
            }
        }

        /* if neither case was true, just add the character */
        last = ch;
        str[rlen] = ch;
        rlen++;
    }
    return rlen;
}

Here is the caller graph for this function:

int uccompatdecomp ( const ac_uint4 *  in,
int  inlen,
ac_uint4 **  out,
int *  outlen,
void *  ctx 
)

Definition at line 1012 of file ucdata.c.

{
    /*
     * Mode 1 makes the shared worker call uckdecomp() for each code point.
     */
    const short mode = 1;

    return uccanoncompatdecomp(in, inlen, out, outlen, mode, ctx);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int ucdata_load ( char *  paths,
int  masks 
)

Definition at line 1300 of file ucdata.c.

/*
 * Stub: `paths` and `masks` are ignored and 0 is always returned.
 * NOTE(review): presumably nothing needs loading because HARDCODE_DATA is
 * defined and the tables are compiled in - confirm.
 */
{ return 0; }
int ucdata_reload ( char *  paths,
int  masks 
)

Definition at line 1302 of file ucdata.c.

/*
 * Stub: `paths` and `masks` are ignored and 0 is always returned.
 * NOTE(review): presumably nothing needs reloading because HARDCODE_DATA is
 * defined and the tables are compiled in - confirm.
 */
{ return 0; }
void ucdata_unload ( int  masks)

Definition at line 1301 of file ucdata.c.

/*
 * Stub: `masks` is ignored and nothing is released.
 * NOTE(review): presumably a no-op because HARDCODE_DATA is defined and the
 * tables are compiled in - confirm.
 */
{ }

Here is the caller graph for this function:

int ucdecomp ( ac_uint4  code,
ac_uint4 *  num,
ac_uint4 **  decomp 
)

Definition at line 856 of file ucdata.c.

{
    long lo, hi, mid;

    /*
     * Codes below the first table key cannot have an entry.
     */
    if (code < _ucdcmp_nodes[0])
      return 0;

    /*
     * NOTE(review): the upper bound is the value stored at
     * _ucdcmp_nodes[_ucdcmp_size] minus one, not an index derived from
     * _ucdcmp_size itself - confirm this matches the table layout.
     */
    lo = 0;
    hi = _ucdcmp_nodes[_ucdcmp_size] - 1;

    /*
     * Binary search over (code, offset) pairs; the midpoint is forced to an
     * even index so it always lands on the code half of a pair.
     */
    while (lo <= hi) {
        mid = (lo + hi) >> 1;
        mid -= (mid & 1);

        if (code == _ucdcmp_nodes[mid]) {
            /*
             * The length is the gap between this pair's offset and the
             * next pair's offset.
             */
            *num = _ucdcmp_nodes[mid + 3] - _ucdcmp_nodes[mid + 1];
            *decomp = (ac_uint4*)&_ucdcmp_decomp[_ucdcmp_nodes[mid + 1]];
            return 1;
        }

        if (code > _ucdcmp_nodes[mid])
          lo = mid + 2;
        else
          hi = mid - 2;
    }

    return 0;
}

Here is the caller graph for this function:

int ucdecomp_hangul ( ac_uint4  code,
ac_uint4 *  num,
ac_uint4  decomp[] 
)

Definition at line 920 of file ucdata.c.

{
    ac_uint4 sindex, tindex;

    if (!ucishangul(code))
      return 0;

    /*
     * Split the syllable index (relative to 0xac00) into its three parts:
     * 588 syllables per leading consonant, 28 per vowel.
     */
    sindex = code - 0xac00;
    tindex = sindex % 28;

    decomp[0] = 0x1100 + (ac_uint4) (sindex / 588);
    decomp[1] = 0x1161 + (ac_uint4) ((sindex % 588) / 28);
    decomp[2] = 0x11a7 + tindex;

    /*
     * A zero trailing index leaves decomp[2] at 0x11a7, in which case only
     * the first two entries are reported.
     */
    *num = (tindex != 0) ? 3 : 2;

    return 1;
}

Here is the caller graph for this function:

int ucdigit_lookup ( ac_uint4  code,
int *  digit 
)

Definition at line 1229 of file ucdata.c.

{
    long lo = 0;
    long hi = _ucnum_size - 1;

    /*
     * Binary search over (code, offset) pairs in _ucnum_nodes; the midpoint
     * is forced to an even index so it addresses the code half of a pair.
     */
    while (lo <= hi) {
        long mid = (lo + hi) >> 1;
        short *pair;

        mid -= (mid & 1);

        if (code > _ucnum_nodes[mid]) {
            lo = mid + 2;
            continue;
        }
        if (code < _ucnum_nodes[mid]) {
            hi = mid - 2;
            continue;
        }

        /*
         * Found: the offset selects two consecutive shorts in _ucnum_vals.
         * The code is reported as a digit only when both are equal.
         */
        pair = (short *)_ucnum_vals + _ucnum_nodes[mid + 1];
        if (pair[0] != pair[1])
          return 0;

        *digit = pair[0];
        return 1;
    }

    return 0;
}

Here is the caller graph for this function:

int ucgetdigit ( ac_uint4  code)

Definition at line 1277 of file ucdata.c.

{
    /*
     * Start from an arbitrary marker value.  It is what the caller gets back
     * when the lookup does not store a digit; callers cannot otherwise tell
     * whether the code is a digit without checking ucisdigit() first.
     */
    int result = -111;

    (void) ucdigit_lookup(code, &result);

    return result;
}

Here is the call graph for this function:

struct ucnumber ucgetnumber ( ac_uint4  code) [read]

Definition at line 1260 of file ucdata.c.

{
    struct ucnumber result;

    /*
     * Start from an arbitrary marker in both fields.  It is what the caller
     * gets back when the lookup does not fill in a value; callers cannot
     * otherwise tell whether the code is a number without checking
     * ucisnumber() first.
     */
    result.denominator = -111;
    result.numerator = result.denominator;

    (void) ucnumber_lookup(code, &result);

    return result;
}

Here is the call graph for this function:

int ucisprop ( ac_uint4  code,
ac_uint4  mask1,
ac_uint4  mask2 
)

Definition at line 288 of file ucdata.c.

{
    ac_uint4 prop;

    /*
     * No property bits requested at all.
     */
    if (mask1 == 0 && mask2 == 0)
      return 0;

    /*
     * Bits of mask1 select properties 0..31.
     */
    if (mask1) {
        for (prop = 0; prop < 32; prop++) {
            if ((mask1 & masks32[prop]) && _ucprop_lookup(code, prop))
              return 1;
        }
    }

    /*
     * Bits of mask2 select properties 32 and up, bounded by _ucprop_size;
     * the bit tested is the property number modulo 32.
     */
    if (mask2) {
        for (prop = 32; prop < _ucprop_size; prop++) {
            if ((mask2 & masks32[prop & 31]) && _ucprop_lookup(code, prop))
              return 1;
        }
    }

    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int uckdecomp ( ac_uint4  code,
ac_uint4 *  num,
ac_uint4 **  decomp 
)

Definition at line 888 of file ucdata.c.

{
    long lo, hi, mid;

    /*
     * Codes below the first table key cannot have an entry.
     */
    if (code < _uckdcmp_nodes[0])
      return 0;

    /*
     * NOTE(review): the upper bound is the value stored at
     * _uckdcmp_nodes[_uckdcmp_size] minus one, not an index derived from
     * _uckdcmp_size itself - confirm this matches the table layout.
     */
    lo = 0;
    hi = _uckdcmp_nodes[_uckdcmp_size] - 1;

    /*
     * Binary search over (code, offset) pairs; the midpoint is forced to an
     * even index so it always lands on the code half of a pair.
     */
    while (lo <= hi) {
        mid = (lo + hi) >> 1;
        mid -= (mid & 1);

        if (code == _uckdcmp_nodes[mid]) {
            /*
             * The length is the gap between this pair's offset and the
             * next pair's offset.
             */
            *num = _uckdcmp_nodes[mid + 3] - _uckdcmp_nodes[mid + 1];
            *decomp = (ac_uint4*)&_uckdcmp_decomp[_uckdcmp_nodes[mid + 1]];
            return 1;
        }

        if (code > _uckdcmp_nodes[mid])
          lo = mid + 2;
        else
          hi = mid - 2;
    }

    return 0;
}

Here is the caller graph for this function:

int ucnumber_lookup ( ac_uint4  code,
struct ucnumber *  num 
)

Definition at line 1200 of file ucdata.c.

{
    long lo = 0;
    long hi = _ucnum_size - 1;

    /*
     * Binary search over (code, offset) pairs in _ucnum_nodes; the midpoint
     * is forced to an even index so it addresses the code half of a pair.
     */
    while (lo <= hi) {
        long mid = (lo + hi) >> 1;
        short *pair;

        mid -= (mid & 1);

        if (code > _ucnum_nodes[mid]) {
            lo = mid + 2;
            continue;
        }
        if (code < _ucnum_nodes[mid]) {
            hi = mid - 2;
            continue;
        }

        /*
         * Found: the offset selects two consecutive shorts in _ucnum_vals,
         * numerator first and denominator second.
         */
        pair = (short *)_ucnum_vals + _ucnum_nodes[mid + 1];
        num->numerator = (int) pair[0];
        num->denominator = (int) pair[1];
        return 1;
    }

    return 0;
}

Here is the caller graph for this function:

ac_uint4 uctolower ( ac_uint4  code)

Definition at line 451 of file ucdata.c.

{
    /*
     * Already lower case: nothing to map.
     */
    if (ucislower(code))
      return code;

    /*
     * Upper case: search the first _uccase_len[0] entries of the case map
     * and take field 1 of the match.
     */
    if (ucisupper(code))
      return _uccase_lookup(code, 0, _uccase_len[0] - 1, 1);

    /*
     * Anything else is handled as title case: search the entries that
     * follow the first two sections and take field 2 of the match.
     */
    return _uccase_lookup(code, _uccase_len[0] + _uccase_len[1],
                          _uccase_size - 1, 2);
}

Here is the call graph for this function:

Here is the caller graph for this function:

ac_uint4 uctotitle ( ac_uint4  code)

Definition at line 478 of file ucdata.c.

{
    long first, last;

    /*
     * Already title case: nothing to map.
     */
    if (ucistitle(code))
      return code;

    if (ucisupper(code)) {
        /*
         * Upper case: the first _uccase_len[0] entries of the case map.
         */
        first = 0;
        last = _uccase_len[0] - 1;
    } else {
        /*
         * Otherwise handled as lower case: the next _uccase_len[1] entries.
         */
        first = _uccase_len[0];
        last = first + _uccase_len[1] - 1;
    }

    /*
     * Field 2 is used for the title-case mapping in both sections.
     */
    return _uccase_lookup(code, first, last, 2);
}

Here is the call graph for this function:

ac_uint4 uctoupper ( ac_uint4  code)

Definition at line 424 of file ucdata.c.

{
    long first, last;
    int which;

    /*
     * Already upper case: nothing to map.
     */
    if (ucisupper(code))
      return code;

    if (!ucislower(code)) {
        /*
         * Neither upper nor lower: handled as title case, using the entries
         * that follow the first two sections and field 1 of the match.
         */
        which = 1;
        first = _uccase_len[0] + _uccase_len[1];
        last = _uccase_size - 1;
    } else {
        /*
         * Lower case: the _uccase_len[1] entries after the first section,
         * field 2 of the match.
         */
        which = 2;
        first = _uccase_len[0];
        last = first + _uccase_len[1] - 1;
    }

    return _uccase_lookup(code, first, last, which);
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

ac_uint4 masks32[32] [static]
Initial value:
 {
       /* Entry i holds the mask with only bit i set, for i = 0 .. 31. */
       0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
       0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
       0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
       0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
       0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
       0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
       0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
       0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
}

Definition at line 76 of file ucdata.c.