Back to index

lightning-sunbird  0.9+nobinonly
Defines | Functions | Variables
cairo-unicode.c File Reference
#include <limits.h>
#include <cairoint.h>

Go to the source code of this file.

Defines

#define UTF8_COMPUTE(Char, Mask, Len)
#define UTF8_LENGTH(Char)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
#define UNICODE_VALID(Char)
#define UTF8_NEXT_CHAR(p)   ((p) + utf8_skip_data[*(unsigned char *)(p)])

Functions

static uint32_t _utf8_get_char (const unsigned char *p)
static uint32_t _utf8_get_char_extended (const unsigned char *p, long max_len)
cairo_status_t _cairo_utf8_to_ucs4 (const unsigned char *str, int len, uint32_t **result, int *items_written)
 _cairo_utf8_to_ucs4: str: a UTF-8 string; len: length of str in bytes, or -1 if it is nul-terminated.
cairo_status_t _cairo_utf8_to_utf16 (const unsigned char *str, int len, uint16_t **result, int *items_written)
 _cairo_utf8_to_utf16: str: a UTF-8 string; len: length of str in bytes, or -1 if it is nul-terminated.

Variables

static const char utf8_skip_data [256]

Define Documentation

#define UNICODE_VALID (   Char)
Value:
((Char) < 0x110000 &&                     \
     (((Char) & 0xFFFFF800) != 0xD800) &&     \
     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
     ((Char) & 0xFFFE) != 0xFFFE)

Definition at line 102 of file cairo-unicode.c.

#define UTF8_COMPUTE (   Char,
  Mask,
  Len 
)

Definition at line 47 of file cairo-unicode.c.

#define UTF8_GET (   Result,
  Chars,
  Count,
  Mask,
  Len 
)
Value:
(Result) = (Chars)[0] & (Mask);                                      \
  for ((Count) = 1; (Count) < (Len); ++(Count))                             \
    {                                                                \
      if (((Chars)[(Count)] & 0xc0) != 0x80)                                \
       {                                                             \
         (Result) = -1;                                              \
         break;                                                      \
       }                                                             \
      (Result) <<= 6;                                                       \
      (Result) |= ((Chars)[(Count)] & 0x3f);                                \
    }

Definition at line 89 of file cairo-unicode.c.

#define UTF8_LENGTH (   Char)
Value:
((Char) < 0x80 ? 1 :                 \
   ((Char) < 0x800 ? 2 :               \
    ((Char) < 0x10000 ? 3 :            \
     ((Char) < 0x200000 ? 4 :          \
      ((Char) < 0x4000000 ? 5 : 6)))))

Definition at line 81 of file cairo-unicode.c.

#define UTF8_NEXT_CHAR (   p)    ((p) + utf8_skip_data[*(unsigned char *)(p)])

Definition at line 120 of file cairo-unicode.c.


Function Documentation

cairo_status_t _cairo_utf8_to_ucs4 ( const unsigned char *  str,
int  len,
uint32_t **  result,
int *  items_written 
)

_cairo_utf8_to_ucs4: str: a UTF-8 string; len: length of str in bytes, or -1 if it is nul-terminated.

If len is supplied and the string has an embedded nul byte, only the portion before the nul byte is converted.

Parameters:
result: location to store a pointer to a newly allocated UTF-32 string (always native endian). Free with free(). A 0 word will be written after the last character.
items_written: location to store the number of 32-bit words written (not including the trailing 0). May be NULL, in which case the count is not reported.

Converts a UTF-8 string to UCS-4. UCS-4 is an encoding of Unicode with 1 32-bit word per character. The string is validated to consist entirely of valid Unicode characters.

Return value: CAIRO_STATUS_SUCCESS if the entire string was successfully converted. CAIRO_STATUS_INVALID_STRING if an invalid sequence was found.

Definition at line 223 of file cairo-unicode.c.

{
    /*
     * Convert the UTF-8 string str (len bytes, or nul-terminated when
     * len < 0) into a freshly malloc'ed, 0-terminated array of 32-bit code
     * points stored through *result.  Conversion stops at the first nul
     * byte even when len is given.  The number of code points (excluding
     * the terminator) is stored through items_written when it is non-NULL.
     *
     * Returns CAIRO_STATUS_SUCCESS, CAIRO_STATUS_INVALID_STRING for
     * malformed, truncated or non-valid-Unicode input (or an input with
     * INT_MAX or more characters), or CAIRO_STATUS_NO_MEMORY when the
     * output buffer cannot be allocated.
     */
    uint32_t *str32 = NULL;
    int n_chars, i;
    const unsigned char *in;

    /* Pass 1: validate every sequence and count the code points. */
    in = str;
    n_chars = 0;
    while ((len < 0 || in - str < len) && *in)
    {
        /* Bytes left in the caller's window, or -1 for "no limit".  This
         * is computed from offsets rather than as str + len - in, because
         * with len == -1 that expression forms a pointer before the start
         * of the string, which is undefined behaviour. */
        long remaining = len < 0 ? -1 : (long) (len - (in - str));
        uint32_t wc = _utf8_get_char_extended (in, remaining);

        /* The high bit is set only for the (uint32_t)-1 / -2 error codes. */
        if (wc & 0x80000000 || !UNICODE_VALID (wc))
            return CAIRO_STATUS_INVALID_STRING;

        n_chars++;
        if (n_chars == INT_MAX)
            return CAIRO_STATUS_INVALID_STRING;

        in = UTF8_NEXT_CHAR (in);
    }

    /* Refuse sizes that would wrap size_t (possible where size_t is 32 bits
     * wide); a wrapped size would yield an undersized buffer that pass 2
     * then overruns. */
    if ((size_t) n_chars + 1 > (size_t) -1 / sizeof *str32)
        return CAIRO_STATUS_NO_MEMORY;

    str32 = malloc (((size_t) n_chars + 1) * sizeof *str32);
    if (!str32)
        return CAIRO_STATUS_NO_MEMORY;

    /* Pass 2: decode.  The input was fully validated above, so the cheaper
     * unchecked decoder is sufficient here. */
    in = str;
    for (i = 0; i < n_chars; i++) {
        str32[i] = _utf8_get_char (in);
        in = UTF8_NEXT_CHAR (in);
    }
    str32[i] = 0;

    *result = str32;
    if (items_written)
        *items_written = n_chars;

    return CAIRO_STATUS_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

cairo_status_t _cairo_utf8_to_utf16 ( const unsigned char *  str,
int  len,
uint16_t **  result,
int *  items_written 
)

_cairo_utf8_to_utf16: str: a UTF-8 string; len: length of str in bytes, or -1 if it is nul-terminated.

If len is supplied and the string has an embedded nul byte, only the portion before the nul byte is converted.

Parameters:
result: location to store a pointer to a newly allocated UTF-16 string (always native endian). Free with free(). A 0 word will be written after the last character.
items_written: location to store the number of 16-bit words written (not including the trailing 0). May be NULL, in which case the count is not reported.

Converts a UTF-8 string to UTF-16. UTF-16 is an encoding of Unicode where characters are represented either as a single 16-bit word, or as a pair of 16-bit "surrogates". The string is validated to consist entirely of valid Unicode characters.

Return value: CAIRO_STATUS_SUCCESS if the entire string was successfully converted. CAIRO_STATUS_INVALID_STRING if an invalid sequence was found.

Definition at line 287 of file cairo-unicode.c.

{
    /*
     * Convert the UTF-8 string str (len bytes, or nul-terminated when
     * len < 0) into a freshly malloc'ed, 0-terminated array of UTF-16 code
     * units stored through *result.  Code points of 0x10000 and above are
     * written as a surrogate pair.  Conversion stops at the first nul byte
     * even when len is given.  The number of 16-bit units (excluding the
     * terminator) is stored through items_written when it is non-NULL.
     *
     * Returns CAIRO_STATUS_SUCCESS, CAIRO_STATUS_INVALID_STRING for
     * malformed, truncated or non-valid-Unicode input (or an output that
     * would need INT_MAX - 1 or more units), or CAIRO_STATUS_NO_MEMORY when
     * the output buffer cannot be allocated.
     */
    uint16_t *str16 = NULL;
    int n16, i;
    const unsigned char *in;

    /* Pass 1: validate every sequence and count the UTF-16 units needed. */
    in = str;
    n16 = 0;
    while ((len < 0 || in - str < len) && *in) {
        /* Bytes left in the caller's window, or -1 for "no limit".  This
         * is computed from offsets rather than as str + len - in, because
         * with len == -1 that expression forms a pointer before the start
         * of the string, which is undefined behaviour. */
        long remaining = len < 0 ? -1 : (long) (len - (in - str));
        uint32_t wc = _utf8_get_char_extended (in, remaining);

        /* The high bit is set only for the (uint32_t)-1 / -2 error codes. */
        if (wc & 0x80000000 || !UNICODE_VALID (wc))
            return CAIRO_STATUS_INVALID_STRING;

        if (wc < 0x10000)
            n16 += 1;
        else
            n16 += 2;

        /* Checked after every step of at most 2, so n16 can never pass
         * INT_MAX and the addition above cannot overflow. */
        if (n16 == INT_MAX - 1 || n16 == INT_MAX)
            return CAIRO_STATUS_INVALID_STRING;

        in = UTF8_NEXT_CHAR (in);
    }

    /* Refuse sizes that would wrap size_t before multiplying. */
    if ((size_t) n16 + 1 > (size_t) -1 / sizeof *str16)
        return CAIRO_STATUS_NO_MEMORY;

    str16 = malloc (((size_t) n16 + 1) * sizeof *str16);
    if (!str16)
        return CAIRO_STATUS_NO_MEMORY;

    /* Pass 2: decode.  The input was fully validated above, so the cheaper
     * unchecked decoder is sufficient here. */
    in = str;
    for (i = 0; i < n16;) {
        uint32_t wc = _utf8_get_char (in);

        if (wc < 0x10000) {
            str16[i++] = wc;
        } else {
            /* Split the 20-bit offset into high and low surrogates. */
            str16[i++] = (wc - 0x10000) / 0x400 + 0xd800;
            str16[i++] = (wc - 0x10000) % 0x400 + 0xdc00;
        }

        in = UTF8_NEXT_CHAR (in);
    }

    str16[i] = 0;

    *result = str16;
    if (items_written)
        *items_written = n16;

    return CAIRO_STATUS_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static uint32_t _utf8_get_char ( const unsigned char *  p) [static]

Definition at line 127 of file cairo-unicode.c.

{
    /*
     * Decode the single UTF-8 sequence starting at p and return its value.
     * Returns (uint32_t)-1 when UTF8_COMPUTE reports a length of -1 for the
     * first byte, or when UTF8_GET meets a byte that is not of the form
     * 10xxxxxx where a continuation byte is required.  No bounds are
     * checked here.
     */
    unsigned char lead = *p;
    uint32_t value;
    int lead_mask = 0;
    int seq_len;
    int k;

    /* Derive the payload mask and total sequence length from the lead byte. */
    UTF8_COMPUTE (lead, lead_mask, seq_len);
    if (seq_len == -1)
        return (uint32_t) -1;

    /* Fold the masked lead byte and the continuation bytes into value. */
    UTF8_GET (value, p, k, lead_mask, seq_len);

    return value;
}

Here is the caller graph for this function:

static uint32_t _utf8_get_char_extended ( const unsigned char *  p,
long  max_len 
) [static]

Definition at line 145 of file cairo-unicode.c.

{
    /*
     * Decode one UTF-8 sequence starting at p, with validation.
     *
     * max_len is the number of bytes that may be read, or negative for no
     * limit.  Returns the decoded value on success; (uint32_t)-1 for an
     * invalid sequence; (uint32_t)-2 for a sequence that is well-formed so
     * far but is cut short by max_len or by a nul byte.
     */
    const unsigned char *bytes = p;
    uint32_t code = bytes[0];
    int seq_len, pos;

    /* Single-byte (ASCII) values need no further work. */
    if (code < 0x80)
        return code;

    /* 0x80..0xbf cannot start a sequence. */
    if (code < 0xc0)
        return (uint32_t) -1;

    /* The lead byte fixes the sequence length and its payload bits. */
    if (code < 0xe0) {
        seq_len = 2;
        code &= 0x1f;
    } else if (code < 0xf0) {
        seq_len = 3;
        code &= 0x0f;
    } else if (code < 0xf8) {
        seq_len = 4;
        code &= 0x07;
    } else if (code < 0xfc) {
        seq_len = 5;
        code &= 0x03;
    } else if (code < 0xfe) {
        seq_len = 6;
        code &= 0x01;
    } else {
        /* 0xfe and 0xff are never lead bytes. */
        return (uint32_t) -1;
    }

    /* Not enough bytes available for the whole sequence: vet the part that
     * is available, then report it as incomplete. */
    if (max_len >= 0 && seq_len > max_len) {
        for (pos = 1; pos < max_len; pos++) {
            if ((bytes[pos] & 0xc0) != 0x80)
                return (uint32_t) -1;
        }
        return (uint32_t) -2;
    }

    /* Accumulate six payload bits from each continuation byte. */
    for (pos = 1; pos < seq_len; pos++) {
        uint32_t next = bytes[pos];

        /* A nul here means the string ended mid-sequence (incomplete);
         * any other non-continuation byte makes the sequence invalid. */
        if ((next & 0xc0) != 0x80)
            return next ? (uint32_t) -1 : (uint32_t) -2;

        code = (code << 6) | (next & 0x3f);
    }

    /* Reject values whose UTF8_LENGTH differs from the length actually
     * used to encode them. */
    if (UTF8_LENGTH (code) != seq_len)
        return (uint32_t) -1;

    return code;
}

Here is the caller graph for this function:


Variable Documentation

const char utf8_skip_data[256] [static]
Initial value:
 {
    /* Indexed by the first byte of a sequence; each entry is the number of
     * bytes UTF8_NEXT_CHAR advances past for that byte.  Each row below
     * covers 32 consecutive byte values. */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x1f */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x3f */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x5f */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x7f */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x80-0x9f: continuation bytes, skipped one at a time */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xa0-0xbf: continuation bytes, skipped one at a time */
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xc0-0xdf: 2-byte sequences */
    /* 0xe0-0xef: 3; 0xf0-0xf7: 4; 0xf8-0xfb: 5; 0xfc-0xfd: 6; 0xfe-0xff: 1 */
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
}

Definition at line 109 of file cairo-unicode.c.