Back to index

glibc  2.9
Defines | Enumerations | Functions | Variables
nfkc.c File Reference
#include <stdlib.h>
#include <string.h>
#include "stringprep.h"
#include "gunidecomp.h"
#include "gunicomp.h"

Go to the source code of this file.

Defines

#define gboolean   int
#define gchar   char
#define guchar   unsigned char
#define glong   long
#define gint   int
#define guint   unsigned int
#define gushort   unsigned short
#define gint16   int16_t
#define guint16   uint16_t
#define gunichar   uint32_t
#define gsize   size_t
#define gssize   ssize_t
#define g_malloc   malloc
#define g_free   free
#define GError   void
#define g_set_error(a, b, c, d)   ((void) 0)
#define g_new(struct_type, n_structs)   ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs))))
#define G_STMT_START   do
#define G_STMT_END   while (0)
#define g_return_val_if_fail(expr, val)   G_STMT_START{ (void)0; }G_STMT_END
#define G_N_ELEMENTS(arr)   (sizeof (arr) / sizeof ((arr)[0]))
#define TRUE   1
#define FALSE   0
#define UTF8_COMPUTE(Char, Mask, Len)
#define UTF8_LENGTH(Char)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
#define UNICODE_VALID(Char)
#define g_utf8_next_char(p)   (char *)((p) + g_utf8_skip[*(guchar *)(p)])
#define CC_PART1(Page, Char)
#define CC_PART2(Page, Char)
#define COMBINING_CLASS(Char)
#define SBase   0xAC00
#define LBase   0x1100
#define VBase   0x1161
#define TBase   0x11A7
#define LCount   19
#define VCount   21
#define TCount   28
#define NCount   (VCount * TCount)
#define SCount   (LCount * NCount)
#define CI(Page, Char)
#define COMPOSE_INDEX(Char)   ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))

Enumerations

enum  GNormalizeMode {
  G_NORMALIZE_DEFAULT, G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, G_NORMALIZE_DEFAULT_COMPOSE, G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
  G_NORMALIZE_ALL, G_NORMALIZE_NFKD = G_NORMALIZE_ALL, G_NORMALIZE_ALL_COMPOSE, G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
}

Functions

static glong g_utf8_strlen (const gchar *p, gssize max)
static gunichar g_utf8_get_char (const gchar *p)
static int g_unichar_to_utf8 (gunichar c, gchar *outbuf)
static gunichar * g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
static gchar * g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error)
static void g_unicode_canonical_ordering (gunichar *string, gsize len)
static void decompose_hangul (gunichar s, gunichar *r, gsize *result_len)
static const gchar * find_decomposition (gunichar ch, gboolean compat)
static gboolean combine_hangul (gunichar a, gunichar b, gunichar *result)
static gboolean combine (gunichar a, gunichar b, gunichar *result)
static gunichar * _g_utf8_normalize_wc (const gchar *str, gssize max_len, GNormalizeMode mode)
static gchar * g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode)
uint32_t stringprep_utf8_to_unichar (const char *p)
 stringprep_utf8_to_unichar: @p: a pointer to a Unicode character encoded as UTF-8.
int stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
 stringprep_unichar_to_utf8: @c: an ISO10646 character code. @outbuf: output buffer, must have at least 6 bytes of space.
uint32_t * stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t *items_written)
 stringprep_utf8_to_ucs4: @str: a UTF-8 encoded string. @len: the maximum length of @str to use.
char * stringprep_ucs4_to_utf8 (const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
 stringprep_ucs4_to_utf8: @str: a UCS-4 encoded string. @len: the maximum length of @str to use.
char * stringprep_utf8_nfkc_normalize (const char *str, ssize_t len)
 stringprep_utf8_nfkc_normalize: @str: a UTF-8 encoded string.
uint32_t * stringprep_ucs4_nfkc_normalize (uint32_t *str, ssize_t len)
 stringprep_ucs4_nfkc_normalize: @str: a Unicode string.

Variables

static const gchar utf8_skip_data [256]
const gchar *const g_utf8_skip = utf8_skip_data

Define Documentation

#define CC_PART1 (   Page,
  Char 
)
#define CC_PART2 (   Page,
  Char 
)
#define CI (   Page,
  Char 
)
Value:

Definition at line 679 of file nfkc.c.

#define COMBINING_CLASS (   Char)
Value:
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
   ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
   : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
      ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
      : 0))

Definition at line 507 of file nfkc.c.

#define COMPOSE_INDEX (   Char)    ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))

Definition at line 684 of file nfkc.c.

#define FALSE   0

Definition at line 72 of file nfkc.c.

#define g_free   free

Definition at line 52 of file nfkc.c.

#define g_malloc   malloc

Definition at line 51 of file nfkc.c.

#define G_N_ELEMENTS (   arr)    (sizeof (arr) / sizeof ((arr)[0]))

Definition at line 70 of file nfkc.c.

#define g_new (   struct_type,
  n_structs 
)    ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs))))

Definition at line 55 of file nfkc.c.

#define g_return_val_if_fail (   expr,
  val 
)    G_STMT_START{ (void)0; }G_STMT_END

Definition at line 69 of file nfkc.c.

#define g_set_error (   a,
  b,
  c,
  d 
)    ((void) 0)

Definition at line 54 of file nfkc.c.

#define G_STMT_END   while (0)

Definition at line 66 of file nfkc.c.

#define G_STMT_START   do

Definition at line 65 of file nfkc.c.

#define g_utf8_next_char (   p)    (char *)((p) + g_utf8_skip[*(guchar *)(p)])

Definition at line 174 of file nfkc.c.

#define gboolean   int

Definition at line 39 of file nfkc.c.

#define gchar   char

Definition at line 40 of file nfkc.c.

#define GError   void

Definition at line 53 of file nfkc.c.

#define gint   int

Definition at line 43 of file nfkc.c.

#define gint16   int16_t

Definition at line 46 of file nfkc.c.

#define glong   long

Definition at line 42 of file nfkc.c.

#define gsize   size_t

Definition at line 49 of file nfkc.c.

#define gssize   ssize_t

Definition at line 50 of file nfkc.c.

#define guchar   unsigned char

Definition at line 41 of file nfkc.c.

#define guint   unsigned int

Definition at line 44 of file nfkc.c.

#define guint16   uint16_t

Definition at line 47 of file nfkc.c.

#define gunichar   uint32_t

Definition at line 48 of file nfkc.c.

#define gushort   unsigned short

Definition at line 45 of file nfkc.c.

#define LBase   0x1100

Definition at line 516 of file nfkc.c.

#define LCount   19

Definition at line 519 of file nfkc.c.

#define NCount   (VCount * TCount)

Definition at line 522 of file nfkc.c.

#define SBase   0xAC00

Definition at line 515 of file nfkc.c.

#define SCount   (LCount * NCount)

Definition at line 523 of file nfkc.c.

#define TBase   0x11A7

Definition at line 518 of file nfkc.c.

#define TCount   28

Definition at line 521 of file nfkc.c.

#define TRUE   1

Definition at line 71 of file nfkc.c.

#define UNICODE_VALID (   Char)
Value:
((Char) < 0x110000 &&                            \
   (((Char) & 0xFFFFF800) != 0xD800) &&          \
   ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&       \
   ((Char) & 0xFFFE) != 0xFFFE)

Definition at line 146 of file nfkc.c.

#define UTF8_COMPUTE (   Char,
  Mask,
  Len 
)

Definition at line 91 of file nfkc.c.

#define UTF8_GET (   Result,
  Chars,
  Count,
  Mask,
  Len 
)
Value:
(Result) = (Chars)[0] & (Mask);                  \
  for ((Count) = 1; (Count) < (Len); ++(Count))         \
    {                                            \
      if (((Chars)[(Count)] & 0xc0) != 0x80)            \
       {                                         \
         (Result) = -1;                          \
         break;                                  \
       }                                         \
      (Result) <<= 6;                                   \
      (Result) |= ((Chars)[(Count)] & 0x3f);            \
    }

Definition at line 133 of file nfkc.c.

#define UTF8_LENGTH (   Char)
Value:
((Char) < 0x80 ? 1 :                      \
   ((Char) < 0x800 ? 2 :                  \
    ((Char) < 0x10000 ? 3 :               \
     ((Char) < 0x200000 ? 4 :                    \
      ((Char) < 0x4000000 ? 5 : 6)))))

Definition at line 125 of file nfkc.c.

#define VBase   0x1161

Definition at line 517 of file nfkc.c.

#define VCount   21

Definition at line 520 of file nfkc.c.


Enumeration Type Documentation

Enumerator:
G_NORMALIZE_DEFAULT 
G_NORMALIZE_NFD 
G_NORMALIZE_DEFAULT_COMPOSE 
G_NORMALIZE_NFC 
G_NORMALIZE_ALL 
G_NORMALIZE_NFKD 
G_NORMALIZE_ALL_COMPOSE 
G_NORMALIZE_NFKC 

Definition at line 76 of file nfkc.c.


Function Documentation

static gunichar* _g_utf8_normalize_wc ( const gchar *  str,
gssize  max_len,
GNormalizeMode  mode 
) [static]

Definition at line 743 of file nfkc.c.

{
  /* Decompose the UTF-8 string STR into a newly allocated, 0-terminated
     gunichar buffer, reorder it with g_unicode_canonical_ordering and, for
     the composing modes (NFC/NFKC), recompose it in place.  A negative
     MAX_LEN means "scan until the NUL byte"; otherwise scanning also stops
     once the read pointer reaches STR + MAX_LEN.  Returns NULL when the
     buffer cannot be allocated; otherwise the caller receives the buffer
     obtained from g_new.  */
  gsize n_wc;
  gunichar *wc_buffer;
  const char *p;
  gsize last_start;
  gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
  gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);

  /* Pass 1: only count how many code points the decomposed text needs.  */
  n_wc = 0;
  p = str;
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      const gchar *decomp;
      gunichar wc = g_utf8_get_char (p);

      /* This range is wider than the syllable block; decompose_hangul
         itself reports length 1 for anything that is not a syllable.  */
      if (wc >= 0xac00 && wc <= 0xd7af)
       {
         gsize result_len;
         /* NULL output buffer: ask for the length only.  */
         decompose_hangul (wc, NULL, &result_len);
         n_wc += result_len;
       }
      else
       {
         decomp = find_decomposition (wc, do_compat);

         if (decomp)
           n_wc += g_utf8_strlen (decomp, -1);
         else
           n_wc++;
       }

      p = g_utf8_next_char (p);
    }

  /* One extra slot for the terminating 0.  */
  wc_buffer = g_new (gunichar, n_wc + 1);
  if (!wc_buffer)
    return NULL;

  /* Pass 2: write the decompositions.  last_start is the index where the
     run that still has to be reordered begins.  */
  last_start = 0;
  n_wc = 0;
  p = str;
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar wc = g_utf8_get_char (p);
      const gchar *decomp;
      int cc;
      /* Index of the first code point produced for this input character.  */
      gsize old_n_wc = n_wc;

      if (wc >= 0xac00 && wc <= 0xd7af)
       {
         gsize result_len;
         decompose_hangul (wc, wc_buffer + n_wc, &result_len);
         n_wc += result_len;
       }
      else
       {
         decomp = find_decomposition (wc, do_compat);

         if (decomp)
           {
             /* The decomposition is itself a NUL-terminated UTF-8 string.  */
             const char *pd;
             for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
              wc_buffer[n_wc++] = g_utf8_get_char (pd);
           }
         else
           wc_buffer[n_wc++] = wc;
       }

      if (n_wc > 0)
       {
         cc = COMBINING_CLASS (wc_buffer[old_n_wc]);

         /* When the chunk just written starts with a class-0 code point,
            reorder everything from last_start up to the current end and
            start the next run at this chunk.  */
         if (cc == 0)
           {
             g_unicode_canonical_ordering (wc_buffer + last_start,
                                       n_wc - last_start);
             last_start = old_n_wc;
           }
       }

      p = g_utf8_next_char (p);
    }

  /* Reorder the final run, which the loop above has not yet handled.  */
  if (n_wc > 0)
    {
      g_unicode_canonical_ordering (wc_buffer + last_start,
                                n_wc - last_start);
      last_start = n_wc;
    }

  wc_buffer[n_wc] = 0;

  /* All decomposed and reordered */

  /* Pass 3 (composing modes only): merge code points into the most recent
     class-0 code point (index last_start) wherever combine() succeeds.  */
  if (do_compose && n_wc > 0)
    {
      gsize i, j;
      int last_cc = 0;
      last_start = 0;

      for (i = 0; i < n_wc; i++)
       {
         int cc = COMBINING_CLASS (wc_buffer[i]);

         /* A merge is attempted only when the previous class was 0 or
            differs from this one.  */
         if (i > 0 &&
             (last_cc == 0 || last_cc != cc) &&
             combine (wc_buffer[last_start], wc_buffer[i],
                     &wc_buffer[last_start]))
           {
             /* Close the gap left by the merged code point, then step i
                back so the loop increment revisits the same index.  */
             for (j = i + 1; j < n_wc; j++)
              wc_buffer[j - 1] = wc_buffer[j];
             n_wc--;
             i--;

             /* NOTE(review): after the decrement this reads the class of
                wc_buffer[i - 1], not wc_buffer[i] -- confirm that this is
                the intended "previous" element.  */
             if (i == last_start)
              last_cc = 0;
             else
              last_cc = COMBINING_CLASS (wc_buffer[i - 1]);

             continue;
           }

         if (cc == 0)
           last_start = i;

         last_cc = cc;
       }
    }

  /* Re-terminate: composition may have shortened the text.  */
  wc_buffer[n_wc] = 0;

  return wc_buffer;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static gboolean combine ( gunichar  a,
gunichar  b,
gunichar *  result 
) [static]

Definition at line 688 of file nfkc.c.

{
  /* Try to compose the pair (A, B) into a single code point.  On success the
     composed value is stored in *RESULT and TRUE is returned; otherwise
     FALSE is returned and *RESULT is left untouched.  The compose_* tables
     and COMPOSE_* limits are defined outside this block (presumably in
     gunicomp.h -- confirm).  */
  gushort index_a, index_b;

  /* Hangul is composed arithmetically, without table lookups.  */
  if (combine_hangul (a, b, result))
    return TRUE;

  index_a = COMPOSE_INDEX (a);

  /* A falls in the "first single" index range: it has exactly one table
     entry, {second code point, composed result}.  */
  if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
    {
      if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
       {
         *result =
           compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
         return TRUE;
       }
      else
       return FALSE;
    }

  index_b = COMPOSE_INDEX (b);

  /* B falls in the "second single" index range: it has exactly one table
     entry, {first code point, composed result}.  */
  if (index_b >= COMPOSE_SECOND_SINGLE_START)
    {
      if (a ==
         compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
       {
         *result =
           compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
         return TRUE;
       }
      else
       return FALSE;
    }

  /* General case: look the pair up in the two-dimensional table; a zero
     entry means the pair does not compose.  */
  if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START
      && index_b >= COMPOSE_SECOND_START
      && index_b < COMPOSE_SECOND_SINGLE_START)
    {
      gunichar res =
       compose_array[index_a - COMPOSE_FIRST_START][index_b -
                                               COMPOSE_SECOND_START];

      if (res)
       {
         *result = res;
         return TRUE;
       }
    }

  return FALSE;
}

Here is the call graph for this function:

static gboolean combine_hangul ( gunichar  a,
gunichar  b,
gunichar *  result 
) [static]

Definition at line 656 of file nfkc.c.

{
  /* Arithmetic Hangul composition.  Composes either
       leading consonant (A) + vowel (B)           -> LV syllable, or
       LV syllable (A) + trailing consonant (B)    -> LVT syllable.
     Stores the composed code point in *RESULT and returns TRUE on success;
     returns FALSE and leaves *RESULT untouched otherwise.

     The indices are computed in unsigned arithmetic and converted to gint;
     on the usual two's-complement targets a code point below its base thus
     becomes a negative index, which the range checks reject.  */
  gint LIndex = a - LBase;
  gint SIndex = a - SBase;

  gint VIndex = b - VBase;
  gint TIndex = b - TBase;

  if (0 <= LIndex && LIndex < LCount && 0 <= VIndex && VIndex < VCount)
    {
      *result = SBase + (LIndex * VCount + VIndex) * TCount;
      return TRUE;
    }
  /* (SIndex % TCount) == 0 selects syllables without a trailing consonant.
     TBase itself is one below the first trailing consonant, so the valid
     trailing indices are 1 .. TCount - 1.  Accepting 0 would silently
     swallow U+11A7, and accepting TCount would turn U+11C3 into the *next*
     LV syllable.  */
  else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0
           && 0 < TIndex && TIndex < TCount)
    {
      *result = a + TIndex;
      return TRUE;
    }

  return FALSE;
}

Here is the caller graph for this function:

static void decompose_hangul ( gunichar  s,
gunichar *  r,
gsize *  result_len 
) [static]

Definition at line 577 of file nfkc.c.

{
  /* Split the code point S into its conjoining jamo.  The number of code
     points produced (1, 2 or 3) is stored in *RESULT_LEN.  When R is not
     NULL the code points are also written to R[0..]; a NULL R therefore
     serves as a pure length query.  */
  const gint syllable = s - SBase;
  gint trailing;

  if (syllable < 0 || syllable >= SCount)
    {
      /* Not a precomposed syllable: S is passed through unchanged.  */
      if (r != NULL)
        r[0] = s;
      *result_len = 1;
      return;
    }

  /* Zero means the syllable carries no trailing consonant.  */
  trailing = syllable % TCount;

  if (r != NULL)
    {
      r[0] = LBase + syllable / NCount;
      r[1] = VBase + (syllable % NCount) / TCount;
      if (trailing != 0)
        r[2] = TBase + trailing;
    }

  *result_len = (trailing != 0) ? 3 : 2;
}

Here is the caller graph for this function:

static const gchar* find_decomposition ( gunichar  ch,
gboolean  compat 
) [static]

Definition at line 613 of file nfkc.c.

{
  /* Binary-search decomp_table for CH and return a pointer into
     decomp_expansion_string holding its decomposition, or NULL if CH has
     none.  With COMPAT set the compatibility mapping is preferred and the
     canonical one is the fallback; otherwise only the canonical mapping is
     considered.  Both tables are defined outside this block.  */
  int start = 0;
  int end = G_N_ELEMENTS (decomp_table);

  /* Skip the search when CH lies outside the range covered by the table.  */
  if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
    {
      while (TRUE)
       {
         int half = (start + end) / 2;
         if (ch == decomp_table[half].ch)
           {
             int offset;

             if (compat)
              {
                offset = decomp_table[half].compat_offset;
                /* No compatibility mapping: use the canonical offset as is
                   (it is not checked against the "not present" marker on
                   this path).  */
                if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
                  offset = decomp_table[half].canon_offset;
              }
             else
              {
                offset = decomp_table[half].canon_offset;
                if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
                  return NULL;
              }

             return &(decomp_expansion_string[offset]);
           }
         /* The interval can no longer shrink: CH is not in the table.  */
         else if (half == start)
           break;
         else if (ch > decomp_table[half].ch)
           start = half;
         else
           end = half;
       }
    }

  return NULL;
}

Here is the caller graph for this function:

static gchar* g_ucs4_to_utf8 ( const gunichar *  str,
glong  len,
glong *  items_read,
glong *  items_written,
GError **  error 
) [static]

Definition at line 442 of file nfkc.c.

{
  /* Convert the UCS-4 string STR to a newly allocated, NUL-terminated UTF-8
     string.  At most LEN code points are read when LEN >= 0; a 0 code point
     always ends the input.  Returns NULL when a code point is >= 0x80000000
     or when the allocation fails.  *ITEMS_READ (if non-NULL) receives the
     number of code points consumed, or the index of the offending code point
     on a range error; *ITEMS_WRITTEN (if non-NULL) receives the number of
     bytes written, excluding the NUL, and is only stored on success.
     Neither output is stored when the allocation fails.  */
  gint result_length;
  gchar *result = NULL;
  gchar *p;
  gint i;

  /* Pass 1: validate and add up the encoded length.  */
  result_length = 0;
  for (i = 0; len < 0 || i < len; i++)
    {
      if (!str[i])
       break;

      if (str[i] >= 0x80000000)
       {
         if (items_read)
           *items_read = i;

         /* g_set_error is defined as a no-op in this file, so ERROR is
            never written.  */
         g_set_error (error, G_CONVERT_ERROR,
                     G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                     _("Character out of range for UTF-8"));
         goto err_out;
       }

      result_length += UTF8_LENGTH (str[i]);
    }

  result = g_malloc (result_length + 1);
  if (!result)
    return NULL;
  p = result;

  /* Pass 2: encode until exactly result_length bytes have been produced.  */
  i = 0;
  while (p < result + result_length)
    p += g_unichar_to_utf8 (str[i++], p);

  *p = '\0';

  if (items_written)
    *items_written = p - result;

  /* Shared exit: on the error path result is still NULL here.  */
err_out:
  if (items_read)
    *items_read = i;

  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int g_unichar_to_utf8 ( gunichar  c,
gchar *  outbuf 
) [static]

Definition at line 265 of file nfkc.c.

{
  /* Encode the code point C as UTF-8 and return the number of bytes the
     encoding occupies (1 to 6).  When OUTBUF is NULL nothing is written and
     only the length is computed.  */
  static const struct
  {
    gunichar limit;             /* first value that no longer fits */
    int lead;                   /* marker bits of the leading byte */
  } forms[] = {
    {0x80, 0x00},
    {0x800, 0xc0},
    {0x10000, 0xe0},
    {0x200000, 0xf0},
    {0x4000000, 0xf8}
  };
  guint nbytes = 6;             /* anything beyond the table uses 6 bytes */
  int lead = 0xfc;
  guint k;

  for (k = 0; k < sizeof forms / sizeof forms[0]; k++)
    {
      if (c < forms[k].limit)
        {
          lead = forms[k].lead;
          nbytes = k + 1;
          break;
        }
    }

  if (outbuf != NULL)
    {
      guint pos = nbytes;

      /* Fill the continuation bytes back to front, six payload bits each;
         what is left of C afterwards belongs in the leading byte.  */
      while (--pos > 0)
        {
          outbuf[pos] = (c & 0x3f) | 0x80;
          c >>= 6;
        }
      outbuf[0] = c | lead;
    }

  return nbytes;
}

Here is the caller graph for this function:

static void g_unicode_canonical_ordering ( gunichar *  string,
gsize  len 
) [static]

Definition at line 536 of file nfkc.c.

{
  /* Reorder STRING[0 .. LEN-1] in place by combining class.  A code point
     with a non-zero class that follows one with a greater class is moved
     left until its left neighbour's class is not greater than its own;
     class-0 code points therefore act as barriers and are never moved.
     Passes are repeated until one completes without a swap.  */
  gsize i;
  int swap = 1;

  /* Fewer than two code points cannot be out of order.  Returning early
     also keeps "len - 1" below from wrapping around when LEN is 0 (gsize is
     unsigned) and avoids reading string[0] of an empty range.  */
  if (len < 2)
    return;

  while (swap)
    {
      int last;
      swap = 0;
      last = COMBINING_CLASS (string[0]);
      for (i = 0; i < len - 1; ++i)
       {
         int next = COMBINING_CLASS (string[i + 1]);
         if (next != 0 && last > next)
           {
             gsize j;
             /* Percolate item leftward through string.  */
             for (j = i + 1; j > 0; --j)
              {
                gunichar t;
                if (COMBINING_CLASS (string[j - 1]) <= next)
                  break;
                t = string[j];
                string[j] = string[j - 1];
                string[j - 1] = t;
                swap = 1;
              }
             /* We're re-entering the loop looking at the old
                character again.  */
             next = last;
           }
         last = next;
       }
    }
}

Here is the caller graph for this function:

static gunichar g_utf8_get_char ( const gchar *  p) [static]

Definition at line 239 of file nfkc.c.

{
  /* Decode the UTF-8 sequence starting at P and return its code point.
     Returns (gunichar) -1 when the sequence cannot be decoded.  */
  int i, mask = 0, len;
  gunichar result;
  unsigned char c = (unsigned char) *p;

  /* UTF8_COMPUTE's expansion is not visible here; from its use it is
     expected to derive MASK and LEN from the lead byte and to set LEN to -1
     for an invalid lead byte -- confirm against the macro definition.  */
  UTF8_COMPUTE (c, mask, len);
  if (len == -1)
    return (gunichar) - 1;
  /* UTF8_GET accumulates six bits per continuation byte and sets RESULT to
     -1 as soon as a byte is not of the form 10xxxxxx.  */
  UTF8_GET (result, p, i, mask, len);

  return result;
}

Here is the caller graph for this function:

static gchar* g_utf8_normalize ( const gchar *  str,
gssize  len,
GNormalizeMode  mode 
) [static]

Definition at line 915 of file nfkc.c.

{
  /* Normalize the UTF-8 string STR (LEN bytes, or NUL-terminated when LEN is
     negative) according to MODE and return the result as a newly allocated
     UTF-8 string, or NULL if an allocation fails or the conversion back to
     UTF-8 fails.  */
  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
  gchar *result = NULL;

  /* _g_utf8_normalize_wc returns NULL when it cannot allocate its buffer;
     g_ucs4_to_utf8 would dereference that pointer, so skip the conversion.  */
  if (result_wc != NULL)
    result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);

  /* g_free is free(), which accepts NULL.  */
  g_free (result_wc);

  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static glong g_utf8_strlen ( const gchar *  p,
gssize  max 
) [static]

Definition at line 189 of file nfkc.c.

{
  /* Count the characters in the UTF-8 string P.  With MAX < 0 the whole
     NUL-terminated string is counted; otherwise only characters that lie
     completely within the first MAX bytes are counted, and counting also
     stops at a NUL byte.  */
  glong len = 0;
  const gchar *start = p;
  /* No-op in this file: g_return_val_if_fail expands to an empty statement.  */
  g_return_val_if_fail (p != NULL || max == 0, 0);

  if (max < 0)
    {
      while (*p)
       {
         p = g_utf8_next_char (p);
         ++len;
       }
    }
  else
    {
      if (max == 0 || !*p)
       return 0;

      /* The loop counts one character behind the read pointer, so that the
         last character can be checked for completeness below.  */
      p = g_utf8_next_char (p);

      while (p - start < max && *p)
       {
         ++len;
         p = g_utf8_next_char (p);
       }

      /* only do the last len increment if we got a complete
       * char (don't count partial chars)
       *
       * The test has to be "<=": with "==" the final character was dropped
       * whenever the string ended before MAX bytes (e.g. "ab" with MAX 5
       * yielded 1 instead of 2).
       */
      if (p - start <= max)
       ++len;
    }

  return len;
}

Here is the caller graph for this function:

static gunichar* g_utf8_to_ucs4_fast ( const gchar *  str,
glong  len,
glong *  items_written 
) [static]

Definition at line 332 of file nfkc.c.

{
  /* Convert the UTF-8 string STR to a newly allocated, 0-terminated UCS-4
     string without validating the input.  A negative LEN means STR is
     NUL-terminated; otherwise counting stops once the read pointer reaches
     STR + LEN or a NUL byte.  The number of code points stored is written
     to *ITEMS_WRITTEN when it is not NULL.  Returns NULL if the allocation
     fails.  */
  const unsigned char *in;
  const gchar *scan = str;
  gunichar *out;
  gint count = 0;
  gint idx;

  g_return_val_if_fail (str != NULL, NULL);

  /* Pass 1: count the characters, stepping by the skip table.  */
  while ((len < 0 || scan < str + len) && *scan)
    {
      scan = g_utf8_next_char (scan);
      ++count;
    }

  out = g_new (gunichar, count + 1);
  if (out == NULL)
    return NULL;

  /* Pass 2: decode.  The sequence length is taken from the lead byte alone
     and the continuation bytes are not checked.  */
  in = (const unsigned char *) str;
  for (idx = 0; idx < count; idx++)
    {
      gunichar wc = in[0];
      gint nbytes = 1;
      gint k;

      if (wc >= 0x80)
        {
          if (wc < 0xe0)
            {
              nbytes = 2;
              wc &= 0x1f;
            }
          else if (wc < 0xf0)
            {
              nbytes = 3;
              wc &= 0x0f;
            }
          else if (wc < 0xf8)
            {
              nbytes = 4;
              wc &= 0x07;
            }
          else if (wc < 0xfc)
            {
              nbytes = 5;
              wc &= 0x03;
            }
          else
            {
              nbytes = 6;
              wc &= 0x01;
            }
        }

      /* Append six payload bits per continuation byte.  */
      for (k = 1; k < nbytes; k++)
        wc = (wc << 6) | (in[k] & 0x3f);

      out[idx] = wc;
      in += nbytes;
    }
  out[idx] = 0;

  if (items_written != NULL)
    *items_written = idx;

  return out;
}

Here is the caller graph for this function:

stringprep_ucs4_nfkc_normalize: @str: a Unicode string.

@len: length of the @str array, or -1 if @str is nul-terminated.

Converts the UCS-4 string into UTF-8 and then NFKC-normalizes it, in the same way as stringprep_utf8_nfkc_normalize().

Return value: a newly allocated Unicode string, that is the NFKC normalized form of @str.

Definition at line 1048 of file nfkc.c.

{
  /* NFKC-normalize the UCS-4 string STR (LEN code points, or 0-terminated
     when LEN is negative) by converting it to UTF-8 and normalizing that.
     Returns a newly allocated, 0-terminated UCS-4 string, or NULL if the
     conversion to UTF-8 fails or memory cannot be allocated.  */
  char *p;
  uint32_t *result_wc;

  p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
  /* The conversion returns NULL for out-of-range code points and on
     allocation failure; _g_utf8_normalize_wc would dereference it.  */
  if (p == NULL)
    return NULL;

  result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
  free (p);

  return result_wc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* stringprep_ucs4_to_utf8 ( const uint32_t *  str,
ssize_t  len,
size_t *  items_read,
size_t *  items_written 
)

stringprep_ucs4_to_utf8: @str: a UCS-4 encoded string. @len: the maximum length of @str to use.

If @len < 0, then the string is terminated with a 0 character. @items_read: location to store the number of characters read, or NULL. @items_written: location to store the number of bytes written, or NULL. The value stored here does not include the trailing 0 byte.

Convert a string from a 32-bit fixed width representation as UCS-4 to UTF-8. The result will be terminated with a 0 byte.

Return value: a pointer to a newly allocated UTF-8 string. This value must be freed with free(). If an error occurs, NULL will be returned.

Definition at line 1001 of file nfkc.c.

{
  /* Public wrapper around g_ucs4_to_utf8; no error object is requested.
     NOTE(review): the size_t pointers are reinterpreted as glong pointers,
     which assumes both types have the same size and representation on the
     target -- confirm for platforms where long is narrower than size_t.  */
  return g_ucs4_to_utf8 (str, len, (glong *) items_read,
                      (glong *) items_written, NULL);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int stringprep_unichar_to_utf8 ( uint32_t  c,
char *  outbuf 
)

stringprep_unichar_to_utf8: @c: an ISO10646 character code. @outbuf: output buffer, must have at least 6 bytes of space.

  If %NULL, the length will be computed and returned
  and nothing will be written to @outbuf.

Converts a single character to UTF-8.

Return value: number of bytes written.

Definition at line 956 of file nfkc.c.

{
  /* Public wrapper: forwards unchanged to the file-local encoder, which
     accepts a NULL OUTBUF and then only returns the encoded length.  */
  return g_unichar_to_utf8 (c, outbuf);
}

Here is the call graph for this function:

char* stringprep_utf8_nfkc_normalize ( const char *  str,
ssize_t  len 
)

stringprep_utf8_nfkc_normalize: @str: a UTF-8 encoded string.

@len: length of @str, in bytes, or -1 if @str is nul-terminated.

Converts a string into canonical form, standardizing such issues as whether a character with an accent is represented as a base character and combining accent or as a single precomposed character.

The normalization mode is NFKC (ALL COMPOSE). It standardizes differences that do not affect the text content, such as the above-mentioned accent representation. It standardizes the "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE). Formatting information may be lost but for most text operations such characters should be considered the same. It returns a result with composed forms rather than a maximally decomposed form.

Return value: a newly allocated string, that is the NFKC normalized form of @str.

Definition at line 1031 of file nfkc.c.

Here is the call graph for this function:

uint32_t* stringprep_utf8_to_ucs4 ( const char *  str,
ssize_t  len,
size_t *  items_written 
)

stringprep_utf8_to_ucs4: @str: a UTF-8 encoded string. @len: the maximum length of @str to use.

If @len < 0, then the string is nul-terminated. @items_written: location to store the number of characters in the result, or NULL.

Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4, assuming valid UTF-8 input. This function does no error checking on the input.

Return value: a pointer to a newly allocated UCS-4 string. This value must be freed with free().

Definition at line 977 of file nfkc.c.

{
  /* Public wrapper around g_utf8_to_ucs4_fast, which performs no input
     validation.  NOTE(review): ITEMS_WRITTEN is reinterpreted from size_t *
     to glong *, which assumes both types have the same size on the target
     -- confirm.  */
  return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written);
}

Here is the call graph for this function:

Here is the caller graph for this function:

stringprep_utf8_to_unichar: @p: a pointer to a Unicode character encoded as UTF-8.

Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If @p does not point to a valid UTF-8 encoded character, results are undefined.

Return value: the resulting character.

Definition at line 939 of file nfkc.c.

{
  /* Public wrapper: forwards unchanged to the file-local decoder, which
     returns (gunichar) -1 for sequences it cannot decode.  */
  return g_utf8_get_char (p);
}

Here is the call graph for this function:


Variable Documentation

Definition at line 172 of file nfkc.c.

const gchar utf8_skip_data[256] [static]
Initial value:
 {
  /* Indexed by the first byte of a sequence; the entry is the number of
     bytes g_utf8_next_char advances.  0x00-0xBF: 1, 0xC0-0xDF: 2,
     0xE0-0xEF: 3, 0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFD: 6, 0xFE-0xFF: 1.  */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
  5, 5, 5, 6, 6, 1, 1
}

Definition at line 153 of file nfkc.c.