Back to index

glibc  2.9
Defines | Functions
strxfrm_l.c File Reference
#include <assert.h>
#include <langinfo.h>
#include <locale.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include "../locale/localeinfo.h"
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Defines

#define STRING_TYPE   char
#define USTRING_TYPE   unsigned char
#define STRXFRM   __strxfrm_l
#define STRCMP   strcmp
#define STRLEN   strlen
#define STPNCPY   __stpncpy
#define WEIGHT_H   "../locale/weight.h"
#define SUFFIX   MB
#define L(arg)   arg
#define CONCAT(a, b)   CONCAT1(a,b)
#define CONCAT1(a, b)   a##b

Functions

static int utf8_encode (char *buf, int val)
size_t STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)

Define Documentation

#define CONCAT (   a,
  b 
)    CONCAT1(a,b)

Definition at line 42 of file strxfrm_l.c.

#define CONCAT1 (   a,
  b 
)    a##b

Definition at line 43 of file strxfrm_l.c.

#define L (   arg)    arg

Definition at line 39 of file strxfrm_l.c.

#define STPNCPY   __stpncpy

Definition at line 36 of file strxfrm_l.c.

#define STRCMP   strcmp

Definition at line 34 of file strxfrm_l.c.

#define STRING_TYPE   char

Definition at line 31 of file strxfrm_l.c.

#define STRLEN   strlen

Definition at line 35 of file strxfrm_l.c.

#define STRXFRM   __strxfrm_l

Definition at line 33 of file strxfrm_l.c.

#define SUFFIX   MB

Definition at line 38 of file strxfrm_l.c.

#define USTRING_TYPE   unsigned char

Definition at line 32 of file strxfrm_l.c.

#define WEIGHT_H   "../locale/weight.h"

Definition at line 37 of file strxfrm_l.c.


Function Documentation

size_t STRXFRM ( STRING_TYPE dest,
const STRING_TYPE src,
size_t  n,
__locale_t  l 
)

Definition at line 87 of file strxfrm_l.c.

{
  struct locale_data *current = l->__locales[LC_COLLATE];
  uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
  /* We don't assign the following values right away since it might be
     unnecessary in case there are no rules.  */
  const unsigned char *rulesets;
  const int32_t *table;
  const USTRING_TYPE *weights;
  const USTRING_TYPE *extra;
  const int32_t *indirect;
  uint_fast32_t pass;
  size_t needed;
  size_t last_needed;
  const USTRING_TYPE *usrc;
  size_t srclen = STRLEN (src);
  int32_t *idxarr;
  unsigned char *rulearr;
  size_t idxmax;
  size_t idxcnt;
  int use_malloc;

#include WEIGHT_H

  if (nrules == 0)
    {
      if (n != 0)
       STPNCPY (dest, src, MIN (srclen + 1, n));

      return srclen;
    }

  rulesets = (const unsigned char *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
  table = (const int32_t *)
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
  weights = (const USTRING_TYPE *)
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
  extra = (const USTRING_TYPE *)
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
  indirect = (const int32_t *)
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
  use_malloc = 0;

  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);

  /* Handle an empty string as a special case.  */
  if (srclen == 0)
    {
      if (n != 0)
        *dest = L('\0');
      return 0;
    }

  /* We need the elements of the string as unsigned values since they
     are used as indeces.  */
  usrc = (const USTRING_TYPE *) src;

  /* Perform the first pass over the string and while doing this find
     and store the weights for each character.  Since we want this to
     be as fast as possible we are using `alloca' to store the temporary
     values.  But since there is no limit on the length of the string
     we have to use `malloc' if the string is too long.  We should be
     very conservative here.  */
  if (! __libc_use_alloca (srclen))
    {
      idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
      rulearr = (unsigned char *) &idxarr[srclen];

      if (idxarr == NULL)
       /* No memory.  Well, go with the stack then.

          XXX Once this implementation is stable we will handle this
          differently.  Instead of precomputing the indeces we will
          do this in time.  This means, though, that this happens for
          every pass again.  */
       goto try_stack;
      use_malloc = 1;
    }
  else
    {
    try_stack:
      idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
      rulearr = (unsigned char *) alloca (srclen + 1);
    }

  idxmax = 0;
  do
    {
      int32_t tmp = findidx (&usrc);
      rulearr[idxmax] = tmp >> 24;
      idxarr[idxmax] = tmp & 0xffffff;

      ++idxmax;
    }
  while (*usrc != L('\0'));

  /* This element is only read, the value never used but to determine
     another value which then is ignored.  */
  rulearr[idxmax] = '\0';

  /* Now the passes over the weights.  We now use the indeces we found
     before.  */
  needed = 0;
  for (pass = 0; pass < nrules; ++pass)
    {
      size_t backw_stop = ~0ul;
      int rule = rulesets[rulearr[0] * nrules + pass];
      /* We assume that if a rule has defined `position' in one section
        this is true for all of them.  */
      int position = rule & sort_position;

      last_needed = needed;
      if (position == 0)
       {
         for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
           {
             if ((rule & sort_forward) != 0)
              {
                size_t len;

                if (backw_stop != ~0ul)
                  {
                    /* Handle the pushed elements now.  */
                    size_t backw;

                    for (backw = idxcnt; backw > backw_stop; )
                     {
                       --backw;
                       len = weights[idxarr[backw]++];

                       if (needed + len < n)
                         while (len-- > 0)
                           dest[needed++] = weights[idxarr[backw]++];
                       else
                         {
                            /* No more characters fit into the buffer.  */
                           needed += len;
                           idxarr[backw] += len;
                         }
                     }

                    backw_stop = ~0ul;
                  }

                /* Now handle the forward element.  */
                len = weights[idxarr[idxcnt]++];
                if (needed + len < n)
                  while (len-- > 0)
                    dest[needed++] = weights[idxarr[idxcnt]++];
                else
                  {
                    /* No more characters fit into the buffer.  */
                    needed += len;
                    idxarr[idxcnt] += len;
                  }
              }
             else
              {
                /* Remember where the backwards series started.  */
                if (backw_stop == ~0ul)
                  backw_stop = idxcnt;
              }

             rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
           }


         if (backw_stop != ~0ul)
           {
             /* Handle the pushed elements now.  */
             size_t backw;

             backw = idxcnt;
             while (backw > backw_stop)
              {
                size_t len = weights[idxarr[--backw]++];

                if (needed + len < n)
                  while (len-- > 0)
                    dest[needed++] = weights[idxarr[backw]++];
                else
                  {
                    /* No more characters fit into the buffer.  */
                    needed += len;
                    idxarr[backw] += len;
                  }
              }
           }
       }
      else
       {
         int val = 1;
#ifndef WIDE_CHAR_VERSION
         char buf[7];
         size_t buflen;
#endif
         size_t i;

         for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
           {
             if ((rule & sort_forward) != 0)
              {
                size_t len;

                if (backw_stop != ~0ul)
                  {
                   /* Handle the pushed elements now.  */
                    size_t backw;

                    for (backw = idxcnt; backw > backw_stop; )
                     {
                       --backw;
                       len = weights[idxarr[backw]++];
                       if (len != 0)
                         {
#ifdef WIDE_CHAR_VERSION
                           if (needed + 1 + len < n)
                            {
                              dest[needed] = val;
                              for (i = 0; i < len; ++i)
                                dest[needed + 1 + i] =
                                  weights[idxarr[backw] + i];
                            }
                           needed += 1 + len;
#else
                           buflen = utf8_encode (buf, val);
                           if (needed + buflen + len < n)
                            {
                              for (i = 0; i < buflen; ++i)
                                dest[needed + i] = buf[i];
                              for (i = 0; i < len; ++i)
                                dest[needed + buflen + i] =
                                  weights[idxarr[backw] + i];
                            }
                           needed += buflen + len;
#endif
                           idxarr[backw] += len;
                           val = 1;
                         }
                       else
                         ++val;
                     }

                    backw_stop = ~0ul;
                  }

                /* Now handle the forward element.  */
                len = weights[idxarr[idxcnt]++];
                if (len != 0)
                  {
#ifdef WIDE_CHAR_VERSION
                    if (needed + 1+ len < n)
                     {
                       dest[needed] = val;
                       for (i = 0; i < len; ++i)
                         dest[needed + 1 + i] =
                           weights[idxarr[idxcnt] + i];
                     }
                    needed += 1 + len;
#else
                    buflen = utf8_encode (buf, val);
                    if (needed + buflen + len < n)
                     {
                       for (i = 0; i < buflen; ++i)
                         dest[needed + i] = buf[i];
                       for (i = 0; i < len; ++i)
                         dest[needed + buflen + i] =
                           weights[idxarr[idxcnt] + i];
                     }
                    needed += buflen + len;
#endif
                    idxarr[idxcnt] += len;
                    val = 1;
                  }
                else
                  /* Note that we don't have to increment `idxarr[idxcnt]'
                     since the length is zero.  */
                  ++val;
              }
             else
              {
                /* Remember where the backwards series started.  */
                if (backw_stop == ~0ul)
                  backw_stop = idxcnt;
              }

             rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
           }

         if (backw_stop != ~0ul)
           {
             /* Handle the pushed elements now.  */
             size_t backw;

             backw = idxmax - 1;
             while (backw > backw_stop)
              {
                size_t len = weights[idxarr[--backw]++];
                if (len != 0)
                  {
#ifdef WIDE_CHAR_VERSION
                    if (needed + 1 + len < n)
                     {
                       dest[needed] = val;
                       for (i = 0; i < len; ++i)
                         dest[needed + 1 + i] =
                           weights[idxarr[backw] + i];
                     }
                    needed += 1 + len;
#else
                    buflen = utf8_encode (buf, val);
                    if (needed + buflen + len < n)
                     {
                       for (i = 0; i < buflen; ++i)
                         dest[needed + i] = buf[i];
                       for (i = 0; i < len; ++i)
                         dest[needed + buflen + i] =
                           weights[idxarr[backw] + i];
                     }
                    needed += buflen + len;
#endif
                    idxarr[backw] += len;
                    val = 1;
                  }
                else
                  ++val;
              }
           }
       }

      /* Finally store the byte to separate the passes or terminate
        the string.  */
      if (needed < n)
       dest[needed] = pass + 1 < nrules ? L('\1') : L('\0');
      ++needed;
    }

  /* This is a little optimization: many collation specifications have
     a `position' rule at the end and if no non-ignored character
     is found the last \1 byte is immediately followed by a \0 byte
     signalling this.  We can avoid the \1 byte(s).  */
  if (needed > 2 && needed == last_needed + 1)
    {
      /* Remove the \1 byte.  */
      if (--needed <= n)
       dest[needed - 1] = L('\0');
    }

  /* Free the memory if needed.  */
  if (use_malloc)
    free (idxarr);

  /* Return the number of bytes/words we need, but don't count the NUL
     byte/word at the end.  */
  return needed - 1;
}

Here is the call graph for this function:

static int utf8_encode ( char *  buf,
int  val 
) [static]

Definition at line 52 of file strxfrm_l.c.

{
  int retval;

  if (val < 0x80)
    {
      *buf++ = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      for (step = 2; step < 6; ++step)
       if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
         break;
      retval = step;

      *buf = (unsigned char) (~0xff >> step);
      --step;
      do
       {
         buf[step] = 0x80 | (val & 0x3f);
         val >>= 6;
       }
      while (--step > 0);
      *buf |= val;
    }

  return retval;
}

Here is the call graph for this function:

Here is the caller graph for this function: