Back to index

glibc  2.9
wchar-lookup.h
Go to the documentation of this file.
00001 /* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 #include <stdint.h>
00021 
00022 /* Tables indexed by a wide character are compressed through the use
00023    of a multi-level lookup.  The compression effect comes from blocks
00024    that don't need particular data and from blocks that can share their
00025    data.  */
00026 
00027 /* Bit tables are accessed by cutting wc in four blocks of bits:
00028    - the high 32-q-p-5 bits,
00029    - the next q bits,
00030    - the next p bits,
00031    - the next 5 bits.
00032 
00033            +------------------+-----+-----+-----+
00034      wc  = |     32-q-p-5     |  q  |  p  |  5  |
00035            +------------------+-----+-----+-----+
00036 
00037    p and q are variable.  For 16-bit Unicode it is sufficient to
00038    choose p and q such that q+p+5 <= 16.
00039 
00040    The table contains the following uint32_t words:
00041    - q+p+5,
00042    - s = upper exclusive bound for wc >> (q+p+5),
00043    - p+5,
00044    - 2^q-1,
00045    - 2^p-1,
00046    - 1st-level table: s offsets, pointing into the 2nd-level table,
00047    - 2nd-level table: k*2^q offsets, pointing into the 3rd-level table,
00048    - 3rd-level table: j*2^p words, each containing 32 bits of data.
00049 */
00050 
/* Test the bit belonging to WC in the bit table TABLE.

   TABLE begins with five uint32_t header words (first-level shift,
   first-level bound, second-level shift, second-level mask,
   third-level mask), immediately followed by the first-level entries.
   First- and second-level entries are byte offsets from TABLE; an
   offset of zero means that nothing is stored for that range.

   Returns 1 if the bit is set, 0 if it is clear or not stored.  */
static __inline int
__attribute ((always_inline))
wctype_table_lookup (const char *table, uint32_t wc)
{
  const uint32_t *header = (const uint32_t *) table;

  /* First level: indexed by the bits above the first-level shift and
     valid only below the exclusive bound in header word 1.  */
  uint32_t top = wc >> header[0];
  if (top >= header[1])
    return 0;

  uint32_t level2_off = header[5 + top];
  if (level2_off == 0)
    return 0;

  /* Second level: indexed by the masked middle bits.  */
  const uint32_t *level2 = (const uint32_t *) (table + level2_off);
  uint32_t level3_off = level2[(wc >> header[2]) & header[3]];
  if (level3_off == 0)
    return 0;

  /* Third level: every word carries 32 bits of data; the low five
     bits of WC pick the bit inside the word.  */
  const uint32_t *level3 = (const uint32_t *) (table + level3_off);
  uint32_t word = level3[(wc >> 5) & header[4]];

  return (word >> (wc & 0x1f)) & 1;
}
00079 
/* Byte tables use the same three-level layout as bit tables, but the
   third level is an array of single bytes indexed directly by the low
   bits of WC; there is no 5-bit index into a data word.

   Returns the byte stored for WC, or 0xff if TABLE stores nothing for
   it (WC beyond the first-level bound, or a zero offset on the first
   or second level).  */
static __inline int
__attribute ((always_inline))
wcwidth_table_lookup (const char *table, uint32_t wc)
{
  const uint32_t *header = (const uint32_t *) table;

  /* First level: valid only below the exclusive bound in word 1.  */
  uint32_t top = wc >> header[0];
  if (top >= header[1])
    return 0xff;

  uint32_t level2_off = header[5 + top];
  if (level2_off == 0)
    return 0xff;

  /* Second level: entries are byte offsets from TABLE.  */
  const uint32_t *level2 = (const uint32_t *) (table + level2_off);
  uint32_t level3_off = level2[(wc >> header[2]) & header[3]];
  if (level3_off == 0)
    return 0xff;

  /* Third level: one byte per character.  */
  const uint8_t *level3 = (const uint8_t *) (table + level3_off);
  uint8_t value = level3[wc & header[4]];

  return value;
}
00111 
/* Mapping tables use the same three-level layout as bit tables, but
   the third level is an array of signed 32-bit words indexed directly
   by the low bits of WC; there is no 5-bit index into a data word.
   Each word is the difference between the mapped character and WC.

   Returns WC plus the stored difference, or WC unchanged if TABLE
   stores nothing for it (WC beyond the first-level bound, or a zero
   offset on the first or second level).  */
static __inline uint32_t
__attribute ((always_inline))
wctrans_table_lookup (const char *table, uint32_t wc)
{
  const uint32_t *header = (const uint32_t *) table;

  /* First level: valid only below the exclusive bound in word 1.  */
  uint32_t top = wc >> header[0];
  if (top >= header[1])
    return wc;

  uint32_t level2_off = header[5 + top];
  if (level2_off == 0)
    return wc;

  /* Second level: entries are byte offsets from TABLE.  */
  const uint32_t *level2 = (const uint32_t *) (table + level2_off);
  uint32_t level3_off = level2[(wc >> header[2]) & header[3]];
  if (level3_off == 0)
    return wc;

  /* Third level: signed delta to add to WC.  */
  const int32_t *level3 = (const int32_t *) (table + level3_off);
  int32_t delta = level3[wc & header[4]];

  return wc + delta;
}
00144 }