Back to index

glibc  2.9
ld-ctype.c
Go to the documentation of this file.
00001 /* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License as published
00007    by the Free Software Foundation; version 2 of the License, or
00008    (at your option) any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software Foundation,
00017    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
00018 
00019 #ifdef HAVE_CONFIG_H
00020 # include <config.h>
00021 #endif
00022 
00023 #include <alloca.h>
00024 #include <byteswap.h>
00025 #include <endian.h>
00026 #include <errno.h>
00027 #include <limits.h>
00028 #include <obstack.h>
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <wchar.h>
00032 #include <wctype.h>
00033 #include <sys/uio.h>
00034 
00035 #include "localedef.h"
00036 #include "charmap.h"
00037 #include "localeinfo.h"
00038 #include "langinfo.h"
00039 #include "linereader.h"
00040 #include "locfile-token.h"
00041 #include "locfile.h"
00042 
00043 #include <assert.h>
00044 
00045 
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are deliberately unsigned: `1 << 31' on a 32-bit
   `int' shifts into the sign bit, which is undefined behavior, whereas
   `1U << 31' is well defined and yields 0x80000000.  */
# define _ISwspecial1       (1U << 29)
# define _ISwspecial2       (1U << 30)
# define _ISwspecial3       (1U << 31)
#endif
00053 
00054 
/* The bit used for representing a special class.  BITPOS gives the
   position of a class token relative to `tok_upper'; BIT and BITw feed
   that position to _ISbit and _ISwbit respectively to obtain the mask.  */
#define BITPOS(tok) ((tok) - tok_upper)
#define BIT(tok) (_ISbit (BITPOS (tok)))
#define BITw(tok) (_ISwbit (BITPOS (tok)))
00059 
/* Expand to the object that find_idx returns a pointer to for VALUE in the
   table CTYPE->COLLECTION, so the expansion can be read or assigned to.
   IDX is pasted verbatim after the table name and after its `_max' and
   `_act' companions; it may be empty.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype,                                                           \
             &ctype->collection idx,                                          \
             &ctype->collection##_max idx,                                    \
             &ctype->collection##_act idx,                                    \
             value)
00063 
00064 
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.

   These are real typedefs rather than object-like macros so that the
   names obey scope and cannot silently rewrite unrelated identifiers.  */
typedef uint16_t char_class_t;
typedef uint32_t char_class32_t;
00070 
00071 
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.

   One `struct translit_to_t' holds a single to-string; the alternatives
   for one from-string are chained through `next'.  */
struct translit_to_t
{
  uint32_t *str;                /* One replacement string (32-bit units).  */

  struct translit_to_t *next;   /* Next alternative, or NULL.  */
};
00082 
/* One transliteration rule: a from-string together with the chain of
   to-strings that may replace it.  Rules form a singly linked list.  */
struct translit_t
{
  uint32_t *from;               /* The from-string (32-bit units).  */

  /* File name and line number recorded for the rule; presumably the
     place in the locale source where it was defined -- confirm against
     the code that fills them in.  */
  const char *fname;
  size_t lineno;

  struct translit_to_t *to;     /* Head of the to-string chain.  */

  struct translit_t *next;      /* Next rule, or NULL.  */
};
00094 
/* One entry of the `translit_ignore' list kept in struct locale_ctype_t.
   NOTE(review): `from', `to' and `step' look like a stepped range of
   character values -- confirm the exact meaning (inclusive bounds?)
   against the parser that creates these entries.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for the entry.  */
  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;       /* Next entry, or NULL.  */
};
00106 
00107 
/* Type to describe a transliteration include statement.  Each node
   carries the two names given by one statement; nodes are chained
   through `next'.  */
struct translit_include_t
{
  const char *copy_locale;      /* Name of the locale to include.  */
  const char *copy_repertoire;  /* Name of the repertoire to go with it.  */

  struct translit_include_t *next;      /* Next statement, or NULL.  */
};
00116 
00117 
00118 /* Sparse table of uint32_t.
         The macros below are the parameters of the generic table code in
         "3level.h" and therefore have to be defined before the #include.
         TABLE supplies the name prefix of what gets generated: this file
         uses `struct idx_table' and idx_table_init.  ELEMENT is the type of
         the stored values.
         NOTE(review): DEFAULT is presumably the value reported for entries
         that were never stored, and NO_FINALIZE presumably leaves out the
         finalization code -- confirm both against 3level.h.  */
00119 #define TABLE idx_table
00120 #define ELEMENT uint32_t
00121 #define DEFAULT ((uint32_t) ~0)
00122 #define NO_FINALIZE
00123 #include "3level.h"
00124 
00125 
00126 /* The real definition of the struct for the LC_CTYPE locale.  */
00127 struct locale_ctype_t
00128 {
00129   uint32_t *charnames;
00130   size_t charnames_max;
00131   size_t charnames_act;
00132   /* An index lookup table, to speedup find_idx.  */
00133   struct idx_table charnames_idx;
00134 
00135   struct repertoire_t *repertoire;
00136 
00137   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
00138 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
00139   size_t nr_charclass;
00140   const char *classnames[MAX_NR_CHARCLASS];
00141   uint32_t last_class_char;
00142   uint32_t class256_collection[256];
00143   uint32_t *class_collection;
00144   size_t class_collection_max;
00145   size_t class_collection_act;
00146   uint32_t class_done;
00147   uint32_t class_offset;
00148 
00149   struct charseq **mbdigits;
00150   size_t mbdigits_act;
00151   size_t mbdigits_max;
00152   uint32_t *wcdigits;
00153   size_t wcdigits_act;
00154   size_t wcdigits_max;
00155 
00156   struct charseq *mboutdigits[10];
00157   uint32_t wcoutdigits[10];
00158   size_t outdigits_act;
00159 
00160   /* If the following number ever turns out to be too small simply
00161      increase it.  But I doubt it will.  --drepper@gnu */
00162 #define MAX_NR_CHARMAP 16
00163   const char *mapnames[MAX_NR_CHARMAP];
00164   uint32_t *map_collection[MAX_NR_CHARMAP];
00165   uint32_t map256_collection[2][256];
00166   size_t map_collection_max[MAX_NR_CHARMAP];
00167   size_t map_collection_act[MAX_NR_CHARMAP];
00168   size_t map_collection_nr;
00169   size_t last_map_idx;
00170   int tomap_done[MAX_NR_CHARMAP];
00171   uint32_t map_offset;
00172 
00173   /* Transliteration information.  */
00174   struct translit_include_t *translit_include;
00175   struct translit_t *translit;
00176   struct translit_ignore_t *translit_ignore;
00177   uint32_t ntranslit_ignore;
00178 
00179   uint32_t *default_missing;
00180   const char *default_missing_file;
00181   size_t default_missing_lineno;
00182 
00183   uint32_t to_nonascii;
00184 
00185   /* The arrays for the binary representation.  */
00186   char_class_t *ctype_b;
00187   char_class32_t *ctype32_b;
00188   uint32_t **map_b;
00189   uint32_t **map32_b;
00190   uint32_t **class_b;
00191   struct iovec *class_3level;
00192   struct iovec *map_3level;
00193   uint32_t *class_name_ptr;
00194   uint32_t *map_name_ptr;
00195   struct iovec width;
00196   uint32_t mb_cur_max;
00197   const char *codeset_name;
00198   uint32_t *translit_from_idx;
00199   uint32_t *translit_from_tbl;
00200   uint32_t *translit_to_idx;
00201   uint32_t *translit_to_tbl;
00202   uint32_t translit_idx_size;
00203   size_t translit_from_tbl_size;
00204   size_t translit_to_tbl_size;
00205 
00206   struct obstack mempool;
00207 };
00208 
00209 
/* Marker for an empty slot.  Converting -1 to uint32_t reduces it modulo
   2^32, so the value is 0xFFFFFFFF regardless whether 'int' is 16 bit,
   32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) -1)
00213 
00214 
/* The obstack macros from <obstack.h> (obstack_init is used on the
   `mempool' member) obtain and release their chunks through these two
   names, so route them to xmalloc and free.  */
#define obstack_chunk_free free
#define obstack_chunk_alloc xmalloc
00217 
00218 
/* Prototypes for local functions.  */

/* Set up the LC_CTYPE data of LOCALE before the category is processed.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);

/* Called by ctype_startup once for every predefined class name.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);

/* Called by ctype_startup once for every predefined map name.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           const struct charmap_t *charmap);

/* Return the slot for IDX in *TABLE.  ctype_finish also calls it with
   TABLE, MAX and ACT all NULL, purely for the side effect of entering IDX
   into the name array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max,
                           size_t *act,
                           unsigned int idx);

/* Set default value for classes not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);

/* Defined elsewhere in this file.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
00237 
00238 
/* Spelled-out names of the ten decimal digits.  ctype_finish tries them
   as charmap symbol names when the plain digit characters are not found.  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};

/* The same ten digits written as `U<8 hex digits>' names, U+0030 up to
   U+0039.  */
static const char *uninames[] =
{
  "U00000030",
  "U00000031",
  "U00000032",
  "U00000033",
  "U00000034",
  "U00000035",
  "U00000036",
  "U00000037",
  "U00000038",
  "U00000039"
};

/* The ten digit characters themselves; digits[N] is the character for N.  */
static const unsigned char digits[] = "0123456789";
00250 
00251 
00252 static void
00253 ctype_startup (struct linereader *lr, struct localedef_t *locale,
00254               const struct charmap_t *charmap,
00255               struct localedef_t *copy_locale, int ignore_content)
00256 {
00257   unsigned int cnt;
00258   struct locale_ctype_t *ctype;
00259 
00260   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
00261     {
00262       if (copy_locale == NULL)
00263        {
00264          /* Allocate the needed room.  */
00265          locale->categories[LC_CTYPE].ctype = ctype =
00266            (struct locale_ctype_t *) xcalloc (1,
00267                                           sizeof (struct locale_ctype_t));
00268 
00269          /* We have seen no names yet.  */
00270          ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
00271          ctype->charnames =
00272            (unsigned int *) xmalloc (ctype->charnames_max
00273                                   * sizeof (unsigned int));
00274          for (cnt = 0; cnt < 256; ++cnt)
00275            ctype->charnames[cnt] = cnt;
00276          ctype->charnames_act = 256;
00277          idx_table_init (&ctype->charnames_idx);
00278 
00279          /* Fill character class information.  */
00280          ctype->last_class_char = ILLEGAL_CHAR_VALUE;
00281          /* The order of the following instructions determines the bit
00282             positions!  */
00283          ctype_class_new (lr, ctype, "upper");
00284          ctype_class_new (lr, ctype, "lower");
00285          ctype_class_new (lr, ctype, "alpha");
00286          ctype_class_new (lr, ctype, "digit");
00287          ctype_class_new (lr, ctype, "xdigit");
00288          ctype_class_new (lr, ctype, "space");
00289          ctype_class_new (lr, ctype, "print");
00290          ctype_class_new (lr, ctype, "graph");
00291          ctype_class_new (lr, ctype, "blank");
00292          ctype_class_new (lr, ctype, "cntrl");
00293          ctype_class_new (lr, ctype, "punct");
00294          ctype_class_new (lr, ctype, "alnum");
00295 #ifdef PREDEFINED_CLASSES
00296          /* The following are extensions from ISO 14652.  */
00297          ctype_class_new (lr, ctype, "left_to_right");
00298          ctype_class_new (lr, ctype, "right_to_left");
00299          ctype_class_new (lr, ctype, "num_terminator");
00300          ctype_class_new (lr, ctype, "num_separator");
00301          ctype_class_new (lr, ctype, "segment_separator");
00302          ctype_class_new (lr, ctype, "block_separator");
00303          ctype_class_new (lr, ctype, "direction_control");
00304          ctype_class_new (lr, ctype, "sym_swap_layout");
00305          ctype_class_new (lr, ctype, "char_shape_selector");
00306          ctype_class_new (lr, ctype, "num_shape_selector");
00307          ctype_class_new (lr, ctype, "non_spacing");
00308          ctype_class_new (lr, ctype, "non_spacing_level3");
00309          ctype_class_new (lr, ctype, "normal_connect");
00310          ctype_class_new (lr, ctype, "r_connect");
00311          ctype_class_new (lr, ctype, "no_connect");
00312          ctype_class_new (lr, ctype, "no_connect-space");
00313          ctype_class_new (lr, ctype, "vowel_connect");
00314 #endif
00315 
00316          ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
00317          ctype->class_collection
00318            = (uint32_t *) xcalloc (sizeof (unsigned long int),
00319                                 ctype->class_collection_max);
00320          ctype->class_collection_act = 256;
00321 
00322          /* Fill character map information.  */
00323          ctype->last_map_idx = MAX_NR_CHARMAP;
00324          ctype_map_new (lr, ctype, "toupper", charmap);
00325          ctype_map_new (lr, ctype, "tolower", charmap);
00326 #ifdef PREDEFINED_CLASSES
00327          ctype_map_new (lr, ctype, "tosymmetric", charmap);
00328 #endif
00329 
00330          /* Fill first 256 entries in `toXXX' arrays.  */
00331          for (cnt = 0; cnt < 256; ++cnt)
00332            {
00333              ctype->map_collection[0][cnt] = cnt;
00334              ctype->map_collection[1][cnt] = cnt;
00335 #ifdef PREDEFINED_CLASSES
00336              ctype->map_collection[2][cnt] = cnt;
00337 #endif
00338              ctype->map256_collection[0][cnt] = cnt;
00339              ctype->map256_collection[1][cnt] = cnt;
00340            }
00341 
00342          if (enc_not_ascii_compatible)
00343            ctype->to_nonascii = 1;
00344 
00345          obstack_init (&ctype->mempool);
00346        }
00347       else
00348        ctype = locale->categories[LC_CTYPE].ctype =
00349          copy_locale->categories[LC_CTYPE].ctype;
00350     }
00351 }
00352 
00353 
00354 void
00355 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
00356 {
00357   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
00358 #define NCLASS 12
00359   static const struct
00360   {
00361     const char *name;
00362     const char allow[NCLASS];
00363   }
00364   valid_table[NCLASS] =
00365   {
00366     /* The order is important.  See token.h for more information.
00367        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
00368     { "upper",  "--MX-XDDXXX-" },
00369     { "lower",  "--MX-XDDXXX-" },
00370     { "alpha",  "---X-XDDXXX-" },
00371     { "digit",  "XXX--XDDXXX-" },
00372     { "xdigit", "-----XDDXXX-" },
00373     { "space",  "XXXXX------X" },
00374     { "print",  "---------X--" },
00375     { "graph",  "---------X--" },
00376     { "blank",  "XXXXXM-----X" },
00377     { "cntrl",  "XXXXX-XX--XX" },
00378     { "punct",  "XXXXX-DD-X-X" },
00379     { "alnum",  "-----XDDXXX-" }
00380   };
00381   size_t cnt;
00382   int cls1, cls2;
00383   uint32_t space_value;
00384   struct charseq *space_seq;
00385   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
00386   int warned;
00387   const void *key;
00388   size_t len;
00389   void *vdata;
00390   void *curs;
00391 
00392   /* Now resolve copying and also handle completely missing definitions.  */
00393   if (ctype == NULL)
00394     {
00395       const char *repertoire_name;
00396 
00397       /* First see whether we were supposed to copy.  If yes, find the
00398         actual definition.  */
00399       if (locale->copy_name[LC_CTYPE] != NULL)
00400        {
00401          /* Find the copying locale.  This has to happen transitively since
00402             the locale we are copying from might itself be copying another one.  */
00403          struct localedef_t *from = locale;
00404 
00405          do
00406            from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
00407                             from->repertoire_name, charmap);
00408          while (from->categories[LC_CTYPE].ctype == NULL
00409                && from->copy_name[LC_CTYPE] != NULL);
00410 
00411          ctype = locale->categories[LC_CTYPE].ctype
00412            = from->categories[LC_CTYPE].ctype;
00413        }
00414 
00415       /* If there is still no definition, issue a warning and create an
00416         empty one.  */
00417       if (ctype == NULL)
00418        {
00419          if (! be_quiet)
00420            WITH_CUR_LOCALE (error (0, 0, _("\
00421 No definition for %s category found"), "LC_CTYPE"));
00422          ctype_startup (NULL, locale, charmap, NULL, 0);
00423          ctype = locale->categories[LC_CTYPE].ctype;
00424        }
00425 
00426       /* Get the repertoire we have to use.  */
00427       repertoire_name = locale->repertoire_name ?: repertoire_global;
00428       if (repertoire_name != NULL)
00429        ctype->repertoire = repertoire_read (repertoire_name);
00430     }
00431 
00432   /* We need the name of the currently used 8-bit character set to
00433      make correct conversion between this 8-bit representation and the
00434      ISO 10646 character set used internally for wide characters.  */
00435   ctype->codeset_name = charmap->code_set_name;
00436   if (ctype->codeset_name == NULL)
00437     {
00438       if (! be_quiet)
00439        WITH_CUR_LOCALE (error (0, 0, _("\
00440 No character set name specified in charmap")));
00441       ctype->codeset_name = "//UNKNOWN//";
00442     }
00443 
00444   /* Set default value for classes not specified.  */
00445   set_class_defaults (ctype, charmap, ctype->repertoire);
00446 
00447   /* Check according to table.  */
00448   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
00449     {
00450       uint32_t tmp = ctype->class_collection[cnt];
00451 
00452       if (tmp != 0)
00453        {
00454          for (cls1 = 0; cls1 < NCLASS; ++cls1)
00455            if ((tmp & _ISwbit (cls1)) != 0)
00456              for (cls2 = 0; cls2 < NCLASS; ++cls2)
00457               if (valid_table[cls1].allow[cls2] != '-')
00458                 {
00459                   int eq = (tmp & _ISwbit (cls2)) != 0;
00460                   switch (valid_table[cls1].allow[cls2])
00461                     {
00462                     case 'M':
00463                      if (!eq)
00464                        {
00465                          uint32_t value = ctype->charnames[cnt];
00466 
00467                          if (!be_quiet)
00468                            WITH_CUR_LOCALE (error (0, 0, _("\
00469 character L'\\u%0*x' in class `%s' must be in class `%s'"),
00470                                                 value > 0xffff ? 8 : 4,
00471                                                 value,
00472                                                 valid_table[cls1].name,
00473                                                 valid_table[cls2].name));
00474                        }
00475                      break;
00476 
00477                     case 'X':
00478                      if (eq)
00479                        {
00480                          uint32_t value = ctype->charnames[cnt];
00481 
00482                          if (!be_quiet)
00483                            WITH_CUR_LOCALE (error (0, 0, _("\
00484 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
00485                                                 value > 0xffff ? 8 : 4,
00486                                                 value,
00487                                                 valid_table[cls1].name,
00488                                                 valid_table[cls2].name));
00489                        }
00490                      break;
00491 
00492                     case 'D':
00493                      ctype->class_collection[cnt] |= _ISwbit (cls2);
00494                      break;
00495 
00496                     default:
00497                      WITH_CUR_LOCALE (error (5, 0, _("\
00498 internal error in %s, line %u"), __FUNCTION__, __LINE__));
00499                     }
00500                 }
00501        }
00502     }
00503 
00504   for (cnt = 0; cnt < 256; ++cnt)
00505     {
00506       uint32_t tmp = ctype->class256_collection[cnt];
00507 
00508       if (tmp != 0)
00509        {
00510          for (cls1 = 0; cls1 < NCLASS; ++cls1)
00511            if ((tmp & _ISbit (cls1)) != 0)
00512              for (cls2 = 0; cls2 < NCLASS; ++cls2)
00513               if (valid_table[cls1].allow[cls2] != '-')
00514                 {
00515                   int eq = (tmp & _ISbit (cls2)) != 0;
00516                   switch (valid_table[cls1].allow[cls2])
00517                     {
00518                     case 'M':
00519                      if (!eq)
00520                        {
00521                          char buf[17];
00522 
00523                          snprintf (buf, sizeof buf, "\\%Zo", cnt);
00524 
00525                          if (!be_quiet)
00526                            WITH_CUR_LOCALE (error (0, 0, _("\
00527 character '%s' in class `%s' must be in class `%s'"),
00528                                                 buf,
00529                                                 valid_table[cls1].name,
00530                                                 valid_table[cls2].name));
00531                        }
00532                      break;
00533 
00534                     case 'X':
00535                      if (eq)
00536                        {
00537                          char buf[17];
00538 
00539                          snprintf (buf, sizeof buf, "\\%Zo", cnt);
00540 
00541                          if (!be_quiet)
00542                            WITH_CUR_LOCALE (error (0, 0, _("\
00543 character '%s' in class `%s' must not be in class `%s'"),
00544                                                 buf,
00545                                                 valid_table[cls1].name,
00546                                                 valid_table[cls2].name));
00547                        }
00548                      break;
00549 
00550                     case 'D':
00551                      ctype->class256_collection[cnt] |= _ISbit (cls2);
00552                      break;
00553 
00554                     default:
00555                      WITH_CUR_LOCALE (error (5, 0, _("\
00556 internal error in %s, line %u"), __FUNCTION__, __LINE__));
00557                     }
00558                 }
00559        }
00560     }
00561 
00562   /* ... and now test <SP> as a special case.  */
00563   space_value = 32;
00564   if (((cnt = BITPOS (tok_space),
00565        (ELEM (ctype, class_collection, , space_value)
00566         & BITw (tok_space)) == 0)
00567        || (cnt = BITPOS (tok_blank),
00568           (ELEM (ctype, class_collection, , space_value)
00569            & BITw (tok_blank)) == 0)))
00570     {
00571       if (!be_quiet)
00572        WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
00573                             valid_table[cnt].name));
00574     }
00575   else if (((cnt = BITPOS (tok_punct),
00576             (ELEM (ctype, class_collection, , space_value)
00577              & BITw (tok_punct)) != 0)
00578            || (cnt = BITPOS (tok_graph),
00579               (ELEM (ctype, class_collection, , space_value)
00580                & BITw (tok_graph))
00581               != 0)))
00582     {
00583       if (!be_quiet)
00584        WITH_CUR_LOCALE (error (0, 0, _("\
00585 <SP> character must not be in class `%s'"),
00586                             valid_table[cnt].name));
00587     }
00588   else
00589     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
00590 
00591   space_seq = charmap_find_value (charmap, "SP", 2);
00592   if (space_seq == NULL)
00593     space_seq = charmap_find_value (charmap, "space", 5);
00594   if (space_seq == NULL)
00595     space_seq = charmap_find_value (charmap, "U00000020", 9);
00596   if (space_seq == NULL || space_seq->nbytes != 1)
00597     {
00598       if (!be_quiet)
00599        WITH_CUR_LOCALE (error (0, 0, _("\
00600 character <SP> not defined in character map")));
00601     }
00602   else if (((cnt = BITPOS (tok_space),
00603             (ctype->class256_collection[space_seq->bytes[0]]
00604              & BIT (tok_space)) == 0)
00605            || (cnt = BITPOS (tok_blank),
00606               (ctype->class256_collection[space_seq->bytes[0]]
00607                & BIT (tok_blank)) == 0)))
00608     {
00609       if (!be_quiet)
00610        WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
00611                             valid_table[cnt].name));
00612     }
00613   else if (((cnt = BITPOS (tok_punct),
00614             (ctype->class256_collection[space_seq->bytes[0]]
00615              & BIT (tok_punct)) != 0)
00616            || (cnt = BITPOS (tok_graph),
00617               (ctype->class256_collection[space_seq->bytes[0]]
00618                & BIT (tok_graph)) != 0)))
00619     {
00620       if (!be_quiet)
00621        WITH_CUR_LOCALE (error (0, 0, _("\
00622 <SP> character must not be in class `%s'"),
00623                             valid_table[cnt].name));
00624     }
00625   else
00626     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
00627 
00628   /* Now that the tests are done make sure the name array contains all
00629      characters which are handled in the WIDTH section of the
00630      character set definition file.  */
00631   if (charmap->width_rules != NULL)
00632     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
00633       {
00634        unsigned char bytes[charmap->mb_cur_max];
00635        int nbytes = charmap->width_rules[cnt].from->nbytes;
00636 
00637        /* We have the range of character for which the width is
00638            specified described using byte sequences of the multibyte
00639            charset.  We have to convert this to UCS4 now.  And we
00640            cannot simply convert the beginning and the end of the
00641            sequence, we have to iterate over the byte sequence and
00642            convert it for every single character.  */
00643        memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
00644 
00645        while (nbytes < charmap->width_rules[cnt].to->nbytes
00646               || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
00647                        nbytes) <= 0)
00648          {
00649            /* Find the UCS value for `bytes'.  */
00650            int inner;
00651            uint32_t wch;
00652            struct charseq *seq
00653              = charmap_find_symbol (charmap, (char *) bytes, nbytes);
00654 
00655            if (seq == NULL)
00656              wch = ILLEGAL_CHAR_VALUE;
00657            else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
00658              wch = seq->ucs4;
00659            else
00660              wch = repertoire_find_value (ctype->repertoire, seq->name,
00661                                       strlen (seq->name));
00662 
00663            if (wch != ILLEGAL_CHAR_VALUE)
00664              /* We are only interested in the side-effects of the
00665                `find_idx' call.  It will add appropriate entries in
00666                the name array if this is necessary.  */
00667              (void) find_idx (ctype, NULL, NULL, NULL, wch);
00668 
00669            /* "Increment" the bytes sequence.  */
00670            inner = nbytes - 1;
00671            while (inner >= 0 && bytes[inner] == 0xff)
00672              --inner;
00673 
00674            if (inner < 0)
00675              {
00676               /* We have to extend the byte sequence.  */
00677               if (nbytes >= charmap->width_rules[cnt].to->nbytes)
00678                 break;
00679 
00680               bytes[0] = 1;
00681               memset (&bytes[1], 0, nbytes);
00682               ++nbytes;
00683              }
00684            else
00685              {
00686               ++bytes[inner];
00687               while (++inner < nbytes)
00688                 bytes[inner] = 0;
00689              }
00690          }
00691       }
00692 
00693   /* Now set all the other characters of the character set to the
00694      default width.  */
00695   curs = NULL;
00696   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
00697     {
00698       struct charseq *data = (struct charseq *) vdata;
00699 
00700       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
00701        data->ucs4 = repertoire_find_value (ctype->repertoire,
00702                                        data->name, len);
00703 
00704       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
00705        (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
00706     }
00707 
00708   /* There must be a multiple of 10 digits.  */
00709   if (ctype->mbdigits_act % 10 != 0)
00710     {
00711       assert (ctype->mbdigits_act == ctype->wcdigits_act);
00712       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
00713       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
00714       WITH_CUR_LOCALE (error (0, 0, _("\
00715 `digit' category has not entries in groups of ten")));
00716     }
00717 
00718   /* Check the input digits.  There must be a multiple of ten available.
00719      In each group it could be that one or the other character is missing.
00720      In this case the whole group must be removed.  */
00721   cnt = 0;
00722   while (cnt < ctype->mbdigits_act)
00723     {
00724       size_t inner;
00725       for (inner = 0; inner < 10; ++inner)
00726        if (ctype->mbdigits[cnt + inner] == NULL)
00727          break;
00728 
00729       if (inner == 10)
00730        cnt += 10;
00731       else
00732        {
00733          /* Remove the group.  */
00734          memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
00735                  ((ctype->wcdigits_act - cnt - 10)
00736                   * sizeof (ctype->mbdigits[0])));
00737          ctype->mbdigits_act -= 10;
00738        }
00739     }
00740 
00741   /* If no input digits are given use the default.  */
00742   if (ctype->mbdigits_act == 0)
00743     {
00744       if (ctype->mbdigits_max == 0)
00745        {
00746          ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
00747                                       10 * sizeof (struct charseq *));
00748          ctype->mbdigits_max = 10;
00749        }
00750 
00751       for (cnt = 0; cnt < 10; ++cnt)
00752        {
00753          ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
00754                                                 (char *) digits + cnt, 1);
00755          if (ctype->mbdigits[cnt] == NULL)
00756            {
00757              ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
00758                                                    longnames[cnt],
00759                                                    strlen (longnames[cnt]));
00760              if (ctype->mbdigits[cnt] == NULL)
00761               {
00762                 /* Hum, this ain't good.  */
00763                 WITH_CUR_LOCALE (error (0, 0, _("\
00764 no input digits defined and none of the standard names in the charmap")));
00765 
00766                 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
00767                                                  sizeof (struct charseq) + 1);
00768 
00769                 /* This is better than nothing.  */
00770                 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
00771                 ctype->mbdigits[cnt]->nbytes = 1;
00772               }
00773            }
00774        }
00775 
00776       ctype->mbdigits_act = 10;
00777     }
00778 
00779   /* Check the wide character input digits.  There must be a multiple
00780      of ten available.  In each group it could be that one or the other
00781      character is missing.  In this case the whole group must be
00782      removed.  */
00783   cnt = 0;
00784   while (cnt < ctype->wcdigits_act)
00785     {
00786       size_t inner;
00787       for (inner = 0; inner < 10; ++inner)
00788        if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
00789          break;
00790 
00791       if (inner == 10)
00792        cnt += 10;
00793       else
00794        {
00795          /* Remove the group.  */
00796          memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
00797                  ((ctype->wcdigits_act - cnt - 10)
00798                   * sizeof (ctype->wcdigits[0])));
00799          ctype->wcdigits_act -= 10;
00800        }
00801     }
00802 
00803   /* If no input digits are given use the default.  */
00804   if (ctype->wcdigits_act == 0)
00805     {
00806       if (ctype->wcdigits_max == 0)
00807        {
00808          ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
00809                                       10 * sizeof (uint32_t));
00810          ctype->wcdigits_max = 10;
00811        }
00812 
00813       for (cnt = 0; cnt < 10; ++cnt)
00814        ctype->wcdigits[cnt] = L'0' + cnt;
00815 
00816       ctype->mbdigits_act = 10;
00817     }
00818 
00819   /* Check the outdigits.  */
00820   warned = 0;
00821   for (cnt = 0; cnt < 10; ++cnt)
00822     if (ctype->mboutdigits[cnt] == NULL)
00823       {
00824        static struct charseq replace[2];
00825 
00826        if (!warned)
00827          {
00828            WITH_CUR_LOCALE (error (0, 0, _("\
00829 not all characters used in `outdigit' are available in the charmap")));
00830            warned = 1;
00831          }
00832 
00833        replace[0].nbytes = 1;
00834        replace[0].bytes[0] = '?';
00835        replace[0].bytes[1] = '\0';
00836        ctype->mboutdigits[cnt] = &replace[0];
00837       }
00838 
00839   warned = 0;
00840   for (cnt = 0; cnt < 10; ++cnt)
00841     if (ctype->wcoutdigits[cnt] == 0)
00842       {
00843        if (!warned)
00844          {
00845            WITH_CUR_LOCALE (error (0, 0, _("\
00846 not all characters used in `outdigit' are available in the repertoire")));
00847            warned = 1;
00848          }
00849 
00850        ctype->wcoutdigits[cnt] = L'?';
00851       }
00852 
00853   /* Sort the entries in the translit_ignore list.  */
00854   if (ctype->translit_ignore != NULL)
00855     {
00856       struct translit_ignore_t *firstp = ctype->translit_ignore;
00857       struct translit_ignore_t *runp;
00858 
00859       ctype->ntranslit_ignore = 1;
00860 
00861       for (runp = firstp->next; runp != NULL; runp = runp->next)
00862        {
00863          struct translit_ignore_t *lastp = NULL;
00864          struct translit_ignore_t *cmpp;
00865 
00866          ++ctype->ntranslit_ignore;
00867 
00868          for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
00869            if (runp->from < cmpp->from)
00870              break;
00871 
00872          runp->next = lastp;
00873          if (lastp == NULL)
00874            firstp = runp;
00875        }
00876 
00877       ctype->translit_ignore = firstp;
00878     }
00879 }
00880 
00881 
/* Write the LC_CTYPE category of LOCALE to OUTPUT_PATH.

   The file is assembled as an iovec array: entry 0 is the
   `struct locale_file' header, entry 1 the index array IDX and the
   following entries carry the data of the NELEMS category items in
   item order.  IDX[0] is the combined size of the first two entries
   and IDX[ELEM + 1] is IDX[ELEM] plus the size of item ELEM, i.e.,
   IDX[ELEM] is the position at which item ELEM starts.

   Some items need more than one iovec entry (the name lists, padding
   in front of an item which must be aligned, the `class_b' array
   written ahead of `class_3level').  OFFSET counts these additional
   entries, therefore the slot for the current item is always
   IOV[2 + ELEM + OFFSET].  Whenever padding is written in front of an
   item IDX[ELEM] is advanced by the same amount so that it points
   behind the padding.  The assertion after the loop checks that
   exactly as many slots were used as have been allocated.  */
00882 void
00883 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
00884              const char *output_path)
00885 {
00886   static const char nulbytes[4] = { 0, 0, 0, 0 };
00887   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
00888   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
00889                       + ctype->nr_charclass + ctype->map_collection_nr);
00890   struct iovec *iov = alloca (sizeof *iov
00891                            * (2 + nelems + 2 * ctype->nr_charclass
00892                              + ctype->map_collection_nr + 4));
00893   struct locale_file data;
00894   uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
00895   uint32_t default_missing_len;
00896   size_t elem, cnt, offset, total;
00897   char *cp;
00898 
00899   /* Now prepare the output: Find the sizes of the table we can use.  */
00900   allocate_arrays (ctype, charmap, ctype->repertoire);
00901 
00902   data.magic = LIMAGIC (LC_CTYPE);
00903   data.n = nelems;
00904   iov[0].iov_base = (void *) &data;
00905   iov[0].iov_len = sizeof (data);
00906 
00907   iov[1].iov_base = (void *) idx;
00908   iov[1].iov_len = nelems * sizeof (uint32_t);
00909 
00910   idx[0] = iov[0].iov_len + iov[1].iov_len;
00911   offset = 0;
00912 
00913   for (elem = 0; elem < nelems; ++elem)
00914     {
00915       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
00916        switch (elem)
00917          {
               /* Placeholder items: no data is written and the next item
                  starts at the same position.  */
00918 #define CTYPE_EMPTY(name) \
00919          case name:                                                  \
00920            iov[2 + elem + offset].iov_base = NULL;                          \
00921            iov[2 + elem + offset].iov_len = 0;                              \
00922            idx[elem + 1] = idx[elem];                                       \
00923            break
00924 
00925          CTYPE_EMPTY(_NL_CTYPE_GAP1);
00926          CTYPE_EMPTY(_NL_CTYPE_GAP2);
00927          CTYPE_EMPTY(_NL_CTYPE_GAP3);
00928          CTYPE_EMPTY(_NL_CTYPE_GAP4);
00929          CTYPE_EMPTY(_NL_CTYPE_GAP5);
00930          CTYPE_EMPTY(_NL_CTYPE_GAP6);
00931 
               /* Items which consist of one buffer BASE of LEN bytes.  */
00932 #define CTYPE_DATA(name, base, len)                                         \
00933          case _NL_ITEM_INDEX (name):                                        \
00934            iov[2 + elem + offset].iov_base = (base);                        \
00935            iov[2 + elem + offset].iov_len = (len);                          \
00936            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;             \
00937            break
00938 
00939          CTYPE_DATA (_NL_CTYPE_CLASS,
00940                     ctype->ctype_b,
00941                     (256 + 128) * sizeof (char_class_t));
00942 
00943          CTYPE_DATA (_NL_CTYPE_TOUPPER,
00944                     ctype->map_b[0],
00945                     (256 + 128) * sizeof (uint32_t));
00946          CTYPE_DATA (_NL_CTYPE_TOLOWER,
00947                     ctype->map_b[1],
00948                     (256 + 128) * sizeof (uint32_t));
00949 
00950          CTYPE_DATA (_NL_CTYPE_TOUPPER32,
00951                     ctype->map32_b[0],
00952                     256 * sizeof (uint32_t));
00953          CTYPE_DATA (_NL_CTYPE_TOLOWER32,
00954                     ctype->map32_b[1],
00955                     256 * sizeof (uint32_t));
00956 
00957          CTYPE_DATA (_NL_CTYPE_CLASS32,
00958                     ctype->ctype32_b,
00959                     256 * sizeof (char_class32_t));
00960 
00961          CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
00962                     &ctype->class_offset, sizeof (uint32_t));
00963 
00964          CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
00965                     &ctype->map_offset, sizeof (uint32_t));
00966 
00967          CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
00968                     &ctype->translit_idx_size, sizeof (uint32_t));
00969 
00970          CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
00971                     ctype->translit_from_idx,
00972                     ctype->translit_idx_size * sizeof (uint32_t));
00973 
00974          CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
00975                     ctype->translit_from_tbl,
00976                     ctype->translit_from_tbl_size);
00977 
00978          CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
00979                     ctype->translit_to_idx,
00980                     ctype->translit_idx_size * sizeof (uint32_t));
00981 
00982          CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
00983                     ctype->translit_to_tbl, ctype->translit_to_tbl_size);
00984 
00985          case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
00986            /* The class name array.  */
00987            total = 0;
00988            for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
00989              {
00990               iov[2 + elem + offset].iov_base
00991                 = (void *) ctype->classnames[cnt];
00992               iov[2 + elem + offset].iov_len
00993                 = strlen (ctype->classnames[cnt]) + 1;
00994               total += iov[2 + elem + offset].iov_len;
00995              }
                 /* Append one to four NUL bytes so that TOTAL becomes a
                    multiple of four.  */
00996            iov[2 + elem + offset].iov_base = (void *) nulbytes;
00997            iov[2 + elem + offset].iov_len = 4 - (total % 4);
00998            total += 4 - (total % 4);
00999 
01000            idx[elem + 1] = idx[elem] + total;
01001            break;
01002 
01003          case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
01004            /* The map name array.  */
01005            total = 0;
01006            for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
01007              {
01008               iov[2 + elem + offset].iov_base
01009                 = (void *) ctype->mapnames[cnt];
01010               iov[2 + elem + offset].iov_len
01011                 = strlen (ctype->mapnames[cnt]) + 1;
01012               total += iov[2 + elem + offset].iov_len;
01013              }
                 /* Append one to four NUL bytes so that TOTAL becomes a
                    multiple of four.  */
01014            iov[2 + elem + offset].iov_base = (void *) nulbytes;
01015            iov[2 + elem + offset].iov_len = 4 - (total % 4);
01016            total += 4 - (total % 4);
01017 
01018            idx[elem + 1] = idx[elem] + total;
01019            break;
01020 
01021          CTYPE_DATA (_NL_CTYPE_WIDTH,
01022                     ctype->width.iov_base,
01023                     ctype->width.iov_len);
01024 
01025          CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
01026                     &ctype->mb_cur_max, sizeof (uint32_t));
01027 
01028          case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
                 /* The name is written including its terminating NUL.  If
                    that length is not a multiple of four a copy padded with
                    NUL bytes up to the next multiple is written instead.  */
01029            total = strlen (ctype->codeset_name) + 1;
01030            if (total % 4 == 0)
01031              iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
01032            else
01033              {
01034               iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
01035               memset (mempcpy (iov[2 + elem + offset].iov_base,
01036                              ctype->codeset_name, total),
01037                      '\0', 4 - (total & 3));
01038               total = (total + 3) & ~3;
01039              }
01040            iov[2 + elem + offset].iov_len = total;
01041            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01042            break;
01043 
01044 
01045          CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
01046                     &ctype->to_nonascii, sizeof (uint32_t));
01047 
01048          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
                 /* The number of groups of ten multibyte input digits.  */
01049            iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
01050            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
01051            *(uint32_t *) iov[2 + elem + offset].iov_base =
01052              ctype->mbdigits_act / 10;
01053            idx[elem + 1] = idx[elem] + sizeof (uint32_t);
01054            break;
01055 
01056          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
01057            /* Align entries.  */
01058            iov[2 + elem + offset].iov_base = (void *) nulbytes;
01059            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
01060            idx[elem] += iov[2 + elem + offset].iov_len;
01061            ++offset;
01062 
01063            iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
01064            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
01065            *(uint32_t *) iov[2 + elem + offset].iov_base =
01066              ctype->wcdigits_act / 10;
01067            idx[elem + 1] = idx[elem] + sizeof (uint32_t);
01068            break;
01069 
01070          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
01071            /* Compute the length of all possible characters.  For INDIGITS
01072               there might be more than one.  We simply concatenate all of
01073               them with a NUL byte following.  The NUL byte wouldn't be
01074               necessary but it makes it easier for the user.  */
01075            total = 0;
01076 
01077            for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
01078                cnt < ctype->mbdigits_act; cnt += 10)
01079              total += ctype->mbdigits[cnt]->nbytes + 1;
01080            iov[2 + elem + offset].iov_base = (char *) alloca (total);
01081            iov[2 + elem + offset].iov_len = total;
01082 
01083            cp = iov[2 + elem + offset].iov_base;
01084            for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
01085                cnt < ctype->mbdigits_act; cnt += 10)
01086              {
01087               cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
01088                            ctype->mbdigits[cnt]->nbytes);
01089               *cp++ = '\0';
01090              }
01091            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01092            break;
01093 
01094          case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
01095            /* For OUTDIGIT there is exactly one byte sequence per digit.
01096               We store it with a NUL byte following.  The NUL byte
01097               wouldn't be necessary but it makes it easier for the
01098               user.  */
01099            cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
01100            total = ctype->mboutdigits[cnt]->nbytes + 1;
01101            iov[2 + elem + offset].iov_base = (char *) alloca (total);
01102            iov[2 + elem + offset].iov_len = total;
01103 
01104            *(char *) mempcpy (iov[2 + elem + offset].iov_base,
01105                             ctype->mboutdigits[cnt]->bytes,
01106                             ctype->mboutdigits[cnt]->nbytes) = '\0';
01107            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01108            break;
01109 
01110          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
                 /* The wide input digits are kept in groups of ten.  Collect
                    the value of this digit from every group.  */
01111            total = ctype->wcdigits_act / 10;
01112 
01113            iov[2 + elem + offset].iov_base =
01114              (uint32_t *) alloca (total * sizeof (uint32_t));
01115            iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
01116 
01117            for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
01118                cnt < ctype->wcdigits_act; cnt += 10)
01119              ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
01120               = ctype->wcdigits[cnt];
01121            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01122            break;
01123 
01124          case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
01125            /* Align entries.  */
01126            iov[2 + elem + offset].iov_base = (void *) nulbytes;
01127            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
01128            idx[elem] += iov[2 + elem + offset].iov_len;
01129            ++offset;
01130            /* FALLTHROUGH */
01131 
01132          case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
01133            cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
01134            iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
01135            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
01136            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01137            break;
01138 
01139          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
01140            /* Align entries.  */
01141            iov[2 + elem + offset].iov_base = (void *) nulbytes;
01142            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
01143            idx[elem] += iov[2 + elem + offset].iov_len;
01144            ++offset;
01145 
01146            default_missing_len = (ctype->default_missing
01147                                ? wcslen ((wchar_t *)ctype->default_missing)
01148                                : 0);
01149            iov[2 + elem + offset].iov_base = &default_missing_len;
01150            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
01151            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01152            break;
01153 
01154          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
                 /* The string is written without a terminating zero; an
                    unset DEFAULT_MISSING yields an empty item.  */
01155            iov[2 + elem + offset].iov_base =
01156              ctype->default_missing ?: (uint32_t *) L"";
01157            iov[2 + elem + offset].iov_len =
01158              wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
01159            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01160            break;
01161 
01162          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
01163            /* Align entries.  */
01164            iov[2 + elem + offset].iov_base = (void *) nulbytes;
01165            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
01166            idx[elem] += iov[2 + elem + offset].iov_len;
01167            ++offset;
01168 
01169            iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
01170            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
01171            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01172            break;
01173 
01174          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
                 /* Flatten the translit_ignore list into an array of
                    (from, to, step) triples.  */
01175            {
01176              uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
01177                                                 * 3 * sizeof (uint32_t));
01178              struct translit_ignore_t *runp;
01179 
01180              iov[2 + elem + offset].iov_base = ranges;
01181              iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
01182                                           * 3 * sizeof (uint32_t));
01183 
01184              for (runp = ctype->translit_ignore; runp != NULL;
01185                  runp = runp->next)
01186               {
01187                 *ranges++ = runp->from;
01188                 *ranges++ = runp->to;
01189                 *ranges++ = runp->step;
01190               }
01191            }
01192            /* Remove the following line in case a new entry is added
01193               after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.
                    NOTE(review): ELEM < NELEMS always holds inside this
                    loop, so the test below never fails as written.  */
01194            if (elem < nelems)
01195              idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01196            break;
01197 
01198          default:
01199            assert (! "unknown CTYPE element");
01200          }
01201       else
01202        {
01203          /* Handle the extra tables: first the classes, then the maps.  */
01204          size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
01205          if (nr < ctype->nr_charclass)
01206            {
                   /* A class takes two slots.  The `class_b' array comes
                      first and IDX[ELEM] is moved behind it, so the item
                      position refers to the `class_3level' data.  */
01207              iov[2 + elem + offset].iov_base = ctype->class_b[nr];
01208              iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
01209              idx[elem] += iov[2 + elem + offset].iov_len;
01210              ++offset;
01211 
01212              iov[2 + elem + offset] = ctype->class_3level[nr];
01213            }
01214          else
01215            {
01216              nr -= ctype->nr_charclass;
01217              assert (nr < ctype->map_collection_nr);
01218              iov[2 + elem + offset] = ctype->map_3level[nr];
01219            }
01220          idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
01221        }
01222     }
01223 
        /* Every allocated slot must have been filled, no more and no less.  */
01224   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
01225                             + ctype->map_collection_nr + 4 + 2));
01226 
01227   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
01228                    iov);
01229 }
01230 
01231 
01232 /* Local functions.  */
01233 static void
01234 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
01235                const char *name)
01236 {
01237   size_t cnt;
01238 
01239   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
01240     if (strcmp (ctype->classnames[cnt], name) == 0)
01241       break;
01242 
01243   if (cnt < ctype->nr_charclass)
01244     {
01245       lr_error (lr, _("character class `%s' already defined"), name);
01246       return;
01247     }
01248 
01249   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
01250     /* Exit code 2 is prescribed in P1003.2b.  */
01251     WITH_CUR_LOCALE (error (2, 0, _("\
01252 implementation limit: no more than %Zd character classes allowed"),
01253                          MAX_NR_CHARCLASS));
01254 
01255   ctype->classnames[ctype->nr_charclass++] = name;
01256 }
01257 
01258 
01259 static void
01260 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
01261               const char *name, const struct charmap_t *charmap)
01262 {
01263   size_t max_chars = 0;
01264   size_t cnt;
01265 
01266   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
01267     {
01268       if (strcmp (ctype->mapnames[cnt], name) == 0)
01269        break;
01270 
01271       if (max_chars < ctype->map_collection_max[cnt])
01272        max_chars = ctype->map_collection_max[cnt];
01273     }
01274 
01275   if (cnt < ctype->map_collection_nr)
01276     {
01277       lr_error (lr, _("character map `%s' already defined"), name);
01278       return;
01279     }
01280 
01281   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
01282     /* Exit code 2 is prescribed in P1003.2b.  */
01283     WITH_CUR_LOCALE (error (2, 0, _("\
01284 implementation limit: no more than %d character maps allowed"),
01285                          MAX_NR_CHARMAP));
01286 
01287   ctype->mapnames[cnt] = name;
01288 
01289   if (max_chars == 0)
01290     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
01291   else
01292     ctype->map_collection_max[cnt] = max_chars;
01293 
01294   ctype->map_collection[cnt] = (uint32_t *)
01295     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
01296   ctype->map_collection_act[cnt] = 256;
01297 
01298   ++ctype->map_collection_nr;
01299 }
01300 
01301 
01302 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
01303    is possible if we only want to extend the name array.  */
01304 static uint32_t *
01305 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
01306          size_t *act, uint32_t idx)
01307 {
01308   size_t cnt;
01309 
01310   if (idx < 256)
01311     return table == NULL ? NULL : &(*table)[idx];
01312 
01313   /* Use the charnames_idx lookup table instead of the slow search loop.  */
01314 #if 1
01315   cnt = idx_table_get (&ctype->charnames_idx, idx);
01316   if (cnt == EMPTY)
01317     /* Not found.  */
01318     cnt = ctype->charnames_act;
01319 #else
01320   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
01321     if (ctype->charnames[cnt] == idx)
01322       break;
01323 #endif
01324 
01325   /* We have to distinguish two cases: the name is found or not.  */
01326   if (cnt == ctype->charnames_act)
01327     {
01328       /* Extend the name array.  */
01329       if (ctype->charnames_act == ctype->charnames_max)
01330        {
01331          ctype->charnames_max *= 2;
01332          ctype->charnames = (uint32_t *)
01333            xrealloc (ctype->charnames,
01334                     sizeof (uint32_t) * ctype->charnames_max);
01335        }
01336       ctype->charnames[ctype->charnames_act++] = idx;
01337       idx_table_add (&ctype->charnames_idx, idx, cnt);
01338     }
01339 
01340   if (table == NULL)
01341     /* We have done everything we are asked to do.  */
01342     return NULL;
01343 
01344   if (max == NULL)
01345     /* The caller does not want to extend the table.  */
01346     return (cnt >= *act ? NULL : &(*table)[cnt]);
01347 
01348   if (cnt >= *act)
01349     {
01350       if (cnt >= *max)
01351        {
01352          size_t old_max = *max;
01353          do
01354            *max *= 2;
01355          while (*max <= cnt);
01356 
01357          *table =
01358            (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
01359          memset (&(*table)[old_max], '\0',
01360                 (*max - old_max) * sizeof (uint32_t));
01361        }
01362 
01363       *act = cnt + 1;
01364     }
01365 
01366   return &(*table)[cnt];
01367 }
01368 
01369 
01370 static int
01371 get_character (struct token *now, const struct charmap_t *charmap,
01372               struct repertoire_t *repertoire,
01373               struct charseq **seqp, uint32_t *wchp)
01374 {
01375   if (now->tok == tok_bsymbol)
01376     {
01377       /* This will hopefully be the normal case.  */
01378       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
01379                                  now->val.str.lenmb);
01380       *seqp = charmap_find_value (charmap, now->val.str.startmb,
01381                               now->val.str.lenmb);
01382     }
01383   else if (now->tok == tok_ucs4)
01384     {
01385       char utmp[10];
01386 
01387       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
01388       *seqp = charmap_find_value (charmap, utmp, 9);
01389 
01390       if (*seqp == NULL)
01391        *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
01392 
01393       if (*seqp == NULL)
01394        {
01395          /* Compute the value in the charmap from the UCS value.  */
01396          const char *symbol = repertoire_find_symbol (repertoire,
01397                                                  now->val.ucs4);
01398 
01399          if (symbol == NULL)
01400            *seqp = NULL;
01401          else
01402            *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
01403 
01404          if (*seqp == NULL)
01405            {
01406              if (repertoire != NULL)
01407               {
01408                 /* Insert a negative entry.  */
01409                 static const struct charseq negative
01410                   = { .ucs4 = ILLEGAL_CHAR_VALUE };
01411                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
01412                                             sizeof (uint32_t));
01413                 *newp = now->val.ucs4;
01414 
01415                 insert_entry (&repertoire->seq_table, newp,
01416                             sizeof (uint32_t), (void *) &negative);
01417               }
01418            }
01419          else
01420            (*seqp)->ucs4 = now->val.ucs4;
01421        }
01422       else if ((*seqp)->ucs4 != now->val.ucs4)
01423        *seqp = NULL;
01424 
01425       *wchp = now->val.ucs4;
01426     }
01427   else if (now->tok == tok_charcode)
01428     {
01429       /* We must map from the byte code to UCS4.  */
01430       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
01431                                now->val.str.lenmb);
01432 
01433       if (*seqp == NULL)
01434        *wchp = ILLEGAL_CHAR_VALUE;
01435       else
01436        {
01437          if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
01438            (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
01439                                              strlen ((*seqp)->name));
01440          *wchp = (*seqp)->ucs4;
01441        }
01442     }
01443   else
01444     return 1;
01445 
01446   return 0;
01447 }
01448 
01449 
01450 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
01451    the .(2). counterparts.  */
01452 static void
01453 charclass_symbolic_ellipsis (struct linereader *ldfile,
01454                           struct locale_ctype_t *ctype,
01455                           const struct charmap_t *charmap,
01456                           struct repertoire_t *repertoire,
01457                           struct token *now,
01458                           const char *last_str,
01459                           unsigned long int class256_bit,
01460                           unsigned long int class_bit, int base,
01461                           int ignore_content, int handle_digits, int step)
01462 {
01463   const char *nowstr = now->val.str.startmb;
01464   char tmp[now->val.str.lenmb + 1];
01465   const char *cp;
01466   char *endp;
01467   unsigned long int from;
01468   unsigned long int to;
01469 
01470   /* We have to compute the ellipsis values using the symbolic names.  */
01471   assert (last_str != NULL);
01472 
01473   if (strlen (last_str) != now->val.str.lenmb)
01474     {
01475     invalid_range:
01476       lr_error (ldfile,
01477               _("`%s' and `%.*s' are not valid names for symbolic range"),
01478               last_str, (int) now->val.str.lenmb, nowstr);
01479       return;
01480     }
01481 
01482   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
01483     /* Nothing to do, the names are the same.  */
01484     return;
01485 
01486   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
01487     ;
01488 
01489   errno = 0;
01490   from = strtoul (cp, &endp, base);
01491   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
01492     goto invalid_range;
01493 
01494   to = strtoul (nowstr + (cp - last_str), &endp, base);
01495   if ((to == UINT_MAX && errno == ERANGE)
01496       || (endp - nowstr) != now->val.str.lenmb || from >= to)
01497     goto invalid_range;
01498 
01499   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
01500   if (!ignore_content)
01501     {
01502       now->val.str.startmb = tmp;
01503       while ((from += step) <= to)
01504        {
01505          struct charseq *seq;
01506          uint32_t wch;
01507 
01508          sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
01509                  (int) (cp - last_str), last_str,
01510                  (int) (now->val.str.lenmb - (cp - last_str)),
01511                  from);
01512 
01513          get_character (now, charmap, repertoire, &seq, &wch);
01514 
01515          if (seq != NULL && seq->nbytes == 1)
01516            /* Yep, we can store information about this byte sequence.  */
01517            ctype->class256_collection[seq->bytes[0]] |= class256_bit;
01518 
01519          if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
01520            /* We have the UCS4 position.  */
01521            *find_idx (ctype, &ctype->class_collection,
01522                      &ctype->class_collection_max,
01523                      &ctype->class_collection_act, wch) |= class_bit;
01524 
01525          if (handle_digits == 1)
01526            {
01527              /* We must store the digit values.  */
01528              if (ctype->mbdigits_act == ctype->mbdigits_max)
01529               {
01530                 ctype->mbdigits_max *= 2;
01531                 ctype->mbdigits = xrealloc (ctype->mbdigits,
01532                                          (ctype->mbdigits_max
01533                                           * sizeof (char *)));
01534                 ctype->wcdigits_max *= 2;
01535                 ctype->wcdigits = xrealloc (ctype->wcdigits,
01536                                          (ctype->wcdigits_max
01537                                           * sizeof (uint32_t)));
01538               }
01539 
01540              ctype->mbdigits[ctype->mbdigits_act++] = seq;
01541              ctype->wcdigits[ctype->wcdigits_act++] = wch;
01542            }
01543          else if (handle_digits == 2)
01544            {
01545              /* We must store the digit values.  */
01546              if (ctype->outdigits_act >= 10)
01547               {
01548                 lr_error (ldfile, _("\
01549 %s: field `%s' does not contain exactly ten entries"),
01550                          "LC_CTYPE", "outdigit");
01551                 return;
01552               }
01553 
01554              ctype->mboutdigits[ctype->outdigits_act] = seq;
01555              ctype->wcoutdigits[ctype->outdigits_act] = wch;
01556              ++ctype->outdigits_act;
01557            }
01558        }
01559     }
01560 }
01561 
01562 
01563 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
01564 static void
01565 charclass_ucs4_ellipsis (struct linereader *ldfile,
01566                       struct locale_ctype_t *ctype,
01567                       const struct charmap_t *charmap,
01568                       struct repertoire_t *repertoire,
01569                       struct token *now, uint32_t last_wch,
01570                       unsigned long int class256_bit,
01571                       unsigned long int class_bit, int ignore_content,
01572                       int handle_digits, int step)
01573 {
01574   if (last_wch > now->val.ucs4)
01575     {
01576       lr_error (ldfile, _("\
01577 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
01578               (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
01579               (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
01580       return;
01581     }
01582 
01583   if (!ignore_content)
01584     while ((last_wch += step) <= now->val.ucs4)
01585       {
01586        /* We have to find out whether there is a byte sequence corresponding
01587           to this UCS4 value.  */
01588        struct charseq *seq;
01589        char utmp[10];
01590 
01591        snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
01592        seq = charmap_find_value (charmap, utmp, 9);
01593        if (seq == NULL)
01594          {
01595            snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
01596            seq = charmap_find_value (charmap, utmp, 5);
01597          }
01598 
01599        if (seq == NULL)
01600          /* Try looking in the repertoire map.  */
01601          seq = repertoire_find_seq (repertoire, last_wch);
01602 
01603        /* If this is the first time we look for this sequence create a new
01604           entry.  */
01605        if (seq == NULL)
01606          {
01607            static const struct charseq negative
01608              = { .ucs4 = ILLEGAL_CHAR_VALUE };
01609 
01610            /* Find the symbolic name for this UCS4 value.  */
01611            if (repertoire != NULL)
01612              {
01613               const char *symbol = repertoire_find_symbol (repertoire,
01614                                                       last_wch);
01615               uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
01616                                           sizeof (uint32_t));
01617               *newp = last_wch;
01618 
01619               if (symbol != NULL)
01620                 /* We have a name, now search the multibyte value.  */
01621                 seq = charmap_find_value (charmap, symbol, strlen (symbol));
01622 
01623               if (seq == NULL)
01624                 /* We have to create a fake entry.  */
01625                 seq = (struct charseq *) &negative;
01626               else
01627                 seq->ucs4 = last_wch;
01628 
01629               insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
01630                            seq);
01631              }
01632            else
01633              /* We have to create a fake entry.  */
01634              seq = (struct charseq *) &negative;
01635          }
01636 
01637        /* We have a name, now search the multibyte value.  */
01638        if (seq->ucs4 == last_wch && seq->nbytes == 1)
01639          /* Yep, we can store information about this byte sequence.  */
01640          ctype->class256_collection[(size_t) seq->bytes[0]]
01641            |= class256_bit;
01642 
01643        /* And of course we have the UCS4 position.  */
01644        if (class_bit != 0)
01645          *find_idx (ctype, &ctype->class_collection,
01646                    &ctype->class_collection_max,
01647                    &ctype->class_collection_act, last_wch) |= class_bit;
01648 
01649        if (handle_digits == 1)
01650          {
01651            /* We must store the digit values.  */
01652            if (ctype->mbdigits_act == ctype->mbdigits_max)
01653              {
01654               ctype->mbdigits_max *= 2;
01655               ctype->mbdigits = xrealloc (ctype->mbdigits,
01656                                        (ctype->mbdigits_max
01657                                         * sizeof (char *)));
01658               ctype->wcdigits_max *= 2;
01659               ctype->wcdigits = xrealloc (ctype->wcdigits,
01660                                        (ctype->wcdigits_max
01661                                         * sizeof (uint32_t)));
01662              }
01663 
01664            ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
01665                                                 ? seq : NULL);
01666            ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
01667          }
01668        else if (handle_digits == 2)
01669          {
01670            /* We must store the digit values.  */
01671            if (ctype->outdigits_act >= 10)
01672              {
01673               lr_error (ldfile, _("\
01674 %s: field `%s' does not contain exactly ten entries"),
01675                        "LC_CTYPE", "outdigit");
01676               return;
01677              }
01678 
01679            ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
01680                                                  ? seq : NULL);
01681            ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
01682            ++ctype->outdigits_act;
01683          }
01684       }
01685 }
01686 
01687 
01688 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
01689 static void
01690 charclass_charcode_ellipsis (struct linereader *ldfile,
01691                           struct locale_ctype_t *ctype,
01692                           const struct charmap_t *charmap,
01693                           struct repertoire_t *repertoire,
01694                           struct token *now, char *last_charcode,
01695                           uint32_t last_charcode_len,
01696                           unsigned long int class256_bit,
01697                           unsigned long int class_bit, int ignore_content,
01698                           int handle_digits)
01699 {
01700   /* First check whether the to-value is larger.  */
01701   if (now->val.charcode.nbytes != last_charcode_len)
01702     {
01703       lr_error (ldfile, _("\
01704 start and end character sequence of range must have the same length"));
01705       return;
01706     }
01707 
01708   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
01709     {
01710       lr_error (ldfile, _("\
01711 to-value character sequence is smaller than from-value sequence"));
01712       return;
01713     }
01714 
01715   if (!ignore_content)
01716     {
01717       do
01718        {
01719          /* Increment the byte sequence value.  */
01720          struct charseq *seq;
01721          uint32_t wch;
01722          int i;
01723 
01724          for (i = last_charcode_len - 1; i >= 0; --i)
01725            if (++last_charcode[i] != 0)
01726              break;
01727 
01728          if (last_charcode_len == 1)
01729            /* Of course we have the charcode value.  */
01730            ctype->class256_collection[(size_t) last_charcode[0]]
01731              |= class256_bit;
01732 
01733          /* Find the symbolic name.  */
01734          seq = charmap_find_symbol (charmap, last_charcode,
01735                                  last_charcode_len);
01736          if (seq != NULL)
01737            {
01738              if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
01739               seq->ucs4 = repertoire_find_value (repertoire, seq->name,
01740                                              strlen (seq->name));
01741              wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
01742 
01743              if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
01744               *find_idx (ctype, &ctype->class_collection,
01745                         &ctype->class_collection_max,
01746                         &ctype->class_collection_act, wch) |= class_bit;
01747            }
01748          else
01749            wch = ILLEGAL_CHAR_VALUE;
01750 
01751          if (handle_digits == 1)
01752            {
01753              /* We must store the digit values.  */
01754              if (ctype->mbdigits_act == ctype->mbdigits_max)
01755               {
01756                 ctype->mbdigits_max *= 2;
01757                 ctype->mbdigits = xrealloc (ctype->mbdigits,
01758                                          (ctype->mbdigits_max
01759                                           * sizeof (char *)));
01760                 ctype->wcdigits_max *= 2;
01761                 ctype->wcdigits = xrealloc (ctype->wcdigits,
01762                                          (ctype->wcdigits_max
01763                                           * sizeof (uint32_t)));
01764               }
01765 
01766              seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
01767              memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
01768              seq->nbytes = last_charcode_len;
01769 
01770              ctype->mbdigits[ctype->mbdigits_act++] = seq;
01771              ctype->wcdigits[ctype->wcdigits_act++] = wch;
01772            }
01773          else if (handle_digits == 2)
01774            {
01775              struct charseq *seq;
01776              /* We must store the digit values.  */
01777              if (ctype->outdigits_act >= 10)
01778               {
01779                 lr_error (ldfile, _("\
01780 %s: field `%s' does not contain exactly ten entries"),
01781                          "LC_CTYPE", "outdigit");
01782                 return;
01783               }
01784 
01785              seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
01786              memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
01787              seq->nbytes = last_charcode_len;
01788 
01789              ctype->mboutdigits[ctype->outdigits_act] = seq;
01790              ctype->wcoutdigits[ctype->outdigits_act] = wch;
01791              ++ctype->outdigits_act;
01792            }
01793        }
01794       while (memcmp (last_charcode, now->val.charcode.bytes,
01795                    last_charcode_len) != 0);
01796     }
01797 }
01798 
01799 
01800 static uint32_t *
01801 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
01802               uint32_t wch)
01803 {
01804   struct translit_t *trunp = ctype->translit;
01805   struct translit_ignore_t *tirunp = ctype->translit_ignore;
01806 
01807   while (trunp != NULL)
01808     {
01809       /* XXX We simplify things here.  The transliterations we look
01810         for are only allowed to have one character.  */
01811       if (trunp->from[0] == wch && trunp->from[1] == 0)
01812        {
01813          /* Found it.  Now look for a transliteration which can be
01814             represented with the character set.  */
01815          struct translit_to_t *torunp = trunp->to;
01816 
01817          while (torunp != NULL)
01818            {
01819              int i;
01820 
01821              for (i = 0; torunp->str[i] != 0; ++i)
01822               {
01823                 char utmp[10];
01824 
01825                 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
01826                 if (charmap_find_value (charmap, utmp, 9) == NULL)
01827                   /* This character cannot be represented.  */
01828                   break;
01829               }
01830 
01831              if (torunp->str[i] == 0)
01832               return torunp->str;
01833 
01834              torunp = torunp->next;
01835            }
01836 
01837          break;
01838        }
01839 
01840       trunp = trunp->next;
01841     }
01842 
01843   /* Check for ignored chars.  */
01844   while (tirunp != NULL)
01845     {
01846       if (tirunp->from <= wch && tirunp->to >= wch)
01847        {
01848          uint32_t wi;
01849 
01850          for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
01851            if (wi == wch)
01852              return (uint32_t []) { 0 };
01853        }
01854     }
01855 
01856   /* Nothing found.  */
01857   return NULL;
01858 }
01859 
01860 
01861 uint32_t *
01862 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
01863               uint32_t wch)
01864 {
01865   struct locale_ctype_t *ctype;
01866   uint32_t *result = NULL;
01867 
01868   assert (locale != NULL);
01869   ctype = locale->categories[LC_CTYPE].ctype;
01870 
01871   if (ctype == NULL)
01872     return NULL;
01873 
01874   if (ctype->translit != NULL)
01875     result = find_translit2 (ctype, charmap, wch);
01876 
01877   if (result == NULL)
01878     {
01879       struct translit_include_t *irunp = ctype->translit_include;
01880 
01881       while (irunp != NULL && result == NULL)
01882        {
01883          result = find_translit (find_locale (CTYPE_LOCALE,
01884                                           irunp->copy_locale,
01885                                           irunp->copy_repertoire,
01886                                           charmap),
01887                               charmap, wch);
01888          irunp = irunp->next;
01889        }
01890     }
01891 
01892   return result;
01893 }
01894 
01895 
01896 /* Read one transliteration entry.  */
01897 static uint32_t *
01898 read_widestring (struct linereader *ldfile, struct token *now,
01899                const struct charmap_t *charmap,
01900                struct repertoire_t *repertoire)
01901 {
01902   uint32_t *wstr;
01903 
01904   if (now->tok == tok_default_missing)
01905     /* The special name "" will denote this case.  */
01906     wstr = ((uint32_t *) { 0 });
01907   else if (now->tok == tok_bsymbol)
01908     {
01909       /* Get the value from the repertoire.  */
01910       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
01911       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
01912                                    now->val.str.lenmb);
01913       if (wstr[0] == ILLEGAL_CHAR_VALUE)
01914        {
01915          /* We cannot proceed, we don't know the UCS4 value.  */
01916          free (wstr);
01917          return NULL;
01918        }
01919 
01920       wstr[1] = 0;
01921     }
01922   else if (now->tok == tok_ucs4)
01923     {
01924       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
01925       wstr[0] = now->val.ucs4;
01926       wstr[1] = 0;
01927     }
01928   else if (now->tok == tok_charcode)
01929     {
01930       /* Argh, we have to convert to the symbol name first and then to the
01931         UCS4 value.  */
01932       struct charseq *seq = charmap_find_symbol (charmap,
01933                                            now->val.str.startmb,
01934                                            now->val.str.lenmb);
01935       if (seq == NULL)
01936        /* Cannot find the UCS4 value.  */
01937        return NULL;
01938 
01939       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
01940        seq->ucs4 = repertoire_find_value (repertoire, seq->name,
01941                                       strlen (seq->name));
01942       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
01943        /* We cannot proceed, we don't know the UCS4 value.  */
01944        return NULL;
01945 
01946       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
01947       wstr[0] = seq->ucs4;
01948       wstr[1] = 0;
01949     }
01950   else if (now->tok == tok_string)
01951     {
01952       wstr = now->val.str.startwc;
01953       if (wstr == NULL || wstr[0] == 0)
01954        return NULL;
01955     }
01956   else
01957     {
01958       if (now->tok != tok_eol && now->tok != tok_eof)
01959        lr_ignore_rest (ldfile, 0);
01960       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
01961       return (uint32_t *) -1l;
01962     }
01963 
01964   return wstr;
01965 }
01966 
01967 
01968 static void
01969 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
01970                    struct token *now, const struct charmap_t *charmap,
01971                    struct repertoire_t *repertoire)
01972 {
01973   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
01974   struct translit_t *result;
01975   struct translit_to_t **top;
01976   struct obstack *ob = &ctype->mempool;
01977   int first;
01978   int ignore;
01979 
01980   if (from_wstr == NULL)
01981     /* There is no valid from string.  */
01982     return;
01983 
01984   result = (struct translit_t *) obstack_alloc (ob,
01985                                           sizeof (struct translit_t));
01986   result->from = from_wstr;
01987   result->fname = ldfile->fname;
01988   result->lineno = ldfile->lineno;
01989   result->next = NULL;
01990   result->to = NULL;
01991   top = &result->to;
01992   first = 1;
01993   ignore = 0;
01994 
01995   while (1)
01996     {
01997       uint32_t *to_wstr;
01998 
01999       /* Next we have one or more transliterations.  They are
02000         separated by semicolons.  */
02001       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
02002 
02003       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
02004        {
02005          /* One string read.  */
02006          const uint32_t zero = 0;
02007 
02008          if (!ignore)
02009            {
02010              obstack_grow (ob, &zero, 4);
02011              to_wstr = obstack_finish (ob);
02012 
02013              *top = obstack_alloc (ob, sizeof (struct translit_to_t));
02014              (*top)->str = to_wstr;
02015              (*top)->next = NULL;
02016            }
02017 
02018          if (now->tok == tok_eol)
02019            {
02020              result->next = ctype->translit;
02021              ctype->translit = result;
02022              return;
02023            }
02024 
02025          if (!ignore)
02026            top = &(*top)->next;
02027          ignore = 0;
02028        }
02029       else
02030        {
02031          to_wstr = read_widestring (ldfile, now, charmap, repertoire);
02032          if (to_wstr == (uint32_t *) -1l)
02033            {
02034              /* An error occurred.  */
02035              obstack_free (ob, result);
02036              return;
02037            }
02038 
02039          if (to_wstr == NULL)
02040            ignore = 1;
02041          else
02042            /* This value is usable.  */
02043            obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
02044 
02045          first = 0;
02046        }
02047     }
02048 }
02049 
02050 
02051 static void
02052 read_translit_ignore_entry (struct linereader *ldfile,
02053                          struct locale_ctype_t *ctype,
02054                          const struct charmap_t *charmap,
02055                          struct repertoire_t *repertoire)
02056 {
02057   /* We expect a semicolon-separated list of characters we ignore.  We are
02058      only interested in the wide character definitions.  These must be
02059      single characters, possibly defining a range when an ellipsis is used.  */
02060   while (1)
02061     {
02062       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
02063                                 verbose);
02064       struct translit_ignore_t *newp;
02065       uint32_t from;
02066 
02067       if (now->tok == tok_eol || now->tok == tok_eof)
02068        {
02069          lr_error (ldfile,
02070                   _("premature end of `translit_ignore' definition"));
02071          return;
02072        }
02073 
02074       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
02075        {
02076          lr_error (ldfile, _("syntax error"));
02077          lr_ignore_rest (ldfile, 0);
02078          return;
02079        }
02080 
02081       if (now->tok == tok_ucs4)
02082        from = now->val.ucs4;
02083       else
02084        /* Try to get the value.  */
02085        from = repertoire_find_value (repertoire, now->val.str.startmb,
02086                                   now->val.str.lenmb);
02087 
02088       if (from == ILLEGAL_CHAR_VALUE)
02089        {
02090          lr_error (ldfile, "invalid character name");
02091          newp = NULL;
02092        }
02093       else
02094        {
02095          newp = (struct translit_ignore_t *)
02096            obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
02097          newp->from = from;
02098          newp->to = from;
02099          newp->step = 1;
02100 
02101          newp->next = ctype->translit_ignore;
02102          ctype->translit_ignore = newp;
02103        }
02104 
02105       /* Now we expect either a semicolon, an ellipsis, or the end of the
02106         line.  */
02107       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
02108 
02109       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
02110        {
02111          /* XXX Should we bother implementing `....'?  `...' certainly
02112             will not be implemented.  */
02113          uint32_t to;
02114          int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
02115 
02116          now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
02117 
02118          if (now->tok == tok_eol || now->tok == tok_eof)
02119            {
02120              lr_error (ldfile,
02121                      _("premature end of `translit_ignore' definition"));
02122              return;
02123            }
02124 
02125          if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
02126            {
02127              lr_error (ldfile, _("syntax error"));
02128              lr_ignore_rest (ldfile, 0);
02129              return;
02130            }
02131 
02132          if (now->tok == tok_ucs4)
02133            to = now->val.ucs4;
02134          else
02135            /* Try to get the value.  */
02136            to = repertoire_find_value (repertoire, now->val.str.startmb,
02137                                    now->val.str.lenmb);
02138 
02139          if (to == ILLEGAL_CHAR_VALUE)
02140            lr_error (ldfile, "invalid character name");
02141          else
02142            {
02143              /* Make sure the `to'-value is larger.  */
02144              if (to >= from)
02145               {
02146                 newp->to = to;
02147                 newp->step = step;
02148               }
02149              else
02150               lr_error (ldfile, _("\
02151 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
02152                        (to | from) < 65536 ? 4 : 8, to,
02153                        (to | from) < 65536 ? 4 : 8, from);
02154            }
02155 
02156          /* And the next token.  */
02157          now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
02158        }
02159 
02160       if (now->tok == tok_eol || now->tok == tok_eof)
02161        /* We are done.  */
02162        return;
02163 
02164       if (now->tok == tok_semicolon)
02165        /* Next round.  */
02166        continue;
02167 
02168       /* If we come here something is wrong.  */
02169       lr_error (ldfile, _("syntax error"));
02170       lr_ignore_rest (ldfile, 0);
02171       return;
02172     }
02173 }
02174 
02175 
02176 /* The parser for the LC_CTYPE section of the locale definition.  */
02177 void
02178 ctype_read (struct linereader *ldfile, struct localedef_t *result,
02179            const struct charmap_t *charmap, const char *repertoire_name,
02180            int ignore_content)
02181 {
02182   struct repertoire_t *repertoire = NULL;
02183   struct locale_ctype_t *ctype;
02184   struct token *now;
02185   enum token_t nowtok;
02186   size_t cnt;
02187   struct charseq *last_seq;
02188   uint32_t last_wch = 0;
02189   enum token_t last_token;
02190   enum token_t ellipsis_token;
02191   int step;
02192   char last_charcode[16];
02193   size_t last_charcode_len = 0;
02194   const char *last_str = NULL;
02195   int mapidx;
02196   struct localedef_t *copy_locale = NULL;
02197 
02198   /* Get the repertoire we have to use.  */
02199   if (repertoire_name != NULL)
02200     repertoire = repertoire_read (repertoire_name);
02201 
02202   /* The rest of the line containing `LC_CTYPE' must be free.  */
02203   lr_ignore_rest (ldfile, 1);
02204 
02205 
02206   do
02207     {
02208       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02209       nowtok = now->tok;
02210     }
02211   while (nowtok == tok_eol);
02212 
02213   /* If we see `copy' now we are almost done.  */
02214   if (nowtok == tok_copy)
02215     {
02216       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02217       if (now->tok != tok_string)
02218        {
02219          SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
02220 
02221        skip_category:
02222          do
02223            now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02224          while (now->tok != tok_eof && now->tok != tok_end);
02225 
02226          if (now->tok != tok_eof
02227              || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
02228                 now->tok == tok_eof))
02229            lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
02230          else if (now->tok != tok_lc_ctype)
02231            {
02232              lr_error (ldfile, _("\
02233 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
02234              lr_ignore_rest (ldfile, 0);
02235            }
02236          else
02237            lr_ignore_rest (ldfile, 1);
02238 
02239          return;
02240        }
02241 
02242       if (! ignore_content)
02243        {
02244          /* Get the locale definition.  */
02245          copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
02246                                  repertoire_name, charmap, NULL);
02247          if ((copy_locale->avail & CTYPE_LOCALE) == 0)
02248            {
02249              /* Not yet loaded.  So do it now.  */
02250              if (locfile_read (copy_locale, charmap) != 0)
02251               goto skip_category;
02252            }
02253 
02254          if (copy_locale->categories[LC_CTYPE].ctype == NULL)
02255            return;
02256        }
02257 
02258       lr_ignore_rest (ldfile, 1);
02259 
02260       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02261       nowtok = now->tok;
02262     }
02263 
02264   /* Prepare the data structures.  */
02265   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
02266   ctype = result->categories[LC_CTYPE].ctype;
02267 
02268   /* Remember the repertoire we use.  */
02269   if (!ignore_content)
02270     ctype->repertoire = repertoire;
02271 
02272   while (1)
02273     {
02274       unsigned long int class_bit = 0;
02275       unsigned long int class256_bit = 0;
02276       int handle_digits = 0;
02277 
02278       /* Of course we don't proceed beyond the end of file.  */
02279       if (nowtok == tok_eof)
02280        break;
02281 
02282       /* Ignore empty lines.  */
02283       if (nowtok == tok_eol)
02284        {
02285          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02286          nowtok = now->tok;
02287          continue;
02288        }
02289 
02290       switch (nowtok)
02291        {
02292        case tok_charclass:
02293          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02294          while (now->tok == tok_ident || now->tok == tok_string)
02295            {
02296              ctype_class_new (ldfile, ctype, now->val.str.startmb);
02297              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02298              if (now->tok != tok_semicolon)
02299               break;
02300              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02301            }
02302          if (now->tok != tok_eol)
02303            SYNTAX_ERROR (_("\
02304 %s: syntax error in definition of new character class"), "LC_CTYPE");
02305          break;
02306 
02307        case tok_charconv:
02308          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02309          while (now->tok == tok_ident || now->tok == tok_string)
02310            {
02311              ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
02312              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02313              if (now->tok != tok_semicolon)
02314               break;
02315              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02316            }
02317          if (now->tok != tok_eol)
02318            SYNTAX_ERROR (_("\
02319 %s: syntax error in definition of new character map"), "LC_CTYPE");
02320          break;
02321 
02322        case tok_class:
02323          /* Ignore the rest of the line if we don't need the input of
02324             this line.  */
02325          if (ignore_content)
02326            {
02327              lr_ignore_rest (ldfile, 0);
02328              break;
02329            }
02330 
02331          /* We simply forget the `class' keyword and use the following
02332             operand to determine the bit.  */
02333          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02334          if (now->tok == tok_ident || now->tok == tok_string)
02335            {
02336              /* Must can be one of the predefined class names.  */
02337              for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
02338               if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
02339                 break;
02340              if (cnt >= ctype->nr_charclass)
02341               {
02342 #ifdef PREDEFINED_CLASSES
02343                 if (now->val.str.lenmb == 8
02344                     && memcmp ("special1", now->val.str.startmb, 8) == 0)
02345                   class_bit = _ISwspecial1;
02346                 else if (now->val.str.lenmb == 8
02347                     && memcmp ("special2", now->val.str.startmb, 8) == 0)
02348                   class_bit = _ISwspecial2;
02349                 else if (now->val.str.lenmb == 8
02350                     && memcmp ("special3", now->val.str.startmb, 8) == 0)
02351                   class_bit = _ISwspecial3;
02352                 else
02353 #endif
02354                   {
02355                     /* OK, it's a new class.  */
02356                     ctype_class_new (ldfile, ctype, now->val.str.startmb);
02357 
02358                     class_bit = _ISwbit (ctype->nr_charclass - 1);
02359                   }
02360               }
02361              else
02362               {
02363                 class_bit = _ISwbit (cnt);
02364 
02365                 free (now->val.str.startmb);
02366               }
02367            }
02368          else if (now->tok == tok_digit)
02369            goto handle_tok_digit;
02370          else if (now->tok < tok_upper || now->tok > tok_blank)
02371            goto err_label;
02372          else
02373            {
02374              class_bit = BITw (now->tok);
02375              class256_bit = BIT (now->tok);
02376            }
02377 
02378          /* The next character must be a semicolon.  */
02379          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02380          if (now->tok != tok_semicolon)
02381            goto err_label;
02382          goto read_charclass;
02383 
02384        case tok_upper:
02385        case tok_lower:
02386        case tok_alpha:
02387        case tok_alnum:
02388        case tok_space:
02389        case tok_cntrl:
02390        case tok_punct:
02391        case tok_graph:
02392        case tok_print:
02393        case tok_xdigit:
02394        case tok_blank:
02395          /* Ignore the rest of the line if we don't need the input of
02396             this line.  */
02397          if (ignore_content)
02398            {
02399              lr_ignore_rest (ldfile, 0);
02400              break;
02401            }
02402 
02403          class_bit = BITw (now->tok);
02404          class256_bit = BIT (now->tok);
02405          handle_digits = 0;
02406        read_charclass:
02407          ctype->class_done |= class_bit;
02408          last_token = tok_none;
02409          ellipsis_token = tok_none;
02410          step = 1;
02411          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02412          while (now->tok != tok_eol && now->tok != tok_eof)
02413            {
02414              uint32_t wch;
02415              struct charseq *seq;
02416 
02417              if (ellipsis_token == tok_none)
02418               {
02419                 if (get_character (now, charmap, repertoire, &seq, &wch))
02420                   goto err_label;
02421 
02422                 if (!ignore_content && seq != NULL && seq->nbytes == 1)
02423                   /* Yep, we can store information about this byte
02424                      sequence.  */
02425                   ctype->class256_collection[seq->bytes[0]] |= class256_bit;
02426 
02427                 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
02428                     && class_bit != 0)
02429                   /* We have the UCS4 position.  */
02430                   *find_idx (ctype, &ctype->class_collection,
02431                             &ctype->class_collection_max,
02432                             &ctype->class_collection_act, wch) |= class_bit;
02433 
02434                 last_token = now->tok;
02435                 /* Terminate the string.  */
02436                 if (last_token == tok_bsymbol)
02437                   {
02438                     now->val.str.startmb[now->val.str.lenmb] = '\0';
02439                     last_str = now->val.str.startmb;
02440                   }
02441                 else
02442                   last_str = NULL;
02443                 last_seq = seq;
02444                 last_wch = wch;
02445                 memcpy (last_charcode, now->val.charcode.bytes, 16);
02446                 last_charcode_len = now->val.charcode.nbytes;
02447 
02448                 if (!ignore_content && handle_digits == 1)
02449                   {
02450                     /* We must store the digit values.  */
02451                     if (ctype->mbdigits_act == ctype->mbdigits_max)
02452                      {
02453                        ctype->mbdigits_max += 10;
02454                        ctype->mbdigits = xrealloc (ctype->mbdigits,
02455                                                 (ctype->mbdigits_max
02456                                                  * sizeof (char *)));
02457                        ctype->wcdigits_max += 10;
02458                        ctype->wcdigits = xrealloc (ctype->wcdigits,
02459                                                 (ctype->wcdigits_max
02460                                                  * sizeof (uint32_t)));
02461                      }
02462 
02463                     ctype->mbdigits[ctype->mbdigits_act++] = seq;
02464                     ctype->wcdigits[ctype->wcdigits_act++] = wch;
02465                   }
02466                 else if (!ignore_content && handle_digits == 2)
02467                   {
02468                     /* We must store the digit values.  */
02469                     if (ctype->outdigits_act >= 10)
02470                      {
02471                        lr_error (ldfile, _("\
02472 %s: field `%s' does not contain exactly ten entries"),
02473                          "LC_CTYPE", "outdigit");
02474                        lr_ignore_rest (ldfile, 0);
02475                        break;
02476                      }
02477 
02478                     ctype->mboutdigits[ctype->outdigits_act] = seq;
02479                     ctype->wcoutdigits[ctype->outdigits_act] = wch;
02480                     ++ctype->outdigits_act;
02481                   }
02482               }
02483              else
02484               {
02485                 /* Now it gets complicated.  We have to resolve the
02486                    ellipsis problem.  First we must distinguish between
02487                    the different kinds of ellipsis, and the kind used must
02488                    match the tokens we have seen.  */
02489                 assert (last_token != tok_none);
02490 
02491                 if (last_token != now->tok)
02492                   {
02493                     lr_error (ldfile, _("\
02494 ellipsis range must be marked by two operands of same type"));
02495                     lr_ignore_rest (ldfile, 0);
02496                     break;
02497                   }
02498 
02499                 if (last_token == tok_bsymbol)
02500                   {
02501                     if (ellipsis_token == tok_ellipsis3)
02502                      lr_error (ldfile, _("with symbolic name range values \
02503 the absolute ellipsis `...' must not be used"));
02504 
02505                     charclass_symbolic_ellipsis (ldfile, ctype, charmap,
02506                                              repertoire, now, last_str,
02507                                              class256_bit, class_bit,
02508                                              (ellipsis_token
02509                                               == tok_ellipsis4
02510                                               ? 10 : 16),
02511                                              ignore_content,
02512                                              handle_digits, step);
02513                   }
02514                 else if (last_token == tok_ucs4)
02515                   {
02516                     if (ellipsis_token != tok_ellipsis2)
02517                      lr_error (ldfile, _("\
02518 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
02519 
02520                     charclass_ucs4_ellipsis (ldfile, ctype, charmap,
02521                                           repertoire, now, last_wch,
02522                                           class256_bit, class_bit,
02523                                           ignore_content, handle_digits,
02524                                           step);
02525                   }
02526                 else
02527                   {
02528                     assert (last_token == tok_charcode);
02529 
02530                     if (ellipsis_token != tok_ellipsis3)
02531                      lr_error (ldfile, _("\
02532 with character code range values one must use the absolute ellipsis `...'"));
02533 
02534                     charclass_charcode_ellipsis (ldfile, ctype, charmap,
02535                                              repertoire, now,
02536                                              last_charcode,
02537                                              last_charcode_len,
02538                                              class256_bit, class_bit,
02539                                              ignore_content,
02540                                              handle_digits);
02541                   }
02542 
02543                 /* Now we have used the last value.  */
02544                 last_token = tok_none;
02545               }
02546 
02547              /* Next we expect a semicolon or the end of the line.  */
02548              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02549              if (now->tok == tok_eol || now->tok == tok_eof)
02550               break;
02551 
02552              if (last_token != tok_none
02553                 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
02554               {
02555                 if (now->tok == tok_ellipsis2_2)
02556                   {
02557                     now->tok = tok_ellipsis2;
02558                     step = 2;
02559                   }
02560                 else if (now->tok == tok_ellipsis4_2)
02561                   {
02562                     now->tok = tok_ellipsis4;
02563                     step = 2;
02564                   }
02565 
02566                 ellipsis_token = now->tok;
02567 
02568                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02569                 continue;
02570               }
02571 
02572              if (now->tok != tok_semicolon)
02573               goto err_label;
02574 
02575              /* And get the next character.  */
02576              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02577 
02578              ellipsis_token = tok_none;
02579              step = 1;
02580            }
02581          break;
02582 
02583        case tok_digit:
02584          /* Ignore the rest of the line if we don't need the input of
02585             this line.  */
02586          if (ignore_content)
02587            {
02588              lr_ignore_rest (ldfile, 0);
02589              break;
02590            }
02591 
02592        handle_tok_digit:
02593          class_bit = _ISwdigit;
02594          class256_bit = _ISdigit;
02595          handle_digits = 1;
02596          goto read_charclass;
02597 
02598        case tok_outdigit:
02599          /* Ignore the rest of the line if we don't need the input of
02600             this line.  */
02601          if (ignore_content)
02602            {
02603              lr_ignore_rest (ldfile, 0);
02604              break;
02605            }
02606 
02607          if (ctype->outdigits_act != 0)
02608            lr_error (ldfile, _("\
02609 %s: field `%s' declared more than once"),
02610                     "LC_CTYPE", "outdigit");
02611          class_bit = 0;
02612          class256_bit = 0;
02613          handle_digits = 2;
02614          goto read_charclass;
02615 
02616        case tok_toupper:
02617          /* Ignore the rest of the line if we don't need the input of
02618             this line.  */
02619          if (ignore_content)
02620            {
02621              lr_ignore_rest (ldfile, 0);
02622              break;
02623            }
02624 
02625          mapidx = 0;
02626          goto read_mapping;
02627 
02628        case tok_tolower:
02629          /* Ignore the rest of the line if we don't need the input of
02630             this line.  */
02631          if (ignore_content)
02632            {
02633              lr_ignore_rest (ldfile, 0);
02634              break;
02635            }
02636 
02637          mapidx = 1;
02638          goto read_mapping;
02639 
02640        case tok_map:
02641          /* Ignore the rest of the line if we don't need the input of
02642             this line.  */
02643          if (ignore_content)
02644            {
02645              lr_ignore_rest (ldfile, 0);
02646              break;
02647            }
02648 
02649          /* We simply forget the `map' keyword and use the following
02650             operand to determine the mapping.  */
02651          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02652          if (now->tok == tok_ident || now->tok == tok_string)
02653            {
02654              size_t cnt;
02655 
02656              for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
02657               if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
02658                 break;
02659 
02660              if (cnt < ctype->map_collection_nr)
02661               free (now->val.str.startmb);
02662              else
02663               /* OK, it's a new map.  */
02664               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
02665 
02666              mapidx = cnt;
02667            }
02668          else if (now->tok < tok_toupper || now->tok > tok_tolower)
02669            goto err_label;
02670          else
02671            mapidx = now->tok - tok_toupper;
02672 
02673          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02674          /* This better should be a semicolon.  */
02675          if (now->tok != tok_semicolon)
02676            goto err_label;
02677 
02678        read_mapping:
02679          /* Test whether this mapping was already defined.  */
02680          if (ctype->tomap_done[mapidx])
02681            {
02682              lr_error (ldfile, _("duplicated definition for mapping `%s'"),
02683                      ctype->mapnames[mapidx]);
02684              lr_ignore_rest (ldfile, 0);
02685              break;
02686            }
02687          ctype->tomap_done[mapidx] = 1;
02688 
02689          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02690          while (now->tok != tok_eol && now->tok != tok_eof)
02691            {
02692              struct charseq *from_seq;
02693              uint32_t from_wch;
02694              struct charseq *to_seq;
02695              uint32_t to_wch;
02696 
02697              /* Every pair starts with an opening brace.  */
02698              if (now->tok != tok_open_brace)
02699               goto err_label;
02700 
02701              /* Next comes the from-value.  */
02702              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02703              if (get_character (now, charmap, repertoire, &from_seq,
02704                              &from_wch) != 0)
02705               goto err_label;
02706 
02707              /* The next is a comma.  */
02708              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02709              if (now->tok != tok_comma)
02710               goto err_label;
02711 
02712              /* And the other value.  */
02713              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02714              if (get_character (now, charmap, repertoire, &to_seq,
02715                              &to_wch) != 0)
02716               goto err_label;
02717 
02718              /* And the last thing is the closing brace.  */
02719              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02720              if (now->tok != tok_close_brace)
02721               goto err_label;
02722 
02723              if (!ignore_content)
02724               {
02725                 /* Check whether the mapping converts from an ASCII value
02726                    to a non-ASCII value.  */
02727                 if (from_seq != NULL && from_seq->nbytes == 1
02728                     && isascii (from_seq->bytes[0])
02729                     && to_seq != NULL && (to_seq->nbytes != 1
02730                                        || !isascii (to_seq->bytes[0])))
02731                   ctype->to_nonascii = 1;
02732 
02733                 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
02734                     && from_seq->nbytes == 1 && to_seq->nbytes == 1)
02735                   /* We can use this value.  */
02736                   ctype->map256_collection[mapidx][from_seq->bytes[0]]
02737                     = to_seq->bytes[0];
02738 
02739                 if (from_wch != ILLEGAL_CHAR_VALUE
02740                     && to_wch != ILLEGAL_CHAR_VALUE)
02741                   /* Both correct values.  */
02742                   *find_idx (ctype, &ctype->map_collection[mapidx],
02743                             &ctype->map_collection_max[mapidx],
02744                             &ctype->map_collection_act[mapidx],
02745                             from_wch) = to_wch;
02746               }
02747 
02748              /* Now comes a semicolon or the end of the line/file.  */
02749              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02750              if (now->tok == tok_semicolon)
02751               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02752            }
02753          break;
02754 
02755        case tok_translit_start:
02756          /* Ignore the entire translit section with its peculiar syntax
02757             if we don't need the input.  */
02758          if (ignore_content)
02759            {
02760              do
02761               {
02762                 lr_ignore_rest (ldfile, 0);
02763                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02764               }
02765              while (now->tok != tok_translit_end && now->tok != tok_eof);
02766 
02767              if (now->tok == tok_eof)
02768               lr_error (ldfile, _(\
02769 "%s: `translit_start' section does not end with `translit_end'"),
02770                        "LC_CTYPE");
02771 
02772              break;
02773            }
02774 
02775          /* The rest of the line better should be empty.  */
02776          lr_ignore_rest (ldfile, 1);
02777 
02778          /* We count here the number of allocated entries in the `translit'
02779             array.  */
02780          cnt = 0;
02781 
02782          ldfile->translate_strings = 1;
02783          ldfile->return_widestr = 1;
02784 
02785          /* We proceed until we see the `translit_end' token.  */
02786          while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
02787                now->tok != tok_translit_end && now->tok != tok_eof)
02788            {
02789              if (now->tok == tok_eol)
02790               /* Ignore empty lines.  */
02791               continue;
02792 
02793              if (now->tok == tok_include)
02794               {
02795                 /* We have to include locale.  */
02796                 const char *locale_name;
02797                 const char *repertoire_name;
02798                 struct translit_include_t *include_stmt, **include_ptr;
02799 
02800                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02801                 /* This should be a string or an identifier.  In any
02802                    case something to name a locale.  */
02803                 if (now->tok != tok_string && now->tok != tok_ident)
02804                   {
02805                   translit_syntax:
02806                     lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
02807                     lr_ignore_rest (ldfile, 0);
02808                     continue;
02809                   }
02810                 locale_name = now->val.str.startmb;
02811 
02812                 /* Next should be a semicolon.  */
02813                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02814                 if (now->tok != tok_semicolon)
02815                   goto translit_syntax;
02816 
02817                 /* Now the repertoire name.  */
02818                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02819                 if ((now->tok != tok_string && now->tok != tok_ident)
02820                     || now->val.str.startmb == NULL)
02821                   goto translit_syntax;
02822                 repertoire_name = now->val.str.startmb;
02823                 if (repertoire_name[0] == '\0')
02824                   /* Ignore the empty string.  */
02825                   repertoire_name = NULL;
02826 
02827                 /* Save the include statement for later processing.  */
02828                 include_stmt = (struct translit_include_t *)
02829                   xmalloc (sizeof (struct translit_include_t));
02830                 include_stmt->copy_locale = locale_name;
02831                 include_stmt->copy_repertoire = repertoire_name;
02832                 include_stmt->next = NULL;
02833 
02834                 include_ptr = &ctype->translit_include;
02835                 while (*include_ptr != NULL)
02836                   include_ptr = &(*include_ptr)->next;
02837                 *include_ptr = include_stmt;
02838 
02839                 /* The rest of the line must be empty.  */
02840                 lr_ignore_rest (ldfile, 1);
02841 
02842                 /* Make sure the locale is read.  */
02843                 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
02844                                1, NULL);
02845                 continue;
02846               }
02847              else if (now->tok == tok_default_missing)
02848               {
02849                 uint32_t *wstr;
02850 
02851                 while (1)
02852                   {
02853                     /* We expect a single character or string as the
02854                       argument.  */
02855                     now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02856                     wstr = read_widestring (ldfile, now, charmap,
02857                                          repertoire);
02858 
02859                     if (wstr != NULL)
02860                      {
02861                        if (ctype->default_missing != NULL)
02862                          {
02863                            lr_error (ldfile, _("\
02864 %s: duplicate `default_missing' definition"), "LC_CTYPE");
02865                            WITH_CUR_LOCALE (error_at_line (0, 0,
02866                                                        ctype->default_missing_file,
02867                                                        ctype->default_missing_lineno,
02868                                                        _("\
02869 previous definition was here")));
02870                          }
02871                        else
02872                          {
02873                            ctype->default_missing = wstr;
02874                            ctype->default_missing_file = ldfile->fname;
02875                            ctype->default_missing_lineno = ldfile->lineno;
02876                          }
02877                        /* We can have more entries, ignore them.  */
02878                        lr_ignore_rest (ldfile, 0);
02879                        break;
02880                      }
02881                     else if (wstr == (uint32_t *) -1l)
02882                      /* This was a syntax error.  */
02883                      break;
02884 
02885                     /* Maybe there is another replacement we can use.  */
02886                     now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02887                     if (now->tok == tok_eol || now->tok == tok_eof)
02888                      {
02889                        /* Nothing found.  We tell the user.  */
02890                        lr_error (ldfile, _("\
02891 %s: no representable `default_missing' definition found"), "LC_CTYPE");
02892                        break;
02893                      }
02894                     if (now->tok != tok_semicolon)
02895                      goto translit_syntax;
02896                   }
02897 
02898                 continue;
02899               }
02900              else if (now->tok == tok_translit_ignore)
02901               {
02902                 read_translit_ignore_entry (ldfile, ctype, charmap,
02903                                          repertoire);
02904                 continue;
02905               }
02906 
02907              read_translit_entry (ldfile, ctype, now, charmap, repertoire);
02908            }
02909          ldfile->return_widestr = 0;
02910 
02911          if (now->tok == tok_eof)
02912            lr_error (ldfile, _(\
02913 "%s: `translit_start' section does not end with `translit_end'"),
02914                     "LC_CTYPE");
02915 
02916          break;
02917 
02918        case tok_ident:
02919          /* Ignore the rest of the line if we don't need the input of
02920             this line.  */
02921          if (ignore_content)
02922            {
02923              lr_ignore_rest (ldfile, 0);
02924              break;
02925            }
02926 
02927          /* This could mean one of several things.  First test whether
02928             it's a character class name.  */
02929          for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
02930            if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
02931              break;
02932          if (cnt < ctype->nr_charclass)
02933            {
02934              class_bit = _ISwbit (cnt);
02935              class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
02936              free (now->val.str.startmb);
02937              goto read_charclass;
02938            }
02939          for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
02940            if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
02941              break;
02942          if (cnt < ctype->map_collection_nr)
02943            {
02944              mapidx = cnt;
02945              free (now->val.str.startmb);
02946              goto read_mapping;
02947             }
02948 #ifdef PREDEFINED_CLASSES
02949          if (strcmp (now->val.str.startmb, "special1") == 0)
02950            {
02951              class_bit = _ISwspecial1;
02952              free (now->val.str.startmb);
02953              goto read_charclass;
02954            }
02955          if (strcmp (now->val.str.startmb, "special2") == 0)
02956            {
02957              class_bit = _ISwspecial2;
02958              free (now->val.str.startmb);
02959              goto read_charclass;
02960            }
02961          if (strcmp (now->val.str.startmb, "special3") == 0)
02962            {
02963              class_bit = _ISwspecial3;
02964              free (now->val.str.startmb);
02965              goto read_charclass;
02966            }
02967          if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
02968            {
02969              mapidx = 2;
02970              goto read_mapping;
02971            }
02972 #endif
02973          break;
02974 
02975        case tok_end:
02976          /* Next we assume `LC_CTYPE'.  */
02977          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02978          if (now->tok == tok_eof)
02979            break;
02980          if (now->tok == tok_eol)
02981            lr_error (ldfile, _("%s: incomplete `END' line"),
02982                     "LC_CTYPE");
02983          else if (now->tok != tok_lc_ctype)
02984            lr_error (ldfile, _("\
02985 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
02986          lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
02987          return;
02988 
02989        default:
02990        err_label:
02991          if (now->tok != tok_eof)
02992            SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
02993        }
02994 
02995       /* Prepare for the next round.  */
02996       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02997       nowtok = now->tok;
02998     }
02999 
03000   /* When we come here we reached the end of the file.  */
03001   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
03002 }
03003 
03004 
03005 static void
03006 set_class_defaults (struct locale_ctype_t *ctype,
03007                   const struct charmap_t *charmap,
03008                   struct repertoire_t *repertoire)
03009 {
03010   size_t cnt;
03011 
03012   /* This function defines the default values for the classes and conversions
03013      according to POSIX.2 2.5.2.1.
03014      It may seem that the order of these if-blocks is arbitrary but it is NOT.
03015      Don't move them unless you know what you are doing!  */
03016 
03017   auto void set_default (int bitpos, int from, int to);
03018 
03019   void set_default (int bitpos, int from, int to)
03020     {
            /* Nested helper: for every character code CH from FROM through TO
               (inclusive) set the class bit selected by BITPOS, both in the
               byte-indexed table `class256_collection' (only when the charmap
               yields a one-byte sequence for the character) and in
               `class_collection' (on every iteration).  CTYPE and CHARMAP are
               not parameters here; they are the enclosing function's
               arguments.  */
03021       char tmp[2];
03022       int ch;
            /* Mask that is OR-ed into `class256_collection' below.  */
03023       int bit = _ISbit (bitpos);
            /* Mask that is OR-ed into `class_collection' below.  */
03024       int bitw = _ISwbit (bitpos);
03025       /* Define string.  */
            /* The "?" is only a placeholder: TMP[0] is overwritten with CH in
               each iteration, while the copied NUL terminator stays in
               TMP[1].  */
03026       strcpy (tmp, "?");
03027 
03028       for (ch = from; ch <= to; ++ch)
03029        {
03030          struct charseq *seq;
03031          tmp[0] = ch;
03032 
               /* First query the charmap with the one-character key in TMP.
                  If that fails, retry with the key "U" followed by CH as
                  eight hexadecimal digits: nine characters, so BUF holds
                  them plus the terminating NUL.  */
03033          seq = charmap_find_value (charmap, tmp, 1);
03034          if (seq == NULL)
03035            {
03036              char buf[10];
03037              sprintf (buf, "U%08X", ch);
03038              seq = charmap_find_value (charmap, buf, 9);
03039            }
03040          if (seq == NULL)
03041            {
                   /* Neither lookup succeeded.  This diagnostic is suppressed
                      when `be_quiet' is set.  */
03042              if (!be_quiet)
03043               WITH_CUR_LOCALE (error (0, 0, _("\
03044 %s: character `%s' not defined while needed as default value"),
03045                                    "LC_CTYPE", tmp));
03046            }
03047          else if (seq->nbytes != 1)
                 /* Found, but the sequence is not exactly one byte long, so
                    it is not used as an index into `class256_collection'.
                    Unlike the case above, this diagnostic is not guarded by
                    `be_quiet'.  */
03048            WITH_CUR_LOCALE (error (0, 0, _("\
03049 %s: character `%s' in charmap not representable with one byte"),
03050                                 "LC_CTYPE", tmp));
03051          else
                 /* One-byte sequence: that byte is the table index.  */
03052            ctype->class256_collection[seq->bytes[0]] |= bit;
03053 
               /* The statement below runs on every iteration, whatever the
                  outcome of the charmap lookup above.  */
03054          /* No need to search here, the ASCII value is also the Unicode
03055             value.  */
03056          ELEM (ctype, class_collection, , ch) |= bitw;
03057        }
03058     }
03059 
03060   /* Set default values if keyword was not present.  */
03061   if ((ctype->class_done & BITw (tok_upper)) == 0)
03062     /* "If this keyword [upper] is not specified, the uppercase letters
03063         `A' through `Z', ..., shall automatically belong to this class,
03064        with implementation defined character values."  [P1003.2, 2.5.2.1]  */
03065     set_default (BITPOS (tok_upper), 'A', 'Z');
03066 
03067   if ((ctype->class_done & BITw (tok_lower)) == 0)
03068     /* "If this keyword [lower] is not specified, the lowercase letters
03069         `a' through `z', ..., shall automatically belong to this class,
03070        with implementation defined character values."  [P1003.2, 2.5.2.1]  */
03071     set_default (BITPOS (tok_lower), 'a', 'z');
03072 
03073   if ((ctype->class_done & BITw (tok_alpha)) == 0)
03074     {
03075       /* Table 2-6 in P1003.2 says that characters in class `upper' or
03076         class `lower' *must* be in class `alpha'.  */
03077       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
03078       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
03079 
03080       for (cnt = 0; cnt < 256; ++cnt)
03081        if ((ctype->class256_collection[cnt] & mask) != 0)
03082          ctype->class256_collection[cnt] |= BIT (tok_alpha);
03083 
03084       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
03085        if ((ctype->class_collection[cnt] & maskw) != 0)
03086          ctype->class_collection[cnt] |= BITw (tok_alpha);
03087     }
03088 
03089   if ((ctype->class_done & BITw (tok_digit)) == 0)
03090     /* "If this keyword [digit] is not specified, the digits `0' through
03091         `9', ..., shall automatically belong to this class, with
03092        implementation-defined character values."  [P1003.2, 2.5.2.1]  */
03093     set_default (BITPOS (tok_digit), '0', '9');
03094 
03095   /* "Only characters specified for the `alpha' and `digit' keyword
03096      shall be specified.  Characters specified for the keyword `alpha'
03097      and `digit' are automatically included in this class [alnum]."  */
03098   {
03099     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
03100     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
03101 
03102     for (cnt = 0; cnt < 256; ++cnt)
03103       if ((ctype->class256_collection[cnt] & mask) != 0)
03104        ctype->class256_collection[cnt] |= BIT (tok_alnum);
03105 
03106     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
03107       if ((ctype->class_collection[cnt] & maskw) != 0)
03108        ctype->class_collection[cnt] |= BITw (tok_alnum);
03109   }
03110 
03111   if ((ctype->class_done & BITw (tok_space)) == 0)
03112     /* "If this keyword [space] is not specified, the characters <space>,
03113         <form-feed>, <newline>, <carriage-return>, <tab>, and
03114        <vertical-tab>, ..., shall automatically belong to this class,
03115        with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
03116     {
03117       struct charseq *seq;
03118 
03119       seq = charmap_find_value (charmap, "space", 5);
03120       if (seq == NULL)
03121        seq = charmap_find_value (charmap, "SP", 2);
03122       if (seq == NULL)
03123        seq = charmap_find_value (charmap, "U00000020", 9);
03124       if (seq == NULL)
03125        {
03126          if (!be_quiet)
03127            WITH_CUR_LOCALE (error (0, 0, _("\
03128 %s: character `%s' not defined while needed as default value"),
03129                                 "LC_CTYPE", "<space>"));
03130        }
03131       else if (seq->nbytes != 1)
03132        WITH_CUR_LOCALE (error (0, 0, _("\
03133 %s: character `%s' in charmap not representable with one byte"),
03134                             "LC_CTYPE", "<space>"));
03135       else
03136        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
03137 
03138       /* No need to search.  */
03139       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
03140 
03141       seq = charmap_find_value (charmap, "form-feed", 9);
03142       if (seq == NULL)
03143        seq = charmap_find_value (charmap, "U0000000C", 9);
03144       if (seq == NULL)
03145        {
03146          if (!be_quiet)
03147            WITH_CUR_LOCALE (error (0, 0, _("\
03148 %s: character `%s' not defined while needed as default value"),
03149                                 "LC_CTYPE", "<form-feed>"));
03150        }
03151       else if (seq->nbytes != 1)
03152        WITH_CUR_LOCALE (error (0, 0, _("\
03153 %s: character `%s' in charmap not representable with one byte"),
03154                             "LC_CTYPE", "<form-feed>"));
03155       else
03156        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
03157 
03158       /* No need to search.  */
03159       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
03160 
03161 
03162       seq = charmap_find_value (charmap, "newline", 7);
03163       if (seq == NULL)
03164        seq = charmap_find_value (charmap, "U0000000A", 9);
03165       if (seq == NULL)
03166        {
03167          if (!be_quiet)
03168            WITH_CUR_LOCALE (error (0, 0, _("\
03169 %s: character `%s' not defined while needed as default value"),
03170                                 "LC_CTYPE", "<newline>"));
03171        }
03172       else if (seq->nbytes != 1)
03173        WITH_CUR_LOCALE (error (0, 0, _("\
03174 %s: character `%s' in charmap not representable with one byte"),
03175                             "LC_CTYPE", "<newline>"));
03176       else
03177        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
03178 
03179       /* No need to search.  */
03180       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
03181 
03182 
03183       seq = charmap_find_value (charmap, "carriage-return", 15);
03184       if (seq == NULL)
03185        seq = charmap_find_value (charmap, "U0000000D", 9);
03186       if (seq == NULL)
03187        {
03188          if (!be_quiet)
03189            WITH_CUR_LOCALE (error (0, 0, _("\
03190 %s: character `%s' not defined while needed as default value"),
03191                                 "LC_CTYPE", "<carriage-return>"));
03192        }
03193       else if (seq->nbytes != 1)
03194        WITH_CUR_LOCALE (error (0, 0, _("\
03195 %s: character `%s' in charmap not representable with one byte"),
03196                             "LC_CTYPE", "<carriage-return>"));
03197       else
03198        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
03199 
03200       /* No need to search.  */
03201       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
03202 
03203 
03204       seq = charmap_find_value (charmap, "tab", 3);
03205       if (seq == NULL)
03206        seq = charmap_find_value (charmap, "U00000009", 9);
03207       if (seq == NULL)
03208        {
03209          if (!be_quiet)
03210            WITH_CUR_LOCALE (error (0, 0, _("\
03211 %s: character `%s' not defined while needed as default value"),
03212                                 "LC_CTYPE", "<tab>"));
03213        }
03214       else if (seq->nbytes != 1)
03215        WITH_CUR_LOCALE (error (0, 0, _("\
03216 %s: character `%s' in charmap not representable with one byte"),
03217                             "LC_CTYPE", "<tab>"));
03218       else
03219        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
03220 
03221       /* No need to search.  */
03222       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
03223 
03224 
03225       seq = charmap_find_value (charmap, "vertical-tab", 12);
03226       if (seq == NULL)
03227        seq = charmap_find_value (charmap, "U0000000B", 9);
03228       if (seq == NULL)
03229        {
03230          if (!be_quiet)
03231            WITH_CUR_LOCALE (error (0, 0, _("\
03232 %s: character `%s' not defined while needed as default value"),
03233                                 "LC_CTYPE", "<vertical-tab>"));
03234        }
03235       else if (seq->nbytes != 1)
03236        WITH_CUR_LOCALE (error (0, 0, _("\
03237 %s: character `%s' in charmap not representable with one byte"),
03238                             "LC_CTYPE", "<vertical-tab>"));
03239       else
03240        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
03241 
03242       /* No need to search.  */
03243       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
03244     }
03245 
03246   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
03247     /* "If this keyword is not specified, the digits `0' to `9', the
03248         uppercase letters `A' through `F', and the lowercase letters `a'
03249        through `f', ..., shall automatically belong to this class, with
03250        implementation defined character values."  [P1003.2, 2.5.2.1]  */
03251     {
03252       set_default (BITPOS (tok_xdigit), '0', '9');
03253       set_default (BITPOS (tok_xdigit), 'A', 'F');
03254       set_default (BITPOS (tok_xdigit), 'a', 'f');
03255     }
03256 
03257   if ((ctype->class_done & BITw (tok_blank)) == 0)
03258     /* "If this keyword [blank] is unspecified, the characters <space> and
03259        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
03260    {
03261       struct charseq *seq;
03262 
03263       seq = charmap_find_value (charmap, "space", 5);
03264       if (seq == NULL)
03265        seq = charmap_find_value (charmap, "SP", 2);
03266       if (seq == NULL)
03267        seq = charmap_find_value (charmap, "U00000020", 9);
03268       if (seq == NULL)
03269        {
03270          if (!be_quiet)
03271            WITH_CUR_LOCALE (error (0, 0, _("\
03272 %s: character `%s' not defined while needed as default value"),
03273                                 "LC_CTYPE", "<space>"));
03274        }
03275       else if (seq->nbytes != 1)
03276        WITH_CUR_LOCALE (error (0, 0, _("\
03277 %s: character `%s' in charmap not representable with one byte"),
03278                             "LC_CTYPE", "<space>"));
03279       else
03280        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
03281 
03282       /* No need to search.  */
03283       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
03284 
03285 
03286       seq = charmap_find_value (charmap, "tab", 3);
03287       if (seq == NULL)
03288        seq = charmap_find_value (charmap, "U00000009", 9);
03289       if (seq == NULL)
03290        {
03291          if (!be_quiet)
03292            WITH_CUR_LOCALE (error (0, 0, _("\
03293 %s: character `%s' not defined while needed as default value"),
03294                                 "LC_CTYPE", "<tab>"));
03295        }
03296       else if (seq->nbytes != 1)
03297        WITH_CUR_LOCALE (error (0, 0, _("\
03298 %s: character `%s' in charmap not representable with one byte"),
03299                             "LC_CTYPE", "<tab>"));
03300       else
03301        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
03302 
03303       /* No need to search.  */
03304       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
03305     }
03306 
03307   if ((ctype->class_done & BITw (tok_graph)) == 0)
03308     /* "If this keyword [graph] is not specified, characters specified for
03309         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
03310        shall belong to this character class."  [P1003.2, 2.5.2.1]  */
03311     {
03312       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
03313        BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
03314       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
03315        BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
03316        BITw (tok_punct);
03317       size_t cnt;
03318 
03319       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
03320        if ((ctype->class_collection[cnt] & maskw) != 0)
03321          ctype->class_collection[cnt] |= BITw (tok_graph);
03322 
03323       for (cnt = 0; cnt < 256; ++cnt)
03324        if ((ctype->class256_collection[cnt] & mask) != 0)
03325          ctype->class256_collection[cnt] |= BIT (tok_graph);
03326     }
03327 
03328   if ((ctype->class_done & BITw (tok_print)) == 0)
03329     /* "If this keyword [print] is not provided, characters specified for
03330         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
03331        and the <space> character shall belong to this character class."
03332        [P1003.2, 2.5.2.1]  */
03333     {
03334       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
03335        BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
03336       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
03337        BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
03338        BITw (tok_punct);
03339       size_t cnt;
03340       struct charseq *seq;
03341 
03342       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
03343        if ((ctype->class_collection[cnt] & maskw) != 0)
03344          ctype->class_collection[cnt] |= BITw (tok_print);
03345 
03346       for (cnt = 0; cnt < 256; ++cnt)
03347        if ((ctype->class256_collection[cnt] & mask) != 0)
03348          ctype->class256_collection[cnt] |= BIT (tok_print);
03349 
03350 
03351       seq = charmap_find_value (charmap, "space", 5);
03352       if (seq == NULL)
03353        seq = charmap_find_value (charmap, "SP", 2);
03354       if (seq == NULL)
03355        seq = charmap_find_value (charmap, "U00000020", 9);
03356       if (seq == NULL)
03357        {
03358          if (!be_quiet)
03359            WITH_CUR_LOCALE (error (0, 0, _("\
03360 %s: character `%s' not defined while needed as default value"),
03361                                 "LC_CTYPE", "<space>"));
03362        }
03363       else if (seq->nbytes != 1)
03364        WITH_CUR_LOCALE (error (0, 0, _("\
03365 %s: character `%s' in charmap not representable with one byte"),
03366                             "LC_CTYPE", "<space>"));
03367       else
03368        ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
03369 
03370       /* No need to search.  */
03371       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
03372     }
03373 
03374   if (ctype->tomap_done[0] == 0)
03375     /* "If this keyword [toupper] is not specified, the lowercase letters
03376         `a' through `z', and their corresponding uppercase letters `A' to
03377        `Z', ..., shall automatically be included, with implementation-
03378        defined character values."  [P1003.2, 2.5.2.1]  */
03379     {
03380       char tmp[4];
03381       int ch;
03382 
03383       strcpy (tmp, "<?>");
03384 
03385       for (ch = 'a'; ch <= 'z'; ++ch)
03386        {
03387          struct charseq *seq_from, *seq_to;
03388 
03389          tmp[1] = (char) ch;
03390 
03391          seq_from = charmap_find_value (charmap, &tmp[1], 1);
03392          if (seq_from == NULL)
03393            {
03394              char buf[10];
03395              sprintf (buf, "U%08X", ch);
03396              seq_from = charmap_find_value (charmap, buf, 9);
03397            }
03398          if (seq_from == NULL)
03399            {
03400              if (!be_quiet)
03401               WITH_CUR_LOCALE (error (0, 0, _("\
03402 %s: character `%s' not defined while needed as default value"),
03403                                    "LC_CTYPE", tmp));
03404            }
03405          else if (seq_from->nbytes != 1)
03406            {
03407              if (!be_quiet)
03408               WITH_CUR_LOCALE (error (0, 0, _("\
03409 %s: character `%s' needed as default value not representable with one byte"),
03410                                    "LC_CTYPE", tmp));
03411            }
03412          else
03413            {
03414              /* This conversion is implementation defined.  */
03415              tmp[1] = (char) (ch + ('A' - 'a'));
03416              seq_to = charmap_find_value (charmap, &tmp[1], 1);
03417              if (seq_to == NULL)
03418               {
03419                 char buf[10];
03420                 sprintf (buf, "U%08X", ch + ('A' - 'a'));
03421                 seq_to = charmap_find_value (charmap, buf, 9);
03422               }
03423              if (seq_to == NULL)
03424               {
03425                 if (!be_quiet)
03426                   WITH_CUR_LOCALE (error (0, 0, _("\
03427 %s: character `%s' not defined while needed as default value"),
03428                                        "LC_CTYPE", tmp));
03429               }
03430              else if (seq_to->nbytes != 1)
03431               {
03432                 if (!be_quiet)
03433                   WITH_CUR_LOCALE (error (0, 0, _("\
03434 %s: character `%s' needed as default value not representable with one byte"),
03435                                        "LC_CTYPE", tmp));
03436               }
03437              else
03438               /* The index [0] is determined by the order of the
03439                  `ctype_map_newP' calls in `ctype_startup'.  */
03440               ctype->map256_collection[0][seq_from->bytes[0]]
03441                 = seq_to->bytes[0];
03442            }
03443 
03444          /* No need to search.  */
03445          ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
03446        }
03447     }
03448 
03449   if (ctype->tomap_done[1] == 0)
03450     /* "If this keyword [tolower] is not specified, the mapping shall be
03451        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
03452     {
03453       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
03454        if (ctype->map_collection[0][cnt] != 0)
03455          ELEM (ctype, map_collection, [1],
03456               ctype->map_collection[0][cnt])
03457            = ctype->charnames[cnt];
03458 
03459       for (cnt = 0; cnt < 256; ++cnt)
03460        if (ctype->map256_collection[0][cnt] != 0)
03461          ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
03462     }
03463 
03464   if (ctype->outdigits_act != 10)
03465     {
03466       if (ctype->outdigits_act != 0)
03467        WITH_CUR_LOCALE (error (0, 0, _("\
03468 %s: field `%s' does not contain exactly ten entries"),
03469                             "LC_CTYPE", "outdigit"));
03470 
03471       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
03472        {
03473          ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
03474                                                   (char *) digits + cnt,
03475                                                   1);
03476 
03477          if (ctype->mboutdigits[cnt] == NULL)
03478            ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
03479                                                     longnames[cnt],
03480                                                     strlen (longnames[cnt]));
03481 
03482          if (ctype->mboutdigits[cnt] == NULL)
03483            ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
03484                                                     uninames[cnt], 9);
03485 
03486          if (ctype->mboutdigits[cnt] == NULL)
03487            {
03488              /* Provide a replacement.  */
03489              WITH_CUR_LOCALE (error (0, 0, _("\
03490 no output digits defined and none of the standard names in the charmap")));
03491 
03492              ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
03493                                                  sizeof (struct charseq)
03494                                                  + 1);
03495 
03496              /* This is better than nothing.  */
03497              ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
03498              ctype->mboutdigits[cnt]->nbytes = 1;
03499            }
03500 
03501          ctype->wcoutdigits[cnt] = L'0' + cnt;
03502        }
03503 
03504       ctype->outdigits_act = 10;
03505     }
03506 }
03507 
03508 
/* Builder state for a sparse 3-level bit table over wide characters.
   wchar-lookup.h describes the final table layout and what the
   parameters p and q mean.  */

struct wctype_table
{
  /* Bit-field widths used to split a character code: p selects the
     word inside a level-3 block, q the entry inside a level-2 block.
     The caller fills these in before initializing the table.  */
  unsigned int p, q;

  /* Growable arrays used while entries are being added.  For each
     level, *_alloc is the capacity and *_size the number in use
     (counted in entries for level 1, in blocks for levels 2 and 3).  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;
  size_t level2_alloc, level2_size;
  uint32_t *level2;
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Flat buffer produced by finalization, and its length in bytes.  */
  size_t result_size;
  char *result;
};
03531 
03532 /* Initialize.  Assumes t->p and t->q have already been set.  */
03533 static inline void
03534 wctype_table_init (struct wctype_table *t)
03535 {
03536   t->level1 = NULL;
03537   t->level1_alloc = t->level1_size = 0;
03538   t->level2 = NULL;
03539   t->level2_alloc = t->level2_size = 0;
03540   t->level3 = NULL;
03541   t->level3_alloc = t->level3_size = 0;
03542 }
03543 
03544 /* Retrieve an entry.  */
03545 static inline int
03546 wctype_table_get (struct wctype_table *t, uint32_t wc)
03547 {
03548   uint32_t index1 = wc >> (t->q + t->p + 5);
03549   if (index1 < t->level1_size)
03550     {
03551       uint32_t lookup1 = t->level1[index1];
03552       if (lookup1 != EMPTY)
03553        {
03554          uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
03555                          + (lookup1 << t->q);
03556          uint32_t lookup2 = t->level2[index2];
03557          if (lookup2 != EMPTY)
03558            {
03559              uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
03560                             + (lookup2 << t->p);
03561              uint32_t lookup3 = t->level3[index3];
03562              uint32_t index4 = wc & 0x1f;
03563 
03564              return (lookup3 >> index4) & 1;
03565            }
03566        }
03567     }
03568   return 0;
03569 }
03570 
03571 /* Add one entry.  */
03572 static void
03573 wctype_table_add (struct wctype_table *t, uint32_t wc)
03574 {
03575   uint32_t index1 = wc >> (t->q + t->p + 5);
03576   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
03577   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
03578   uint32_t index4 = wc & 0x1f;
03579   size_t i, i1, i2;
03580 
03581   if (index1 >= t->level1_size)
03582     {
03583       if (index1 >= t->level1_alloc)
03584        {
03585          size_t alloc = 2 * t->level1_alloc;
03586          if (alloc <= index1)
03587            alloc = index1 + 1;
03588          t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
03589                                         alloc * sizeof (uint32_t));
03590          t->level1_alloc = alloc;
03591        }
03592       while (index1 >= t->level1_size)
03593        t->level1[t->level1_size++] = EMPTY;
03594     }
03595 
03596   if (t->level1[index1] == EMPTY)
03597     {
03598       if (t->level2_size == t->level2_alloc)
03599        {
03600          size_t alloc = 2 * t->level2_alloc + 1;
03601          t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
03602                                         (alloc << t->q) * sizeof (uint32_t));
03603          t->level2_alloc = alloc;
03604        }
03605       i1 = t->level2_size << t->q;
03606       i2 = (t->level2_size + 1) << t->q;
03607       for (i = i1; i < i2; i++)
03608        t->level2[i] = EMPTY;
03609       t->level1[index1] = t->level2_size++;
03610     }
03611 
03612   index2 += t->level1[index1] << t->q;
03613 
03614   if (t->level2[index2] == EMPTY)
03615     {
03616       if (t->level3_size == t->level3_alloc)
03617        {
03618          size_t alloc = 2 * t->level3_alloc + 1;
03619          t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
03620                                         (alloc << t->p) * sizeof (uint32_t));
03621          t->level3_alloc = alloc;
03622        }
03623       i1 = t->level3_size << t->p;
03624       i2 = (t->level3_size + 1) << t->p;
03625       for (i = i1; i < i2; i++)
03626        t->level3[i] = 0;
03627       t->level2[index2] = t->level3_size++;
03628     }
03629 
03630   index3 += t->level2[index2] << t->p;
03631 
03632   t->level3[index3] |= (uint32_t)1 << index4;
03633 }
03634 
03635 /* Finalize and shrink.  */
03636 static void
03637 wctype_table_finalize (struct wctype_table *t)
03638 {
03639   size_t i, j, k;
03640   uint32_t reorder3[t->level3_size];
03641   uint32_t reorder2[t->level2_size];
03642   uint32_t level1_offset, level2_offset, level3_offset;
03643 
03644   /* Uniquify level3 blocks.  */
03645   k = 0;
03646   for (j = 0; j < t->level3_size; j++)
03647     {
03648       for (i = 0; i < k; i++)
03649        if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
03650                   (1 << t->p) * sizeof (uint32_t)) == 0)
03651          break;
03652       /* Relocate block j to block i.  */
03653       reorder3[j] = i;
03654       if (i == k)
03655        {
03656          if (i != j)
03657            memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
03658                   (1 << t->p) * sizeof (uint32_t));
03659          k++;
03660        }
03661     }
03662   t->level3_size = k;
03663 
03664   for (i = 0; i < (t->level2_size << t->q); i++)
03665     if (t->level2[i] != EMPTY)
03666       t->level2[i] = reorder3[t->level2[i]];
03667 
03668   /* Uniquify level2 blocks.  */
03669   k = 0;
03670   for (j = 0; j < t->level2_size; j++)
03671     {
03672       for (i = 0; i < k; i++)
03673        if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
03674                   (1 << t->q) * sizeof (uint32_t)) == 0)
03675          break;
03676       /* Relocate block j to block i.  */
03677       reorder2[j] = i;
03678       if (i == k)
03679        {
03680          if (i != j)
03681            memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
03682                   (1 << t->q) * sizeof (uint32_t));
03683          k++;
03684        }
03685     }
03686   t->level2_size = k;
03687 
03688   for (i = 0; i < t->level1_size; i++)
03689     if (t->level1[i] != EMPTY)
03690       t->level1[i] = reorder2[t->level1[i]];
03691 
03692   /* Create and fill the resulting compressed representation.  */
03693   t->result_size =
03694     5 * sizeof (uint32_t)
03695     + t->level1_size * sizeof (uint32_t)
03696     + (t->level2_size << t->q) * sizeof (uint32_t)
03697     + (t->level3_size << t->p) * sizeof (uint32_t);
03698   t->result = (char *) xmalloc (t->result_size);
03699 
03700   level1_offset =
03701     5 * sizeof (uint32_t);
03702   level2_offset =
03703     5 * sizeof (uint32_t)
03704     + t->level1_size * sizeof (uint32_t);
03705   level3_offset =
03706     5 * sizeof (uint32_t)
03707     + t->level1_size * sizeof (uint32_t)
03708     + (t->level2_size << t->q) * sizeof (uint32_t);
03709 
03710   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
03711   ((uint32_t *) t->result)[1] = t->level1_size;
03712   ((uint32_t *) t->result)[2] = t->p + 5;
03713   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
03714   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
03715 
03716   for (i = 0; i < t->level1_size; i++)
03717     ((uint32_t *) (t->result + level1_offset))[i] =
03718       (t->level1[i] == EMPTY
03719        ? 0
03720        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
03721 
03722   for (i = 0; i < (t->level2_size << t->q); i++)
03723     ((uint32_t *) (t->result + level2_offset))[i] =
03724       (t->level2[i] == EMPTY
03725        ? 0
03726        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
03727 
03728   for (i = 0; i < (t->level3_size << t->p); i++)
03729     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
03730 
03731   if (t->level1_alloc > 0)
03732     free (t->level1);
03733   if (t->level2_alloc > 0)
03734     free (t->level2);
03735   if (t->level3_alloc > 0)
03736     free (t->level3);
03737 }
03738 
03739 #define TABLE wcwidth_table
03740 #define ELEMENT uint8_t
03741 #define DEFAULT 0xff
03742 #include "3level.h"
03743 
03744 #define TABLE wctrans_table
03745 #define ELEMENT int32_t
03746 #define DEFAULT 0
03747 #define wctrans_table_add wctrans_table_add_internal
03748 #include "3level.h"
03749 #undef wctrans_table_add
03750 /* The wctrans_table must actually store the difference between the
03751    desired result and the argument.  */
03752 static inline void
03753 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
03754 {
03755   wctrans_table_add_internal (t, wc, mapped_wc - wc);
03756 }
03757 
03758 
03759 /* Flattens the included transliterations into a translit list.
03760    Inserts them in the list at `cursor', and returns the new cursor.  */
03761 static struct translit_t **
03762 translit_flatten (struct locale_ctype_t *ctype,
03763                 const struct charmap_t *charmap,
03764                 struct translit_t **cursor)
03765 {
03766   while (ctype->translit_include != NULL)
03767     {
03768       const char *copy_locale = ctype->translit_include->copy_locale;
03769       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
03770       struct localedef_t *other;
03771 
03772       /* Unchain the include statement.  During the depth-first traversal
03773         we don't want to visit any locale more than once.  */
03774       ctype->translit_include = ctype->translit_include->next;
03775 
03776       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
03777 
03778       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
03779        {
03780          WITH_CUR_LOCALE (error (0, 0, _("\
03781 %s: transliteration data from locale `%s' not available"),
03782                               "LC_CTYPE", copy_locale));
03783        }
03784       else
03785        {
03786          struct locale_ctype_t *other_ctype =
03787            other->categories[LC_CTYPE].ctype;
03788 
03789          cursor = translit_flatten (other_ctype, charmap, cursor);
03790          assert (other_ctype->translit_include == NULL);
03791 
03792          if (other_ctype->translit != NULL)
03793            {
03794              /* Insert the other_ctype->translit list at *cursor.  */
03795              struct translit_t *endp = other_ctype->translit;
03796              while (endp->next != NULL)
03797               endp = endp->next;
03798 
03799              endp->next = *cursor;
03800              *cursor = other_ctype->translit;
03801 
03802              /* Avoid any risk of circular lists.  */
03803              other_ctype->translit = NULL;
03804 
03805              cursor = &endp->next;
03806            }
03807 
03808          if (ctype->default_missing == NULL)
03809            ctype->default_missing = other_ctype->default_missing;
03810        }
03811     }
03812 
03813   return cursor;
03814 }
03815 
03816 static void
03817 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
03818                struct repertoire_t *repertoire)
03819 {
03820   size_t idx, nr;
03821   const void *key;
03822   size_t len;
03823   void *vdata;
03824   void *curs;
03825 
03826   /* You wonder about this amount of memory?  This is only because some
03827      users do not manage to address the array with unsigned values or
03828      data types with range >= 256.  '\200' would result in the array
03829      index -128.  To help these poor people we duplicate the entries for
03830      128 up to 255 below the entry for \0.  */
03831   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
03832   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
03833   ctype->class_b = (uint32_t **)
03834     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
03835   ctype->class_3level = (struct iovec *)
03836     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
03837 
03838   /* This is the array accessed using the multibyte string elements.  */
03839   for (idx = 0; idx < 256; ++idx)
03840     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
03841 
03842   /* Mirror first 127 entries.  We must take care that entry -1 is not
03843      mirrored because EOF == -1.  */
03844   for (idx = 0; idx < 127; ++idx)
03845     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
03846 
03847   /* The 32 bit array contains all characters < 0x100.  */
03848   for (idx = 0; idx < ctype->class_collection_act; ++idx)
03849     if (ctype->charnames[idx] < 0x100)
03850       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
03851 
03852   for (nr = 0; nr < ctype->nr_charclass; nr++)
03853     {
03854       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
03855 
03856       /* We only set CLASS_B for the bits in the ISO C classes, not
03857         the user defined classes.  The number should not change but
03858         who knows.  */
03859 #define LAST_ISO_C_BIT 11
03860       if (nr <= LAST_ISO_C_BIT)
03861        for (idx = 0; idx < 256; ++idx)
03862          if (ctype->class256_collection[idx] & _ISbit (nr))
03863            ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
03864     }
03865 
03866   for (nr = 0; nr < ctype->nr_charclass; nr++)
03867     {
03868       struct wctype_table t;
03869 
03870       t.p = 4; /* or: 5 */
03871       t.q = 7; /* or: 6 */
03872       wctype_table_init (&t);
03873 
03874       for (idx = 0; idx < ctype->class_collection_act; ++idx)
03875        if (ctype->class_collection[idx] & _ISwbit (nr))
03876          wctype_table_add (&t, ctype->charnames[idx]);
03877 
03878       wctype_table_finalize (&t);
03879 
03880       if (verbose)
03881        WITH_CUR_LOCALE (fprintf (stderr, _("\
03882 %s: table for class \"%s\": %lu bytes\n"),
03883                              "LC_CTYPE", ctype->classnames[nr],
03884                              (unsigned long int) t.result_size));
03885 
03886       ctype->class_3level[nr].iov_base = t.result;
03887       ctype->class_3level[nr].iov_len = t.result_size;
03888     }
03889 
03890   /* Room for table of mappings.  */
03891   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
03892   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
03893                                      * sizeof (uint32_t *));
03894   ctype->map_3level = (struct iovec *)
03895     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
03896 
03897   /* Fill in all mappings.  */
03898   for (idx = 0; idx < 2; ++idx)
03899     {
03900       unsigned int idx2;
03901 
03902       /* Allocate table.  */
03903       ctype->map_b[idx] = (uint32_t *)
03904        xmalloc ((256 + 128) * sizeof (uint32_t));
03905 
03906       /* Copy values from collection.  */
03907       for (idx2 = 0; idx2 < 256; ++idx2)
03908        ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
03909 
03910       /* Mirror first 127 entries.  We must take care not to map entry
03911         -1 because EOF == -1.  */
03912       for (idx2 = 0; idx2 < 127; ++idx2)
03913        ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
03914 
03915       /* EOF must map to EOF.  */
03916       ctype->map_b[idx][127] = EOF;
03917     }
03918 
03919   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
03920     {
03921       unsigned int idx2;
03922 
03923       /* Allocate table.  */
03924       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
03925 
03926       /* Copy values from collection.  Default is identity mapping.  */
03927       for (idx2 = 0; idx2 < 256; ++idx2)
03928        ctype->map32_b[idx][idx2] =
03929          (ctype->map_collection[idx][idx2] != 0
03930           ? ctype->map_collection[idx][idx2]
03931           : idx2);
03932     }
03933 
03934   for (nr = 0; nr < ctype->map_collection_nr; nr++)
03935     {
03936       struct wctrans_table t;
03937 
03938       t.p = 7;
03939       t.q = 9;
03940       wctrans_table_init (&t);
03941 
03942       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
03943        if (ctype->map_collection[nr][idx] != 0)
03944          wctrans_table_add (&t, ctype->charnames[idx],
03945                           ctype->map_collection[nr][idx]);
03946 
03947       wctrans_table_finalize (&t);
03948 
03949       if (verbose)
03950        WITH_CUR_LOCALE (fprintf (stderr, _("\
03951 %s: table for map \"%s\": %lu bytes\n"),
03952                              "LC_CTYPE", ctype->mapnames[nr],
03953                              (unsigned long int) t.result_size));
03954 
03955       ctype->map_3level[nr].iov_base = t.result;
03956       ctype->map_3level[nr].iov_len = t.result_size;
03957     }
03958 
03959   /* Extra array for class and map names.  */
03960   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
03961                                           * sizeof (uint32_t));
03962   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
03963                                          * sizeof (uint32_t));
03964 
03965   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
03966   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
03967 
03968   /* Array for width information.  Because the expected widths are very
03969      small (never larger than 2) we use only one single byte.  This
03970      saves space.
03971      We put only printable characters in the table.  wcwidth is specified
03972      to return -1 for non-printable characters.  Doing the check here
03973      saves a run-time check.
03974      But we put L'\0' in the table.  This again saves a run-time check.  */
03975   {
03976     struct wcwidth_table t;
03977 
03978     t.p = 7;
03979     t.q = 9;
03980     wcwidth_table_init (&t);
03981 
03982     /* First set all the printable characters of the character set to
03983        the default width.  */
03984     curs = NULL;
03985     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
03986       {
03987        struct charseq *data = (struct charseq *) vdata;
03988 
03989        if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
03990          data->ucs4 = repertoire_find_value (ctype->repertoire,
03991                                          data->name, len);
03992 
03993        if (data->ucs4 != ILLEGAL_CHAR_VALUE)
03994          {
03995            uint32_t *class_bits =
03996              find_idx (ctype, &ctype->class_collection, NULL,
03997                      &ctype->class_collection_act, data->ucs4);
03998 
03999            if (class_bits != NULL && (*class_bits & BITw (tok_print)))
04000              wcwidth_table_add (&t, data->ucs4, charmap->width_default);
04001          }
04002       }
04003 
04004     /* Now add the explicitly specified widths.  */
04005     if (charmap->width_rules != NULL)
04006       {
04007        size_t cnt;
04008 
04009        for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
04010          {
04011            unsigned char bytes[charmap->mb_cur_max];
04012            int nbytes = charmap->width_rules[cnt].from->nbytes;
04013 
04014            /* We have the range of characters for which the width is
04015               specified described using byte sequences of the multibyte
04016               charset.  We have to convert this to UCS4 now.  And we
04017               cannot simply convert the beginning and the end of the
04018               sequence, we have to iterate over the byte sequence and
04019               convert it for every single character.  */
04020            memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
04021 
04022            while (nbytes < charmap->width_rules[cnt].to->nbytes
04023                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
04024                            nbytes) <= 0)
04025              {
04026               /* Find the UCS value for `bytes'.  */
04027               int inner;
04028               uint32_t wch;
04029               struct charseq *seq =
04030                 charmap_find_symbol (charmap, (char *) bytes, nbytes);
04031 
04032               if (seq == NULL)
04033                 wch = ILLEGAL_CHAR_VALUE;
04034               else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
04035                 wch = seq->ucs4;
04036               else
04037                 wch = repertoire_find_value (ctype->repertoire, seq->name,
04038                                           strlen (seq->name));
04039 
04040               if (wch != ILLEGAL_CHAR_VALUE)
04041                 {
04042                   /* Store the value.  */
04043                   uint32_t *class_bits =
04044                     find_idx (ctype, &ctype->class_collection, NULL,
04045                             &ctype->class_collection_act, wch);
04046 
04047                   if (class_bits != NULL && (*class_bits & BITw (tok_print)))
04048                     wcwidth_table_add (&t, wch,
04049                                     charmap->width_rules[cnt].width);
04050                 }
04051 
04052               /* "Increment" the bytes sequence.  */
04053               inner = nbytes - 1;
04054               while (inner >= 0 && bytes[inner] == 0xff)
04055                 --inner;
04056 
04057               if (inner < 0)
04058                 {
04059                   /* We have to extend the byte sequence.  */
04060                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
04061                     break;
04062 
04063                   bytes[0] = 1;
04064                   memset (&bytes[1], 0, nbytes);
04065                   ++nbytes;
04066                 }
04067               else
04068                 {
04069                   ++bytes[inner];
04070                   while (++inner < nbytes)
04071                     bytes[inner] = 0;
04072                 }
04073              }
04074          }
04075       }
04076 
04077     /* Set the width of L'\0' to 0.  */
04078     wcwidth_table_add (&t, 0, 0);
04079 
04080     wcwidth_table_finalize (&t);
04081 
04082     if (verbose)
04083       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
04084                             "LC_CTYPE", (unsigned long int) t.result_size));
04085 
04086     ctype->width.iov_base = t.result;
04087     ctype->width.iov_len = t.result_size;
04088   }
04089 
04090   /* Set MB_CUR_MAX.  */
04091   ctype->mb_cur_max = charmap->mb_cur_max;
04092 
04093   /* Now determine the table for the transliteration information.
04094 
04095      XXX It is not yet clear to me whether it is worth implementing a
04096      complicated algorithm which uses a hash table to locate the entries.
04097      For now I'll use a simple array which can be searched using binary
04098      search.  */
04099   if (ctype->translit_include != NULL)
04100     /* Traverse the locales mentioned in the `include' statements in a
04101        depth-first way and fold in their transliteration information.  */
04102     translit_flatten (ctype, charmap, &ctype->translit);
04103 
04104   if (ctype->translit != NULL)
04105     {
04106       /* First count how many entries we have.  This is the upper limit
04107         since some entries from the included files might be overwritten.  */
04108       size_t number = 0;
04109       size_t cnt;
04110       struct translit_t *runp = ctype->translit;
04111       struct translit_t **sorted;
04112       size_t from_len, to_len;
04113 
04114       while (runp != NULL)
04115        {
04116          ++number;
04117          runp = runp->next;
04118        }
04119 
04120       /* Next we allocate an array large enough and fill in the values.  */
04121       sorted = (struct translit_t **) alloca (number
04122                                          * sizeof (struct translit_t **));
04123       runp = ctype->translit;
04124       number = 0;
04125       do
04126        {
04127          /* Search for the place where to insert this string.
04128             XXX Better use a real sorting algorithm later.  */
04129          size_t idx = 0;
04130          int replace = 0;
04131 
04132          while (idx < number)
04133            {
04134              int res = wcscmp ((const wchar_t *) sorted[idx]->from,
04135                             (const wchar_t *) runp->from);
04136              if (res == 0)
04137               {
04138                 replace = 1;
04139                 break;
04140               }
04141              if (res > 0)
04142               break;
04143              ++idx;
04144            }
04145 
04146          if (replace)
04147            sorted[idx] = runp;
04148          else
04149            {
04150              memmove (&sorted[idx + 1], &sorted[idx],
04151                      (number - idx) * sizeof (struct translit_t *));
04152              sorted[idx] = runp;
04153              ++number;
04154            }
04155 
04156          runp = runp->next;
04157        }
04158       while (runp != NULL);
04159 
04160       /* The next step is putting all the possible transliteration
04161         strings in one memory block so that we can write it out.
04162         We need several different blocks:
04163         - index to the from-string array
04164         - from-string array
04165         - index to the to-string array
04166         - to-string array.
04167       */
04168       from_len = to_len = 0;
04169       for (cnt = 0; cnt < number; ++cnt)
04170        {
04171          struct translit_to_t *srunp;
04172          from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
04173          srunp = sorted[cnt]->to;
04174          while (srunp != NULL)
04175            {
04176              to_len += wcslen ((const wchar_t *) srunp->str) + 1;
04177              srunp = srunp->next;
04178            }
04179          /* Plus one for the extra NUL character marking the end of
04180             the list for the current entry.  */
04181          ++to_len;
04182        }
04183 
04184       /* We can allocate the arrays for the results.  */
04185       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
04186       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
04187       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
04188       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
04189 
04190       from_len = 0;
04191       to_len = 0;
04192       for (cnt = 0; cnt < number; ++cnt)
04193        {
04194          size_t len;
04195          struct translit_to_t *srunp;
04196 
04197          ctype->translit_from_idx[cnt] = from_len;
04198          ctype->translit_to_idx[cnt] = to_len;
04199 
04200          len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
04201          wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
04202                  (const wchar_t *) sorted[cnt]->from, len);
04203          from_len += len;
04204 
04205          ctype->translit_to_idx[cnt] = to_len;
04206          srunp = sorted[cnt]->to;
04207          while (srunp != NULL)
04208            {
04209              len = wcslen ((const wchar_t *) srunp->str) + 1;
04210              wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
04211                      (const wchar_t *) srunp->str, len);
04212              to_len += len;
04213              srunp = srunp->next;
04214            }
04215          ctype->translit_to_tbl[to_len++] = L'\0';
04216        }
04217 
04218       /* Store the information about the length.  */
04219       ctype->translit_idx_size = number;
04220       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
04221       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
04222     }
04223   else
04224     {
04225       /* Provide some dummy pointers since we have nothing to write out.  */
04226       static uint32_t no_str = { 0 };
04227 
04228       ctype->translit_from_idx = &no_str;
04229       ctype->translit_from_tbl = &no_str;
04230       ctype->translit_to_tbl = &no_str;
04231       ctype->translit_idx_size = 0;
04232       ctype->translit_from_tbl_size = 0;
04233       ctype->translit_to_tbl_size = 0;
04234     }
04235 }