Back to index

glibc  2.9
ld-collate.c
Go to the documentation of this file.
00001 /* Copyright (C) 1995-2003, 2005-2007, 2008 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License as published
00007    by the Free Software Foundation; version 2 of the License, or
00008    (at your option) any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software Foundation,
00017    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
00018 
00019 #ifdef HAVE_CONFIG_H
00020 # include <config.h>
00021 #endif
00022 
00023 #include <errno.h>
00024 #include <error.h>
00025 #include <stdlib.h>
00026 #include <wchar.h>
00027 #include <sys/param.h>
00028 
00029 #include "localedef.h"
00030 #include "charmap.h"
00031 #include "localeinfo.h"
00032 #include "linereader.h"
00033 #include "locfile.h"
00034 #include "elem-hash.h"
00035 
00036 /* Uncomment the following line in the production version.  */
00037 /* #define NDEBUG 1 */
00038 #include <assert.h>
00039 
00040 #define obstack_chunk_alloc malloc
00041 #define obstack_chunk_free free
00042 
/* Append the 32-bit value DATA to the object currently growing in
   OBSTACK.  When `int' is not 32 bits wide the value is appended as
   raw bytes; otherwise the int-sized obstack primitive is used.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &data, sizeof (int32_t));
  else
    obstack_int_grow (obstack, data);
}
00052 
/* Like obstack_int32_grow, but uses the unchecked int-sized obstack
   primitive when `int' is 32 bits wide.  If the sizes differ the
   value is appended with the regular, checked obstack_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &data, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, data);
}
00062 
00063 /* Forward declaration.  */
00064 struct element_t;
00065 
/* Description of one section.  Every node can be chained into two
   singly linked lists at the same time: the known_sections list
   (through DEF_NEXT) and the sections list (through NEXT).  */
struct section_list
{
  struct section_list *def_next;  /* Successor in the known_sections list.  */
  struct section_list *next;      /* Successor in the sections list.  */
  const char *name;               /* Name of the section.  */
  struct element_t *first;        /* First element of this section.  */
  struct element_t *last;         /* Last element of this section.  */
  enum coll_sort_rule *rules;     /* The rules for this section.  */
  int ruleidx;                    /* Index of the rule set in the appropriate
                                     section of the output file.  */
};
00084 
struct element_t;

/* A counted array of element pointers.  */
struct element_list_t
{
  int cnt;                 /* Number of elements.  */
  struct element_t **w;    /* The CNT element pointers.  */
};
00094 
/* Data type for collating element.  */
struct element_t
{
  const char *name;

  /* Multibyte and wide character representation with their lengths,
     and the associated order information.  */
  const char *mbs;
  size_t nmbs;
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* Bit mask; a bit is set if this element is used in the
     corresponding level.  Interesting for the singlebyte weight
     computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Weight lists of this element.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where the definition comes from.  */
  const char *file;
  size_t line;

  /* The section this element belongs to.  */
  struct section_list *section;

  /* Predecessor (LAST) and successor (NEXT) in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Links for the multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Links for the wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
00144 
00145 /* Special element value.  */
00146 #define ELEMENT_ELLIPSIS2   ((struct element_t *) 1)
00147 #define ELEMENT_ELLIPSIS3   ((struct element_t *) 2)
00148 #define ELEMENT_ELLIPSIS4   ((struct element_t *) 3)
00149 
/* Data type for collating symbol.  */
struct symbol_t
{
  const char *name;

  /* The symbol's place in the order list.  */
  struct element_t *order;

  /* Where the definition comes from.  */
  const char *file;
  size_t line;
};
00162 
00163 /* Sparse table of struct element_t *.  */
00164 #define TABLE wchead_table
00165 #define ELEMENT struct element_t *
00166 #define DEFAULT NULL
00167 #define ITERATE
00168 #define NO_FINALIZE
00169 #include "3level.h"
00170 
00171 /* Sparse table of int32_t.  */
00172 #define TABLE collidx_table
00173 #define ELEMENT int32_t
00174 #define DEFAULT 0
00175 #include "3level.h"
00176 
00177 /* Sparse table of uint32_t.  */
00178 #define TABLE collseq_table
00179 #define ELEMENT uint32_t
00180 #define DEFAULT ~((uint32_t) 0)
00181 #include "3level.h"
00182 
00183 
/* Simple name list for the preprocessor.  Every node carries its
   NUL-terminated name in the trailing flexible array member, so a
   node must be allocated with room for the string after the
   structure itself.  */
struct name_list
{
  struct name_list *next;
  /* C99 flexible array member; replaces the GNU zero-length array
     and leaves sizeof (struct name_list) unchanged.  */
  char str[];
};
00190 
00191 
00192 /* The real definition of the struct for the LC_COLLATE locale.  */
00193 struct locale_collate_t
00194 {
00195   int col_weight_max;
00196   int cur_weight_max;
00197 
00198   /* List of known scripts.  */
00199   struct section_list *known_sections;
00200   /* List of used sections.  */
00201   struct section_list *sections;
00202   /* Current section using definition.  */
00203   struct section_list *current_section;
00204   /* There always can be an unnamed section.  */
00205   struct section_list unnamed_section;
00206   /* To make handling of errors easier we have another section.  */
00207   struct section_list error_section;
00208   /* Sometimes we are defining the values for collating symbols before
00209      the first actual section.  */
00210   struct section_list symbol_section;
00211 
00212   /* Start of the order list.  */
00213   struct element_t *start;
00214 
00215   /* The undefined element.  */
00216   struct element_t undefined;
00217 
00218   /* This is the cursor for `reorder_after' insertions.  */
00219   struct element_t *cursor;
00220 
00221   /* This value is used when handling ellipsis.  */
00222   struct element_t ellipsis_weight;
00223 
00224   /* Known collating elements.  */
00225   hash_table elem_table;
00226 
00227   /* Known collating symbols.  */
00228   hash_table sym_table;
00229 
00230   /* Known collation sequences.  */
00231   hash_table seq_table;
00232 
00233   struct obstack mempool;
00234 
00235   /* The LC_COLLATE category is a bit special as it is sometimes possible
00236      that the definitions from more than one input file contains information.
00237      Therefore we keep all relevant input in a list.  */
00238   struct locale_collate_t *next;
00239 
00240   /* Arrays with heads of the list for each of the leading bytes in
00241      the multibyte sequences.  */
00242   struct element_t *mbheads[256];
00243 
00244   /* Arrays with heads of the list for each of the leading bytes in
00245      the multibyte sequences.  */
00246   struct wchead_table wcheads;
00247 
00248   /* The arrays with the collation sequence order.  */
00249   unsigned char mbseqorder[256];
00250   struct collseq_table wcseqorder;
00251 
00252   /* State of the preprocessor.  */
00253   enum
00254     {
00255       else_none = 0,
00256       else_ignore,
00257       else_seen
00258     }
00259     else_action;
00260 };
00261 
00262 
00263 /* We have a few global variables which are used for reading all
00264    LC_COLLATE category descriptions in all files.  */
00265 static uint32_t nrules;
00266 
00267 /* List of defined preprocessor symbols.  */
00268 static struct name_list *defined;
00269 
00270 
/* Store the UTF-8 encoding of VAL in BUF and return the number of
   bytes written (1 to 6).  BUF must have room for the whole sequence;
   no terminating NUL is added.  Values below 0x80 (which includes
   negative ones) are stored as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int nbytes;
  int idx;

  if (val < 0x80)
    {
      buf[0] = (char) val;
      return 1;
    }

  /* A sequence of N bytes carries 5 * N + 1 payload bits.  Pick the
     shortest length which can hold VAL, with six bytes as the cap.  */
  for (nbytes = 2; nbytes < 6; ++nbytes)
    if ((val & (~(uint32_t) 0 << (5 * nbytes + 1))) == 0)
      break;

  /* Fill in the continuation bytes from the end, six bits each.  */
  for (idx = nbytes - 1; idx > 0; --idx)
    {
      buf[idx] = (char) (0x80 | (val & 0x3f));
      val >>= 6;
    }

  /* The lead byte has its NBYTES topmost bits set, followed by the
     remaining payload bits.  The marker is derived from the
     non-negative constant 0xff00, so the result does not depend on
     how the implementation right-shifts a negative value.  */
  buf[0] = (char) (((0xff00 >> nbytes) | val) & 0xff);

  return nbytes;
}
00305 
00306 
00307 static struct section_list *
00308 make_seclist_elem (struct locale_collate_t *collate, const char *string,
00309                  struct section_list *next)
00310 {
00311   struct section_list *newp;
00312 
00313   newp = (struct section_list *) obstack_alloc (&collate->mempool,
00314                                           sizeof (*newp));
00315   newp->next = next;
00316   newp->name = string;
00317   newp->first = NULL;
00318   newp->last = NULL;
00319 
00320   return newp;
00321 }
00322 
00323 
00324 static struct element_t *
00325 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
00326             const uint32_t *wcs, const char *name, size_t namelen,
00327             int is_character)
00328 {
00329   struct element_t *newp;
00330 
00331   newp = (struct element_t *) obstack_alloc (&collate->mempool,
00332                                         sizeof (*newp));
00333   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
00334                                               name, namelen);
00335   if (mbs != NULL)
00336     {
00337       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
00338       newp->nmbs = mbslen;
00339     }
00340   else
00341     {
00342       newp->mbs = NULL;
00343       newp->nmbs = 0;
00344     }
00345   if (wcs != NULL)
00346     {
00347       size_t nwcs = wcslen ((wchar_t *) wcs);
00348       uint32_t zero = 0;
00349       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
00350       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
00351       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
00352       newp->nwcs = nwcs;
00353     }
00354   else
00355     {
00356       newp->wcs = NULL;
00357       newp->nwcs = 0;
00358     }
00359   newp->mborder = NULL;
00360   newp->wcorder = 0;
00361   newp->used_in_level = 0;
00362   newp->is_character = is_character;
00363 
00364   /* Will be assigned later.  XXX  */
00365   newp->mbseqorder = 0;
00366   newp->wcseqorder = 0;
00367 
00368   /* Will be allocated later.  */
00369   newp->weights = NULL;
00370 
00371   newp->file = NULL;
00372   newp->line = 0;
00373 
00374   newp->section = collate->current_section;
00375 
00376   newp->last = NULL;
00377   newp->next = NULL;
00378 
00379   newp->mbnext = NULL;
00380   newp->mblast = NULL;
00381 
00382   newp->wcnext = NULL;
00383   newp->wclast = NULL;
00384 
00385   return newp;
00386 }
00387 
00388 
00389 static struct symbol_t *
00390 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
00391 {
00392   struct symbol_t *newp;
00393 
00394   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
00395 
00396   newp->name = obstack_copy0 (&collate->mempool, name, len);
00397   newp->order = NULL;
00398 
00399   newp->file = NULL;
00400   newp->line = 0;
00401 
00402   return newp;
00403 }
00404 
00405 
00406 /* Test whether this name is already defined somewhere.  */
00407 static int
00408 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
00409                const struct charmap_t *charmap,
00410                struct repertoire_t *repertoire, const char *symbol,
00411                size_t symbol_len)
00412 {
00413   void *ignore = NULL;
00414 
00415   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
00416     {
00417       lr_error (ldfile, _("`%.*s' already defined in charmap"),
00418               (int) symbol_len, symbol);
00419       return 1;
00420     }
00421 
00422   if (repertoire != NULL
00423       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
00424          == 0))
00425     {
00426       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
00427               (int) symbol_len, symbol);
00428       return 1;
00429     }
00430 
00431   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
00432     {
00433       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
00434               (int) symbol_len, symbol);
00435       return 1;
00436     }
00437 
00438   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
00439     {
00440       lr_error (ldfile, _("`%.*s' already defined as collating element"),
00441               (int) symbol_len, symbol);
00442       return 1;
00443     }
00444 
00445   return 0;
00446 }
00447 
00448 
00449 /* Read the direction specification.  */
00450 static void
00451 read_directions (struct linereader *ldfile, struct token *arg,
00452                const struct charmap_t *charmap,
00453                struct repertoire_t *repertoire, struct localedef_t *result)
00454 {
00455   int cnt = 0;
00456   int max = nrules ?: 10;
00457   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
00458   int warned = 0;
00459   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
00460 
00461   while (1)
00462     {
00463       int valid = 0;
00464 
00465       if (arg->tok == tok_forward)
00466        {
00467          if (rules[cnt] & sort_backward)
00468            {
00469              if (! warned)
00470               {
00471                 lr_error (ldfile, _("\
00472 %s: `forward' and `backward' are mutually excluding each other"),
00473                          "LC_COLLATE");
00474                 warned = 1;
00475               }
00476            }
00477          else if (rules[cnt] & sort_forward)
00478            {
00479              if (! warned)
00480               {
00481                 lr_error (ldfile, _("\
00482 %s: `%s' mentioned more than once in definition of weight %d"),
00483                          "LC_COLLATE", "forward", cnt + 1);
00484               }
00485            }
00486          else
00487            rules[cnt] |= sort_forward;
00488 
00489          valid = 1;
00490        }
00491       else if (arg->tok == tok_backward)
00492        {
00493          if (rules[cnt] & sort_forward)
00494            {
00495              if (! warned)
00496               {
00497                 lr_error (ldfile, _("\
00498 %s: `forward' and `backward' are mutually excluding each other"),
00499                          "LC_COLLATE");
00500                 warned = 1;
00501               }
00502            }
00503          else if (rules[cnt] & sort_backward)
00504            {
00505              if (! warned)
00506               {
00507                 lr_error (ldfile, _("\
00508 %s: `%s' mentioned more than once in definition of weight %d"),
00509                          "LC_COLLATE", "backward", cnt + 1);
00510               }
00511            }
00512          else
00513            rules[cnt] |= sort_backward;
00514 
00515          valid = 1;
00516        }
00517       else if (arg->tok == tok_position)
00518        {
00519          if (rules[cnt] & sort_position)
00520            {
00521              if (! warned)
00522               {
00523                 lr_error (ldfile, _("\
00524 %s: `%s' mentioned more than once in definition of weight %d"),
00525                          "LC_COLLATE", "position", cnt + 1);
00526               }
00527            }
00528          else
00529            rules[cnt] |= sort_position;
00530 
00531          valid = 1;
00532        }
00533 
00534       if (valid)
00535        arg = lr_token (ldfile, charmap, result, repertoire, verbose);
00536 
00537       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
00538          || arg->tok == tok_semicolon)
00539        {
00540          if (! valid && ! warned)
00541            {
00542              lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
00543              warned = 1;
00544            }
00545 
00546          /* See whether we have to increment the counter.  */
00547          if (arg->tok != tok_comma && rules[cnt] != 0)
00548            {
00549              /* Add the default `forward' if we have seen only `position'.  */
00550              if (rules[cnt] == sort_position)
00551               rules[cnt] = sort_position | sort_forward;
00552 
00553              ++cnt;
00554            }
00555 
00556          if (arg->tok == tok_eof || arg->tok == tok_eol)
00557            /* End of line or file, so we exit the loop.  */
00558            break;
00559 
00560          if (nrules == 0)
00561            {
00562              /* See whether we have enough room in the array.  */
00563              if (cnt == max)
00564               {
00565                 max += 10;
00566                 rules = (enum coll_sort_rule *) xrealloc (rules,
00567                                                      max
00568                                                      * sizeof (*rules));
00569                 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
00570               }
00571            }
00572          else
00573            {
00574              if (cnt == nrules)
00575               {
00576                 /* There must not be any more rule.  */
00577                 if (! warned)
00578                   {
00579                     lr_error (ldfile, _("\
00580 %s: too many rules; first entry only had %d"),
00581                             "LC_COLLATE", nrules);
00582                     warned = 1;
00583                   }
00584 
00585                 lr_ignore_rest (ldfile, 0);
00586                 break;
00587               }
00588            }
00589        }
00590       else
00591        {
00592          if (! warned)
00593            {
00594              lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
00595              warned = 1;
00596            }
00597        }
00598 
00599       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
00600     }
00601 
00602   if (nrules == 0)
00603     {
00604       /* Now we know how many rules we have.  */
00605       nrules = cnt;
00606       rules = (enum coll_sort_rule *) xrealloc (rules,
00607                                           nrules * sizeof (*rules));
00608     }
00609   else
00610     {
00611       if (cnt < nrules)
00612        {
00613          /* Not enough rules in this specification.  */
00614          if (! warned)
00615            lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
00616 
00617          do
00618            rules[cnt] = sort_forward;
00619          while (++cnt < nrules);
00620        }
00621     }
00622 
00623   collate->current_section->rules = rules;
00624 }
00625 
00626 
00627 static struct element_t *
00628 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
00629              const char *str, size_t len)
00630 {
00631   void *result = NULL;
00632 
00633   /* Search for the entries among the collation sequences already define.  */
00634   if (find_entry (&collate->seq_table, str, len, &result) != 0)
00635     {
00636       /* Nope, not define yet.  So we see whether it is a
00637          collation symbol.  */
00638       void *ptr;
00639 
00640       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
00641        {
00642          /* It's a collation symbol.  */
00643          struct symbol_t *sym = (struct symbol_t *) ptr;
00644          result = sym->order;
00645 
00646          if (result == NULL)
00647            result = sym->order = new_element (collate, NULL, 0, NULL,
00648                                           NULL, 0, 0);
00649        }
00650       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
00651        {
00652          /* It's also no collation element.  So it is a character
00653             element defined later.  */
00654          result = new_element (collate, NULL, 0, NULL, str, len, 1);
00655          /* Insert it into the sequence table.  */
00656          insert_entry (&collate->seq_table, str, len, result);
00657        }
00658     }
00659 
00660   return (struct element_t *) result;
00661 }
00662 
00663 
00664 static void
00665 unlink_element (struct locale_collate_t *collate)
00666 {
00667   if (collate->cursor == collate->start)
00668     {
00669       assert (collate->cursor->next == NULL);
00670       assert (collate->cursor->last == NULL);
00671       collate->cursor = NULL;
00672     }
00673   else
00674     {
00675       if (collate->cursor->next != NULL)
00676        collate->cursor->next->last = collate->cursor->last;
00677       if (collate->cursor->last != NULL)
00678        collate->cursor->last->next = collate->cursor->next;
00679       collate->cursor = collate->cursor->last;
00680     }
00681 }
00682 
00683 
00684 static void
00685 insert_weights (struct linereader *ldfile, struct element_t *elem,
00686               const struct charmap_t *charmap,
00687               struct repertoire_t *repertoire, struct localedef_t *result,
00688               enum token_t ellipsis)
00689 {
00690   int weight_cnt;
00691   struct token *arg;
00692   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
00693 
00694   /* Initialize all the fields.  */
00695   elem->file = ldfile->fname;
00696   elem->line = ldfile->lineno;
00697 
00698   elem->last = collate->cursor;
00699   elem->next = collate->cursor ? collate->cursor->next : NULL;
00700   if (collate->cursor != NULL && collate->cursor->next != NULL)
00701     collate->cursor->next->last = elem;
00702   if (collate->cursor != NULL)
00703     collate->cursor->next = elem;
00704   if (collate->start == NULL)
00705     {
00706       assert (collate->cursor == NULL);
00707       collate->start = elem;
00708     }
00709 
00710   elem->section = collate->current_section;
00711 
00712   if (collate->current_section->first == NULL)
00713     collate->current_section->first = elem;
00714   if (collate->current_section->last == collate->cursor)
00715     collate->current_section->last = elem;
00716 
00717   collate->cursor = elem;
00718 
00719   elem->weights = (struct element_list_t *)
00720     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
00721   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
00722 
00723   weight_cnt = 0;
00724 
00725   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
00726   do
00727     {
00728       if (arg->tok == tok_eof || arg->tok == tok_eol)
00729        break;
00730 
00731       if (arg->tok == tok_ignore)
00732        {
00733          /* The weight for this level has to be ignored.  We use the
00734             null pointer to indicate this.  */
00735          elem->weights[weight_cnt].w = (struct element_t **)
00736            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
00737          elem->weights[weight_cnt].w[0] = NULL;
00738          elem->weights[weight_cnt].cnt = 1;
00739        }
00740       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
00741        {
00742          char ucs4str[10];
00743          struct element_t *val;
00744          char *symstr;
00745          size_t symlen;
00746 
00747          if (arg->tok == tok_bsymbol)
00748            {
00749              symstr = arg->val.str.startmb;
00750              symlen = arg->val.str.lenmb;
00751            }
00752          else
00753            {
00754              snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
00755              symstr = ucs4str;
00756              symlen = 9;
00757            }
00758 
00759          val = find_element (ldfile, collate, symstr, symlen);
00760          if (val == NULL)
00761            break;
00762 
00763          elem->weights[weight_cnt].w = (struct element_t **)
00764            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
00765          elem->weights[weight_cnt].w[0] = val;
00766          elem->weights[weight_cnt].cnt = 1;
00767        }
00768       else if (arg->tok == tok_string)
00769        {
00770          /* Split the string up in the individual characters and put
00771             the element definitions in the list.  */
00772          const char *cp = arg->val.str.startmb;
00773          int cnt = 0;
00774          struct element_t *charelem;
00775          struct element_t **weights = NULL;
00776          int max = 0;
00777 
00778          if (*cp == '\0')
00779            {
00780              lr_error (ldfile, _("%s: empty weight string not allowed"),
00781                      "LC_COLLATE");
00782              lr_ignore_rest (ldfile, 0);
00783              break;
00784            }
00785 
00786          do
00787            {
00788              if (*cp == '<')
00789               {
00790                 /* Ahh, it's a bsymbol or an UCS4 value.  If it's
00791                      the latter we have to unify the name.  */
00792                 const char *startp = ++cp;
00793                 size_t len;
00794 
00795                 while (*cp != '>')
00796                   {
00797                     if (*cp == ldfile->escape_char)
00798                      ++cp;
00799                     if (*cp == '\0')
00800                      /* It's a syntax error.  */
00801                      goto syntax;
00802 
00803                     ++cp;
00804                   }
00805 
00806                 if (cp - startp == 5 && startp[0] == 'U'
00807                     && isxdigit (startp[1]) && isxdigit (startp[2])
00808                     && isxdigit (startp[3]) && isxdigit (startp[4]))
00809                   {
00810                     unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
00811                     char *newstr;
00812 
00813                     newstr = (char *) xmalloc (10);
00814                     snprintf (newstr, 10, "U%08X", ucs4);
00815                     startp = newstr;
00816 
00817                     len = 9;
00818                   }
00819                 else
00820                   len = cp - startp;
00821 
00822                 charelem = find_element (ldfile, collate, startp, len);
00823                 ++cp;
00824               }
00825              else
00826               {
00827                 /* People really shouldn't use characters directly in
00828                    the string.  Especially since it's not really clear
00829                    what this means.  We interpret all characters in the
00830                    string as if that would be bsymbols.  Otherwise we
00831                    would have to match back to bsymbols somehow and this
00832                    is normally not what people normally expect.  */
00833                 charelem = find_element (ldfile, collate, cp++, 1);
00834               }
00835 
00836              if (charelem == NULL)
00837               {
00838                 /* We ignore the rest of the line.  */
00839                 lr_ignore_rest (ldfile, 0);
00840                 break;
00841               }
00842 
00843              /* Add the pointer.  */
00844              if (cnt >= max)
00845               {
00846                 struct element_t **newp;
00847                 max += 10;
00848                 newp = (struct element_t **)
00849                   alloca (max * sizeof (struct element_t *));
00850                 memcpy (newp, weights, cnt * sizeof (struct element_t *));
00851                 weights = newp;
00852               }
00853              weights[cnt++] = charelem;
00854            }
00855          while (*cp != '\0');
00856 
00857          /* Now store the information.  */
00858          elem->weights[weight_cnt].w = (struct element_t **)
00859            obstack_alloc (&collate->mempool,
00860                         cnt * sizeof (struct element_t *));
00861          memcpy (elem->weights[weight_cnt].w, weights,
00862                 cnt * sizeof (struct element_t *));
00863          elem->weights[weight_cnt].cnt = cnt;
00864 
00865          /* We don't need the string anymore.  */
00866          free (arg->val.str.startmb);
00867        }
00868       else if (ellipsis != tok_none
00869               && (arg->tok == tok_ellipsis2
00870                  || arg->tok == tok_ellipsis3
00871                  || arg->tok == tok_ellipsis4))
00872        {
00873          /* It must be the same ellipsis as used in the initial column.  */
00874          if (arg->tok != ellipsis)
00875            lr_error (ldfile, _("\
00876 %s: weights must use the same ellipsis symbol as the name"),
00877                     "LC_COLLATE");
00878 
00879          /* The weight for this level will depend on the element
00880             iterating over the range.  Put a placeholder.  */
00881          elem->weights[weight_cnt].w = (struct element_t **)
00882            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
00883          elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
00884          elem->weights[weight_cnt].cnt = 1;
00885        }
00886       else
00887        {
00888        syntax:
00889          /* It's a syntax error.  */
00890          lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
00891          lr_ignore_rest (ldfile, 0);
00892          break;
00893        }
00894 
00895       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
00896       /* This better should be the end of the line or a semicolon.  */
00897       if (arg->tok == tok_semicolon)
00898        /* OK, ignore this and read the next token.  */
00899        arg = lr_token (ldfile, charmap, result, repertoire, verbose);
00900       else if (arg->tok != tok_eof && arg->tok != tok_eol)
00901        {
00902          /* It's a syntax error.  */
00903          lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
00904          lr_ignore_rest (ldfile, 0);
00905          break;
00906        }
00907     }
00908   while (++weight_cnt < nrules);
00909 
00910   if (weight_cnt < nrules)
00911     {
00912       /* This means the rest of the line uses the current element as
00913         the weight.  */
00914       do
00915        {
00916          elem->weights[weight_cnt].w = (struct element_t **)
00917            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
00918          if (ellipsis == tok_none)
00919            elem->weights[weight_cnt].w[0] = elem;
00920          else
00921            elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
00922          elem->weights[weight_cnt].cnt = 1;
00923        }
00924       while (++weight_cnt < nrules);
00925     }
00926   else
00927     {
00928       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
00929        {
00930          /* Too many rule values.  */
00931          lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
00932          lr_ignore_rest (ldfile, 0);
00933        }
00934       else
00935        lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
00936     }
00937 }
00938 
00939 
00940 static int
00941 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
00942              const struct charmap_t *charmap, struct repertoire_t *repertoire,
00943              struct localedef_t *result)
00944 {
00945   /* First find out what kind of symbol this is.  */
00946   struct charseq *seq;
00947   uint32_t wc;
00948   struct element_t *elem = NULL;
00949   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
00950 
00951   /* Try to find the character in the charmap.  */
00952   seq = charmap_find_value (charmap, symstr, symlen);
00953 
00954   /* Determine the wide character.  */
00955   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
00956     {
00957       wc = repertoire_find_value (repertoire, symstr, symlen);
00958       if (seq != NULL)
00959        seq->ucs4 = wc;
00960     }
00961   else
00962     wc = seq->ucs4;
00963 
00964   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
00965     {
00966       /* It's no character, so look through the collation elements and
00967         symbol list.  */
00968       void *ptr = elem;
00969       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
00970        {
00971          void *result;
00972          struct symbol_t *sym = NULL;
00973 
00974          /* It's also collation element.  Therefore it's either a
00975             collating symbol or it's a character which is not
00976             supported by the character set.  In the later case we
00977             simply create a dummy entry.  */
00978          if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
00979            {
00980              /* It's a collation symbol.  */
00981              sym = (struct symbol_t *) result;
00982 
00983              elem = sym->order;
00984            }
00985 
00986          if (elem == NULL)
00987            {
00988              elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
00989 
00990              if (sym != NULL)
00991               sym->order = elem;
00992              else
00993               /* Enter a fake element in the sequence table.  This
00994                  won't cause anything in the output since there is
00995                  no multibyte or wide character associated with
00996                  it.  */
00997               insert_entry (&collate->seq_table, symstr, symlen, elem);
00998            }
00999        }
01000       else
01001        /* Copy the result back.  */
01002        elem = ptr;
01003     }
01004   else
01005     {
01006       /* Otherwise the symbols stands for a character.  */
01007       void *ptr = elem;
01008       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
01009        {
01010          uint32_t wcs[2] = { wc, 0 };
01011 
01012          /* We have to allocate an entry.  */
01013          elem = new_element (collate,
01014                            seq != NULL ? (char *) seq->bytes : NULL,
01015                            seq != NULL ? seq->nbytes : 0,
01016                            wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
01017                            symstr, symlen, 1);
01018 
01019          /* And add it to the table.  */
01020          if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
01021            /* This cannot happen.  */
01022            assert (! "Internal error");
01023        }
01024       else
01025        {
01026          /* Copy the result back.  */
01027          elem = ptr;
01028 
01029          /* Maybe the character was used before the definition.  In this case
01030             we have to insert the byte sequences now.  */
01031          if (elem->mbs == NULL && seq != NULL)
01032            {
01033              elem->mbs = obstack_copy0 (&collate->mempool,
01034                                     seq->bytes, seq->nbytes);
01035              elem->nmbs = seq->nbytes;
01036            }
01037 
01038          if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
01039            {
01040              uint32_t wcs[2] = { wc, 0 };
01041 
01042              elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
01043              elem->nwcs = 1;
01044            }
01045        }
01046     }
01047 
01048   /* Test whether this element is not already in the list.  */
01049   if (elem->next != NULL || elem == collate->cursor)
01050     {
01051       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
01052               (int) symlen, symstr, elem->file, elem->line);
01053       lr_ignore_rest (ldfile, 0);
01054       return 1;
01055     }
01056 
01057   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
01058 
01059   return 0;
01060 }
01061 
01062 
01063 static void
01064 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
01065                enum token_t ellipsis, const struct charmap_t *charmap,
01066                struct repertoire_t *repertoire,
01067                struct localedef_t *result)
01068 {
01069   struct element_t *startp;
01070   struct element_t *endp;
01071   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
01072 
01073   /* Unlink the entry added for the ellipsis.  */
01074   unlink_element (collate);
01075   startp = collate->cursor;
01076 
01077   /* Process and add the end-entry.  */
01078   if (symstr != NULL
01079       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
01080     /* Something went wrong with inserting the to-value.  This means
01081        we cannot process the ellipsis.  */
01082     return;
01083 
01084   /* Reset the cursor.  */
01085   collate->cursor = startp;
01086 
01087   /* Now we have to handle many different situations:
01088      - we have to distinguish between the three different ellipsis forms
01089      - the is the ellipsis at the beginning, in the middle, or at the end.
01090   */
01091   endp = collate->cursor->next;
01092   assert (symstr == NULL || endp != NULL);
01093 
01094   /* XXX The following is probably very wrong since also collating symbols
01095      can appear in ranges.  But do we want/can refine the test for that?  */
01096 #if 0
01097   /* Both, the start and the end symbol, must stand for characters.  */
01098   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
01099       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
01100     {
01101       lr_error (ldfile, _("\
01102 %s: the start and the end symbol of a range must stand for characters"),
01103               "LC_COLLATE");
01104       return;
01105     }
01106 #endif
01107 
01108   if (ellipsis == tok_ellipsis3)
01109     {
01110       /* One requirement we make here: the length of the byte
01111         sequences for the first and end character must be the same.
01112         This is mainly to prevent unwanted effects and this is often
01113         not what is wanted.  */
01114       size_t len = (startp->mbs != NULL ? startp->nmbs
01115                   : (endp->mbs != NULL ? endp->nmbs : 0));
01116       char mbcnt[len + 1];
01117       char mbend[len + 1];
01118 
01119       /* Well, this should be caught somewhere else already.  Just to
01120         make sure.  */
01121       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
01122       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
01123 
01124       if (startp != NULL && endp != NULL
01125          && startp->mbs != NULL && endp->mbs != NULL
01126          && startp->nmbs != endp->nmbs)
01127        {
01128          lr_error (ldfile, _("\
01129 %s: byte sequences of first and last character must have the same length"),
01130                   "LC_COLLATE");
01131          return;
01132        }
01133 
01134       /* Determine whether we have to generate multibyte sequences.  */
01135       if ((startp == NULL || startp->mbs != NULL)
01136          && (endp == NULL || endp->mbs != NULL))
01137        {
01138          int cnt;
01139          int ret;
01140 
01141          /* Prepare the beginning byte sequence.  This is either from the
01142             beginning byte sequence or it is all nulls if it was an
01143             initial ellipsis.  */
01144          if (startp == NULL || startp->mbs == NULL)
01145            memset (mbcnt, '\0', len);
01146          else
01147            {
01148              memcpy (mbcnt, startp->mbs, len);
01149 
01150              /* And increment it so that the value is the first one we will
01151                try to insert.  */
01152              for (cnt = len - 1; cnt >= 0; --cnt)
01153               if (++mbcnt[cnt] != '\0')
01154                 break;
01155            }
01156          mbcnt[len] = '\0';
01157 
01158          /* And the end sequence.  */
01159          if (endp == NULL || endp->mbs == NULL)
01160            memset (mbend, '\0', len);
01161          else
01162            memcpy (mbend, endp->mbs, len);
01163          mbend[len] = '\0';
01164 
01165          /* Test whether we have a correct range.  */
01166          ret = memcmp (mbcnt, mbend, len);
01167          if (ret >= 0)
01168            {
01169              if (ret > 0)
01170               lr_error (ldfile, _("%s: byte sequence of first character of \
01171 range is not lower than that of the last character"), "LC_COLLATE");
01172              return;
01173            }
01174 
01175          /* Generate the byte sequences data.  */
01176          while (1)
01177            {
01178              struct charseq *seq;
01179 
01180              /* Quite a bit of work ahead.  We have to find the character
01181                definition for the byte sequence and then determine the
01182                wide character belonging to it.  */
01183              seq = charmap_find_symbol (charmap, mbcnt, len);
01184              if (seq != NULL)
01185               {
01186                 struct element_t *elem;
01187                 size_t namelen;
01188 
01189                 /* I don't think this can ever happen.  */
01190                 assert (seq->name != NULL);
01191                 namelen = strlen (seq->name);
01192 
01193                 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
01194                   seq->ucs4 = repertoire_find_value (repertoire, seq->name,
01195                                                  namelen);
01196 
01197                 /* Now we are ready to insert the new value in the
01198                    sequence.  Find out whether the element is
01199                    already known.  */
01200                 void *ptr;
01201                 if (find_entry (&collate->seq_table, seq->name, namelen,
01202                               &ptr) != 0)
01203                   {
01204                     uint32_t wcs[2] = { seq->ucs4, 0 };
01205 
01206                     /* We have to allocate an entry.  */
01207                     elem = new_element (collate, mbcnt, len,
01208                                      seq->ucs4 == ILLEGAL_CHAR_VALUE
01209                                      ? NULL : wcs, seq->name,
01210                                      namelen, 1);
01211 
01212                     /* And add it to the table.  */
01213                     if (insert_entry (&collate->seq_table, seq->name,
01214                                    namelen, elem) != 0)
01215                      /* This cannot happen.  */
01216                      assert (! "Internal error");
01217                   }
01218                 else
01219                   /* Copy the result.  */
01220                   elem = ptr;
01221 
01222                 /* Test whether this element is not already in the list.  */
01223                 if (elem->next != NULL || (collate->cursor != NULL
01224                                         && elem->next == collate->cursor))
01225                   {
01226                     lr_error (ldfile, _("\
01227 order for `%.*s' already defined at %s:%Zu"),
01228                             (int) namelen, seq->name,
01229                             elem->file, elem->line);
01230                     goto increment;
01231                   }
01232 
01233                 /* Enqueue the new element.  */
01234                 elem->last = collate->cursor;
01235                 if (collate->cursor == NULL)
01236                   elem->next = NULL;
01237                 else
01238                   {
01239                     elem->next = collate->cursor->next;
01240                     elem->last->next = elem;
01241                     if (elem->next != NULL)
01242                      elem->next->last = elem;
01243                   }
01244                 if (collate->start == NULL)
01245                   {
01246                     assert (collate->cursor == NULL);
01247                     collate->start = elem;
01248                   }
01249                 collate->cursor = elem;
01250 
01251                /* Add the weight value.  We take them from the
01252                   `ellipsis_weights' member of `collate'.  */
01253                 elem->weights = (struct element_list_t *)
01254                   obstack_alloc (&collate->mempool,
01255                                nrules * sizeof (struct element_list_t));
01256                 for (cnt = 0; cnt < nrules; ++cnt)
01257                   if (collate->ellipsis_weight.weights[cnt].cnt == 1
01258                      && (collate->ellipsis_weight.weights[cnt].w[0]
01259                          == ELEMENT_ELLIPSIS2))
01260                     {
01261                      elem->weights[cnt].w = (struct element_t **)
01262                        obstack_alloc (&collate->mempool,
01263                                     sizeof (struct element_t *));
01264                      elem->weights[cnt].w[0] = elem;
01265                      elem->weights[cnt].cnt = 1;
01266                     }
01267                   else
01268                     {
01269                      /* Simply use the weight from `ellipsis_weight'.  */
01270                      elem->weights[cnt].w =
01271                        collate->ellipsis_weight.weights[cnt].w;
01272                      elem->weights[cnt].cnt =
01273                        collate->ellipsis_weight.weights[cnt].cnt;
01274                     }
01275               }
01276 
01277              /* Increment for the next round.  */
01278            increment:
01279              for (cnt = len - 1; cnt >= 0; --cnt)
01280               if (++mbcnt[cnt] != '\0')
01281                 break;
01282 
01283              /* Find out whether this was all.  */
01284              if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
01285               /* Yep, that's all.  */
01286               break;
01287            }
01288        }
01289     }
01290   else
01291     {
01292       /* For symbolic range we naturally must have a beginning and an
01293         end specified by the user.  */
01294       if (startp == NULL)
01295        lr_error (ldfile, _("\
01296 %s: symbolic range ellipsis must not directly follow `order_start'"),
01297                 "LC_COLLATE");
01298       else if (endp == NULL)
01299        lr_error (ldfile, _("\
01300 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
01301                 "LC_COLLATE");
01302       else
01303        {
01304          /* Determine the range.  To do so we have to determine the
01305              common prefix of the both names and then the numeric
01306              values of both ends.  */
01307          size_t lenfrom = strlen (startp->name);
01308          size_t lento = strlen (endp->name);
01309          char buf[lento + 1];
01310          int preflen = 0;
01311          long int from;
01312          long int to;
01313          char *cp;
01314          int base = ellipsis == tok_ellipsis2 ? 16 : 10;
01315 
01316          if (lenfrom != lento)
01317            {
01318            invalid_range:
01319              lr_error (ldfile, _("\
01320 `%s' and `%.*s' are not valid names for symbolic range"),
01321                      startp->name, (int) lento, endp->name);
01322              return;
01323            }
01324 
01325          while (startp->name[preflen] == endp->name[preflen])
01326            if (startp->name[preflen] == '\0')
01327              /* Nothing to be done.  The start and end point are identical
01328                and while inserting the end point we have already given
01329                the user an error message.  */
01330              return;
01331            else
01332              ++preflen;
01333 
01334          errno = 0;
01335          from = strtol (startp->name + preflen, &cp, base);
01336          if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
01337            goto invalid_range;
01338 
01339          errno = 0;
01340          to = strtol (endp->name + preflen, &cp, base);
01341          if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
01342            goto invalid_range;
01343 
01344          /* Copy the prefix.  */
01345          memcpy (buf, startp->name, preflen);
01346 
01347          /* Loop over all values.  */
01348          for (++from; from < to; ++from)
01349            {
01350              struct element_t *elem = NULL;
01351              struct charseq *seq;
01352              uint32_t wc;
01353              int cnt;
01354 
01355              /* Generate the name.  */
01356              sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
01357                      (int) (lenfrom - preflen), from);
01358 
01359              /* Look whether this name is already defined.  */
01360              void *ptr;
01361              if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
01362               {
01363                 /* Copy back the result.  */
01364                 elem = ptr;
01365 
01366                 if (elem->next != NULL || (collate->cursor != NULL
01367                                         && elem->next == collate->cursor))
01368                   {
01369                     lr_error (ldfile, _("\
01370 %s: order for `%.*s' already defined at %s:%Zu"),
01371                             "LC_COLLATE", (int) lenfrom, buf,
01372                             elem->file, elem->line);
01373                     continue;
01374                   }
01375 
01376                 if (elem->name == NULL)
01377                   {
01378                     lr_error (ldfile, _("%s: `%s' must be a character"),
01379                             "LC_COLLATE", buf);
01380                     continue;
01381                   }
01382               }
01383 
01384              if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
01385               {
01386                 /* Search for a character of this name.  */
01387                 seq = charmap_find_value (charmap, buf, lenfrom);
01388                 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
01389                   {
01390                     wc = repertoire_find_value (repertoire, buf, lenfrom);
01391 
01392                     if (seq != NULL)
01393                      seq->ucs4 = wc;
01394                   }
01395                 else
01396                   wc = seq->ucs4;
01397 
01398                 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
01399                   /* We don't know anything about a character with this
01400                      name.  XXX Should we warn?  */
01401                   continue;
01402 
01403                 if (elem == NULL)
01404                   {
01405                     uint32_t wcs[2] = { wc, 0 };
01406 
01407                     /* We have to allocate an entry.  */
01408                     elem = new_element (collate,
01409                                      seq != NULL
01410                                      ? (char *) seq->bytes : NULL,
01411                                      seq != NULL ? seq->nbytes : 0,
01412                                      wc == ILLEGAL_CHAR_VALUE
01413                                      ? NULL : wcs, buf, lenfrom, 1);
01414                   }
01415                 else
01416                   {
01417                     /* Update the element.  */
01418                     if (seq != NULL)
01419                      {
01420                        elem->mbs = obstack_copy0 (&collate->mempool,
01421                                                seq->bytes, seq->nbytes);
01422                        elem->nmbs = seq->nbytes;
01423                      }
01424 
01425                     if (wc != ILLEGAL_CHAR_VALUE)
01426                      {
01427                        uint32_t zero = 0;
01428 
01429                        obstack_grow (&collate->mempool,
01430                                    &wc, sizeof (uint32_t));
01431                        obstack_grow (&collate->mempool,
01432                                    &zero, sizeof (uint32_t));
01433                        elem->wcs = obstack_finish (&collate->mempool);
01434                        elem->nwcs = 1;
01435                      }
01436                   }
01437 
01438                 elem->file = ldfile->fname;
01439                 elem->line = ldfile->lineno;
01440                 elem->section = collate->current_section;
01441               }
01442 
01443              /* Enqueue the new element.  */
01444              elem->last = collate->cursor;
01445              elem->next = collate->cursor->next;
01446              elem->last->next = elem;
01447              if (elem->next != NULL)
01448               elem->next->last = elem;
01449              collate->cursor = elem;
01450 
01451              /* Now add the weights.  They come from the `ellipsis_weights'
01452                member of `collate'.  */
01453              elem->weights = (struct element_list_t *)
01454               obstack_alloc (&collate->mempool,
01455                             nrules * sizeof (struct element_list_t));
01456              for (cnt = 0; cnt < nrules; ++cnt)
01457               if (collate->ellipsis_weight.weights[cnt].cnt == 1
01458                   && (collate->ellipsis_weight.weights[cnt].w[0]
01459                      == ELEMENT_ELLIPSIS2))
01460                 {
01461                   elem->weights[cnt].w = (struct element_t **)
01462                     obstack_alloc (&collate->mempool,
01463                                  sizeof (struct element_t *));
01464                   elem->weights[cnt].w[0] = elem;
01465                   elem->weights[cnt].cnt = 1;
01466                 }
01467               else
01468                 {
01469                   /* Simly use the weight from `ellipsis_weight'.  */
01470                   elem->weights[cnt].w =
01471                     collate->ellipsis_weight.weights[cnt].w;
01472                   elem->weights[cnt].cnt =
01473                     collate->ellipsis_weight.weights[cnt].cnt;
01474                 }
01475            }
01476        }
01477     }
01478 }
01479 
01480 
01481 static void
01482 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
01483                struct localedef_t *copy_locale, int ignore_content)
01484 {
01485   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
01486     {
01487       struct locale_collate_t *collate;
01488 
01489       if (copy_locale == NULL)
01490        {
01491          collate = locale->categories[LC_COLLATE].collate =
01492            (struct locale_collate_t *)
01493            xcalloc (1, sizeof (struct locale_collate_t));
01494 
01495          /* Init the various data structures.  */
01496          init_hash (&collate->elem_table, 100);
01497          init_hash (&collate->sym_table, 100);
01498          init_hash (&collate->seq_table, 500);
01499          obstack_init (&collate->mempool);
01500 
01501          collate->col_weight_max = -1;
01502        }
01503       else
01504        /* Reuse the copy_locale's data structures.  */
01505        collate = locale->categories[LC_COLLATE].collate =
01506          copy_locale->categories[LC_COLLATE].collate;
01507     }
01508 
01509   ldfile->translate_strings = 0;
01510   ldfile->return_widestr = 0;
01511 }
01512 
01513 
01514 void
01515 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
01516 {
01517   /* Now is the time when we can assign the individual collation
01518      values for all the symbols.  We have possibly different values
01519      for the wide- and the multibyte-character symbols.  This is done
01520      since it might make a difference in the encoding if there is in
01521      some cases no multibyte-character but there are wide-characters.
01522      (The other way around it is not important since the encoded
01523      collation value in the wide-character case is 32 bits wide and
01524      therefore requires no encoding).
01525 
01526      The lowest collation value assigned is 2.  Zero is reserved for
01527      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
01528      functions and 1 is used to separate the individual passes for the
01529      different rules.
01530 
01531      We also have to construct a list with all the bytes/words which
01532      can come first in a sequence, followed by all the elements which
01533      also start with this byte/word.  The order is reverse which has
01534      among others the important effect that longer strings are located
01535      first in the list.  This is required for the output data since
01536      the algorithm used in `strcoll' etc depends on this.
01537 
01538      The multibyte case is easy.  We simply sort into an array with
01539      256 elements.  */
01540   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
01541   int mbact[nrules];
01542   int wcact;
01543   int mbseqact;
01544   int wcseqact;
01545   struct element_t *runp;
01546   int i;
01547   int need_undefined = 0;
01548   struct section_list *sect;
01549   int ruleidx;
01550   int nr_wide_elems = 0;
01551 
01552   if (collate == NULL)
01553     {
01554       /* No data, no check.  */
01555       if (! be_quiet)
01556        WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
01557                             "LC_COLLATE"));
01558       return;
01559     }
01560 
01561   /* If this assertion is hit change the type in `element_t'.  */
01562   assert (nrules <= sizeof (runp->used_in_level) * 8);
01563 
01564   /* Make sure that the `position' rule is used either in all sections
01565      or in none.  */
01566   for (i = 0; i < nrules; ++i)
01567     for (sect = collate->sections; sect != NULL; sect = sect->next)
01568       if (sect != collate->current_section
01569          && sect->rules != NULL
01570          && ((sect->rules[i] & sort_position)
01571              != (collate->current_section->rules[i] & sort_position)))
01572        {
01573          WITH_CUR_LOCALE (error (0, 0, _("\
01574 %s: `position' must be used for a specific level in all sections or none"),
01575                               "LC_COLLATE"));
01576          break;
01577        }
01578 
01579   /* Find out which elements are used at which level.  At the same
01580      time we find out whether we have any undefined symbols.  */
01581   runp = collate->start;
01582   while (runp != NULL)
01583     {
01584       if (runp->mbs != NULL)
01585        {
01586          for (i = 0; i < nrules; ++i)
01587            {
01588              int j;
01589 
01590              for (j = 0; j < runp->weights[i].cnt; ++j)
01591               /* A NULL pointer as the weight means IGNORE.  */
01592               if (runp->weights[i].w[j] != NULL)
01593                 {
01594                   if (runp->weights[i].w[j]->weights == NULL)
01595                     {
01596                      WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
01597                                                  runp->line,
01598                                                  _("symbol `%s' not defined"),
01599                                                  runp->weights[i].w[j]->name));
01600 
01601                      need_undefined = 1;
01602                      runp->weights[i].w[j] = &collate->undefined;
01603                     }
01604                   else
01605                     /* Set the bit for the level.  */
01606                     runp->weights[i].w[j]->used_in_level |= 1 << i;
01607                 }
01608            }
01609        }
01610 
01611       /* Up to the next entry.  */
01612       runp = runp->next;
01613     }
01614 
01615   /* Walk through the list of defined sequences and assign weights.  Also
01616      create the data structure which will allow generating the single byte
01617      character based tables.
01618 
01619      Since at each time only the weights for each of the rules are
01620      only compared to other weights for this rule it is possible to
01621      assign more compact weight values than simply counting all
01622      weights in sequence.  We can assign weights from 2, one for each
01623      rule individually and only for those elements, which are actually
01624      used for this rule.
01625 
01626      Why is this important?  It is not for the wide char table.  But
01627      it is for the singlebyte output since here larger numbers have to
01628      be encoded to make it possible to emit the value as a byte
01629      string.  */
01630   for (i = 0; i < nrules; ++i)
01631     mbact[i] = 2;
01632   wcact = 2;
01633   mbseqact = 0;
01634   wcseqact = 0;
01635   runp = collate->start;
01636   while (runp != NULL)
01637     {
01638       /* Determine the order.  */
01639       if (runp->used_in_level != 0)
01640        {
01641          runp->mborder = (int *) obstack_alloc (&collate->mempool,
01642                                            nrules * sizeof (int));
01643 
01644          for (i = 0; i < nrules; ++i)
01645            if ((runp->used_in_level & (1 << i)) != 0)
01646              runp->mborder[i] = mbact[i]++;
01647            else
01648              runp->mborder[i] = 0;
01649        }
01650 
01651       if (runp->mbs != NULL)
01652        {
01653          struct element_t **eptr;
01654          struct element_t *lastp = NULL;
01655 
01656          /* Find the point where to insert in the list.  */
01657          eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
01658          while (*eptr != NULL)
01659            {
01660              if ((*eptr)->nmbs < runp->nmbs)
01661               break;
01662 
01663              if ((*eptr)->nmbs == runp->nmbs)
01664               {
01665                 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
01666 
01667                 if (c == 0)
01668                   {
01669                     /* This should not happen.  It means that we have
01670                       two symbols with the same byte sequence.  It is
01671                       of course an error.  */
01672                     WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
01673                                                 (*eptr)->line,
01674                                                 _("\
01675 symbol `%s' has the same encoding as"), (*eptr)->name);
01676                                    error_at_line (0, 0, runp->file,
01677                                                 runp->line,
01678                                                 _("symbol `%s'"),
01679                                                 runp->name));
01680                     goto dont_insert;
01681                   }
01682                 else if (c < 0)
01683                   /* Insert it here.  */
01684                   break;
01685               }
01686 
01687              /* To the next entry.  */
01688              lastp = *eptr;
01689              eptr = &(*eptr)->mbnext;
01690            }
01691 
01692          /* Set the pointers.  */
01693          runp->mbnext = *eptr;
01694          runp->mblast = lastp;
01695          if (*eptr != NULL)
01696            (*eptr)->mblast = runp;
01697          *eptr = runp;
01698        dont_insert:
01699          ;
01700        }
01701 
01702       if (runp->used_in_level)
01703        {
01704          runp->wcorder = wcact++;
01705 
01706          /* We take the opportunity to count the elements which have
01707             wide characters.  */
01708          ++nr_wide_elems;
01709        }
01710 
01711       if (runp->is_character)
01712        {
01713          if (runp->nmbs == 1)
01714            collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
01715 
01716          runp->wcseqorder = wcseqact++;
01717        }
01718       else if (runp->mbs != NULL && runp->weights != NULL)
01719        /* This is for collation elements.  */
01720        runp->wcseqorder = wcseqact++;
01721 
01722       /* Up to the next entry.  */
01723       runp = runp->next;
01724     }
01725 
01726   /* Find out whether any of the `mbheads' entries is unset.  In this
01727      case we use the UNDEFINED entry.  */
01728   for (i = 1; i < 256; ++i)
01729     if (collate->mbheads[i] == NULL)
01730       {
01731        need_undefined = 1;
01732        collate->mbheads[i] = &collate->undefined;
01733       }
01734 
01735   /* Now to the wide character case.  */
01736   collate->wcheads.p = 6;
01737   collate->wcheads.q = 10;
01738   wchead_table_init (&collate->wcheads);
01739 
01740   collate->wcseqorder.p = 6;
01741   collate->wcseqorder.q = 10;
01742   collseq_table_init (&collate->wcseqorder);
01743 
01744   /* Start adding.  */
01745   runp = collate->start;
01746   while (runp != NULL)
01747     {
01748       if (runp->wcs != NULL)
01749        {
01750          struct element_t *e;
01751          struct element_t **eptr;
01752          struct element_t *lastp;
01753 
01754          /* Insert the collation sequence value.  */
01755          if (runp->is_character)
01756            collseq_table_add (&collate->wcseqorder, runp->wcs[0],
01757                             runp->wcseqorder);
01758 
01759          /* Find the point where to insert in the list.  */
01760          e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
01761          eptr = &e;
01762          lastp = NULL;
01763          while (*eptr != NULL)
01764            {
01765              if ((*eptr)->nwcs < runp->nwcs)
01766               break;
01767 
01768              if ((*eptr)->nwcs == runp->nwcs)
01769               {
01770                 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
01771                                (wchar_t *) runp->wcs, runp->nwcs);
01772 
01773                 if (c == 0)
01774                   {
01775                     /* This should not happen.  It means that we have
01776                       two symbols with the same byte sequence.  It is
01777                       of course an error.  */
01778                     WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
01779                                                 (*eptr)->line,
01780                                                 _("\
01781 symbol `%s' has the same encoding as"), (*eptr)->name);
01782                                    error_at_line (0, 0, runp->file,
01783                                                 runp->line,
01784                                                 _("symbol `%s'"),
01785                                                 runp->name));
01786                     goto dont_insertwc;
01787                   }
01788                 else if (c < 0)
01789                   /* Insert it here.  */
01790                   break;
01791               }
01792 
01793              /* To the next entry.  */
01794              lastp = *eptr;
01795              eptr = &(*eptr)->wcnext;
01796            }
01797 
01798          /* Set the pointers.  */
01799          runp->wcnext = *eptr;
01800          runp->wclast = lastp;
01801          if (*eptr != NULL)
01802            (*eptr)->wclast = runp;
01803          *eptr = runp;
01804          if (eptr == &e)
01805            wchead_table_add (&collate->wcheads, runp->wcs[0], e);
01806        dont_insertwc:
01807          ;
01808        }
01809 
01810       /* Up to the next entry.  */
01811       runp = runp->next;
01812     }
01813 
01814   collseq_table_finalize (&collate->wcseqorder);
01815 
01816   /* Now determine whether the UNDEFINED entry is needed and if yes,
01817      whether it was defined.  */
01818   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
01819   if (collate->undefined.file == NULL)
01820     {
01821       if (need_undefined)
01822        {
01823          /* This seems not to be enforced by recent standards.  Don't
01824             emit an error, simply append UNDEFINED at the end.  */
01825          if (0)
01826            WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
01827 
01828          /* Add UNDEFINED at the end.  */
01829          collate->undefined.mborder =
01830            (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
01831 
01832          for (i = 0; i < nrules; ++i)
01833            collate->undefined.mborder[i] = mbact[i]++;
01834        }
01835 
01836       /* In any case we will need the definition for the wide character
01837         case.  But we will not complain that it is missing since the
01838         specification strangely enough does not seem to account for
01839         this.  */
01840       collate->undefined.wcorder = wcact++;
01841     }
01842 
01843   /* Finally, try to unify the rules for the sections.  Whenever the rules
01844      for a section are the same as those for another section give the
01845      ruleset the same index.  Since there are never many section we can
01846      use an O(n^2) algorithm here.  */
01847   sect = collate->sections;
01848   while (sect != NULL && sect->rules == NULL)
01849     sect = sect->next;
01850 
01851   /* Bail out if we have no sections because of earlier errors.  */
01852   if (sect == NULL)
01853     {
01854       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
01855                            _("too many errors; giving up")));
01856       return;
01857     }
01858 
01859   ruleidx = 0;
01860   do
01861     {
01862       struct section_list *osect = collate->sections;
01863 
01864       while (osect != sect)
01865        if (osect->rules != NULL
01866            && memcmp (osect->rules, sect->rules,
01867                      nrules * sizeof (osect->rules[0])) == 0)
01868          break;
01869        else
01870          osect = osect->next;
01871 
01872       if (osect == sect)
01873        sect->ruleidx = ruleidx++;
01874       else
01875        sect->ruleidx = osect->ruleidx;
01876 
01877       /* Next section.  */
01878       do
01879        sect = sect->next;
01880       while (sect != NULL && sect->rules == NULL);
01881     }
01882   while (sect != NULL);
01883   /* We are currently not prepared for more than 128 rulesets.  But this
01884      should never really be a problem.  */
01885   assert (ruleidx <= 128);
01886 }
01887 
01888 
01889 static int32_t
01890 output_weight (struct obstack *pool, struct locale_collate_t *collate,
01891               struct element_t *elem)
01892 {
01893   size_t cnt;
01894   int32_t retval;
01895 
01896   /* Optimize the use of UNDEFINED.  */
01897   if (elem == &collate->undefined)
01898     /* The weights are already inserted.  */
01899     return 0;
01900 
01901   /* This byte can start exactly one collation element and this is
01902      a single byte.  We can directly give the index to the weights.  */
01903   retval = obstack_object_size (pool);
01904 
01905   /* Construct the weight.  */
01906   for (cnt = 0; cnt < nrules; ++cnt)
01907     {
01908       char buf[elem->weights[cnt].cnt * 7];
01909       int len = 0;
01910       int i;
01911 
01912       for (i = 0; i < elem->weights[cnt].cnt; ++i)
01913        /* Encode the weight value.  We do nothing for IGNORE entries.  */
01914        if (elem->weights[cnt].w[i] != NULL)
01915          len += utf8_encode (&buf[len],
01916                            elem->weights[cnt].w[i]->mborder[cnt]);
01917 
01918       /* And add the buffer content.  */
01919       obstack_1grow (pool, len);
01920       obstack_grow (pool, buf, len);
01921     }
01922 
01923   return retval | ((elem->section->ruleidx & 0x7f) << 24);
01924 }
01925 
01926 
01927 static int32_t
01928 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
01929                struct element_t *elem)
01930 {
01931   size_t cnt;
01932   int32_t retval;
01933 
01934   /* Optimize the use of UNDEFINED.  */
01935   if (elem == &collate->undefined)
01936     /* The weights are already inserted.  */
01937     return 0;
01938 
01939   /* This byte can start exactly one collation element and this is
01940      a single byte.  We can directly give the index to the weights.  */
01941   retval = obstack_object_size (pool) / sizeof (int32_t);
01942 
01943   /* Construct the weight.  */
01944   for (cnt = 0; cnt < nrules; ++cnt)
01945     {
01946       int32_t buf[elem->weights[cnt].cnt];
01947       int i;
01948       int32_t j;
01949 
01950       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
01951        if (elem->weights[cnt].w[i] != NULL)
01952          buf[j++] = elem->weights[cnt].w[i]->wcorder;
01953 
01954       /* And add the buffer content.  */
01955       obstack_int32_grow (pool, j);
01956 
01957       obstack_grow (pool, buf, j * sizeof (int32_t));
01958     }
01959 
01960   return retval | ((elem->section->ruleidx & 0x7f) << 24);
01961 }
01962 
01963 /* Context handed from collate_output to add_to_tablewc.  The callback
         given to wchead_table_iterate receives only (ch, runp), so the pools
         and the table it writes to are parked in this file-scope object for
         the duration of the iteration.  If localedef is ever threaded, this
         would need to be a __thread variable.  */
01964 static struct
01965 {
01966   struct obstack *weightpool;  /* Passed on to output_weightwc.  */
01967   struct obstack *extrapool;  /* Receives the per-character sequence records.  */
01968   struct obstack *indpool;  /* Receives the weight indices of consecutive series.  */
01969   struct locale_collate_t *collate;  /* Passed on to output_weightwc.  */
01970   struct collidx_table *tablewc;  /* Table filled through collidx_table_add.  */
01971 } atwc;
01972 
01973 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
01974 
01975 static void
01976 add_to_tablewc (uint32_t ch, struct element_t *runp)
01977 {
01978   if (runp->wcnext == NULL && runp->nwcs == 1)
01979     {
01980       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
01981                                       runp);
01982       collidx_table_add (atwc.tablewc, ch, weigthidx);
01983     }
01984   else
01985     {
01986       /* As for the singlebyte table, we recognize sequences and
01987         compress them.  */
01988       struct element_t *lastp;
01989 
01990       collidx_table_add (atwc.tablewc, ch,
01991                       -(obstack_object_size (atwc.extrapool)
01992                       / sizeof (uint32_t)));
01993 
01994       do
01995        {
01996          /* Store the current index in the weight table.  We know that
01997             the current position in the `extrapool' is aligned on a
01998             32-bit address.  */
01999          int32_t weightidx;
02000          int added;
02001 
02002          /* Find out wether this is a single entry or we have more than
02003             one consecutive entry.  */
02004          if (runp->wcnext != NULL
02005              && runp->nwcs == runp->wcnext->nwcs
02006              && wmemcmp ((wchar_t *) runp->wcs,
02007                        (wchar_t *)runp->wcnext->wcs,
02008                        runp->nwcs - 1) == 0
02009              && (runp->wcs[runp->nwcs - 1]
02010                 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
02011            {
02012              int i;
02013              struct element_t *series_startp = runp;
02014              struct element_t *curp;
02015 
02016              /* Now add first the initial byte sequence.  */
02017              added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
02018              if (sizeof (int32_t) == sizeof (int))
02019               obstack_make_room (atwc.extrapool, added);
02020 
02021              /* More than one consecutive entry.  We mark this by having
02022                a negative index into the indirect table.  */
02023              obstack_int32_grow_fast (atwc.extrapool,
02024                                    -(obstack_object_size (atwc.indpool)
02025                                     / sizeof (int32_t)));
02026              obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
02027 
02028              do
02029               runp = runp->wcnext;
02030              while (runp->wcnext != NULL
02031                    && runp->nwcs == runp->wcnext->nwcs
02032                    && wmemcmp ((wchar_t *) runp->wcs,
02033                              (wchar_t *)runp->wcnext->wcs,
02034                              runp->nwcs - 1) == 0
02035                    && (runp->wcs[runp->nwcs - 1]
02036                       == runp->wcnext->wcs[runp->nwcs - 1] + 1));
02037 
02038              /* Now walk backward from here to the beginning.  */
02039              curp = runp;
02040 
02041              for (i = 1; i < runp->nwcs; ++i)
02042               obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
02043 
02044              /* Now find the end of the consecutive sequence and
02045                add all the indeces in the indirect pool.  */
02046              do
02047               {
02048                 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
02049                                           curp);
02050                 obstack_int32_grow (atwc.indpool, weightidx);
02051 
02052                 curp = curp->wclast;
02053               }
02054              while (curp != series_startp);
02055 
02056              /* Add the final weight.  */
02057              weightidx = output_weightwc (atwc.weightpool, atwc.collate,
02058                                       curp);
02059              obstack_int32_grow (atwc.indpool, weightidx);
02060 
02061              /* And add the end byte sequence.  Without length this
02062                time.  */
02063              for (i = 1; i < curp->nwcs; ++i)
02064               obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
02065            }
02066          else
02067            {
02068              /* A single entry.  Simply add the index and the length and
02069                string (except for the first character which is already
02070                tested for).  */
02071              int i;
02072 
02073              /* Output the weight info.  */
02074              weightidx = output_weightwc (atwc.weightpool, atwc.collate,
02075                                       runp);
02076 
02077              added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
02078              if (sizeof (int) == sizeof (int32_t))
02079               obstack_make_room (atwc.extrapool, added);
02080 
02081              obstack_int32_grow_fast (atwc.extrapool, weightidx);
02082              obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
02083              for (i = 1; i < runp->nwcs; ++i)
02084               obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
02085            }
02086 
02087          /* Next entry.  */
02088          lastp = runp;
02089          runp = runp->wcnext;
02090        }
02091       while (runp != NULL);
02092     }
02093 }
02094 
02095 void
02096 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
02097               const char *output_path)
02098 {
02099   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
02100   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
02101   struct iovec iov[2 + nelems];
02102   struct locale_file data;
02103   uint32_t idx[nelems];
02104   size_t cnt;
02105   size_t ch;
02106   int32_t tablemb[256];
02107   struct obstack weightpool;
02108   struct obstack extrapool;
02109   struct obstack indirectpool;
02110   struct section_list *sect;
02111   struct collidx_table tablewc;
02112   uint32_t elem_size;
02113   uint32_t *elem_table;
02114   int i;
02115   struct element_t *runp;
02116 
02117   data.magic = LIMAGIC (LC_COLLATE);
02118   data.n = nelems;
02119   iov[0].iov_base = (void *) &data;
02120   iov[0].iov_len = sizeof (data);
02121 
02122   iov[1].iov_base = (void *) idx;
02123   iov[1].iov_len = sizeof (idx);
02124 
02125   idx[0] = iov[0].iov_len + iov[1].iov_len;
02126   cnt = 0;
02127 
02128   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
02129   iov[2 + cnt].iov_base = &nrules;
02130   iov[2 + cnt].iov_len = sizeof (uint32_t);
02131   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02132   ++cnt;
02133 
02134   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
02135   if (collate == NULL)
02136     {
02137       int32_t dummy = 0;
02138 
02139       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
02140        {
02141          /* The words have to be handled specially.  */
02142          if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
02143            {
02144              iov[2 + cnt].iov_base = &dummy;
02145              iov[2 + cnt].iov_len = sizeof (int32_t);
02146            }
02147          else
02148            {
02149              iov[2 + cnt].iov_base = NULL;
02150              iov[2 + cnt].iov_len = 0;
02151            }
02152 
02153          if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
02154            idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02155          ++cnt;
02156        }
02157 
02158       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
02159 
02160       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
02161 
02162       return;
02163     }
02164 
02165   obstack_init (&weightpool);
02166   obstack_init (&extrapool);
02167   obstack_init (&indirectpool);
02168 
02169   /* Since we are using the sign of an integer to mark indirection the
02170      offsets in the arrays we are indirectly referring to must not be
02171      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
02172   obstack_int32_grow (&extrapool, 0);
02173   obstack_int32_grow (&indirectpool, 0);
02174 
02175   /* Prepare the ruleset table.  */
02176   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
02177     if (sect->rules != NULL && sect->ruleidx == i)
02178       {
02179        int j;
02180 
02181        obstack_make_room (&weightpool, nrules);
02182 
02183        for (j = 0; j < nrules; ++j)
02184          obstack_1grow_fast (&weightpool, sect->rules[j]);
02185        ++i;
02186       }
02187   /* And align the output.  */
02188   i = (nrules * i) % __alignof__ (int32_t);
02189   if (i > 0)
02190     do
02191       obstack_1grow (&weightpool, '\0');
02192     while (++i < __alignof__ (int32_t));
02193 
02194   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
02195   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
02196   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
02197   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02198   ++cnt;
02199 
02200   /* Generate the 8-bit table.  Walk through the lists of sequences
02201      starting with the same byte and add them one after the other to
02202      the table.  In case we have more than one sequence starting with
02203      the same byte we have to use extra indirection.
02204 
02205      First add a record for the NUL byte.  This entry will never be used
02206      so it does not matter.  */
02207   tablemb[0] = 0;
02208 
02209   /* Now insert the `UNDEFINED' value if it is used.  Since this value
02210      will probably be used more than once it is good to store the
02211      weights only once.  */
02212   if (collate->undefined.used_in_level != 0)
02213     output_weight (&weightpool, collate, &collate->undefined);
02214 
02215   for (ch = 1; ch < 256; ++ch)
02216     if (collate->mbheads[ch]->mbnext == NULL
02217        && collate->mbheads[ch]->nmbs <= 1)
02218       {
02219        tablemb[ch] = output_weight (&weightpool, collate,
02220                                  collate->mbheads[ch]);
02221       }
02222     else
02223       {
02224        /* The entries in the list are sorted by length and then
02225            alphabetically.  This is the order in which we will add the
02226            elements to the collation table.  This allows simply walking
02227           the table in sequence and stopping at the first matching
02228            entry.  Since the longer sequences are coming first in the
02229            list they have the possibility to match first, just as it
02230            has to be.  In the worst case we are walking to the end of
02231            the list where we put, if no singlebyte sequence is defined
02232            in the locale definition, the weights for UNDEFINED.
02233 
02234           To reduce the length of the search list we compress them a bit.
02235           This happens by collecting sequences of consecutive byte
02236           sequences in one entry (having a begin and an end byte sequence)
02237           and add only one index into the weight table.  We can find the
02238           consecutive entries since they are also consecutive in the list.  */
02239        struct element_t *runp = collate->mbheads[ch];
02240        struct element_t *lastp;
02241 
02242        assert ((obstack_object_size (&extrapool)
02243                & (__alignof__ (int32_t) - 1)) == 0);
02244 
02245        tablemb[ch] = -obstack_object_size (&extrapool);
02246 
02247        do
02248          {
02249            /* Store the current index in the weight table.  We know that
02250               the current position in the `extrapool' is aligned on a
02251               32-bit address.  */
02252            int32_t weightidx;
02253            int added;
02254 
02255            /* Find out whether this is a single entry or we have more than
02256               one consecutive entry.  */
02257            if (runp->mbnext != NULL
02258               && runp->nmbs == runp->mbnext->nmbs
02259               && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
02260               && (runp->mbs[runp->nmbs - 1]
02261                   == runp->mbnext->mbs[runp->nmbs - 1] + 1))
02262              {
02263               int i;
02264               struct element_t *series_startp = runp;
02265               struct element_t *curp;
02266 
02267               /* Compute how much space we will need.  */
02268               added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
02269                        + __alignof__ (int32_t) - 1)
02270                       & ~(__alignof__ (int32_t) - 1));
02271               assert ((obstack_object_size (&extrapool)
02272                       & (__alignof__ (int32_t) - 1)) == 0);
02273               obstack_make_room (&extrapool, added);
02274 
02275               /* More than one consecutive entry.  We mark this by having
02276                  a negative index into the indirect table.  */
02277               obstack_int32_grow_fast (&extrapool,
02278                                     -(obstack_object_size (&indirectpool)
02279                                       / sizeof (int32_t)));
02280 
02281               /* Now search first the end of the series.  */
02282               do
02283                 runp = runp->mbnext;
02284               while (runp->mbnext != NULL
02285                      && runp->nmbs == runp->mbnext->nmbs
02286                      && memcmp (runp->mbs, runp->mbnext->mbs,
02287                               runp->nmbs - 1) == 0
02288                      && (runp->mbs[runp->nmbs - 1]
02289                         == runp->mbnext->mbs[runp->nmbs - 1] + 1));
02290 
02291               /* Now walk backward from here to the beginning.  */
02292               curp = runp;
02293 
02294               assert (runp->nmbs <= 256);
02295               obstack_1grow_fast (&extrapool, curp->nmbs - 1);
02296               for (i = 1; i < curp->nmbs; ++i)
02297                 obstack_1grow_fast (&extrapool, curp->mbs[i]);
02298 
02299               /* Now find the end of the consecutive sequence and
02300                    add all the indeces in the indirect pool.  */
02301               do
02302                 {
02303                   weightidx = output_weight (&weightpool, collate, curp);
02304                   obstack_int32_grow (&indirectpool, weightidx);
02305 
02306                   curp = curp->mblast;
02307                 }
02308               while (curp != series_startp);
02309 
02310               /* Add the final weight.  */
02311               weightidx = output_weight (&weightpool, collate, curp);
02312               obstack_int32_grow (&indirectpool, weightidx);
02313 
02314               /* And add the end byte sequence.  Without length this
02315                    time.  */
02316               for (i = 1; i < curp->nmbs; ++i)
02317                 obstack_1grow_fast (&extrapool, curp->mbs[i]);
02318              }
02319            else
02320              {
02321               /* A single entry.  Simply add the index and the length and
02322                  string (except for the first character which is already
02323                  tested for).  */
02324               int i;
02325 
02326               /* Output the weight info.  */
02327               weightidx = output_weight (&weightpool, collate, runp);
02328 
02329               added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
02330                        + __alignof__ (int32_t) - 1)
02331                       & ~(__alignof__ (int32_t) - 1));
02332               assert ((obstack_object_size (&extrapool)
02333                       & (__alignof__ (int32_t) - 1)) == 0);
02334               obstack_make_room (&extrapool, added);
02335 
02336               obstack_int32_grow_fast (&extrapool, weightidx);
02337               assert (runp->nmbs <= 256);
02338               obstack_1grow_fast (&extrapool, runp->nmbs - 1);
02339 
02340               for (i = 1; i < runp->nmbs; ++i)
02341                 obstack_1grow_fast (&extrapool, runp->mbs[i]);
02342              }
02343 
02344            /* Add alignment bytes if necessary.  */
02345            while ((obstack_object_size (&extrapool)
02346                   & (__alignof__ (int32_t) - 1)) != 0)
02347              obstack_1grow_fast (&extrapool, '\0');
02348 
02349            /* Next entry.  */
02350            lastp = runp;
02351            runp = runp->mbnext;
02352          }
02353        while (runp != NULL);
02354 
02355        assert ((obstack_object_size (&extrapool)
02356                & (__alignof__ (int32_t) - 1)) == 0);
02357 
02358        /* If the final entry in the list is not a single character we
02359            add an UNDEFINED entry here.  */
02360        if (lastp->nmbs != 1)
02361          {
02362            int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
02363                       & ~(__alignof__ (int32_t) - 1));
02364            obstack_make_room (&extrapool, added);
02365 
02366            obstack_int32_grow_fast (&extrapool, 0);
02367            /* XXX What rule? We just pick the first.  */
02368            obstack_1grow_fast (&extrapool, 0);
02369            /* Length is zero.  */
02370            obstack_1grow_fast (&extrapool, 0);
02371 
02372            /* Add alignment bytes if necessary.  */
02373            while ((obstack_object_size (&extrapool)
02374                   & (__alignof__ (int32_t) - 1)) != 0)
02375              obstack_1grow_fast (&extrapool, '\0');
02376          }
02377       }
02378 
02379   /* Add padding to the tables if necessary.  */
02380   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
02381         != 0)
02382     obstack_1grow (&weightpool, 0);
02383 
02384   /* Now add the four tables.  */
02385   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
02386   iov[2 + cnt].iov_base = tablemb;
02387   iov[2 + cnt].iov_len = sizeof (tablemb);
02388   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02389   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
02390   ++cnt;
02391 
02392   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
02393   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
02394   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
02395   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02396   ++cnt;
02397 
02398   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
02399   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
02400   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
02401   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02402   ++cnt;
02403 
02404   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
02405   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
02406   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
02407   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02408   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
02409   ++cnt;
02410 
02411 
02412   /* Now the same for the wide character table.  We need to store some
02413      more information here.  */
02414   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
02415   iov[2 + cnt].iov_base = NULL;
02416   iov[2 + cnt].iov_len = 0;
02417   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02418   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02419   ++cnt;
02420 
02421   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
02422   iov[2 + cnt].iov_base = NULL;
02423   iov[2 + cnt].iov_len = 0;
02424   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02425   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02426   ++cnt;
02427 
02428   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
02429   iov[2 + cnt].iov_base = NULL;
02430   iov[2 + cnt].iov_len = 0;
02431   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02432   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02433   ++cnt;
02434 
02435   /* Since we are using the sign of an integer to mark indirection the
02436      offsets in the arrays we are indirectly referring to must not be
02437      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
02438   obstack_int32_grow (&extrapool, 0);
02439   obstack_int32_grow (&indirectpool, 0);
02440 
02441   /* Now insert the `UNDEFINED' value if it is used.  Since this value
02442      will probably be used more than once it is good to store the
02443      weights only once.  */
02444   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
02445     abort ();
02446 
02447   /* Generate the table.  Walk through the lists of sequences starting
02448      with the same wide character and add them one after the other to
02449      the table.  In case we have more than one sequence starting with
02450      the same byte we have to use extra indirection.  */
02451   tablewc.p = 6;
02452   tablewc.q = 10;
02453   collidx_table_init (&tablewc);
02454 
02455   atwc.weightpool = &weightpool;
02456   atwc.extrapool = &extrapool;
02457   atwc.indpool = &indirectpool;
02458   atwc.collate = collate;
02459   atwc.tablewc = &tablewc;
02460 
02461   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
02462 
02463   memset (&atwc, 0, sizeof (atwc));
02464 
02465   collidx_table_finalize (&tablewc);
02466 
02467   /* Now add the four tables.  */
02468   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
02469   iov[2 + cnt].iov_base = tablewc.result;
02470   iov[2 + cnt].iov_len = tablewc.result_size;
02471   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02472   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
02473   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02474   ++cnt;
02475 
02476   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
02477   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
02478   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
02479   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02480   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
02481   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02482   ++cnt;
02483 
02484   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
02485   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
02486   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
02487   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02488   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
02489   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02490   ++cnt;
02491 
02492   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
02493   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
02494   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
02495   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02496   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
02497   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02498   ++cnt;
02499 
02500 
02501   /* Finally write the table with collation element names out.  It is
02502      a hash table with a simple function which gets the name of the
02503      character as the input.  One character might have many names.  The
02504      value associated with the name is an index into the weight table
02505      where we are then interested in the first-level weight value.
02506 
02507      To determine how large the table should be we are counting the
02508      elements have to put in.  Since we are using internal chaining
02509      using a secondary hash function we have to make the table a bit
02510      larger to avoid extremely long search times.  We can achieve
02511      good results with a 40% larger table than there are entries.  */
02512   elem_size = 0;
02513   runp = collate->start;
02514   while (runp != NULL)
02515     {
02516       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
02517        /* Yep, the element really counts.  */
02518        ++elem_size;
02519 
02520       runp = runp->next;
02521     }
02522   /* Add 40% and find the next prime number.  */
02523   elem_size = next_prime (elem_size * 1.4);
02524 
02525   /* Allocate the table.  Each entry consists of two words: the hash
02526      value and an index in a secondary table which provides the index
02527      into the weight table and the string itself (so that a match can
02528      be determined).  */
02529   elem_table = (uint32_t *) obstack_alloc (&extrapool,
02530                                       elem_size * 2 * sizeof (uint32_t));
02531   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
02532 
02533   /* Now add the elements.  */
02534   runp = collate->start;
02535   while (runp != NULL)
02536     {
02537       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
02538        {
02539          /* Compute the hash value of the name.  */
02540          uint32_t namelen = strlen (runp->name);
02541          uint32_t hash = elem_hash (runp->name, namelen);
02542          size_t idx = hash % elem_size;
02543 #ifndef NDEBUG
02544          size_t start_idx = idx;
02545 #endif
02546 
02547          if (elem_table[idx * 2] != 0)
02548            {
02549              /* The spot is already taken.  Try iterating using the value
02550                from the secondary hashing function.  */
02551              size_t iter = hash % (elem_size - 2) + 1;
02552 
02553              do
02554               {
02555                 idx += iter;
02556                 if (idx >= elem_size)
02557                   idx -= elem_size;
02558                 assert (idx != start_idx);
02559               }
02560              while (elem_table[idx * 2] != 0);
02561            }
02562          /* This is the spot where we will insert the value.  */
02563          elem_table[idx * 2] = hash;
02564          elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
02565 
02566          /* The the string itself including length.  */
02567          obstack_1grow (&extrapool, namelen);
02568          obstack_grow (&extrapool, runp->name, namelen);
02569 
02570          /* And the multibyte representation.  */
02571          obstack_1grow (&extrapool, runp->nmbs);
02572          obstack_grow (&extrapool, runp->mbs, runp->nmbs);
02573 
02574          /* And align again to 32 bits.  */
02575          if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
02576            obstack_grow (&extrapool, "\0\0",
02577                        (sizeof (int32_t)
02578                         - ((1 + namelen + 1 + runp->nmbs)
02579                            % sizeof (int32_t))));
02580 
02581          /* Now some 32-bit values: multibyte collation sequence,
02582             wide char string (including length), and wide char
02583             collation sequence.  */
02584          obstack_int32_grow (&extrapool, runp->mbseqorder);
02585 
02586          obstack_int32_grow (&extrapool, runp->nwcs);
02587          obstack_grow (&extrapool, runp->wcs,
02588                      runp->nwcs * sizeof (uint32_t));
02589 
02590          obstack_int32_grow (&extrapool, runp->wcseqorder);
02591        }
02592 
02593       runp = runp->next;
02594     }
02595 
02596   /* Prepare to write out this data.  */
02597   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
02598   iov[2 + cnt].iov_base = &elem_size;
02599   iov[2 + cnt].iov_len = sizeof (int32_t);
02600   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02601   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02602   ++cnt;
02603 
02604   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
02605   iov[2 + cnt].iov_base = elem_table;
02606   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
02607   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02608   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02609   ++cnt;
02610 
02611   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
02612   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
02613   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
02614   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02615   ++cnt;
02616 
02617   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
02618   iov[2 + cnt].iov_base = collate->mbseqorder;
02619   iov[2 + cnt].iov_len = 256;
02620   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02621   ++cnt;
02622 
02623   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
02624   iov[2 + cnt].iov_base = collate->wcseqorder.result;
02625   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
02626   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
02627   assert (idx[cnt] % __alignof__ (int32_t) == 0);
02628   ++cnt;
02629 
02630   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
02631   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
02632   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
02633   ++cnt;
02634 
02635   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
02636 
02637   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
02638 
02639   obstack_free (&weightpool, NULL);
02640   obstack_free (&extrapool, NULL);
02641   obstack_free (&indirectpool, NULL);
02642 }
02643 
02644 
02645 static enum token_t
02646 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
02647         const struct charmap_t *charmap, int to_endif)
02648 {
02649   while (1)
02650     {
02651       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
02652       enum token_t nowtok = now->tok;
02653 
02654       if (nowtok == tok_eof || nowtok == tok_end)
02655        return nowtok;
02656 
02657       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
02658        {
02659          lr_error (ldfile, _("%s: nested conditionals not supported"),
02660                   "LC_COLLATE");
02661          nowtok = skip_to (ldfile, collate, charmap, tok_endif);
02662          if (nowtok == tok_eof || nowtok == tok_end)
02663            return nowtok;
02664        }
02665       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
02666        {
02667          lr_ignore_rest (ldfile, 1);
02668          return nowtok;
02669        }
02670       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
02671        {
02672          /* Do not read the rest of the line.  */
02673          return nowtok;
02674        }
02675       else if (nowtok == tok_else)
02676        {
02677          lr_error (ldfile, _("%s: more then one 'else'"), "LC_COLLATE");
02678        }
02679 
02680       lr_ignore_rest (ldfile, 0);
02681     }
02682 }
02683 
02684 
02685 void
02686 collate_read (struct linereader *ldfile, struct localedef_t *result,
02687              const struct charmap_t *charmap, const char *repertoire_name,
02688              int ignore_content)
02689 {
02690   struct repertoire_t *repertoire = NULL;
02691   struct locale_collate_t *collate;
02692   struct token *now;
02693   struct token *arg = NULL;
02694   enum token_t nowtok;
02695   enum token_t was_ellipsis = tok_none;
02696   struct localedef_t *copy_locale = NULL;
02697   /* Parsing state:
02698      0 - start
02699      1 - between `order-start' and `order-end'
02700      2 - after `order-end'
02701      3 - after `reorder-after', waiting for `reorder-end'
02702      4 - after `reorder-end'
02703      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
02704      6 - after `reorder-sections-end'
02705   */
02706   int state = 0;
02707 
02708   /* Get the repertoire we have to use.  */
02709   if (repertoire_name != NULL)
02710     repertoire = repertoire_read (repertoire_name);
02711 
02712   /* The rest of the line containing `LC_COLLATE' must be free.  */
02713   lr_ignore_rest (ldfile, 1);
02714 
02715   while (1)
02716     {
02717       do
02718        {
02719          now = lr_token (ldfile, charmap, result, NULL, verbose);
02720          nowtok = now->tok;
02721        }
02722       while (nowtok == tok_eol);
02723 
02724       if (nowtok != tok_define)
02725        break;
02726 
02727       if (ignore_content)
02728        lr_ignore_rest (ldfile, 0);
02729       else
02730        {
02731          arg = lr_token (ldfile, charmap, result, NULL, verbose);
02732          if (arg->tok != tok_ident)
02733            SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
02734          else
02735            {
02736              /* Simply add the new symbol.  */
02737              struct name_list *newsym = xmalloc (sizeof (*newsym)
02738                                             + arg->val.str.lenmb + 1);
02739              memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
02740              newsym->str[arg->val.str.lenmb] = '\0';
02741              newsym->next = defined;
02742              defined = newsym;
02743 
02744              lr_ignore_rest (ldfile, 1);
02745            }
02746        }
02747     }
02748 
02749   if (nowtok == tok_copy)
02750     {
02751       now = lr_token (ldfile, charmap, result, NULL, verbose);
02752       if (now->tok != tok_string)
02753        {
02754          SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
02755 
02756        skip_category:
02757          do
02758            now = lr_token (ldfile, charmap, result, NULL, verbose);
02759          while (now->tok != tok_eof && now->tok != tok_end);
02760 
02761          if (now->tok != tok_eof
02762              || (now = lr_token (ldfile, charmap, result, NULL, verbose),
02763                 now->tok == tok_eof))
02764            lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
02765          else if (now->tok != tok_lc_collate)
02766            {
02767              lr_error (ldfile, _("\
02768 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
02769              lr_ignore_rest (ldfile, 0);
02770            }
02771          else
02772            lr_ignore_rest (ldfile, 1);
02773 
02774          return;
02775        }
02776 
02777       if (! ignore_content)
02778        {
02779          /* Get the locale definition.  */
02780          copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
02781                                  repertoire_name, charmap, NULL);
02782          if ((copy_locale->avail & COLLATE_LOCALE) == 0)
02783            {
02784              /* Not yet loaded.  So do it now.  */
02785              if (locfile_read (copy_locale, charmap) != 0)
02786               goto skip_category;
02787            }
02788 
02789          if (copy_locale->categories[LC_COLLATE].collate == NULL)
02790            return;
02791        }
02792 
02793       lr_ignore_rest (ldfile, 1);
02794 
02795       now = lr_token (ldfile, charmap, result, NULL, verbose);
02796       nowtok = now->tok;
02797     }
02798 
02799   /* Prepare the data structures.  */
02800   collate_startup (ldfile, result, copy_locale, ignore_content);
02801   collate = result->categories[LC_COLLATE].collate;
02802 
02803   while (1)
02804     {
02805       char ucs4buf[10];
02806       char *symstr;
02807       size_t symlen;
02808 
02809       /* Of course we don't proceed beyond the end of file.  */
02810       if (nowtok == tok_eof)
02811        break;
02812 
02813       /* Ingore empty lines.  */
02814       if (nowtok == tok_eol)
02815        {
02816          now = lr_token (ldfile, charmap, result, NULL, verbose);
02817          nowtok = now->tok;
02818          continue;
02819        }
02820 
02821       switch (nowtok)
02822        {
02823        case tok_copy:
02824          /* Allow copying other locales.  */
02825          now = lr_token (ldfile, charmap, result, NULL, verbose);
02826          if (now->tok != tok_string)
02827            goto err_label;
02828 
02829          if (! ignore_content)
02830            load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
02831                       charmap, result);
02832 
02833          lr_ignore_rest (ldfile, 1);
02834          break;
02835 
02836        case tok_coll_weight_max:
02837          /* Ignore the rest of the line if we don't need the input of
02838             this line.  */
02839          if (ignore_content)
02840            {
02841              lr_ignore_rest (ldfile, 0);
02842              break;
02843            }
02844 
02845          if (state != 0)
02846            goto err_label;
02847 
02848          arg = lr_token (ldfile, charmap, result, NULL, verbose);
02849          if (arg->tok != tok_number)
02850            goto err_label;
02851          if (collate->col_weight_max != -1)
02852            lr_error (ldfile, _("%s: duplicate definition of `%s'"),
02853                     "LC_COLLATE", "col_weight_max");
02854          else
02855            collate->col_weight_max = arg->val.num;
02856          lr_ignore_rest (ldfile, 1);
02857          break;
02858 
02859        case tok_section_symbol:
02860          /* Ignore the rest of the line if we don't need the input of
02861             this line.  */
02862          if (ignore_content)
02863            {
02864              lr_ignore_rest (ldfile, 0);
02865              break;
02866            }
02867 
02868          if (state != 0)
02869            goto err_label;
02870 
02871          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
02872          if (arg->tok != tok_bsymbol)
02873            goto err_label;
02874          else if (!ignore_content)
02875            {
02876              /* Check whether this section is already known.  */
02877              struct section_list *known = collate->sections;
02878              while (known != NULL)
02879               {
02880                 if (strcmp (known->name, arg->val.str.startmb) == 0)
02881                   break;
02882                 known = known->next;
02883               }
02884 
02885              if (known != NULL)
02886               {
02887                 lr_error (ldfile,
02888                          _("%s: duplicate declaration of section `%s'"),
02889                          "LC_COLLATE", arg->val.str.startmb);
02890                 free (arg->val.str.startmb);
02891               }
02892              else
02893               collate->sections = make_seclist_elem (collate,
02894                                                  arg->val.str.startmb,
02895                                                  collate->sections);
02896 
02897              lr_ignore_rest (ldfile, known == NULL);
02898            }
02899          else
02900            {
02901              free (arg->val.str.startmb);
02902              lr_ignore_rest (ldfile, 0);
02903            }
02904          break;
02905 
02906        case tok_collating_element:
02907          /* Ignore the rest of the line if we don't need the input of
02908             this line.  */
02909          if (ignore_content)
02910            {
02911              lr_ignore_rest (ldfile, 0);
02912              break;
02913            }
02914 
02915          if (state != 0 && state != 2)
02916            goto err_label;
02917 
02918          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
02919          if (arg->tok != tok_bsymbol)
02920            goto err_label;
02921          else
02922            {
02923              const char *symbol = arg->val.str.startmb;
02924              size_t symbol_len = arg->val.str.lenmb;
02925 
02926              /* Next the `from' keyword.  */
02927              arg = lr_token (ldfile, charmap, result, repertoire, verbose);
02928              if (arg->tok != tok_from)
02929               {
02930                 free ((char *) symbol);
02931                 goto err_label;
02932               }
02933 
02934              ldfile->return_widestr = 1;
02935              ldfile->translate_strings = 1;
02936 
02937              /* Finally the string with the replacement.  */
02938              arg = lr_token (ldfile, charmap, result, repertoire, verbose);
02939 
02940              ldfile->return_widestr = 0;
02941              ldfile->translate_strings = 0;
02942 
02943              if (arg->tok != tok_string)
02944               goto err_label;
02945 
02946              if (!ignore_content && symbol != NULL)
02947               {
02948                 /* The name is already defined.  */
02949                 if (check_duplicate (ldfile, collate, charmap,
02950                                    repertoire, symbol, symbol_len))
02951                   goto col_elem_free;
02952 
02953                 if (arg->val.str.startmb != NULL)
02954                   insert_entry (&collate->elem_table, symbol, symbol_len,
02955                               new_element (collate,
02956                                           arg->val.str.startmb,
02957                                           arg->val.str.lenmb - 1,
02958                                           arg->val.str.startwc,
02959                                           symbol, symbol_len, 0));
02960               }
02961              else
02962               {
02963               col_elem_free:
02964                 free ((char *) symbol);
02965                 free (arg->val.str.startmb);
02966                 free (arg->val.str.startwc);
02967               }
02968              lr_ignore_rest (ldfile, 1);
02969            }
02970          break;
02971 
02972        case tok_collating_symbol:
02973          /* Ignore the rest of the line if we don't need the input of
02974             this line.  */
02975          if (ignore_content)
02976            {
02977              lr_ignore_rest (ldfile, 0);
02978              break;
02979            }
02980 
02981          if (state != 0 && state != 2)
02982            goto err_label;
02983 
02984          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
02985          if (arg->tok != tok_bsymbol)
02986            goto err_label;
02987          else
02988            {
02989              char *symbol = arg->val.str.startmb;
02990              size_t symbol_len = arg->val.str.lenmb;
02991              char *endsymbol = NULL;
02992              size_t endsymbol_len = 0;
02993              enum token_t ellipsis = tok_none;
02994 
02995              arg = lr_token (ldfile, charmap, result, repertoire, verbose);
02996              if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
02997               {
02998                 ellipsis = arg->tok;
02999 
03000                 arg = lr_token (ldfile, charmap, result, repertoire,
03001                               verbose);
03002                 if (arg->tok != tok_bsymbol)
03003                   {
03004                     free (symbol);
03005                     goto err_label;
03006                   }
03007 
03008                 endsymbol = arg->val.str.startmb;
03009                 endsymbol_len = arg->val.str.lenmb;
03010 
03011                 lr_ignore_rest (ldfile, 1);
03012               }
03013              else if (arg->tok != tok_eol)
03014               {
03015                 free (symbol);
03016                 goto err_label;
03017               }
03018 
03019              if (!ignore_content)
03020               {
03021                 if (symbol == NULL
03022                     || (ellipsis != tok_none && endsymbol == NULL))
03023                   {
03024                     lr_error (ldfile, _("\
03025 %s: unknown character in collating symbol name"),
03026                             "LC_COLLATE");
03027                     goto col_sym_free;
03028                   }
03029                 else if (ellipsis == tok_none)
03030                   {
03031                     /* A single symbol, no ellipsis.  */
03032                     if (check_duplicate (ldfile, collate, charmap,
03033                                       repertoire, symbol, symbol_len))
03034                      /* The name is already defined.  */
03035                      goto col_sym_free;
03036 
03037                     insert_entry (&collate->sym_table, symbol, symbol_len,
03038                                 new_symbol (collate, symbol, symbol_len));
03039                   }
03040                 else if (symbol_len != endsymbol_len)
03041                   {
03042                   col_sym_inv_range:
03043                     lr_error (ldfile,
03044                             _("invalid names for character range"));
03045                     goto col_sym_free;
03046                   }
03047                 else
03048                   {
03049                     /* Oh my, we have to handle an ellipsis.  First, as
03050                       usual, determine the common prefix and then
03051                       convert the rest into a range.  */
03052                     size_t prefixlen;
03053                     unsigned long int from;
03054                     unsigned long int to;
03055                     char *endp;
03056 
03057                     for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
03058                      if (symbol[prefixlen] != endsymbol[prefixlen])
03059                        break;
03060 
03061                     /* Convert the rest into numbers.  */
03062                     symbol[symbol_len] = '\0';
03063                     from = strtoul (&symbol[prefixlen], &endp,
03064                                   ellipsis == tok_ellipsis2 ? 16 : 10);
03065                     if (*endp != '\0')
03066                      goto col_sym_inv_range;
03067 
03068                     endsymbol[symbol_len] = '\0';
03069                     to = strtoul (&endsymbol[prefixlen], &endp,
03070                                 ellipsis == tok_ellipsis2 ? 16 : 10);
03071                     if (*endp != '\0')
03072                      goto col_sym_inv_range;
03073 
03074                     if (from > to)
03075                      goto col_sym_inv_range;
03076 
03077                     /* Now loop over all entries.  */
03078                     while (from <= to)
03079                      {
03080                        char *symbuf;
03081 
03082                        symbuf = (char *) obstack_alloc (&collate->mempool,
03083                                                     symbol_len + 1);
03084 
03085                        /* Create the name.  */
03086                        sprintf (symbuf,
03087                                ellipsis == tok_ellipsis2
03088                                ? "%.*s%.*lX" : "%.*s%.*lu",
03089                                (int) prefixlen, symbol,
03090                                (int) (symbol_len - prefixlen), from);
03091 
03092                        if (check_duplicate (ldfile, collate, charmap,
03093                                           repertoire, symbuf, symbol_len))
03094                          /* The name is already defined.  */
03095                          goto col_sym_free;
03096 
03097                        insert_entry (&collate->sym_table, symbuf,
03098                                    symbol_len,
03099                                    new_symbol (collate, symbuf,
03100                                               symbol_len));
03101 
03102                        /* Increment the counter.  */
03103                        ++from;
03104                      }
03105 
03106                     goto col_sym_free;
03107                   }
03108               }
03109              else
03110               {
03111               col_sym_free:
03112                 free (symbol);
03113                 free (endsymbol);
03114               }
03115            }
03116          break;
03117 
03118        case tok_symbol_equivalence:
03119          /* Ignore the rest of the line if we don't need the input of
03120             this line.  */
03121          if (ignore_content)
03122            {
03123              lr_ignore_rest (ldfile, 0);
03124              break;
03125            }
03126 
03127          if (state != 0)
03128            goto err_label;
03129 
03130          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
03131          if (arg->tok != tok_bsymbol)
03132            goto err_label;
03133          else
03134            {
03135              const char *newname = arg->val.str.startmb;
03136              size_t newname_len = arg->val.str.lenmb;
03137              const char *symname;
03138              size_t symname_len;
03139              void *symval;  /* Actually struct symbol_t*  */
03140 
03141              arg = lr_token (ldfile, charmap, result, repertoire, verbose);
03142              if (arg->tok != tok_bsymbol)
03143               {
03144                 free ((char *) newname);
03145                 goto err_label;
03146               }
03147 
03148              symname = arg->val.str.startmb;
03149              symname_len = arg->val.str.lenmb;
03150 
03151              if (newname == NULL)
03152               {
03153                 lr_error (ldfile, _("\
03154 %s: unknown character in equivalent definition name"),
03155                          "LC_COLLATE");
03156 
03157               sym_equiv_free:
03158                 free ((char *) newname);
03159                 free ((char *) symname);
03160                 break;
03161               }
03162              if (symname == NULL)
03163               {
03164                 lr_error (ldfile, _("\
03165 %s: unknown character in equivalent definition value"),
03166                          "LC_COLLATE");
03167                 goto sym_equiv_free;
03168               }
03169 
03170              /* See whether the symbol name is already defined.  */
03171              if (find_entry (&collate->sym_table, symname, symname_len,
03172                            &symval) != 0)
03173               {
03174                 lr_error (ldfile, _("\
03175 %s: unknown symbol `%s' in equivalent definition"),
03176                          "LC_COLLATE", symname);
03177                 goto sym_equiv_free;
03178               }
03179 
03180              if (insert_entry (&collate->sym_table,
03181                             newname, newname_len, symval) < 0)
03182               {
03183                 lr_error (ldfile, _("\
03184 error while adding equivalent collating symbol"));
03185                 goto sym_equiv_free;
03186               }
03187 
03188              free ((char *) symname);
03189            }
03190          lr_ignore_rest (ldfile, 1);
03191          break;
03192 
03193        case tok_script:
03194          /* Ignore the rest of the line if we don't need the input of
03195             this line.  */
03196          if (ignore_content)
03197            {
03198              lr_ignore_rest (ldfile, 0);
03199              break;
03200            }
03201 
03202          /* We get told about the scripts we know.  */
03203          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
03204          if (arg->tok != tok_bsymbol)
03205            goto err_label;
03206          else
03207            {
03208              struct section_list *runp = collate->known_sections;
03209              char *name;
03210 
03211              while (runp != NULL)
03212               if (strncmp (runp->name, arg->val.str.startmb,
03213                           arg->val.str.lenmb) == 0
03214                   && runp->name[arg->val.str.lenmb] == '\0')
03215                 break;
03216               else
03217                 runp = runp->def_next;
03218 
03219              if (runp != NULL)
03220               {
03221                 lr_error (ldfile, _("duplicate definition of script `%s'"),
03222                          runp->name);
03223                 lr_ignore_rest (ldfile, 0);
03224                 break;
03225               }
03226 
03227              runp = (struct section_list *) xcalloc (1, sizeof (*runp));
03228              name = (char *) xmalloc (arg->val.str.lenmb + 1);
03229              memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
03230              name[arg->val.str.lenmb] = '\0';
03231              runp->name = name;
03232 
03233              runp->def_next = collate->known_sections;
03234              collate->known_sections = runp;
03235            }
03236          lr_ignore_rest (ldfile, 1);
03237          break;
03238 
03239        case tok_order_start:
03240          /* Ignore the rest of the line if we don't need the input of
03241             this line.  */
03242          if (ignore_content)
03243            {
03244              lr_ignore_rest (ldfile, 0);
03245              break;
03246            }
03247 
03248          if (state != 0 && state != 1 && state != 2)
03249            goto err_label;
03250          state = 1;
03251 
03252          /* The 14652 draft does not specify whether all `order_start' lines
03253             must contain the same number of sort-rules, but 14651 does.  So
03254             we require this here as well.  */
03255          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
03256          if (arg->tok == tok_bsymbol)
03257            {
03258              /* This better should be a section name.  */
03259              struct section_list *sp = collate->known_sections;
03260              while (sp != NULL
03261                    && (sp->name == NULL
03262                       || strncmp (sp->name, arg->val.str.startmb,
03263                                  arg->val.str.lenmb) != 0
03264                       || sp->name[arg->val.str.lenmb] != '\0'))
03265               sp = sp->def_next;
03266 
03267              if (sp == NULL)
03268               {
03269                 lr_error (ldfile, _("\
03270 %s: unknown section name `%.*s'"),
03271                          "LC_COLLATE", (int) arg->val.str.lenmb,
03272                          arg->val.str.startmb);
03273                 /* We use the error section.  */
03274                 collate->current_section = &collate->error_section;
03275 
03276                 if (collate->error_section.first == NULL)
03277                   {
03278                     /* Insert &collate->error_section at the end of
03279                       the collate->sections list.  */
03280                     if (collate->sections == NULL)
03281                      collate->sections = &collate->error_section;
03282                     else
03283                      {
03284                        sp = collate->sections;
03285                        while (sp->next != NULL)
03286                          sp = sp->next;
03287 
03288                        sp->next = &collate->error_section;
03289                      }
03290                     collate->error_section.next = NULL;
03291                   }
03292               }
03293              else
03294               {
03295                 /* One should not be allowed to open the same
03296                      section twice.  */
03297                 if (sp->first != NULL)
03298                   lr_error (ldfile, _("\
03299 %s: multiple order definitions for section `%s'"),
03300                            "LC_COLLATE", sp->name);
03301                 else
03302                   {
03303                     /* Insert sp in the collate->sections list,
03304                       right after collate->current_section.  */
03305                     if (collate->current_section != NULL)
03306                      {
03307                        sp->next = collate->current_section->next;
03308                        collate->current_section->next = sp;
03309                      }
03310                     else if (collate->sections == NULL)
03311                      /* This is the first section to be defined.  */
03312                      collate->sections = sp;
03313 
03314                     collate->current_section = sp;
03315                   }
03316 
03317                 /* Next should come the end of the line or a semicolon.  */
03318                 arg = lr_token (ldfile, charmap, result, repertoire,
03319                               verbose);
03320                 if (arg->tok == tok_eol)
03321                   {
03322                     uint32_t cnt;
03323 
03324                     /* This means we have exactly one rule: `forward'.  */
03325                     if (nrules > 1)
03326                      lr_error (ldfile, _("\
03327 %s: invalid number of sorting rules"),
03328                               "LC_COLLATE");
03329                     else
03330                      nrules = 1;
03331                     sp->rules = obstack_alloc (&collate->mempool,
03332                                            (sizeof (enum coll_sort_rule)
03333                                             * nrules));
03334                     for (cnt = 0; cnt < nrules; ++cnt)
03335                      sp->rules[cnt] = sort_forward;
03336 
03337                     /* Next line.  */
03338                     break;
03339                   }
03340 
03341                 /* Get the next token.  */
03342                 arg = lr_token (ldfile, charmap, result, repertoire,
03343                               verbose);
03344               }
03345            }
03346          else
03347            {
03348              /* There is no section symbol.  Therefore we use the unnamed
03349                section.  */
03350              collate->current_section = &collate->unnamed_section;
03351 
03352              if (collate->unnamed_section.first != NULL)
03353               lr_error (ldfile, _("\
03354 %s: multiple order definitions for unnamed section"),
03355                        "LC_COLLATE");
03356              else
03357               {
03358                 /* Insert &collate->unnamed_section at the beginning of
03359                    the collate->sections list.  */
03360                 collate->unnamed_section.next = collate->sections;
03361                 collate->sections = &collate->unnamed_section;
03362               }
03363            }
03364 
03365          /* Now read the direction names.  */
03366          read_directions (ldfile, arg, charmap, repertoire, result);
03367 
03368          /* From now we need the strings untranslated.  */
03369          ldfile->translate_strings = 0;
03370          break;
03371 
03372        case tok_order_end:
03373          /* Ignore the rest of the line if we don't need the input of
03374             this line.  */
03375          if (ignore_content)
03376            {
03377              lr_ignore_rest (ldfile, 0);
03378              break;
03379            }
03380 
03381          if (state != 1)
03382            goto err_label;
03383 
03384          /* Handle ellipsis at end of list.  */
03385          if (was_ellipsis != tok_none)
03386            {
03387              handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
03388                             repertoire, result);
03389              was_ellipsis = tok_none;
03390            }
03391 
03392          state = 2;
03393          lr_ignore_rest (ldfile, 1);
03394          break;
03395 
03396        case tok_reorder_after:
03397          /* Ignore the rest of the line if we don't need the input of
03398             this line.  */
03399          if (ignore_content)
03400            {
03401              lr_ignore_rest (ldfile, 0);
03402              break;
03403            }
03404 
03405          if (state == 1)
03406            {
03407              lr_error (ldfile, _("%s: missing `order_end' keyword"),
03408                      "LC_COLLATE");
03409              state = 2;
03410 
03411              /* Handle ellipsis at end of list.  */
03412              if (was_ellipsis != tok_none)
03413               {
03414                 handle_ellipsis (ldfile, arg->val.str.startmb,
03415                                arg->val.str.lenmb, was_ellipsis, charmap,
03416                                repertoire, result);
03417                 was_ellipsis = tok_none;
03418               }
03419            }
03420          else if (state == 0 && copy_locale == NULL)
03421            goto err_label;
03422          else if (state != 0 && state != 2 && state != 3)
03423            goto err_label;
03424          state = 3;
03425 
03426          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
03427          if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
03428            {
03429              /* Find this symbol in the sequence table.  */
03430              char ucsbuf[10];
03431              char *startmb;
03432              size_t lenmb;
03433              struct element_t *insp;
03434              int no_error = 1;
03435              void *ptr;
03436 
03437              if (arg->tok == tok_bsymbol)
03438               {
03439                 startmb = arg->val.str.startmb;
03440                 lenmb = arg->val.str.lenmb;
03441               }
03442              else
03443               {
03444                 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
03445                 startmb = ucsbuf;
03446                 lenmb = 9;
03447               }
03448 
03449              if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
03450               /* Yes, the symbol exists.  Simply point the cursor
03451                  to it.  */
03452               collate->cursor = (struct element_t *) ptr;
03453              else
03454               {
03455                 struct symbol_t *symbp;
03456                 void *ptr;
03457 
03458                 if (find_entry (&collate->sym_table, startmb, lenmb,
03459                               &ptr) == 0)
03460                   {
03461                     symbp = ptr;
03462 
03463                     if (symbp->order->last != NULL
03464                        || symbp->order->next != NULL)
03465                      collate->cursor = symbp->order;
03466                     else
03467                      {
03468                        /* This is a collating symbol but its position
03469                           is not yet defined.  */
03470                        lr_error (ldfile, _("\
03471 %s: order for collating symbol %.*s not yet defined"),
03472                                 "LC_COLLATE", (int) lenmb, startmb);
03473                        collate->cursor = NULL;
03474                        no_error = 0;
03475                      }
03476                   }
03477                 else if (find_entry (&collate->elem_table, startmb, lenmb,
03478                                    &ptr) == 0)
03479                   {
03480                     insp = (struct element_t *) ptr;
03481 
03482                     if (insp->last != NULL || insp->next != NULL)
03483                      collate->cursor = insp;
03484                     else
03485                      {
03486                        /* This is a collating element but its position
03487                           is not yet defined.  */
03488                        lr_error (ldfile, _("\
03489 %s: order for collating element %.*s not yet defined"),
03490                                 "LC_COLLATE", (int) lenmb, startmb);
03491                        collate->cursor = NULL;
03492                        no_error = 0;
03493                      }
03494                   }
03495                 else
03496                   {
03497                     /* This is bad.  The symbol after which we have to
03498                       insert does not exist.  */
03499                     lr_error (ldfile, _("\
03500 %s: cannot reorder after %.*s: symbol not known"),
03501                             "LC_COLLATE", (int) lenmb, startmb);
03502                     collate->cursor = NULL;
03503                     no_error = 0;
03504                   }
03505               }
03506 
03507              lr_ignore_rest (ldfile, no_error);
03508            }
03509          else
03510            /* This must not happen.  */
03511            goto err_label;
03512          break;
03513 
03514        case tok_reorder_end:
03515          /* Ignore the rest of the line if we don't need the input of
03516             this line.  */
03517          if (ignore_content)
03518            break;
03519 
03520          if (state != 3)
03521            goto err_label;
03522          state = 4;
03523          lr_ignore_rest (ldfile, 1);
03524          break;
03525 
03526        case tok_reorder_sections_after:
03527          /* Ignore the rest of the line if we don't need the input of
03528             this line.  */
03529          if (ignore_content)
03530            {
03531              lr_ignore_rest (ldfile, 0);
03532              break;
03533            }
03534 
03535          if (state == 1)
03536            {
03537              lr_error (ldfile, _("%s: missing `order_end' keyword"),
03538                      "LC_COLLATE");
03539              state = 2;
03540 
03541              /* Handle ellipsis at end of list.  */
03542              if (was_ellipsis != tok_none)
03543               {
03544                 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
03545                                repertoire, result);
03546                 was_ellipsis = tok_none;
03547               }
03548            }
03549          else if (state == 3)
03550            {
03551              WITH_CUR_LOCALE (error (0, 0, _("\
03552 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
03553              state = 4;
03554            }
03555          else if (state != 2 && state != 4)
03556            goto err_label;
03557          state = 5;
03558 
03559          /* Get the name of the section we are adding after.  */
03560          arg = lr_token (ldfile, charmap, result, repertoire, verbose);
03561          if (arg->tok == tok_bsymbol)
03562            {
03563              /* Now find a section with this name.  */
03564              struct section_list *runp = collate->sections;
03565 
03566              while (runp != NULL)
03567               {
03568                 if (runp->name != NULL
03569                     && strlen (runp->name) == arg->val.str.lenmb
03570                     && memcmp (runp->name, arg->val.str.startmb,
03571                              arg->val.str.lenmb) == 0)
03572                   break;
03573 
03574                 runp = runp->next;
03575               }
03576 
03577              if (runp != NULL)
03578               collate->current_section = runp;
03579              else
03580               {
03581                 /* This is bad.  The section after which we have to
03582                      reorder does not exist.  Therefore we cannot
03583                      process the whole rest of this reorder
03584                      specification.  */
03585                 lr_error (ldfile, _("%s: section `%.*s' not known"),
03586                          "LC_COLLATE", (int) arg->val.str.lenmb,
03587                          arg->val.str.startmb);
03588 
03589                 do
03590                   {
03591                     lr_ignore_rest (ldfile, 0);
03592 
03593                     now = lr_token (ldfile, charmap, result, NULL, verbose);
03594                   }
03595                 while (now->tok == tok_reorder_sections_after
03596                       || now->tok == tok_reorder_sections_end
03597                       || now->tok == tok_end);
03598 
03599                 /* Process the token we just saw.  */
03600                 nowtok = now->tok;
03601                 continue;
03602               }
03603            }
03604          else
03605            /* This must not happen.  */
03606            goto err_label;
03607          break;
03608 
03609        case tok_reorder_sections_end:
03610          /* Ignore the rest of the line if we don't need the input of
03611             this line.  */
03612          if (ignore_content)
03613            break;
03614 
03615          if (state != 5)
03616            goto err_label;
03617          state = 6;
03618          lr_ignore_rest (ldfile, 1);
03619          break;
03620 
03621        case tok_bsymbol:
03622        case tok_ucs4:
03623          /* Ignore the rest of the line if we don't need the input of
03624             this line.  */
03625          if (ignore_content)
03626            {
03627              lr_ignore_rest (ldfile, 0);
03628              break;
03629            }
03630 
03631          if (state != 0 && state != 1 && state != 3 && state != 5)
03632            goto err_label;
03633 
03634          if ((state == 0 || state == 5) && nowtok == tok_ucs4)
03635            goto err_label;
03636 
03637          if (nowtok == tok_ucs4)
03638            {
03639              snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
03640              symstr = ucs4buf;
03641              symlen = 9;
03642            }
03643          else if (arg != NULL)
03644            {
03645              symstr = arg->val.str.startmb;
03646              symlen = arg->val.str.lenmb;
03647            }
03648          else
03649            {
03650              lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
03651                      (int) ldfile->token.val.str.lenmb,
03652                      ldfile->token.val.str.startmb);
03653              break;
03654            }
03655 
03656          struct element_t *seqp;
03657          if (state == 0)
03658            {
03659              /* We are outside an `order_start' region.  This means
03660                  we must only accept definitions of values for
03661                  collation symbols since these are purely abstract
03662                  values and don't need directions associated.  */
03663              void *ptr;
03664 
03665              if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
03666               {
03667                 seqp = ptr;
03668 
03669                 /* It's already defined.  First check whether this
03670                    is really a collating symbol.  */
03671                 if (seqp->is_character)
03672                   goto err_label;
03673 
03674                 goto move_entry;
03675               }
03676              else
03677               {
03678                 void *result;
03679 
03680                 if (find_entry (&collate->sym_table, symstr, symlen,
03681                               &result) != 0)
03682                   /* No collating symbol, it's an error.  */
03683                   goto err_label;
03684 
03685                 /* Maybe this is the first time we define a symbol
03686                    value and it is before the first actual section.  */
03687                 if (collate->sections == NULL)
03688                   collate->sections = collate->current_section =
03689                     &collate->symbol_section;
03690               }
03691 
03692              if (was_ellipsis != tok_none)
03693               {
03694                 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
03695                                charmap, repertoire, result);
03696 
03697                 /* Remember that we processed the ellipsis.  */
03698                 was_ellipsis = tok_none;
03699 
03700                 /* And don't add the value a second time.  */
03701                 break;
03702               }
03703            }
03704          else if (state == 3)
03705            {
03706              /* It is possible that we already have this collation sequence.
03707                In this case we move the entry.  */
03708              void *sym;
03709              void *ptr;
03710 
03711              /* If the symbol after which we have to insert was not found
03712                ignore all entries.  */
03713              if (collate->cursor == NULL)
03714               {
03715                 lr_ignore_rest (ldfile, 0);
03716                 break;
03717               }
03718 
03719              if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
03720               {
03721                 seqp = (struct element_t *) ptr;
03722                 goto move_entry;
03723               }
03724 
03725              if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
03726                 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
03727               goto move_entry;
03728 
03729              if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
03730                 && (seqp = (struct element_t *) ptr,
03731                     seqp->last != NULL || seqp->next != NULL
03732                     || (collate->start != NULL && seqp == collate->start)))
03733               {
03734               move_entry:
03735                 /* Remove the entry from the old position.  */
03736                 if (seqp->last == NULL)
03737                   collate->start = seqp->next;
03738                 else
03739                   seqp->last->next = seqp->next;
03740                 if (seqp->next != NULL)
03741                   seqp->next->last = seqp->last;
03742 
03743                 /* We also have to check whether this entry is the
03744                      first or last of a section.  */
03745                 if (seqp->section->first == seqp)
03746                   {
03747                     if (seqp->section->first == seqp->section->last)
03748                      /* This section has no content anymore.  */
03749                      seqp->section->first = seqp->section->last = NULL;
03750                     else
03751                      seqp->section->first = seqp->next;
03752                   }
03753                 else if (seqp->section->last == seqp)
03754                   seqp->section->last = seqp->last;
03755 
03756                 /* Now insert it in the new place.  */
03757                 insert_weights (ldfile, seqp, charmap, repertoire, result,
03758                               tok_none);
03759                 break;
03760               }
03761 
03762              /* Otherwise we just add a new entry.  */
03763            }
03764          else if (state == 5)
03765            {
03766              /* We are reordering sections.  Find the named section.  */
03767              struct section_list *runp = collate->sections;
03768              struct section_list *prevp = NULL;
03769 
03770              while (runp != NULL)
03771               {
03772                 if (runp->name != NULL
03773                     && strlen (runp->name) == symlen
03774                     && memcmp (runp->name, symstr, symlen) == 0)
03775                   break;
03776 
03777                 prevp = runp;
03778                 runp = runp->next;
03779               }
03780 
03781              if (runp == NULL)
03782               {
03783                 lr_error (ldfile, _("%s: section `%.*s' not known"),
03784                          "LC_COLLATE", (int) symlen, symstr);
03785                 lr_ignore_rest (ldfile, 0);
03786               }
03787              else
03788               {
03789                 if (runp != collate->current_section)
03790                   {
03791                     /* Remove the named section from the old place and
03792                       insert it in the new one.  */
03793                     prevp->next = runp->next;
03794 
03795                     runp->next = collate->current_section->next;
03796                     collate->current_section->next = runp;
03797                     collate->current_section = runp;
03798                   }
03799 
03800                 /* Process the rest of the line which might change
03801                      the collation rules.  */
03802                 arg = lr_token (ldfile, charmap, result, repertoire,
03803                               verbose);
03804                 if (arg->tok != tok_eof && arg->tok != tok_eol)
03805                   read_directions (ldfile, arg, charmap, repertoire,
03806                                  result);
03807               }
03808              break;
03809            }
03810          else if (was_ellipsis != tok_none)
03811            {
03812              /* Using the information in the `ellipsis_weight'
03813                  element and this and the last value we have to handle
03814                  the ellipsis now.  */
03815              assert (state == 1);
03816 
03817              handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
03818                             repertoire, result);
03819 
03820              /* Remember that we processed the ellipsis.  */
03821              was_ellipsis = tok_none;
03822 
03823              /* And don't add the value a second time.  */
03824              break;
03825            }
03826 
03827          /* Now insert in the new place.  */
03828          insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
03829          break;
03830 
03831        case tok_undefined:
03832          /* Ignore the rest of the line if we don't need the input of
03833             this line.  */
03834          if (ignore_content)
03835            {
03836              lr_ignore_rest (ldfile, 0);
03837              break;
03838            }
03839 
03840          if (state != 1)
03841            goto err_label;
03842 
03843          if (was_ellipsis != tok_none)
03844            {
03845              lr_error (ldfile,
03846                      _("%s: cannot have `%s' as end of ellipsis range"),
03847                      "LC_COLLATE", "UNDEFINED");
03848 
03849              unlink_element (collate);
03850              was_ellipsis = tok_none;
03851            }
03852 
03853          /* See whether UNDEFINED already appeared somewhere.  */
03854          if (collate->undefined.next != NULL
03855              || &collate->undefined == collate->cursor)
03856            {
03857              lr_error (ldfile,
03858                      _("%s: order for `%.*s' already defined at %s:%Zu"),
03859                      "LC_COLLATE", 9, "UNDEFINED",
03860                      collate->undefined.file,
03861                      collate->undefined.line);
03862              lr_ignore_rest (ldfile, 0);
03863            }
03864          else
03865            /* Parse the weights.  */
03866             insert_weights (ldfile, &collate->undefined, charmap,
03867                           repertoire, result, tok_none);
03868          break;
03869 
03870        case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
03871        case tok_ellipsis3: /* absolute ellipsis */
03872        case tok_ellipsis4: /* symbolic decimal ellipsis */
03873          /* This is the symbolic (decimal or hexadecimal) or absolute
03874              ellipsis.  */
03875          if (was_ellipsis != tok_none)
03876            goto err_label;
03877 
03878          if (state != 0 && state != 1 && state != 3)
03879            goto err_label;
03880 
03881          was_ellipsis = nowtok;
03882 
03883          insert_weights (ldfile, &collate->ellipsis_weight, charmap,
03884                        repertoire, result, nowtok);
03885          break;
03886 
03887        case tok_end:
03888        seen_end:
03889          /* Next we assume `LC_COLLATE'.  */
03890          if (!ignore_content)
03891            {
03892              if (state == 0 && copy_locale == NULL)
03893               /* We must either see a copy statement or have
03894                  ordering values.  */
03895               lr_error (ldfile,
03896                        _("%s: empty category description not allowed"),
03897                        "LC_COLLATE");
03898              else if (state == 1)
03899               {
03900                 lr_error (ldfile, _("%s: missing `order_end' keyword"),
03901                          "LC_COLLATE");
03902 
03903                 /* Handle ellipsis at end of list.  */
03904                 if (was_ellipsis != tok_none)
03905                   {
03906                     handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
03907                                    repertoire, result);
03908                     was_ellipsis = tok_none;
03909                   }
03910               }
03911              else if (state == 3)
03912               WITH_CUR_LOCALE (error (0, 0, _("\
03913 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
03914              else if (state == 5)
03915               WITH_CUR_LOCALE (error (0, 0, _("\
03916 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
03917            }
03918          arg = lr_token (ldfile, charmap, result, NULL, verbose);
03919          if (arg->tok == tok_eof)
03920            break;
03921          if (arg->tok == tok_eol)
03922            lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
03923          else if (arg->tok != tok_lc_collate)
03924            lr_error (ldfile, _("\
03925 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
03926          lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
03927          return;
03928 
03929        case tok_define:
03930          if (ignore_content)
03931            {
03932              lr_ignore_rest (ldfile, 0);
03933              break;
03934            }
03935 
03936          arg = lr_token (ldfile, charmap, result, NULL, verbose);
03937          if (arg->tok != tok_ident)
03938            goto err_label;
03939 
03940          /* Simply add the new symbol.  */
03941          struct name_list *newsym = xmalloc (sizeof (*newsym)
03942                                          + arg->val.str.lenmb + 1);
03943          memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
03944          newsym->str[arg->val.str.lenmb] = '\0';
03945          newsym->next = defined;
03946          defined = newsym;
03947 
03948          lr_ignore_rest (ldfile, 1);
03949          break;
03950 
03951        case tok_undef:
03952          if (ignore_content)
03953            {
03954              lr_ignore_rest (ldfile, 0);
03955              break;
03956            }
03957 
03958          arg = lr_token (ldfile, charmap, result, NULL, verbose);
03959          if (arg->tok != tok_ident)
03960            goto err_label;
03961 
03962          /* Remove _all_ occurrences of the symbol from the list.  */
03963          struct name_list *prevdef = NULL;
03964          struct name_list *curdef = defined;
03965          while (curdef != NULL)
03966            if (strncmp (arg->val.str.startmb, curdef->str,
03967                       arg->val.str.lenmb) == 0
03968               && curdef->str[arg->val.str.lenmb] == '\0')
03969              {
03970               if (prevdef == NULL)
03971                 defined = curdef->next;
03972               else
03973                 prevdef->next = curdef->next;
03974 
03975               struct name_list *olddef = curdef;
03976               curdef = curdef->next;
03977 
03978               free (olddef);
03979              }
03980            else
03981              {
03982               prevdef = curdef;
03983               curdef = curdef->next;
03984              }
03985 
03986          lr_ignore_rest (ldfile, 1);
03987          break;
03988 
03989        case tok_ifdef:
03990        case tok_ifndef:
03991          if (ignore_content)
03992            {
03993              lr_ignore_rest (ldfile, 0);
03994              break;
03995            }
03996 
03997        found_ifdef:
03998          arg = lr_token (ldfile, charmap, result, NULL, verbose);
03999          if (arg->tok != tok_ident)
04000            goto err_label;
04001          lr_ignore_rest (ldfile, 1);
04002 
04003          if (collate->else_action == else_none)
04004            {
04005              curdef = defined;
04006              while (curdef != NULL)
04007               if (strncmp (arg->val.str.startmb, curdef->str,
04008                           arg->val.str.lenmb) == 0
04009                   && curdef->str[arg->val.str.lenmb] == '\0')
04010                 break;
04011               else
04012                 curdef = curdef->next;
04013 
04014              if ((nowtok == tok_ifdef && curdef != NULL)
04015                 || (nowtok == tok_ifndef && curdef == NULL))
04016               {
04017                 /* We have to use the if-branch.  */
04018                 collate->else_action = else_ignore;
04019               }
04020              else
04021               {
04022                 /* We have to use the else-branch, if there is one.  */
04023                 nowtok = skip_to (ldfile, collate, charmap, 0);
04024                 if (nowtok == tok_else)
04025                   collate->else_action = else_seen;
04026                 else if (nowtok == tok_elifdef)
04027                   {
04028                     nowtok = tok_ifdef;
04029                     goto found_ifdef;
04030                   }
04031                 else if (nowtok == tok_elifndef)
04032                   {
04033                     nowtok = tok_ifndef;
04034                     goto found_ifdef;
04035                   }
04036                 else if (nowtok == tok_eof)
04037                   goto seen_eof;
04038                 else if (nowtok == tok_end)
04039                   goto seen_end;
04040               }
04041            }
04042          else
04043            {
04044              /* XXX Should it really become necessary to support nested
04045                preprocessor handling we will push the state here.  */
04046              lr_error (ldfile, _("%s: nested conditionals not supported"),
04047                      "LC_COLLATE");
04048              nowtok = skip_to (ldfile, collate, charmap, 1);
04049              if (nowtok == tok_eof)
04050               goto seen_eof;
04051              else if (nowtok == tok_end)
04052               goto seen_end;
04053            }
04054          break;
04055 
04056        case tok_elifdef:
04057        case tok_elifndef:
04058        case tok_else:
04059          if (ignore_content)
04060            {
04061              lr_ignore_rest (ldfile, 0);
04062              break;
04063            }
04064 
04065          lr_ignore_rest (ldfile, 1);
04066 
04067          if (collate->else_action == else_ignore)
04068            {
04069              /* Ignore everything until the endif.  */
04070              nowtok = skip_to (ldfile, collate, charmap, 1);
04071              if (nowtok == tok_eof)
04072               goto seen_eof;
04073              else if (nowtok == tok_end)
04074               goto seen_end;
04075            }
04076          else
04077            {
04078              assert (collate->else_action == else_none);
04079              lr_error (ldfile, _("\
04080 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
04081                      nowtok == tok_else ? "else"
04082                      : nowtok == tok_elifdef ? "elifdef" : "elifndef");
04083            }
04084          break;
04085 
04086        case tok_endif:
04087          if (ignore_content)
04088            {
04089              lr_ignore_rest (ldfile, 0);
04090              break;
04091            }
04092 
04093          lr_ignore_rest (ldfile, 1);
04094 
04095          if (collate->else_action != else_ignore
04096              && collate->else_action != else_seen)
04097            lr_error (ldfile, _("\
04098 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
04099 
04100          /* XXX If we support nested preprocessor directives we pop
04101             the state here.  */
04102          collate->else_action = else_none;
04103          break;
04104 
04105        default:
04106        err_label:
04107          SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
04108        }
04109 
04110       /* Prepare for the next round.  */
04111       now = lr_token (ldfile, charmap, result, NULL, verbose);
04112       nowtok = now->tok;
04113     }
04114 
04115  seen_eof:
04116   /* When we come here we reached the end of the file.  */
04117   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
04118 }