Back to index

glibc  2.9
gencat.c
Go to the documentation of this file.
00001 /* Copyright (C) 1996-2005, 2006, 2007, 2008 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License as published
00007    by the Free Software Foundation; version 2 of the License, or
00008    (at your option) any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software Foundation,
00017    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
00018 
00019 #ifdef HAVE_CONFIG_H
00020 # include "config.h"
00021 #endif
00022 
00023 #include <argp.h>
00024 #include <assert.h>
00025 #include <ctype.h>
00026 #include <endian.h>
00027 #include <errno.h>
00028 #include <error.h>
00029 #include <fcntl.h>
00030 #include <iconv.h>
00031 #include <langinfo.h>
00032 #include <locale.h>
00033 #include <libintl.h>
00034 #include <limits.h>
00035 #include <nl_types.h>
00036 #include <obstack.h>
00037 #include <stdint.h>
00038 #include <stdio.h>
00039 #include <stdlib.h>
00040 #include <string.h>
00041 #include <unistd.h>
00042 #include <wchar.h>
00043 
00044 #include "version.h"
00045 
00046 #include "catgetsinfo.h"
00047 
00048 
/* Reverse the byte order of the 32-bit value W.

   The operand is first converted to uint32_t, so the result is a
   well-defined uint32_t even if W has a signed type (no shift into the
   sign bit) or a type wider than 32 bits (no stray high-order bits).
   W is evaluated more than once; do not pass expressions with side
   effects.  */
#define SWAPU32(w) \
  ((uint32_t) ((((uint32_t) (w) & UINT32_C (0x000000ff)) << 24)	      \
	       | (((uint32_t) (w) & UINT32_C (0x0000ff00)) << 8)	      \
	       | (((uint32_t) (w) & UINT32_C (0x00ff0000)) >> 8)	      \
	       | (((uint32_t) (w) & UINT32_C (0xff000000)) >> 24)))
00051 
/* One message of a message set.  The messages of a set are kept in a
   singly linked list sorted by NUMBER.  */
struct message_list
{
  int number;			/* Message number within its set.  */
  const char *message;		/* Text of the message.  */

  const char *fname;		/* File in which the message was defined.  */
  size_t line;			/* Line on which the definition starts.  */
  const char *symbol;		/* Symbolic name, NULL if a number was
				   given explicitly.  */

  struct message_list *next;	/* Next message of the set, or NULL.  */
};
00063 
00064 
/* One message set together with the list of its messages.  */
struct set_list
{
  int number;			/* Number of the set.  */
  int deleted;			/* Set to 1 when a `$delset' directive
				   names this set.  */
  struct message_list *messages; /* Messages of the set, sorted by number.  */
  int last_message;		/* Highest message number handed out or
				   seen so far in this set.  */

  const char *fname;		/* File containing the `$set' directive.  */
  size_t line;			/* Line of that directive.  */
  const char *symbol;		/* Symbolic set name, or NULL.  */

  struct set_list *next;	/* Next set of the catalog, or NULL.  */
};
00078 
00079 
/* Everything collected from the input files read so far.  */
struct catalog
{
  struct set_list *all_sets;	/* Head of the list of all sets.  */
  struct set_list *current_set;	/* Set that receives the messages read
				   next.  */
  size_t total_messages;	/* Counter bumped for every message line
				   processed.  */
  wint_t quote_char;		/* Quote character selected by `$quote';
				   zero means none.  */
  int last_set;			/* Highest set number used so far.  */

  struct obstack mem_pool;	/* Storage for the input lines that are
				   kept.  */
};
00090 
00091 
00092 /* If non-zero force creation of new file, not using existing one.  */
00093 static int force_new;
00094 
00095 /* Name of output file.  */
00096 static const char *output_name;
00097 
00098 /* Name of generated C header file.  */
00099 static const char *header_name;
00100 
00101 /* Name and version of program.  */
00102 static void print_version (FILE *stream, struct argp_state *state);
00103 void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
00104 
00105 #define OPT_NEW 1
00106 
00107 /* Definitions of arguments for argp functions.  */
00108 static const struct argp_option options[] =
00109 {
00110   { "header", 'H', N_("NAME"), 0,
00111     N_("Create C header file NAME containing symbol definitions") },
00112   { "new", OPT_NEW, NULL, 0,
00113     N_("Do not use existing catalog, force new output file") },
00114   { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
00115   { NULL, 0, NULL, 0, NULL }
00116 };
00117 
00118 /* Short description of program.  */
00119 static const char doc[] = N_("Generate message catalog.\
00120 \vIf INPUT-FILE is -, input is read from standard input.  If OUTPUT-FILE\n\
00121 is -, output is written to standard output.\n");
00122 
00123 /* Strings for arguments in help texts.  */
00124 static const char args_doc[] = N_("\
00125 -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
00126 
00127 /* Prototype for option handler.  */
00128 static error_t parse_opt (int key, char *arg, struct argp_state *state);
00129 
00130 /* Function to print some extra text in the help message.  */
00131 static char *more_help (int key, const char *text, void *input);
00132 
00133 /* Data structure to communicate with argp functions.  */
00134 static struct argp argp =
00135 {
00136   options, parse_opt, args_doc, doc, NULL, more_help
00137 };
00138 
00139 
/* Allocation helpers defined outside this file; described here as
   wrappers with error checking for the standard functions.  */
void *xmalloc (size_t n);
void *xcalloc (size_t n, size_t s);
void *xrealloc (void *o, size_t n);
char *xstrdup (const char *);

/* Hook installed as error_print_progname.  */
static void error_print (void);

/* Reading of message source files and of an existing catalog.  */
static struct catalog *read_input_file (struct catalog *current,
					const char *fname);
static void read_old (struct catalog *catalog, const char *file_name);
static struct set_list *find_set (struct catalog *current, int number);

/* Character set conversion and handling of quotes and escapes.  */
static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
			    iconv_t *cd_tombp, wchar_t *escape_charp);
static void normalize_line (const char *fname, size_t line, iconv_t cd,
			    wchar_t *string, wchar_t quote_char,
			    wchar_t escape_char);

/* Generation of the catalog file and the optional header.  */
static void write_out (struct catalog *result, const char *output_name,
		       const char *header_name);
00159 
00160 
00161 int
00162 main (int argc, char *argv[])
00163 {
00164   struct catalog *result;
00165   int remaining;
00166 
00167   /* Set program name for messages.  */
00168   error_print_progname = error_print;
00169 
00170   /* Set locale via LC_ALL.  */
00171   setlocale (LC_ALL, "");
00172 
00173   /* Set the text message domain.  */
00174   textdomain (PACKAGE);
00175 
00176   /* Initialize local variables.  */
00177   result = NULL;
00178 
00179   /* Parse and process arguments.  */
00180   argp_parse (&argp, argc, argv, 0, &remaining, NULL);
00181 
00182   /* Determine output file.  */
00183   if (output_name == NULL)
00184     output_name = remaining < argc ? argv[remaining++] : "-";
00185 
00186   /* Process all input files.  */
00187   setlocale (LC_CTYPE, "C");
00188   if (remaining < argc)
00189     do
00190       result = read_input_file (result, argv[remaining]);
00191     while (++remaining < argc);
00192   else
00193     result = read_input_file (NULL, "-");
00194 
00195   /* Write out the result.  */
00196   if (result != NULL)
00197     write_out (result, output_name, header_name);
00198 
00199   return error_message_count != 0;
00200 }
00201 
00202 
00203 /* Handle program arguments.  */
00204 static error_t
00205 parse_opt (int key, char *arg, struct argp_state *state)
00206 {
00207   switch (key)
00208     {
00209     case 'H':
00210       header_name = arg;
00211       break;
00212     case OPT_NEW:
00213       force_new = 1;
00214       break;
00215     case 'o':
00216       output_name = arg;
00217       break;
00218     default:
00219       return ARGP_ERR_UNKNOWN;
00220     }
00221   return 0;
00222 }
00223 
00224 
/* argp help filter.  For ARGP_KEY_HELP_EXTRA a freshly allocated copy
   of the translated bug reporting note is returned; for every other
   KEY the text passed in is returned unchanged.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  */
  return strdup (gettext ("For bug reporting instructions, please see:\n"
			  "<http://www.gnu.org/software/libc/bugs.html>.\n"));
}
00240 
00241 /* Print the version information.  */
00242 static void
00243 print_version (FILE *stream, struct argp_state *state)
00244 {
00245   fprintf (stream, "gencat (GNU %s) %s\n", PACKAGE, VERSION);
00246   fprintf (stream, gettext ("\
00247 Copyright (C) %s Free Software Foundation, Inc.\n\
00248 This is free software; see the source for copying conditions.  There is NO\n\
00249 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
00250 "), "2008");
00251   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
00252 }
00253 
00254 
/* Installed as error_print_progname.  It deliberately prints nothing:
   the program name must not appear in front of the messages because
   Emacs' compile.el does not cope with it.  */
static void
error_print (void)
{
}
00263 
00264 
00265 static struct catalog *
00266 read_input_file (struct catalog *current, const char *fname)
00267 {
00268   FILE *fp;
00269   char *buf;
00270   size_t len;
00271   size_t line_number;
00272   wchar_t *wbuf;
00273   size_t wbufsize;
00274   iconv_t cd_towc = (iconv_t) -1;
00275   iconv_t cd_tomb = (iconv_t) -1;
00276   wchar_t escape_char = L'\\';
00277   char *codeset = NULL;
00278 
00279   if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
00280     {
00281       fp = stdin;
00282       fname = gettext ("*standard input*");
00283     }
00284   else
00285     fp = fopen (fname, "r");
00286   if (fp == NULL)
00287     {
00288       error (0, errno, gettext ("cannot open input file `%s'"), fname);
00289       return current;
00290     }
00291 
00292   /* If we haven't seen anything yet, allocate result structure.  */
00293   if (current == NULL)
00294     {
00295       current = (struct catalog *) xcalloc (1, sizeof (*current));
00296 
00297 #define obstack_chunk_alloc malloc
00298 #define obstack_chunk_free free
00299       obstack_init (&current->mem_pool);
00300 
00301       current->current_set = find_set (current, NL_SETD);
00302     }
00303 
00304   buf = NULL;
00305   len = 0;
00306   line_number = 0;
00307 
00308   wbufsize = 1024;
00309   wbuf = (wchar_t *) xmalloc (wbufsize);
00310 
00311   while (!feof (fp))
00312     {
00313       int continued;
00314       int used;
00315       size_t start_line = line_number + 1;
00316       char *this_line;
00317 
00318       do
00319        {
00320          int act_len;
00321 
00322          act_len = getline (&buf, &len, fp);
00323          if (act_len <= 0)
00324            break;
00325          ++line_number;
00326 
00327          /* It the line continued?  */
00328          continued = 0;
00329          if (buf[act_len - 1] == '\n')
00330            {
00331              --act_len;
00332 
00333              /* There might be more than one backslash at the end of
00334                the line.  Only if there is an odd number of them is
00335                the line continued.  */
00336              if (act_len > 0 && buf[act_len - 1] == '\\')
00337               {
00338                 int temp_act_len = act_len;
00339 
00340                 do
00341                   {
00342                     --temp_act_len;
00343                     continued = !continued;
00344                   }
00345                 while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
00346 
00347                 if (continued)
00348                   --act_len;
00349               }
00350            }
00351 
00352          /* Append to currently selected line.  */
00353          obstack_grow (&current->mem_pool, buf, act_len);
00354        }
00355       while (continued);
00356 
00357       obstack_1grow (&current->mem_pool, '\0');
00358       this_line = (char *) obstack_finish (&current->mem_pool);
00359 
00360       used = 0;
00361       if (this_line[0] == '$')
00362        {
00363          if (isblank (this_line[1]))
00364            {
00365              int cnt = 1;
00366              while (isblank (this_line[cnt]))
00367               ++cnt;
00368              if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
00369               /* This is a comment line. Do nothing.  */;
00370              else if (codeset != NULL)
00371               /* Ignore multiple codeset. */;
00372              else
00373               {
00374                 int start = cnt + 8;
00375                 cnt = start;
00376                 while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
00377                   ++cnt;
00378                 if (cnt != start)
00379                   {
00380                     int len = cnt - start;
00381                     codeset = xmalloc (len + 1);
00382                     *((char *) mempcpy (codeset, &this_line[start], len))
00383                      = '\0';
00384                   }
00385               }
00386            }
00387          else if (strncmp (&this_line[1], "set", 3) == 0)
00388            {
00389              int cnt = sizeof ("set");
00390              int set_number;
00391              const char *symbol = NULL;
00392              while (isspace (this_line[cnt]))
00393               ++cnt;
00394 
00395              if (isdigit (this_line[cnt]))
00396               {
00397                 set_number = atol (&this_line[cnt]);
00398 
00399                 /* If the given number for the character set is
00400                    higher than any we used for symbolic set names
00401                    avoid clashing by using only higher numbers for
00402                    the following symbolic definitions.  */
00403                 if (set_number > current->last_set)
00404                   current->last_set = set_number;
00405               }
00406              else
00407               {
00408                 /* See whether it is a reasonable identifier.  */
00409                 int start = cnt;
00410                 while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
00411                   ++cnt;
00412 
00413                 if (cnt == start)
00414                   {
00415                     /* No correct character found.  */
00416                     error_at_line (0, 0, fname, start_line,
00417                                  gettext ("illegal set number"));
00418                     set_number = 0;
00419                   }
00420                 else
00421                   {
00422                     /* We have found seomthing that looks like a
00423                       correct identifier.  */
00424                     struct set_list *runp;
00425 
00426                     this_line[cnt] = '\0';
00427                     used = 1;
00428                     symbol = &this_line[start];
00429 
00430                     /* Test whether the identifier was already used.  */
00431                     runp = current->all_sets;
00432                     while (runp != 0)
00433                      if (runp->symbol != NULL
00434                          && strcmp (runp->symbol, symbol) == 0)
00435                        break;
00436                      else
00437                        runp = runp->next;
00438 
00439                     if (runp != NULL)
00440                      {
00441                        /* We cannot allow duplicate identifiers for
00442                           message sets.  */
00443                        error_at_line (0, 0, fname, start_line,
00444                                     gettext ("duplicate set definition"));
00445                        error_at_line (0, 0, runp->fname, runp->line,
00446                                     gettext ("\
00447 this is the first definition"));
00448                        set_number = 0;
00449                      }
00450                     else
00451                      /* Allocate next free message set for identifier.  */
00452                      set_number = ++current->last_set;
00453                   }
00454               }
00455 
00456              if (set_number != 0)
00457               {
00458                 /* We found a legal set number.  */
00459                 current->current_set = find_set (current, set_number);
00460                 if (symbol != NULL)
00461                     used = 1;
00462                 current->current_set->symbol = symbol;
00463                 current->current_set->fname = fname;
00464                 current->current_set->line = start_line;
00465               }
00466            }
00467          else if (strncmp (&this_line[1], "delset", 6) == 0)
00468            {
00469              int cnt = sizeof ("delset");
00470              size_t set_number;
00471              while (isspace (this_line[cnt]))
00472               ++cnt;
00473 
00474              if (isdigit (this_line[cnt]))
00475               {
00476                 size_t set_number = atol (&this_line[cnt]);
00477                 struct set_list *set;
00478 
00479                 /* Mark the message set with the given number as
00480                    deleted.  */
00481                 set = find_set (current, set_number);
00482                 set->deleted = 1;
00483               }
00484              else
00485               {
00486                 /* See whether it is a reasonable identifier.  */
00487                 int start = cnt;
00488                 while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
00489                   ++cnt;
00490 
00491                 if (cnt == start)
00492                   {
00493                     error_at_line (0, 0, fname, start_line,
00494                                  gettext ("illegal set number"));
00495                     set_number = 0;
00496                   }
00497                 else
00498                   {
00499                     const char *symbol;
00500                     struct set_list *runp;
00501 
00502                     this_line[cnt] = '\0';
00503                     used = 1;
00504                     symbol = &this_line[start];
00505 
00506                     /* We have a symbolic set name.  This name must
00507                       appear somewhere else in the catalogs read so
00508                       far.  */
00509                     set_number = 0;
00510                     for (runp = current->all_sets; runp != NULL;
00511                         runp = runp->next)
00512                      {
00513                        if (strcmp (runp->symbol, symbol) == 0)
00514                          {
00515                            runp->deleted = 1;
00516                            break;
00517                          }
00518                      }
00519                     if (runp == NULL)
00520                      /* Name does not exist before.  */
00521                      error_at_line (0, 0, fname, start_line,
00522                                    gettext ("unknown set `%s'"), symbol);
00523                   }
00524               }
00525            }
00526          else if (strncmp (&this_line[1], "quote", 5) == 0)
00527            {
00528              char buf[2];
00529              char *bufptr;
00530              size_t buflen;
00531              char *wbufptr;
00532              size_t wbuflen;
00533              int cnt;
00534 
00535              cnt = sizeof ("quote");
00536              while (isspace (this_line[cnt]))
00537               ++cnt;
00538 
00539              /* We need the conversion.  */
00540              if (cd_towc == (iconv_t) -1
00541                 && open_conversion (codeset, &cd_towc, &cd_tomb,
00542                                   &escape_char) != 0)
00543               /* Something is wrong.  */
00544               goto out;
00545 
00546              /* Yes, the quote char can be '\0'; this means no quote
00547                char.  The function using the information works on
00548                wide characters so we have to convert it here.  */
00549              buf[0] = this_line[cnt];
00550              buf[1] = '\0';
00551              bufptr = buf;
00552              buflen = 2;
00553 
00554              wbufptr = (char *) wbuf;
00555              wbuflen = wbufsize;
00556 
00557              /* Flush the state.  */
00558              iconv (cd_towc, NULL, NULL, NULL, NULL);
00559 
00560              iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
00561              if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
00562               error_at_line (0, 0, fname, start_line,
00563                             gettext ("invalid quote character"));
00564              else
00565               /* Use the converted wide character.  */
00566               current->quote_char = wbuf[0];
00567            }
00568          else
00569            {
00570              int cnt;
00571              cnt = 2;
00572              while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
00573               ++cnt;
00574              this_line[cnt] = '\0';
00575              error_at_line (0, 0, fname, start_line,
00576                           gettext ("unknown directive `%s': line ignored"),
00577                           &this_line[1]);
00578            }
00579        }
00580       else if (isalnum (this_line[0]) || this_line[0] == '_')
00581        {
00582          const char *ident = this_line;
00583          char *line = this_line;
00584          int message_number;
00585 
00586          do
00587            ++line;
00588          while (line[0] != '\0' && !isspace (line[0]));
00589          if (line[0] != '\0')
00590            *line++ = '\0';  /* Terminate the identifier.  */
00591 
00592          /* Now we found the beginning of the message itself.  */
00593 
00594          if (isdigit (ident[0]))
00595            {
00596              struct message_list *runp;
00597              struct message_list *lastp;
00598 
00599              message_number = atoi (ident);
00600 
00601              /* Find location to insert the new message.  */
00602              runp = current->current_set->messages;
00603              lastp = NULL;
00604              while (runp != NULL)
00605               if (runp->number == message_number)
00606                 break;
00607               else
00608                 {
00609                   lastp = runp;
00610                   runp = runp->next;
00611                 }
00612              if (runp != NULL)
00613               {
00614                 /* Oh, oh.  There is already a message with this
00615                    number in the message set.  */
00616                 if (runp->symbol == NULL)
00617                   {
00618                     /* The existing message had its number specified
00619                       by the user.  Fatal collision type uh, oh.  */
00620                     error_at_line (0, 0, fname, start_line,
00621                                  gettext ("duplicated message number"));
00622                     error_at_line (0, 0, runp->fname, runp->line,
00623                                  gettext ("this is the first definition"));
00624                     message_number = 0;
00625                   }
00626                 else
00627                   {
00628                     /* Collision was with number auto-assigned to a
00629                       symbolic.  Change existing symbolic number
00630                       and move to end the list (if not already there).  */
00631                     runp->number = ++current->current_set->last_message;
00632 
00633                     if (runp->next != NULL)
00634                      {
00635                        struct message_list *endp;
00636 
00637                        if (lastp == NULL)
00638                          current->current_set->messages=runp->next;
00639                        else
00640                          lastp->next=runp->next;
00641 
00642                        endp = runp->next;
00643                        while (endp->next != NULL)
00644                          endp = endp->next;
00645 
00646                        endp->next = runp;
00647                        runp->next = NULL;
00648                      }
00649                   }
00650               }
00651              ident = NULL;  /* We don't have a symbol.  */
00652 
00653              if (message_number != 0
00654                 && message_number > current->current_set->last_message)
00655               current->current_set->last_message = message_number;
00656            }
00657          else if (ident[0] != '\0')
00658            {
00659              struct message_list *runp;
00660              struct message_list *lastp;
00661 
00662              /* Test whether the symbolic name was not used for
00663                another message in this message set.  */
00664              runp = current->current_set->messages;
00665              lastp = NULL;
00666              while (runp != NULL)
00667               if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0)
00668                 break;
00669               else
00670                 runp = runp->next;
00671              if (runp != NULL)
00672               {
00673                 /* The name is already used.  */
00674                 error_at_line (0, 0, fname, start_line, gettext ("\
00675 duplicated message identifier"));
00676                 error_at_line (0, 0, runp->fname, runp->line,
00677                              gettext ("this is the first definition"));
00678                 message_number = 0;
00679               }
00680              else
00681               /* Give the message the next unused number.  */
00682               message_number = ++current->current_set->last_message;
00683            }
00684          else
00685            message_number = 0;
00686 
00687          if (message_number != 0)
00688            {
00689              char *inbuf;
00690              size_t inlen;
00691              char *outbuf;
00692              size_t outlen;
00693              struct message_list *newp;
00694              size_t line_len = strlen (line) + 1;
00695              size_t ident_len = 0;
00696 
00697              /* We need the conversion.  */
00698              if (cd_towc == (iconv_t) -1
00699                 && open_conversion (codeset, &cd_towc, &cd_tomb,
00700                                   &escape_char) != 0)
00701               /* Something is wrong.  */
00702               goto out;
00703 
00704              /* Convert to a wide character string.  We have to
00705                interpret escape sequences which will be impossible
00706                without doing the conversion if the codeset of the
00707                message is stateful.  */
00708              while (1)
00709               {
00710                 inbuf = line;
00711                 inlen = line_len;
00712                 outbuf = (char *) wbuf;
00713                 outlen = wbufsize;
00714 
00715                 /* Flush the state.  */
00716                 iconv (cd_towc, NULL, NULL, NULL, NULL);
00717 
00718                 iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
00719                 if (inlen == 0)
00720                   {
00721                     /* The string is converted.  */
00722                     assert (outlen < wbufsize);
00723                     assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
00724                            == L'\0');
00725                     break;
00726                   }
00727 
00728                 if (outlen != 0)
00729                   {
00730                     /* Something is wrong with this string, we ignore it.  */
00731                     error_at_line (0, 0, fname, start_line, gettext ("\
00732 invalid character: message ignored"));
00733                     goto ignore;
00734                   }
00735 
00736                 /* The output buffer is too small.  */
00737                 wbufsize *= 2;
00738                 wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
00739               }
00740 
00741              /* Strip quote characters, change escape sequences into
00742                correct characters etc.  */
00743              normalize_line (fname, start_line, cd_towc, wbuf,
00744                            current->quote_char, escape_char);
00745 
00746              if (ident)
00747               ident_len = line - this_line;
00748 
00749              /* Now the string is free of escape sequences.  Convert it
00750                back into a multibyte character string.  First free the
00751                memory allocated for the original string.  */
00752              obstack_free (&current->mem_pool, this_line);
00753 
00754              used = 1;      /* Yes, we use the line.  */
00755 
00756              /* Now fill in the new string.  It should never happen that
00757                the replaced string is longer than the original.  */
00758              inbuf = (char *) wbuf;
00759              inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
00760 
00761              outlen = obstack_room (&current->mem_pool);
00762              obstack_blank (&current->mem_pool, outlen);
00763              this_line = (char *) obstack_base (&current->mem_pool);
00764              outbuf = this_line + ident_len;
00765              outlen -= ident_len;
00766 
00767              /* Flush the state.  */
00768              iconv (cd_tomb, NULL, NULL, NULL, NULL);
00769 
00770              iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
00771              if (inlen != 0)
00772               {
00773                 error_at_line (0, 0, fname, start_line,
00774                              gettext ("invalid line"));
00775                 goto ignore;
00776               }
00777              assert (outbuf[-1] == '\0');
00778 
00779              /* Free the memory in the obstack we don't use.  */
00780              obstack_blank (&current->mem_pool, -(int) outlen);
00781              line = obstack_finish (&current->mem_pool);
00782 
00783              newp = (struct message_list *) xmalloc (sizeof (*newp));
00784              newp->number = message_number;
00785              newp->message = line + ident_len;
00786              /* Remember symbolic name; is NULL if no is given.  */
00787              newp->symbol = ident ? line : NULL;
00788              /* Remember where we found the character.  */
00789              newp->fname = fname;
00790              newp->line = start_line;
00791 
00792              /* Find place to insert to message.  We keep them in a
00793                sorted single linked list.  */
00794              if (current->current_set->messages == NULL
00795                 || current->current_set->messages->number > message_number)
00796               {
00797                 newp->next = current->current_set->messages;
00798                 current->current_set->messages = newp;
00799               }
00800              else
00801               {
00802                 struct message_list *runp;
00803                 runp = current->current_set->messages;
00804                 while (runp->next != NULL)
00805                   if (runp->next->number > message_number)
00806                     break;
00807                   else
00808                     runp = runp->next;
00809                 newp->next = runp->next;
00810                 runp->next = newp;
00811               }
00812            }
00813          ++current->total_messages;
00814        }
00815       else
00816        {
00817          size_t cnt;
00818 
00819          cnt = 0;
00820          /* See whether we have any non-white space character in this
00821             line.  */
00822          while (this_line[cnt] != '\0' && isspace (this_line[cnt]))
00823            ++cnt;
00824 
00825          if (this_line[cnt] != '\0')
00826            /* Yes, some unknown characters found.  */
00827            error_at_line (0, 0, fname, start_line,
00828                         gettext ("malformed line ignored"));
00829        }
00830 
00831     ignore:
00832       /* We can save the memory for the line if it was not used.  */
00833       if (!used)
00834        obstack_free (&current->mem_pool, this_line);
00835     }
00836 
00837   /* Close the conversion modules.  */
00838   iconv_close (cd_towc);
00839   iconv_close (cd_tomb);
00840   free (codeset);
00841 
00842  out:
00843   free (wbuf);
00844 
00845   if (fp != stdin)
00846     fclose (fp);
00847   return current;
00848 }
00849 
00850 
00851 static void
00852 write_out (struct catalog *catalog, const char *output_name,
00853           const char *header_name)
00854 {
00855   /* Computing the "optimal" size.  */
00856   struct set_list *set_run;
00857   size_t best_total, best_size, best_depth;
00858   size_t act_size, act_depth;
00859   struct catalog_obj obj;
00860   struct obstack string_pool;
00861   const char *strings;
00862   size_t strings_size;
00863   uint32_t *array1, *array2;
00864   size_t cnt;
00865   int fd;
00866 
00867   /* If not otherwise told try to read file with existing
00868      translations.  */
00869   if (!force_new)
00870     read_old (catalog, output_name);
00871 
00872   /* Initialize best_size with a very high value.  */
00873   best_total = best_size = best_depth = UINT_MAX;
00874 
00875   /* We need some start size for testing.  Let's start with
00876      TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
00877      5.  */
00878   act_size = 1 + catalog->total_messages / 5;
00879 
00880   /* We determine the size of a hash table here.  Because the message
00881      numbers can be chosen arbitrary by the programmer we cannot use
00882      the simple method of accessing the array using the message
00883      number.  The algorithm is based on the trivial hash function
00884      NUMBER % TABLE_SIZE, where collisions are stored in a second
00885      dimension up to TABLE_DEPTH.  We here compute TABLE_SIZE so that
00886      the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal.  */
00887   while (act_size <= best_total)
00888     {
00889       size_t deep[act_size];
00890 
00891       act_depth = 1;
00892       memset (deep, '\0', act_size * sizeof (size_t));
00893       set_run = catalog->all_sets;
00894       while (set_run != NULL)
00895        {
00896          struct message_list *message_run;
00897 
00898          message_run = set_run->messages;
00899          while (message_run != NULL)
00900            {
00901              size_t idx = (message_run->number * set_run->number) % act_size;
00902 
00903              ++deep[idx];
00904              if (deep[idx] > act_depth)
00905               {
00906                 act_depth = deep[idx];
00907                 if (act_depth * act_size > best_total)
00908                   break;
00909               }
00910              message_run = message_run->next;
00911            }
00912          set_run = set_run->next;
00913        }
00914 
00915       if (act_depth * act_size <= best_total)
00916        {
00917          /* We have found a better solution.  */
00918          best_total = act_depth * act_size;
00919          best_size = act_size;
00920          best_depth = act_depth;
00921        }
00922 
00923       ++act_size;
00924     }
00925 
00926   /* let's be prepared for an empty message file.  */
00927   if (best_size == UINT_MAX)
00928     {
00929       best_size = 1;
00930       best_depth = 1;
00931     }
00932 
00933   /* OK, now we have the size we will use.  Fill in the header, build
00934      the table and the second one with swapped byte order.  */
00935   obj.magic = CATGETS_MAGIC;
00936   obj.plane_size = best_size;
00937   obj.plane_depth = best_depth;
00938 
00939   /* Allocate room for all needed arrays.  */
00940   array1 =
00941     (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
00942   memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3);
00943   array2
00944     = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
00945   obstack_init (&string_pool);
00946 
00947   set_run = catalog->all_sets;
00948   while (set_run != NULL)
00949     {
00950       struct message_list *message_run;
00951 
00952       message_run = set_run->messages;
00953       while (message_run != NULL)
00954        {
00955          size_t idx = (((message_run->number * set_run->number) % best_size)
00956                      * 3);
00957          /* Determine collision depth.  */
00958          while (array1[idx] != 0)
00959            idx += best_size * 3;
00960 
00961          /* Store set number, message number and pointer into string
00962             space, relative to the first string.  */
00963          array1[idx + 0] = set_run->number;
00964          array1[idx + 1] = message_run->number;
00965          array1[idx + 2] = obstack_object_size (&string_pool);
00966 
00967          /* Add current string to the continuous space containing all
00968             strings.  */
00969          obstack_grow0 (&string_pool, message_run->message,
00970                       strlen (message_run->message));
00971 
00972          message_run = message_run->next;
00973        }
00974 
00975       set_run = set_run->next;
00976     }
00977   strings_size = obstack_object_size (&string_pool);
00978   strings = obstack_finish (&string_pool);
00979 
00980   /* Compute ARRAY2 by changing the byte order.  */
00981   for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt)
00982     array2[cnt] = SWAPU32 (array1[cnt]);
00983 
00984   /* Now we can write out the whole data.  */
00985   if (strcmp (output_name, "-") == 0
00986       || strcmp (output_name, "/dev/stdout") == 0)
00987     fd = STDOUT_FILENO;
00988   else
00989     {
00990       fd = creat (output_name, 0666);
00991       if (fd < 0)
00992        error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
00993               output_name);
00994     }
00995 
00996   /* Write out header.  */
00997   write (fd, &obj, sizeof (obj));
00998 
00999   /* We always write out the little endian version of the index
01000      arrays.  */
01001 #if __BYTE_ORDER == __LITTLE_ENDIAN
01002   write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
01003   write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
01004 #elif __BYTE_ORDER == __BIG_ENDIAN
01005   write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
01006   write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
01007 #else
01008 # error Cannot handle __BYTE_ORDER byte order
01009 #endif
01010 
01011   /* Finally write the strings.  */
01012   write (fd, strings, strings_size);
01013 
01014   if (fd != STDOUT_FILENO)
01015     close (fd);
01016 
01017   /* If requested now write out the header file.  */
01018   if (header_name != NULL)
01019     {
01020       int first = 1;
01021       FILE *fp;
01022 
01023       /* Open output file.  "-" or "/dev/stdout" means write to
01024         standard output.  */
01025       if (strcmp (header_name, "-") == 0
01026          || strcmp (header_name, "/dev/stdout") == 0)
01027        fp = stdout;
01028       else
01029        {
01030          fp = fopen (header_name, "w");
01031          if (fp == NULL)
01032            error (EXIT_FAILURE, errno,
01033                  gettext ("cannot open output file `%s'"), header_name);
01034        }
01035 
01036       /* Iterate over all sets and all messages.  */
01037       set_run = catalog->all_sets;
01038       while (set_run != NULL)
01039        {
01040          struct message_list *message_run;
01041 
01042          /* If the current message set has a symbolic name write this
01043             out first.  */
01044          if (set_run->symbol != NULL)
01045            fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n",
01046                    first ? "" : "\n", set_run->symbol, set_run->number - 1,
01047                    set_run->fname, set_run->line);
01048          first = 0;
01049 
01050          message_run = set_run->messages;
01051          while (message_run != NULL)
01052            {
01053              /* If the current message has a symbolic name write
01054                #define out.  But we have to take care for the set
01055                not having a symbolic name.  */
01056              if (message_run->symbol != NULL)
01057               {
01058                 if (set_run->symbol == NULL)
01059                   fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
01060                           set_run->number, message_run->symbol,
01061                           message_run->number, message_run->fname,
01062                           message_run->line);
01063                 else
01064                   fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n",
01065                           set_run->symbol, message_run->symbol,
01066                           message_run->number, message_run->fname,
01067                           message_run->line);
01068               }
01069 
01070              message_run = message_run->next;
01071            }
01072 
01073          set_run = set_run->next;
01074        }
01075 
01076       if (fp != stdout)
01077        fclose (fp);
01078     }
01079 }
01080 
01081 
01082 static struct set_list *
01083 find_set (struct catalog *current, int number)
01084 {
01085   struct set_list *result = current->all_sets;
01086 
01087   /* We must avoid set number 0 because a set of this number signals
01088      in the tables that the entry is not occupied.  */
01089   ++number;
01090 
01091   while (result != NULL)
01092     if (result->number == number)
01093       return result;
01094     else
01095       result = result->next;
01096 
01097   /* Prepare new message set.  */
01098   result = (struct set_list *) xcalloc (1, sizeof (*result));
01099   result->number = number;
01100   result->next = current->all_sets;
01101   current->all_sets = result;
01102 
01103   return result;
01104 }
01105 
01106 
/* Normalize STRING *in*place*: strip the surrounding quote characters
   and replace escape sequences by the characters they denote.

   FNAME and LINE are only used for diagnostics.  CD is an iconv
   descriptor used to turn the byte value of an octal escape into a
   wide character.  QUOTE_CHAR is the active quote character, or L'\0'
   if none is set.  ESCAPE_CHAR is the wide character that introduces
   an escape sequence.

   The result is always terminated.  Text after the first unescaped
   quote character is dropped.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  const int quoting = quote_char != L'\0';
  wchar_t *src = string;
  wchar_t *dst = string;
  int opened = 0;

  /* A leading quote character is skipped, but remembered so that the
     matching closing one can be demanded below.  */
  if (quoting && *src == quote_char)
    {
      opened = 1;
      ++src;
    }

  /* The first quote character which is not escaped simply ends the
     string.  */
  while (*src != L'\0' && *src != quote_char)
    {
      wchar_t simple;

      if (*src != escape_char)
        {
          /* Ordinary character.  */
          *dst++ = *src++;
          continue;
        }

      /* Step over the escape character and look at what follows.  */
      ++src;

      if (quoting && *src == quote_char)
        {
          /* Escaped quote character.  This is an extension to XPG.  */
          *dst++ = *src++;
          continue;
        }

      /* Single letter escapes.  L'\0' means "none of them".  */
      switch (*src)
        {
        case L'n':
          simple = L'\n';
          break;
        case L't':
          simple = L'\t';
          break;
        case L'v':
          simple = L'\v';
          break;
        case L'b':
          simple = L'\b';
          break;
        case L'r':
          simple = L'\r';
          break;
        case L'f':
          simple = L'\f';
          break;
        default:
          simple = L'\0';
          break;
        }

      if (simple != L'\0')
        {
          *dst++ = simple;
          ++src;
        }
      else if (*src >= L'0' && *src <= L'7')
        {
          /* Octal escape.  Digits are consumed for as long as another
             one cannot push the value beyond 255.  */
          int value;
          char mb[2];
          char *mbp;
          size_t mbleft;
          wchar_t wc[2];
          char *wcp;
          size_t wcleft;

          value = *src++ - L'0';
          while (value <= (255 / 8) && *src >= L'0' && *src <= L'7')
            value = value * 8 + (*src++ - L'0');

          /* Convert the byte together with a terminating NUL.  */
          mb[0] = (char) value;
          mb[1] = '\0';
          mbp = mb;
          mbleft = 2;

          wcp = (char *) wc;
          wcleft = sizeof (wc);

          /* Flush the state.  */
          iconv (cd, NULL, NULL, NULL, NULL);

          iconv (cd, &mbp, &mbleft, &wcp, &wcleft);

          /* Both buffers must have been used up completely.  */
          if (mbp == &mb[2] && (wchar_t *) wcp == &wc[2])
            *dst++ = wc[0];
          else
            error_at_line (0, 0, fname, line,
                           gettext ("invalid escape sequence"));
        }
      else if (*src == escape_char)
        {
          /* Doubled escape character stands for itself.  */
          *dst++ = escape_char;
          ++src;
        }
      /* Otherwise the escape character is simply ignored and the
         character after it is handled by the next iteration.  */
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (opened && *src != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *dst = L'\0';
}
01223 
01224 
01225 static void
01226 read_old (struct catalog *catalog, const char *file_name)
01227 {
01228   struct catalog_info old_cat_obj;
01229   struct set_list *set = NULL;
01230   int last_set = -1;
01231   size_t cnt;
01232 
01233   /* Try to open catalog, but don't look through the NLSPATH.  */
01234   if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0)
01235     {
01236       if (errno == ENOENT)
01237        /* No problem, the catalog simply does not exist.  */
01238        return;
01239       else
01240        error (EXIT_FAILURE, errno,
01241               gettext ("while opening old catalog file"));
01242     }
01243 
01244   /* OK, we have the catalog loaded.  Now read all messages and merge
01245      them.  When set and message number clash for any message the new
01246      one is used.  If the new one is empty it indicates that the
01247      message should be deleted.  */
01248   for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
01249     {
01250       struct message_list *message, *last;
01251 
01252       if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0)
01253        /* No message in this slot.  */
01254        continue;
01255 
01256       if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set)
01257        {
01258          last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1;
01259          set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1);
01260        }
01261 
01262       last = NULL;
01263       message = set->messages;
01264       while (message != NULL)
01265        {
01266          if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
01267            break;
01268          last = message;
01269          message = message->next;
01270        }
01271 
01272       if (message == NULL
01273          || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
01274        {
01275          /* We have found a message which is not yet in the catalog.
01276             Insert it at the right position.  */
01277          struct message_list *newp;
01278 
01279          newp = (struct message_list *) xmalloc (sizeof(*newp));
01280          newp->number = old_cat_obj.name_ptr[cnt * 3 + 1];
01281          newp->message =
01282            &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]];
01283          newp->fname = NULL;
01284          newp->line = 0;
01285          newp->symbol = NULL;
01286          newp->next = message;
01287 
01288          if (last == NULL)
01289            set->messages = newp;
01290          else
01291            last->next = newp;
01292 
01293          ++catalog->total_messages;
01294        }
01295       else if (*message->message == '\0')
01296        {
01297          /* The new empty message has overridden the old one thus
01298             "deleting" it as required.  Now remove the empty remains. */
01299          if (last == NULL)
01300            set->messages = message->next;
01301          else
01302            last->next = message->next;
01303        }
01304     }
01305 }
01306 
01307 
/* Open the iconv descriptors needed to process a message file written
   in CODESET and determine the escape character to recognize.

   CODESET      name of the file's character set; if NULL the codeset
                of the locale selected by the environment is used.
   CD_TOWCP     receives the descriptor converting CODESET to wchar_t.
   CD_TOMBP     receives the descriptor converting wchar_t to CODESET.
   ESCAPE_CHARP receives the wide character the byte 0x5c converts to,
                or L'\\' if that conversion fails.

   Returns 0 on success.  If either descriptor cannot be opened a
   message is printed, a descriptor that was opened is closed again,
   both *CD_TOWCP and *CD_TOMBP are set to (iconv_t) -1, and 1 is
   returned; *ESCAPE_CHARP is not touched in that case.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.  */
  if (codeset == NULL)
    {
      setlocale (LC_ALL, "");
      /* NOTE(review): POSIX allows a later setlocale call to
         invalidate the nl_langinfo result, yet the pointer is used
         after the switch back to "C" -- confirm this is safe on all
         targets.  */
      codeset = nl_langinfo (CODESET);
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever direction could be opened, so that no
         descriptor is leaked and the caller never sees a stale one.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  /* The byte and its terminating NUL must consume the whole input and
     fill the whole two element output buffer.  */
  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}