Back to index

glibc  2.9
linereader.c
Go to the documentation of this file.
00001 /* Copyright (C) 1996-2005, 2006 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License as published
00007    by the Free Software Foundation; version 2 of the License, or
00008    (at your option) any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software Foundation,
00017    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
00018 
00019 #ifdef HAVE_CONFIG_H
00020 # include <config.h>
00021 #endif
00022 
00023 #include <assert.h>
00024 #include <ctype.h>
00025 #include <errno.h>
00026 #include <libintl.h>
00027 #include <stdarg.h>
00028 #include <stdlib.h>
00029 #include <string.h>
00030 
00031 #include "localedef.h"
00032 #include "charmap.h"
00033 #include "error.h"
00034 #include "linereader.h"
00035 #include "locfile.h"
00036 
00037 /* Prototypes for local functions.  */
00038 static struct token *get_toplvl_escape (struct linereader *lr);
00039 static struct token *get_symname (struct linereader *lr);
00040 static struct token *get_ident (struct linereader *lr);
00041 static struct token *get_string (struct linereader *lr,
00042                              const struct charmap_t *charmap,
00043                              struct localedef_t *locale,
00044                              const struct repertoire_t *repertoire,
00045                              int verbose);
00046 
00047 
00048 struct linereader *
00049 lr_open (const char *fname, kw_hash_fct_t hf)
00050 {
00051   FILE *fp;
00052 
00053   if (fname == NULL || strcmp (fname, "-") == 0
00054       || strcmp (fname, "/dev/stdin") == 0)
00055     return lr_create (stdin, "<stdin>", hf);
00056   else
00057     {
00058       fp = fopen (fname, "rm");
00059       if (fp == NULL)
00060        return NULL;
00061       return lr_create (fp, fname, hf);
00062     }
00063 }
00064 
00065 struct linereader *
00066 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
00067 {
00068   struct linereader *result;
00069   int n;
00070 
00071   result = (struct linereader *) xmalloc (sizeof (*result));
00072 
00073   result->fp = fp;
00074   result->fname = xstrdup (fname);
00075   result->buf = NULL;
00076   result->bufsize = 0;
00077   result->lineno = 1;
00078   result->idx = 0;
00079   result->comment_char = '#';
00080   result->escape_char = '\\';
00081   result->translate_strings = 1;
00082   result->return_widestr = 0;
00083 
00084   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
00085   if (n < 0)
00086     {
00087       int save = errno;
00088       fclose (result->fp);
00089       free ((char *) result->fname);
00090       free (result);
00091       errno = save;
00092       return NULL;
00093     }
00094 
00095   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
00096     n -= 2;
00097 
00098   result->buf[n] = '\0';
00099   result->bufact = n;
00100   result->hash_fct = hf;
00101 
00102   return result;
00103 }
00104 
00105 
00106 int
00107 lr_eof (struct linereader *lr)
00108 {
00109   return lr->bufact = 0;
00110 }
00111 
00112 
00113 void
00114 lr_ignore_rest (struct linereader *lr, int verbose)
00115 {
00116   if (verbose)
00117     {
00118       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
00119             && lr->buf[lr->idx] != lr->comment_char)
00120        if (lr->buf[lr->idx] == '\0')
00121          {
00122            if (lr_next (lr) < 0)
00123              return;
00124          }
00125        else
00126          ++lr->idx;
00127 
00128       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
00129          && lr->buf[lr->idx] != lr->comment_char)
00130        lr_error (lr, _("trailing garbage at end of line"));
00131     }
00132 
00133   /* Ignore continued line.  */
00134   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
00135     if (lr_next (lr) < 0)
00136       break;
00137 
00138   lr->idx = lr->bufact;
00139 }
00140 
00141 
00142 void
00143 lr_close (struct linereader *lr)
00144 {
00145   fclose (lr->fp);
00146   free (lr->buf);
00147   free (lr);
00148 }
00149 
00150 
00151 int
00152 lr_next (struct linereader *lr)
00153 {
00154   int n;
00155 
00156   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
00157   if (n < 0)
00158     return -1;
00159 
00160   ++lr->lineno;
00161 
00162   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
00163     {
00164 #if 0
00165       /* XXX Is this correct?  */
00166       /* An escaped newline character is substituted with a single <SP>.  */
00167       --n;
00168       lr->buf[n - 1] = ' ';
00169 #else
00170       n -= 2;
00171 #endif
00172     }
00173 
00174   lr->buf[n] = '\0';
00175   lr->bufact = n;
00176   lr->idx = 0;
00177 
00178   return 0;
00179 }
00180 
00181 
00182 /* Defined in error.c.  */
00183 /* This variable is incremented each time `error' is called.  */
00184 extern unsigned int error_message_count;
00185 
00186 /* The calling program should define program_name and set it to the
00187    name of the executing program.  */
00188 extern char *program_name;
00189 
00190 
00191 struct token *
00192 lr_token (struct linereader *lr, const struct charmap_t *charmap,
00193          struct localedef_t *locale, const struct repertoire_t *repertoire,
00194          int verbose)
00195 {
00196   int ch;
00197 
00198   while (1)
00199     {
00200       do
00201        {
00202          ch = lr_getc (lr);
00203 
00204          if (ch == EOF)
00205            {
00206              lr->token.tok = tok_eof;
00207              return &lr->token;
00208            };
00209 
00210          if (ch == '\n')
00211            {
00212              lr->token.tok = tok_eol;
00213              return &lr->token;
00214            }
00215        }
00216       while (isspace (ch));
00217 
00218       if (ch != lr->comment_char)
00219        break;
00220 
00221       /* Is there an newline at the end of the buffer?  */
00222       if (lr->buf[lr->bufact - 1] != '\n')
00223        {
00224          /* No.  Some people want this to mean that only the line in
00225             the file not the logical, concatenated line is ignored.
00226             Let's try this.  */
00227          lr->idx = lr->bufact;
00228          continue;
00229        }
00230 
00231       /* Ignore rest of line.  */
00232       lr_ignore_rest (lr, 0);
00233       lr->token.tok = tok_eol;
00234       return &lr->token;
00235     }
00236 
00237   /* Match escape sequences.  */
00238   if (ch == lr->escape_char)
00239     return get_toplvl_escape (lr);
00240 
00241   /* Match ellipsis.  */
00242   if (ch == '.')
00243     {
00244       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
00245        {
00246          int cnt;
00247          for (cnt = 0; cnt < 10; ++cnt)
00248            lr_getc (lr);
00249          lr->token.tok = tok_ellipsis4_2;
00250          return &lr->token;
00251        }
00252       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
00253        {
00254          lr_getc (lr);
00255          lr_getc (lr);
00256          lr_getc (lr);
00257          lr->token.tok = tok_ellipsis4;
00258          return &lr->token;
00259        }
00260       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
00261        {
00262          lr_getc (lr);
00263          lr_getc (lr);
00264          lr->token.tok = tok_ellipsis3;
00265          return &lr->token;
00266        }
00267       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
00268        {
00269          int cnt;
00270          for (cnt = 0; cnt < 6; ++cnt)
00271            lr_getc (lr);
00272          lr->token.tok = tok_ellipsis2_2;
00273          return &lr->token;
00274        }
00275       if (lr->buf[lr->idx] == '.')
00276        {
00277          lr_getc (lr);
00278          lr->token.tok = tok_ellipsis2;
00279          return &lr->token;
00280        }
00281     }
00282 
00283   switch (ch)
00284     {
00285     case '<':
00286       return get_symname (lr);
00287 
00288     case '0' ... '9':
00289       lr->token.tok = tok_number;
00290       lr->token.val.num = ch - '0';
00291 
00292       while (isdigit (ch = lr_getc (lr)))
00293        {
00294          lr->token.val.num *= 10;
00295          lr->token.val.num += ch - '0';
00296        }
00297       if (isalpha (ch))
00298        lr_error (lr, _("garbage at end of number"));
00299       lr_ungetn (lr, 1);
00300 
00301       return &lr->token;
00302 
00303     case ';':
00304       lr->token.tok = tok_semicolon;
00305       return &lr->token;
00306 
00307     case ',':
00308       lr->token.tok = tok_comma;
00309       return &lr->token;
00310 
00311     case '(':
00312       lr->token.tok = tok_open_brace;
00313       return &lr->token;
00314 
00315     case ')':
00316       lr->token.tok = tok_close_brace;
00317       return &lr->token;
00318 
00319     case '"':
00320       return get_string (lr, charmap, locale, repertoire, verbose);
00321 
00322     case '-':
00323       ch = lr_getc (lr);
00324       if (ch == '1')
00325        {
00326          lr->token.tok = tok_minus1;
00327          return &lr->token;
00328        }
00329       lr_ungetn (lr, 2);
00330       break;
00331     }
00332 
00333   return get_ident (lr);
00334 }
00335 
00336 
00337 static struct token *
00338 get_toplvl_escape (struct linereader *lr)
00339 {
00340   /* This is supposed to be a numeric value.  We return the
00341      numerical value and the number of bytes.  */
00342   size_t start_idx = lr->idx - 1;
00343   unsigned char *bytes = lr->token.val.charcode.bytes;
00344   size_t nbytes = 0;
00345   int ch;
00346 
00347   do
00348     {
00349       unsigned int byte = 0;
00350       unsigned int base = 8;
00351 
00352       ch = lr_getc (lr);
00353 
00354       if (ch == 'd')
00355        {
00356          base = 10;
00357          ch = lr_getc (lr);
00358        }
00359       else if (ch == 'x')
00360        {
00361          base = 16;
00362          ch = lr_getc (lr);
00363        }
00364 
00365       if ((base == 16 && !isxdigit (ch))
00366          || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
00367        {
00368        esc_error:
00369          lr->token.val.str.startmb = &lr->buf[start_idx];
00370 
00371          while (ch != EOF && !isspace (ch))
00372            ch = lr_getc (lr);
00373          lr->token.val.str.lenmb = lr->idx - start_idx;
00374 
00375          lr->token.tok = tok_error;
00376          return &lr->token;
00377        }
00378 
00379       if (isdigit (ch))
00380        byte = ch - '0';
00381       else
00382        byte = tolower (ch) - 'a' + 10;
00383 
00384       ch = lr_getc (lr);
00385       if ((base == 16 && !isxdigit (ch))
00386          || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
00387        goto esc_error;
00388 
00389       byte *= base;
00390       if (isdigit (ch))
00391        byte += ch - '0';
00392       else
00393        byte += tolower (ch) - 'a' + 10;
00394 
00395       ch = lr_getc (lr);
00396       if (base != 16 && isdigit (ch))
00397        {
00398          byte *= base;
00399          byte += ch - '0';
00400 
00401          ch = lr_getc (lr);
00402        }
00403 
00404       bytes[nbytes++] = byte;
00405     }
00406   while (ch == lr->escape_char
00407         && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
00408 
00409   if (!isspace (ch))
00410     lr_error (lr, _("garbage at end of character code specification"));
00411 
00412   lr_ungetn (lr, 1);
00413 
00414   lr->token.tok = tok_charcode;
00415   lr->token.val.charcode.nbytes = nbytes;
00416 
00417   return &lr->token;
00418 }
00419 
00420 
/* Append the byte CH to the byte buffer of the enclosing function.
   The expansion uses that function's variables `buf' (the buffer),
   `bufact' (bytes in use) and `bufmax' (bytes allocated).  When the
   buffer is full its capacity is doubled with xrealloc first.  */
#define ADDC(ch) \
  do                                                                          \
    {                                                                         \
      if (bufact == bufmax)                                                   \
        buf = xrealloc (buf, bufmax *= 2);                                    \
      buf[bufact++] = (ch);                                                   \
    }                                                                         \
  while (0)
00432 
00433 
/* Append the L bytes starting at S to the byte buffer of the enclosing
   function.  The expansion uses that function's variables `buf' (the
   buffer), `bufact' (bytes in use) and `bufmax' (bytes allocated).

   If the bytes do not fit, the capacity is raised to at least L and
   then doubled.  Since `bufact' never exceeds the old capacity this
   always leaves room for the appended bytes.  (The growth step used to
   modify `bufact' instead of `bufmax', which moved the write position
   and could write beyond the allocation.)  */
#define ADDS(s, l) \
  do                                                                          \
    {                                                                         \
      size_t _l = (l);                                                        \
      if (bufact + _l > bufmax)                                               \
        {                                                                     \
          if (bufmax < _l)                                                    \
            bufmax = _l;                                                      \
          bufmax *= 2;                                                        \
          buf = xrealloc (buf, bufmax);                                       \
        }                                                                     \
      memcpy (&buf[bufact], s, _l);                                           \
      bufact += _l;                                                           \
    }                                                                         \
  while (0)
00449 
00450 
/* Append the wide character CH to the wide character buffer of the
   enclosing function.  The expansion uses that function's variables
   `buf2' (the buffer), `buf2act' (elements in use) and `buf2max'
   (elements allocated).  When the buffer is full the element count is
   doubled and the buffer is resized to four bytes per element.  */
#define ADDWC(ch) \
  do                                                                          \
    {                                                                         \
      if (buf2act == buf2max)                                                 \
        buf2 = xrealloc (buf2, (buf2max *= 2) * 4);                           \
      buf2[buf2act++] = (ch);                                                 \
    }                                                                         \
  while (0)
00462 
00463 
00464 static struct token *
00465 get_symname (struct linereader *lr)
00466 {
00467   /* Symbol in brackets.  We must distinguish three kinds:
00468      1. reserved words
00469      2. ISO 10646 position values
00470      3. all other.  */
00471   char *buf;
00472   size_t bufact = 0;
00473   size_t bufmax = 56;
00474   const struct keyword_t *kw;
00475   int ch;
00476 
00477   buf = (char *) xmalloc (bufmax);
00478 
00479   do
00480     {
00481       ch = lr_getc (lr);
00482       if (ch == lr->escape_char)
00483        {
00484          int c2 = lr_getc (lr);
00485          ADDC (c2);
00486 
00487          if (c2 == '\n')
00488            ch = '\n';
00489        }
00490       else
00491        ADDC (ch);
00492     }
00493   while (ch != '>' && ch != '\n');
00494 
00495   if (ch == '\n')
00496     lr_error (lr, _("unterminated symbolic name"));
00497 
00498   /* Test for ISO 10646 position value.  */
00499   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
00500     {
00501       char *cp = buf + 1;
00502       while (cp < &buf[bufact - 1] && isxdigit (*cp))
00503        ++cp;
00504 
00505       if (cp == &buf[bufact - 1])
00506        {
00507          /* Yes, it is.  */
00508          lr->token.tok = tok_ucs4;
00509          lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
00510 
00511          return &lr->token;
00512        }
00513     }
00514 
00515   /* It is a symbolic name.  Test for reserved words.  */
00516   kw = lr->hash_fct (buf, bufact - 1);
00517 
00518   if (kw != NULL && kw->symname_or_ident == 1)
00519     {
00520       lr->token.tok = kw->token;
00521       free (buf);
00522     }
00523   else
00524     {
00525       lr->token.tok = tok_bsymbol;
00526 
00527       buf = xrealloc (buf, bufact + 1);
00528       buf[bufact] = '\0';
00529 
00530       lr->token.val.str.startmb = buf;
00531       lr->token.val.str.lenmb = bufact - 1;
00532     }
00533 
00534   return &lr->token;
00535 }
00536 
00537 
00538 static struct token *
00539 get_ident (struct linereader *lr)
00540 {
00541   char *buf;
00542   size_t bufact;
00543   size_t bufmax = 56;
00544   const struct keyword_t *kw;
00545   int ch;
00546 
00547   buf = xmalloc (bufmax);
00548   bufact = 0;
00549 
00550   ADDC (lr->buf[lr->idx - 1]);
00551 
00552   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
00553         && ch != '<' && ch != ',' && ch != EOF)
00554     {
00555       if (ch == lr->escape_char)
00556        {
00557          ch = lr_getc (lr);
00558          if (ch == '\n' || ch == EOF)
00559            {
00560              lr_error (lr, _("invalid escape sequence"));
00561              break;
00562            }
00563        }
00564       ADDC (ch);
00565     }
00566 
00567   lr_ungetc (lr, ch);
00568 
00569   kw = lr->hash_fct (buf, bufact);
00570 
00571   if (kw != NULL && kw->symname_or_ident == 0)
00572     {
00573       lr->token.tok = kw->token;
00574       free (buf);
00575     }
00576   else
00577     {
00578       lr->token.tok = tok_ident;
00579 
00580       buf = xrealloc (buf, bufact + 1);
00581       buf[bufact] = '\0';
00582 
00583       lr->token.val.str.startmb = buf;
00584       lr->token.val.str.lenmb = bufact;
00585     }
00586 
00587   return &lr->token;
00588 }
00589 
00590 
00591 static struct token *
00592 get_string (struct linereader *lr, const struct charmap_t *charmap,
00593            struct localedef_t *locale, const struct repertoire_t *repertoire,
00594            int verbose)
00595 {
00596   int return_widestr = lr->return_widestr;
00597   char *buf;
00598   wchar_t *buf2 = NULL;
00599   size_t bufact;
00600   size_t bufmax = 56;
00601 
00602   /* We must return two different strings.  */
00603   buf = xmalloc (bufmax);
00604   bufact = 0;
00605 
00606   /* We know it'll be a string.  */
00607   lr->token.tok = tok_string;
00608 
00609   /* If we need not translate the strings (i.e., expand <...> parts)
00610      we can run a simple loop.  */
00611   if (!lr->translate_strings)
00612     {
00613       int ch;
00614 
00615       buf2 = NULL;
00616       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
00617        ADDC (ch);
00618 
00619       /* Catch errors with trailing escape character.  */
00620       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
00621          && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
00622        {
00623          lr_error (lr, _("illegal escape sequence at end of string"));
00624          --bufact;
00625        }
00626       else if (ch == '\n' || ch == EOF)
00627        lr_error (lr, _("unterminated string"));
00628 
00629       ADDC ('\0');
00630     }
00631   else
00632     {
00633       int illegal_string = 0;
00634       size_t buf2act = 0;
00635       size_t buf2max = 56 * sizeof (uint32_t);
00636       int ch;
00637       int warned = 0;
00638 
00639       /* We have to provide the wide character result as well.  */
00640       if (return_widestr)
00641        buf2 = xmalloc (buf2max);
00642 
00643       /* Read until the end of the string (or end of the line or file).  */
00644       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
00645        {
00646          size_t startidx;
00647          uint32_t wch;
00648          struct charseq *seq;
00649 
00650          if (ch != '<')
00651            {
00652              /* The standards leave it up to the implementation to decide
00653                what to do with character which stand for themself.  We
00654                could jump through hoops to find out the value relative to
00655                the charmap and the repertoire map, but instead we leave
00656                it up to the locale definition author to write a better
00657                definition.  We assume here that every character which
00658                stands for itself is encoded using ISO 8859-1.  Using the
00659                escape character is allowed.  */
00660              if (ch == lr->escape_char)
00661               {
00662                 ch = lr_getc (lr);
00663                 if (ch == '\n' || ch == EOF)
00664                   break;
00665               }
00666 
00667              if (verbose && !warned)
00668               {
00669                 lr_error (lr, _("\
00670 non-symbolic character value should not be used"));
00671                 warned = 1;
00672               }
00673 
00674              ADDC (ch);
00675              if (return_widestr)
00676               ADDWC ((uint32_t) ch);
00677 
00678              continue;
00679            }
00680 
00681          /* Now we have to search for the end of the symbolic name, i.e.,
00682             the closing '>'.  */
00683          startidx = bufact;
00684          while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
00685            {
00686              if (ch == lr->escape_char)
00687               {
00688                 ch = lr_getc (lr);
00689                 if (ch == '\n' || ch == EOF)
00690                   break;
00691               }
00692              ADDC (ch);
00693            }
00694          if (ch == '\n' || ch == EOF)
00695            /* Not a correct string.  */
00696            break;
00697          if (bufact == startidx)
00698            {
00699              /* <> is no correct name.  Ignore it and also signal an
00700                error.  */
00701              illegal_string = 1;
00702              continue;
00703            }
00704 
00705          /* It might be a Uxxxx symbol.  */
00706          if (buf[startidx] == 'U'
00707              && (bufact - startidx == 5 || bufact - startidx == 9))
00708            {
00709              char *cp = buf + startidx + 1;
00710              while (cp < &buf[bufact] && isxdigit (*cp))
00711               ++cp;
00712 
00713              if (cp == &buf[bufact])
00714               {
00715                 char utmp[10];
00716 
00717                 /* Yes, it is.  */
00718                 ADDC ('\0');
00719                 wch = strtoul (buf + startidx + 1, NULL, 16);
00720 
00721                 /* Now forget about the name we just added.  */
00722                 bufact = startidx;
00723 
00724                 if (return_widestr)
00725                   ADDWC (wch);
00726 
00727                 /* See whether the charmap contains the Uxxxxxxxx names.  */
00728                 snprintf (utmp, sizeof (utmp), "U%08X", wch);
00729                 seq = charmap_find_value (charmap, utmp, 9);
00730 
00731                 if (seq == NULL)
00732                   {
00733                    /* No, this isn't the case.  Now determine from
00734                      the repertoire the name of the character and
00735                      find it in the charmap.  */
00736                     if (repertoire != NULL)
00737                      {
00738                        const char *symbol;
00739 
00740                        symbol = repertoire_find_symbol (repertoire, wch);
00741 
00742                        if (symbol != NULL)
00743                          seq = charmap_find_value (charmap, symbol,
00744                                                 strlen (symbol));
00745                      }
00746 
00747                     if (seq == NULL)
00748                      {
00749 #ifndef NO_TRANSLITERATION
00750                        /* Transliterate if possible.  */
00751                        if (locale != NULL)
00752                          {
00753                            uint32_t *translit;
00754 
00755                            if ((locale->avail & CTYPE_LOCALE) == 0)
00756                             {
00757                               /* Load the CTYPE data now.  */
00758                               int old_needed = locale->needed;
00759 
00760                               locale->needed = 0;
00761                               locale = load_locale (LC_CTYPE,
00762                                                  locale->name,
00763                                                  locale->repertoire_name,
00764                                                  charmap, locale);
00765                               locale->needed = old_needed;
00766                             }
00767 
00768                            if ((locale->avail & CTYPE_LOCALE) != 0
00769                               && ((translit = find_translit (locale,
00770                                                          charmap, wch))
00771                                   != NULL))
00772                             /* The CTYPE data contains a matching
00773                                transliteration.  */
00774                             {
00775                               int i;
00776 
00777                               for (i = 0; translit[i] != 0; ++i)
00778                                 {
00779                                   char utmp[10];
00780 
00781                                   snprintf (utmp, sizeof (utmp), "U%08X",
00782                                           translit[i]);
00783                                   seq = charmap_find_value (charmap, utmp,
00784                                                         9);
00785                                   assert (seq != NULL);
00786                                   ADDS (seq->bytes, seq->nbytes);
00787                                 }
00788 
00789                               continue;
00790                             }
00791                          }
00792 #endif /* NO_TRANSLITERATION */
00793 
00794                        /* Not a known name.  */
00795                        illegal_string = 1;
00796                      }
00797                   }
00798 
00799                 if (seq != NULL)
00800                   ADDS (seq->bytes, seq->nbytes);
00801 
00802                 continue;
00803               }
00804            }
00805 
00806          /* We now have the symbolic name in buf[startidx] to
00807             buf[bufact-1].  Now find out the value for this character
00808             in the charmap as well as in the repertoire map (in this
00809             order).  */
00810          seq = charmap_find_value (charmap, &buf[startidx],
00811                                 bufact - startidx);
00812 
00813          if (seq == NULL)
00814            {
00815              /* This name is not in the charmap.  */
00816              lr_error (lr, _("symbol `%.*s' not in charmap"),
00817                      (int) (bufact - startidx), &buf[startidx]);
00818              illegal_string = 1;
00819            }
00820 
00821          if (return_widestr)
00822            {
00823              /* Now the same for the multibyte representation.  */
00824              if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
00825               wch = seq->ucs4;
00826              else
00827               {
00828                 wch = repertoire_find_value (repertoire, &buf[startidx],
00829                                           bufact - startidx);
00830                 if (seq != NULL)
00831                   seq->ucs4 = wch;
00832               }
00833 
00834              if (wch == ILLEGAL_CHAR_VALUE)
00835               {
00836                 /* This name is not in the repertoire map.  */
00837                 lr_error (lr, _("symbol `%.*s' not in repertoire map"),
00838                          (int) (bufact - startidx), &buf[startidx]);
00839                 illegal_string = 1;
00840               }
00841              else
00842               ADDWC (wch);
00843            }
00844 
00845          /* Now forget about the name we just added.  */
00846          bufact = startidx;
00847 
00848          /* And copy the bytes.  */
00849          if (seq != NULL)
00850            ADDS (seq->bytes, seq->nbytes);
00851        }
00852 
00853       if (ch == '\n' || ch == EOF)
00854        {
00855          lr_error (lr, _("unterminated string"));
00856          illegal_string = 1;
00857        }
00858 
00859       if (illegal_string)
00860        {
00861          free (buf);
00862          free (buf2);
00863          lr->token.val.str.startmb = NULL;
00864          lr->token.val.str.lenmb = 0;
00865          lr->token.val.str.startwc = NULL;
00866          lr->token.val.str.lenwc = 0;
00867 
00868          return &lr->token;
00869        }
00870 
00871       ADDC ('\0');
00872 
00873       if (return_widestr)
00874        {
00875          ADDWC (0);
00876          lr->token.val.str.startwc = xrealloc (buf2,
00877                                           buf2act * sizeof (uint32_t));
00878          lr->token.val.str.lenwc = buf2act;
00879        }
00880     }
00881 
00882   lr->token.val.str.startmb = xrealloc (buf, bufact);
00883   lr->token.val.str.lenmb = bufact;
00884 
00885   return &lr->token;
00886 }