Back to index

glibc  2.9
Defines | Functions | Variables
linereader.c File Reference
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "localedef.h"
#include "charmap.h"
#include "error.h"
#include "linereader.h"
#include "locfile.h"

Go to the source code of this file.

Defines

#define ADDC(ch)
#define ADDS(s, l)
#define ADDWC(ch)

Functions

static struct token * get_toplvl_escape (struct linereader *lr)
static struct token * get_symname (struct linereader *lr)
static struct token * get_ident (struct linereader *lr)
static struct token * get_string (struct linereader *lr, const struct charmap_t *charmap, struct localedef_t *locale, const struct repertoire_t *repertoire, int verbose)
struct linereader * lr_open (const char *fname, kw_hash_fct_t hf)
struct linereader * lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
int lr_eof (struct linereader *lr)
void lr_ignore_rest (struct linereader *lr, int verbose)
void lr_close (struct linereader *lr)
int lr_next (struct linereader *lr)
struct token * lr_token (struct linereader *lr, const struct charmap_t *charmap, struct localedef_t *locale, const struct repertoire_t *repertoire, int verbose)

Variables

unsigned int error_message_count
char * program_name

Define Documentation

#define ADDC (   ch)
Value:
do /* Append the single byte CH to the caller's local buffer `buf'.  */ \
    {                                                                \
      if (bufact == bufmax) /* No free slot left.  */                \
       {                                                             \
         bufmax *= 2; /* Double the capacity.  */                    \
         buf = xrealloc (buf, bufmax);                                      \
       }                                                             \
      buf[bufact++] = (ch); /* Store and advance the fill count.  */ \
    }                                                                \
  while (0)

Definition at line 421 of file linereader.c.

#define ADDS (   s,
  l 
)
Value:
do                                                                   \
    {                                                                \
      /* Append the L bytes at S to the caller's local `buf'.  */    \
      size_t _l = (l);                                               \
      if (bufact + _l > bufmax)                                      \
       {                                                             \
         /* A chunk larger than the current capacity would not  */   \
         /* fit even after doubling, so raise the capacity to   */   \
         /* the chunk size first.  The fill count `bufact' must */   \
         /* stay untouched: it is the write offset used below.  */   \
         if (bufmax < _l)                                            \
           bufmax = _l;                                              \
         bufmax *= 2;                                                \
         buf = xrealloc (buf, bufmax);                               \
       }                                                             \
      memcpy (&buf[bufact], s, _l);                                  \
      bufact += _l;                                                  \
    }                                                                \
  while (0)

Definition at line 434 of file linereader.c.

#define ADDWC (   ch)
Value:
do /* Append the wide character CH to the caller's local `buf2'.  */ \
    {                                                                \
      if (buf2act == buf2max) /* Compared as element counts.  */     \
       {                                                             \
         buf2max *= 2; /* Double the capacity.  */                   \
         buf2 = xrealloc (buf2, buf2max * 4); /* 4: presumably the element size in bytes -- TODO confirm.  */ \
       }                                                             \
      buf2[buf2act++] = (ch);                                               \
    }                                                                \
  while (0)

Definition at line 451 of file linereader.c.


Function Documentation

static struct token * get_ident ( struct linereader * lr) [static, read]

Definition at line 539 of file linereader.c.

{
  /* Read an identifier into a growable heap buffer and classify it.
     The names buf, bufact and bufmax are the ones the ADDC macro
     operates on.  The result is stored in, and returned as, lr->token.  */
  size_t bufmax = 56;
  size_t bufact = 0;
  char *buf = xmalloc (bufmax);
  const struct keyword_t *kw;
  int ch;

  /* Seed the buffer with the character just before the read index.  */
  ADDC (lr->buf[lr->idx - 1]);

  for (;;)
    {
      ch = lr_getc (lr);

      /* Whitespace, a handful of punctuation characters and EOF all
         terminate the identifier.  */
      if (isspace (ch) || ch == '"' || ch == ';' || ch == '<' || ch == ','
          || ch == EOF)
        break;

      if (ch == lr->escape_char)
        {
          /* The escaped character is taken literally, but a line end
             or EOF cannot be escaped.  */
          ch = lr_getc (lr);
          if (ch == '\n' || ch == EOF)
            {
              lr_error (lr, _("invalid escape sequence"));
              break;
            }
        }

      ADDC (ch);
    }

  /* Hand the terminating character back to the reader.  */
  lr_ungetc (lr, ch);

  kw = lr->hash_fct (buf, bufact);

  if (kw == NULL || kw->symname_or_ident != 0)
    {
      /* Not a keyword usable as a plain identifier: return the text
         itself.  The token takes over the NUL-terminated buffer.  */
      lr->token.tok = tok_ident;

      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact;

      return &lr->token;
    }

  /* A keyword: only its token code is reported, the text is dropped.  */
  lr->token.tok = kw->token;
  free (buf);

  return &lr->token;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static struct token * get_string ( struct linereader * lr,
const struct charmap_t * charmap,
struct localedef_t * locale,
const struct repertoire_t * repertoire,
int  verbose 
) [static, read]

Definition at line 592 of file linereader.c.

{
  /* Read the remainder of a double-quoted string up to the closing
     quote, the end of the line or EOF, and store it in lr->token as a
     tok_string.

     If lr->translate_strings is zero the characters are copied
     verbatim.  Otherwise every <symbol> is replaced by its byte
     sequence from CHARMAP (looked up via REPERTOIRE or, unless
     NO_TRANSLITERATION is defined, via the transliteration data of
     LOCALE when the charmap has no direct entry) and, if
     lr->return_widestr is set, a parallel wide character string is
     built.  VERBOSE enables a one-time diagnostic for characters that
     are not written symbolically.

     Returns &lr->token.  On success val.str.startmb/lenmb describe a
     heap buffer including the terminating NUL (and startwc/lenwc the
     wide string, when requested).  If the translated string is
     malformed all four fields are cleared and the buffers are freed.  */
  int return_widestr = lr->return_widestr;
  char *buf;
  wchar_t *buf2 = NULL;
  size_t bufact;
  size_t bufmax = 56;

  /* We must return two different strings.  */
  buf = xmalloc (bufmax);
  bufact = 0;

  /* We know it'll be a string.  */
  lr->token.tok = tok_string;

  /* If we need not translate the strings (i.e., expand <...> parts)
     we can run a simple loop.  */
  if (!lr->translate_strings)
    {
      int ch;

      buf2 = NULL;
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
        ADDC (ch);

      /* Catch errors with trailing escape character.  */
      if (bufact > 0 && buf[bufact - 1] == lr->escape_char
          && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
        {
          lr_error (lr, _("illegal escape sequence at end of string"));
          --bufact;
        }
      else if (ch == '\n' || ch == EOF)
        lr_error (lr, _("unterminated string"));

      ADDC ('\0');
    }
  else
    {
      int illegal_string = 0;
      size_t buf2act = 0;
      /* Capacity of BUF2 counted in wide characters, not in bytes:
         ADDWC compares it with the element count buf2act and scales
         it to bytes itself when reallocating.  The initial allocation
         below must therefore scale it as well, otherwise ADDWC writes
         past the end of the buffer before its first reallocation.  */
      size_t buf2max = 56;
      int ch;
      int warned = 0;

      /* We have to provide the wide character result as well.  */
      if (return_widestr)
        buf2 = xmalloc (buf2max * sizeof (uint32_t));

      /* Read until the end of the string (or end of the line or file).  */
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
        {
          size_t startidx;
          uint32_t wch;
          struct charseq *seq;

          if (ch != '<')
            {
              /* The standards leave it up to the implementation to decide
                 what to do with character which stand for themself.  We
                 could jump through hoops to find out the value relative to
                 the charmap and the repertoire map, but instead we leave
                 it up to the locale definition author to write a better
                 definition.  We assume here that every character which
                 stands for itself is encoded using ISO 8859-1.  Using the
                 escape character is allowed.  */
              if (ch == lr->escape_char)
                {
                  ch = lr_getc (lr);
                  if (ch == '\n' || ch == EOF)
                    break;
                }

              if (verbose && !warned)
                {
                  lr_error (lr, _("\
non-symbolic character value should not be used"));
                  warned = 1;
                }

              ADDC (ch);
              if (return_widestr)
                ADDWC ((uint32_t) ch);

              continue;
            }

          /* Now we have to search for the end of the symbolic name, i.e.,
             the closing '>'.  The name is collected temporarily at the
             end of BUF and removed again once it has been resolved.  */
          startidx = bufact;
          while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
            {
              if (ch == lr->escape_char)
                {
                  ch = lr_getc (lr);
                  if (ch == '\n' || ch == EOF)
                    break;
                }
              ADDC (ch);
            }
          if (ch == '\n' || ch == EOF)
            /* Not a correct string.  */
            break;
          if (bufact == startidx)
            {
              /* <> is no correct name.  Ignore it and also signal an
                 error.  */
              illegal_string = 1;
              continue;
            }

          /* It might be a Uxxxx symbol.  */
          if (buf[startidx] == 'U'
              && (bufact - startidx == 5 || bufact - startidx == 9))
            {
              char *cp = buf + startidx + 1;
              /* The <ctype.h> functions require a value representable
                 as unsigned char (or EOF).  */
              while (cp < &buf[bufact] && isxdigit ((unsigned char) *cp))
                ++cp;

              if (cp == &buf[bufact])
                {
                  char utmp[10];

                  /* Yes, it is.  */
                  ADDC ('\0');
                  wch = strtoul (buf + startidx + 1, NULL, 16);

                  /* Now forget about the name we just added.  */
                  bufact = startidx;

                  if (return_widestr)
                    ADDWC (wch);

                  /* See whether the charmap contains the Uxxxxxxxx names.  */
                  snprintf (utmp, sizeof (utmp), "U%08X", wch);
                  seq = charmap_find_value (charmap, utmp, 9);

                  if (seq == NULL)
                    {
                      /* No, this isn't the case.  Now determine from
                         the repertoire the name of the character and
                         find it in the charmap.  */
                      if (repertoire != NULL)
                        {
                          const char *symbol;

                          symbol = repertoire_find_symbol (repertoire, wch);

                          if (symbol != NULL)
                            seq = charmap_find_value (charmap, symbol,
                                                      strlen (symbol));
                        }

                      if (seq == NULL)
                        {
#ifndef NO_TRANSLITERATION
                          /* Transliterate if possible.  */
                          if (locale != NULL)
                            {
                              uint32_t *translit;

                              if ((locale->avail & CTYPE_LOCALE) == 0)
                                {
                                  /* Load the CTYPE data now.  */
                                  int old_needed = locale->needed;

                                  locale->needed = 0;
                                  locale = load_locale (LC_CTYPE,
                                                        locale->name,
                                                        locale->repertoire_name,
                                                        charmap, locale);
                                  locale->needed = old_needed;
                                }

                              if ((locale->avail & CTYPE_LOCALE) != 0
                                  && ((translit = find_translit (locale,
                                                                 charmap,
                                                                 wch))
                                      != NULL))
                                /* The CTYPE data contains a matching
                                   transliteration.  */
                                {
                                  int i;

                                  /* Emit the bytes of every character
                                     of the replacement sequence.  */
                                  for (i = 0; translit[i] != 0; ++i)
                                    {
                                      snprintf (utmp, sizeof (utmp),
                                                "U%08X", translit[i]);
                                      seq = charmap_find_value (charmap,
                                                                utmp, 9);
                                      assert (seq != NULL);
                                      ADDS (seq->bytes, seq->nbytes);
                                    }

                                  continue;
                                }
                            }
#endif /* NO_TRANSLITERATION */

                          /* Not a known name.  */
                          illegal_string = 1;
                        }
                    }

                  if (seq != NULL)
                    ADDS (seq->bytes, seq->nbytes);

                  continue;
                }
            }

          /* We now have the symbolic name in buf[startidx] to
             buf[bufact-1].  Now find out the value for this character
             in the charmap as well as in the repertoire map (in this
             order).  */
          seq = charmap_find_value (charmap, &buf[startidx],
                                    bufact - startidx);

          if (seq == NULL)
            {
              /* This name is not in the charmap.  */
              lr_error (lr, _("symbol `%.*s' not in charmap"),
                        (int) (bufact - startidx), &buf[startidx]);
              illegal_string = 1;
            }

          if (return_widestr)
            {
              /* Now the same for the multibyte representation.  */
              if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
                wch = seq->ucs4;
              else
                {
                  wch = repertoire_find_value (repertoire, &buf[startidx],
                                               bufact - startidx);
                  /* Remember the result in the charmap entry.  */
                  if (seq != NULL)
                    seq->ucs4 = wch;
                }

              if (wch == ILLEGAL_CHAR_VALUE)
                {
                  /* This name is not in the repertoire map.  */
                  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
                            (int) (bufact - startidx), &buf[startidx]);
                  illegal_string = 1;
                }
              else
                ADDWC (wch);
            }

          /* Now forget about the name we just added.  */
          bufact = startidx;

          /* And copy the bytes.  */
          if (seq != NULL)
            ADDS (seq->bytes, seq->nbytes);
        }

      if (ch == '\n' || ch == EOF)
        {
          lr_error (lr, _("unterminated string"));
          illegal_string = 1;
        }

      if (illegal_string)
        {
          /* Report an empty string; nothing collected so far is kept.  */
          free (buf);
          free (buf2);
          lr->token.val.str.startmb = NULL;
          lr->token.val.str.lenmb = 0;
          lr->token.val.str.startwc = NULL;
          lr->token.val.str.lenwc = 0;

          return &lr->token;
        }

      ADDC ('\0');

      if (return_widestr)
        {
          ADDWC (0);
          lr->token.val.str.startwc = xrealloc (buf2,
                                                buf2act * sizeof (uint32_t));
          lr->token.val.str.lenwc = buf2act;
        }
    }

  /* Trim the multibyte buffer to the size actually used; the length
     includes the terminating NUL appended above.  */
  lr->token.val.str.startmb = xrealloc (buf, bufact);
  lr->token.val.str.lenmb = bufact;

  return &lr->token;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static struct token * get_symname ( struct linereader * lr) [static, read]

Definition at line 465 of file linereader.c.

{
  /* Symbol in brackets.  We must distinguish three kinds:
     1. reserved words
     2. ISO 10646 position values
     3. all other.

     The text is read up to and including the closing '>' (or the end
     of the line / input), so the last byte collected is always the
     terminator; that is why the name itself is bufact - 1 bytes long.
     The result is stored in, and returned as, lr->token.  */
  char *buf;
  size_t bufact = 0;
  size_t bufmax = 56;
  const struct keyword_t *kw;
  int ch;

  buf = xmalloc (bufmax);

  do
    {
      ch = lr_getc (lr);
      if (ch == lr->escape_char)
        {
          /* The escaped character is taken literally, except that a
             line end or EOF still terminates the name.  */
          int c2 = lr_getc (lr);
          ADDC (c2);

          if (c2 == '\n' || c2 == EOF)
            ch = c2;
        }
      else
        ADDC (ch);
    }
  /* Stop at EOF as well, like get_ident and get_string do; without
     this test a name left open at the end of the input would never
     terminate the loop.  */
  while (ch != '>' && ch != '\n' && ch != EOF);

  if (ch == '\n' || ch == EOF)
    lr_error (lr, _("unterminated symbolic name"));

  /* Test for ISO 10646 position value.  */
  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
    {
      char *cp = buf + 1;
      /* The <ctype.h> functions require a value representable as
         unsigned char (or EOF).  */
      while (cp < &buf[bufact - 1] && isxdigit ((unsigned char) *cp))
        ++cp;

      if (cp == &buf[bufact - 1])
        {
          /* Yes, it is.  The conversion stops at the terminator byte
             that follows the digits.  */
          lr->token.tok = tok_ucs4;
          lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);

          /* Only the numeric value is returned, so the name buffer
             has no owner any more and must be released here.  */
          free (buf);

          return &lr->token;
        }
    }

  /* It is a symbolic name.  Test for reserved words.  */
  kw = lr->hash_fct (buf, bufact - 1);

  if (kw != NULL && kw->symname_or_ident == 1)
    {
      lr->token.tok = kw->token;
      free (buf);
    }
  else
    {
      lr->token.tok = tok_bsymbol;

      /* The token takes over the buffer; the reported length excludes
         the terminator byte.  */
      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact - 1;
    }

  return &lr->token;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static struct token * get_toplvl_escape ( struct linereader * lr) [static, read]

Definition at line 338 of file linereader.c.

{
  /* This is supposed to be a numeric value.  We return the
     numerical value and the number of bytes.

     Each pass of the loop below decodes one byte written as an
     optional base prefix ('d' = decimal, 'x' = hexadecimal, none =
     octal) followed by digits.  Further bytes follow as long as the
     next character is again the escape character and the byte array
     in lr->token.val.charcode has room.  On success the token is
     tok_charcode; on a malformed digit it is tok_error with
     val.str describing the offending text inside lr->buf.  */
  size_t start_idx = lr->idx - 1;
  unsigned char *bytes = lr->token.val.charcode.bytes;
  size_t nbytes = 0;
  int ch;

  do
    {
      unsigned int byte = 0;
      unsigned int base = 8;

      ch = lr_getc (lr);

      /* An optional prefix selects the base.  */
      if (ch == 'd')
       {
         base = 10;
         ch = lr_getc (lr);
       }
      else if (ch == 'x')
       {
         base = 16;
         ch = lr_getc (lr);
       }

      /* The first digit is mandatory and must be valid for the base.  */
      if ((base == 16 && !isxdigit (ch))
         || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
       {
       esc_error:
         /* Also reached by `goto' from the second-digit check below.
            The error token refers to the text from the start of the
            escape up to the next whitespace or EOF.  */
         lr->token.val.str.startmb = &lr->buf[start_idx];

         while (ch != EOF && !isspace (ch))
           ch = lr_getc (lr);
         lr->token.val.str.lenmb = lr->idx - start_idx;

         lr->token.tok = tok_error;
         return &lr->token;
       }

      if (isdigit (ch))
       byte = ch - '0';
      else
       byte = tolower (ch) - 'a' + 10;

      /* The second digit is mandatory as well.  */
      ch = lr_getc (lr);
      if ((base == 16 && !isxdigit (ch))
         || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
       goto esc_error;

      byte *= base;
      if (isdigit (ch))
       byte += ch - '0';
      else
       byte += tolower (ch) - 'a' + 10;

      /* Octal and decimal values may have a third digit.
         NOTE(review): this test uses isdigit only, so for base 8 the
         digits '8' and '9' are accepted here -- confirm whether that
         is intended.  */
      ch = lr_getc (lr);
      if (base != 16 && isdigit (ch))
       {
         byte *= base;
         byte += ch - '0';

         ch = lr_getc (lr);
       }

      /* The value is stored in an unsigned char element.  */
      bytes[nbytes++] = byte;
    }
  while (ch == lr->escape_char
        && nbytes < (int) sizeof (lr->token.val.charcode.bytes));

  if (!isspace (ch))
    lr_error (lr, _("garbage at end of character code specification"));

  /* Step back over the one character read past the last digit.  */
  lr_ungetn (lr, 1);

  lr->token.tok = tok_charcode;
  lr->token.val.charcode.nbytes = nbytes;

  return &lr->token;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void lr_close ( struct linereader * lr)

Definition at line 143 of file linereader.c.

{
  /* Tear the reader down: close its stream, then release the line
     buffer and finally the reader object itself.  */
  FILE *stream = lr->fp;
  char *line = lr->buf;

  fclose (stream);
  free (line);
  free (lr);
}

Here is the call graph for this function:

Here is the caller graph for this function:

struct linereader* lr_create ( FILE * fp,
const char *  fname,
kw_hash_fct_t  hf 
) [read]

Definition at line 66 of file linereader.c.

{
  /* Allocate a line reader for the already opened stream FP, set its
     defaults ('#' comments, '\' escapes, string translation on, no
     wide strings) and read the first line.

     FNAME is copied; HF is the keyword lookup function used by the
     tokenizer.  Returns the new reader, or NULL with errno set if
     the first line cannot be read, in which case FP has been closed.  */
  struct linereader *result;
  ssize_t n;

  result = xmalloc (sizeof (*result));

  result->fp = fp;
  result->fname = xstrdup (fname);
  result->buf = NULL;
  result->bufsize = 0;
  result->lineno = 1;
  result->idx = 0;
  result->comment_char = '#';
  result->escape_char = '\\';
  result->translate_strings = 1;
  result->return_widestr = 0;

  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  if (n < 0)
    {
      /* Keep the errno of the failed read across the cleanup calls.  */
      int save = errno;
      fclose (result->fp);
      /* getdelim may have allocated the buffer even though it
         failed, so it has to be released as well.  */
      free (result->buf);
      free ((char *) result->fname);
      free (result);
      errno = save;
      return NULL;
    }

  /* Drop an escaped newline so that the next line continues this one.
     The escape character still has its default value here.  */
  if (n > 1 && result->buf[n - 2] == result->escape_char
      && result->buf[n - 1] == '\n')
    n -= 2;

  result->buf[n] = '\0';
  result->bufact = n;
  result->hash_fct = hf;

  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int lr_eof ( struct linereader * lr)

Definition at line 107 of file linereader.c.

{
  /* Report whether the current line buffer holds no characters.
     This must be a comparison: an assignment here would reset the
     buffer length as a side effect and always yield 0.  */
  return lr->bufact == 0;
}

void lr_ignore_rest ( struct linereader * lr,
int  verbose 
)

Definition at line 114 of file linereader.c.

{
  /* Discard the remainder of the current logical line.  With VERBOSE
     set, first complain if anything other than blanks or a comment
     is left on it.  */
  if (verbose)
    {
      /* Step over blanks until a newline, a comment character or a
         non-blank character is found.  */
      for (;;)
        {
          if (!isspace (lr->buf[lr->idx]))
            break;
          if (lr->buf[lr->idx] == '\n')
            break;
          if (lr->buf[lr->idx] == lr->comment_char)
            break;

          if (lr->buf[lr->idx] != '\0')
            {
              ++lr->idx;
              continue;
            }

          /* End of the buffered text: fetch the next line.  */
          if (lr_next (lr) < 0)
            return;
        }

      if (!(lr->buf[lr->idx] == '\n' || feof (lr->fp)
            || lr->buf[lr->idx] == lr->comment_char))
        lr_error (lr, _("trailing garbage at end of line"));
    }

  /* Ignore continued line: keep reading while the buffered text does
     not end in a newline.  */
  while (lr->bufact > 0)
    {
      if (lr->buf[lr->bufact - 1] == '\n')
        break;
      if (lr_next (lr) < 0)
        break;
    }

  /* Everything buffered counts as consumed.  */
  lr->idx = lr->bufact;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int lr_next ( struct linereader * lr)

Definition at line 152 of file linereader.c.

{
  /* Replace the buffered text with the next line of the stream.
     Returns 0 on success and -1 if no further line could be read.  */
  int n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);

  if (n < 0)
    return -1;

  lr->lineno++;

  /* A newline preceded by the escape character continues the line.  */
  if (n > 1 && lr->buf[n - 1] == '\n' && lr->buf[n - 2] == lr->escape_char)
    {
#if 0
      /* XXX Is this correct?  */
      /* An escaped newline character is substituted with a single <SP>.  */
      --n;
      lr->buf[n - 1] = ' ';
#else
      /* Cut off both the escape character and the newline.  */
      n -= 2;
#endif
    }

  lr->buf[n] = '\0';
  lr->idx = 0;
  lr->bufact = n;

  return 0;
}

Here is the caller graph for this function:

struct linereader* lr_open ( const char *  fname,
kw_hash_fct_t  hf 
) [read]

Definition at line 49 of file linereader.c.

{
  /* Open FNAME and wrap it in a line reader.  A null name, "-" and
     "/dev/stdin" all select standard input.  Returns NULL if the
     file cannot be opened; otherwise whatever lr_create returns.  */
  int use_stdin = (fname == NULL
                   || strcmp (fname, "-") == 0
                   || strcmp (fname, "/dev/stdin") == 0);
  FILE *fp;

  if (use_stdin)
    return lr_create (stdin, "<stdin>", hf);

  fp = fopen (fname, "rm");
  if (fp == NULL)
    return NULL;

  return lr_create (fp, fname, hf);
}

Here is the call graph for this function:

Here is the caller graph for this function:

struct token* lr_token ( struct linereader * lr,
const struct charmap_t * charmap,
struct localedef_t * locale,
const struct repertoire_t * repertoire,
int  verbose 
) [read]

Definition at line 192 of file linereader.c.

{
  /* Read the next token from LR, store it in lr->token and return a
     pointer to it.  CHARMAP, LOCALE, REPERTOIRE and VERBOSE are only
     passed on to get_string when a string literal is found.  */
  int ch;

  /* Skip leading whitespace and comments.  */
  while (1)
    {
      do
       {
         ch = lr_getc (lr);

         if (ch == EOF)
           {
             lr->token.tok = tok_eof;
             return &lr->token;
           };

         /* A newline is whitespace too, but it is a token of its own.  */
         if (ch == '\n')
           {
             lr->token.tok = tok_eol;
             return &lr->token;
           }
       }
      while (isspace (ch));

      /* Anything but a comment character starts a real token.  */
      if (ch != lr->comment_char)
       break;

      /* Is there an newline at the end of the buffer?  */
      if (lr->buf[lr->bufact - 1] != '\n')
       {
         /* No.  Some people want this to mean that only the line in
            the file not the logical, concatenated line is ignored.
            Let's try this.  */
         lr->idx = lr->bufact;
         continue;
       }

      /* Ignore rest of line.  */
      lr_ignore_rest (lr, 0);
      lr->token.tok = tok_eol;
      return &lr->token;
    }

  /* Match escape sequences.  */
  if (ch == lr->escape_char)
    return get_toplvl_escape (lr);

  /* Match ellipsis.  The first '.' has been read already, so the
     patterns below describe what must follow it.  A pattern that is
     a prefix of another one is tested after the longer one.  */
  if (ch == '.')
    {
      if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
       {
         int cnt;
         for (cnt = 0; cnt < 10; ++cnt)
           lr_getc (lr);
         lr->token.tok = tok_ellipsis4_2;
         return &lr->token;
       }
      if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
       {
         lr_getc (lr);
         lr_getc (lr);
         lr_getc (lr);
         lr->token.tok = tok_ellipsis4;
         return &lr->token;
       }
      if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
       {
         lr_getc (lr);
         lr_getc (lr);
         lr->token.tok = tok_ellipsis3;
         return &lr->token;
       }
      if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
       {
         int cnt;
         for (cnt = 0; cnt < 6; ++cnt)
           lr_getc (lr);
         lr->token.tok = tok_ellipsis2_2;
         return &lr->token;
       }
      if (lr->buf[lr->idx] == '.')
       {
         lr_getc (lr);
         lr->token.tok = tok_ellipsis2;
         return &lr->token;
       }
      /* A single '.' falls through and ends up in get_ident.  */
    }

  switch (ch)
    {
    case '<':
      return get_symname (lr);

    /* The range syntax is a GNU C extension.  */
    case '0' ... '9':
      /* Accumulate an unsigned decimal number.  */
      lr->token.tok = tok_number;
      lr->token.val.num = ch - '0';

      while (isdigit (ch = lr_getc (lr)))
       {
         lr->token.val.num *= 10;
         lr->token.val.num += ch - '0';
       }
      if (isalpha (ch))
       lr_error (lr, _("garbage at end of number"));
      /* Step back over the character that ended the number.  */
      lr_ungetn (lr, 1);

      return &lr->token;

    case ';':
      lr->token.tok = tok_semicolon;
      return &lr->token;

    case ',':
      lr->token.tok = tok_comma;
      return &lr->token;

    case '(':
      lr->token.tok = tok_open_brace;
      return &lr->token;

    case ')':
      lr->token.tok = tok_close_brace;
      return &lr->token;

    case '"':
      return get_string (lr, charmap, locale, repertoire, verbose);

    case '-':
      /* "-1" is a token of its own; anything else is an identifier.  */
      ch = lr_getc (lr);
      if (ch == '1')
       {
         lr->token.tok = tok_minus1;
         return &lr->token;
       }
      /* NOTE(review): this steps back over both characters, while
         get_ident starts from lr->buf[lr->idx - 1]; the other paths
         into get_ident leave the first character consumed.  Confirm
         against lr_ungetn whether 2 is the intended count.  */
      lr_ungetn (lr, 2);
      break;
    }

  /* Everything else is an identifier or a keyword.  */
  return get_ident (lr);
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

Definition at line 51 of file error.c.

char* program_name