Back to index

courier  0.68.2
printf-parse.c
Go to the documentation of this file.
00001 /* Formatted output to strings.
00002    Copyright (C) 1999-2000, 2002-2003, 2006-2008 Free Software Foundation, Inc.
00003 
00004    This program is free software; you can redistribute it and/or modify it
00005    under the terms of the GNU Library General Public License as published
00006    by the Free Software Foundation; either version 2, or (at your option)
00007    any later version.
00008 
00009    This program is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public
00015    License along with this program; if not, write to the Free Software
00016    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
00017    USA.  */
00018 
00019 /* This file can be parametrized with the following macros:
00020      CHAR_T             The element type of the format string.
00021      CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
00022                         in the format string are ASCII.
00023      DIRECTIVE          Structure denoting a format directive.
00024                         Depends on CHAR_T.
00025      DIRECTIVES         Structure denoting the set of format directives of a
00026                         format string.  Depends on CHAR_T.
00027      PRINTF_PARSE       Function that parses a format string.
00028                         Depends on CHAR_T.
00029      STATIC             Set to 'static' to declare the function static.
00030      ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
00031 
00032 #ifndef PRINTF_PARSE
00033 # include <config.h>
00034 #endif
00035 
00036 /* Specification.  */
00037 #ifndef PRINTF_PARSE
00038 # include "printf-parse.h"
00039 #endif
00040 
00041 /* Default parameters: when the includer has not defined PRINTF_PARSE, build
         the plain 'char' variant, i.e. a function named printf_parse whose
         format elements are 'char' and whose directive structures are
         char_directive / char_directives.  */
00042 #ifndef PRINTF_PARSE
00043 # define PRINTF_PARSE printf_parse
00044 # define CHAR_T char
00045 # define DIRECTIVE char_directive
00046 # define DIRECTIVES char_directives
00047 #endif
00048 
00049 /* Get size_t, NULL.  */
00050 #include <stddef.h>
00051 
00052 /* Get intmax_t.  */
00053 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
00054 # if HAVE_STDINT_H_WITH_UINTMAX
00055 #  include <stdint.h>
00056 # endif
00057 # if HAVE_INTTYPES_H_WITH_UINTMAX
00058 #  include <inttypes.h>
00059 # endif
00060 #else
00061 # include <stdint.h>
00062 #endif
00063 
00064 /* malloc(), realloc(), free().  */
00065 #include <stdlib.h>
00066 
00067 /* errno.  */
00068 #include <errno.h>
00069 
00070 /* Checked size_t computations.  */
00071 #include "xsize.h"
00072 
00073 #if CHAR_T_ONLY_ASCII
00074 /* c_isascii().  */
00075 # include "c-ctype.h"
00076 #endif
00077 
00078 #ifdef STATIC
00079 STATIC
00080 #endif
      /* Parse the printf-style format string FORMAT.

         D receives the directives found: D->dir is a malloc'ed array holding
         D->count directives followed by one extra element whose dir_start
         points at the terminating NUL of FORMAT; D->max_width_length and
         D->max_precision_length are the longest width and precision
         specifications seen, in characters (a precision counts its leading
         '.').  A receives the argument table: A->arg[i].type is the type
         required for argument i (TYPE_NONE for an index that no directive
         uses); A->arg stays NULL when no directive takes an argument.

         Returns 0 on success.  Returns -1 with errno set to EINVAL when FORMAT
         is invalid (positional argument number 0 or too large, two different
         types requested for one argument, an unknown conversion character, an
         'l'-sized 'c' or 's' conversion when HAVE_WINT_T resp. HAVE_WCHAR_T is
         not set, or a non-ASCII character outside a directive when
         CHAR_T_ONLY_ASCII is set).  Returns -1 with errno set to ENOMEM when an
         allocation fails or an allocation size overflows.  On failure D->dir
         and A->arg have already been freed here, but the pointers are not
         reset, so they must not be used or freed again.  If the very first
         allocation fails, A is not touched at all.
         NOTE(review): on success the caller presumably owns D->dir and A->arg
         and has to free them -- confirm against printf-parse.h.  */
00081 int
00082 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
00083 {
00084   const CHAR_T *cp = format;            /* pointer into format */
00085   size_t arg_posn = 0;          /* number of regular arguments consumed */
00086   size_t d_allocated;                   /* allocated elements of d->dir */
00087   size_t a_allocated;                   /* allocated elements of a->arg */
00088   size_t max_width_length = 0;
00089   size_t max_precision_length = 0;
00090 
00091   d->count = 0;
00092   d_allocated = 1;
00093   d->dir = (DIRECTIVE *) malloc (d_allocated * sizeof (DIRECTIVE));
00094   if (d->dir == NULL)
00095     /* Out of memory.  */
00096     goto out_of_memory_1;
00097 
00098   a->count = 0;
00099   a_allocated = 0;
00100   a->arg = NULL;
00101 
        /* REGISTER_ARG (index, type): record that argument number INDEX must
           have type TYPE.  When INDEX lies beyond the allocated part of a->arg,
           the array is grown to the larger of twice its old size and INDEX + 1
           elements.  Every slot newly counted up to INDEX is marked TYPE_NONE
           before TYPE is stored.  Jumps to 'error' if the slot already holds a
           different type and to 'out_of_memory' if the array cannot be grown.
           The size arithmetic goes through xtimes / xsum / size_overflow_p
           from "xsize.h".  */
00102 #define REGISTER_ARG(_index_,_type_) \
00103   {                                                                     \
00104     size_t n = (_index_);                                               \
00105     if (n >= a_allocated)                                               \
00106       {                                                                 \
00107         size_t memory_size;                                             \
00108         argument *memory;                                               \
00109                                                                         \
00110         a_allocated = xtimes (a_allocated, 2);                          \
00111         if (a_allocated <= n)                                           \
00112           a_allocated = xsum (n, 1);                                    \
00113         memory_size = xtimes (a_allocated, sizeof (argument));          \
00114         if (size_overflow_p (memory_size))                              \
00115           /* Overflow, would lead to out of memory.  */                 \
00116           goto out_of_memory;                                           \
00117         memory = (argument *) (a->arg                                   \
00118                                ? realloc (a->arg, memory_size)          \
00119                                : malloc (memory_size));                 \
00120         if (memory == NULL)                                             \
00121           /* Out of memory.  */                                         \
00122           goto out_of_memory;                                           \
00123         a->arg = memory;                                                \
00124       }                                                                 \
00125     while (a->count <= n)                                               \
00126       a->arg[a->count++].type = TYPE_NONE;                              \
00127     if (a->arg[n].type == TYPE_NONE)                                    \
00128       a->arg[n].type = (_type_);                                        \
00129     else if (a->arg[n].type != (_type_))                                \
00130       /* Ambiguous type for positional argument.  */                    \
00131       goto error;                                                       \
00132   }
00133 
00134   while (*cp != '\0')
00135     {
00136       CHAR_T c = *cp++;
00137       if (c == '%')
00138         {
00139           size_t arg_index = ARG_NONE;
00140           DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
00141 
00142           /* Initialize the next directive.  */
00143           dp->dir_start = cp - 1;
00144           dp->flags = 0;
00145           dp->width_start = NULL;
00146           dp->width_end = NULL;
00147           dp->width_arg_index = ARG_NONE;
00148           dp->precision_start = NULL;
00149           dp->precision_end = NULL;
00150           dp->precision_arg_index = ARG_NONE;
00151           dp->arg_index = ARG_NONE;
00152 
00153           /* Test for positional argument.  */
                /* The digits are scanned twice: the first pass only checks that
                   the digit run is followed by '$'; the number is computed in a
                   second pass only in that case.  Otherwise cp is left where it
                   was, so the digits are seen again by the flag and width
                   parsing below.  */
00154           if (*cp >= '0' && *cp <= '9')
00155             {
00156               const CHAR_T *np;
00157 
00158               for (np = cp; *np >= '0' && *np <= '9'; np++)
00159                 ;
00160               if (*np == '$')
00161                 {
00162                   size_t n = 0;
00163 
00164                   for (np = cp; *np >= '0' && *np <= '9'; np++)
00165                     n = xsum (xtimes (n, 10), *np - '0');
00166                   if (n == 0)
00167                     /* Positional argument 0.  */
00168                     goto error;
00169                   if (size_overflow_p (n))
00170                     /* n too large, would lead to out of memory later.  */
00171                     goto error;
                        /* Numbers in the format start at 1; the stored index
                           starts at 0.  */
00172                   arg_index = n - 1;
00173                   cp = np + 1;
00174                 }
00175             }
00176 
00177           /* Read the flags.  */
00178           for (;;)
00179             {
00180               if (*cp == '\'')
00181                 {
00182                   dp->flags |= FLAG_GROUP;
00183                   cp++;
00184                 }
00185               else if (*cp == '-')
00186                 {
00187                   dp->flags |= FLAG_LEFT;
00188                   cp++;
00189                 }
00190               else if (*cp == '+')
00191                 {
00192                   dp->flags |= FLAG_SHOWSIGN;
00193                   cp++;
00194                 }
00195               else if (*cp == ' ')
00196                 {
00197                   dp->flags |= FLAG_SPACE;
00198                   cp++;
00199                 }
00200               else if (*cp == '#')
00201                 {
00202                   dp->flags |= FLAG_ALT;
00203                   cp++;
00204                 }
00205               else if (*cp == '0')
00206                 {
00207                   dp->flags |= FLAG_ZERO;
00208                   cp++;
00209                 }
00210               else
00211                 break;
00212             }
00213 
00214           /* Parse the field width.  */
00215           if (*cp == '*')
00216             {
00217               dp->width_start = cp;
00218               cp++;
00219               dp->width_end = cp;
                    /* The recorded width span is just the one '*' character.  */
00220               if (max_width_length < 1)
00221                 max_width_length = 1;
00222 
00223               /* Test for positional argument.  */
00224               if (*cp >= '0' && *cp <= '9')
00225                 {
00226                   const CHAR_T *np;
00227 
00228                   for (np = cp; *np >= '0' && *np <= '9'; np++)
00229                     ;
00230                   if (*np == '$')
00231                     {
00232                       size_t n = 0;
00233 
00234                       for (np = cp; *np >= '0' && *np <= '9'; np++)
00235                         n = xsum (xtimes (n, 10), *np - '0');
00236                       if (n == 0)
00237                         /* Positional argument 0.  */
00238                         goto error;
00239                       if (size_overflow_p (n))
00240                         /* n too large, would lead to out of memory later.  */
00241                         goto error;
00242                       dp->width_arg_index = n - 1;
00243                       cp = np + 1;
00244                     }
00245                 }
                    /* No explicit position given: the width comes from the next
                       sequential argument.  */
00246               if (dp->width_arg_index == ARG_NONE)
00247                 {
00248                   dp->width_arg_index = arg_posn++;
00249                   if (dp->width_arg_index == ARG_NONE)
00250                     /* arg_posn wrapped around.  */
00251                     goto error;
00252                 }
00253               REGISTER_ARG (dp->width_arg_index, TYPE_INT);
00254             }
00255           else if (*cp >= '0' && *cp <= '9')
00256             {
00257               size_t width_length;
00258 
00259               dp->width_start = cp;
00260               for (; *cp >= '0' && *cp <= '9'; cp++)
00261                 ;
00262               dp->width_end = cp;
00263               width_length = dp->width_end - dp->width_start;
00264               if (max_width_length < width_length)
00265                 max_width_length = width_length;
00266             }
00267 
00268           /* Parse the precision.  */
00269           if (*cp == '.')
00270             {
00271               cp++;
00272               if (*cp == '*')
00273                 {
                        /* The precision span starts at the '.', so the span
                           recorded here is two characters long.  */
00274                   dp->precision_start = cp - 1;
00275                   cp++;
00276                   dp->precision_end = cp;
00277                   if (max_precision_length < 2)
00278                     max_precision_length = 2;
00279 
00280                   /* Test for positional argument.  */
00281                   if (*cp >= '0' && *cp <= '9')
00282                     {
00283                       const CHAR_T *np;
00284 
00285                       for (np = cp; *np >= '0' && *np <= '9'; np++)
00286                         ;
00287                       if (*np == '$')
00288                         {
00289                           size_t n = 0;
00290 
00291                           for (np = cp; *np >= '0' && *np <= '9'; np++)
00292                             n = xsum (xtimes (n, 10), *np - '0');
00293                           if (n == 0)
00294                             /* Positional argument 0.  */
00295                             goto error;
00296                           if (size_overflow_p (n))
00297                             /* n too large, would lead to out of memory
00298                                later.  */
00299                             goto error;
00300                           dp->precision_arg_index = n - 1;
00301                           cp = np + 1;
00302                         }
00303                     }
00304                   if (dp->precision_arg_index == ARG_NONE)
00305                     {
00306                       dp->precision_arg_index = arg_posn++;
00307                       if (dp->precision_arg_index == ARG_NONE)
00308                         /* arg_posn wrapped around.  */
00309                         goto error;
00310                     }
00311                   REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
00312                 }
00313               else
00314                 {
00315                   size_t precision_length;
00316 
                        /* Again the span includes the '.'; a lone "." with no
                           digits therefore yields a length of 1.  */
00317                   dp->precision_start = cp - 1;
00318                   for (; *cp >= '0' && *cp <= '9'; cp++)
00319                     ;
00320                   dp->precision_end = cp;
00321                   precision_length = dp->precision_end - dp->precision_start;
00322                   if (max_precision_length < precision_length)
00323                     max_precision_length = precision_length;
00324                 }
00325             }
00326 
00327           {
00328             arg_type type;
00329 
00330             /* Parse argument type/size specifiers.  */
00331             {
                    /* Size-modifier accumulator:
                         bit 0 (1)  at least one 'h' seen,
                         bit 1 (2)  a further 'h' seen ("hh"),
                         bit 2 (4)  'L' seen,
                         each 'l' adds 8,
                         'j', 'z'/'Z' and 't' add 16, 8 or nothing depending on
                         how the size of the corresponding type compares with
                         long and int; 'q' and "I64", where enabled, add 16
                         or 8.
                       The conversion switch below treats flags >= 16 or bit 2
                       as the "long long" / "long double" size and flags >= 8
                       as the "long" size.  */
00332               int flags = 0;
00333 
00334               for (;;)
00335                 {
00336                   if (*cp == 'h')
00337                     {
00338                       flags |= (1 << (flags & 1));
00339                       cp++;
00340                     }
00341                   else if (*cp == 'L')
00342                     {
00343                       flags |= 4;
00344                       cp++;
00345                     }
00346                   else if (*cp == 'l')
00347                     {
00348                       flags += 8;
00349                       cp++;
00350                     }
00351                   else if (*cp == 'j')
00352                     {
00353                       if (sizeof (intmax_t) > sizeof (long))
00354                         {
00355                           /* intmax_t = long long */
00356                           flags += 16;
00357                         }
00358                       else if (sizeof (intmax_t) > sizeof (int))
00359                         {
00360                           /* intmax_t = long */
00361                           flags += 8;
00362                         }
00363                       cp++;
00364                     }
00365                   else if (*cp == 'z' || *cp == 'Z')
00366                     {
00367                       /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
00368                          because the warning facility in gcc-2.95.2 understands
00369                          only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
00370                       if (sizeof (size_t) > sizeof (long))
00371                         {
00372                           /* size_t = long long */
00373                           flags += 16;
00374                         }
00375                       else if (sizeof (size_t) > sizeof (int))
00376                         {
00377                           /* size_t = long */
00378                           flags += 8;
00379                         }
00380                       cp++;
00381                     }
00382                   else if (*cp == 't')
00383                     {
00384                       if (sizeof (ptrdiff_t) > sizeof (long))
00385                         {
00386                           /* ptrdiff_t = long long */
00387                           flags += 16;
00388                         }
00389                       else if (sizeof (ptrdiff_t) > sizeof (int))
00390                         {
00391                           /* ptrdiff_t = long */
00392                           flags += 8;
00393                         }
00394                       cp++;
00395                     }
00396 #if defined __APPLE__ && defined __MACH__
00397                   /* On MacOS X 10.3, PRIdMAX is defined as "qd".
00398                      We cannot change it to "lld" because PRIdMAX must also
00399                      be understood by the system's printf routines.  */
00400                   else if (*cp == 'q')
00401                     {
00402                       if (64 / 8 > sizeof (long))
00403                         {
00404                           /* int64_t = long long */
00405                           flags += 16;
00406                         }
00407                       else
00408                         {
00409                           /* int64_t = long */
00410                           flags += 8;
00411                         }
00412                       cp++;
00413                     }
00414 #endif
00415 #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
00416                   /* On native Win32, PRIdMAX is defined as "I64d".
00417                      We cannot change it to "lld" because PRIdMAX must also
00418                      be understood by the system's printf routines.  */
00419                   else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
00420                     {
00421                       if (64 / 8 > sizeof (long))
00422                         {
00423                           /* __int64 = long long */
00424                           flags += 16;
00425                         }
00426                       else
00427                         {
00428                           /* __int64 = long */
00429                           flags += 8;
00430                         }
00431                       cp += 3;
00432                     }
00433 #endif
00434                   else
00435                     break;
00436                 }
00437 
00438               /* Read the conversion character.  */
                    /* If the format ends inside a directive, c is the NUL here
                       and the switch falls to 'default', i.e. an error.  */
00439               c = *cp++;
00440               switch (c)
00441                 {
00442                 case 'd': case 'i':
00443 #if HAVE_LONG_LONG_INT
00444                   /* If 'long long' exists and is larger than 'long':  */
00445                   if (flags >= 16 || (flags & 4))
00446                     type = TYPE_LONGLONGINT;
00447                   else
00448 #endif
00449                   /* If 'long long' exists and is the same as 'long', we parse
00450                      "lld" into TYPE_LONGINT.  */
00451                   if (flags >= 8)
00452                     type = TYPE_LONGINT;
00453                   else if (flags & 2)
00454                     type = TYPE_SCHAR;
00455                   else if (flags & 1)
00456                     type = TYPE_SHORT;
00457                   else
00458                     type = TYPE_INT;
00459                   break;
00460                 case 'o': case 'u': case 'x': case 'X':
00461 #if HAVE_LONG_LONG_INT
00462                   /* If 'long long' exists and is larger than 'long':  */
00463                   if (flags >= 16 || (flags & 4))
00464                     type = TYPE_ULONGLONGINT;
00465                   else
00466 #endif
00467                   /* If 'unsigned long long' exists and is the same as
00468                      'unsigned long', we parse "llu" into TYPE_ULONGINT.  */
00469                   if (flags >= 8)
00470                     type = TYPE_ULONGINT;
00471                   else if (flags & 2)
00472                     type = TYPE_UCHAR;
00473                   else if (flags & 1)
00474                     type = TYPE_USHORT;
00475                   else
00476                     type = TYPE_UINT;
00477                   break;
00478                 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
00479                 case 'a': case 'A':
00480                   if (flags >= 16 || (flags & 4))
00481                     type = TYPE_LONGDOUBLE;
00482                   else
00483                     type = TYPE_DOUBLE;
00484                   break;
00485                 case 'c':
00486                   if (flags >= 8)
00487 #if HAVE_WINT_T
00488                     type = TYPE_WIDE_CHAR;
00489 #else
00490                     goto error;
00491 #endif
00492                   else
00493                     type = TYPE_CHAR;
00494                   break;
00495 #if HAVE_WINT_T
00496                 case 'C':
00497                   type = TYPE_WIDE_CHAR;
                        /* Stored as conversion 'c'; the wide-character type
                           carries the distinction.  */
00498                   c = 'c';
00499                   break;
00500 #endif
00501                 case 's':
00502                   if (flags >= 8)
00503 #if HAVE_WCHAR_T
00504                     type = TYPE_WIDE_STRING;
00505 #else
00506                     goto error;
00507 #endif
00508                   else
00509                     type = TYPE_STRING;
00510                   break;
00511 #if HAVE_WCHAR_T
00512                 case 'S':
00513                   type = TYPE_WIDE_STRING;
                        /* Stored as conversion 's'; the wide-string type
                           carries the distinction.  */
00514                   c = 's';
00515                   break;
00516 #endif
00517                 case 'p':
00518                   type = TYPE_POINTER;
00519                   break;
00520                 case 'n':
00521 #if HAVE_LONG_LONG_INT
00522                   /* If 'long long' exists and is larger than 'long':  */
00523                   if (flags >= 16 || (flags & 4))
00524                     type = TYPE_COUNT_LONGLONGINT_POINTER;
00525                   else
00526 #endif
00527                   /* If 'long long' exists and is the same as 'long', we parse
00528                      "lln" into TYPE_COUNT_LONGINT_POINTER.  */
00529                   if (flags >= 8)
00530                     type = TYPE_COUNT_LONGINT_POINTER;
00531                   else if (flags & 2)
00532                     type = TYPE_COUNT_SCHAR_POINTER;
00533                   else if (flags & 1)
00534                     type = TYPE_COUNT_SHORT_POINTER;
00535                   else
00536                     type = TYPE_COUNT_INT_POINTER;
00537                   break;
00538 #if ENABLE_UNISTDIO
00539                 /* The unistdio extensions.  */
00540                 case 'U':
00541                   if (flags >= 16)
00542                     type = TYPE_U32_STRING;
00543                   else if (flags >= 8)
00544                     type = TYPE_U16_STRING;
00545                   else
00546                     type = TYPE_U8_STRING;
00547                   break;
00548 #endif
00549                 case '%':
00550                   type = TYPE_NONE;
00551                   break;
00552                 default:
00553                   /* Unknown conversion character.  */
00554                   goto error;
00555                 }
00556             }
00557 
                  /* A "%%" directive has type TYPE_NONE: it takes no argument,
                     so dp->arg_index stays ARG_NONE and any position parsed
                     above is not used.  */
00558             if (type != TYPE_NONE)
00559               {
00560                 dp->arg_index = arg_index;
00561                 if (dp->arg_index == ARG_NONE)
00562                   {
00563                     dp->arg_index = arg_posn++;
00564                     if (dp->arg_index == ARG_NONE)
00565                       /* arg_posn wrapped around.  */
00566                       goto error;
00567                   }
00568                 REGISTER_ARG (dp->arg_index, type);
00569               }
00570             dp->conversion = c;
00571             dp->dir_end = cp;
00572           }
00573 
00574           d->count++;
                /* Keep at least one unused element after the directives stored
                   so far: it is the slot for the next directive and, after the
                   loop, for the end marker.  */
00575           if (d->count >= d_allocated)
00576             {
00577               size_t memory_size;
00578               DIRECTIVE *memory;
00579 
00580               d_allocated = xtimes (d_allocated, 2);
00581               memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
00582               if (size_overflow_p (memory_size))
00583                 /* Overflow, would lead to out of memory.  */
00584                 goto out_of_memory;
00585               memory = (DIRECTIVE *) realloc (d->dir, memory_size);
00586               if (memory == NULL)
00587                 /* Out of memory.  */
00588                 goto out_of_memory;
00589               d->dir = memory;
00590             }
00591         }
00592 #if CHAR_T_ONLY_ASCII
00593       else if (!c_isascii (c))
00594         {
00595           /* Non-ASCII character.  Not supported.  */
00596           goto error;
00597         }
00598 #endif
00599     }
        /* End marker: the element after the last directive records where the
           format string ends (cp is at the terminating NUL here).  */
00600   d->dir[d->count].dir_start = cp;
00601 
00602   d->max_width_length = max_width_length;
00603   d->max_precision_length = max_precision_length;
00604   return 0;
00605 
      /* Invalid format string: release both arrays and report EINVAL.  */
00606 error:
00607   if (a->arg)
00608     free (a->arg);
00609   if (d->dir)
00610     free (d->dir);
00611   errno = EINVAL;
00612   return -1;
00613 
      /* Allocation failure after d->dir was obtained: release both arrays,
         then fall through to report ENOMEM.  */
00614 out_of_memory:
00615   if (a->arg)
00616     free (a->arg);
00617   if (d->dir)
00618     free (d->dir);
      /* Also entered directly when the initial malloc of d->dir fails; nothing
         has been allocated at that point.  */
00619 out_of_memory_1:
00620   errno = ENOMEM;
00621   return -1;
00622 }
00623 
00624 #undef PRINTF_PARSE  /* Remove the parametrization macros described at the
                              top of the file.  STATIC and ENABLE_UNISTDIO are
                              not among the ones removed here.
                              NOTE(review): presumably this allows the file to
                              be included again with other settings --
                              confirm against the including files.  */
00625 #undef DIRECTIVES
00626 #undef DIRECTIVE
00627 #undef CHAR_T_ONLY_ASCII
00628 #undef CHAR_T