Back to index

glibc  2.9
strtol_l.c
Go to the documentation of this file.
00001 /* Convert string representing a number to integer value, using given locale.
00002    Copyright (C) 1997, 2002, 2004, 2006, 2007 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 
00022 #if HAVE_CONFIG_H
00023 # include <config.h>
00024 #endif
00025 
00026 #ifdef _LIBC
00027 # define USE_NUMBER_GROUPING
00028 # define STDC_HEADERS
00029 # define HAVE_LIMITS_H
00030 #endif
00031 
00032 #include <ctype.h>
00033 #include <errno.h>
00034 #ifndef __set_errno
00035 # define __set_errno(Val) errno = (Val)
00036 #endif
00037 
00038 #ifdef HAVE_LIMITS_H
00039 # include <limits.h>
00040 #endif
00041 
00042 #include <stddef.h>
00043 #include <stdlib.h>
00044 #include <string.h>
00045 #include <locale.h>
00046 #include <xlocale.h>
00047 #include <bits/wordsize.h>
00048 
00049 #ifdef USE_NUMBER_GROUPING
00050 # include "../locale/localeinfo.h"
00051 #endif
00052 
00053 /* Nonzero if we are defining `strtoul' or `strtoull', operating on
00054    unsigned integers.  INT is the return type built from LONG (set below).  */
00055 #ifndef UNSIGNED
00056 # define UNSIGNED 0
00057 # define INT LONG int
00058 #else
00059 # define INT unsigned LONG int
00060 #endif
00061 
00062 /* Determine the name from UNSIGNED, USE_WIDE_CHAR and QUAD.  */
00063 #if UNSIGNED
00064 # ifdef USE_WIDE_CHAR
00065 #  ifdef QUAD
00066 #   define strtol_l wcstoull_l
00067 #  else
00068 #   define strtol_l wcstoul_l
00069 #  endif
00070 # else
00071 #  ifdef QUAD
00072 #   define strtol_l strtoull_l
00073 #  else
00074 #   define strtol_l strtoul_l
00075 #  endif
00076 # endif
00077 #else
00078 # ifdef USE_WIDE_CHAR
00079 #  ifdef QUAD
00080 #   define strtol_l wcstoll_l
00081 #  else
00082 #   define strtol_l wcstol_l
00083 #  endif
00084 # else
00085 #  ifdef QUAD
00086 #   define strtol_l strtoll_l
00087 #  else
00088 #   define strtol_l strtol_l
00089 #  endif
00090 # endif
00091 #endif
00092 
00093 #define __strtol_l __strtol_l2(strtol_l)	/* `__' prepended to the name chosen above.  */
00094 #define __strtol_l2(name) __strtol_l3(name)	/* Extra level so NAME is expanded before pasting.  */
00095 #define __strtol_l3(name) __##name
00096 
00097 
00098 /* If QUAD is defined, we are defining `strtoll' or `strtoull',
00099    operating on `long long int's.  STRTOL_* name the limits of that type.  */
00100 #ifdef QUAD
00101 # define LONG long long
00102 # define STRTOL_LONG_MIN LONG_LONG_MIN
00103 # define STRTOL_LONG_MAX LONG_LONG_MAX
00104 # define STRTOL_ULONG_MAX ULONG_LONG_MAX
00105 #else
00106 # define LONG long
00107 
00108 # ifndef ULONG_MAX	/* Fallbacks in case no header defined these.  */
00109 #  define ULONG_MAX ((unsigned long int) ~(unsigned long int) 0)
00110 # endif
00111 # ifndef LONG_MAX
00112 #  define LONG_MAX ((long int) (ULONG_MAX >> 1))
00113 # endif
00114 # define STRTOL_LONG_MIN LONG_MIN	/* NOTE(review): no fallback for LONG_MIN is defined here.  */
00115 # define STRTOL_LONG_MAX LONG_MAX
00116 # define STRTOL_ULONG_MAX ULONG_MAX
00117 #endif
00118 
00119 
00120 /* We use this code for the extended locale handling where the
00121    function gets as an additional argument the locale which has to be
00122    used.  To access the values we have to redefine the _NL_CURRENT and
00123    _NL_CURRENT_WORD macros.  Both ignore CATEGORY and read a local `current'.  */
00124 #undef _NL_CURRENT
00125 #define _NL_CURRENT(category, item) \
00126   (current->values[_NL_ITEM_INDEX (item)].string)
00127 #undef _NL_CURRENT_WORD
00128 #define _NL_CURRENT_WORD(category, item) \
00129   ((uint32_t) current->values[_NL_ITEM_INDEX (item)].word)
00130 
00131 #if defined _LIBC || defined HAVE_WCHAR_H
00132 # include <wchar.h>
00133 #endif
00134 
00135 #ifdef USE_WIDE_CHAR	/* Wide-character variant: wchar_t text, isw*_l classification.  */
00136 # include <wctype.h>
00137 # define L_(Ch) L##Ch
00138 # define UCHAR_TYPE wint_t
00139 # define STRING_TYPE wchar_t
00140 # define ISSPACE(Ch) __iswspace_l ((Ch), loc)	/* `loc' is the enclosing function's parameter.  */
00141 # define ISALPHA(Ch) __iswalpha_l ((Ch), loc)
00142 # define TOUPPER(Ch) __towupper_l ((Ch), loc)
00143 #else
00144 # if defined _LIBC \
00145    || defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
00146 #  define IN_CTYPE_DOMAIN(c) 1	/* NOTE(review): no use of IN_CTYPE_DOMAIN is visible in this file.  */
00147 # else
00148 #  define IN_CTYPE_DOMAIN(c) isascii(c)
00149 # endif
00150 # define L_(Ch) Ch
00151 # define UCHAR_TYPE unsigned char
00152 # define STRING_TYPE char
00153 # define ISSPACE(Ch) __isspace_l ((Ch), loc)	/* `loc' is the enclosing function's parameter.  */
00154 # define ISALPHA(Ch) __isalpha_l ((Ch), loc)
00155 # define TOUPPER(Ch) __toupper_l ((Ch), loc)
00156 #endif
00157 
00158 #define INTERNAL(X) INTERNAL1(X)	/* Expand X first, then wrap it as __X_internal.  */
00159 #define INTERNAL1(X) __##X##_internal
00160 #define WEAKNAME(X) WEAKNAME1(X)	/* NOTE(review): WEAKNAME1 is not defined in the visible source.  */
00161 
00162 #ifdef USE_NUMBER_GROUPING
00163 /* This file defines a function to check for correct grouping.  */
00164 # include "grouping.h"
00165 #endif
00166 
00167 
00168 /* Define tables of maximum values and remainders in order to detect
00169    overflow.  Do this at compile-time in order to avoid the runtime
00170    overhead of the division.  Entry [BASE - 2] is MAX / BASE or MAX % BASE.  */
00171 extern const unsigned long __strtol_ul_max_tab[] attribute_hidden;
00172 extern const unsigned char __strtol_ul_rem_tab[] attribute_hidden;
00173 #if defined(QUAD) && __WORDSIZE == 32
00174 extern const unsigned long long __strtol_ull_max_tab[] attribute_hidden;
00175 extern const unsigned char __strtol_ull_rem_tab[] attribute_hidden;
00176 #endif
00177 
00178 #define DEF(TYPE, NAME)                                                  \
00179   const TYPE NAME[] attribute_hidden =                                   \
00180   {                                                               \
00181     F(2), F(3), F(4), F(5), F(6), F(7), F(8), F(9), F(10),               \
00182     F(11), F(12), F(13), F(14), F(15), F(16), F(17), F(18), F(19), F(20),  \
00183     F(21), F(22), F(23), F(24), F(25), F(26), F(27), F(28), F(29), F(30),  \
00184     F(31), F(32), F(33), F(34), F(35), F(36)                             \
00185   }
00186 
00187 #if !UNSIGNED && !defined (USE_WIDE_CHAR) && !defined (QUAD)	/* Only this variant emits the `unsigned long' tables.  */
00188 # define F(X) ULONG_MAX / X
00189   DEF (unsigned long, __strtol_ul_max_tab);
00190 # undef F
00191 # define F(X) ULONG_MAX % X
00192   DEF (unsigned char, __strtol_ul_rem_tab);
00193 # undef F
00194 #endif
00195 #if !UNSIGNED && !defined (USE_WIDE_CHAR) && defined (QUAD) \
00196     && __WORDSIZE == 32	/* Only this variant emits the `unsigned long long' tables.  */
00197 # define F(X) ULONG_LONG_MAX / X
00198   DEF (unsigned long long, __strtol_ull_max_tab);
00199 # undef F
00200 # define F(X) ULONG_LONG_MAX % X
00201   DEF (unsigned char, __strtol_ull_rem_tab);
00202 # undef F
00203 #endif
00204 #undef DEF
00205 
00206 /* Define some more readable aliases for these arrays which correspond
00207    to how they'll be used in the function below.  jmax_tab is always the
00208    `unsigned long' quotient table; the other two follow the result type.  */
00208 #define jmax_tab     __strtol_ul_max_tab
00209 #if defined(QUAD) && __WORDSIZE == 32
00210 # define cutoff_tab  __strtol_ull_max_tab
00211 # define cutlim_tab  __strtol_ull_rem_tab
00212 #else
00213 # define cutoff_tab  __strtol_ul_max_tab
00214 # define cutlim_tab  __strtol_ul_rem_tab
00215 #endif
00216 
00217 
00218 /* Convert NPTR to an `unsigned long int' or `long int' in base BASE.
00219    If BASE is 0 the base is determined by the presence of a leading
00220    zero, indicating octal or a leading "0x" or "0X", indicating hexadecimal.
00221    If BASE is negative, 1 or > 36, errno is set to EINVAL and 0 is returned.
00222    If ENDPTR is not NULL, a pointer to the character after the last
00223    one converted is stored in *ENDPTR.  */
00224 
00225 INT
00226 INTERNAL (__strtol_l) (nptr, endptr, base, group, loc)
00227      const STRING_TYPE *nptr;	/* Text to convert.  */
00228      STRING_TYPE **endptr;	/* If non-NULL, receives where parsing stopped.  */
00229      int base;	/* 0 (auto-detect) or 2..36.  */
00230      int group;	/* Nonzero: honor LOC's digit grouping (decimal only).  */
00231      __locale_t loc;	/* Locale used for classification and grouping.  */
00232 {
00233   int negative;
00234   register unsigned LONG int cutoff;	/* Largest I that can still be multiplied by BASE.  */
00235   register unsigned int cutlim;	/* Largest digit allowed when I == CUTOFF.  */
00236   register unsigned LONG int i;	/* Accumulated magnitude.  */
00237   register const STRING_TYPE *s;
00238   register UCHAR_TYPE c;
00239   const STRING_TYPE *save, *end;	/* SAVE: start of digits; END: stop position or NULL.  */
00240   int overflow;
00241 #ifndef USE_WIDE_CHAR
00242   size_t cnt;
00243 #endif
00244 
00245 #ifdef USE_NUMBER_GROUPING
00246   struct locale_data *current = loc->__locales[LC_NUMERIC];
00247   /* The thousands character of the current locale.  */
00248 # ifdef USE_WIDE_CHAR
00249   wchar_t thousands = L'\0';
00250 # else
00251   const char *thousands = NULL;
00252   size_t thousands_len = 0;
00253 # endif
00254   /* The numeric grouping specification of the current locale,
00255      in the format described in <locale.h>.  */
00256   const char *grouping;
00257 
00258   if (__builtin_expect (group, 0))
00259     {
00260       grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
00261       if (*grouping <= 0 || *grouping == CHAR_MAX)
00262        grouping = NULL;
00263       else
00264        {
00265          /* Figure out the thousands separator character.  */
00266 # ifdef USE_WIDE_CHAR
00267 #  ifdef _LIBC
00268          thousands = _NL_CURRENT_WORD (LC_NUMERIC,
00269                                    _NL_NUMERIC_THOUSANDS_SEP_WC);
00270 #  endif
00271          if (thousands == L'\0')
00272            grouping = NULL;
00273 # else
00274 #  ifdef _LIBC
00275          thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
00276 #  endif
00277          if (*thousands == '\0')
00278            {
00279              thousands = NULL;
00280              grouping = NULL;
00281            }
00282 # endif
00283        }
00284     }
00285   else
00286     grouping = NULL;
00287 #endif
00288 
00289   if (base < 0 || base == 1 || base > 36)	/* 0 is valid: it is resolved from the prefix below.  */
00290     {
00291       __set_errno (EINVAL);
00292       return 0;
00293     }
00294 
00295   save = s = nptr;
00296 
00297   /* Skip white space.  */
00298   while (ISSPACE (*s))
00299     ++s;
00300   if (__builtin_expect (*s == L_('\0'), 0))
00301     goto noconv;
00302 
00303   /* Check for a sign.  */
00304   negative = 0;
00305   if (*s == L_('-'))
00306     {
00307       negative = 1;
00308       ++s;
00309     }
00310   else if (*s == L_('+'))
00311     ++s;
00312 
00313   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
00314   if (*s == L_('0'))
00315     {
00316       if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
00317        {
00318          s += 2;
00319          base = 16;
00320        }
00321       else if (base == 0)
00322        base = 8;
00323     }
00324   else if (base == 0)
00325     base = 10;
00326 
00327   /* Save the pointer so we can check later if anything happened.  */
00328   save = s;
00329 
00330 #ifdef USE_NUMBER_GROUPING
00331   if (base != 10)	/* Grouping is only honored for decimal input.  */
00332     grouping = NULL;
00333 
00334   if (__builtin_expect (grouping != NULL, 0))
00335     {
00336 # ifndef USE_WIDE_CHAR
00337       thousands_len = strlen (thousands);
00338 # endif
00339 
00340       /* Find the end of the digit string and check its grouping.  */
00341       end = s;
00342       if (
00343 # ifdef USE_WIDE_CHAR
00344          *s != thousands
00345 # else
00346          ({ for (cnt = 0; cnt < thousands_len; ++cnt)
00347               if (thousands[cnt] != end[cnt])
00348                break;
00349             cnt < thousands_len; })
00350 # endif
00351          )
00352        {
00353          for (c = *end; c != L_('\0'); c = *++end)
00354            if (((STRING_TYPE) c < L_('0') || (STRING_TYPE) c > L_('9'))
00355 # ifdef USE_WIDE_CHAR
00356               && (wchar_t) c != thousands
00357 # else
00358               && ({ for (cnt = 0; cnt < thousands_len; ++cnt)
00359                     if (thousands[cnt] != end[cnt])
00360                      break;
00361                     cnt < thousands_len; })
00362 # endif
00363               && (!ISALPHA (c)
00364                   || (int) (TOUPPER (c) - L_('A') + 10) >= base))
00365              break;
00366 
00367 # ifdef USE_WIDE_CHAR
00368          end = __correctly_grouped_prefixwc (s, end, thousands, grouping);
00369 # else
00370          end = __correctly_grouped_prefixmb (s, end, thousands, grouping);
00371 # endif
00372        }
00373     }
00374   else
00375 #endif
00376     end = NULL;
00377 
00378   /* Avoid runtime division; lookup cutoff and limit.  */
00379   cutoff = cutoff_tab[base - 2];
00380   cutlim = cutlim_tab[base - 2];
00381 
00382   overflow = 0;
00383   i = 0;
00384   c = *s;
00385   if (sizeof (long int) != sizeof (LONG int))	/* Result type differs from `long': start in `unsigned long'.  */
00386     {
00387       unsigned long int j = 0;
00388       unsigned long int jmax = jmax_tab[base - 2];
00389 
00390       for (;c != L_('\0'); c = *++s)
00391        {
00392          if (s == end)
00393            break;
00394          if (c >= L_('0') && c <= L_('9'))
00395            c -= L_('0');
00396 #ifdef USE_NUMBER_GROUPING
00397 # ifdef USE_WIDE_CHAR
00398          else if (grouping && (wchar_t) c == thousands)
00399            continue;
00400 # else
00401          else if (thousands_len)
00402            {
00403              for (cnt = 0; cnt < thousands_len; ++cnt)
00404               if (thousands[cnt] != s[cnt])
00405                 break;
00406              if (cnt == thousands_len)
00407               {
00408                 s += thousands_len - 1;
00409                 continue;
00410               }
00411              if (ISALPHA (c))
00412               c = TOUPPER (c) - L_('A') + 10;
00413              else
00414               break;
00415            }
00416 # endif
00417 #endif
00418          else if (ISALPHA (c))
00419            c = TOUPPER (c) - L_('A') + 10;
00420          else
00421            break;
00422          if ((int) c >= base)
00423            break;
00424          /* While J < JMAX the multiply-add on J below cannot overflow.  */
00425          else if (j >= jmax)
00426            {
00427              /* J could overflow now.  Continue in the wide accumulator I.  */
00428              i = (unsigned LONG int) j;
00429              goto use_long;
00430            }
00431          else
00432            j = j * (unsigned long int) base + c;
00433        }
00434 
00435       i = (unsigned LONG int) j;
00436     }
00437   else
00438     for (;c != L_('\0'); c = *++s)
00439       {
00440        if (s == end)
00441          break;
00442        if (c >= L_('0') && c <= L_('9'))
00443          c -= L_('0');
00444 #ifdef USE_NUMBER_GROUPING
00445 # ifdef USE_WIDE_CHAR
00446        else if (grouping && (wchar_t) c == thousands)
00447          continue;
00448 # else
00449        else if (thousands_len)
00450          {
00451            for (cnt = 0; cnt < thousands_len; ++cnt)
00452              if (thousands[cnt] != s[cnt])
00453               break;
00454            if (cnt == thousands_len)
00455              {
00456               s += thousands_len - 1;
00457               continue;
00458              }
00459            if (ISALPHA (c))
00460              c = TOUPPER (c) - L_('A') + 10;
00461            else
00462              break;
00463          }
00464 # endif
00465 #endif
00466        else if (ISALPHA (c))
00467          c = TOUPPER (c) - L_('A') + 10;
00468        else
00469          break;
00470        if ((int) c >= base)
00471          break;
00472        /* Check for overflow.  Once set, later digits are consumed but ignored.  */
00473        if (i > cutoff || (i == cutoff && c > cutlim))
00474          overflow = 1;
00475        else
00476          {
00477          use_long:	/* Also the target of the goto in the loop above.  */
00478            i *= (unsigned LONG int) base;
00479            i += c;
00480          }
00481       }
00482 
00483   /* Check if anything actually happened.  */
00484   if (s == save)
00485     goto noconv;
00486 
00487   /* Store in ENDPTR the address of one character
00488      past the last character we converted.  */
00489   if (endptr != NULL)
00490     *endptr = (STRING_TYPE *) s;
00491 
00492 #if !UNSIGNED
00493   /* Check for a value that is within the range of
00494      `unsigned LONG int', but outside the range of `LONG int'.  */
00495   if (overflow == 0
00496       && i > (negative
00497              ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1	/* Magnitude of the minimum, computed in unsigned.  */
00498              : (unsigned LONG int) STRTOL_LONG_MAX))
00499     overflow = 1;
00500 #endif
00501 
00502   if (__builtin_expect (overflow, 0))
00503     {
00504       __set_errno (ERANGE);
00505 #if UNSIGNED
00506       return STRTOL_ULONG_MAX;
00507 #else
00508       return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX;
00509 #endif
00510     }
00511 
00512   /* Return the result of the appropriate sign.  */
00513   return negative ? -i : i;	/* I is unsigned, so -I is computed modulo 2^N.  */
00514 
00515 noconv:
00516   /* We must handle a special case here: the base is 0 or 16 and the
00517      first two characters are '0' and 'x', but the rest are no
00518      hexadecimal digits.  This is no error case.  We return 0 and
00519      ENDPTR points to the `x`.  */
00520   if (endptr != NULL)
00521     {
00522       if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X')
00523          && save[-2] == L_('0'))
00524        *endptr = (STRING_TYPE *) &save[-1];
00525       else
00526        /*  There was no number to convert.  */
00527        *endptr = (STRING_TYPE *) nptr;
00528     }
00529 
00530   return 0L;
00531 }
00532 #if defined _LIBC && !defined USE_WIDE_CHAR
00533 libc_hidden_def (INTERNAL (__strtol_l))
00534 #endif
00535 
00536 /* External user entry point.  */
00537 
00538 #if _LIBC - 0 == 0
00539 # undef PARAMS
00540 # if defined (__STDC__) && __STDC__
00541 #  define PARAMS(Args) Args
00542 # else
00543 #  define PARAMS(Args) ()
00544 # endif
00545 
00546 /* Prototype.  It must list the same four parameters as the definition below.  */
00547 extern INT __strtol_l PARAMS ((const STRING_TYPE *nptr, STRING_TYPE **endptr,
00548                             int base, __locale_t loc));
00549 #endif
00550 
00551 
00552 INT
00553 #ifdef weak_function
00554 weak_function
00555 #endif
00556 __strtol_l (nptr, endptr, base, loc)	/* Public wrapper around the internal converter.  */
00557      const STRING_TYPE *nptr;	/* Text to convert.  */
00558      STRING_TYPE **endptr;	/* Passed through unchanged.  */
00559      int base;	/* Passed through unchanged.  */
00560      __locale_t loc;	/* Passed through unchanged.  */
00561 {
00562   return INTERNAL (__strtol_l) (nptr, endptr, base, 0, loc);	/* GROUP is 0: no digit grouping.  */
00563 }
00564 libc_hidden_def (__strtol_l)
00565 weak_alias (__strtol_l, strtol_l)