Back to index

lightning-sunbird  0.9+nobinonly
prscanf.c
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is the Netscape Portable Runtime (NSPR).
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998-2000
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 /*
00039  * Scan functions for NSPR types
00040  *
00041  * Author: Wan-Teh Chang
00042  *
00043  * Acknowledgment: The implementation is inspired by the source code
00044  * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
00045  */
00046 
00047 #include <limits.h>
00048 #include <ctype.h>
00049 #include <string.h>
00050 #include <stdlib.h>
00051 #ifdef SUNOS4
00052 #include "md/sunos4.h"  /* for strtoul */
00053 #endif
00054 #include "prprf.h"
00055 #include "prdtoa.h"
00056 #include "prlog.h"
00057 #include "prerror.h"
00058 
/*
 * Input callback: fetch the next character from 'stream'.
 * The result is the character that was read, or EOF once the
 * stream has no more characters to deliver.
 */
typedef int (*_PRGetCharFN)(void *stream);

/*
 * Push-back callback: return the character 'ch' to 'stream'.
 */
typedef void (*_PRUngetCharFN)(void *stream, int ch);
00069 
/*
 * Size modifiers that may precede an integer or floating point
 * conversion character in a format control string.
 */
typedef enum {
    _PR_size_none,  /* no modifier present */
    _PR_size_h,     /* 'h':  "short" */
    _PR_size_l,     /* 'l':  "long" */
    _PR_size_L,     /* 'L':  "long double" */
    _PR_size_ll     /* 'll': "long long" */
} _PRSizeSpec;
00081 
00082 /*
00083  * The collection of data that is passed between the scan function
00084  * and its subordinate functions.  The fields of this structure
00085  * serve as the input or output arguments for these functions.
00086  */
00087 typedef struct {
00088     _PRGetCharFN get;        /* get a character from input stream */
00089     _PRUngetCharFN unget;    /* unget (push back) a character */
00090     void *stream;            /* argument for get and unget */
00091     va_list ap;              /* the variable argument list */
00092     int nChar;               /* number of characters read from 'stream' */
00093 
00094     PRBool assign;           /* assign, or suppress assignment? */
00095     int width;               /* field width */
00096     _PRSizeSpec sizeSpec;    /* 'h', 'l', 'L', or 'll' */
00097 
00098     PRBool converted;        /* is the value actually converted? */
00099 } ScanfState;
00100 
/*
 * GET reads one character through the 'get' callback of 'state' and
 * counts it in 'nChar'.  UNGET hands 'ch' to the 'unget' callback and
 * takes it out of the count again.
 */
#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
#define UNGET(state, ch) \
        ((state)->nChar--, (state)->unget((state)->stream, (ch)))

/*
 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
 * are always used together.
 *
 * GET_IF_WITHIN_WIDTH decrements the field width of 'state' and, only
 * if the width has not been exceeded, calls the GET macro and assigns
 * its return value to 'ch'.  Therefore, after GET_IF_WITHIN_WIDTH, the
 * value of 'ch' is valid only if the macro WITHIN_WIDTH evaluates to
 * true.
 *
 * GET_IF_WITHIN_WIDTH is a statement, not an expression.  It is wrapped
 * in do { } while (0) so that it can be followed by a semicolon and used
 * safely as the body of an unbraced if/else.
 */

#define GET_IF_WITHIN_WIDTH(state, ch) \
        do { \
            if (--(state)->width >= 0) { \
                (ch) = GET(state); \
            } \
        } while (0)
#define WITHIN_WIDTH(state) ((state)->width >= 0)
00120 
00121 /*
00122  * _pr_strtoull:
00123  *     Convert a string to an unsigned 64-bit integer.  The string
00124  *     'str' is assumed to be a representation of the integer in
00125  *     base 'base'.
00126  *
00127  * Warning: 
00128  *     - Only handle base 8, 10, and 16.
00129  *     - No overflow checking.
00130  */
00131 
00132 static PRUint64
00133 _pr_strtoull(const char *str, char **endptr, int base)
00134 {
00135     static const int BASE_MAX = 16;
00136     static const char digits[] = "0123456789abcdef";
00137     char *digitPtr;
00138     PRUint64 x;    /* return value */
00139     PRInt64 base64;
00140     const char *cPtr;
00141     PRBool negative;
00142     const char *digitStart;
00143 
00144     PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
00145     if (base < 0 || base == 1 || base > BASE_MAX) {
00146         if (endptr) {
00147             *endptr = (char *) str;
00148             return LL_ZERO;
00149         }
00150     }
00151 
00152     cPtr = str;
00153     while (isspace(*cPtr)) {
00154         ++cPtr;
00155     }
00156 
00157     negative = PR_FALSE;
00158     if (*cPtr == '-') {
00159         negative = PR_TRUE;
00160         cPtr++;
00161     } else if (*cPtr == '+') {
00162         cPtr++;
00163     }
00164 
00165     if (base == 16) {
00166         if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
00167             cPtr += 2;
00168         }
00169     } else if (base == 0) {
00170         if (*cPtr != '0') {
00171             base = 10;
00172         } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
00173             base = 16;
00174             cPtr += 2;
00175         } else {
00176             base = 8;
00177         } 
00178     }
00179     PR_ASSERT(base != 0);
00180     LL_I2L(base64, base);
00181     digitStart = cPtr;
00182 
00183     /* Skip leading zeros */
00184     while (*cPtr == '0') {
00185         cPtr++;
00186     }
00187 
00188     LL_I2L(x, 0);
00189     while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
00190         PRUint64 d;
00191 
00192         LL_I2L(d, (digitPtr - digits));
00193         LL_MUL(x, x, base64);
00194         LL_ADD(x, x, d);
00195         cPtr++;
00196     }
00197 
00198     if (cPtr == digitStart) {
00199         if (endptr) {
00200             *endptr = (char *) str;
00201         }
00202         return LL_ZERO;
00203     }
00204 
00205     if (negative) {
00206 #ifdef HAVE_LONG_LONG
00207         /* The cast to a signed type is to avoid a compiler warning */
00208         x = -(PRInt64)x;
00209 #else
00210         LL_NEG(x, x);
00211 #endif
00212     }
00213 
00214     if (endptr) {
00215         *endptr = (char *) cPtr;
00216     }
00217     return x;
00218 }
00219 
/*
 * FMAX is the largest field width, counted in characters, that the
 * numeric conversions accept.  It is enough (possibly more than
 * enough) to hold the text of a 64-bit integer or of a floating
 * point number.
 */
#define FMAX 31

/* The character that separates the integer part from the fraction. */
#define DECIMAL_POINT '.'
00227 
00228 static PRStatus
00229 GetInt(ScanfState *state, int code)
00230 {
00231     char buf[FMAX + 1], *p;
00232     int ch;
00233     static const char digits[] = "0123456789abcdefABCDEF";
00234     PRBool seenDigit = PR_FALSE;
00235     int base;
00236     int dlen;
00237 
00238     switch (code) {
00239         case 'd': case 'u':
00240             base = 10;
00241             break;
00242         case 'i':
00243             base = 0;
00244             break;
00245         case 'x': case 'X': case 'p':
00246             base = 16;
00247             break;
00248         case 'o':
00249             base = 8;
00250             break;
00251         default:
00252             return PR_FAILURE;
00253     }
00254     if (state->width == 0 || state->width > FMAX) {
00255         state->width = FMAX;
00256     }
00257     p = buf;
00258     GET_IF_WITHIN_WIDTH(state, ch);
00259     if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
00260         *p++ = ch;
00261         GET_IF_WITHIN_WIDTH(state, ch);
00262     }
00263     if (WITHIN_WIDTH(state) && ch == '0') {
00264         seenDigit = PR_TRUE;
00265         *p++ = ch;
00266         GET_IF_WITHIN_WIDTH(state, ch);
00267         if (WITHIN_WIDTH(state)
00268                 && (ch == 'x' || ch == 'X')
00269                 && (base == 0 || base == 16)) {
00270             base = 16;
00271             *p++ = ch;
00272             GET_IF_WITHIN_WIDTH(state, ch);
00273         } else if (base == 0) {
00274             base = 8;
00275         }
00276     }
00277     if (base == 0 || base == 10) {
00278         dlen = 10;
00279     } else if (base == 8) {
00280         dlen = 8;
00281     } else {
00282         PR_ASSERT(base == 16);
00283         dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
00284     }
00285     while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
00286         *p++ = ch;
00287         GET_IF_WITHIN_WIDTH(state, ch);
00288         seenDigit = PR_TRUE;
00289     }
00290     if (WITHIN_WIDTH(state)) {
00291         UNGET(state, ch);
00292     }
00293     if (!seenDigit) {
00294         return PR_FAILURE;
00295     }
00296     *p = '\0';
00297     if (state->assign) {
00298         if (code == 'd' || code == 'i') {
00299             if (state->sizeSpec == _PR_size_ll) {
00300                 PRInt64 llval = _pr_strtoull(buf, NULL, base);
00301                 *va_arg(state->ap, PRInt64 *) = llval;
00302             } else {
00303                 long lval = strtol(buf, NULL, base);
00304 
00305                 if (state->sizeSpec == _PR_size_none) {
00306                     *va_arg(state->ap, PRIntn *) = lval;
00307                 } else if (state->sizeSpec == _PR_size_h) {
00308                     *va_arg(state->ap, PRInt16 *) = (PRInt16)lval;
00309                 } else if (state->sizeSpec == _PR_size_l) {
00310                     *va_arg(state->ap, PRInt32 *) = lval;
00311                 } else {
00312                     return PR_FAILURE;
00313                 }
00314             }
00315         } else {
00316             if (state->sizeSpec == _PR_size_ll) {
00317                 PRUint64 llval = _pr_strtoull(buf, NULL, base);
00318                 *va_arg(state->ap, PRUint64 *) = llval;
00319             } else {
00320                 unsigned long lval = strtoul(buf, NULL, base);
00321 
00322                 if (state->sizeSpec == _PR_size_none) {
00323                     *va_arg(state->ap, PRUintn *) = lval;
00324                 } else if (state->sizeSpec == _PR_size_h) {
00325                     *va_arg(state->ap, PRUint16 *) = (PRUint16)lval;
00326                 } else if (state->sizeSpec == _PR_size_l) {
00327                     *va_arg(state->ap, PRUint32 *) = lval;
00328                 } else {
00329                     return PR_FAILURE;
00330                 }
00331             }
00332         }
00333         state->converted = PR_TRUE;
00334     }
00335     return PR_SUCCESS;
00336 }
00337 
00338 static PRStatus
00339 GetFloat(ScanfState *state)
00340 {
00341     char buf[FMAX + 1], *p;
00342     int ch;
00343     PRBool seenDigit = PR_FALSE;
00344 
00345     if (state->width == 0 || state->width > FMAX) {
00346         state->width = FMAX;
00347     }
00348     p = buf;
00349     GET_IF_WITHIN_WIDTH(state, ch);
00350     if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
00351         *p++ = ch;
00352         GET_IF_WITHIN_WIDTH(state, ch);
00353     }
00354     while (WITHIN_WIDTH(state) && isdigit(ch)) {
00355         *p++ = ch;
00356         GET_IF_WITHIN_WIDTH(state, ch);
00357         seenDigit = PR_TRUE;
00358     }
00359     if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
00360         *p++ = ch;
00361         GET_IF_WITHIN_WIDTH(state, ch);
00362         while (WITHIN_WIDTH(state) && isdigit(ch)) {
00363             *p++ = ch;
00364             GET_IF_WITHIN_WIDTH(state, ch);
00365             seenDigit = PR_TRUE;
00366         }
00367     }
00368 
00369     /*
00370      * This is not robust.  For example, "1.2e+" would confuse
00371      * the code below to read 'e' and '+', only to realize that
00372      * it should have stopped at "1.2".  But we can't push back
00373      * more than one character, so there is nothing I can do.
00374      */
00375 
00376     /* Parse exponent */
00377     if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
00378         *p++ = ch;
00379         GET_IF_WITHIN_WIDTH(state, ch);
00380         if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
00381             *p++ = ch;
00382             GET_IF_WITHIN_WIDTH(state, ch);
00383         }
00384         while (WITHIN_WIDTH(state) && isdigit(ch)) {
00385             *p++ = ch;
00386             GET_IF_WITHIN_WIDTH(state, ch);
00387         }
00388     }
00389     if (WITHIN_WIDTH(state)) {
00390         UNGET(state, ch);
00391     }
00392     if (!seenDigit) {
00393         return PR_FAILURE;
00394     }
00395     *p = '\0';
00396     if (state->assign) {
00397         PRFloat64 dval = PR_strtod(buf, NULL);
00398 
00399         state->converted = PR_TRUE;
00400         if (state->sizeSpec == _PR_size_l) {
00401             *va_arg(state->ap, PRFloat64 *) = dval;
00402         } else if (state->sizeSpec == _PR_size_L) {
00403 #if defined(OSF1) || defined(IRIX)
00404             *va_arg(state->ap, double *) = dval;
00405 #else
00406             *va_arg(state->ap, long double *) = dval;
00407 #endif
00408         } else {
00409             *va_arg(state->ap, float *) = (float) dval;
00410         }
00411     }
00412     return PR_SUCCESS;
00413 }
00414 
00415 /*
00416  * Convert, and return the end of the conversion spec.
00417  * Return NULL on error.
00418  */
00419 
00420 static const char *
00421 Convert(ScanfState *state, const char *fmt)
00422 {
00423     const char *cPtr;
00424     int ch;
00425     char *cArg = NULL;
00426 
00427     state->converted = PR_FALSE;
00428     cPtr = fmt;
00429     if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
00430         do {
00431             ch = GET(state);
00432         } while (isspace(ch));
00433         UNGET(state, ch);
00434     }
00435     switch (*cPtr) {
00436         case 'c':
00437             if (state->assign) {
00438                 cArg = va_arg(state->ap, char *);
00439             }
00440             if (state->width == 0) {
00441                 state->width = 1;
00442             }
00443             for (; state->width > 0; state->width--) {
00444                 ch = GET(state);
00445                 if (ch == EOF) {
00446                     return NULL;
00447                 } else if (state->assign) {
00448                     *cArg++ = ch;
00449                 }
00450             }
00451             if (state->assign) {
00452                 state->converted = PR_TRUE;
00453             }
00454             break;
00455         case 'p':
00456         case 'd': case 'i': case 'o':
00457         case 'u': case 'x': case 'X':
00458             if (GetInt(state, *cPtr) == PR_FAILURE) {
00459                 return NULL;
00460             }
00461             break;
00462         case 'e': case 'E': case 'f':
00463         case 'g': case 'G':
00464             if (GetFloat(state) == PR_FAILURE) {
00465                 return NULL;
00466             }
00467             break;
00468         case 'n':
00469             /* do not consume any input */
00470             if (state->assign) {
00471                 switch (state->sizeSpec) {
00472                     case _PR_size_none:
00473                         *va_arg(state->ap, PRIntn *) = state->nChar;
00474                         break;
00475                     case _PR_size_h:
00476                         *va_arg(state->ap, PRInt16 *) = state->nChar;
00477                         break;
00478                     case _PR_size_l:
00479                         *va_arg(state->ap, PRInt32 *) = state->nChar;
00480                         break;
00481                     case _PR_size_ll:
00482                         LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar);
00483                         break;
00484                     default:
00485                         PR_ASSERT(0);
00486                 }
00487             }
00488             break;
00489         case 's':
00490             if (state->width == 0) {
00491                 state->width = INT_MAX;
00492             }
00493             if (state->assign) {
00494                 cArg = va_arg(state->ap, char *);
00495             }
00496             for (; state->width > 0; state->width--) {
00497                 ch = GET(state);
00498                 if ((ch == EOF) || isspace(ch)) {
00499                     UNGET(state, ch);
00500                     break;
00501                 }
00502                 if (state->assign) {
00503                     *cArg++ = ch;
00504                 }
00505             }
00506             if (state->assign) {
00507                 *cArg = '\0';
00508                 state->converted = PR_TRUE;
00509             }
00510             break;
00511         case '%':
00512             ch = GET(state);
00513             if (ch != '%') {
00514                 UNGET(state, ch);
00515                 return NULL;
00516             }
00517             break;
00518         case '[':
00519             {
00520                 PRBool complement = PR_FALSE;
00521                 const char *closeBracket;
00522                 size_t n;
00523 
00524                 if (*++cPtr == '^') {
00525                     complement = PR_TRUE;
00526                     cPtr++;
00527                 }
00528                 closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
00529                 if (closeBracket == NULL) {
00530                     return NULL;
00531                 }
00532                 n = closeBracket - cPtr;
00533                 if (state->width == 0) {
00534                     state->width = INT_MAX;
00535                 }
00536                 if (state->assign) {
00537                     cArg = va_arg(state->ap, char *);
00538                 }
00539                 for (; state->width > 0; state->width--) {
00540                     ch = GET(state);
00541                     if ((ch == EOF) 
00542                             || (!complement && !memchr(cPtr, ch, n))
00543                             || (complement && memchr(cPtr, ch, n))) {
00544                         UNGET(state, ch);
00545                         break;
00546                     }
00547                     if (state->assign) {
00548                         *cArg++ = ch;
00549                     }
00550                 }
00551                 if (state->assign) {
00552                     *cArg = '\0';
00553                     state->converted = PR_TRUE;
00554                 }
00555                 cPtr = closeBracket;
00556             }
00557             break;
00558         default:
00559             return NULL;
00560     }
00561     return cPtr;
00562 }
00563 
00564 static PRInt32
00565 DoScanf(ScanfState *state, const char *fmt)
00566 {
00567     PRInt32 nConverted = 0;
00568     const char *cPtr;
00569     int ch;
00570 
00571     state->nChar = 0;
00572     cPtr = fmt;
00573     while (1) {
00574         if (isspace(*cPtr)) {
00575             /* white space: skip */
00576             do {
00577                 cPtr++;
00578             } while (isspace(*cPtr));
00579             do {
00580                 ch = GET(state);
00581             } while (isspace(ch));
00582             UNGET(state, ch);
00583         } else if (*cPtr == '%') {
00584             /* format spec: convert */
00585             cPtr++;
00586             state->assign = PR_TRUE;
00587             if (*cPtr == '*') {
00588                 cPtr++;
00589                 state->assign = PR_FALSE;
00590             }
00591             for (state->width = 0; isdigit(*cPtr); cPtr++) {
00592                 state->width = state->width * 10 + *cPtr - '0';
00593             }
00594             state->sizeSpec = _PR_size_none;
00595             if (*cPtr == 'h') {
00596                 cPtr++;
00597                 state->sizeSpec = _PR_size_h;
00598             } else if (*cPtr == 'l') {
00599                 cPtr++;
00600                 if (*cPtr == 'l') {
00601                     cPtr++;
00602                     state->sizeSpec = _PR_size_ll;
00603                 } else {
00604                     state->sizeSpec = _PR_size_l;
00605                 }
00606             } else if (*cPtr == 'L') {
00607                 cPtr++;
00608                 state->sizeSpec = _PR_size_L;
00609             }
00610             cPtr = Convert(state, cPtr);
00611             if (cPtr == NULL) {
00612                 return (nConverted > 0 ? nConverted : EOF);
00613             }
00614             if (state->converted) {
00615                 nConverted++;
00616             }
00617             cPtr++;
00618         } else {
00619             /* others: must match */
00620             if (*cPtr == '\0') {
00621                 return nConverted;
00622             }
00623             ch = GET(state);
00624             if (ch != *cPtr) {
00625                 UNGET(state, ch);
00626                 return nConverted;
00627             }
00628             cPtr++;
00629         }
00630     }
00631 }
00632 
/*
 * StringGetChar:
 *     The 'get' callback for scanning a NUL-terminated string.
 *     'stream' points to the read cursor, a pointer into the string.
 *     Returns the character at the cursor as an unsigned char value and
 *     advances the cursor by one, or returns EOF and leaves the cursor
 *     alone if it is at the terminating NUL.
 */
static int
StringGetChar(void *stream)
{
    char **cursor = (char **) stream;
    char *here = *cursor;

    if (*here != '\0') {
        *cursor = here + 1;
        return (unsigned char) *here;
    }
    return EOF;
}
00645 
/*
 * StringUngetChar:
 *     The 'unget' callback for scanning a NUL-terminated string.
 *     'stream' points to the read cursor.  The cursor is moved back by
 *     one character unless 'ch' is EOF, in which case nothing happens.
 *     The value of 'ch' is not stored.
 */
static void
StringUngetChar(void *stream, int ch)
{
    char **cursor = (char **) stream;

    if (ch == EOF) {
        return;
    }
    *cursor = *cursor - 1;
}
00655 
00656 PR_IMPLEMENT(PRInt32)
00657 PR_sscanf(const char *buf, const char *fmt, ...)
00658 {
00659     PRInt32 rv;
00660     ScanfState state;
00661 
00662     state.get = &StringGetChar;
00663     state.unget = &StringUngetChar;
00664     state.stream = (void *) &buf;
00665     va_start(state.ap, fmt);
00666     rv = DoScanf(&state, fmt);
00667     va_end(state.ap);
00668     return rv;
00669 }