Back to index

php5  5.3.10
scanf.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Clayton Collie <clcollie@mindspring.com>                     |
00016    +----------------------------------------------------------------------+
00017 */
00018 
00019 /* $Id: scanf.c 321634 2012-01-01 13:15:04Z felipe $ */
00020 
00021 /*
00022        scanf.c --
00023 
00024        This file contains the base code which implements sscanf and by extension
00025        fscanf. Original code is from TCL8.3.0 and bears the following copyright:
00026 
00027        This software is copyrighted by the Regents of the University of
00028        California, Sun Microsystems, Inc., Scriptics Corporation,
00029        and other parties.  The following terms apply to all files associated
00030        with the software unless explicitly disclaimed in individual files.
00031 
00032        The authors hereby grant permission to use, copy, modify, distribute,
00033        and license this software and its documentation for any purpose, provided
00034        that existing copyright notices are retained in all copies and that this
00035        notice is included verbatim in any distributions. No written agreement,
00036        license, or royalty fee is required for any of the authorized uses.
00037        Modifications to this software may be copyrighted by their authors
00038        and need not follow the licensing terms described here, provided that
00039        the new terms are clearly indicated on the first page of each file where
00040        they apply.
00041 
00042        IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
00043        FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
00044        ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
00045        DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
00046        POSSIBILITY OF SUCH DAMAGE.
00047 
00048        THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
00049        INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
00050        FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
00051        IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
00052        NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
00053        MODIFICATIONS.
00054 
00055        GOVERNMENT USE: If you are acquiring this software on behalf of the
00056        U.S. government, the Government shall have only "Restricted Rights"
00057        in the software and related documentation as defined in the Federal
00058        Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
00059        are acquiring the software on behalf of the Department of Defense, the
00060        software shall be classified as "Commercial Computer Software" and the
00061        Government shall have only "Restricted Rights" as defined in Clause
00062        252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
00063        authors grant the U.S. Government and others acting in its behalf
00064        permission to use and distribute the software in accordance with the
00065        terms specified in this license.
00066 */
00067 
00068 #include <stdio.h>
00069 #include <limits.h>
00070 #include <ctype.h>
00071 #include "php.h"
00072 #include "php_variables.h"
00073 #ifdef HAVE_LOCALE_H
00074 #include <locale.h>
00075 #endif
00076 #include "zend_execute.h"
00077 #include "zend_operators.h"
00078 #include "zend_strtod.h"
00079 #include "php_globals.h"
00080 #include "basic_functions.h"
00081 #include "scanf.h"
00082 
/*
 * Bit flags used internally by [f|s]canf.  Each flag occupies its own
 * bit so that several of them can be OR-ed together in one int.
 */
#define SCAN_NOSKIP     (1 << 0)  /* Don't skip blanks. */
#define SCAN_SUPPRESS   (1 << 1)  /* Suppress assignment. */
#define SCAN_UNSIGNED   (1 << 2)  /* Read an unsigned value. */
#define SCAN_WIDTH      (1 << 3)  /* A width value was supplied. */

#define SCAN_SIGNOK     (1 << 4)  /* A +/- character is allowed. */
#define SCAN_NODIGITS   (1 << 5)  /* No digits have been scanned. */
#define SCAN_NOZERO     (1 << 6)  /* No zero digits have been scanned. */
#define SCAN_XOK        (1 << 7)  /* An 'x' is allowed. */
#define SCAN_PTOK       (1 << 8)  /* Decimal point is allowed. */
#define SCAN_EXPOK      (1 << 9)  /* An exponent is allowed. */

/* Cast a character to zend_uchar, e.g. before handing it to <ctype.h>. */
#define UCHAR(x)            (zend_uchar)(x)
00099 
/*
 * A single inclusive character range, e.g. the "a-z" part of "[a-z]".
 */
struct Range {
	char start;     /* Lower bound of the range. */
	char end;       /* Upper bound of the range. */
};

/*
 * Parsed form of a "[...]" character set: a list of individual
 * characters plus a list of ranges.
 */
typedef struct CharSet {
	int exclude;            /* 1 if this is an exclusion set. */
	int nchars;             /* Number of entries used in chars. */
	char *chars;            /* Individual characters in the set. */
	int nranges;            /* Number of entries used in ranges. */
	struct Range *ranges;   /* Character ranges in the set, or NULL. */
} CharSet;
00114 
00115 /*
00116  * Declarations for functions used only in this file.
00117  */
00118 static char *BuildCharSet(CharSet *cset, char *format);
00119 static int    CharInSet(CharSet *cset, int ch);
00120 static void   ReleaseCharSet(CharSet *cset);
00121 static inline void scan_set_error_return(int numVars, zval **return_value);
00122 
00123 
00124 /* {{{ BuildCharSet
00125  *----------------------------------------------------------------------
00126  *
00127  * BuildCharSet --
00128  *
00129  *     This function examines a character set format specification
00130  *     and builds a CharSet containing the individual characters and
00131  *     character ranges specified.
00132  *
00133  * Results:
00134  *     Returns the next format position.
00135  *
00136  * Side effects:
00137  *     Initializes the charset.
00138  *
00139  *----------------------------------------------------------------------
00140  */
00141 static char * BuildCharSet(CharSet *cset, char *format)
00142 {
00143        char *ch, start;
00144        int  nranges;
00145        char *end;
00146 
00147        memset(cset, 0, sizeof(CharSet));
00148 
00149        ch = format;
00150        if (*ch == '^') {
00151               cset->exclude = 1;
00152               ch = ++format;
00153        }
00154        end = format + 1;    /* verify this - cc */
00155 
00156        /*
00157         * Find the close bracket so we can overallocate the set.
00158         */
00159        if (*ch == ']') {
00160               ch = end++;
00161        }
00162        nranges = 0;
00163        while (*ch != ']') {
00164               if (*ch == '-') {
00165                      nranges++;
00166               }
00167               ch = end++;
00168        }
00169 
00170        cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
00171        if (nranges > 0) {
00172               cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
00173        } else {
00174               cset->ranges = NULL;
00175        }
00176 
00177        /*
00178         * Now build the character set.
00179         */
00180        cset->nchars = cset->nranges = 0;
00181        ch    = format++;
00182        start = *ch;
00183        if (*ch == ']' || *ch == '-') {
00184               cset->chars[cset->nchars++] = *ch;
00185               ch = format++;
00186        }
00187        while (*ch != ']') {
00188               if (*format == '-') {
00189                      /*
00190                       * This may be the first character of a range, so don't add
00191                       * it yet.
00192                       */
00193                      start = *ch;
00194               } else if (*ch == '-') {
00195                      /*
00196                       * Check to see if this is the last character in the set, in which
00197                       * case it is not a range and we should add the previous character
00198                       * as well as the dash.
00199                       */
00200                      if (*format == ']') {
00201                             cset->chars[cset->nchars++] = start;
00202                             cset->chars[cset->nchars++] = *ch;
00203                      } else {
00204                             ch = format++;
00205 
00206                             /*
00207                              * Check to see if the range is in reverse order.
00208                              */
00209                             if (start < *ch) {
00210                                    cset->ranges[cset->nranges].start = start;
00211                                    cset->ranges[cset->nranges].end = *ch;
00212                             } else {
00213                                    cset->ranges[cset->nranges].start = *ch;
00214                                    cset->ranges[cset->nranges].end = start;
00215                             }
00216                             cset->nranges++;
00217                      }
00218               } else {
00219                      cset->chars[cset->nchars++] = *ch;
00220               }
00221               ch = format++;
00222        }
00223        return format;
00224 }
00225 /* }}} */
00226 
00227 /* {{{ CharInSet
00228  *----------------------------------------------------------------------
00229  *
00230  * CharInSet --
00231  *
00232  *     Check to see if a character matches the given set.
00233  *
00234  * Results:
00235  *     Returns non-zero if the character matches the given set.
00236  *
00237  * Side effects:
00238  *     None.
00239  *
00240  *----------------------------------------------------------------------
00241  */
00242 static int CharInSet(CharSet *cset, int c)
00243 {
00244        char ch = (char) c;
00245        int i, match = 0;
00246 
00247        for (i = 0; i < cset->nchars; i++) {
00248               if (cset->chars[i] == ch) {
00249                      match = 1;
00250                      break;
00251               }
00252        }
00253        if (!match) {
00254               for (i = 0; i < cset->nranges; i++) {
00255                      if ((cset->ranges[i].start <= ch)
00256                             && (ch <= cset->ranges[i].end)) {
00257                             match = 1;
00258                             break;
00259                      }
00260               }
00261        }
00262        return (cset->exclude ? !match : match);
00263 }
00264 /* }}} */
00265 
00266 /* {{{ ReleaseCharSet
00267  *----------------------------------------------------------------------
00268  *
00269  * ReleaseCharSet --
00270  *
00271  *     Free the storage associated with a character set.
00272  *
00273  * Results:
00274  *     None.
00275  *
00276  * Side effects:
00277  *     None.
00278  *
00279  *----------------------------------------------------------------------
00280  */
00281 static void ReleaseCharSet(CharSet *cset)
00282 {
00283        efree((char *)cset->chars);
00284        if (cset->ranges) {
00285               efree((char *)cset->ranges);
00286        }
00287 }
00288 /* }}} */
00289 
00290 /* {{{ ValidateFormat
00291  *----------------------------------------------------------------------
00292  *
00293  * ValidateFormat --
00294  *
00295  *     Parse the format string and verify that it is properly formed
00296  *     and that there are exactly enough variables on the command line.
00297  *
00298  * Results:
00299  *    FAILURE or SUCCESS.
00300  *
00301  * Side effects:
00302  *     May set php_error based on abnormal conditions.
00303  *
00304  * Parameters :
00305  *     format     The format string.
00306  *     numVars    The number of variables passed to the scan command.
00307  *     totalSubs  The number of variables that will be required.
00308  *
00309  *----------------------------------------------------------------------
00310 */
00311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
00312 {
00313 #define STATIC_LIST_SIZE 16
00314        int gotXpg, gotSequential, value, i, flags;
00315        char *end, *ch = NULL;
00316        int staticAssign[STATIC_LIST_SIZE];
00317        int *nassign = staticAssign;
00318        int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
00319        TSRMLS_FETCH();
00320 
00321        /*
00322         * Initialize an array that records the number of times a variable
00323         * is assigned to by the format string.  We use this to detect if
00324         * a variable is multiply assigned or left unassigned.
00325         */
00326        if (numVars > nspace) {
00327               nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
00328               nspace = numVars;
00329        }
00330        for (i = 0; i < nspace; i++) {
00331               nassign[i] = 0;
00332        }
00333 
00334        xpgSize = objIndex = gotXpg = gotSequential = 0;
00335 
00336        while (*format != '\0') {
00337               ch = format++;
00338               flags = 0;
00339 
00340               if (*ch != '%') {
00341                      continue;
00342               }
00343               ch = format++;
00344               if (*ch == '%') {
00345                      continue;
00346               }
00347               if (*ch == '*') {
00348                      flags |= SCAN_SUPPRESS;
00349                      ch = format++;
00350                      goto xpgCheckDone;
00351               }
00352 
00353               if ( isdigit( (int)*ch ) ) {
00354                      /*
00355                       * Check for an XPG3-style %n$ specification.  Note: there
00356                       * must not be a mixture of XPG3 specs and non-XPG3 specs
00357                       * in the same format string.
00358                       */
00359                      value = strtoul(format-1, &end, 10);
00360                      if (*end != '$') {
00361                             goto notXpg;
00362                      }
00363                      format = end+1;
00364                      ch     = format++;
00365                      gotXpg = 1;
00366                      if (gotSequential) {
00367                             goto mixedXPG;
00368                      }
00369                      objIndex = value - 1;
00370                      if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
00371                             goto badIndex;
00372                      } else if (numVars == 0) {
00373                             /*
00374                              * In the case where no vars are specified, the user can
00375                              * specify %9999$ legally, so we have to consider special
00376                              * rules for growing the assign array.  'value' is
00377                              * guaranteed to be > 0.
00378                              */
00379 
00380                             /* set a lower artificial limit on this
00381                              * in the interest of security and resource friendliness
00382                              * 255 arguments should be more than enough. - cc
00383                              */
00384                             if (value > SCAN_MAX_ARGS) {
00385                                    goto badIndex;
00386                             }
00387 
00388                             xpgSize = (xpgSize > value) ? xpgSize : value;
00389                      }
00390                      goto xpgCheckDone;
00391               }
00392 
00393 notXpg:
00394               gotSequential = 1;
00395               if (gotXpg) {
00396 mixedXPG:
00397                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
00398                      goto error;
00399               }
00400 
00401 xpgCheckDone:
00402               /*
00403                * Parse any width specifier.
00404                */
00405               if (isdigit(UCHAR(*ch))) {
00406                      value = strtoul(format-1, &format, 10);
00407                      flags |= SCAN_WIDTH;
00408                      ch = format++;
00409               }
00410 
00411               /*
00412                * Ignore size specifier.
00413                */
00414               if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
00415                      ch = format++;
00416               }
00417 
00418               if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
00419                      goto badIndex;
00420               }
00421 
00422               /*
00423                * Handle the various field types.
00424                */
00425               switch (*ch) {
00426                      case 'n':
00427                      case 'd':
00428                      case 'D':
00429                      case 'i':
00430                      case 'o':
00431                      case 'x':
00432                      case 'X':
00433                      case 'u':
00434                      case 'f':
00435                      case 'e':
00436                      case 'E':
00437                      case 'g':
00438                      case 's':
00439                             break;
00440 
00441                      case 'c':
00442                             /* we differ here with the TCL implementation in allowing for */
00443                             /* a character width specification, to be more consistent with */
00444                             /* ANSI. since Zend auto allocates space for vars, this is no */
00445                             /* problem - cc                                               */
00446                             /*
00447                             if (flags & SCAN_WIDTH) {
00448                                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
00449                                    goto error;
00450                             }
00451                             */
00452                             break;
00453 
00454                      case '[':
00455                             if (*format == '\0') {
00456                                    goto badSet;
00457                             }
00458                             ch = format++;
00459                             if (*ch == '^') {
00460                                    if (*format == '\0') {
00461                                           goto badSet;
00462                                    }
00463                                    ch = format++;
00464                             }
00465                             if (*ch == ']') {
00466                                    if (*format == '\0') {
00467                                           goto badSet;
00468                                    }
00469                                    ch = format++;
00470                             }
00471                             while (*ch != ']') {
00472                                    if (*format == '\0') {
00473                                           goto badSet;
00474                                    }
00475                                    ch = format++;
00476                             }
00477                             break;
00478 badSet:
00479                             php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
00480                             goto error;
00481 
00482                      default: {
00483                             php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
00484                             goto error;
00485                      }
00486               }
00487 
00488               if (!(flags & SCAN_SUPPRESS)) {
00489                      if (objIndex >= nspace) {
00490                             /*
00491                              * Expand the nassign buffer.  If we are using XPG specifiers,
00492                              * make sure that we grow to a large enough size.  xpgSize is
00493                              * guaranteed to be at least one larger than objIndex.
00494                              */
00495                             value = nspace;
00496                             if (xpgSize) {
00497                                    nspace = xpgSize;
00498                             } else {
00499                                    nspace += STATIC_LIST_SIZE;
00500                             }
00501                             if (nassign == staticAssign) {
00502                                    nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
00503                                    for (i = 0; i < STATIC_LIST_SIZE; ++i) {
00504                                           nassign[i] = staticAssign[i];
00505                                    }
00506                             } else {
00507                                    nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
00508                             }
00509                             for (i = value; i < nspace; i++) {
00510                                    nassign[i] = 0;
00511                             }
00512                      }
00513                      nassign[objIndex]++;
00514                      objIndex++;
00515               }
00516        } /* while (*format != '\0') */
00517 
00518        /*
00519         * Verify that all of the variable were assigned exactly once.
00520         */
00521        if (numVars == 0) {
00522               if (xpgSize) {
00523                      numVars = xpgSize;
00524               } else {
00525                      numVars = objIndex;
00526               }
00527        }
00528        if (totalSubs) {
00529               *totalSubs = numVars;
00530        }
00531        for (i = 0; i < numVars; i++) {
00532               if (nassign[i] > 1) {
00533                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
00534                      goto error;
00535               } else if (!xpgSize && (nassign[i] == 0)) {
00536                      /*
00537                       * If the space is empty, and xpgSize is 0 (means XPG wasn't
00538                       * used, and/or numVars != 0), then too many vars were given
00539                       */
00540                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
00541                      goto error;
00542               }
00543        }
00544 
00545        if (nassign != staticAssign) {
00546               efree((char *)nassign);
00547        }
00548        return SCAN_SUCCESS;
00549 
00550 badIndex:
00551        if (gotXpg) {
00552               php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
00553        } else {
00554               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
00555        }
00556 
00557 error:
00558        if (nassign != staticAssign) {
00559               efree((char *)nassign);
00560        }
00561        return SCAN_ERROR_INVALID_FORMAT;
00562 #undef STATIC_LIST_SIZE
00563 }
00564 /* }}} */
00565 
00566 /* {{{ php_sscanf_internal
00567  * This is the internal function which does processing on behalf of
00568  * both sscanf() and fscanf()
00569  *
00570  * parameters :
00571  *            string        literal string to be processed
00572  *            format        format string
00573  *            argCount      total number of elements in the args array
00574  *            args          arguments passed in from user function (f|s)scanf
00575  *            varStart      offset (in args) of 1st variable passed in to (f|s)scanf
00576  *            return_value set with the results of the scan
00577  */
00578 
00579 PHPAPI int php_sscanf_internal( char *string, char *format,
00580                             int argCount, zval ***args,
00581                             int varStart, zval **return_value TSRMLS_DC)
00582 {
00583        int  numVars, nconversions, totalVars = -1;
00584        int  i, result;
00585        long value;
00586        int  objIndex;
00587        char *end, *baseString;
00588        zval **current;
00589        char op   = 0;
00590        int  base = 0;
00591        int  underflow = 0;
00592        size_t width;
00593        long (*fn)() = NULL;
00594        char *ch, sch;
00595        int  flags;
00596        char buf[64]; /* Temporary buffer to hold scanned number
00597                                     * strings before they are passed to strtoul() */
00598 
00599        /* do some sanity checking */
00600        if ((varStart > argCount) || (varStart < 0)){
00601               varStart = SCAN_MAX_ARGS + 1;
00602        }
00603        numVars = argCount - varStart;
00604        if (numVars < 0) {
00605               numVars = 0;
00606        }
00607 
00608 #if 0
00609        zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
00610                                    string, format, numVars, varStart);
00611 #endif
00612        /*
00613         * Check for errors in the format string.
00614         */
00615        if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
00616               scan_set_error_return( numVars, return_value );
00617               return SCAN_ERROR_INVALID_FORMAT;
00618        }
00619 
00620        objIndex = numVars ? varStart : 0;
00621 
00622        /*
00623         * If any variables are passed, make sure they are all passed by reference
00624         */
00625        if (numVars) {
00626               for (i = varStart;i < argCount;i++){
00627                      if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
00628                             php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
00629                             scan_set_error_return(numVars, return_value);
00630                             return SCAN_ERROR_VAR_PASSED_BYVAL;
00631                      }
00632               }
00633        }
00634 
00635        /*
00636         * Allocate space for the result objects. Only happens when no variables
00637         * are specified
00638         */
00639        if (!numVars) {
00640               zval *tmp;
00641 
00642               /* allocate an array for return */
00643               array_init(*return_value);
00644 
00645               for (i = 0; i < totalVars; i++) {
00646                      MAKE_STD_ZVAL(tmp);
00647                      ZVAL_NULL(tmp);
00648                      if (add_next_index_zval(*return_value, tmp) == FAILURE) {
00649                             scan_set_error_return(0, return_value);
00650                             return FAILURE;
00651                      }
00652               }
00653               varStart = 0; /* Array index starts from 0 */
00654        }
00655 
00656        baseString = string;
00657 
00658        /*
00659         * Iterate over the format string filling in the result objects until
00660         * we reach the end of input, the end of the format string, or there
00661         * is a mismatch.
00662         */
00663        nconversions = 0;
00664        /* note ! - we need to limit the loop for objIndex to keep it in bounds */
00665 
00666        while (*format != '\0') {
00667               ch    = format++;
00668               flags = 0;
00669 
00670               /*
00671                * If we see whitespace in the format, skip whitespace in the string.
00672                */
00673               if ( isspace( (int)*ch ) ) {
00674                      sch = *string;
00675                      while ( isspace( (int)sch ) ) {
00676                             if (*string == '\0') {
00677                                    goto done;
00678                             }
00679                             string++;
00680                             sch = *string;
00681                      }
00682                      continue;
00683               }
00684 
00685               if (*ch != '%') {
00686 literal:
00687                      if (*string == '\0') {
00688                             underflow = 1;
00689                             goto done;
00690                      }
00691                      sch = *string;
00692                      string++;
00693                      if (*ch != sch) {
00694                             goto done;
00695                      }
00696                      continue;
00697               }
00698 
00699               ch = format++;
00700               if (*ch == '%') {
00701                      goto literal;
00702               }
00703 
00704               /*
00705                * Check for assignment suppression ('*') or an XPG3-style
00706                * assignment ('%n$').
00707                */
00708               if (*ch == '*') {
00709                      flags |= SCAN_SUPPRESS;
00710                      ch = format++;
00711               } else if ( isdigit(UCHAR(*ch))) {
00712                      value = strtoul(format-1, &end, 10);
00713                      if (*end == '$') {
00714                             format = end+1;
00715                             ch = format++;
00716                             objIndex = varStart + value - 1;
00717                      }
00718               }
00719 
00720               /*
00721                * Parse any width specifier.
00722                */
00723               if ( isdigit(UCHAR(*ch))) {
00724                      width = strtoul(format-1, &format, 10);
00725                      ch = format++;
00726               } else {
00727                      width = 0;
00728               }
00729 
00730               /*
00731                * Ignore size specifier.
00732                */
00733               if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
00734                      ch = format++;
00735               }
00736 
00737               /*
00738                * Handle the various field types.
00739                */
00740               switch (*ch) {
00741                      case 'n':
00742                             if (!(flags & SCAN_SUPPRESS)) {
00743                                    if (numVars && objIndex >= argCount) {
00744                                           break;
00745                                    } else if (numVars) {
00746                                           zend_uint refcount;
00747 
00748                                           current = args[objIndex++];
00749                                           refcount = Z_REFCOUNT_PP(current);
00750                                           zval_dtor( *current );
00751                                           ZVAL_LONG( *current, (long)(string - baseString) );
00752                                           Z_SET_REFCOUNT_PP(current, refcount);
00753                                           Z_SET_ISREF_PP(current);
00754                                    } else {
00755                                           add_index_long(*return_value, objIndex++, string - baseString);
00756                                    }
00757                             }
00758                             nconversions++;
00759                             continue;
00760 
00761                      case 'd':
00762                      case 'D':
00763                             op = 'i';
00764                             base = 10;
00765                             fn = (long (*)())strtol;
00766                             break;
00767                      case 'i':
00768                             op = 'i';
00769                             base = 0;
00770                             fn = (long (*)())strtol;
00771                             break;
00772                      case 'o':
00773                             op = 'i';
00774                             base = 8;
00775                             fn = (long (*)())strtol;
00776                             break;
00777                      case 'x':
00778                      case 'X':
00779                             op = 'i';
00780                             base = 16;
00781                             fn = (long (*)())strtol;
00782                             break;
00783                      case 'u':
00784                             op = 'i';
00785                             base = 10;
00786                             flags |= SCAN_UNSIGNED;
00787                             fn = (long (*)())strtoul;
00788                             break;
00789 
00790                      case 'f':
00791                      case 'e':
00792                      case 'E':
00793                      case 'g':
00794                             op = 'f';
00795                             break;
00796 
00797                      case 's':
00798                             op = 's';
00799                             break;
00800 
00801                      case 'c':
00802                             op = 's';
00803                             flags |= SCAN_NOSKIP;
00804                             /*-cc-*/
00805                             if (0 == width) {
00806                                    width = 1;
00807                             }
00808                             /*-cc-*/
00809                             break;
00810                      case '[':
00811                             op = '[';
00812                             flags |= SCAN_NOSKIP;
00813                             break;
00814               }   /* switch */
00815 
00816               /*
00817                * At this point, we will need additional characters from the
00818                * string to proceed.
00819                */
00820               if (*string == '\0') {
00821                      underflow = 1;
00822                      goto done;
00823               }
00824 
00825               /*
00826                * Skip any leading whitespace at the beginning of a field unless
00827                * the format suppresses this behavior.
00828                */
00829               if (!(flags & SCAN_NOSKIP)) {
00830                      while (*string != '\0') {
00831                             sch = *string;
00832                             if (! isspace((int)sch) ) {
00833                                    break;
00834                             }
00835                             string++;
00836                      }
00837                      if (*string == '\0') {
00838                             underflow = 1;
00839                             goto done;
00840                      }
00841               }
00842 
00843               /*
00844                * Perform the requested scanning operation.
00845                */
00846               switch (op) {
00847                      case 'c':
00848                      case 's':
00849                             /*
00850                              * Scan a string up to width characters or whitespace.
00851                              */
00852                             if (width == 0) {
00853                                    width = (size_t) ~0;
00854                             }
00855                             end = string;
00856                             while (*end != '\0') {
00857                                    sch = *end;
00858                                    if ( isspace( (int)sch ) ) {
00859                                           break;
00860                                    }
00861                                    end++;
00862                                    if (--width == 0) {
00863                                       break;
00864                                    }
00865                             }
00866                             if (!(flags & SCAN_SUPPRESS)) {
00867                                    if (numVars && objIndex >= argCount) {
00868                                           break;
00869                                    } else if (numVars) {
00870                                           zend_uint refcount;
00871 
00872                                           current = args[objIndex++];
00873                                           refcount = Z_REFCOUNT_PP(current);
00874                                           zval_dtor( *current );
00875                                           ZVAL_STRINGL( *current, string, end-string, 1);
00876                                           Z_SET_REFCOUNT_PP(current, refcount);
00877                                           Z_SET_ISREF_PP(current);
00878                                    } else {
00879                                           add_index_stringl( *return_value, objIndex++, string, end-string, 1);
00880                                    }
00881                             }
00882                             string = end;
00883                             break;
00884 
00885                      case '[': {
00886                             CharSet cset;
00887 
00888                             if (width == 0) {
00889                                    width = (size_t) ~0;
00890                             }
00891                             end = string;
00892 
00893                             format = BuildCharSet(&cset, format);
00894                             while (*end != '\0') {
00895                                    sch = *end;
00896                                    if (!CharInSet(&cset, (int)sch)) {
00897                                           break;
00898                                    }
00899                                    end++;
00900                                    if (--width == 0) {
00901                                           break;
00902                                    }
00903                             }
00904                             ReleaseCharSet(&cset);
00905 
00906                             if (string == end) {
00907                                    /*
00908                                     * Nothing matched the range, stop processing
00909                                     */
00910                                    goto done;
00911                             }
00912                             if (!(flags & SCAN_SUPPRESS)) {
00913                                    if (numVars && objIndex >= argCount) {
00914                                           break;
00915                                    } else if (numVars) {
00916                                           current = args[objIndex++];
00917                                           zval_dtor( *current );
00918                                           ZVAL_STRINGL( *current, string, end-string, 1);
00919                                    } else {
00920                                           add_index_stringl(*return_value, objIndex++, string, end-string, 1);
00921                                    }
00922                             }
00923                             string = end;
00924                             break;
00925                      }
00926 /*
00927                      case 'c':
00928                         / Scan a single character./
00929 
00930                             sch = *string;
00931                             string++;
00932                             if (!(flags & SCAN_SUPPRESS)) {
00933                                    if (numVars) {
00934                                           char __buf[2];
00935                                           __buf[0] = sch;
00936                                           __buf[1] = '\0';;
00937                                           current = args[objIndex++];
00938                                           zval_dtor(*current);
00939                                           ZVAL_STRINGL( *current, __buf, 1, 1);
00940                                    } else {
00941                                           add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
00942                                    }
00943                             }
00944                             break;
00945 */
00946                      case 'i':
00947                             /*
00948                              * Scan an unsigned or signed integer.
00949                              */
00950                             /*-cc-*/
00951                             buf[0] = '\0';
00952                             /*-cc-*/
00953                             if ((width == 0) || (width > sizeof(buf) - 1)) {
00954                                    width = sizeof(buf) - 1;
00955                             }
00956 
00957                             flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
00958                             for (end = buf; width > 0; width--) {
00959                                    switch (*string) {
00960                                           /*
00961                                            * The 0 digit has special meaning at the beginning of
00962                                            * a number.  If we are unsure of the base, it
00963                                            * indicates that we are in base 8 or base 16 (if it is
00964                                            * followed by an 'x').
00965                                            */
00966                                           case '0':
00967                                                  /*-cc-*/
00968                                                  if (base == 16) {
00969                                                         flags |= SCAN_XOK;
00970                                                  }
00971                                                  /*-cc-*/
00972                                                  if (base == 0) {
00973                                                         base = 8;
00974                                                         flags |= SCAN_XOK;
00975                                                  }
00976                                                  if (flags & SCAN_NOZERO) {
00977                                                         flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
00978                                                  } else {
00979                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
00980                                                  }
00981                                                  goto addToInt;
00982 
00983                                           case '1': case '2': case '3': case '4':
00984                                           case '5': case '6': case '7':
00985                                                  if (base == 0) {
00986                                                         base = 10;
00987                                                  }
00988                                                  flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
00989                                                  goto addToInt;
00990 
00991                                           case '8': case '9':
00992                                                  if (base == 0) {
00993                                                         base = 10;
00994                                                  }
00995                                                  if (base <= 8) {
00996                                                     break;
00997                                                  }
00998                                                  flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
00999                                                  goto addToInt;
01000 
01001                                           case 'A': case 'B': case 'C':
01002                                           case 'D': case 'E': case 'F':
01003                                           case 'a': case 'b': case 'c':
01004                                           case 'd': case 'e': case 'f':
01005                                                  if (base <= 10) {
01006                                                         break;
01007                                                  }
01008                                                  flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
01009                                                  goto addToInt;
01010 
01011                                           case '+': case '-':
01012                                                  if (flags & SCAN_SIGNOK) {
01013                                                         flags &= ~SCAN_SIGNOK;
01014                                                         goto addToInt;
01015                                                  }
01016                                                  break;
01017 
01018                                           case 'x': case 'X':
01019                                                  if ((flags & SCAN_XOK) && (end == buf+1)) {
01020                                                         base = 16;
01021                                                         flags &= ~SCAN_XOK;
01022                                                         goto addToInt;
01023                                                  }
01024                                                  break;
01025                                    }
01026 
01027                                    /*
01028                                     * We got an illegal character so we are done accumulating.
01029                                     */
01030                                    break;
01031 
01032 addToInt:
01033                                    /*
01034                                     * Add the character to the temporary buffer.
01035                                     */
01036                                    *end++ = *string++;
01037                                    if (*string == '\0') {
01038                                           break;
01039                                    }
01040                             }
01041 
01042                             /*
01043                              * Check to see if we need to back up because we only got a
01044                              * sign or a trailing x after a 0.
01045                              */
01046                             if (flags & SCAN_NODIGITS) {
01047                                    if (*string == '\0') {
01048                                           underflow = 1;
01049                                    }
01050                                    goto done;
01051                             } else if (end[-1] == 'x' || end[-1] == 'X') {
01052                                    end--;
01053                                    string--;
01054                             }
01055 
01056                             /*
01057                              * Scan the value from the temporary buffer.  If we are
01058                              * returning a large unsigned value, we have to convert it back
01059                              * to a string since PHP only supports signed values.
01060                              */
01061                             if (!(flags & SCAN_SUPPRESS)) {
01062                                    *end = '\0';
01063                                    value = (long) (*fn)(buf, NULL, base);
01064                                    if ((flags & SCAN_UNSIGNED) && (value < 0)) {
01065                                           snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */
01066                                           if (numVars && objIndex >= argCount) {
01067                                                  break;
01068                                           } else if (numVars) {
01069                                             /* change passed value type to string */
01070                                                  current = args[objIndex++];
01071                                                  zval_dtor(*current);
01072                                                  ZVAL_STRING( *current, buf, 1 );
01073                                           } else {
01074                                                  add_index_string(*return_value, objIndex++, buf, 1);
01075                                           }
01076                                    } else {
01077                                           if (numVars && objIndex >= argCount) {
01078                                                  break;
01079                                           } else if (numVars) {
01080                                                  current = args[objIndex++];
01081                                                  zval_dtor(*current);
01082                                                  ZVAL_LONG(*current, value);
01083                                           } else {
01084                                                  add_index_long(*return_value, objIndex++, value);
01085                                           }
01086                                    }
01087                             }
01088                             break;
01089 
01090                      case 'f':
01091                             /*
01092                              * Scan a floating point number
01093                              */
01094                             buf[0] = '\0';     /* call me pedantic */
01095                             if ((width == 0) || (width > sizeof(buf) - 1)) {
01096                                    width = sizeof(buf) - 1;
01097                             }
01098                             flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
01099                             for (end = buf; width > 0; width--) {
01100                                    switch (*string) {
01101                                           case '0': case '1': case '2': case '3':
01102                                           case '4': case '5': case '6': case '7':
01103                                           case '8': case '9':
01104                                                  flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
01105                                                  goto addToFloat;
01106                                           case '+':
01107                                           case '-':
01108                                                  if (flags & SCAN_SIGNOK) {
01109                                                         flags &= ~SCAN_SIGNOK;
01110                                                         goto addToFloat;
01111                                                  }
01112                                                  break;
01113                                           case '.':
01114                                                  if (flags & SCAN_PTOK) {
01115                                                         flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
01116                                                         goto addToFloat;
01117                                                  }
01118                                                  break;
01119                                           case 'e':
01120                                           case 'E':
01121                                                  /*
01122                                                   * An exponent is not allowed until there has
01123                                                   * been at least one digit.
01124                                                   */
01125                                                  if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
01126                                                         flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
01127                                                                | SCAN_SIGNOK | SCAN_NODIGITS;
01128                                                         goto addToFloat;
01129                                                  }
01130                                                  break;
01131                                    }
01132 
01133                                    /*
01134                                     * We got an illegal character so we are done accumulating.
01135                                     */
01136                                    break;
01137 
01138 addToFloat:
01139                                    /*
01140                                     * Add the character to the temporary buffer.
01141                                     */
01142                                    *end++ = *string++;
01143                                    if (*string == '\0') {
01144                                           break;
01145                                    }
01146                             }
01147 
01148                             /*
01149                              * Check to see if we need to back up because we saw a
01150                              * trailing 'e' or sign.
01151                              */
01152                             if (flags & SCAN_NODIGITS) {
01153                                    if (flags & SCAN_EXPOK) {
01154                                           /*
01155                                            * There were no digits at all so scanning has
01156                                            * failed and we are done.
01157                                            */
01158                                           if (*string == '\0') {
01159                                                  underflow = 1;
01160                                           }
01161                                           goto done;
01162                                    }
01163 
01164                                    /*
01165                                     * We got a bad exponent ('e' and maybe a sign).
01166                                     */
01167                                    end--;
01168                                    string--;
01169                                    if (*end != 'e' && *end != 'E') {
01170                                           end--;
01171                                           string--;
01172                                    }
01173                             }
01174 
01175                             /*
01176                              * Scan the value from the temporary buffer.
01177                              */
01178                             if (!(flags & SCAN_SUPPRESS)) {
01179                                    double dvalue;
01180                                    *end = '\0';
01181                                    dvalue = zend_strtod(buf, NULL);
01182                                    if (numVars && objIndex >= argCount) {
01183                                           break;
01184                                    } else if (numVars) {
01185                                           current = args[objIndex++];
01186                                           zval_dtor(*current);
01187                                           ZVAL_DOUBLE(*current, dvalue);
01188                                    } else {
01189                                           add_index_double( *return_value, objIndex++, dvalue );
01190                                    }
01191                             }
01192                             break;
01193               } /* switch (op) */
01194               nconversions++;
01195        } /*  while (*format != '\0') */
01196 
01197 done:
01198        result = SCAN_SUCCESS;
01199 
01200        if (underflow && (0==nconversions)) {
01201               scan_set_error_return( numVars, return_value );
01202               result = SCAN_ERROR_EOF;
01203        } else if (numVars) {
01204               convert_to_long( *return_value );
01205               Z_LVAL_PP(return_value) = nconversions;
01206        } else if (nconversions < totalVars) {
01207               /* TODO: not all elements converted. we need to prune the list - cc */
01208        }
01209        return result;
01210 }
01211 /* }}} */
01212 
01213 /* the compiler choked when i tried to make this a macro    */
01214 static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
01215 {
01216        if (numVars) {
01217               Z_TYPE_PP(return_value) = IS_LONG;
01218               Z_LVAL_PP(return_value) = SCAN_ERROR_EOF;  /* EOF marker */
01219        } else {
01220               /* convert_to_null calls destructor */
01221               convert_to_null( *return_value );
01222        }
01223 }
01224 /* }}} */
01225 
01226 /*
01227  * Local variables:
01228  * tab-width: 4
01229  * c-basic-offset: 4
01230  * End:
01231  * vim600: sw=4 ts=4 fdm=marker
01232  * vim<600: sw=4 ts=4
01233  */