Back to index

php5  5.3.10
split.c
Go to the documentation of this file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00003 
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 */
/*
 * The string is split IN PLACE: separators that end a stored field are
 * overwritten with '\0' and fields[] receives pointers into string.
 *
 * sep selects the splitting mode:
 *   ""    white space: fields are separated by runs of blanks/tabs, and
 *         leading and trailing white space is ignored;
 *   "c"   single character: every occurrence of c ends a field, so
 *         adjacent separators produce empty fields;
 *   "ab…" two or more characters: any run of these characters is one
 *         separator.
 *
 * At most nfields pointers are stored.  If the string holds more fields
 * than that, the last stored field is left unsplit (it keeps the rest of
 * the string) and the surplus fields are only counted.  An nfields of
 * zero or less stores nothing and leaves the string untouched; the
 * fields are merely counted.
 *
 * Returns the number of fields found, which may exceed nfields; 0 for
 * an empty string (or, in white-space mode, an all-blank one).
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;			/* number of fields seen so far */
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* a negative capacity means the same as no capacity at all */
	if (nfields < 0)
		nfields = 0;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = 0;
		while (fn < nfields) {
			*fp++ = p;
			fn++;
			if (fn == nfields)
				break;	/* vector full; leave this field unsplit */
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			*(p-1) = '\0';
		}
		/*
		 * We have overflowed the fields vector -- just count them.
		 * p is at the start of the last field counted; when nothing
		 * could be stored, that is the first field of the string.
		 */
		if (fn == 0)
			fn = 1;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = 0;
			for (;;) {
				*fp++ = p;
				fn++;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* trailing white space is not a field */
						if (trimtrail && **(fp-1) == '\0')
							fn--;
						return(fn);
					}
				if (fn == nfields)
					break;	/* vector full; don't terminate */
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
		} else {
			/* no room to store anything: just step over field one */
			fn = 1;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* we have overflowed the fields vector -- just count them */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The count above included an empty field after
				 * the trailing white space.  If that was the only
				 * overflow, the white space belongs to the last
				 * stored field and is cut off.
				 */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
00139 
00140 #ifdef TEST_SPLIT
00141 
00142 
/*
 * test program
 * pgm			runs regression
 * pgm sep		splits stdin lines by sep
 * pgm str sep		splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	(any fifth argument) only copies str n times without
 *			splitting -- presumably a baseline for timing the
 *			"n times" mode; confirm before relying on it
 *
 * Returns 0 on normal completion, 1 if str does not fit the work buffer.
 */
int dosplit(char *string, char *seps);
int regress(void);

int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf, so make sure it fits */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (max %d characters)\n",
			argv[0], (int)sizeof(buf) - 1);
		return 1;
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() modifies its input, so start from a fresh copy */
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline, if there is one: an over-long
			 * line or a final line without a newline must not
			 * lose its last character.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	return 0;
}
00181 
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split one string into at most NF stored fields and print them
 *
 * string is modified in place by split(); seps is passed through to it
 * unchanged.  Always returns 0.
 */
int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
	return 0;
}
00193 
/*
 * print - show the result of a split() call on stdout
 *
 * nf is the field count split() returned, nfp the capacity of fields[].
 * Output is the count, a tab, then the first min(nf, nfp) fields, each in
 * double quotes and separated by ", ".  When nf exceeds nfp the list ends
 * with a dangling ", " because further fields exist but were not stored.
 * The line is always terminated with a newline, including when nf is 0
 * or the fields overflowed.  Always returns 0.
 */
int
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "");
	printf("\n");
	return 0;
}
00207 
/*
 * Regression table for split().  Every row gives an input string, the
 * separator spec, the field count split() must return when offered RNF
 * slots, and the contents expected in the slots that get filled.  Rows
 * expecting more than RNF fields list only the first RNF; the last of
 * those then holds the unsplit remainder.  A row with a NULL str ends
 * the table.
 */
#define	RNF	5		/* some table entries know this */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator spec handed to split() */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {
	/* single separator: every blank ends a field */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* the same separator given twice */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty spec: white-space mode */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
00276 
00277 regress()
00278 {
00279        char buf[512];
00280        register int n;
00281        char *fields[RNF+1];
00282        register int nf;
00283        register int i;
00284        register int printit;
00285        register char *f;
00286 
00287        for (n = 0; tests[n].str != NULL; n++) {
00288               (void) strcpy(buf, tests[n].str);
00289               fields[RNF] = NULL;
00290               nf = split(buf, fields, RNF, tests[n].seps);
00291               printit = 0;
00292               if (nf != tests[n].nf) {
00293                      printf("split `%s' by `%s' gave %d fields, not %d\n",
00294                             tests[n].str, tests[n].seps, nf, tests[n].nf);
00295                      printit = 1;
00296               } else if (fields[RNF] != NULL) {
00297                      printf("split() went beyond array end\n");
00298                      printit = 1;
00299               } else {
00300                      for (i = 0; i < nf && i < RNF; i++) {
00301                             f = fields[i];
00302                             if (f == NULL)
00303                                    f = "(NULL)";
00304                             if (strcmp(f, tests[n].fi[i]) != 0) {
00305                                    printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
00306                                           tests[n].str, tests[n].seps,
00307                                           i, fields[i], tests[n].fi[i]);
00308                                    printit = 1;
00309                             }
00310                      }
00311               }
00312               if (printit)
00313                      print(nf, RNF, fields);
00314        }
00315 }
00316 #endif