Back to index

avfs  1.0.1
string_utils.c
Go to the documentation of this file.
00001 /* 
00002    String utility functions
00003    Copyright (C) 1999-2001, Joe Orton <joe@light.plus.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009    
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public
00016    License along with this library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00018    MA 02111-1307, USA
00019 
00020 */
00021 
00022 #ifdef HAVE_CONFIG_H
00023 #include "config.h"
00024 #endif
00025 
00026 #ifdef HAVE_STDLIB_H
00027 #include <stdlib.h>
00028 #endif
00029 #ifdef HAVE_STRING_H
00030 #include <string.h>
00031 #endif
00032 #ifdef HAVE_UNISTD_H
00033 #include <unistd.h>
00034 #endif
00035 
00036 #include "ne_alloc.h"
00037 
00038 #include "string_utils.h"
00039 
/* Growable string buffer.  The stored string is kept NUL-terminated
 * and the terminator is included in the 'used' count, so an empty
 * buffer has used == 1. */
struct sbuffer_s {
    /* Heap storage for the string.  This MUST stay the first member:
     * SBUFFER_CAST relies on it. */
    char *data;
    /* Number of bytes of 'data' currently in use. */
    size_t used;
    /* Number of bytes allocated for 'data'. */
    size_t length;
};
00045 
00046 /* TODO: These are both crap. Rewrite to be like strsep(). */
00047 
/* Splits 'str' exactly as split_string_c() does, for callers that do
 * not need the component count (NULL is passed as the count pointer).
 * Returns whatever split_string_c() returns. */
char **split_string(const char *str, const char separator,
                    const char *quotes, const char *whitespace)
{
    char **components;

    components = split_string_c(str, separator, quotes, whitespace, NULL);
    return components;
}
00053 
/* Returns a newly allocated, NUL-terminated copy of the bytes from
 * 'start' up to and including 'end'. */
static char *split_copy_range(const char *start, const char *end)
{
    size_t length = (size_t)(end - start) + 1;
    char *copy = ne_malloc(length + 1);

    memcpy(copy, start, length);
    copy[length] = '\0';
    return copy;
}

/* Splits 'str' into components delimited by 'separator'.
 *
 *  str         - the NUL-terminated string to split.
 *  separator   - the character which delimits components.
 *  quotes      - set of quote characters, or NULL for none.  A quote
 *                character opens a quoted run which lasts until the
 *                same character is seen again; separators and
 *                whitespace inside the run are taken literally, and
 *                the quote characters themselves are kept in the
 *                component.
 *  whitespace  - set of whitespace characters, or NULL for none.
 *                Leading and trailing whitespace is dropped from each
 *                component.
 *  give_count  - if non-NULL, receives the number of components.
 *
 * Returns an array of (count + 1) pointers: 'count' allocated strings
 * followed by a NULL terminator.  There is always at least one
 * component; empty components are returned as "".  Release the result
 * with split_string_free().
 *
 * A quoted run which is never closed extends to the end of the
 * string. */
char **split_string_c(const char *str, const char separator,
                      const char *quotes, const char *whitespace,
                      int *give_count)
{
    char **comps;
    const char *pnt, *quot = NULL;
    const char *start = NULL; /* first character of current component */
    const char *end = NULL;   /* last non-whitespace character of it */
    int count = 1,            /* number of components */
        curr = 0,             /* index of the component being built */
        leading_wspace = 1,   /* still before the component's start? */
        iswhite, issep;

    /* First pass: count the separators found outside quoted runs. */
    for (pnt = str; *pnt != '\0'; pnt++) {
        if (quotes != NULL) {
            quot = strchr(quotes, *pnt);
        }
        if (quot != NULL) {
            /* Skip to the matching closing quote. */
            for (pnt++; *pnt != *quot && *pnt != '\0'; pnt++)
                /* nullop */;
            if (*pnt == '\0') {
                /* Unterminated quote: stop here, otherwise the outer
                 * loop increment would step past the terminator. */
                break;
            }
        } else if (*pnt == separator) {
            count++;
        }
    }

    if (give_count != NULL) {
        *give_count = count;
    }

    /* One extra slot for the terminating NULL. */
    comps = ne_malloc(sizeof(char *) * (count + 1));
    comps[count] = NULL;

    /* Second pass: extract the components. */
    quot = NULL;
    for (pnt = str; *pnt != '\0'; pnt++) {
        if (quotes != NULL) {
            quot = strchr(quotes, *pnt);
        }
        iswhite = (whitespace != NULL) &&
            (strchr(whitespace, *pnt) != NULL);
        issep = (*pnt == separator);

        if (leading_wspace) {
            if (quot != NULL) {
                /* Component begins with a quoted run; 'end' is set
                 * once the run has been skipped below. */
                start = pnt;
                leading_wspace = 0;
            } else if (issep) {
                /* Zero-length component */
                comps[curr++] = ne_strdup("");
            } else if (!iswhite) {
                start = end = pnt;
                leading_wspace = 0;
            }
        } else if (quot == NULL) {
            if (issep) {
                /* End of component - enter it into the array */
                comps[curr++] = split_copy_range(start, end);
                leading_wspace = 1;
            } else if (!iswhite) {
                /* Not whitespace - update end marker */
                end = pnt;
            }
        }

        if (quot != NULL) {
            /* Skip to the closing quote */
            for (pnt++; *pnt != *quot && *pnt != '\0'; ++pnt)
                /* nullop */;
            if (*pnt == '\0') {
                /* Unterminated quote: the component runs up to the
                 * last character of the string.  Leave the loop so
                 * that its increment cannot move past the NUL. */
                end = pnt - 1;
                break;
            }
            /* Last non-whitespace character is the closing quote */
            end = pnt;
        }
    }

    /* Handle the final component */
    if (leading_wspace) {
        comps[curr] = ne_strdup("");
    } else {
        comps[curr] = split_copy_range(start, end);
    }
    return comps;
}
00160 
/* Splits 'str' into components using 'compsep' (see split_string_c()
 * for the meaning of 'quotes' and 'whitespace'), then splits each
 * component into a key and a value at the first 'kvsep'.
 *
 * Returns an array holding key, value, key, value, ... followed by
 * two NULL entries.  A component containing no 'kvsep' has a NULL
 * value.  Each value points into the same allocation as its key, so
 * only the keys own memory; release the result with
 * pair_string_free().  Returns NULL if the pairs array cannot be
 * allocated. */
char **pair_string(const char *str, const char compsep, const char kvsep,
                   const char *quotes, const char *whitespace)
{
    char **comps, **pairs, *split;
    int count = 0, n;

    comps = split_string_c(str, compsep, quotes, whitespace, &count);
    /* Allocate space for 2* as many components as split_string_c
     * returned, +2 for the NULLs. */
    pairs = ne_malloc((size_t)(2 * count + 2) * sizeof(char *));
    if (pairs == NULL) {
        /* Do not leak the components on the failure path. */
        split_string_free(comps);
        return NULL;
    }
    for (n = 0; n < count; n++) {
        /* The key takes over ownership of the component string. */
        pairs[2 * n] = comps[n];
        split = strchr(comps[n], kvsep);
        if (split == NULL) {
            /* No separator found: key only, no value */
            pairs[2 * n + 1] = NULL;
        } else {
            /* NUL-terminate the key; the value follows it in place */
            *split = '\0';
            pairs[2 * n + 1] = split + 1;
        }
    }
    /* Only the array itself is released; its strings now belong to
     * 'pairs'. */
    free(comps);
    pairs[2 * count] = pairs[2 * count + 1] = NULL;
    return pairs;
}
00192 
/* Frees a NULL-terminated array of strings, as returned by
 * split_string() or split_string_c(): every string up to the NULL
 * terminator, then the array itself.  Passing NULL is a no-op. */
void split_string_free(char **components)
{
    char **pnt;

    if (components == NULL) {
        return;
    }
    for (pnt = components; *pnt != NULL; pnt++) {
        free(*pnt);
    }
    free(components);
}
00202 
/* Frees an array returned by pair_string().  Only the entries at even
 * indices (the keys) are freed, because each value points into its
 * key's allocation; the walk stops at the first NULL key.  The array
 * itself is freed last.  Passing NULL is a no-op. */
void pair_string_free(char **pairs)
{
    int n;

    if (pairs == NULL) {
        return;
    }
    for (n = 0; pairs[n] != NULL; n += 2) {
        free(pairs[n]);
    }
    free(pairs);
}
00211 
/* Returns a newly allocated copy of 'str' with at most one 'ch'
 * removed from its end and at most one 'ch' removed from its start.
 *
 * The trailing character is examined first, so a string consisting
 * of a single 'ch' yields "".  An empty string yields "".  The caller
 * owns the returned string. */
char *shave_string(const char *str, const char ch)
{
    size_t len = strlen(str);
    char *ret;

    /* Both checks are guarded by len > 0: without that an empty
     * string would be indexed at -1, and a one-character string equal
     * to 'ch' would decrement len twice and wrap around. */
    if (len > 0 && str[len - 1] == ch) {
        len--;
    }
    if (len > 0 && str[0] == ch) {
        len--;
        str++;
    }
    ret = ne_malloc(len + 1);
    memcpy(ret, str, len);
    ret[len] = '\0';
    return ret;
}
00228 
00229 char *ne_concat(const char *str, ...)
00230 {
00231     va_list ap;
00232     sbuffer tmp = sbuffer_create();
00233 
00234     sbuffer_zappend(tmp, str);
00235 
00236     va_start(ap, str);
00237     sbuffer_concat(tmp, ap);
00238     va_end(ap);
00239     
00240     return sbuffer_finish(tmp);
00241 }
00242 
00243 char *sbuffer_data(sbuffer buf) 
00244 {
00245     return buf->data;
00246 }
00247 
00248 int sbuffer_size(sbuffer buf) 
00249 {
00250     return buf->used - 1;
00251 }
00252 
00253 void sbuffer_clear(sbuffer buf) 
00254 {
00255     memset(buf->data, 0, buf->length);
00256     buf->used = 1;
00257 }  
00258 
00259 /* Grows for given size, returns 0 on success, -1 on error. */
00260 int sbuffer_grow(sbuffer buf, size_t newsize) 
00261 {
00262     size_t newlen, oldbuflen;
00263 
00264 #define SBUFFER_GROWTH 512
00265 
00266     if (newsize <= buf->length) return 0; /* big enough already */
00267     /* FIXME: ah, can't remember my maths... better way to do this? */
00268     newlen = ((newsize / SBUFFER_GROWTH) + 1) * SBUFFER_GROWTH;
00269     
00270     oldbuflen = buf->length;
00271     /* Reallocate bigger buffer */
00272     buf->data = realloc(buf->data, newlen);
00273     if (buf->data == NULL) return -1;
00274     buf->length = newlen;
00275     /* Zero-out the new bit of buffer */
00276     memset(buf->data+oldbuflen, 0, newlen-oldbuflen);
00277 
00278     return 0;
00279 }
00280 
00281 int sbuffer_concat(sbuffer buf, ...) 
00282 {
00283     va_list ap;
00284     char *next;
00285     size_t totallen = buf->used; 
00286 
00287     /* Find out how much space we need for all the args */
00288     va_start(ap, buf);
00289     do {
00290        next = va_arg(ap, char *);
00291        if (next != NULL) {
00292            totallen += strlen(next);
00293        }
00294     } while (next != NULL);
00295     va_end(ap);
00296     
00297     /* Grow the buffer */
00298     if (sbuffer_grow(buf, totallen))
00299        return -1;
00300     
00301     /* Now append the arguments to the buffer */
00302     va_start(ap, buf);
00303     do {
00304        next = va_arg(ap, char *);
00305        if (next != NULL) {
00306            /* TODO: use stpcpy */
00307            strcat(buf->data, next);
00308        }
00309     } while (next != NULL);
00310     va_end(ap);
00311     
00312     buf->used = totallen;
00313     return 0;
00314 }
00315 
00316 /* Append zero-terminated string... returns 0 on success or -1 on
00317  * realloc failure. */
00318 int sbuffer_zappend(sbuffer buf, const char *str) 
00319 {
00320     size_t len = strlen(str);
00321 
00322     if (sbuffer_grow(buf, buf->used + len)) {
00323        return -1;
00324     }
00325     strcat(buf->data, str);
00326     buf->used += len;
00327     return 0;
00328 }
00329 
00330 int sbuffer_append(sbuffer buf, const char *data, size_t len) 
00331 {
00332     if (sbuffer_grow(buf, buf->used + len)) {
00333        return -1;
00334     }
00335     memcpy(buf->data + buf->used - 1, data, len);
00336     buf->used += len;
00337     buf->data[buf->used - 1] = '\0';
00338     return 0;
00339 }
00340 
00341 sbuffer sbuffer_create(void) 
00342 {
00343     return sbuffer_create_sized(512);
00344 }
00345 
00346 sbuffer sbuffer_create_sized(size_t s) 
00347 {
00348     sbuffer buf = ne_malloc(sizeof(struct sbuffer_s));
00349     buf->data = ne_calloc(s);
00350     buf->length = s;
00351     buf->used = 1;
00352     return buf;
00353 }
00354 
00355 void sbuffer_destroy(sbuffer buf) 
00356 {
00357     if (buf->data) {
00358        free(buf->data);
00359     }
00360     free(buf);
00361 }
00362 
00363 char *sbuffer_finish(sbuffer buf)
00364 {
00365     char *ret = buf->data;
00366     free(buf);
00367     return ret;
00368 }
00369 
00370 void sbuffer_altered(sbuffer buf)
00371 {
00372     buf->used = strlen(buf->data) + 1;
00373 }
00374 
/* Writes the ASCII representation of the 16-byte MD5 digest 'md5_buf'
 * into 'buffer': two characters per byte, high nibble first, followed
 * by a NUL.  'buffer' must have room for at least 33 characters. */
void md5_to_ascii(const unsigned char md5_buf[16], char *buffer)
{
    char *out = buffer;
    int i;

    for (i = 0; i < 16; i++) {
        *out++ = HEX2ASC(md5_buf[i] >> 4);
        *out++ = HEX2ASC(md5_buf[i] & 0x0f);
    }
    *out = '\0';
}
00386 
/* Reads the ASCII representation of an MD5 digest from 'buffer' into
 * the 16 bytes of 'md5_buf'.  Each output byte is built from two
 * input characters, high nibble first; exactly 32 characters are
 * read, so 'buffer' must be at least that long.  No terminator is
 * required and the input is not validated here. */
void ascii_to_md5(const char *buffer, unsigned char md5_buf[16])
{
    const char *in = buffer;
    int i;

    for (i = 0; i < 16; i++, in += 2) {
        md5_buf[i] = ((ASC2HEX(in[0])) << 4) |
            ASC2HEX(in[1]);
    }
}
00397 
/* Returns a newly allocated UTF-8 encoding of 'str', treating every
 * input byte as one code point in the range U+0000..U+00FF.
 *
 * Bytes 0x00-0x7F are ASCII and are copied unchanged.  Bytes
 * 0x80-0xFF become the two-byte sequence 110000xx 10xxxxxx.  The
 * output is therefore at most twice the input length, plus the NUL.
 * The caller owns the returned string. */
char *ne_utf8_encode(const char *str)
{
    size_t len = strlen(str);
    char *buffer = ne_malloc(len * 2 + 1);
    const unsigned char *in = (const unsigned char *)str;
    char *out = buffer;
    size_t n;

    for (n = 0; n < len; n++) {
        unsigned char c = in[n];

        if (c <= 0x7F) {
            /* The whole of ASCII passes through, including '~'
             * (0x7E) and DEL (0x7F). */
            *out++ = (char)c;
        } else {
            /* Lead byte carries the top two bits, continuation byte
             * the low six. */
            *out++ = (char)(0xC0 | (c >> 6));
            *out++ = (char)(0x80 | (c & 0x3F));
        }
    }

    /* nul-terminate */
    *out = '\0';
    return buffer;
}
00417 
#ifdef SPLIT_STRING_TEST

#include <stdio.h>

/* Command-line driver for split_string(), built only when
 * SPLIT_STRING_TEST is defined.
 *
 * Arguments: <sep> <string> [whitespace] [quotes].  Only the first
 * character of <sep> is used; the whitespace set defaults to " " and
 * the quote set to "\"".  Each component is printed on its own
 * line. */
int main(int argc, char *argv[])
{
    char **parts;
    char *input, *blanks, *quote_chars;
    char delim;
    int idx;

    if (argc < 3) {
        printf("Usage: split_string <sep> <string> [whitespace] [quotes]\n");
        return -1;
    }

    delim = *argv[1];
    input = argv[2];
    blanks = (argc > 3) ? argv[3] : " ";
    quote_chars = (argc > 4) ? argv[4] : "\"";

    printf("String: [%s]  Separator: `%c'  Whitespace: [%s]  Quotes: [%s]\n", input, delim, blanks, quote_chars);

    parts = split_string(input, delim, quote_chars, blanks);
    idx = 0;
    do {
        printf("Component #%d: [%s]\n", idx, parts[idx]);
        idx++;
    } while (parts[idx] != NULL);

    return 0;
}

#endif
00452 
#ifdef PAIR_STRING_TEST

#include <stdio.h>

/* Command-line driver for pair_string(), built only when
 * PAIR_STRING_TEST is defined.
 *
 * Arguments: <compsep> <kvsep> <string> [whitespace] [quotes].  Only
 * the first character of each separator argument is used; the
 * whitespace set defaults to " " and the quote set to "\"".  Each
 * key/value pair is printed on its own line, numbered by the key's
 * index in the returned array. */
int main(int argc, char *argv[])
{
    char *str, compsep, kvsep, **comps, *wspace, *quotes;
    const char *value;
    int count;

    if (argc < 4) {
        printf("Usage: pair_string <compsep> <kvsep> <string> [whitespace] [quotes]\n");
        return -1;
    }
    compsep = *argv[1];
    kvsep = *argv[2];
    str = argv[3];
    if (argc > 4) {
        wspace = argv[4];
    } else {
        wspace = " ";
    }
    if (argc > 5) {
        quotes = argv[5];
    } else {
        quotes = "\"";
    }
    printf("String: [%s]  CompSep: `%c' K/VSep: `%c'\nWhitespace: [%s]  Quotes: [%s]\n", str, compsep, kvsep, wspace, quotes);
    comps = pair_string(str, compsep, kvsep, quotes, wspace);
    count = 0;
    do {
        /* A key with no value has a NULL value pointer, and passing
         * NULL for %s is undefined behaviour, so substitute text. */
        value = comps[count + 1];
        if (value == NULL) {
            value = "(null)";
        }
        printf("Component #%d: Key [%s] Value [%s]\n", count,
               comps[count], value);
    } while (comps[(count += 2)] != NULL);
    return 0;
}

#endif
00489 
00490 /* variables:
00491  *
00492  * Local variables:
00493  *  compile-command: "gcc -g -O2 -Wall -I.. -ansi -DHAVE_CONFIG_H -DSPLIT_STRING_TEST -o split_string string_utils.c"
00494  * End:
00495  */