Back to index

avfs  1.0.1
uri.c
Go to the documentation of this file.
00001 /* 
00002    HTTP URI handling
00003    Copyright (C) 1999-2001, Joe Orton <joe@light.plus.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009    
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public
00016    License along with this library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00018    MA 02111-1307, USA
00019 
00020 */
00021 
00022 #include "config.h"
00023 
00024 #ifdef HAVE_STRING_H
00025 #include <string.h>
00026 #endif
00027 #ifdef HAVE_UNISTD_H
00028 #include <unistd.h>
00029 #endif
00030 #ifdef HAVE_STDLIB_H
00031 #include <stdlib.h>
00032 #endif
00033 
00034 #include <ctype.h>
00035 #include <stdio.h>
00036 
00037 #include "http_utils.h" /* for 'min' */
00038 #include "string_utils.h" /* for CONCAT3 */
00039 #include "uri.h"
00040 #include "ne_alloc.h"
00041 
/* Returns the parent of the given abs_path: everything up to and
 * including the last '/' that precedes the final path segment.  A
 * trailing slash on the input is ignored, so both "/foo/bar" and
 * "/foo/bar/" give "/foo/".
 *
 * Returns a newly ne_malloc-allocated string which the caller must
 * free, or NULL if the path has no parent (empty string, a single
 * character such as "/", or no '/' before the final segment). */
char *uri_parent(const char *uri) 
{
    const size_t len = strlen(uri);
    const char *pnt;
    size_t parent_len;
    char *ret;

    if (len < 2) {
        /* Nothing precedes the final character, so there is no
         * parent; this also keeps the scan below inside the string. */
        return NULL;
    }

    /* Start one character before the end so that a trailing slash is
     * not mistaken for the separator we are looking for, and check
     * the bound BEFORE dereferencing. */
    pnt = uri + len - 2;
    while (pnt > uri && *pnt != '/') {
        pnt--;
    }
    if (*pnt != '/') {
        /* not a valid absPath */
        return NULL;
    }

    /*  uri    
     *   V
     *   |---|
     *   /foo/bar/
     */
    parent_len = (size_t)(pnt - uri) + 1;  /* include the slash itself */
    ret = ne_malloc(parent_len + 1);
    memcpy(ret, uri, parent_len);
    ret[parent_len] = '\0';
    return ret;
}
00063 
/* Returns non-zero if the last character of uri is '/', zero otherwise.
 * An empty string has no trailing slash. */
int uri_has_trailing_slash(const char *uri) 
{
    const size_t len = strlen(uri);

    /* Guard the empty string: indexing len-1 would read before the
     * start of the buffer. */
    return (len > 0 && uri[len - 1] == '/');
}
00068 
/* Returns a pointer INTO uri at the start of its abs_path component.
 *
 * If uri contains "://", the result points at the first '/' found
 * after it.  If there is no "://", or no '/' follows it, the
 * original uri pointer is returned unchanged.  Nothing is allocated. */
const char *uri_abspath(const char *uri) 
{
    const char *scheme_sep = strstr(uri, "://");
    const char *path;

    if (scheme_sep == NULL) {
        /* No scheme: treat the whole string as the path */
        return uri;
    }

    /* Skip over "://" and look for the slash that starts the path */
    path = strchr(scheme_sep + 3, '/');
    return (path != NULL) ? path : uri;
}
00087 
00088 /* TODO: not a proper URI parser */
00089 int uri_parse(const char *uri, struct uri *parsed, 
00090              const struct uri *defaults)
00091 {
00092     const char *pnt, *slash, *colon;
00093 
00094     parsed->port = -1;
00095     parsed->host = NULL;
00096     parsed->path = NULL;
00097     parsed->scheme = NULL;
00098 
00099     pnt = strstr(uri, "://");
00100     if (pnt) {
00101        parsed->scheme = ne_strndup(uri, pnt - uri);
00102        pnt += 3; /* start of hostport segment */
00103     } else {
00104        pnt = uri;
00105        if (defaults && defaults->scheme != NULL) {
00106            parsed->scheme = ne_strdup(defaults->scheme);
00107        }
00108     }
00109     
00110     slash = strchr(pnt, '/');
00111     colon = strchr(pnt, ':');
00112     if (slash == NULL) {
00113        parsed->path = ne_strdup("/");
00114        if (colon == NULL) {
00115            if (defaults) parsed->port = defaults->port;
00116            parsed->host = ne_strdup(pnt);
00117        } else {
00118            parsed->port = atoi(colon+1);
00119            parsed->host = ne_strndup(pnt, colon - pnt);
00120        }
00121     } else {
00122        if (colon == NULL || colon > slash) {
00123            /* No port segment */
00124            if (defaults) parsed->port = defaults->port;
00125            parsed->host = ne_strndup(pnt, slash - pnt);
00126        } else {
00127            /* Port segment */
00128            parsed->port = atoi(colon + 1);
00129            parsed->host = ne_strndup(pnt, colon - pnt);
00130        }
00131        parsed->path = ne_strdup(slash);
00132     }
00133 
00134     return 0;
00135 }
00136 
00137 void uri_free(struct uri *uri)
00138 {
00139     HTTP_FREE(uri->host);
00140     HTTP_FREE(uri->path);
00141     HTTP_FREE(uri->scheme);
00142 }
00143 
/* Returns an absoluteURI as a newly allocated string.
 *
 * If uri already begins with scheme it is considered absolute and is
 * simply duplicated; otherwise the result is the concatenation of
 * scheme, hostport and uri, in that order. */
char *uri_absolute(const char *uri, const char *scheme, 
                   const char *hostport) 
{
    char *absolute;

    if (strncmp(uri, scheme, strlen(scheme)) != 0) {
        /* Not absolute yet: prefix the scheme and hostport */
        CONCAT3(absolute, scheme, hostport, uri);
        return absolute;
    }

    /* Already absolute: hand back a copy */
    absolute = ne_strdup(uri);
    return absolute;
}
00159 
/* Un-escapes a URI: every "%XX" sequence (XX being two hex digits) is
 * replaced by the single byte with that value; all other characters
 * are copied through unchanged.
 *
 * Returns an ne_malloc-allocated string which the caller must free,
 * or NULL if a '%' is not followed by two hex digits. */
char *uri_unescape(const char *uri) 
{
    const char *pnt;
    /* buf holds "0xNN"; the two digits are patched in for each escape */
    char *ret, *retpos, buf[5] = "0x00";

    /* The result can never be longer than the input */
    retpos = ret = ne_malloc(strlen(uri) + 1);
    for (pnt = uri; *pnt != '\0'; pnt++) {
        if (*pnt == '%') {
            /* The first test short-circuits on a terminating NUL, so
             * pnt[2] is never read past the end of the string. */
            if (!isxdigit((unsigned char) pnt[1]) || 
                !isxdigit((unsigned char) pnt[2])) {
                /* Invalid URI: release the partial result rather
                 * than leaking it. */
                free(ret);
                return NULL;
            }
            buf[2] = *++pnt; buf[3] = *++pnt; /* bit faster than memcpy */
            *retpos++ = (char)strtol(buf, NULL, 16);
        } else {
            *retpos++ = *pnt;
        }
    }
    *retpos = '\0';
    return ret;
}
00182 
/* RFC 2396 says:
 * "Data must be escaped if it does not have a representation 
 * using an unreserved character".
 */

/* Lookup table of the RFC 2396 character classes, indexed by 7-bit
 * character code.  (This is overkill.)  Classes with value 0 are the
 * ones ESCAPE() reports as needing escaping; non-zero classes are
 * passed through. */

/* space: US-ASCII 0x20 */
#define SP 0
/* control: US-ASCII 0x00-0x1F and 0x7F */
#define CO 0
/* delims: "<" | ">" | "#" | "%" | <"> */
#define DE 0
/* unwise: "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" */
#define UW 0
/* mark: "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define MA 1
/* alphanum: alpha | digit */
#define AN 2
/* reserved: ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define RE 2

static const char uri_chars[128] = {
    /*   0 ..   7 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /*   8 ..  15 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /*  16 ..  23 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /*  24 ..  31 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /*  32 ..  39 */ SP, MA, DE, DE, RE, DE, RE, MA,
    /*  40 ..  47 */ MA, MA, MA, RE, RE, MA, MA, RE,
    /*  48 ..  55 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /*  56 ..  63 */ AN, AN, RE, RE, DE, RE, DE, RE,
    /*  64 ..  71 */ RE, AN, AN, AN, AN, AN, AN, AN,
    /*  72 ..  79 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /*  80 ..  87 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /*  88 ..  95 */ AN, AN, AN, UW, UW, UW, UW, MA,
    /*  96 .. 103 */ UW, AN, AN, AN, AN, AN, AN, AN,
    /* 104 .. 111 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 112 .. 119 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 120 .. 127 */ AN, AN, AN, UW, UW, UW, MA, CO
};

/* Non-zero if ch must be escaped: either it is outside the 7-bit
 * range (negative when viewed as a signed char) or its class in
 * uri_chars is 0.  The range test comes first so the table is never
 * indexed out of bounds. */
#define ESCAPE(ch) \
    (((const signed char)(ch) < 0 || uri_chars[(unsigned int)(ch)] == 0))

/* The class names are only needed to build the table */
#undef RE
#undef AN
#undef MA
#undef UW
#undef DE
#undef CO
#undef SP
00220 
/* Escapes the abspath segment of a URI: each character for which
 * ESCAPE() is true is replaced by "%xx", where xx is its value as
 * two lower-case hex digits; everything else is copied verbatim.
 *
 * Returns an allocated string (a plain duplicate of abs_path when
 * nothing needs escaping).
 */
char *uri_abspath_escape(const char *abs_path) 
{
    const char *src;
    char *escaped, *dst;
    int n_escapes = 0;

    /* First pass: count the characters that will grow */
    for (src = abs_path; *src != '\0'; src++) {
        n_escapes += ESCAPE(*src) ? 1 : 0;
    }

    if (n_escapes == 0) {
        return ne_strdup(abs_path);
    }

    /* Each escaped character becomes three characters, i.e. two
     * MORE than it occupied in the original string. */
    dst = escaped = ne_malloc(strlen(abs_path) + 2*n_escapes + 1);

    /* Second pass: emit the escaped form */
    src = abs_path;
    while (*src != '\0') {
        if (ESCAPE(*src)) {
            /* %<hex><hex> */
            sprintf(dst, "%%%02x", (unsigned char) *src);
            dst += 3;
        } else {
            *dst++ = *src;
        }
        src++;
    }
    *dst = '\0';

    return escaped;
}

#undef ESCAPE
00255 
/* TODO: implement properly
 *
 * Compares two URIs case-insensitively, treating a single trailing
 * slash as insignificant: "/foo/" and "/foo" compare equal.
 *
 * Returns 0 if the URIs are considered equal, otherwise the non-zero
 * result of strcasecmp(a, b).  Empty strings are handled safely. */
int uri_compare(const char *a, const char *b) 
{
    int ret = strcasecmp(a, b);
    if (ret != 0) {
        const size_t lena = strlen(a), lenb = strlen(b);
        /* Check the length before indexing so that an empty string
         * never causes a read before the start of the buffer. */
        const int traila = (lena > 0 && a[lena - 1] == '/');
        const int trailb = (lenb > 0 && b[lenb - 1] == '/');

        /* This logic says: "If the lengths of the two URIs differ by
         * exactly one, and the LONGER of the two URIs has a trailing
         * slash and the SHORTER one DOESN'T, then..." */
        if ((traila && !trailb && lena == lenb + 1) ||
            (trailb && !traila && lenb == lena + 1)) {
            const size_t shorter = (lena < lenb) ? lena : lenb;
            /* Compare them, ignoring the trailing slash on the longer
             * URI */
            if (strncasecmp(a, b, shorter) == 0) {
                ret = 0;
            }
        }
    }
    return ret;
}
00277 
/* Given two path segments, returns non-zero if child is a child of
 * parent: child must be strictly longer than parent, and its leading
 * strlen(parent) characters must match parent according to
 * uri_compare. */
int uri_childof(const char *parent, const char *child) 
{
    const size_t parent_len = strlen(parent);
    char *prefix = ne_strdup(child);
    int is_child = 0;

    if (parent_len < strlen(child)) {
        /* Cut the copy of child down to the length of parent and
         * compare that prefix against parent. */
        prefix[parent_len] = '\0';
        is_child = (uri_compare(parent, prefix) == 0);
    }

    free(prefix);
    return is_child;
}
00294 
#ifdef URITEST

/* Small command-line driver, built only when URITEST is defined.
 *
 *   uritest uria        - prints uria, its escaped form, and the
 *                         result of un-escaping that again
 *   uritest uria urib   - prints whether uri_compare considers the
 *                         two URIs equal
 */
int main(int argc, char *argv[]) 
{
    if (argc < 2 || argc > 3) {
        printf("Usage:\nuritest uria [urib]\n"
               "e.g. uritest \"/this/is/a silly<filename>/but/hey\"\n");
        exit(-1);
    }

    if (argv[2] == NULL) {
        /* Single argument: escape / unescape round trip */
        char *encoded;

        printf("Input URI: %s\n", argv[1]);
        encoded = uri_abspath_escape(argv[1]);
        printf("Encoded: %s\n", encoded);
        printf("Decoded: %s\n", uri_unescape(encoded));
    } else {
        /* Two arguments: comparison */
        const char *verdict =
            (uri_compare(argv[1], argv[2]) == 0) ? "true" : "false";

        printf("uri_compare: %s with %s: %s\n",
               argv[1], argv[2], verdict);
    }

    return 0;
}

#endif /* URITEST */