Back to index

php5  5.3.10
Functions | Variables
url.c File Reference
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include "php.h"
#include "url.h"
#include "file.h"

Go to the source code of this file.

Functions

PHPAPI void php_url_free (php_url *theurl)
PHPAPI char * php_replace_controlchars_ex (char *str, int len)
PHPAPI char * php_replace_controlchars (char *str)
PHPAPI php_url * php_url_parse (char const *str)
PHPAPI php_url * php_url_parse_ex (char const *str, int length)
 PHP_FUNCTION (parse_url)
static int php_htoi (char *s)
PHPAPI char * php_url_encode (char const *s, int len, int *new_length)
 PHP_FUNCTION (urlencode)
 PHP_FUNCTION (urldecode)
PHPAPI int php_url_decode (char *str, int len)
PHPAPI char * php_raw_url_encode (char const *s, int len, int *new_length)
 PHP_FUNCTION (rawurlencode)
 PHP_FUNCTION (rawurldecode)
PHPAPI int php_raw_url_decode (char *str, int len)
 PHP_FUNCTION (get_headers)

Variables

static unsigned char hexchars [] = "0123456789ABCDEF"

Function Documentation

PHP_FUNCTION ( parse_url  )

Definition at line 371 of file url.c.

{
       /*
        * Userland parse_url(): parse the URL string and return either one
        * requested component or an array holding every component found.
        * Returns FALSE when php_url_parse_ex() rejects the input or when the
        * component identifier is not one of the PHP_URL_* values.
        */
       char *url;
       int url_len;
       php_url *parts;
       long component = -1;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &url, &url_len, &component) == FAILURE) {
              return;
       }

       parts = php_url_parse_ex(url, url_len);
       if (parts == NULL) {
              /* @todo Find a method to determine why php_url_parse_ex() failed */
              RETURN_FALSE;
       }

       if (component > -1) {
              /* A single component was asked for; when it is absent the return
               * value is simply left untouched. */
              switch (component) {
                     case PHP_URL_SCHEME:
                            if (parts->scheme != NULL) {
                                   RETVAL_STRING(parts->scheme, 1);
                            }
                            break;
                     case PHP_URL_HOST:
                            if (parts->host != NULL) {
                                   RETVAL_STRING(parts->host, 1);
                            }
                            break;
                     case PHP_URL_PORT:
                            if (parts->port != 0) {
                                   RETVAL_LONG(parts->port);
                            }
                            break;
                     case PHP_URL_USER:
                            if (parts->user != NULL) {
                                   RETVAL_STRING(parts->user, 1);
                            }
                            break;
                     case PHP_URL_PASS:
                            if (parts->pass != NULL) {
                                   RETVAL_STRING(parts->pass, 1);
                            }
                            break;
                     case PHP_URL_PATH:
                            if (parts->path != NULL) {
                                   RETVAL_STRING(parts->path, 1);
                            }
                            break;
                     case PHP_URL_QUERY:
                            if (parts->query != NULL) {
                                   RETVAL_STRING(parts->query, 1);
                            }
                            break;
                     case PHP_URL_FRAGMENT:
                            if (parts->fragment != NULL) {
                                   RETVAL_STRING(parts->fragment, 1);
                            }
                            break;
                     default:
                            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid URL component identifier %ld", component);
                            RETVAL_FALSE;
                            break;
              }
       } else {
              /* No component requested: build an associative array that holds
               * only the pieces which are actually present. */
              array_init(return_value);

              if (parts->scheme != NULL) {
                     add_assoc_string(return_value, "scheme", parts->scheme, 1);
              }
              if (parts->host != NULL) {
                     add_assoc_string(return_value, "host", parts->host, 1);
              }
              if (parts->port != 0) {
                     add_assoc_long(return_value, "port", parts->port);
              }
              if (parts->user != NULL) {
                     add_assoc_string(return_value, "user", parts->user, 1);
              }
              if (parts->pass != NULL) {
                     add_assoc_string(return_value, "pass", parts->pass, 1);
              }
              if (parts->path != NULL) {
                     add_assoc_string(return_value, "path", parts->path, 1);
              }
              if (parts->query != NULL) {
                     add_assoc_string(return_value, "query", parts->query, 1);
              }
              if (parts->fragment != NULL) {
                     add_assoc_string(return_value, "fragment", parts->fragment, 1);
              }
       }

       /* Both paths copied what they needed (last argument 1), so the parsed
        * structure can be released here. */
       php_url_free(parts);
}

Here is the call graph for this function:

PHP_FUNCTION ( urlencode  )

Definition at line 530 of file url.c.

{
       /* Userland urlencode(): return php_url_encode() applied to the string argument. */
       char *input, *encoded;
       int input_len, encoded_len;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
              return;
       }

       encoded = php_url_encode(input, input_len, &encoded_len);

       /* The freshly allocated buffer is returned as-is (third argument 0). */
       RETURN_STRINGL(encoded, encoded_len, 0);
}

Here is the call graph for this function:

PHP_FUNCTION ( urldecode  )

Definition at line 547 of file url.c.

{
       /* Userland urldecode(): decode a private copy of the argument with php_url_decode(). */
       char *encoded, *decoded;
       int encoded_len, decoded_len;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &encoded, &encoded_len) == FAILURE) {
              return;
       }

       /* php_url_decode() rewrites its buffer in place, so work on a duplicate. */
       decoded = estrndup(encoded, encoded_len);
       decoded_len = php_url_decode(decoded, encoded_len);

       RETURN_STRINGL(decoded, decoded_len, 0);
}

Here is the call graph for this function:

PHP_FUNCTION ( rawurlencode  )

Definition at line 631 of file url.c.

{
       /* Userland rawurlencode(): return php_raw_url_encode() applied to the string argument. */
       char *input, *encoded;
       int input_len, encoded_len;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
              return;
       }

       encoded = php_raw_url_encode(input, input_len, &encoded_len);

       /* The freshly allocated buffer is returned as-is (third argument 0). */
       RETURN_STRINGL(encoded, encoded_len, 0);
}

Here is the call graph for this function:

PHP_FUNCTION ( rawurldecode  )

Definition at line 648 of file url.c.

{
       /* Userland rawurldecode(): decode a private copy of the argument with php_raw_url_decode(). */
       char *encoded, *decoded;
       int encoded_len, decoded_len;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &encoded, &encoded_len) == FAILURE) {
              return;
       }

       /* php_raw_url_decode() rewrites its buffer in place, so work on a duplicate. */
       decoded = estrndup(encoded, encoded_len);
       decoded_len = php_raw_url_decode(decoded, encoded_len);

       RETURN_STRINGL(decoded, decoded_len, 0);
}

Here is the call graph for this function:

PHP_FUNCTION ( get_headers  )

Definition at line 695 of file url.c.

{
       /*
        * Userland get_headers(url [, format]): open the URL asking only for
        * headers and return the header lines found in the stream's wrapper data.
        * With format == 0 every header line is appended as-is; otherwise lines of
        * the form "Name: value" are stored under the key "Name", and a name that
        * occurs more than once is turned into an array of values.
        * Returns FALSE when the stream cannot be opened or carries no array of
        * wrapper data.
        */
       char *url;
       int url_len;
       php_stream_context *context;
       php_stream *stream;
       zval **prev_val, **hdr = NULL, **h;
       HashPosition pos;
       HashTable *hashT;
       long format = 0;
                 
       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &url, &url_len, &format) == FAILURE) {
              return;
       }
       /* use the default stream context, creating (and remembering) one if none exists yet */
       context = FG(default_context) ? FG(default_context) : (FG(default_context) = php_stream_context_alloc());

       if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) {
              RETURN_FALSE;
       }

       /* the headers are expected in wrapperdata as an array; anything else is a failure */
       if (!stream->wrapperdata || Z_TYPE_P(stream->wrapperdata) != IS_ARRAY) {
              php_stream_close(stream);
              RETURN_FALSE;
       }

       array_init(return_value);

       /* check for curl-wrappers that provide headers via a special "headers" element */
       if (zend_hash_find(HASH_OF(stream->wrapperdata), "headers", sizeof("headers"), (void **)&h) != FAILURE && Z_TYPE_PP(h) == IS_ARRAY) {
              /* curl-wrappers don't load data until the 1st read */ 
              if (!Z_ARRVAL_PP(h)->nNumOfElements) {
                     php_stream_getc(stream);
              }
              /* look the element up again after the read above.
               * NOTE(review): the result of this second lookup is not checked. */
              zend_hash_find(HASH_OF(stream->wrapperdata), "headers", sizeof("headers"), (void **)&h);
              hashT = Z_ARRVAL_PP(h);     
       } else {
              hashT = HASH_OF(stream->wrapperdata);
       }

       /* walk every entry of the chosen table using an external position */
       zend_hash_internal_pointer_reset_ex(hashT, &pos);
       while (zend_hash_get_current_data_ex(hashT, (void**)&hdr, &pos) != FAILURE) {
              /* entries that are not strings are skipped */
              if (!hdr || Z_TYPE_PP(hdr) != IS_STRING) {
                     zend_hash_move_forward_ex(hashT, &pos);
                     continue;
              }
              if (!format) {
                     /* also the jump target for lines without a ':' when format is non-zero */
no_name_header:
                     add_next_index_stringl(return_value, Z_STRVAL_PP(hdr), Z_STRLEN_PP(hdr), 1);
              } else {
                     char c;
                     char *s, *p;

                     if ((p = strchr(Z_STRVAL_PP(hdr), ':'))) {
                            /* temporarily terminate the header string at the colon so the
                             * name can be used as a key; the byte is restored below */
                            c = *p;
                            *p = '\0';
                            /* the value starts after the colon and any following whitespace */
                            s = p + 1;
                            while (isspace((int)*(unsigned char *)s)) {
                                   s++;
                            }

                            /* the key length passed here counts the terminating NUL (name length + 1) */
                            if (zend_hash_find(HASH_OF(return_value), Z_STRVAL_PP(hdr), (p - Z_STRVAL_PP(hdr) + 1), (void **) &prev_val) == FAILURE) {
                                   add_assoc_stringl_ex(return_value, Z_STRVAL_PP(hdr), (p - Z_STRVAL_PP(hdr) + 1), s, (Z_STRLEN_PP(hdr) - (s - Z_STRVAL_PP(hdr))), 1);
                            } else { /* some headers may occur more than once, therefore we need to remake the string into an array */
                                   convert_to_array(*prev_val);
                                   add_next_index_stringl(*prev_val, s, (Z_STRLEN_PP(hdr) - (s - Z_STRVAL_PP(hdr))), 1);
                            }

                            /* put the colon back */
                            *p = c;
                     } else {
                            goto no_name_header;
                     }
              }
              zend_hash_move_forward_ex(hashT, &pos);
       }

       php_stream_close(stream);
}

Here is the call graph for this function:

static int php_htoi ( char *  s) [static]

Definition at line 448 of file url.c.

{
       /*
        * Convert the two characters at s[0] and s[1] from hexadecimal to the
        * byte value they denote (first character is the high nibble).
        * No validation is done here; the result for non-hex input is whatever
        * the arithmetic below yields.
        */
       const unsigned char *digits = (const unsigned char *) s;
       int result = 0;
       int i;

       for (i = 0; i < 2; i++) {
              int nibble = digits[i];

              /* fold upper case onto lower case so only 'a'..'f' needs handling */
              if (isupper(nibble)) {
                     nibble = tolower(nibble);
              }

              if (nibble >= '0' && nibble <= '9') {
                     nibble -= '0';
              } else {
                     nibble = nibble - 'a' + 10;
              }

              result = result * 16 + nibble;
       }

       return result;
}

Here is the caller graph for this function:

PHPAPI int php_raw_url_decode ( char *  str,
int  len 
)

Definition at line 667 of file url.c.

{
       /*
        * Decode "%XX" escapes in the first len bytes of str, in place.
        * A '%' that is not followed by two hexadecimal digits inside the
        * remaining input is copied through unchanged. The result is
        * NUL-terminated and its length is returned.
        */
       char *dest = str;
       char *data = str;

       while (len--) {
              /* len was already decremented, so "len >= 2" means two more bytes
               * follow the '%'. The bytes are converted to unsigned char before
               * being handed to isxdigit(): passing a negative plain char (any
               * byte >= 0x80 where char is signed) is undefined behaviour. */
              if (*data == '%' && len >= 2 && isxdigit((int) (unsigned char) *(data + 1)) 
                     && isxdigit((int) (unsigned char) *(data + 2))) {
#ifndef CHARSET_EBCDIC
                     *dest = (char) php_htoi(data + 1);
#else
                     /* NOTE(review): the index is cast to plain char; confirm it
                      * cannot go negative on the targeted EBCDIC platforms. */
                     *dest = os_toebcdic[(char) php_htoi(data + 1)];
#endif
                     /* skip the two hex digits just consumed */
                     data += 2;
                     len -= 2;
              } else {
                     *dest = *data;
              }
              data++;
              dest++;
       }
       *dest = '\0';
       return dest - str;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHPAPI char* php_raw_url_encode ( char const *  s,
int  len,
int *  new_length 
)

Definition at line 597 of file url.c.

{
       /*
        * Percent-encode the first len bytes of s. Alphanumerics and the
        * characters '-', '.', '_' and '~' are copied through; every other byte
        * becomes "%XX" with upper-case hex digits taken from hexchars.
        * Returns a NUL-terminated buffer obtained from safe_emalloc(); when
        * new_length is non-NULL the encoded length is stored through it.
        */
       register int x, y;
       unsigned char *str;

       /* worst case: every input byte expands to three, plus the terminator */
       str = (unsigned char *) safe_emalloc(3, len, 1);
       for (x = 0, y = 0; len--; x++, y++) {
              str[y] = (unsigned char) s[x];
#ifndef CHARSET_EBCDIC
              if ((str[y] < '0' && str[y] != '-' && str[y] != '.') ||
                     (str[y] < 'A' && str[y] > '9') ||
                     (str[y] > 'Z' && str[y] < 'a' && str[y] != '_') ||
                     (str[y] > 'z' && str[y] != '~')) {
                     str[y++] = '%';
                     str[y++] = hexchars[(unsigned char) s[x] >> 4];
                     str[y] = hexchars[(unsigned char) s[x] & 15];
#else /*CHARSET_EBCDIC*/
              /* Escape whatever is neither alphanumeric nor one of "_-.~".
               * The test used to be "!= NULL", which escaped exactly the
               * characters that must be kept and kept the ones that must be
               * escaped.
               * NOTE(review): strchr() also matches the terminating NUL of the
               * set, so a NUL byte in the input is not escaped on this branch. */
              if (!isalnum(str[y]) && strchr("_-.~", str[y]) == NULL) {
                     str[y++] = '%';
                     str[y++] = hexchars[os_toascii[(unsigned char) s[x]] >> 4];
                     str[y] = hexchars[os_toascii[(unsigned char) s[x]] & 15];
#endif /*CHARSET_EBCDIC*/
              }
       }
       str[y] = '\0';
       if (new_length) {
              *new_length = y;
       }
       return ((char *) str);
}

Here is the caller graph for this function:

PHPAPI char* php_replace_controlchars ( char *  str)

Definition at line 85 of file url.c.

{
       /*
        * Replace the control characters of a NUL-terminated string in place by
        * delegating to php_replace_controlchars_ex() with the string's length.
        * A NULL str yields NULL, matching what the _ex variant returns for NULL,
        * instead of crashing inside strlen().
        */
       if (!str) {
              return (NULL);
       }

       return php_replace_controlchars_ex(str, strlen(str));
} 

Here is the call graph for this function:

PHPAPI char* php_replace_controlchars_ex ( char *  str,
int  len 
)

Definition at line 64 of file url.c.

{
       /*
        * Overwrite every control character (as classified by iscntrl()) in the
        * first len bytes of str with '_', in place.
        * Returns str, or NULL when str is NULL.
        */
       unsigned char *s;
       unsigned char *e;

       /* Check for NULL before forming any pointer from str: computing
        * "str + len" on a NULL pointer is not valid pointer arithmetic. */
       if (!str) {
              return (NULL);
       }

       s = (unsigned char *)str;
       e = s + len;

       while (s < e) {
              if (iscntrl(*s)) {
                     *s='_';
              }
              s++;
       }

       return (str);
} 

Here is the caller graph for this function:

PHPAPI int php_url_decode ( char *  str,
int  len 
)

Definition at line 566 of file url.c.

{
       /*
        * Decode the first len bytes of str in place: '+' becomes a space and
        * "%XX" becomes the byte with that hexadecimal value. A '%' that is not
        * followed by two hexadecimal digits inside the remaining input is copied
        * through unchanged. The result is NUL-terminated and its length is
        * returned.
        */
       char *dest = str;
       char *data = str;

       while (len--) {
              if (*data == '+') {
                     *dest = ' ';
              }
              /* len was already decremented, so "len >= 2" means two more bytes
               * follow the '%'. The bytes are converted to unsigned char before
               * being handed to isxdigit(): passing a negative plain char (any
               * byte >= 0x80 where char is signed) is undefined behaviour. */
              else if (*data == '%' && len >= 2 && isxdigit((int) (unsigned char) *(data + 1)) 
                             && isxdigit((int) (unsigned char) *(data + 2))) {
#ifndef CHARSET_EBCDIC
                     *dest = (char) php_htoi(data + 1);
#else
                     /* NOTE(review): the index is cast to plain char; confirm it
                      * cannot go negative on the targeted EBCDIC platforms. */
                     *dest = os_toebcdic[(char) php_htoi(data + 1)];
#endif
                     /* skip the two hex digits just consumed */
                     data += 2;
                     len -= 2;
              } else {
                     *dest = *data;
              }
              data++;
              dest++;
       }
       *dest = '\0';
       return dest - str;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHPAPI char* php_url_encode ( char const *  s,
int  len,
int *  new_length 
)

Definition at line 484 of file url.c.

{
       /*
        * Encode the first len bytes of s: a space becomes '+', alphanumerics and
        * the characters '-', '.', '_' are copied through, and every other byte
        * becomes "%XX" with upper-case hex digits taken from hexchars.
        * Returns a NUL-terminated buffer obtained from safe_emalloc(); when
        * new_length is non-NULL the encoded length is stored through it.
        */
       unsigned char const *src = (unsigned char const *) s;
       unsigned char const *src_end = src + len;
       unsigned char *buf, *out;
       unsigned char ch;

       /* worst case: every input byte expands to three, plus the terminator */
       buf = (unsigned char *) safe_emalloc(3, len, 1);
       out = buf;

       for (; src < src_end; src++) {
              ch = *src;

              if (ch == ' ') {
                     *out++ = '+';
                     continue;
              }
#ifndef CHARSET_EBCDIC
              /* anything outside 0-9, A-Z, a-z, '-', '.', '_' gets escaped */
              if (!((ch >= '0' && ch <= '9') ||
                       (ch >= 'A' && ch <= 'Z') ||
                       (ch >= 'a' && ch <= 'z') ||
                       ch == '-' || ch == '.' || ch == '_')) {
                     *out++ = '%';
                     *out++ = hexchars[ch >> 4];
                     *out++ = hexchars[ch & 15];
                     continue;
              }
#else /*CHARSET_EBCDIC*/
              if (!isalnum(ch) && strchr("_-.", ch) == NULL) {
                     /* Allow only alphanumeric chars and '_', '-', '.'; escape the rest */
                     *out++ = '%';
                     *out++ = hexchars[os_toascii[ch] >> 4];
                     *out++ = hexchars[os_toascii[ch] & 15];
                     continue;
              }
#endif /*CHARSET_EBCDIC*/
              *out++ = ch;
       }

       *out = 0;
       if (new_length) {
              *new_length = out - buf;
       }
       return (char *) buf;
}

Here is the caller graph for this function:

PHPAPI void php_url_free ( php_url *  theurl)

Definition at line 42 of file url.c.

{
       /*
        * Release a php_url: free each component string that is set, then the
        * structure itself. theurl is dereferenced unconditionally.
        */
       if (theurl->scheme != NULL) {
              efree(theurl->scheme);
       }
       if (theurl->user != NULL) {
              efree(theurl->user);
       }
       if (theurl->pass != NULL) {
              efree(theurl->pass);
       }
       if (theurl->host != NULL) {
              efree(theurl->host);
       }
       if (theurl->path != NULL) {
              efree(theurl->path);
       }
       if (theurl->query != NULL) {
              efree(theurl->query);
       }
       if (theurl->fragment != NULL) {
              efree(theurl->fragment);
       }

       efree(theurl);
}

Here is the caller graph for this function:

PHPAPI php_url* php_url_parse ( char const *  str)

Definition at line 90 of file url.c.

{
       /* Parse a NUL-terminated URL by handing it, with its strlen(), to php_url_parse_ex(). */
       int length = strlen(str);

       return php_url_parse_ex(str, length);
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHPAPI php_url* php_url_parse_ex ( char const *  str,
int  length 
)

Definition at line 97 of file url.c.

{
       /*
        * Split the first `length` bytes of `str` into URL components.
        *
        * Returns a php_url allocated with ecalloc() in which scheme, user, pass,
        * host, port, path, query and fragment are filled in where present (the
        * others stay zero), or NULL when the input is rejected: a port that is
        * too long or outside 1..65535, a lone trailing colon, or an empty host.
        * Every string stored is an estrndup() copy that has been passed through
        * php_replace_controlchars_ex().
        *
        * Pointer roles: s = start of the part still to be parsed, ue = end of
        * the input, e = end of the piece currently examined, p and pp = scratch.
        */
       char port_buf[6]; /* room for up to 5 digits plus the terminator */
       php_url *ret = ecalloc(1, sizeof(php_url));
       char const *s, *e, *p, *pp, *ue;
              
       s = str;
       ue = s + length;

       /* parse scheme */
       /* taken when a ':' exists and is not the very first byte */
       if ((e = memchr(s, ':', length)) && (e - s)) {
              /* validate scheme */
              p = s;
              while (p < e) {
                     /* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
                     if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') {
                            /* not a scheme: if anything follows the colon try it as a
                             * port, otherwise treat the whole input as a path */
                            if (e + 1 < ue) {
                                   goto parse_port;
                            } else {
                                   goto just_path;
                            }
                     }
                     p++;
              }
       
              if (*(e + 1) == '\0') { /* only scheme is available */
                     ret->scheme = estrndup(s, (e - s));
                     php_replace_controlchars_ex(ret->scheme, (e - s));
                     goto end;
              }

              /* 
               * certain schemas like mailto: and zlib: may not have any / after them
               * this check ensures we support those.
               */
              if (*(e+1) != '/') {
                     /* check if the data we get is a port this allows us to 
                      * correctly parse things like a.com:80
                      */
                     p = e + 1;
                     while (isdigit(*p)) {
                            p++;
                     }
                     
                     /* only digits up to the end or up to a '/', and few enough of
                      * them: treat the text before the colon as a host, not a scheme */
                     if ((*p == '\0' || *p == '/') && (p - e) < 7) {
                            goto parse_port;
                     }
                     
                     ret->scheme = estrndup(s, (e-s));
                     php_replace_controlchars_ex(ret->scheme, (e - s));
                     
                     /* step past "scheme:" and parse the remainder as a path */
                     length -= ++e - s;
                     s = e;
                     goto just_path;
              } else {
                     ret->scheme = estrndup(s, (e-s));
                     php_replace_controlchars_ex(ret->scheme, (e - s));
              
                     if (*(e+2) == '/') {
                            /* "scheme://": the authority starts after the two slashes */
                            s = e + 3;
                            /* sizeof("file") includes the terminator, so the scheme
                             * must be exactly "file" (case-insensitively) */
                            if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
                                   if (*(e + 3) == '/') {
                                          /* support windows drive letters as in:
                                             file:///c:/somedir/file.txt
                                          */
                                          if (*(e + 5) == ':') {
                                                 s = e + 4;
                                          }
                                          goto nohost;
                                   }
                            }
                     } else {
                            /* "scheme:/" followed by something other than a second slash */
                            if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
                                   s = e + 1;
                                   goto nohost;
                            } else {
                                   length -= ++e - s;
                                   s = e;
                                   goto just_path;
                            }      
                     }
              }      
       } else if (e) { /* no scheme; starts with colon: look for port */
              parse_port:
              /* p = first byte after the colon, pp = end of the digit run (at most 6 scanned) */
              p = e + 1;
              pp = p;

              while (pp-p < 6 && isdigit(*pp)) {
                     pp++;
              }

              /* 1 to 5 digits terminated by '/' or the end of the string form a port */
              if (pp - p > 0 && pp - p < 6 && (*pp == '/' || *pp == '\0')) {
                     long port;
                     memcpy(port_buf, p, (pp - p));
                     port_buf[pp - p] = '\0';
                     port = strtol(port_buf, NULL, 10);
                     if (port > 0 && port <= 65535) {
                            ret->port = (unsigned short) port;
                     } else {
                            STR_FREE(ret->scheme);
                            efree(ret);
                            return NULL;
                     }
              } else if (p == pp && *pp == '\0') {
                     /* nothing at all after the colon: reject */
                     STR_FREE(ret->scheme);
                     efree(ret);
                     return NULL;
              } else {
                     goto just_path;
              }
       } else {
              /* no ':' anywhere (also the jump target for "treat the rest as a path") */
              just_path:
              ue = s + length;
              goto nohost;
       }
       
       /* Authority parsing. Find where it ends: at the first '/', otherwise at the
        * earlier of '?' and '#', otherwise at the end of the input. */
       e = ue;
       
       if (!(p = memchr(s, '/', (ue - s)))) {
              char *query, *fragment;

              query = memchr(s, '?', (ue - s));
              fragment = memchr(s, '#', (ue - s));

              if (query && fragment) {
                     if (query > fragment) {
                            p = e = fragment;
                     } else {
                            p = e = query;
                     }
              } else if (query) {
                     p = e = query;
              } else if (fragment) {
                     p = e = fragment;
              }
       } else {
              e = p;
       }      
              
       /* check for login and password */
       /* the last '@' inside the authority separates the credentials from the host */
       if ((p = zend_memrchr(s, '@', (e-s)))) {
              if ((pp = memchr(s, ':', (p-s)))) {
                     if ((pp-s) > 0) {
                            ret->user = estrndup(s, (pp-s));
                            php_replace_controlchars_ex(ret->user, (pp - s));
                     }      
              
                     pp++;
                     if (p-pp > 0) {
                            ret->pass = estrndup(pp, (p-pp));
                            php_replace_controlchars_ex(ret->pass, (p-pp));
                     }      
              } else {
                     ret->user = estrndup(s, (p-s));
                     php_replace_controlchars_ex(ret->user, (p-s));
              }
              
              s = p + 1;
       }

       /* check for port */
       if (*s == '[' && *(e-1) == ']') {
              /* Short circuit portscan, 
                 we're dealing with an 
                 IPv6 embedded address */
              p = s;
       } else {
              /* memrchr is a GNU specific extension
                 Emulate for wide compatibility */
              /* NOTE(review): *p is read before "p >= s" is tested, and the scan
               * starts at e itself rather than at e - 1 -- confirm this is safe. */
              for(p = e; *p != ':' && p >= s; p--);
       }

       if (p >= s && *p == ':') {
              /* a port already taken in the parse_port section above is kept */
              if (!ret->port) {
                     p++;
                     if (e-p > 5) { /* port cannot be longer than 5 characters */
                            STR_FREE(ret->scheme);
                            STR_FREE(ret->user);
                            STR_FREE(ret->pass);
                            efree(ret);
                            return NULL;
                     } else if (e - p > 0) {
                            long port;
                            memcpy(port_buf, p, (e - p));
                            port_buf[e - p] = '\0';
                            port = strtol(port_buf, NULL, 10);
                            if (port > 0 && port <= 65535) {
                                   ret->port = (unsigned short)port;
                            } else {
                                   STR_FREE(ret->scheme);
                                   STR_FREE(ret->user);
                                   STR_FREE(ret->pass);
                                   efree(ret);
                                   return NULL;
                            }
                     }
                     /* back onto the colon so that the host ends right before it */
                     p--;
              }      
       } else {
              /* no port separator: the host runs to the end of the authority */
              p = e;
       }
       
       /* check if we have a valid host, if we don't reject the string as url */
       if ((p-s) < 1) {
              STR_FREE(ret->scheme);
              STR_FREE(ret->user);
              STR_FREE(ret->pass);
              efree(ret);
              return NULL;
       }

       ret->host = estrndup(s, (p-s));
       php_replace_controlchars_ex(ret->host, (p - s));
       
       /* nothing follows the authority: done */
       if (e == ue) {
              return ret;
       }
       
       s = e;
       
       nohost:
       /* From here on s..ue holds the path, an optional "?query" and an optional "#fragment". */
       
       if ((p = memchr(s, '?', (ue - s)))) {
              /* NOTE(review): unlike the memchr() calls, this strchr() is not
               * bounded by ue; it relies on the input being NUL-terminated. */
              pp = strchr(s, '#');

              /* a '#' in front of the '?' means the '?' belongs to the fragment */
              if (pp && pp < p) {
                     if (pp - s) {
                            ret->path = estrndup(s, (pp-s));
                            php_replace_controlchars_ex(ret->path, (pp - s));
                     }
                     p = pp;
                     goto label_parse;
              }
       
              if (p - s) {
                     ret->path = estrndup(s, (p-s));
                     php_replace_controlchars_ex(ret->path, (p - s));
              }      
       
              if (pp) {
                     /* the query sits between '?' and '#'; p is moved past the '?' here */
                     if (pp - ++p) { 
                            ret->query = estrndup(p, (pp-p));
                            php_replace_controlchars_ex(ret->query, (pp - p));
                     }
                     p = pp;
                     goto label_parse;
              } else if (++p - ue) {
                     /* no fragment: a non-empty query runs to the end of the input */
                     ret->query = estrndup(p, (ue-p));
                     php_replace_controlchars_ex(ret->query, (ue - p));
              }
       } else if ((p = memchr(s, '#', (ue - s)))) {
              if (p - s) {
                     ret->path = estrndup(s, (p-s));
                     php_replace_controlchars_ex(ret->path, (p - s));
              }      
              
              label_parse:
              /* p is on the '#'; everything after it, if non-empty, is the fragment */
              p++;
              
              if (ue - p) {
                     ret->fragment = estrndup(p, (ue-p));
                     php_replace_controlchars_ex(ret->fragment, (ue - p));
              }      
       } else {
              /* neither '?' nor '#': the remainder is the path */
              ret->path = estrndup(s, (ue-s));
              php_replace_controlchars_ex(ret->path, (ue - s));
       }
end:
       return ret;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

unsigned char hexchars[] = "0123456789ABCDEF" [static]

Definition at line 480 of file url.c.