Back to index

php5  5.3.10
sanitizing_filters.c
Go to the documentation of this file.
00001 /*
00002   +----------------------------------------------------------------------+
00003   | PHP Version 5                                                        |
00004   +----------------------------------------------------------------------+
00005   | Copyright (c) 1997-2012 The PHP Group                                |
00006   +----------------------------------------------------------------------+
00007   | This source file is subject to version 3.01 of the PHP license,      |
00008   | that is bundled with this package in the file LICENSE, and is        |
00009   | available through the world-wide-web at the following url:           |
00010   | http://www.php.net/license/3_01.txt                                  |
00011   | If you did not receive a copy of the PHP license and are unable to   |
00012   | obtain it through the world-wide-web, please send a note to          |
00013   | license@php.net so we can mail you a copy immediately.               |
00014   +----------------------------------------------------------------------+
00015   | Authors: Derick Rethans <derick@php.net>                             |
00016   +----------------------------------------------------------------------+
00017 */
00018 
00019 /* $Id: sanitizing_filters.c 321634 2012-01-01 13:15:04Z felipe $ */
00020 
00021 #include "php_filter.h"
00022 #include "filter_private.h"
00023 #include "ext/standard/php_smart_str.h"
00024 
00025 /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (256 entries).
 * filter_map_update() writes a caller-supplied flag into the slots of the
 * listed bytes; filter_map_apply() keeps a byte only if its slot is non-zero. */
00026 typedef unsigned long filter_map[256];
00027 /* }}} */
00028 
00029 /* {{{ HELPER FUNCTIONS */
00030 static void php_filter_encode_html(zval *value, const unsigned char *chars)
00031 {
00032        smart_str str = {0};
00033        int len = Z_STRLEN_P(value);
00034        unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
00035        unsigned char *e = s + len;
00036 
00037        if (Z_STRLEN_P(value) == 0) {
00038               return;
00039        }
00040 
00041        while (s < e) {
00042               if (chars[*s]) {
00043                      smart_str_appendl(&str, "&#", 2);
00044                      smart_str_append_unsigned(&str, (unsigned long)*s);
00045                      smart_str_appendc(&str, ';');
00046               } else {
00047                      /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
00048                      smart_str_appendc(&str, *s);
00049               }
00050               s++;
00051        }
00052 
00053        smart_str_0(&str);
00054        efree(Z_STRVAL_P(value));
00055        Z_STRVAL_P(value) = str.c;
00056        Z_STRLEN_P(value) = str.len;
00057 }
00058 
/* Upper-case hexadecimal digits; php_filter_encode_url() indexes this table
 * with the high and low nibble of each byte it percent-encodes. */
00059 static const unsigned char hexchars[] = "0123456789ABCDEF";
00060 
/* Basic character classes, concatenated as string literals to build the
 * allow-lists used by the URL-encoding, e-mail, URL and number filters. */
00061 #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
00062 #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
00063 #define DIGIT       "0123456789"
00064 
/* Bytes that php_filter_encoded() lets through without percent-encoding:
 * ASCII letters, digits and "-._". */
00065 #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
00066 
00067 static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
00068 {
00069        unsigned char *str, *p;
00070        unsigned char tmp[256];
00071        unsigned char *s = (unsigned char *)chars;
00072        unsigned char *e = s + char_len;
00073 
00074        memset(tmp, 1, sizeof(tmp)-1);
00075 
00076        while (s < e) {
00077               tmp[*s++] = 0;
00078        }
00079 /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
00080        if (encode_nul) {
00081               tmp[0] = 1;
00082        }
00083        if (high) {
00084               memset(tmp + 127, 1, sizeof(tmp) - 127);
00085        }
00086        if (low) {
00087               memset(tmp, 1, 32);
00088        }
00089 */
00090        p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
00091        s = (unsigned char *)Z_STRVAL_P(value);
00092        e = s + Z_STRLEN_P(value);
00093 
00094        while (s < e) {
00095               if (tmp[*s]) {
00096                      *p++ = '%';
00097                      *p++ = hexchars[(unsigned char) *s >> 4];
00098                      *p++ = hexchars[(unsigned char) *s & 15];
00099               } else {
00100                      *p++ = *s;    
00101               }
00102               s++;   
00103        }
00104        *p = '\0';
00105        efree(Z_STRVAL_P(value));
00106        Z_STRVAL_P(value) = (char *)str;
00107        Z_STRLEN_P(value) = p - str;
00108 }
00109 
00110 static void php_filter_strip(zval *value, long flags)
00111 {
00112        unsigned char *buf, *str;
00113        int   i, c;
00114        
00115        /* Optimization for if no strip flags are set */
00116        if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
00117               return;
00118        }
00119 
00120        str = (unsigned char *)Z_STRVAL_P(value);
00121        buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
00122        c = 0;
00123        for (i = 0; i < Z_STRLEN_P(value); i++) {
00124               if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
00125               } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
00126               } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
00127               } else {
00128                      buf[c] = str[i];
00129                      ++c;
00130               }
00131        }
00132        /* update zval string data */
00133        buf[c] = '\0';
00134        efree(Z_STRVAL_P(value));
00135        Z_STRVAL_P(value) = (char *)buf;
00136        Z_STRLEN_P(value) = c;
00137 }
00138 /* }}} */
00139 
00140 /* {{{ FILTER MAP HELPERS */
00141 static void filter_map_init(filter_map *map)
00142 {
00143        memset(map, 0, sizeof(filter_map));
00144 }
00145 
00146 static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
00147 {
00148        int l, i;
00149 
00150        l = strlen((const char*)allowed_list);
00151        for (i = 0; i < l; ++i) {
00152               (*map)[allowed_list[i]] = flag;
00153        }
00154 }
00155 
00156 static void filter_map_apply(zval *value, filter_map *map)
00157 {
00158        unsigned char *buf, *str;
00159        int   i, c;
00160        
00161        str = (unsigned char *)Z_STRVAL_P(value);
00162        buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
00163        c = 0;
00164        for (i = 0; i < Z_STRLEN_P(value); i++) {
00165               if ((*map)[str[i]]) {
00166                      buf[c] = str[i];
00167                      ++c;
00168               }
00169        }
00170        /* update zval string data */
00171        buf[c] = '\0';
00172        efree(Z_STRVAL_P(value));
00173        Z_STRVAL_P(value) = (char *)buf;
00174        Z_STRLEN_P(value) = c;
00175 }
00176 /* }}} */
00177 
00178 /* {{{ php_filter_string */
00179 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
00180 {
00181        size_t new_len;
00182        unsigned char enc[256] = {0};
00183 
00184        /* strip high/strip low ( see flags )*/
00185        php_filter_strip(value, flags);
00186 
00187        if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
00188               enc['\''] = enc['"'] = 1;
00189        }
00190        if (flags & FILTER_FLAG_ENCODE_AMP) {
00191               enc['&'] = 1;
00192        }
00193        if (flags & FILTER_FLAG_ENCODE_LOW) {
00194               memset(enc, 1, 32);
00195        }
00196        if (flags & FILTER_FLAG_ENCODE_HIGH) {
00197               memset(enc + 127, 1, sizeof(enc) - 127);
00198        }
00199 
00200        php_filter_encode_html(value, enc);
00201 
00202        /* strip tags, implicitly also removes \0 chars */
00203        new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
00204        Z_STRLEN_P(value) = new_len;
00205 
00206        if (new_len == 0) {
00207               zval_dtor(value);
00208               if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
00209                      ZVAL_NULL(value);
00210               } else {
00211                      ZVAL_EMPTY_STRING(value);                 
00212               }
00213               return;
00214        }
00215 }
00216 /* }}} */
00217 
00218 /* {{{ php_filter_encoded */
00219 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
00220 {
00221        /* apply strip_high and strip_low filters */
00222        php_filter_strip(value, flags);
00223        /* urlencode */
00224        php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
00225 }
00226 /* }}} */
00227 
00228 /* {{{ php_filter_special_chars */
00229 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
00230 {
00231        unsigned char enc[256] = {0};
00232 
00233        php_filter_strip(value, flags);
00234 
00235        /* encodes ' " < > & \0 to numerical entities */
00236        enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
00237 
00238        /* if strip low is not set, then we encode them as &#xx; */
00239        memset(enc, 1, 32);
00240 
00241        if (flags & FILTER_FLAG_ENCODE_HIGH) {
00242               memset(enc + 127, 1, sizeof(enc) - 127);
00243        }
00244        
00245        php_filter_encode_html(value, enc);       
00246 }
00247 /* }}} */
00248 
00249 /* {{{ php_filter_full_special_chars */
00250 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
00251 {
00252        char *buf;
00253        int   len, quotes;
00254        
00255        if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
00256               quotes = ENT_QUOTES;
00257        } else {
00258               quotes = ENT_NOQUOTES;
00259        }
00260        buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
00261        efree(Z_STRVAL_P(value));
00262        Z_STRVAL_P(value) = buf;
00263        Z_STRLEN_P(value) = len;
00264 }
00265 /* }}} */
00266 
00267 /* {{{ php_filter_unsafe_raw */
00268 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
00269 {
00270        /* Only if no flags are set (optimization) */
00271        if (flags != 0 && Z_STRLEN_P(value) > 0) {
00272               unsigned char enc[256] = {0};
00273 
00274               php_filter_strip(value, flags);
00275 
00276               if (flags & FILTER_FLAG_ENCODE_AMP) {
00277                      enc['&'] = 1;
00278               }
00279               if (flags & FILTER_FLAG_ENCODE_LOW) {
00280                      memset(enc, 1, 32);
00281               }
00282               if (flags & FILTER_FLAG_ENCODE_HIGH) {
00283                      memset(enc + 127, 1, sizeof(enc) - 127);
00284               }
00285 
00286               php_filter_encode_html(value, enc);       
00287        } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
00288               zval_dtor(value);
00289               ZVAL_NULL(value);
00290        }
00291 }
00292 /* }}} */
00293 
00294 
00295 
00296 /* {{{ php_filter_email */
/* Character classes that php_filter_url() concatenates (together with
 * LOWALPHA, HIALPHA and DIGIT) into its list of allowed bytes. */
00297 #define SAFE        "$-_.+"
00298 #define EXTRA       "!*'(),"
00299 #define NATIONAL    "{}|\\^~[]`"
00300 #define PUNCTUATION "<>#%\""
00301 #define RESERVED    ";/?:@&="
00302 
00303 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
00304 {
00305        /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
00306        const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
00307        filter_map     map;
00308 
00309        filter_map_init(&map);
00310        filter_map_update(&map, 1, allowed_list);
00311        filter_map_apply(value, &map);
00312 }
00313 /* }}} */
00314 
00315 /* {{{ php_filter_url */
00316 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
00317 {
00318        /* Strip all chars not part of section 5 of
00319         * http://www.faqs.org/rfcs/rfc1738.html */
00320        const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
00321        filter_map     map;
00322 
00323        filter_map_init(&map);
00324        filter_map_update(&map, 1, allowed_list);
00325        filter_map_apply(value, &map);
00326 }
00327 /* }}} */
00328 
00329 /* {{{ php_filter_number_int */
00330 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
00331 {
00332        /* strip everything [^0-9+-] */
00333        const unsigned char allowed_list[] = "+-" DIGIT;
00334        filter_map     map;
00335 
00336        filter_map_init(&map);
00337        filter_map_update(&map, 1, allowed_list);
00338        filter_map_apply(value, &map);
00339 }
00340 /* }}} */
00341 
00342 /* {{{ php_filter_number_float */
00343 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
00344 {
00345        /* strip everything [^0-9+-] */
00346        const unsigned char allowed_list[] = "+-" DIGIT;
00347        filter_map     map;
00348 
00349        filter_map_init(&map);
00350        filter_map_update(&map, 1, allowed_list);
00351 
00352        /* depending on flags, strip '.', 'e', ",", "'" */
00353        if (flags & FILTER_FLAG_ALLOW_FRACTION) {
00354               filter_map_update(&map, 2, (const unsigned char *) ".");
00355        }
00356        if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
00357               filter_map_update(&map, 3,  (const unsigned char *) ",");
00358        }
00359        if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
00360               filter_map_update(&map, 4,  (const unsigned char *) "eE");
00361        }
00362        filter_map_apply(value, &map);
00363 }
00364 /* }}} */
00365 
00366 /* {{{ php_filter_magic_quotes */
00367 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
00368 {
00369        char *buf;
00370        int   len;
00371        
00372        /* just call php_addslashes quotes */
00373        buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
00374 
00375        efree(Z_STRVAL_P(value));
00376        Z_STRVAL_P(value) = buf;
00377        Z_STRLEN_P(value) = len;
00378 }
00379 /* }}} */
00380 
00381 /*
00382  * Local variables:
00383  * tab-width: 4
00384  * c-basic-offset: 4
00385  * End:
00386  * vim600: noet sw=4 ts=4 fdm=marker
00387  * vim<600: noet sw=4 ts=4
00388  */