Back to index

php5  5.3.10
Defines | Functions
html.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Defines

#define ENT_HTML_QUOTE_NONE   0
#define ENT_HTML_QUOTE_SINGLE   1
#define ENT_HTML_QUOTE_DOUBLE   2
#define ENT_HTML_IGNORE_ERRORS   4
#define ENT_COMPAT   ENT_HTML_QUOTE_DOUBLE
#define ENT_QUOTES   (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE)
#define ENT_NOQUOTES   ENT_HTML_QUOTE_NONE
#define ENT_IGNORE   ENT_HTML_IGNORE_ERRORS

Functions

void register_html_constants (INIT_FUNC_ARGS)
 PHP_FUNCTION (htmlspecialchars)
 PHP_FUNCTION (htmlentities)
 PHP_FUNCTION (htmlspecialchars_decode)
 PHP_FUNCTION (html_entity_decode)
 PHP_FUNCTION (get_html_translation_table)
PHPAPI char * php_escape_html_entities (unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
PHPAPI char * php_escape_html_entities_ex (unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC)
PHPAPI char * php_unescape_html_entities (unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)

Define Documentation

#define ENT_COMPAT   ENT_HTML_QUOTE_DOUBLE

Definition at line 29 of file html.h.

#define ENT_HTML_IGNORE_ERRORS   4

Definition at line 27 of file html.h.

#define ENT_HTML_QUOTE_DOUBLE   2

Definition at line 26 of file html.h.

#define ENT_HTML_QUOTE_NONE   0

Definition at line 24 of file html.h.

#define ENT_HTML_QUOTE_SINGLE   1

Definition at line 25 of file html.h.

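#define ENT_IGNORE   ENT_HTML_IGNORE_ERRORS

Definition at line 32 of file html.h.

#define ENT_NOQUOTES   ENT_HTML_QUOTE_NONE

Definition at line 31 of file html.h.

#define ENT_QUOTES   (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE)

Definition at line 30 of file html.h.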


Function Documentation

PHPAPI char* php_escape_html_entities ( unsigned char *  old,
int  oldlen,
int *  newlen,
int  all,
int  quote_style,
char *hint_charset  TSRMLS_DC 
)

Definition at line 1112 of file html.c.

{
       /* Convenience wrapper: forwards every argument unchanged to the
        * extended variant with its double_encode argument fixed to 1, so an
        * '&' in the input is always escaped. */
       const zend_bool encode_existing = 1;

       return php_escape_html_entities_ex(old, oldlen, newlen, all, quote_style,
                     hint_charset, encode_existing TSRMLS_CC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHPAPI char* php_escape_html_entities_ex ( unsigned char *  old,
int  oldlen,
int *  newlen,
int  all,
int  quote_style,
char *  hint_charset,
zend_bool double_encode  TSRMLS_DC 
)

Definition at line 1120 of file html.c.

{
       /*
        * Escapes the `oldlen` bytes at `old` for HTML output.
        *
        * When `all` is non-zero, characters found in the entity_map tables for
        * the charset derived from `hint_charset` are written as named
        * entities. In every case '&' and the characters listed in
        * basic_entities (filtered by `quote_style`) are escaped. With
        * `double_encode` == 0, an '&' that already starts a well-formed entity
        * reference is copied unchanged instead of becoming "&amp;".
        *
        * Returns an emalloc'ed, NUL-terminated buffer and stores its length in
        * *newlen. On an invalid multibyte sequence (without
        * ENT_HTML_IGNORE_ERRORS set in `quote_style`) the buffer is freed,
        * *newlen is set to 0 and STR_EMPTY_ALLOC() is returned.
        */
       int i, j, maxlen, len;
       char *replaced;
       enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
       int matches_map;

       /* initial capacity guess; the buffer grows on demand below */
       maxlen = 2 * oldlen;
       if (maxlen < 128) {
              maxlen = 128;
       }
       replaced = emalloc(maxlen);
       len = 0;
       i = 0;
       while (i < oldlen) {
              unsigned char mbsequence[16];      /* allow up to 15 characters in a multibyte sequence */
              int mbseqlen = sizeof(mbsequence);
              int status = SUCCESS;
              unsigned int this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);

              if (status == FAILURE) {
                     /* invalid MB sequence */
                     if (quote_style & ENT_HTML_IGNORE_ERRORS) {
                            /* NOTE(review): relies on get_next_char() having advanced `i` — confirm */
                            continue;
                     }
                     efree(replaced);
                     /* NOTE(review): the warning is raised only when display_errors is
                      * off — confirm this is intentional */
                     if (!PG(display_errors)) {
                            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
                     }
                     *newlen = 0;
                     return STR_EMPTY_ALLOC();
              }
              matches_map = 0;

              /* keep at least 16 bytes of headroom for whatever this character expands to */
              if (len + 16 > maxlen) {
                     maxlen += 128;
                     replaced = erealloc(replaced, maxlen);
              }

              if (all) {
                     /* look for a match in the maps for this charset */
                     unsigned char *rep = NULL;

                     for (j = 0; entity_map[j].charset != cs_terminator; j++) {
                            if (entity_map[j].charset == charset
                                          && this_char >= entity_map[j].basechar
                                          && this_char <= entity_map[j].endchar) {
                                   rep = (unsigned char*)entity_map[j].table[this_char - entity_map[j].basechar];
                                   /* a NULL slot means there is no entity for this position;
                                    * the character is then handled by the generic path below */
                                   matches_map = (rep != NULL);
                                   break;
                            }
                     }

                     if (matches_map) {
                            int l = (int)strlen((const char *)rep);

                            /* make room for '&' + name + ';' + terminator, whatever the
                             * length of the entity name */
                            if (len + 2 + l >= maxlen) {
                                   maxlen += l + 128;
                                   replaced = erealloc(replaced, maxlen);
                            }

                            replaced[len++] = '&';
                            /* capacity was ensured above, so copy exactly l bytes */
                            memcpy(replaced + len, rep, l);
                            len += l;
                            replaced[len++] = ';';
                     }
              }
              if (!matches_map) {
                     int is_basic = 0;

                     if (this_char == '&') {
                            int is_entity_ref = 0;

                            if (!double_encode) {
                                   /* `i` already points just past the '&' */
                                   const char *s = (const char *)old + i;
                                   const char *e = memchr(s, ';', oldlen - i);

                                   /* only short, non-empty candidates are considered
                                    * (minor optimization to avoid "entities" over 10 chars in length) */
                                   if (e && e > s && (e - s) <= 10) {
                                          int (*is_valid)(int) = isalnum; /* text entities */

                                          if (*s == '#') { /* numeric entities */
                                                 s++;
                                                 if (*s == 'x' || *s == 'X') {
                                                        /* Hex (&#x5A;) */
                                                        s++;
                                                        is_valid = isxdigit;
                                                 } else {
                                                        /* Dec (&#90;) */
                                                        is_valid = isdigit;
                                                 }
                                          }

                                          /* at least one character must remain, so "&#;" and
                                           * "&#x;" are not accepted as references */
                                          is_entity_ref = (s < e);
                                          while (is_entity_ref && s < e) {
                                                 is_entity_ref = is_valid((int)*(const unsigned char *)s++) != 0;
                                          }
                                   }
                            }

                            if (is_entity_ref) {
                                   /* existing reference: keep the '&'; the rest is copied
                                    * by the following iterations */
                                   replaced[len++] = '&';
                            } else {
                                   memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
                                   len += sizeof("&amp;") - 1;
                            }
                            is_basic = 1;
                     } else {
                            for (j = 0; basic_entities[j].charcode != 0; j++) {
                                   /* skip other characters and entries disabled by quote_style */
                                   if ((basic_entities[j].charcode != this_char) ||
                                                 (basic_entities[j].flags &&
                                                 (quote_style & basic_entities[j].flags) == 0)) {
                                          continue;
                                   }

                                   memcpy(replaced + len, basic_entities[j].entity, basic_entities[j].entitylen);
                                   len += basic_entities[j].entitylen;

                                   is_basic = 1;
                                   break;
                            }
                     }

                     if (!is_basic) {
                            /* a wide char without a named entity; pass through the original sequence */
                            if (mbseqlen > 1) {
                                   memcpy(replaced + len, mbsequence, mbseqlen);
                                   len += mbseqlen;
                            } else {
                                   replaced[len++] = (unsigned char)this_char;
                            }
                     }
              }
       }
       replaced[len] = '\0';
       *newlen = len;

       return replaced;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PHP_FUNCTION ( htmlspecialchars  )

Definition at line 1308 of file html.c.

Here is the call graph for this function:

PHP_FUNCTION ( htmlentities  )

Definition at line 1405 of file html.c.

Here is the call graph for this function:

PHP_FUNCTION ( htmlspecialchars_decode  )

Definition at line 1316 of file html.c.

{
       /*
        * htmlspecialchars_decode(string [, quote_style])
        *
        * Works on a private copy of the argument: every occurrence of a basic
        * entity enabled by quote_style, and of "&amp;", is replaced in place by
        * its character and the tail of the string is shifted left.
        */
       char *input, *out, *end, *cur;
       int input_len, out_len, src, count, k;
       long quote_style = ENT_COMPAT;
       struct basic_entities_dec decode_table[8];

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &input, &input_len, &quote_style) == FAILURE) {
              return;
       }

       out = estrndup(input, input_len);
       out_len = input_len;
       end = out + out_len;

       /* nothing to decode when the string contains no '&' at all */
       cur = memchr(out, '&', out_len);
       if (cur == NULL) {
              RETURN_STRINGL(out, out_len, 0);
       }

       /* collect the entities enabled by quote_style ... */
       count = 0;
       for (src = 0; basic_entities[src].charcode != 0; src++) {
              if (!basic_entities[src].flags || (quote_style & basic_entities[src].flags)) {
                     decode_table[count].charcode = basic_entities[src].charcode;
                     memcpy(decode_table[count].entity, basic_entities[src].entity, basic_entities[src].entitylen + 1);
                     decode_table[count].entitylen = basic_entities[src].entitylen;
                     count++;
              }
       }
       /* ... and put "&amp;" last in the table */
       decode_table[count].charcode = '&';
       decode_table[count].entitylen = sizeof("&amp;") - 1;
       memcpy(decode_table[count].entity, "&amp;", sizeof("&amp;"));
       count++;

       do {
              int remaining = end - cur;
              int replaced_here = 0;

              for (k = 0; k < count && !replaced_here; k++) {
                     if (decode_table[k].entitylen <= remaining
                                   && memcmp(cur, decode_table[k].entity, decode_table[k].entitylen) == 0) {
                            /* the entity shrinks to one character: drop the rest of it */
                            int surplus = decode_table[k].entitylen - 1;

                            *cur++ = decode_table[k].charcode;
                            memmove(cur, cur + surplus, (end - cur - surplus));
                            end -= surplus;
                            replaced_here = 1;
                     }
              }
              if (!replaced_here) {
                     /* a lone '&': step over it */
                     cur++;
              }

              if (cur >= end) {
                     break;
              }
       } while ((cur = memchr(cur, '&', (end - cur))) != NULL);

       out_len = end - out;

       out[out_len] = '\0';
       RETURN_STRINGL(out, out_len, 0);
}

Here is the call graph for this function:

PHP_FUNCTION ( html_entity_decode  )

Definition at line 1382 of file html.c.

{
       /*
        * html_entity_decode(string [, quote_style [, charset]])
        *
        * Delegates to php_unescape_html_entities() with `all` set to 1 and
        * returns the decoded string, or FALSE when that call yields NULL.
        */
       char *input;
       char *charset_hint = NULL;
       int input_len, decoded_len;
       int charset_hint_len = 0;
       long quote_style = ENT_COMPAT;
       char *decoded;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &input, &input_len,
                                                   &quote_style, &charset_hint, &charset_hint_len) == FAILURE) {
              return;
       }

       decoded = php_unescape_html_entities(input, input_len, &decoded_len, 1, quote_style, charset_hint TSRMLS_CC);
       if (decoded == NULL) {
              RETURN_FALSE;
       }
       RETURN_STRINGL(decoded, decoded_len, 0);
}

Here is the call graph for this function:

PHP_FUNCTION ( get_html_translation_table  )

Definition at line 1413 of file html.c.

{
       /*
        * get_html_translation_table([which [, quote_style [, charset_hint]]])
        *
        * Fills return_value with an array mapping each character (as a string
        * key) to its entity. For HTML_ENTITIES the entity_map tables of the
        * selected charset are added first and control then falls through to
        * the HTML_SPECIALCHARS case, which adds "&" and the basic_entities
        * enabled by quote_style. Any other value of `which` leaves the array
        * empty.
        */
       long which = HTML_SPECIALCHARS, quote_style = ENT_COMPAT;
       unsigned int i;
       int j;
       unsigned char ind[5]; /* max # of 8-bit code units (4; for UTF-8) + 1 for \0 */
       void *dummy;
       char *charset_hint = NULL;
       int charset_hint_len;
       enum entity_charset charset;

       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|lls",
                     &which, &quote_style, &charset_hint, &charset_hint_len) == FAILURE) {
              return;
       }

       charset = determine_charset(charset_hint TSRMLS_CC);

       array_init(return_value);

       switch (which) {
       case HTML_ENTITIES:
              for (j = 0; entity_map[j].charset != cs_terminator; j++) {
                     /* only tables belonging to the selected charset contribute */
                     if (entity_map[j].charset != charset)
                            continue;
                     for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++) {
                            char buffer[16];
                            unsigned k;
                            size_t written;

                            /* positions without an entity are skipped */
                            if (entity_map[j].table[i] == NULL)
                                   continue;
                                   
                            /* k is the character code this table slot stands for */
                            k = i + entity_map[j].basechar;

                            /* build the array key: the character in the target charset */
                            switch (charset) {
                            case cs_utf_8:
                                   /* presumably php_utf32_utf8() writes the UTF-8 form of k and
                                    * returns its byte count — confirm against its definition */
                                   written = php_utf32_utf8(ind, k);
                                   ind[written] = '\0';
                                   break;
                            case cs_big5:
                            case cs_gb2312:
                            case cs_big5hkscs:
                            case cs_sjis:
                                   /* we have no mappings for these, but if we had... */
                                   /* fall through */
                            default: /* one byte */
                                   written = 1;
                                   ind[0] = (unsigned char)k;
                                   ind[1] = '\0';
                                   break;
                            }

                            /* the value is "&name;", truncated to fit buffer if necessary */
                            snprintf(buffer, sizeof(buffer), "&%s;", entity_map[j].table[i]);
                            if (zend_hash_find(Z_ARRVAL_P(return_value), (const char*)ind, written+1, &dummy) == FAILURE) {
                                   /* in case of the single quote, which is repeated, the first one wins,
                                          * so don't replace the existing mapping */
                                   add_assoc_string(return_value, (const char*)ind, buffer, 1);
                            }
                     }
              }
              /* fall through: the entity table also contains the special characters */

       case HTML_SPECIALCHARS:
              add_assoc_stringl(return_value, "&", "&amp;", sizeof("&amp;") - 1, 1);
              for (j = 0; basic_entities[j].charcode != 0; j++) {
                     /* entries carrying flags are included only when quote_style enables them */
                     if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0)
                            continue;
                            
                     ind[0] = (unsigned char)basic_entities[j].charcode;
                     ind[1] = '\0';
                     /* keep a mapping that is already present (first one wins) */
                     if (zend_hash_find(Z_ARRVAL_P(return_value), (const char*)ind, 2, &dummy) == FAILURE) {
                            add_assoc_stringl(return_value, ind, basic_entities[j].entity,
                                   basic_entities[j].entitylen, 1);
                     }
              }

              break;
       }
}

Here is the call graph for this function:

PHPAPI char* php_unescape_html_entities ( unsigned char *  old,
int  oldlen,
int *  newlen,
int  all,
int  quote_style,
char *hint_charset  TSRMLS_DC 
)

Definition at line 916 of file html.c.

{
       /*
        * Decodes HTML entities in the `oldlen` bytes at `old`.
        *
        * The work is done on an emalloc'ed copy in three passes:
        *   1. when `all` is non-zero, named entities from the entity_map tables
        *      of the charset derived from `hint_charset`;
        *   2. the basic_entities enabled by `quote_style`;
        *   3. numeric references ("&#NN;", "&#xHH;") and, last of all, "&amp;".
        *
        * Returns the NUL-terminated result and stores its length in *newlen.
        * Returns NULL (after freeing the copy and raising a warning) when `all`
        * is set and the charset is not one the first pass knows how to handle.
        */
       int retlen;
       int j, k;
       char *replaced, *ret, *p, *q, *lim, *next;
       enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
       unsigned char replacement[15];
       int replacement_len;

       ret = estrndup(old, oldlen);
       retlen = oldlen;
       if (!retlen) {
              /* empty input: nothing to decode */
              *newlen = retlen;
              return ret;
       }

       if (all) {
              /* look for a match in the maps for this charset */
              for (j = 0; entity_map[j].charset != cs_terminator; j++) {
                     if (entity_map[j].charset != charset) {
                            continue;
                     }

                     for (k = entity_map[j].basechar; k <= entity_map[j].endchar; k++) {
                            unsigned char entity[32];
                            int entity_length = 0;

                            if (entity_map[j].table[k - entity_map[j].basechar] == NULL) {
                                   continue;
                            }

                            entity_length = slprintf(entity, sizeof(entity), "&%s;", entity_map[j].table[k - entity_map[j].basechar]);
                            /* skip names that do not fit the scratch buffer */
                            if ((size_t)entity_length >= sizeof(entity)) {
                                   continue;
                            }

                            /* When we have MBCS entities in the tables above, this will need to handle it */
                            replacement_len = 0;
                            switch (charset) {
                                   case cs_8859_1:
                                   case cs_cp1252:
                                   case cs_8859_15:
                                   case cs_cp1251:
                                   case cs_8859_5:
                                   case cs_cp866:
                                   case cs_koi8r:
                                          /* single-byte charsets: the code is the byte itself */
                                          replacement[0] = k;
                                          replacement[1] = '\0';
                                          replacement_len = 1;
                                          break;

                                   case cs_big5:
                                   case cs_gb2312:
                                   case cs_big5hkscs:
                                   case cs_sjis:
                                   case cs_eucjp:
                                          /* we cannot properly handle those multibyte encodings
                                           * with php_str_to_str. skip it. */
                                          continue;

                                   case cs_utf_8:
                                          replacement_len = php_utf32_utf8(replacement, k);
                                          break;

                                   default:
                                          php_error_docref(NULL TSRMLS_CC, E_WARNING, "cannot yet handle MBCS!");
                                          efree(ret);
                                          return NULL;
                            }

                            /* only rebuild the string when the entity actually occurs */
                            if (php_memnstr(ret, entity, entity_length, ret+retlen)) {
                                   replaced = php_str_to_str(ret, retlen, entity, entity_length, replacement, replacement_len, &retlen);
                                   efree(ret);
                                   ret = replaced;
                            }
                     }
              }
       }

       for (j = 0; basic_entities[j].charcode != 0; j++) {
              /* entries carrying flags are decoded only when quote_style enables them */
              if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0) {
                     continue;
              }

              replacement[0] = (unsigned char)basic_entities[j].charcode;
              replacement[1] = '\0';

              if (php_memnstr(ret, basic_entities[j].entity, basic_entities[j].entitylen, ret+retlen)) {
                     replaced = php_str_to_str(ret, retlen, basic_entities[j].entity, basic_entities[j].entitylen, replacement, 1, &retlen);
                     efree(ret);
                     ret = replaced;
              }
       }

       /* replace numeric entities & "&amp;"; p reads and q writes within the
        * same buffer */
       lim = ret + retlen;
       for (p = ret, q = ret; p < lim;) {
              /* ordinary byte, or an '&' too close to the end to start a reference */
              if (p[0] != '&' || p + 2 >= lim) {
                     *(q++) = *(p++);
                     continue;
              }

              if (p[1] == '#') {
                     int is_hex = (p[2] == 'x' || p[2] == 'X');
                     char *digits = p + (is_hex ? 3 : 2);
                     int first = (int)*(unsigned char *)digits;
                     long value = 0;
                     int code = 0;
                     int invalid_code = 0;

                     /* strtol() on its own would accept leading whitespace, a sign or
                      * no digits at all, so insist on a leading digit before calling it */
                     next = NULL;
                     if (is_hex ? isxdigit(first) : isdigit(first)) {
                            value = strtol(digits, &next, is_hex ? 16 : 10);
                     }

                     if (value < 0 || value > 0x7fffffffL) {
                            /* would not survive the conversion to int */
                            invalid_code = 1;
                     } else {
                            code = (int)value;
                            /* quotes are decoded only when quote_style asks for them */
                            if ((code == '\'' && !(quote_style & ENT_HTML_QUOTE_SINGLE)) ||
                                   (code == '"' && !(quote_style & ENT_HTML_QUOTE_DOUBLE))) {
                                   invalid_code = 1;
                            }
                     }

                     if (next != NULL && *next == ';' && !invalid_code) {
                            switch (charset) {
                                   case cs_utf_8:
                                          q += php_utf32_utf8(q, code);
                                          break;

                                   case cs_8859_1:
                                   case cs_8859_5:
                                   case cs_8859_15:
                                          if ((code >= 0x80 && code < 0xa0) || code > 0xff) {
                                                 invalid_code = 1;
                                          } else {
                                                 *(q++) = code;
                                          }
                                          break;

                                   case cs_cp1252:
                                          if (code > 0xff) {
                                                 invalid_code = 1;
                                          } else {
                                                 *(q++) = code;
                                          }
                                          break;

                                   case cs_cp1251:
                                   case cs_cp866:
                                   case cs_big5:
                                   case cs_big5hkscs:
                                   case cs_sjis:
                                   case cs_eucjp:
                                          if (code >= 0x80) {
                                                 invalid_code = 1;
                                          } else {
                                                 *(q++) = code;
                                          }
                                          break;

                                   case cs_gb2312:
                                          if (code >= 0x81) {
                                                 invalid_code = 1;
                                          } else {
                                                 *(q++) = code;
                                          }
                                          break;

                                   default:
                                          /* for backwards compatibility */
                                          invalid_code = 1;
                                          break;
                            }
                            if (invalid_code) {
                                   /* not representable in this charset: keep the reference as is */
                                   for (; p <= next; p++) {
                                          *(q++) = *p;
                                   }
                            }
                            p = next + 1;
                     } else {
                            /* not a usable reference: emit "&#" literally, the rest is
                             * copied by the following iterations */
                            *(q++) = *(p++);
                            *(q++) = *(p++);
                     }
              } else if (p + 4 < lim &&
                                   p[1] == 'a' && p[2] == 'm' && p[3] == 'p' &&
                                   p[4] == ';') {
                     *(q++) = '&';
                     p += 5;
              } else {
                     /* lone '&': copy only this byte so that an '&' right behind it
                      * (as in "&&amp;") is still examined */
                     *(q++) = *(p++);
              }
       }
       *q = '\0';
       retlen = (int)(q - ret);

       *newlen = retlen;
       return ret;
}

Here is the call graph for this function:

Here is the caller graph for this function: