Back to index

php5  5.3.10
tokenizer.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Andrei Zmievski <andrei@php.net>                             |
00016    +----------------------------------------------------------------------+
00017 */
00018 
00019 /* $Id: tokenizer.c 321634 2012-01-01 13:15:04Z felipe $ */
00020 
00021 #ifdef HAVE_CONFIG_H
00022 #include "config.h"
00023 #endif
00024 
00025 #include "php.h"
00026 #include "php_ini.h"
00027 #include "ext/standard/info.h"
00028 #include "php_tokenizer.h"
00029 
00030 #include "zend.h"
00031 #include "zend_language_scanner.h"
00032 #include "zend_language_scanner_defs.h"
00033 #include <zend_language_parser.h>
00034 
/* Shorthands for the language scanner's yy_text / yy_leng globals; tokenize()
 * below stores these as the text (and its length) of each scanned token. */
#define zendtext	LANG_SCNG(yy_text)
#define zendleng	LANG_SCNG(yy_leng)
00037 
00038 /* {{{ arginfo */
00039 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
00040        ZEND_ARG_INFO(0, source)
00041 ZEND_END_ARG_INFO()
00042 
00043 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_name, 0, 0, 1)
00044        ZEND_ARG_INFO(0, token)
00045 ZEND_END_ARG_INFO()
00046 /* }}} */
00047 
00048 /* {{{ tokenizer_functions[]
00049  *
00050  * Every user visible function must have an entry in tokenizer_functions[].
00051  */
00052 const zend_function_entry tokenizer_functions[] = {
00053        PHP_FE(token_get_all,       arginfo_token_get_all)
00054        PHP_FE(token_name,          arginfo_token_name)
00055        PHP_FE_END
00056 };
00057 /* }}} */
00058 
00059 /* {{{ tokenizer_module_entry
00060  */
00061 zend_module_entry tokenizer_module_entry = {
00062 #if ZEND_MODULE_API_NO >= 20010901
00063        STANDARD_MODULE_HEADER,
00064 #endif
00065        "tokenizer",
00066        tokenizer_functions,
00067        PHP_MINIT(tokenizer),
00068        NULL,
00069        NULL,
00070        NULL,
00071        PHP_MINFO(tokenizer),
00072 #if ZEND_MODULE_API_NO >= 20010901
00073        "0.1", /* Replace with version number for your extension */
00074 #endif
00075        STANDARD_MODULE_PROPERTIES
00076 };
00077 /* }}} */
00078 
/* Emit the ZEND_GET_MODULE() glue only when COMPILE_DL_TOKENIZER is defined. */
#if defined(COMPILE_DL_TOKENIZER)
ZEND_GET_MODULE(tokenizer)
#endif /* COMPILE_DL_TOKENIZER */
00082 
00083 /* {{{ PHP_MINIT_FUNCTION
00084  */
00085 PHP_MINIT_FUNCTION(tokenizer)
00086 {
00087        tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
00088        return SUCCESS;
00089 }
00090 /* }}} */
00091 
/* {{{ PHP_MINFO_FUNCTION
 *
 * Module info handler: prints a one-row, two-column table stating that
 * tokenizer support is enabled.
 */
PHP_MINFO_FUNCTION(tokenizer)
{
	php_info_print_table_start();

	php_info_print_table_row(2, "Tokenizer Support", "enabled");

	php_info_print_table_end();
}
/* }}} */
00101 
00102 static void tokenize(zval *return_value TSRMLS_DC)
00103 {
00104        zval token;
00105        zval *keyword;
00106        int token_type;
00107        zend_bool destroy;
00108        int token_line = 1;
00109 
00110        array_init(return_value);
00111 
00112        ZVAL_NULL(&token);
00113        while ((token_type = lex_scan(&token TSRMLS_CC))) {
00114               destroy = 1;
00115               switch (token_type) {
00116                      case T_CLOSE_TAG:
00117                             if (zendtext[zendleng - 1] != '>') {
00118                                    CG(zend_lineno)++;
00119                             }
00120                      case T_OPEN_TAG:
00121                      case T_OPEN_TAG_WITH_ECHO:
00122                      case T_WHITESPACE:
00123                      case T_COMMENT:
00124                      case T_DOC_COMMENT:
00125                             destroy = 0;
00126                             break;
00127               }
00128 
00129               if (token_type >= 256) {
00130                      MAKE_STD_ZVAL(keyword);
00131                      array_init(keyword);
00132                      add_next_index_long(keyword, token_type);
00133                      if (token_type == T_END_HEREDOC) {
00134                             if (CG(increment_lineno)) {
00135                                    token_line = ++CG(zend_lineno);
00136                                    CG(increment_lineno) = 0;
00137                             }
00138                             add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
00139                             efree(Z_STRVAL(token));
00140                      } else {
00141                             add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
00142                      }
00143                      add_next_index_long(keyword, token_line);
00144                      add_next_index_zval(return_value, keyword);
00145               } else {
00146                      add_next_index_stringl(return_value, (char *)zendtext, zendleng, 1);
00147               }
00148               if (destroy && Z_TYPE(token) != IS_NULL) {
00149                      zval_dtor(&token);
00150               }
00151               ZVAL_NULL(&token);
00152 
00153               token_line = CG(zend_lineno);
00154 
00155               if (token_type == T_HALT_COMPILER) {
00156                      break;
00157               }
00158        }
00159 }
00160 
00161 /* {{{ proto array token_get_all(string source)
00162  */
00163 PHP_FUNCTION(token_get_all)
00164 {
00165        char *source = NULL;
00166        int argc = ZEND_NUM_ARGS();
00167        int source_len;
00168        zval source_z;
00169        zend_lex_state original_lex_state;
00170 
00171        if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE) 
00172               return;
00173 
00174        ZVAL_STRINGL(&source_z, source, source_len, 1);
00175        zend_save_lexical_state(&original_lex_state TSRMLS_CC);
00176 
00177        if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) {
00178               zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
00179               RETURN_EMPTY_STRING();
00180        }
00181 
00182        LANG_SCNG(yy_state) = yycINITIAL;
00183 
00184        tokenize(return_value TSRMLS_CC);
00185        
00186        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
00187        zval_dtor(&source_z);
00188 }
00189 /* }}} */
00190 
00191 /* {{{ proto string token_name(int type)
00192  */
00193 PHP_FUNCTION(token_name)
00194 {
00195        int argc = ZEND_NUM_ARGS();
00196        long type;
00197 
00198        if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) {
00199               return;
00200        }
00201        RETVAL_STRING(get_token_type_name(type), 1);
00202 }
00203 /* }}} */
00204 
00205 /*
00206  * Local variables:
00207  * tab-width: 4
00208  * c-basic-offset: 4
00209  * End:
00210  * vim600: noet sw=4 ts=4 fdm=marker
00211  * vim<600: noet sw=4 ts=4
00212  */