PHP scanner optimization

This commit is contained in:
Dmitry Stogov 2018-03-14 01:48:17 +03:00
parent d1585a9e3f
commit 8afb91cdad
6 changed files with 543 additions and 433 deletions

View File

@ -1675,44 +1675,16 @@ ZEND_API void zend_activate_auto_globals(void) /* {{{ */
}
/* }}} */
int zendlex(zend_parser_stack_elem *elem) /* {{{ */
int ZEND_FASTCALL zendlex(zend_parser_stack_elem *elem) /* {{{ */
{
zval zv;
int retval;
uint32_t start_lineno;
if (CG(increment_lineno)) {
CG(zend_lineno)++;
CG(increment_lineno) = 0;
}
again:
ZVAL_UNDEF(&zv);
start_lineno = CG(zend_lineno);
retval = lex_scan(&zv);
if (EG(exception)) {
return T_ERROR;
}
switch (retval) {
case T_COMMENT:
case T_DOC_COMMENT:
case T_OPEN_TAG:
case T_WHITESPACE:
goto again;
case T_CLOSE_TAG:
retval = ';'; /* implicit ; */
break;
case T_OPEN_TAG_WITH_ECHO:
retval = T_ECHO;
break;
}
if (Z_TYPE(zv) != IS_UNDEF) {
elem->ast = zend_ast_create_zval_with_lineno(&zv, 0, start_lineno);
}
return retval;
return lex_scan(&zv, elem);
}
/* }}} */

View File

@ -691,7 +691,7 @@ void zend_file_context_end(zend_file_context *prev_context);
extern ZEND_API zend_op_array *(*zend_compile_file)(zend_file_handle *file_handle, int type);
extern ZEND_API zend_op_array *(*zend_compile_string)(zval *source_string, char *filename);
ZEND_API int lex_scan(zval *zendlval);
ZEND_API int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem);
void startup_scanner(void);
void shutdown_scanner(void);
@ -800,7 +800,7 @@ ZEND_API zend_bool zend_is_auto_global_str(char *name, size_t len);
ZEND_API size_t zend_dirname(char *path, size_t len);
ZEND_API void zend_set_function_arg_flags(zend_function *func);
int zendlex(zend_parser_stack_elem *elem);
int ZEND_FASTCALL zendlex(zend_parser_stack_elem *elem);
int zend_add_literal(zend_op_array *op_array, zval *zv);

View File

@ -93,8 +93,7 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini
zend_printf("<code>");
zend_printf("<span style=\"color: %s\">\n", last_color);
/* highlight stuff coming back from lex_scan() */
ZVAL_UNDEF(&token);
while ((token_type=lex_scan(&token))) {
while ((token_type=lex_scan(&token, NULL))) {
switch (token_type) {
case T_INLINE_HTML:
next_color = syntax_highlighter_ini->highlight_html;
@ -180,8 +179,7 @@ ZEND_API void zend_strip(void)
int token_type;
int prev_space = 0;
ZVAL_UNDEF(&token);
while ((token_type=lex_scan(&token))) {
while ((token_type=lex_scan(&token, NULL))) {
switch (token_type) {
case T_WHITESPACE:
if (!prev_space) {
@ -197,7 +195,7 @@ ZEND_API void zend_strip(void)
case T_END_HEREDOC:
zend_write((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
/* read the following character, either newline or ; */
if (lex_scan(&token) != T_WHITESPACE) {
if (lex_scan(&token, NULL) != T_WHITESPACE) {
zend_write((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
}
zend_write("\n", sizeof("\n") - 1);

File diff suppressed because it is too large Load Diff

View File

@ -1094,22 +1094,25 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
return SUCCESS;
}
static zend_always_inline int emit_token(int token, int token_line)
/*
 * Token-dispatch helpers. They only make sense when expanded inside a function
 * that has an `elem` pointer and a `token` variable in scope and that defines
 * the `emit_token` and `skip_token` labels.
 */

/* True when a parser stack element was supplied (elem is non-NULL). */
#define PARSER_MODE() EXPECTED(elem != NULL)

/* Store the token id and jump to the emit_token label. */
#define RETURN_TOKEN(tok_id) do { token = tok_id; goto emit_token; } while (0)

/* Store the token id and jump to the skip_token label. */
#define SKIP_TOKEN(tok_id) do { token = tok_id; goto skip_token; } while (0)
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
{
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, token, token_line, SCNG(on_event_context));
}
return token;
}
#define RETURN_TOKEN(token) return emit_token(token, start_line);
int lex_scan(zval *zendlval)
{
int token;
int start_line = CG(zend_lineno);
ZVAL_UNDEF(zendlval);
restart:
SCNG(yy_text) = YYCURSOR;
@ -1299,6 +1302,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
/* One or more whitespace characters. HANDLE_NEWLINES is handed the matched
 * text and its length (presumably to keep the line counter in step; its
 * definition is not shown here). */
HANDLE_NEWLINES(yytext, yyleng);
/* With a parser stack element supplied, the token goes to skip_token and is
 * never handed back to the caller. */
if (PARSER_MODE()) {
SKIP_TOKEN(T_WHITESPACE);
}
/* Callers that pass no element receive T_WHITESPACE. */
RETURN_TOKEN(T_WHITESPACE);
}
@ -1663,6 +1669,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
RETURN_TOKEN(T_LNUMBER);
}
} else {
@ -1680,7 +1689,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
zend_throw_exception(zend_ce_parse_error,
"Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_DNUMBER);
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
}
RETURN_TOKEN(T_DNUMBER);
}
@ -1688,6 +1699,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
RETURN_TOKEN(T_DNUMBER);
}
}
@ -1788,6 +1802,9 @@ string:
<INITIAL>"<?=" {
/* Short echo tag seen in the INITIAL state; continue in ST_IN_SCRIPTING
 * (BEGIN presumably selects the scanner state; its definition is not shown). */
BEGIN(ST_IN_SCRIPTING);
/* With a parser stack element supplied, the tag is reported as T_ECHO. */
if (PARSER_MODE()) {
RETURN_TOKEN(T_ECHO);
}
/* Callers that pass no element receive the raw T_OPEN_TAG_WITH_ECHO. */
RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}
@ -1795,6 +1812,9 @@ string:
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
	/* "<?php" followed by a single blank, tab or newline. The trailing
	 * character is passed to HANDLE_NEWLINE before the state switch. */
	HANDLE_NEWLINE(yytext[yyleng-1]);
	BEGIN(ST_IN_SCRIPTING);
	/* PARSER_MODE() is defined as EXPECTED(elem != NULL); using the macro
	 * keeps this rule in line with the other rules that test for a parser
	 * stack element. With an element supplied the open tag is skipped. */
	if (PARSER_MODE()) {
		SKIP_TOKEN(T_OPEN_TAG);
	}
	/* Callers that pass no element receive T_OPEN_TAG. */
	RETURN_TOKEN(T_OPEN_TAG);
}
@ -1802,6 +1822,9 @@ string:
<INITIAL>"<?" {
if (CG(short_tags)) {
BEGIN(ST_IN_SCRIPTING);
if (EXPECTED(elem != NULL)) {
SKIP_TOKEN(T_OPEN_TAG);
}
RETURN_TOKEN(T_OPEN_TAG);
} else {
goto inline_char_handler;
@ -1928,6 +1951,9 @@ inline_char_handler:
yyleng = YYCURSOR - SCNG(yy_text);
if (EXPECTED(elem != NULL)) {
SKIP_TOKEN(T_COMMENT);
}
RETURN_TOKEN(T_COMMENT);
}
@ -1958,9 +1984,15 @@ inline_char_handler:
if (doc_com) {
CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
if (EXPECTED(elem != NULL)) {
SKIP_TOKEN(T_DOC_COMMENT);
}
RETURN_TOKEN(T_DOC_COMMENT);
}
if (EXPECTED(elem != NULL)) {
SKIP_TOKEN(T_COMMENT);
}
RETURN_TOKEN(T_COMMENT);
}
@ -1969,7 +2001,10 @@ inline_char_handler:
if (yytext[yyleng-1] != '>') {
CG(increment_lineno) = 1;
}
RETURN_TOKEN(T_CLOSE_TAG); /* implicit ';' at php-end tag */
if (PARSER_MODE()) {
RETURN_TOKEN(';'); /* implicit ';' at php-end tag */
}
RETURN_TOKEN(T_CLOSE_TAG);
}
@ -2049,8 +2084,12 @@ inline_char_handler:
switch (*YYCURSOR++) {
case '"':
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)
|| !PARSER_MODE()) {
RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
} else {
RETURN_TOKEN(T_ERROR);
}
case '$':
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
@ -2216,8 +2255,12 @@ inline_char_handler:
double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext, yyleng, '"');
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') == SUCCESS)
|| !PARSER_MODE()) {
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
} else {
RETURN_TOKEN(T_ERROR);
}
}
@ -2258,8 +2301,12 @@ double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext, yyleng, '`');
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') == SUCCESS)
|| !PARSER_MODE()) {
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
} else {
RETURN_TOKEN(T_ERROR);
}
}
@ -2332,8 +2379,12 @@ double_quotes_scan_done:
heredoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == SUCCESS)
|| !PARSER_MODE()) {
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
} else {
RETURN_TOKEN(T_ERROR);
}
}
@ -2404,4 +2455,22 @@ nowdoc_scan_done:
}
*/
/* Exit path for tokens that are handed back to the caller; RETURN_TOKEN jumps
 * here after storing the token id in `token`. */
emit_token:
/* Report the token to the on_event callback when one is installed. */
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
}
/* With a parser stack element supplied, a token whose value is not IS_UNDEF
 * has that value stored in elem->ast via zend_ast_create_zval_with_lineno(),
 * using the line recorded in start_line. */
if (PARSER_MODE()) {
if (Z_TYPE_P(zendlval) != IS_UNDEF) {
elem->ast = zend_ast_create_zval_with_lineno(zendlval, 0, start_line);
}
}
return token;
/* Path for tokens that are not returned; SKIP_TOKEN jumps here after storing
 * the token id in `token`. */
skip_token:
/* The on_event callback is still told about the skipped token. */
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
}
/* Take the current line as the start line of the next token and scan again. */
start_line = CG(zend_lineno);
goto restart;
}

View File

@ -144,8 +144,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
LANG_SCNG(yy_state) = yycINITIAL;
array_init(return_value);
ZVAL_UNDEF(&token);
while ((token_type = lex_scan(&token))) {
while ((token_type = lex_scan(&token, NULL))) {
add_token(return_value, token_type, zendtext, zendleng, token_line);
if (Z_TYPE(token) != IS_UNDEF) {