Revert "Handle scanner error in first place (don't hide them from ext/tokenizer) and cheaper whitespace handlig."

This reverts commit 0d6da03f5c.
This commit is contained in:
Dmitry Stogov 2018-03-14 01:08:03 +03:00
parent 0d6da03f5c
commit d1585a9e3f
6 changed files with 233 additions and 252 deletions

View File

@@ -1690,16 +1690,25 @@ again:
ZVAL_UNDEF(&zv);
start_lineno = CG(zend_lineno);
retval = lex_scan(&zv);
if (EG(exception)) {
return T_ERROR;
}
if (retval >= T_WHITESPACE) {
if (EXPECTED(retval < T_OPEN_TAG_WITH_ECHO)) {
switch (retval) {
case T_COMMENT:
case T_DOC_COMMENT:
case T_OPEN_TAG:
case T_WHITESPACE:
goto again;
} else if (retval == T_OPEN_TAG_WITH_ECHO) {
retval = T_ECHO;
} else if (retval == T_CLOSE_TAG) {
case T_CLOSE_TAG:
retval = ';'; /* implicit ; */
}
} else if (Z_TYPE(zv) != IS_UNDEF) {
break;
case T_OPEN_TAG_WITH_ECHO:
retval = T_ECHO;
break;
}
if (Z_TYPE(zv) != IS_UNDEF) {
elem->ast = zend_ast_create_zval_with_lineno(&zv, 0, start_lineno);
}

View File

@@ -203,6 +203,12 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%token T_TRAIT_C "__TRAIT__ (T_TRAIT_C)"
%token T_METHOD_C "__METHOD__ (T_METHOD_C)"
%token T_FUNC_C "__FUNCTION__ (T_FUNC_C)"
%token T_COMMENT "comment (T_COMMENT)"
%token T_DOC_COMMENT "doc comment (T_DOC_COMMENT)"
%token T_OPEN_TAG "open tag (T_OPEN_TAG)"
%token T_OPEN_TAG_WITH_ECHO "open tag with echo (T_OPEN_TAG_WITH_ECHO)"
%token T_CLOSE_TAG "close tag (T_CLOSE_TAG)"
%token T_WHITESPACE "whitespace (T_WHITESPACE)"
%token T_START_HEREDOC "heredoc start (T_START_HEREDOC)"
%token T_END_HEREDOC "heredoc end (T_END_HEREDOC)"
%token T_DOLLAR_OPEN_CURLY_BRACES "${ (T_DOLLAR_OPEN_CURLY_BRACES)"
@@ -216,14 +222,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%token T_POW "** (T_POW)"
%token T_POW_EQUAL "**= (T_POW_EQUAL)"
/* Special tokens (ignorred by PHP compiler, T_WHITESPACE must be first) */
%token T_WHITESPACE "whitespace (T_WHITESPACE)"
%token T_COMMENT "comment (T_COMMENT)"
%token T_DOC_COMMENT "doc comment (T_DOC_COMMENT)"
%token T_OPEN_TAG "open tag (T_OPEN_TAG)"
%token T_OPEN_TAG_WITH_ECHO "open tag with echo (T_OPEN_TAG_WITH_ECHO)"
%token T_CLOSE_TAG "close tag (T_CLOSE_TAG)"
/* Token used to force a parse error from the lexer */
%token T_ERROR

File diff suppressed because it is too large Load Diff

View File

@@ -1663,7 +1663,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
RETURN_TOKEN(T_LNUMBER);
}
} else {
errno = 0;
@@ -1680,7 +1680,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
zend_throw_exception(zend_ce_parse_error,
"Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
RETURN_TOKEN(T_DNUMBER);
}
RETURN_TOKEN(T_DNUMBER);
}
@@ -1688,7 +1688,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
RETURN_TOKEN(T_DNUMBER);
}
}
ZEND_ASSERT(!errno);
@@ -2049,11 +2049,8 @@ inline_char_handler:
switch (*YYCURSOR++) {
case '"':
yyleng = YYCURSOR - SCNG(yy_text);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)) {
RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
} else {
RETURN_TOKEN(T_ERROR);
}
zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
case '$':
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
@@ -2219,11 +2216,8 @@ inline_char_handler:
double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') == SUCCESS)) {
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
} else {
RETURN_TOKEN(T_ERROR);
}
zend_scan_escape_string(zendlval, yytext, yyleng, '"');
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
@@ -2264,11 +2258,8 @@ double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') == SUCCESS)) {
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
} else {
RETURN_TOKEN(T_ERROR);
}
zend_scan_escape_string(zendlval, yytext, yyleng, '`');
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
@@ -2341,11 +2332,8 @@ double_quotes_scan_done:
heredoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == SUCCESS)) {
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
} else {
RETURN_TOKEN(T_ERROR);
}
zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}

View File

@@ -33,7 +33,7 @@ Invalid numeric literal
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR (078)
T_LNUMBER (078)
)
;
@@ -41,7 +41,7 @@ Invalid UTF-8 codepoint escape sequence
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR ("\u{xyz}")
T_CONSTANT_ENCAPSED_STRING ("\u{xyz}")
)
;
@@ -49,7 +49,7 @@ Invalid UTF-8 codepoint escape sequence: Codepoint too large
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR ("\u{ffffff}")
T_CONSTANT_ENCAPSED_STRING ("\u{ffffff}")
)
;
@@ -57,10 +57,10 @@ Invalid numeric literal
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR (078)
T_LNUMBER (078)
T_WHITESPACE ( )
+
T_WHITESPACE ( )
T_ERROR (078)
T_LNUMBER (078)
)
;

View File

@@ -163,7 +163,6 @@ void tokenizer_register_constants(INIT_FUNC_ARGS) {
REGISTER_LONG_CONSTANT("T_NS_SEPARATOR", T_NS_SEPARATOR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_ELLIPSIS", T_ELLIPSIS, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_ERROR", T_ERROR, CONST_CS | CONST_PERSISTENT);
}
char *get_token_type_name(int token_type)
@@ -303,7 +302,6 @@ char *get_token_type_name(int token_type)
case T_NS_C: return "T_NS_C";
case T_NS_SEPARATOR: return "T_NS_SEPARATOR";
case T_ELLIPSIS: return "T_ELLIPSIS";
case T_ERROR: return "T_ERROR";
}
return "UNKNOWN";