php-src/Zend/zend_language_scanner.l
Niels Dossche ac9964502c
Fix GH-10634: Lexing memory corruption (#10866)
We're not relying on re2c's bounds checking mechanism because
re2c:yyfill:check = 0; is set. We just return 0 if we read over the end
of the input in YYFILL. Note that we used to use the "any character"
wildcard in the comment regexes.
But that means if we go over the end in the comment regexes,
we don't know that and it's just like the 0 bytes are part of the token.
Since a 0 byte already is considered as an end-of-file, we can just block
those in the regex.

For the regexes with newlines, I had to not only include \x00 in the
denylist, but also \n and \r because otherwise it would greedily match
those and let the single-line comment run over multiple lines.
2023-03-17 17:09:14 +01:00

3155 lines
75 KiB
Plaintext

/*
+----------------------------------------------------------------------+
| Zend Engine |
+----------------------------------------------------------------------+
| Copyright (c) Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Marcus Boerger <helly@php.net> |
| Nuno Lopes <nlopess@php.net> |
| Scott MacVicar <scottmac@php.net> |
| Flex version authors: |
| Andi Gutmans <andi@php.net> |
| Zeev Suraski <zeev@php.net> |
+----------------------------------------------------------------------+
*/
/* Scanner tracing hook. With the condition below set to 0, YYDEBUG(s, c)
 * expands to nothing; changing it to 1 makes it print the state and the
 * character passed to it. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
#include "zend_language_scanner_defs.h"
#include <errno.h>
#include "zend.h"
#ifdef ZEND_WIN32
# include <Winuser.h>
#endif
#include "zend_alloc.h"
#include <zend_language_parser.h>
#include "zend_compile.h"
#include "zend_language_scanner.h"
#include "zend_highlight.h"
#include "zend_constants.h"
#include "zend_variables.h"
#include "zend_operators.h"
#include "zend_API.h"
#include "zend_strtod.h"
#include "zend_exceptions.h"
#include "zend_virtual_cwd.h"
/* Character type of the input buffer. */
#define YYCTYPE unsigned char
/* Makes the enclosing function return 0 once YYCURSOR + n reaches
 * YYLIMIT + ZEND_MMAP_AHEAD. No data is actually refilled here. */
#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
/* The scanner's cursor, limit and marker all live in the scanner globals. */
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)
/* The current start condition is stored in SCNG(yy_state). */
#define YYGETCONDITION() SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s
/* Maps a condition name to its yyc-prefixed identifier. */
#define STATE(name) yyc##name
/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE YYGETCONDITION()
#define yytext ((char*)SCNG(yy_text))
#define yyleng SCNG(yy_leng)
/* Moves the cursor to x bytes past the start of yytext and sets yyleng to x. */
#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + x; \
yyleng = (unsigned int)x; } while(0)
/* Jumps to the yymore_restart label (defined outside this part of the file). */
#define yymore() goto yymore_restart
/* perform sanity check. If this message is triggered you should
increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
/*!max:re2c */
#if ZEND_MMAP_AHEAD < YYMAXFILL
# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
#endif
#include <stdarg.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
/* Globals Macros */
/* SCNG(x) is shorthand for LANG_SCNG(x). */
#define SCNG LANG_SCNG
#ifdef ZTS
/* ZTS builds export an id and an offset instead of the globals structure
 * itself (presumably used to locate the per-thread copy -- confirm against
 * the LANG_SCNG definition). */
ZEND_API ts_rsrc_id language_scanner_globals_id;
ZEND_API size_t language_scanner_globals_offset;
#else
/* Non-ZTS builds use a single global instance of the scanner globals. */
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
/* Walks the l bytes starting at s and increments CG(zend_lineno) once for
 * every '\n' and for every '\r' that is not immediately followed by '\n'
 * (so "\r\n" counts as one line). Note that the '\r' test peeks at p+1,
 * which for the last byte is the byte just past the range. */
#define HANDLE_NEWLINES(s, l) \
do { \
char *p = (s), *boundary = p+(l); \
\
while (p<boundary) { \
if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
CG(zend_lineno)++; \
} \
p++; \
} \
} while (0)
/* Single-character variant: increments CG(zend_lineno) when c is '\n' or
 * '\r'. The argument is evaluated up to twice and is not parenthesized. */
#define HANDLE_NEWLINE(c) \
{ \
if (c == '\n' || c == '\r') { \
CG(zend_lineno)++; \
} \
}
/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() SCNG(scanned_string_len)
/* True for ASCII letters, '_' and any value >= 0x80. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x80)
/* Same as IS_LABEL_START, but additionally accepts the digits '0'..'9'. */
#define IS_LABEL_SUCCESSOR(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || (c) == '_' || (c) >= 0x80)
/* True for the octal digits '0'..'7'. */
#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
/* True for hexadecimal digits in either case. */
#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
/* Removes every '_' from the NUL-terminated string `str` in place and
 * decrements *len once per removed character. The result stays
 * NUL-terminated. */
static void strip_underscores(char *str, size_t *len)
{
	char *out = str;

	for (const char *in = str; *in != '\0'; in++) {
		if (*in == '_') {
			(*len)--;
			continue;
		}
		*out++ = *in;
	}
	*out = '\0';
}
/* Encoding filter: forwards the buffers to zend_multibyte_encoding_converter()
 * with the internal encoding and the current script encoding, in that order.
 * The internal encoding must be set (asserted). Returns whatever the
 * converter returns. */
static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	const zend_encoding *target = zend_multibyte_get_internal_encoding();
	size_t result;

	ZEND_ASSERT(target);
	result = zend_multibyte_encoding_converter(
		to, to_length, from, from_length,
		target, LANG_SCNG(script_encoding));
	return result;
}
/* Encoding filter: forwards the buffers to zend_multibyte_encoding_converter()
 * with UTF-8 and the current script encoding, in that order. Returns whatever
 * the converter returns. */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	size_t result = zend_multibyte_encoding_converter(
		to, to_length, from, from_length,
		zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));

	return result;
}
/* Encoding filter: forwards the buffers to zend_multibyte_encoding_converter()
 * with the current script encoding and UTF-8, in that order. Returns whatever
 * the converter returns. */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	size_t result = zend_multibyte_encoding_converter(
		to, to_length, from, from_length,
		LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);

	return result;
}
/* Encoding filter: forwards the buffers to zend_multibyte_encoding_converter()
 * with the internal encoding and UTF-8, in that order. The internal encoding
 * must be set (asserted). Returns whatever the converter returns. */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	const zend_encoding *target = zend_multibyte_get_internal_encoding();
	size_t result;

	ZEND_ASSERT(target);
	result = zend_multibyte_encoding_converter(
		to, to_length, from, from_length,
		target, zend_multibyte_encoding_utf8);
	return result;
}
/* Pushes the current scanner condition onto SCNG(state_stack) and then
 * switches the scanner to `new_state`. */
static void _yy_push_state(int new_state)
{
	void *current_condition = (void *) &YYGETCONDITION();

	zend_stack_push(&SCNG(state_stack), current_condition);
	YYSETCONDITION(new_state);
}
/* Convenience wrapper taking the bare condition name (the yyc prefix is added here). */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
/* Restores the scanner condition stored on top of SCNG(state_stack) and
 * removes that entry from the stack. */
static void yy_pop_state(void)
{
	int previous_condition = *(int *) zend_stack_top(&SCNG(state_stack));

	zend_stack_del_top(&SCNG(state_stack));
	YYSETCONDITION(previous_condition);
}
static void yy_scan_buffer(char *str, size_t len)
{
YYCURSOR = (YYCTYPE*)str;
YYLIMIT = YYCURSOR + len;
if (!SCNG(yy_start)) {
SCNG(yy_start) = YYCURSOR;
}
}
/* Resets the compiler flags the scanner touches and initializes the three
 * scanner stacks (state, nest location, heredoc label). */
void startup_scanner(void)
{
	/* Compiler globals. */
	CG(parse_error) = 0;
	CG(extra_fn_flags) = 0;
	CG(doc_comment) = NULL;

	/* Scanner stacks and heredoc look-ahead flag. */
	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
	zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
	zend_stack_init(&SCNG(state_stack), sizeof(int));
	SCNG(heredoc_scan_ahead) = 0;
}
/* Destructor for heredoc label stack entries: frees the label string only,
 * not the entry structure itself. */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
/* Tears down what startup_scanner() set up: clears the parse-error flag and
 * the doc comment, destroys the scanner stacks and drops the event callback. */
void shutdown_scanner(void)
{
CG(parse_error) = 0;
RESET_DOC_COMMENT();
zend_stack_destroy(&SCNG(state_stack));
zend_stack_destroy(&SCNG(nest_location_stack));
/* Run heredoc_label_dtor on the remaining entries before destroying the stack itself. */
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
SCNG(on_event) = NULL;
}
/* Copies the current scanner state (buffer pointers, stacks, input handle,
 * condition, filename/line, multibyte filter state, event callback, AST and
 * arena) into *lex_state so a nested scan can run and later be undone with
 * zend_restore_lexical_state(). */
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
{
lex_state->yy_leng = SCNG(yy_leng);
lex_state->yy_start = SCNG(yy_start);
lex_state->yy_text = SCNG(yy_text);
lex_state->yy_cursor = SCNG(yy_cursor);
lex_state->yy_marker = SCNG(yy_marker);
lex_state->yy_limit = SCNG(yy_limit);
/* Each stack is handed over to lex_state by value and the live one is
 * re-initialized, so the nested scan starts with empty stacks. */
lex_state->state_stack = SCNG(state_stack);
zend_stack_init(&SCNG(state_stack), sizeof(int));
lex_state->nest_location_stack = SCNG(nest_location_stack);
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
lex_state->in = SCNG(yy_in);
lex_state->yy_state = YYSTATE;
lex_state->filename = CG(compiled_filename);
lex_state->lineno = CG(zend_lineno);
/* The saved state now holds the filename; clear the live slot. */
CG(compiled_filename) = NULL;
lex_state->script_org = SCNG(script_org);
lex_state->script_org_size = SCNG(script_org_size);
lex_state->script_filtered = SCNG(script_filtered);
lex_state->script_filtered_size = SCNG(script_filtered_size);
lex_state->input_filter = SCNG(input_filter);
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
lex_state->on_event = SCNG(on_event);
lex_state->on_event_context = SCNG(on_event_context);
lex_state->ast = CG(ast);
lex_state->ast_arena = CG(ast_arena);
}
/* Reinstates the scanner state captured by zend_save_lexical_state(),
 * releasing the stacks and the filtered script buffer that belong to the
 * scan being abandoned. */
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
{
SCNG(yy_leng) = lex_state->yy_leng;
SCNG(yy_start) = lex_state->yy_start;
SCNG(yy_text) = lex_state->yy_text;
SCNG(yy_cursor) = lex_state->yy_cursor;
SCNG(yy_marker) = lex_state->yy_marker;
SCNG(yy_limit) = lex_state->yy_limit;
/* Destroy the current stacks before overwriting them with the saved ones. */
zend_stack_destroy(&SCNG(state_stack));
SCNG(state_stack) = lex_state->state_stack;
zend_stack_destroy(&SCNG(nest_location_stack));
SCNG(nest_location_stack) = lex_state->nest_location_stack;
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
SCNG(yy_in) = lex_state->in;
YYSETCONDITION(lex_state->yy_state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename);
/* Free the current filtered buffer first; the saved pointer is restored below. */
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
SCNG(script_org) = lex_state->script_org;
SCNG(script_org_size) = lex_state->script_org_size;
SCNG(script_filtered) = lex_state->script_filtered;
SCNG(script_filtered_size) = lex_state->script_filtered_size;
SCNG(input_filter) = lex_state->input_filter;
SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
SCNG(on_event) = lex_state->on_event;
SCNG(on_event_context) = lex_state->on_event_context;
CG(ast) = lex_state->ast;
CG(ast_arena) = lex_state->ast_arena;
RESET_DOC_COMMENT();
}
/* Builds a string zval from the leading run of ASCII letters and underscores
 * at `ident`. Digits are not part of the run.
 *
 * Returns SUCCESS after storing the string in `zv` (and notifying the
 * on_event callback with T_STRING, if one is installed). If the run is empty,
 * the input is asserted to be "<?=", a parse error is thrown and FAILURE is
 * returned. */
ZEND_API zend_result zend_lex_tstring(zval *zv, unsigned char *ident)
{
	size_t length = 0;

	for (;;) {
		unsigned char c = ident[length];
		bool is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

		if (!is_letter && c != '_') {
			break;
		}
		length++;
	}

	if (length == 0) {
		ZEND_ASSERT(ident[0] == '<' && ident[1] == '?' && ident[2] == '=');
		zend_throw_exception(zend_ce_parse_error, "Cannot use \"<?=\" as an identifier", 0);
		return FAILURE;
	}

	if (SCNG(on_event)) {
		SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, (char *) ident, length, SCNG(on_event_context));
	}

	ZVAL_STRINGL(zv, (char *) ident, length);
	return SUCCESS;
}
/* Byte order marks compared against the start of the script by
 * zend_multibyte_detect_unicode(). Lengths are taken with sizeof(...)-1 so
 * the embedded NUL bytes of the UTF-32 marks are counted. */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8 "\xef\xbb\xbf"
/* Best-effort guess of a BOM-less UTF-16/UTF-32 encoding from the position
 * of NUL bytes in `script` (`script_size` bytes).
 *
 * Pass 1 decides the code unit width: a NUL byte followed by two more NUL
 * bytes selects 4-byte units, otherwise 2-byte units are assumed.
 * Pass 2 decides the byte order by looking at which end of a code unit is
 * NUL; big-endian is the default when no unit is conclusive.
 *
 * Returns one of the zend_multibyte_encoding_utf{16,32}{le,be} encodings. */
static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
{
	const unsigned char *p;
	int wchar_size = 2;
	int le = 0;

	/* utf-16 or utf-32? */
	p = script;
	assert(p >= script);
	while ((size_t)(p-script) < script_size) {
		size_t remaining = script_size - (size_t)(p - script);

		/* The probe below reads p[1] and p[2], so the search window is
		 * `remaining - 2` bytes. With two or fewer bytes left there is
		 * nothing to search, and computing the window would wrap around
		 * size_t and hand memchr() a huge length. */
		if (remaining <= 2) {
			break;
		}
		p = memchr(p, 0, remaining - 2);
		if (!p) {
			break;
		}
		if (*(p+1) == '\0' && *(p+2) == '\0') {
			wchar_size = 4;
			break;
		}
		/* searching for UTF-32 specific byte orders, so this will do */
		p += 4;
	}

	/* BE or LE? */
	p = script;
	assert(p >= script);
	/* NOTE(review): for a trailing partial code unit, p[wchar_size-1] lies
	 * past script_size; this looks like it relies on the caller's buffer
	 * having readable bytes after the script -- confirm. */
	while ((size_t)(p-script) < script_size) {
		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
			/* BE */
			le = 0;
			break;
		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
			/* LE* */
			le = 1;
			break;
		}
		p += wchar_size;
	}

	if (wchar_size == 2) {
		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
	} else {
		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
	}
}
/* Tries to determine a Unicode script encoding from the raw script buffer
 * (LANG_SCNG(script_org) / LANG_SCNG(script_org_size)).
 *
 * - Scripts shorter than the 4-byte UTF-32 BOM are not examined (NULL).
 * - If the script starts with a known BOM, the BOM is stripped from the
 *   buffer (pointer advanced, size reduced) and the matching encoding is
 *   returned.
 * - Otherwise, if the script contains a NUL byte, the text before the first
 *   NUL is searched for "__HALT_COMPILER" followed by "(", ")" and ";" with
 *   optional whitespace in between. If found, NULL is returned; if not, the
 *   encoding is guessed by zend_multibyte_detect_utf_encoding().
 * - Returns NULL when there is neither a BOM nor a NUL byte. */
static const zend_encoding* zend_multibyte_detect_unicode(void)
{
const zend_encoding *script_encoding = NULL;
/* Only assigned (and only read) when a BOM matched. */
int bom_size;
unsigned char *pos1, *pos2;
if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
return NULL;
}
/* check out BOM */
/* The UTF-32 LE mark is tested before the UTF-16 LE mark because the
 * former begins with the bytes of the latter. */
if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
script_encoding = zend_multibyte_encoding_utf32be;
bom_size = sizeof(BOM_UTF32_BE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
script_encoding = zend_multibyte_encoding_utf32le;
bom_size = sizeof(BOM_UTF32_LE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
script_encoding = zend_multibyte_encoding_utf16be;
bom_size = sizeof(BOM_UTF16_BE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
script_encoding = zend_multibyte_encoding_utf16le;
bom_size = sizeof(BOM_UTF16_LE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
script_encoding = zend_multibyte_encoding_utf8;
bom_size = sizeof(BOM_UTF8)-1;
}
if (script_encoding) {
/* remove BOM */
LANG_SCNG(script_org) += bom_size;
LANG_SCNG(script_org_size) -= bom_size;
return script_encoding;
}
/* script contains NULL bytes -> auto-detection */
if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
/* check if the NULL byte is after the __HALT_COMPILER(); */
pos2 = LANG_SCNG(script_org);
/* pos1 is the first NUL byte; keep searching while at least a full
 * "__HALT_COMPILER();" could still fit between pos2 and pos1. */
while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
pos2 = memchr(pos2, '_', pos1 - pos2);
if (!pos2) break;
/* Step over the first underscore; the comparison below covers the rest. */
pos2++;
if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
pos2 += sizeof("_HALT_COMPILER")-1;
while (*pos2 == ' ' ||
*pos2 == '\t' ||
*pos2 == '\r' ||
*pos2 == '\n') {
pos2++;
}
if (*pos2 == '(') {
pos2++;
while (*pos2 == ' ' ||
*pos2 == '\t' ||
*pos2 == '\r' ||
*pos2 == '\n') {
pos2++;
}
if (*pos2 == ')') {
pos2++;
while (*pos2 == ' ' ||
*pos2 == '\t' ||
*pos2 == '\r' ||
*pos2 == '\n') {
pos2++;
}
if (*pos2 == ';') {
/* The NUL byte comes after __HALT_COMPILER(); -- no auto-detection. */
return NULL;
}
}
}
}
}
/* make best effort if BOM is missing */
return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
}
return NULL;
}
static const zend_encoding* zend_multibyte_find_script_encoding(void)
{
const zend_encoding *script_encoding;
if (CG(detect_unicode)) {
/* check out bom(byte order mark) and see if containing wchars */
script_encoding = zend_multibyte_detect_unicode();
if (script_encoding != NULL) {
/* bom or wchar detection is prior to 'script_encoding' option */
return script_encoding;
}
}
/* if no script_encoding specified, just leave alone */
if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
return NULL;
}
/* if multiple encodings specified, detect automagically */
if (CG(script_encoding_list_size) > 1) {
return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
}
return CG(script_encoding_list)[0];
}
/* Selects the script encoding and installs the matching input/output
 * encoding filters in the scanner globals.
 *
 * `onetime_encoding` is used as the script encoding when non-NULL; otherwise
 * the encoding is looked up with zend_multibyte_find_script_encoding().
 * Returns FAILURE (leaving the globals untouched) when no script encoding
 * could be determined, SUCCESS otherwise. */
ZEND_API zend_result zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
{
	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
	const zend_encoding *script_encoding = onetime_encoding;

	if (script_encoding == NULL) {
		script_encoding = zend_multibyte_find_script_encoding();
		if (script_encoding == NULL) {
			return FAILURE;
		}
	}

	/* judge input/output filter */
	LANG_SCNG(script_encoding) = script_encoding;
	LANG_SCNG(input_filter) = NULL;
	LANG_SCNG(output_filter) = NULL;

	if (internal_encoding == NULL || script_encoding == internal_encoding) {
		/* Script and internal encodings coincide (or no internal encoding is
		 * set): filters are only needed when the lexer cannot read the script
		 * encoding directly. */
		if (!zend_multibyte_check_lexer_compatibility(script_encoding)) {
			/* work around w/ script_encoding -> utf-8 -> script_encoding conversion */
			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
		}
		return SUCCESS;
	}

	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
		/* convert once on input; output needs no filter */
		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
	} else if (zend_multibyte_check_lexer_compatibility(script_encoding)) {
		/* scan the script as is; convert on output */
		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
	} else {
		/* both script and internal encodings are incompatible w/ flex */
		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
	}
	return SUCCESS;
}
/* Prepares the scanner to read from `file_handle`.
 *
 * The handle is registered in CG(open_files) whether or not
 * zend_stream_fixup() succeeds. On success the (optionally encoding-filtered)
 * buffer is handed to the scanner, the start condition is chosen from
 * CG(skip_shebang), the compiled filename is set from the opened path (or the
 * handle's filename) and the line counter is reset to 1.
 *
 * Returns FAILURE if zend_stream_fixup() fails, SUCCESS otherwise; raises a
 * compile error if the reported size is (size_t)-1 or if the input filter
 * cannot convert the script. */
ZEND_API zend_result open_file_for_scanning(zend_file_handle *file_handle)
{
	char *buf;
	size_t size;
	zend_string *compiled_filename;

	if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
		/* Still add it to open_files to make destroy_file_handle work */
		zend_llist_add_element(&CG(open_files), file_handle);
		file_handle->in_list = 1;
		return FAILURE;
	}

	ZEND_ASSERT(!EG(exception) && "stream_fixup() should have failed");
	zend_llist_add_element(&CG(open_files), file_handle);
	file_handle->in_list = 1;

	/* Reset the scanner for scanning the new file */
	SCNG(yy_in) = file_handle;
	SCNG(yy_start) = NULL;

	if (size == (size_t)-1) {
		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
	}

	if (CG(multibyte)) {
		SCNG(script_org) = (unsigned char*)buf;
		SCNG(script_org_size) = size;
		SCNG(script_filtered) = NULL;

		zend_multibyte_set_filter(NULL);

		if (SCNG(input_filter)) {
			size_t converted = SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size));

			if (converted == (size_t)-1) {
				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
			}
			/* Scan the converted copy instead of the raw file contents. */
			buf = (char*)SCNG(script_filtered);
			size = SCNG(script_filtered_size);
		}
	}
	SCNG(yy_start) = (unsigned char *)buf;
	yy_scan_buffer(buf, size);

	YYSETCONDITION(CG(skip_shebang) ? STATE(SHEBANG) : STATE(INITIAL));

	compiled_filename = zend_string_copy(
		file_handle->opened_path ? file_handle->opened_path : file_handle->filename);
	zend_set_compiled_filename(compiled_filename);
	zend_string_release_ex(compiled_filename, 0);

	RESET_DOC_COMMENT();
	CG(zend_lineno) = 1;
	CG(increment_lineno) = 0;
	return SUCCESS;
}
/* Parses the input the scanner is currently set up for and, if zendparse()
 * returns 0, compiles the resulting AST into a newly allocated op_array of
 * the given `type`.
 *
 * Returns the op_array, or NULL when zendparse() reported failure. The AST
 * and its arena are destroyed in both cases, and CG(in_compilation) is
 * restored to its previous value. */
static zend_op_array *zend_compile(int type)
{
zend_op_array *op_array = NULL;
bool original_in_compilation = CG(in_compilation);
CG(in_compilation) = 1;
CG(ast) = NULL;
CG(ast_arena) = zend_arena_create(1024 * 32);
if (!zendparse()) {
/* Remember the line the parser ended on; it is re-applied after compiling the statements. */
int last_lineno = CG(zend_lineno);
zend_file_context original_file_context;
zend_oparray_context original_oparray_context;
zend_op_array *original_active_op_array = CG(active_op_array);
op_array = emalloc(sizeof(zend_op_array));
init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
CG(active_op_array) = op_array;
/* Use heap to not waste arena memory */
op_array->fn_flags |= ZEND_ACC_HEAP_RT_CACHE;
/* Optional AST hook: only invoked when one is installed. */
if (zend_ast_process) {
zend_ast_process(CG(ast));
}
zend_file_context_begin(&original_file_context);
zend_oparray_context_begin(&original_oparray_context);
zend_compile_top_stmt(CG(ast));
CG(zend_lineno) = last_lineno;
zend_emit_final_return(type == ZEND_USER_FUNCTION);
op_array->line_start = 1;
op_array->line_end = last_lineno;
pass_two(op_array);
/* Contexts are ended in the reverse order they were begun. */
zend_oparray_context_end(&original_oparray_context);
zend_file_context_end(&original_file_context);
CG(active_op_array) = original_active_op_array;
}
zend_ast_destroy(CG(ast));
zend_arena_destroy(CG(ast_arena));
CG(in_compilation) = original_in_compilation;
return op_array;
}
/* Compiles the file behind `file_handle`.
 *
 * The current lexical state is saved before and restored after the attempt.
 * If the file cannot be opened for scanning and no exception is pending, a
 * "failed require" or "failed include" message (chosen by `type`) is
 * dispatched. Returns the compiled op_array, or NULL on failure. */
ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
{
	zend_lex_state saved_lex_state;
	zend_op_array *compiled = NULL;

	zend_save_lexical_state(&saved_lex_state);

	if (open_file_for_scanning(file_handle) != FAILURE) {
		compiled = zend_compile(ZEND_USER_FUNCTION);
	} else if (!EG(exception)) {
		zend_message_dispatcher(
			type == ZEND_REQUIRE ? ZMSG_FAILED_REQUIRE_FOPEN : ZMSG_FAILED_INCLUDE_FOPEN,
			ZSTR_VAL(file_handle->filename));
	}

	zend_restore_lexical_state(&saved_lex_state);
	return compiled;
}
/* Parses `code` (scanned from the INITIAL condition, under the name
 * `filename`) and returns its AST.
 *
 * The arena created for the parse is stored in *ast_arena. When zendparse()
 * fails, the AST and the arena are destroyed and NULL is returned; note that
 * *ast_arena is still assigned the (already destroyed) arena pointer in that
 * case. The caller's lexical state and CG(in_compilation) are restored
 * before returning. */
ZEND_API zend_ast *zend_compile_string_to_ast(
zend_string *code, zend_arena **ast_arena, zend_string *filename) {
zval code_zv;
bool original_in_compilation;
zend_lex_state original_lex_state;
zend_ast *ast;
/* Work on our own reference; it is released at the end of the function. */
ZVAL_STR_COPY(&code_zv, code);
original_in_compilation = CG(in_compilation);
CG(in_compilation) = 1;
zend_save_lexical_state(&original_lex_state);
zend_prepare_string_for_scanning(&code_zv, filename);
CG(ast) = NULL;
CG(ast_arena) = zend_arena_create(1024 * 32);
LANG_SCNG(yy_state) = yycINITIAL;
if (zendparse() != 0) {
zend_ast_destroy(CG(ast));
zend_arena_destroy(CG(ast_arena));
CG(ast) = NULL;
}
/* restore_lexical_state changes CG(ast) and CG(ast_arena) */
ast = CG(ast);
*ast_arena = CG(ast_arena);
zend_restore_lexical_state(&original_lex_state);
CG(in_compilation) = original_in_compilation;
zval_ptr_dtor_str(&code_zv);
return ast;
}
/* Compiles the file named `filename` through zend_compile_file().
 *
 * When compilation succeeds and the handle has an underlying stream handle,
 * the opened path (or `filename` itself if no opened path was recorded) is
 * added to EG(included_files). The file handle is destroyed before
 * returning. Returns the compiled op_array or NULL. */
zend_op_array *compile_filename(int type, zend_string *filename)
{
	zend_file_handle file_handle;
	zend_op_array *retval;

	zend_stream_init_filename_ex(&file_handle, filename);
	retval = zend_compile_file(&file_handle, type);

	if (retval != NULL && file_handle.handle.stream.handle) {
		zend_string *fallback_path = NULL;

		if (file_handle.opened_path == NULL) {
			/* No opened path was recorded: fall back to the requested name. */
			fallback_path = zend_string_copy(filename);
			file_handle.opened_path = fallback_path;
		}

		zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);

		if (fallback_path != NULL) {
			zend_string_release_ex(fallback_path, 0);
		}
	}

	zend_destroy_file_handle(&file_handle);
	return retval;
}
/* Prepares the scanner to read from the string zval `str`.
 *
 * The string is extended in place by ZEND_MMAP_AHEAD zero bytes (so the zval
 * is modified), optionally run through the multibyte input filter, and handed
 * to the scanner. `filename` becomes the compiled filename and the line
 * counter is reset to 1. Raises a compile error if the input filter cannot
 * convert the script. */
ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename)
{
	const size_t source_len = Z_STRLEN_P(str);
	char *buf;
	size_t size;

	/* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
	Z_STR_P(str) = zend_string_extend(Z_STR_P(str), source_len + ZEND_MMAP_AHEAD, 0);
	Z_TYPE_INFO_P(str) = IS_STRING_EX;
	memset(Z_STRVAL_P(str) + source_len, 0, ZEND_MMAP_AHEAD + 1);

	SCNG(yy_in) = NULL;
	SCNG(yy_start) = NULL;

	/* The scanner sees only the original length; the padding lies beyond the limit. */
	buf = Z_STRVAL_P(str);
	size = source_len;

	if (CG(multibyte)) {
		SCNG(script_org) = (unsigned char*)buf;
		SCNG(script_org_size) = size;
		SCNG(script_filtered) = NULL;

		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());

		if (SCNG(input_filter)) {
			size_t converted = SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size));

			if (converted == (size_t)-1) {
				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
			}
			buf = (char*)SCNG(script_filtered);
			size = SCNG(script_filtered_size);
		}
	}

	yy_scan_buffer(buf, size);
	zend_set_compiled_filename(filename);
	CG(zend_lineno) = 1;
	CG(increment_lineno) = 0;
	RESET_DOC_COMMENT();
}
/* Returns how far the scanner has advanced, expressed as an offset into the
 * original (unfiltered) script.
 *
 * Without an input filter this is simply the cursor's distance from the start
 * of the buffer. With a filter, prefixes of the original script are converted
 * repeatedly, moving the candidate offset by one each round, until the
 * converted length equals the cursor's distance in the filtered buffer.
 * Returns (size_t)-1 if a conversion fails. */
ZEND_API size_t zend_get_scanned_file_offset(void)
{
	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);

	if (!SCNG(input_filter)) {
		return offset;
	}

	const size_t filtered_offset = offset;
	size_t converted_len = 0;

	for (;;) {
		unsigned char *converted = NULL;

		if (SCNG(input_filter)(&converted, &converted_len, SCNG(script_org), offset) == (size_t)-1) {
			return (size_t)-1;
		}
		efree(converted);

		if (converted_len == filtered_offset) {
			break;
		}
		if (converted_len > filtered_offset) {
			offset--;
		} else {
			offset++;
		}
	}
	return offset;
}
/* Compiles `source_string` as eval code, reporting it under `filename`.
 *
 * `position` selects the scanner's start condition (at a shebang line, at an
 * open tag, or already inside scripting). Returns NULL for an empty source
 * string without touching the scanner, otherwise whatever zend_compile()
 * returns. The caller's lexical state is restored before returning. */
zend_op_array *compile_string(zend_string *source_string, const char *filename, zend_compile_position position)
{
	zend_lex_state saved_lex_state;
	zend_op_array *compiled;
	zend_string *filename_zstr;
	zval source_zv;

	if (!ZSTR_LEN(source_string)) {
		return NULL;
	}

	ZVAL_STR_COPY(&source_zv, source_string);
	zend_save_lexical_state(&saved_lex_state);

	filename_zstr = zend_string_init(filename, strlen(filename), 0);
	zend_prepare_string_for_scanning(&source_zv, filename_zstr);
	zend_string_release(filename_zstr);

	if (position == ZEND_COMPILE_POSITION_AT_SHEBANG) {
		BEGIN(SHEBANG);
	} else if (position == ZEND_COMPILE_POSITION_AT_OPEN_TAG) {
		BEGIN(INITIAL);
	} else if (position == ZEND_COMPILE_POSITION_AFTER_OPEN_TAG) {
		BEGIN(ST_IN_SCRIPTING);
	}

	compiled = zend_compile(ZEND_EVAL_CODE);

	zend_restore_lexical_state(&saved_lex_state);
	zval_ptr_dtor(&source_zv);
	return compiled;
}
/* Syntax-highlights the file `filename` using `syntax_highlighter_ini`.
 *
 * Returns FAILURE (after dispatching a "failed highlight" message) when the
 * file cannot be opened for scanning, SUCCESS otherwise. In both cases the
 * file handle is destroyed and the caller's lexical state is restored. */
zend_result highlight_file(const char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
{
	zend_lex_state saved_lex_state;
	zend_file_handle file_handle;
	zend_result result = SUCCESS;

	zend_stream_init_filename(&file_handle, filename);
	zend_save_lexical_state(&saved_lex_state);

	if (open_file_for_scanning(&file_handle) == FAILURE) {
		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
		result = FAILURE;
	} else {
		zend_highlight(syntax_highlighter_ini);
		/* Release the encoding-converted copy of the script, if one was made. */
		if (SCNG(script_filtered)) {
			efree(SCNG(script_filtered));
			SCNG(script_filtered) = NULL;
		}
	}

	zend_destroy_file_handle(&file_handle);
	zend_restore_lexical_state(&saved_lex_state);
	return result;
}
/* Syntax-highlights the code in `str` using `syntax_highlighter_ini`,
 * scanning from the INITIAL condition and reporting it under `filename`.
 * The caller's lexical state is restored before returning. */
void highlight_string(zend_string *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, const char *filename)
{
	zend_lex_state saved_lex_state;
	zval source_zv;
	zend_string *filename_zstr;

	filename_zstr = zend_string_init(filename, strlen(filename), 0);
	ZVAL_STR_COPY(&source_zv, str);

	zend_save_lexical_state(&saved_lex_state);
	zend_prepare_string_for_scanning(&source_zv, filename_zstr);
	zend_string_release(filename_zstr);

	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);

	/* Release the encoding-converted copy of the script, if one was made. */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}

	zend_restore_lexical_state(&saved_lex_state);
	zval_ptr_dtor(&source_zv);
}
/* Re-derives the scanner's input buffer after the input filter has changed.
 *
 * With no input filter installed, any previously filtered copy is freed and
 * the scanner goes back to the original script. Otherwise the original
 * script is converted again and the result replaces the old filtered copy.
 * In both cases the cursor, marker and text pointers keep their offsets
 * relative to the start of the buffer. Raises a compile error if the
 * conversion fails. Neither parameter is read by this function. */
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
{
	size_t length;
	unsigned char *new_yy_start;

	/* convert and set */
	if (SCNG(input_filter)) {
		size_t converted = SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size));

		if (converted == (size_t)-1) {
			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
		}
		if (SCNG(script_filtered)) {
			efree(SCNG(script_filtered));
		}
		SCNG(script_filtered) = new_yy_start;
		SCNG(script_filtered_size) = length;
	} else {
		if (SCNG(script_filtered)) {
			efree(SCNG(script_filtered));
			SCNG(script_filtered) = NULL;
		}
		SCNG(script_filtered_size) = 0;
		length = SCNG(script_org_size);
		new_yy_start = SCNG(script_org);
	}

	/* Rebase every buffer pointer onto the new buffer; yy_start is updated
	 * last because the other offsets are measured from its old value. */
	SCNG(yy_limit) = new_yy_start + length;
	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
	SCNG(yy_start) = new_yy_start;
}
// TODO: avoid reallocation ???
/* Stores the yyleng bytes at yytext into zendlval as a string.
 * - With an output filter installed, the text is converted first and the
 *   converted buffer is freed after being copied into the zval.
 * - A single byte is stored as the interned one-character string.
 * - Anything else is copied verbatim.
 * Note: this expands to a bare if/else chain (no do/while wrapper) and
 * evaluates its arguments more than once. */
# define zend_copy_value(zendlval, yytext, yyleng) \
if (SCNG(output_filter)) { \
size_t sz = 0; \
char *s = NULL; \
SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
ZVAL_STRINGL(zendlval, s, sz); \
efree(s); \
} else if (yyleng == 1) { \
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \
} else { \
ZVAL_STRINGL(zendlval, yytext, yyleng); \
}
/* Copies the `len` bytes at `str` into zendlval as a string, resolving
 * backslash escape sequences in place and counting newlines into
 * CG(zend_lineno) along the way.
 *
 * `quote_type` is the delimiter whose escaped form (\" or \`) collapses to
 * the bare character; the other of the two keeps its backslash.
 * Recognized escapes: \n \r \t \f \v \e \\ \$, \x or \X with one or two hex
 * digits, \u{hex...} (emitted as UTF-8) and one to three octal digits.
 * Unrecognized escapes are kept verbatim, backslash included.
 *
 * If an output filter is installed, the final string is converted with it.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for an invalid or
 * too large \u{...} escape; in that case zendlval is released and set to
 * UNDEF. */
static zend_result zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
char *s, *t;
char *end;
/* Strings of length 0 or 1 cannot contain a complete escape sequence. */
if (len <= 1) {
if (len < 1) {
ZVAL_EMPTY_STRING(zendlval);
} else {
zend_uchar c = (zend_uchar)*str;
if (c == '\n' || c == '\r') {
CG(zend_lineno)++;
}
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
}
goto skip_escape_conversion;
}
ZVAL_STRINGL(zendlval, str, len);
/* convert escape sequences */
s = Z_STRVAL_P(zendlval);
end = s+Z_STRLEN_P(zendlval);
/* Fast path: skip ahead to the first backslash. If there is none, the
 * copy made above is already the final value. */
while (1) {
if (UNEXPECTED(*s=='\\')) {
break;
}
if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
CG(zend_lineno)++;
}
s++;
if (s == end) {
goto skip_escape_conversion;
}
}
/* From here on s is the read position and t the write position within the
 * same buffer; t never overtakes s. */
t = s;
while (s<end) {
if (*s=='\\') {
s++;
/* A backslash as the very last byte is kept literally. */
if (s >= end) {
*t++ = '\\';
break;
}
switch(*s) {
case 'n':
*t++ = '\n';
break;
case 'r':
*t++ = '\r';
break;
case 't':
*t++ = '\t';
break;
case 'f':
*t++ = '\f';
break;
case 'v':
*t++ = '\v';
break;
case 'e':
#ifdef ZEND_WIN32
*t++ = VK_ESCAPE;
#else
*t++ = '\e';
#endif
break;
case '"':
case '`':
/* Only the delimiter of the current string type is unescaped. */
if (*s != quote_type) {
*t++ = '\\';
*t++ = *s;
break;
}
ZEND_FALLTHROUGH;
case '\\':
case '$':
*t++ = *s;
break;
case 'x':
case 'X':
if (ZEND_IS_HEX(*(s+1))) {
char hex_buf[3] = { 0, 0, 0 };
hex_buf[0] = *(++s);
if (ZEND_IS_HEX(*(s+1))) {
hex_buf[1] = *(++s);
}
*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
} else {
*t++ = '\\';
*t++ = *s;
}
break;
/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
case 'u':
{
/* cache where we started so we can parse after validating */
char *start = s + 1;
/* Counts the braces plus the hex digits; this shadows the function's `len` parameter. */
size_t len = 0;
bool valid = 1;
unsigned long codepoint;
if (*start != '{') {
/* we silently let this pass to avoid breaking code
* with JSON in string literals (e.g. "\"\u202e\""
*/
*t++ = '\\';
*t++ = 'u';
break;
} else {
/* on the other hand, invalid \u{blah} errors */
s++;
len++;
s++;
while (*s != '}') {
if (!ZEND_IS_HEX(*s)) {
valid = 0;
break;
} else {
len++;
}
s++;
}
if (*s == '}') {
valid = 1;
len++;
}
}
/* \u{} is invalid */
if (len <= 2) {
valid = 0;
}
if (!valid) {
zend_throw_exception(zend_ce_parse_error,
"Invalid UTF-8 codepoint escape sequence", 0);
zval_ptr_dtor(zendlval);
ZVAL_UNDEF(zendlval);
return FAILURE;
}
/* errno is cleared first so that a failure reported by strtoul() can be detected below. */
errno = 0;
codepoint = strtoul(start + 1, NULL, 16);
/* per RFC 3629, UTF-8 can only represent 21 bits */
if (codepoint > 0x10FFFF || errno) {
zend_throw_exception(zend_ce_parse_error,
"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
zval_ptr_dtor(zendlval);
ZVAL_UNDEF(zendlval);
return FAILURE;
}
/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
if (codepoint < 0x80) {
*t++ = codepoint;
} else if (codepoint <= 0x7FF) {
*t++ = (codepoint >> 6) + 0xC0;
*t++ = (codepoint & 0x3F) + 0x80;
} else if (codepoint <= 0xFFFF) {
*t++ = (codepoint >> 12) + 0xE0;
*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
*t++ = (codepoint & 0x3F) + 0x80;
} else if (codepoint <= 0x10FFFF) {
*t++ = (codepoint >> 18) + 0xF0;
*t++ = ((codepoint >> 12) & 0x3F) + 0x80;
*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
*t++ = (codepoint & 0x3F) + 0x80;
}
}
break;
default:
/* check for an octal */
if (ZEND_IS_OCT(*s)) {
char octal_buf[4] = { 0, 0, 0, 0 };
octal_buf[0] = *s;
if (ZEND_IS_OCT(*(s+1))) {
octal_buf[1] = *(++s);
if (ZEND_IS_OCT(*(s+1))) {
octal_buf[2] = *(++s);
}
}
/* The warning is not emitted while SCNG(heredoc_scan_ahead) is set. */
if (octal_buf[2] && (octal_buf[0] > '3') && !SCNG(heredoc_scan_ahead)) {
/* 3 octit values must not overflow 0xFF (\377) */
zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
}
*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
} else {
*t++ = '\\';
*t++ = *s;
}
break;
}
} else {
*t++ = *s;
}
if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
CG(zend_lineno)++;
}
s++;
}
/* Terminate the compacted string and record its new, possibly shorter, length. */
*t = 0;
Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
skip_escape_conversion:
if (SCNG(output_filter)) {
size_t sz = 0;
unsigned char *str;
// TODO: avoid reallocation ???
s = Z_STRVAL_P(zendlval);
SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
zval_ptr_dtor(zendlval);
ZVAL_STRINGL(zendlval, (char *) str, sz);
efree(str);
}
return SUCCESS;
}
/* Flag values for the indentation character of a heredoc body (presumably
 * combined as bits by the heredoc rules, which are outside this part of the
 * file -- confirm). */
#define HEREDOC_USING_SPACES 1
#define HEREDOC_USING_TABS 2
/* Finds the first line break in [str, end).
 *
 * Returns a pointer to the first '\r' or '\n' and stores the length of the
 * line break in *newline_len: 2 for "\r\n" lying fully inside the range,
 * otherwise 1. If the range contains no line break, returns NULL and stores 0. */
static const char *next_newline(const char *str, const char *end, size_t *newline_len) {
	const char *cur = str;

	while (cur < end) {
		if (*cur == '\n') {
			*newline_len = 1;
			return cur;
		}
		if (*cur == '\r') {
			/* Only treat "\r\n" as one break when the '\n' is inside the range. */
			if (cur + 1 < end && cur[1] == '\n') {
				*newline_len = 2;
			} else {
				*newline_len = 1;
			}
			return cur;
		}
		cur++;
	}

	*newline_len = 0;
	return NULL;
}
/* Removes up to `indentation` leading whitespace characters from each line
 * of the string in zendlval, compacting the string in place.
 *
 * `using_spaces` states which indentation character is expected; meeting
 * the other one within the stripped prefix is an error.
 * `newline_at_start` states whether the string begins at the start of a
 * line; if not, everything up to and including the first line break is left
 * untouched.
 * `newline_at_end` makes the end of the string act as a line end, so a final
 * whitespace-only line need not carry the full indentation.
 *
 * Returns 1 on success. On an indentation error a parse error is thrown,
 * CG(zend_lineno) is advanced by the number of lines processed so far,
 * zendlval is released and set to UNDEF, and 0 is returned. */
static bool strip_multiline_string_indentation(
zval *zendlval, int indentation, bool using_spaces,
bool newline_at_start, bool newline_at_end)
{
/* str is the read position, copy the write position; both walk the same buffer. */
const char *str = Z_STRVAL_P(zendlval), *end = str + Z_STRLEN_P(zendlval);
char *copy = Z_STRVAL_P(zendlval);
int newline_count = 0;
size_t newline_len;
const char *nl;
if (!newline_at_start) {
nl = next_newline(str, end, &newline_len);
/* A string without any line break has nothing to strip. */
if (!nl) {
return 1;
}
str = nl + newline_len;
copy = (char *) nl + newline_len;
newline_count++;
} else {
nl = str;
}
/* <= intentional */
while (str <= end && nl) {
size_t skip;
nl = next_newline(str, end, &newline_len);
if (!nl && newline_at_end) {
nl = end;
}
/* Try to skip indentation */
for (skip = 0; skip < indentation; skip++, str++) {
if (str == nl) {
/* Don't require full indentation on whitespace-only lines */
break;
}
if (str == end || (*str != ' ' && *str != '\t')) {
CG(zend_lineno) += newline_count;
zend_throw_exception_ex(zend_ce_parse_error, 0,
"Invalid body indentation level (expecting an indentation level of at least %d)", indentation);
goto error;
}
if ((!using_spaces && *str == ' ') || (using_spaces && *str == '\t')) {
CG(zend_lineno) += newline_count;
zend_throw_exception(zend_ce_parse_error,
"Invalid indentation - tabs and spaces cannot be mixed", 0);
goto error;
}
}
if (str == end) {
break;
}
/* Move the rest of the line (including its line break, if any) down to the
 * write position; memmove because source and destination can overlap. */
size_t len = nl ? (nl - str + newline_len) : (end - str);
memmove(copy, str, len);
str += len;
copy += len;
newline_count++;
}
*copy = '\0';
Z_STRLEN_P(zendlval) = copy - Z_STRVAL_P(zendlval);
return 1;
error:
zval_ptr_dtor_str(zendlval);
ZVAL_UNDEF(zendlval);
return 0;
}
static void copy_heredoc_label_stack(void *void_heredoc_label)
{
zend_heredoc_label *heredoc_label = void_heredoc_label;
zend_heredoc_label *new_heredoc_label = emalloc(sizeof(zend_heredoc_label));
*new_heredoc_label = *heredoc_label;
new_heredoc_label->label = estrndup(heredoc_label->label, heredoc_label->length);
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
}
/* Check that { }, [ ], ( ) are nested correctly */
/* Throw a ParseError describing an unclosed `opening` bracket.
 *
 * opening_lineno  line on which `opening` appeared; only mentioned in the message
 *                 when it differs from the current line.
 * closing         the mismatching closer, or 0 when the end of file was reached. */
static void report_bad_nesting(char opening, int opening_lineno, char closing)
{
	char message[256];
	size_t length = snprintf(message, sizeof(message), "Unclosed '%c'", opening);

	if (CG(zend_lineno) != opening_lineno) {
		length += snprintf(message + length, sizeof(message) - length, " on line %d", opening_lineno);
	}
	if (closing != '\0') { /* 'closing' is 0 at end of file */
		snprintf(message + length, sizeof(message) - length, " does not match '%c'", closing);
	}

	zend_throw_exception(zend_ce_parse_error, message, 0);
}
static void enter_nesting(char opening)
{
zend_nest_location nest_loc = {opening, CG(zend_lineno)};
zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
}
/* Match `closing` against the innermost recorded opening bracket.
 *
 * Returns SUCCESS and removes that opener from the nesting stack when they pair up.
 * Returns FAILURE after throwing a ParseError when the stack is empty or the opener
 * is of a different kind; in the latter case the opener stays on the stack. */
static zend_result exit_nesting(char closing)
{
	zend_nest_location *innermost;
	char expected;

	if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
		zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
		return FAILURE;
	}

	innermost = zend_stack_top(&SCNG(nest_location_stack));
	switch (innermost->text) {
		case '{':
			expected = '}';
			break;
		case '[':
			expected = ']';
			break;
		case '(':
			expected = ')';
			break;
		default:
			/* Any other recorded opener is never reported as a mismatch. */
			expected = closing;
			break;
	}

	if (closing != expected) {
		report_bad_nesting(innermost->text, innermost->lineno, closing);
		return FAILURE;
	}

	zend_stack_del_top(&SCNG(nest_location_stack));
	return SUCCESS;
}
/* Verify that no bracket is left open at the end of the input.
 *
 * Returns SUCCESS when the nesting stack is empty; otherwise reports the innermost
 * unclosed bracket through report_bad_nesting() and returns FAILURE. */
static zend_result check_nesting_at_end(void)
{
	zend_nest_location *unclosed;

	if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
		return SUCCESS;
	}

	unclosed = zend_stack_top(&SCNG(nest_location_stack));
	report_bad_nesting(unclosed->text, unclosed->lineno, 0);
	return FAILURE;
}
/* Helper macros for the rule actions of lex_scan(). They rely on its locals
 * `token` and `offset`, its `elem` parameter, and on labels reached via goto. */

/* True when lex_scan() was given a non-NULL `elem`.
 * NOTE(review): presumably `elem` is NULL when the scanner is driven by something
 * other than the parser (e.g. a tokenizer) - confirm against the callers. */
#define PARSER_MODE() \
EXPECTED(elem != NULL)
/* Finish the current rule with `_token`; jumps to the emit_token label. */
#define RETURN_TOKEN(_token) do { \
token = _token; \
goto emit_token; \
} while (0)
/* Like RETURN_TOKEN, but jumps to the emit_token_with_val label. */
#define RETURN_TOKEN_WITH_VAL(_token) do { \
token = _token; \
goto emit_token_with_val; \
} while (0)
/* Like RETURN_TOKEN, but also stores `_offset` in `offset` and jumps to the
 * emit_token_with_str label. */
#define RETURN_TOKEN_WITH_STR(_token, _offset) do { \
token = _token; \
offset = _offset; \
goto emit_token_with_str; \
} while (0)
/* Like RETURN_TOKEN, but jumps to the emit_token_with_ident label. */
#define RETURN_TOKEN_WITH_IDENT(_token) do { \
token = _token; \
goto emit_token_with_ident; \
} while (0)
/* In parser mode jump to the skip_token label; otherwise emit `_token` like
 * RETURN_TOKEN does. */
#define RETURN_OR_SKIP_TOKEN(_token) do { \
token = _token; \
if (PARSER_MODE()) { \
goto skip_token; \
} \
goto emit_token; \
} while (0)
/* Run exit_nesting() for a closing bracket. Returns T_ERROR if that fails in
 * parser mode, the bracket token itself otherwise. Note that `_token` is
 * evaluated more than once. */
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
if (exit_nesting(_token) != SUCCESS && PARSER_MODE()) { \
RETURN_TOKEN(T_ERROR); \
} else { \
RETURN_TOKEN(_token); \
} \
} while(0)
/* Return END, or T_ERROR if check_nesting_at_end() fails in parser mode. */
#define RETURN_END_TOKEN do { \
if (check_nesting_at_end() != SUCCESS && PARSER_MODE()) { \
RETURN_TOKEN(T_ERROR); \
} else { \
RETURN_TOKEN(END); \
} \
} while (0)
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
{
int token;
int offset;
int start_line = CG(zend_lineno);
ZVAL_UNDEF(zendlval);
restart:
SCNG(yy_text) = YYCURSOR;
/*!re2c
re2c:yyfill:check = 0;
LNUM [0-9]+(_[0-9]+)*
DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
BNUM "0b"[01]+(_[01]+)*
ONUM "0o"[0-7]+(_[0-7]+)*
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")
OPTIONAL_WHITESPACE [ \n\r\t]*
/* We don't use re2c with bounds checking, we just return 0 bytes if we read past the input.
* If we use wildcard matching for comments, we can read past the input, which crashes
* once we try to report a syntax error because the 0 bytes are not actually part of
* the token. We prevent this by not allowing 0 bytes, which already aren't valid anyway. */
MULTI_LINE_COMMENT "/*"([^*\x00]*"*"+)([^*/\x00][^*\x00]*"*"+)*"/"
SINGLE_LINE_COMMENT "//"[^\x00\n\r]*[\n\r]
HASH_COMMENT "#"(([^[\x00][^\x00\n\r]*[\n\r])|[\n\r])
WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_COMMENT}|{HASH_COMMENT})+
OPTIONAL_WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_COMMENT}|{HASH_COMMENT})*
/* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN_WITH_IDENT(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN_WITH_IDENT(T_EXIT);
}
<ST_IN_SCRIPTING>"fn" {
RETURN_TOKEN_WITH_IDENT(T_FN);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN_WITH_IDENT(T_FUNCTION);
}
<ST_IN_SCRIPTING>"const" {
RETURN_TOKEN_WITH_IDENT(T_CONST);
}
<ST_IN_SCRIPTING>"return" {
RETURN_TOKEN_WITH_IDENT(T_RETURN);
}
<ST_IN_SCRIPTING>"#[" {
enter_nesting('[');
RETURN_TOKEN(T_ATTRIBUTE);
}
<ST_IN_SCRIPTING>"yield"{WHITESPACE_OR_COMMENTS}"from"[^a-zA-Z0-9_\x80-\xff] {
yyless(yyleng - 1);
HANDLE_NEWLINES(yytext, yyleng);
RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM);
}
<ST_IN_SCRIPTING>"yield" {
RETURN_TOKEN_WITH_IDENT(T_YIELD);
}
<ST_IN_SCRIPTING>"try" {
RETURN_TOKEN_WITH_IDENT(T_TRY);
}
<ST_IN_SCRIPTING>"catch" {
RETURN_TOKEN_WITH_IDENT(T_CATCH);
}
<ST_IN_SCRIPTING>"finally" {
RETURN_TOKEN_WITH_IDENT(T_FINALLY);
}
<ST_IN_SCRIPTING>"throw" {
RETURN_TOKEN_WITH_IDENT(T_THROW);
}
<ST_IN_SCRIPTING>"if" {
RETURN_TOKEN_WITH_IDENT(T_IF);
}
<ST_IN_SCRIPTING>"elseif" {
RETURN_TOKEN_WITH_IDENT(T_ELSEIF);
}
<ST_IN_SCRIPTING>"endif" {
RETURN_TOKEN_WITH_IDENT(T_ENDIF);
}
<ST_IN_SCRIPTING>"else" {
RETURN_TOKEN_WITH_IDENT(T_ELSE);
}
<ST_IN_SCRIPTING>"while" {
RETURN_TOKEN_WITH_IDENT(T_WHILE);
}
<ST_IN_SCRIPTING>"endwhile" {
RETURN_TOKEN_WITH_IDENT(T_ENDWHILE);
}
<ST_IN_SCRIPTING>"do" {
RETURN_TOKEN_WITH_IDENT(T_DO);
}
<ST_IN_SCRIPTING>"for" {
RETURN_TOKEN_WITH_IDENT(T_FOR);
}
<ST_IN_SCRIPTING>"endfor" {
RETURN_TOKEN_WITH_IDENT(T_ENDFOR);
}
<ST_IN_SCRIPTING>"foreach" {
RETURN_TOKEN_WITH_IDENT(T_FOREACH);
}
<ST_IN_SCRIPTING>"endforeach" {
RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH);
}
<ST_IN_SCRIPTING>"declare" {
RETURN_TOKEN_WITH_IDENT(T_DECLARE);
}
<ST_IN_SCRIPTING>"enddeclare" {
RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE);
}
<ST_IN_SCRIPTING>"instanceof" {
RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF);
}
<ST_IN_SCRIPTING>"as" {
RETURN_TOKEN_WITH_IDENT(T_AS);
}
<ST_IN_SCRIPTING>"switch" {
RETURN_TOKEN_WITH_IDENT(T_SWITCH);
}
<ST_IN_SCRIPTING>"match" {
RETURN_TOKEN_WITH_IDENT(T_MATCH);
}
<ST_IN_SCRIPTING>"endswitch" {
RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH);
}
<ST_IN_SCRIPTING>"case" {
RETURN_TOKEN_WITH_IDENT(T_CASE);
}
<ST_IN_SCRIPTING>"default" {
RETURN_TOKEN_WITH_IDENT(T_DEFAULT);
}
<ST_IN_SCRIPTING>"break" {
RETURN_TOKEN_WITH_IDENT(T_BREAK);
}
<ST_IN_SCRIPTING>"continue" {
RETURN_TOKEN_WITH_IDENT(T_CONTINUE);
}
<ST_IN_SCRIPTING>"goto" {
RETURN_TOKEN_WITH_IDENT(T_GOTO);
}
<ST_IN_SCRIPTING>"echo" {
RETURN_TOKEN_WITH_IDENT(T_ECHO);
}
<ST_IN_SCRIPTING>"print" {
RETURN_TOKEN_WITH_IDENT(T_PRINT);
}
<ST_IN_SCRIPTING>"class" {
RETURN_TOKEN_WITH_IDENT(T_CLASS);
}
<ST_IN_SCRIPTING>"interface" {
RETURN_TOKEN_WITH_IDENT(T_INTERFACE);
}
<ST_IN_SCRIPTING>"trait" {
RETURN_TOKEN_WITH_IDENT(T_TRAIT);
}
/*
* The enum keyword must be followed by whitespace and another identifier.
* This avoids the BC break of using enum in classes, namespaces, functions and constants.
*/
<ST_IN_SCRIPTING>"enum"{WHITESPACE_OR_COMMENTS}("extends"|"implements") {
yyless(4);
RETURN_TOKEN_WITH_STR(T_STRING, 0);
}
<ST_IN_SCRIPTING>"enum"{WHITESPACE_OR_COMMENTS}[a-zA-Z_\x80-\xff] {
yyless(4);
RETURN_TOKEN_WITH_IDENT(T_ENUM);
}
<ST_IN_SCRIPTING>"extends" {
RETURN_TOKEN_WITH_IDENT(T_EXTENDS);
}
<ST_IN_SCRIPTING>"implements" {
RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS);
}
<ST_IN_SCRIPTING>"->" {
yy_push_state(ST_LOOKING_FOR_PROPERTY);
RETURN_TOKEN(T_OBJECT_OPERATOR);
}
<ST_IN_SCRIPTING>"?->" {
yy_push_state(ST_LOOKING_FOR_PROPERTY);
RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR);
}
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
goto return_whitespace;
}
<ST_LOOKING_FOR_PROPERTY>"->" {
RETURN_TOKEN(T_OBJECT_OPERATOR);
}
<ST_LOOKING_FOR_PROPERTY>"?->" {
RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR);
}
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
yy_pop_state();
RETURN_TOKEN_WITH_STR(T_STRING, 0);
}
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
yyless(0);
yy_pop_state();
goto restart;
}
<ST_IN_SCRIPTING>"::" {
RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
}
<ST_IN_SCRIPTING>"..." {
RETURN_TOKEN(T_ELLIPSIS);
}
<ST_IN_SCRIPTING>"??" {
RETURN_TOKEN(T_COALESCE);
}
<ST_IN_SCRIPTING>"new" {
RETURN_TOKEN_WITH_IDENT(T_NEW);
}
<ST_IN_SCRIPTING>"clone" {
RETURN_TOKEN_WITH_IDENT(T_CLONE);
}
<ST_IN_SCRIPTING>"var" {
RETURN_TOKEN_WITH_IDENT(T_VAR);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
RETURN_TOKEN(T_INT_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("double"|"float"){TABS_AND_SPACES}")" {
RETURN_TOKEN(T_DOUBLE_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"real"{TABS_AND_SPACES}")" {
if (PARSER_MODE()) {
zend_throw_exception(zend_ce_parse_error, "The (real) cast has been removed, use (float) instead", 0);
RETURN_TOKEN(T_ERROR);
}
RETURN_TOKEN(T_DOUBLE_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
RETURN_TOKEN(T_STRING_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
RETURN_TOKEN(T_ARRAY_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
RETURN_TOKEN(T_OBJECT_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
RETURN_TOKEN(T_BOOL_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
RETURN_TOKEN(T_UNSET_CAST);
}
<ST_IN_SCRIPTING>"eval" {
RETURN_TOKEN_WITH_IDENT(T_EVAL);
}
<ST_IN_SCRIPTING>"include" {
RETURN_TOKEN_WITH_IDENT(T_INCLUDE);
}
<ST_IN_SCRIPTING>"include_once" {
RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE);
}
<ST_IN_SCRIPTING>"require" {
RETURN_TOKEN_WITH_IDENT(T_REQUIRE);
}
<ST_IN_SCRIPTING>"require_once" {
RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE);
}
<ST_IN_SCRIPTING>"namespace" {
RETURN_TOKEN_WITH_IDENT(T_NAMESPACE);
}
<ST_IN_SCRIPTING>"use" {
RETURN_TOKEN_WITH_IDENT(T_USE);
}
<ST_IN_SCRIPTING>"insteadof" {
RETURN_TOKEN_WITH_IDENT(T_INSTEADOF);
}
<ST_IN_SCRIPTING>"global" {
RETURN_TOKEN_WITH_IDENT(T_GLOBAL);
}
<ST_IN_SCRIPTING>"isset" {
RETURN_TOKEN_WITH_IDENT(T_ISSET);
}
<ST_IN_SCRIPTING>"empty" {
RETURN_TOKEN_WITH_IDENT(T_EMPTY);
}
<ST_IN_SCRIPTING>"__halt_compiler" {
RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER);
}
<ST_IN_SCRIPTING>"static" {
RETURN_TOKEN_WITH_IDENT(T_STATIC);
}
<ST_IN_SCRIPTING>"abstract" {
RETURN_TOKEN_WITH_IDENT(T_ABSTRACT);
}
<ST_IN_SCRIPTING>"final" {
RETURN_TOKEN_WITH_IDENT(T_FINAL);
}
<ST_IN_SCRIPTING>"private" {
RETURN_TOKEN_WITH_IDENT(T_PRIVATE);
}
<ST_IN_SCRIPTING>"protected" {
RETURN_TOKEN_WITH_IDENT(T_PROTECTED);
}
<ST_IN_SCRIPTING>"public" {
RETURN_TOKEN_WITH_IDENT(T_PUBLIC);
}
<ST_IN_SCRIPTING>"readonly" {
RETURN_TOKEN_WITH_IDENT(T_READONLY);
}
<ST_IN_SCRIPTING>"unset" {
RETURN_TOKEN_WITH_IDENT(T_UNSET);
}
<ST_IN_SCRIPTING>"=>" {
RETURN_TOKEN(T_DOUBLE_ARROW);
}
<ST_IN_SCRIPTING>"list" {
RETURN_TOKEN_WITH_IDENT(T_LIST);
}
<ST_IN_SCRIPTING>"array" {
RETURN_TOKEN_WITH_IDENT(T_ARRAY);
}
<ST_IN_SCRIPTING>"callable" {
RETURN_TOKEN_WITH_IDENT(T_CALLABLE);
}
<ST_IN_SCRIPTING>"++" {
RETURN_TOKEN(T_INC);
}
<ST_IN_SCRIPTING>"--" {
RETURN_TOKEN(T_DEC);
}
<ST_IN_SCRIPTING>"===" {
RETURN_TOKEN(T_IS_IDENTICAL);
}
<ST_IN_SCRIPTING>"!==" {
RETURN_TOKEN(T_IS_NOT_IDENTICAL);
}
<ST_IN_SCRIPTING>"==" {
RETURN_TOKEN(T_IS_EQUAL);
}
<ST_IN_SCRIPTING>"!="|"<>" {
RETURN_TOKEN(T_IS_NOT_EQUAL);
}
<ST_IN_SCRIPTING>"<=>" {
RETURN_TOKEN(T_SPACESHIP);
}
<ST_IN_SCRIPTING>"<=" {
RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
}
<ST_IN_SCRIPTING>">=" {
RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
}
<ST_IN_SCRIPTING>"+=" {
RETURN_TOKEN(T_PLUS_EQUAL);
}
<ST_IN_SCRIPTING>"-=" {
RETURN_TOKEN(T_MINUS_EQUAL);
}
<ST_IN_SCRIPTING>"*=" {
RETURN_TOKEN(T_MUL_EQUAL);
}
<ST_IN_SCRIPTING>"*\*" {
RETURN_TOKEN(T_POW);
}
<ST_IN_SCRIPTING>"*\*=" {
RETURN_TOKEN(T_POW_EQUAL);
}
<ST_IN_SCRIPTING>"/=" {
RETURN_TOKEN(T_DIV_EQUAL);
}
<ST_IN_SCRIPTING>".=" {
RETURN_TOKEN(T_CONCAT_EQUAL);
}
<ST_IN_SCRIPTING>"%=" {
RETURN_TOKEN(T_MOD_EQUAL);
}
<ST_IN_SCRIPTING>"<<=" {
RETURN_TOKEN(T_SL_EQUAL);
}
<ST_IN_SCRIPTING>">>=" {
RETURN_TOKEN(T_SR_EQUAL);
}
<ST_IN_SCRIPTING>"&=" {
RETURN_TOKEN(T_AND_EQUAL);
}
<ST_IN_SCRIPTING>"|=" {
RETURN_TOKEN(T_OR_EQUAL);
}
<ST_IN_SCRIPTING>"^=" {
RETURN_TOKEN(T_XOR_EQUAL);
}
<ST_IN_SCRIPTING>"??=" {
RETURN_TOKEN(T_COALESCE_EQUAL);
}
<ST_IN_SCRIPTING>"||" {
RETURN_TOKEN(T_BOOLEAN_OR);
}
<ST_IN_SCRIPTING>"&&" {
RETURN_TOKEN(T_BOOLEAN_AND);
}
<ST_IN_SCRIPTING>"OR" {
RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR);
}
<ST_IN_SCRIPTING>"AND" {
RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND);
}
<ST_IN_SCRIPTING>"XOR" {
RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR);
}
<ST_IN_SCRIPTING>"<<" {
RETURN_TOKEN(T_SL);
}
<ST_IN_SCRIPTING>">>" {
RETURN_TOKEN(T_SR);
}
<ST_IN_SCRIPTING>"&"{OPTIONAL_WHITESPACE_OR_COMMENTS}("$"|"...") {
yyless(1);
RETURN_TOKEN(T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG);
}
<ST_IN_SCRIPTING>"&" {
RETURN_TOKEN(T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG);
}
<ST_IN_SCRIPTING>"]"|")" {
/* Check that ] and ) match up properly with a preceding [ or ( */
RETURN_EXIT_NESTING_TOKEN(yytext[0]);
}
<ST_IN_SCRIPTING>"["|"(" {
enter_nesting(yytext[0]);
RETURN_TOKEN(yytext[0]);
}
<ST_IN_SCRIPTING>{TOKENS} {
RETURN_TOKEN(yytext[0]);
}
<ST_IN_SCRIPTING>"{" {
yy_push_state(ST_IN_SCRIPTING);
enter_nesting('{');
RETURN_TOKEN('{');
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
enter_nesting('{');
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_IN_SCRIPTING>"}" {
RESET_DOC_COMMENT();
if (!zend_stack_is_empty(&SCNG(state_stack))) {
yy_pop_state();
}
RETURN_EXIT_NESTING_TOKEN('}');
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN_WITH_STR(T_STRING_VARNAME, 0);
}
<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
yyless(0);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
goto restart;
}
<ST_IN_SCRIPTING>{BNUM} {
/* The +/- 2 skips "0b" */
size_t len = yyleng - 2;
char *end, *bin = yytext + 2;
bool contains_underscores;
/* Skip any leading 0s */
while (len > 0 && (*bin == '0' || *bin == '_')) {
++bin;
--len;
}
contains_underscores = (memchr(bin, '_', len) != NULL);
if (contains_underscores) {
bin = estrndup(bin, len);
strip_underscores(bin, &len);
}
if (len < SIZEOF_ZEND_LONG * 8) {
if (len == 0) {
ZVAL_LONG(zendlval, 0);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
ZEND_ASSERT(!errno && end == bin + len);
}
if (contains_underscores) {
efree(bin);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == bin + len);
if (contains_underscores) {
efree(bin);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}
<ST_IN_SCRIPTING>{ONUM} {
	/* Octal literal with an explicit "0o" prefix. Produces T_LNUMBER, or T_DNUMBER
	 * when the integer conversion reports ERANGE. */

	/* The +/- 2 skips "0o" */
	size_t len = yyleng - 2;
	char *end, *octal = yytext + 2;
	bool contains_underscores;

	/* Skip any leading 0s */
	while (len > 0 && (*octal == '0' || *octal == '_')) {
		++octal;
		--len;
	}

	if (len == 0) {
		ZVAL_LONG(zendlval, 0);
		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
	}

	/* Look for separators only in the digits that remain once the leading zeros
	 * have been skipped, as the binary and hexadecimal rules do. A literal such as
	 * 0o0_17 is then converted directly from the input buffer instead of being
	 * duplicated just to strip underscores that are no longer there. */
	contains_underscores = (memchr(octal, '_', len) != NULL);
	if (contains_underscores) {
		octal = estrndup(octal, len);
		strip_underscores(octal, &len);
	}

	errno = 0;

	ZVAL_LONG(zendlval, ZEND_STRTOL(octal, &end, 8));

	ZEND_ASSERT(end == octal + len);

	if (!errno) {
		if (contains_underscores) {
			efree(octal);
		}
		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
	}

	/* Overflow */
	ZEND_ASSERT(errno == ERANGE);
	/* Reset errno */
	errno = 0;

	/* zend_oct_strtod skips leading '0' */
	ZVAL_DOUBLE(zendlval, zend_oct_strtod(octal, (const char **)&end));

	ZEND_ASSERT(!errno);
	ZEND_ASSERT(end == octal + len);
	if (contains_underscores) {
		efree(octal);
	}
	RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
<ST_IN_SCRIPTING>{LNUM} {
size_t len = yyleng;
char *end, *lnum = yytext;
bool is_octal = lnum[0] == '0';
bool contains_underscores = (memchr(lnum, '_', len) != NULL);
if (contains_underscores) {
lnum = estrndup(lnum, len);
strip_underscores(lnum, &len);
}
/* Digits 8 and 9 are illegal in octal literals. */
if (is_octal) {
size_t i;
for (i = 0; i < len; i++) {
if (lnum[i] == '8' || lnum[i] == '9') {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
if (PARSER_MODE()) {
if (contains_underscores) {
efree(lnum);
}
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
}
/* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
len = i;
break;
}
}
}
if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
errno = 0;
/* base must be passed explicitly for correct parse error on Windows */
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
ZEND_ASSERT(end == lnum + len);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
if (errno == ERANGE) { /* Overflow */
errno = 0;
if (is_octal) { /* octal overflow */
ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
} else {
ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
}
ZEND_ASSERT(end == lnum + len);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
ZEND_ASSERT(end == lnum + len);
}
ZEND_ASSERT(!errno);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
}
<ST_IN_SCRIPTING>{HNUM} {
/* The +/- 2 skips "0x" */
size_t len = yyleng - 2;
char *end, *hex = yytext + 2;
bool contains_underscores;
/* Skip any leading 0s */
while (len > 0 && (*hex == '0' || *hex == '_')) {
++hex;
--len;
}
contains_underscores = (memchr(hex, '_', len) != NULL);
if (contains_underscores) {
hex = estrndup(hex, len);
strip_underscores(hex, &len);
}
if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
if (len == 0) {
ZVAL_LONG(zendlval, 0);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
ZEND_ASSERT(!errno && end == hex + len);
}
if (contains_underscores) {
efree(hex);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == hex + len);
if (contains_underscores) {
efree(hex);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}
<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
char *end;
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
if (errno == ERANGE) {
goto string;
}
ZEND_ASSERT(end == yytext + yyleng);
} else {
string:
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
}
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM}|{ONUM} { /* Offset must be treated as a string */
if (yyleng == 1) {
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext)));
} else {
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
}
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
const char *end;
size_t len = yyleng;
char *dnum = yytext;
bool contains_underscores = (memchr(dnum, '_', len) != NULL);
if (contains_underscores) {
dnum = estrndup(dnum, len);
strip_underscores(dnum, &len);
}
ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == dnum + len);
if (contains_underscores) {
efree(dnum);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
<ST_IN_SCRIPTING>"__CLASS__" {
RETURN_TOKEN_WITH_IDENT(T_CLASS_C);
}
<ST_IN_SCRIPTING>"__TRAIT__" {
RETURN_TOKEN_WITH_IDENT(T_TRAIT_C);
}
<ST_IN_SCRIPTING>"__FUNCTION__" {
RETURN_TOKEN_WITH_IDENT(T_FUNC_C);
}
<ST_IN_SCRIPTING>"__METHOD__" {
RETURN_TOKEN_WITH_IDENT(T_METHOD_C);
}
<ST_IN_SCRIPTING>"__LINE__" {
RETURN_TOKEN_WITH_IDENT(T_LINE);
}
<ST_IN_SCRIPTING>"__FILE__" {
RETURN_TOKEN_WITH_IDENT(T_FILE);
}
<ST_IN_SCRIPTING>"__DIR__" {
RETURN_TOKEN_WITH_IDENT(T_DIR);
}
<ST_IN_SCRIPTING>"__NAMESPACE__" {
RETURN_TOKEN_WITH_IDENT(T_NS_C);
}
<SHEBANG>"#!" .* {NEWLINE} {
CG(zend_lineno)++;
BEGIN(INITIAL);
goto restart;
}
<SHEBANG>{ANY_CHAR} {
yyless(0);
BEGIN(INITIAL);
goto restart;
}
<INITIAL>"<?=" {
BEGIN(ST_IN_SCRIPTING);
if (PARSER_MODE()) {
/* We'll reject this as an identifier in zend_lex_tstring. */
RETURN_TOKEN_WITH_IDENT(T_ECHO);
}
RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
HANDLE_NEWLINE(yytext[yyleng-1]);
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
}
<INITIAL>"<?php" {
/* Allow <?php followed by end of file. */
if (YYCURSOR == YYLIMIT) {
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
}
/* Degenerate case: <?phpX is interpreted as <? phpX with short tags. */
if (CG(short_tags)) {
yyless(2);
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
}
goto inline_char_handler;
}
<INITIAL>"<?" {
if (CG(short_tags)) {
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
} else {
goto inline_char_handler;
}
}
<INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
RETURN_END_TOKEN;
}
inline_char_handler:
while (1) {
YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
if (YYCURSOR >= YYLIMIT) {
break;
}
if (*YYCURSOR == '?') {
if (CG(short_tags) /* <? */
|| (*(YYCURSOR + 1) == '=') /* <?= */
|| (!strncasecmp((char*)YYCURSOR + 1, "php", 3) && /* <?php[ \t\r\n] */
(YYCURSOR + 4 == YYLIMIT ||
YYCURSOR[4] == ' ' || YYCURSOR[4] == '\t' ||
YYCURSOR[4] == '\n' || YYCURSOR[4] == '\r'))
) {
YYCURSOR--;
break;
}
}
}
yyleng = YYCURSOR - SCNG(yy_text);
if (SCNG(output_filter)) {
size_t readsize;
char *s = NULL;
size_t sz = 0;
// TODO: avoid reallocation ???
readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
ZVAL_STRINGL(zendlval, s, sz);
efree(s);
if (readsize < yyleng) {
yyless(readsize);
}
} else if (yyleng == 1) {
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext));
} else {
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
HANDLE_NEWLINES(yytext, yyleng);
RETURN_TOKEN_WITH_VAL(T_INLINE_HTML);
}
/* Make sure a label character follows "->" or "?->", otherwise there is no property
* and "->"/"?->" will be taken literally
*/
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
yyless(yyleng - 3);
yy_push_state(ST_LOOKING_FOR_PROPERTY);
RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
}
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"?->"[a-zA-Z_\x80-\xff] {
yyless(yyleng - 4);
yy_push_state(ST_LOOKING_FOR_PROPERTY);
RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
}
/* A [ always designates a variable offset, regardless of what follows
*/
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
yyless(yyleng - 1);
yy_push_state(ST_VAR_OFFSET);
RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
}
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
}
<ST_VAR_OFFSET>"]" {
yy_pop_state();
RETURN_TOKEN(']');
}
<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
RETURN_TOKEN(yytext[0]);
}
<ST_VAR_OFFSET>[ \n\r\t\\'#] {
/* Invalid rule to return a more explicit parse error with proper line number */
yyless(0);
yy_pop_state();
ZVAL_NULL(zendlval);
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
<ST_IN_SCRIPTING>"namespace"("\\"{LABEL})+ {
RETURN_TOKEN_WITH_STR(T_NAME_RELATIVE, sizeof("namespace\\") - 1);
}
<ST_IN_SCRIPTING>{LABEL}("\\"{LABEL})+ {
RETURN_TOKEN_WITH_STR(T_NAME_QUALIFIED, 0);
}
<ST_IN_SCRIPTING>"\\"{LABEL}("\\"{LABEL})* {
RETURN_TOKEN_WITH_STR(T_NAME_FULLY_QUALIFIED, 1);
}
<ST_IN_SCRIPTING>"\\" {
RETURN_TOKEN(T_NS_SEPARATOR);
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
RETURN_TOKEN_WITH_STR(T_STRING, 0);
}
<ST_IN_SCRIPTING>"#"|"//" {
/* Single-line comment: extend the token up to, but not including, the next
 * line terminator or "?>", or to the end of the input if neither occurs. */
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
case '\n':
/* Step back so the line terminator is not part of the comment. */
YYCURSOR--;
break;
case '?':
if (*YYCURSOR == '>') {
/* Step back onto the '?' so that "?>" is scanned as its own token. */
YYCURSOR--;
break;
}
ZEND_FALLTHROUGH;
default:
continue;
}
/* Reached only through one of the `break`s above: stop scanning. */
break;
}
yyleng = YYCURSOR - SCNG(yy_text);
/* The comment token is skipped in parser mode and emitted otherwise. */
RETURN_OR_SKIP_TOKEN(T_COMMENT);
}
<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
/* Block comment or doc comment: scan forward to the comment terminator and
 * emit T_COMMENT or T_DOC_COMMENT (both are skipped in parser mode). */
int doc_com;
/* More than two matched characters means the second alternative (two stars
 * followed by whitespace) matched, i.e. this is a doc comment. */
if (yyleng > 2) {
doc_com = 1;
RESET_DOC_COMMENT();
} else {
doc_com = 0;
}
/* Look for a '*' immediately followed by '/'; on a match the cursor is left on the '/'. */
while (YYCURSOR < YYLIMIT) {
if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
break;
}
}
if (YYCURSOR < YYLIMIT) {
/* Consume the final '/' of the terminator. */
YYCURSOR++;
} else {
/* Input ended first. Outside parser mode the error is raised but a comment
 * token covering the rest of the input is still produced below. */
zend_throw_exception_ex(zend_ce_parse_error, 0, "Unterminated comment starting line %d", CG(zend_lineno));
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
}
yyleng = YYCURSOR - SCNG(yy_text);
/* NOTE(review): presumably advances the line counter for newlines inside the comment - confirm. */
HANDLE_NEWLINES(yytext, yyleng);
if (doc_com) {
/* Keep a copy of the whole comment text in CG(doc_comment). */
CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
RETURN_OR_SKIP_TOKEN(T_DOC_COMMENT);
}
RETURN_OR_SKIP_TOKEN(T_COMMENT);
}
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
BEGIN(INITIAL);
if (yytext[yyleng-1] != '>') {
CG(increment_lineno) = 1;
}
if (PARSER_MODE()) {
RETURN_TOKEN(';'); /* implicit ';' at php-end tag */
}
RETURN_TOKEN(T_CLOSE_TAG);
}
<ST_IN_SCRIPTING>b?['] {
char *s, *t;
char *end;
int bprefix = (yytext[0] != '\'') ? 1 : 0;
while (1) {
if (YYCURSOR < YYLIMIT) {
if (*YYCURSOR == '\'') {
YYCURSOR++;
yyleng = YYCURSOR - SCNG(yy_text);
break;
} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
} else {
yyleng = YYLIMIT - SCNG(yy_text);
/* Unclosed single quotes; treat similar to double quotes, but without a separate token
* for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
* rule, which continued in ST_IN_SCRIPTING state after the quote */
ZVAL_NULL(zendlval);
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
}
if (yyleng-bprefix-2 <= 1) {
if (yyleng-bprefix-2 < 1) {
ZVAL_EMPTY_STRING(zendlval);
} else {
zend_uchar c = (zend_uchar)*(yytext+bprefix+1);
if (c == '\n' || c == '\r') {
CG(zend_lineno)++;
}
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
}
goto skip_escape_conversion;
}
ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
/* convert escape sequences */
s = Z_STRVAL_P(zendlval);
end = s+Z_STRLEN_P(zendlval);
while (1) {
if (UNEXPECTED(*s=='\\')) {
break;
}
if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
CG(zend_lineno)++;
}
s++;
if (s == end) {
goto skip_escape_conversion;
}
}
t = s;
while (s<end) {
if (*s=='\\') {
s++;
if (*s == '\\' || *s == '\'') {
*t++ = *s;
} else {
*t++ = '\\';
*t++ = *s;
}
} else {
*t++ = *s;
}
if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
CG(zend_lineno)++;
}
s++;
}
*t = 0;
Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
skip_escape_conversion:
if (SCNG(output_filter)) {
size_t sz = 0;
char *str = NULL;
zend_string *new_str;
s = Z_STRVAL_P(zendlval);
// TODO: avoid reallocation ???
SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
new_str = zend_string_init(str, sz, 0);
if (str != s) {
efree(str);
}
zend_string_release_ex(Z_STR_P(zendlval), 0);
ZVAL_STR(zendlval, new_str);
}
RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
}
<ST_IN_SCRIPTING>b?["] {
int bprefix = (yytext[0] != '"') ? 1 : 0;
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '"':
yyleng = YYCURSOR - SCNG(yy_text);
if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)
|| !PARSER_MODE()) {
RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
} else {
RETURN_TOKEN(T_ERROR);
}
case '$':
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
}
continue;
case '{':
if (*YYCURSOR == '$') {
break;
}
continue;
case '\\':
if (YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
ZEND_FALLTHROUGH;
default:
continue;
}
YYCURSOR--;
break;
}
/* Remember how much was scanned to save rescanning */
SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
YYCURSOR = SCNG(yy_text) + yyleng;
BEGIN(ST_DOUBLE_QUOTES);
RETURN_TOKEN('"');
}
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
char *s;
unsigned char *saved_cursor;
int bprefix = (yytext[0] != '<') ? 1 : 0, spacing = 0, indentation = 0;
zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
bool is_heredoc = 1;
CG(zend_lineno)++;
heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
s = yytext+bprefix+3;
while ((*s == ' ') || (*s == '\t')) {
s++;
heredoc_label->length--;
}
if (*s == '\'') {
s++;
heredoc_label->length -= 2;
is_heredoc = 0;
BEGIN(ST_NOWDOC);
} else {
if (*s == '"') {
s++;
heredoc_label->length -= 2;
}
BEGIN(ST_HEREDOC);
}
heredoc_label->label = estrndup(s, heredoc_label->length);
heredoc_label->indentation_uses_spaces = 0;
heredoc_label->indentation = 0;
saved_cursor = YYCURSOR;
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
if (*YYCURSOR == '\t') {
spacing |= HEREDOC_USING_TABS;
} else {
spacing |= HEREDOC_USING_SPACES;
}
++YYCURSOR;
++indentation;
}
if (YYCURSOR == YYLIMIT) {
YYCURSOR = saved_cursor;
RETURN_TOKEN(T_START_HEREDOC);
}
/* Check for ending label on the next line */
if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
if (!IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
}
YYCURSOR = saved_cursor;
heredoc_label->indentation = indentation;
BEGIN(ST_END_HEREDOC);
RETURN_TOKEN(T_START_HEREDOC);
}
}
YYCURSOR = saved_cursor;
if (is_heredoc && !SCNG(heredoc_scan_ahead)) {
zend_lex_state current_state;
zend_string *saved_doc_comment = CG(doc_comment);
int heredoc_nesting_level = 1;
int first_token = 0;
int error = 0;
zend_save_lexical_state(&current_state);
SCNG(heredoc_scan_ahead) = 1;
SCNG(heredoc_indentation) = 0;
SCNG(heredoc_indentation_uses_spaces) = 0;
LANG_SCNG(on_event) = NULL;
CG(doc_comment) = NULL;
zend_ptr_stack_reverse_apply(&current_state.heredoc_label_stack, copy_heredoc_label_stack);
zend_exception_save();
while (heredoc_nesting_level) {
zval zv;
int retval;
ZVAL_UNDEF(&zv);
retval = lex_scan(&zv, NULL);
zval_ptr_dtor_nogc(&zv);
if (EG(exception)) {
zend_clear_exception();
break;
}
if (!first_token) {
first_token = retval;
}
switch (retval) {
case T_START_HEREDOC:
++heredoc_nesting_level;
break;
case T_END_HEREDOC:
--heredoc_nesting_level;
break;
case END:
heredoc_nesting_level = 0;
}
}
zend_exception_restore();
if (
(first_token == T_VARIABLE
|| first_token == T_DOLLAR_OPEN_CURLY_BRACES
|| first_token == T_CURLY_OPEN
) && SCNG(heredoc_indentation)) {
zend_throw_exception_ex(zend_ce_parse_error, 0, "Invalid body indentation level (expecting an indentation level of at least %d)", SCNG(heredoc_indentation));
error = 1;
}
heredoc_label->indentation = SCNG(heredoc_indentation);
heredoc_label->indentation_uses_spaces = SCNG(heredoc_indentation_uses_spaces);
zend_restore_lexical_state(&current_state);
SCNG(heredoc_scan_ahead) = 0;
CG(increment_lineno) = 0;
CG(doc_comment) = saved_doc_comment;
if (PARSER_MODE() && error) {
RETURN_TOKEN(T_ERROR);
}
}
RETURN_TOKEN(T_START_HEREDOC);
}
/* A backtick seen while in ST_IN_SCRIPTING opens a backquoted string:
 * switch the scanner to the ST_BACKQUOTE condition and emit the backtick
 * character itself as the token. */
<ST_IN_SCRIPTING>[`] {
BEGIN(ST_BACKQUOTE);
RETURN_TOKEN('`');
}
/* Emits the closing heredoc/nowdoc label as T_END_HEREDOC.
 *
 * The rule is entered with the cursor at the start of the closing line; the
 * label descriptor on top of the heredoc label stack says how many bytes of
 * indentation and label text make up the token. */
<ST_END_HEREDOC>{ANY_CHAR} {
	zend_heredoc_label *closing = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));

	/* The token covers the indentation plus the label itself. The pattern
	 * match has already advanced the cursor (presumably by the single
	 * character it consumed), hence the "- 1". */
	yyleng = closing->indentation + closing->length;
	YYCURSOR += yyleng - 1;

	/* Scanning continues as regular script code after the label. */
	BEGIN(ST_IN_SCRIPTING);

	/* The descriptor was popped above, so it is released here. */
	heredoc_label_dtor(closing);
	efree(closing);

	RETURN_TOKEN(T_END_HEREDOC);
}
/* "{$" inside a double-quoted string, backquoted string or heredoc body:
 * emit T_CURLY_OPEN and scan what follows in ST_IN_SCRIPTING. */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
/* Push ST_IN_SCRIPTING as the active condition; presumably the current one
 * is saved so that it can be restored later. */
yy_push_state(ST_IN_SCRIPTING);
/* Presumably (lex-style yyless) keeps only the first matched character in the
 * token, so that the '$' is scanned again as part of the next token. */
yyless(1);
/* Record the opening '{' for nesting tracking. */
enter_nesting('{');
RETURN_TOKEN(T_CURLY_OPEN);
}
/* A double quote character seen while in ST_DOUBLE_QUOTES ends the string:
 * switch back to ST_IN_SCRIPTING and emit the quote character as the token. */
<ST_DOUBLE_QUOTES>["] {
BEGIN(ST_IN_SCRIPTING);
RETURN_TOKEN('"');
}
/* A backtick seen while in ST_BACKQUOTE ends the backquoted string:
 * switch back to ST_IN_SCRIPTING and emit the backtick as the token. */
<ST_BACKQUOTE>[`] {
BEGIN(ST_IN_SCRIPTING);
RETURN_TOKEN('`');
}
/* Literal text inside a double-quoted string.
 *
 * Consumes characters up to (but not including) the closing quote or the
 * start of an interpolation ("$" followed by a label start or "{", or "{$")
 * and returns them as one T_ENCAPSED_AND_WHITESPACE token after escape
 * processing. */
<ST_DOUBLE_QUOTES>{ANY_CHAR} {
	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
		/* A length was recorded earlier (presumably by the rule that opened
		 * the string): jump straight to the end of that span instead of
		 * scanning it again, and clear the record. */
		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
		goto double_quotes_scan_done;
	}

	/* Cursor beyond the limit: the input is exhausted. */
	if (YYCURSOR > YYLIMIT) {
		RETURN_END_TOKEN;
	}

	/* The character matched by the rule is a backslash: the character after
	 * it is escaped and must not be inspected by the loop below. */
	if (YYCURSOR < YYLIMIT && yytext[0] == '\\') {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		const int ch = *YYCURSOR++;

		if (ch == '\\') {
			/* Skip the escaped character, if there is one. */
			if (YYCURSOR < YYLIMIT) {
				YYCURSOR++;
			}
			continue;
		}

		if (ch == '"'
		 || (ch == '$' && (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{'))
		 || (ch == '{' && *YYCURSOR == '$')) {
			/* Terminator found: give it back so that it starts the next token. */
			YYCURSOR--;
			break;
		}
	}

double_quotes_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	/* A failed escape conversion is only an error in parser mode. */
	if (UNEXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') != SUCCESS)
	 && PARSER_MODE()) {
		RETURN_TOKEN(T_ERROR);
	}
	RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
/* Literal text inside a backquoted string.
 *
 * Consumes characters up to (but not including) the closing backtick or the
 * start of an interpolation ("$" followed by a label start or "{", or "{$")
 * and returns them as one T_ENCAPSED_AND_WHITESPACE token after escape
 * processing. */
<ST_BACKQUOTE>{ANY_CHAR} {
	/* Cursor beyond the limit: the input is exhausted. */
	if (YYCURSOR > YYLIMIT) {
		RETURN_END_TOKEN;
	}

	/* The character matched by the rule is a backslash: the character after
	 * it is escaped and must not be inspected by the loop below. */
	if (YYCURSOR < YYLIMIT && yytext[0] == '\\') {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		const int ch = *YYCURSOR++;

		if (ch == '\\') {
			/* Skip the escaped character, if there is one. */
			if (YYCURSOR < YYLIMIT) {
				YYCURSOR++;
			}
			continue;
		}

		if (ch == '`'
		 || (ch == '$' && (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{'))
		 || (ch == '{' && *YYCURSOR == '$')) {
			/* Terminator found: give it back so that it starts the next token. */
			YYCURSOR--;
			break;
		}
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	/* A failed escape conversion is only an error in parser mode. */
	if (UNEXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') != SUCCESS)
	 && PARSER_MODE()) {
		RETURN_TOKEN(T_ERROR);
	}
	RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
/* Literal text inside a heredoc body.
 *
 * Scans forward until one of the following is found:
 *  - the closing label at the start of a line (optionally indented),
 *  - the start of an interpolation: '$' followed by a label start or '{',
 *    or '{' followed by '$',
 *  - the end of the input.
 * The scanned text is returned as T_ENCAPSED_AND_WHITESPACE. In parser mode
 * (and outside of scan-ahead) the closing-line indentation is stripped from
 * the text and escape sequences are processed. */
<ST_HEREDOC>{ANY_CHAR} {
/* The label stays on the stack here; the ST_END_HEREDOC rule pops it. */
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
/* newline: number of bytes of the line break preceding the closing label
 * (0 when this token does not end at the closing label). */
int newline = 0, indentation = 0, spacing = 0;
/* Cursor beyond the limit: the input is exhausted. */
if (YYCURSOR > YYLIMIT) {
RETURN_END_TOKEN;
}
/* Step back so that the loop below also inspects the character the rule
 * itself matched. */
YYCURSOR--;
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
/* Treat "\r\n" as a single line break. */
if (*YYCURSOR == '\n') {
YYCURSOR++;
}
ZEND_FALLTHROUGH;
case '\n':
/* Measure the indentation of the new line and record whether it is made
 * of tabs, spaces, or both. */
indentation = spacing = 0;
while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
if (*YYCURSOR == '\t') {
spacing |= HEREDOC_USING_TABS;
} else {
spacing |= HEREDOC_USING_SPACES;
}
++YYCURSOR;
++indentation;
}
/* Input ended inside the indentation: return everything scanned so far,
 * with a NULL value. */
if (YYCURSOR == YYLIMIT) {
yyleng = YYCURSOR - SCNG(yy_text);
HANDLE_NEWLINES(yytext, yyleng);
ZVAL_NULL(zendlval);
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
/* Check for ending label on the next line */
if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
/* The label text is only a prefix of a longer identifier: not the
 * closing label, keep scanning. */
if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
continue;
}
/* The closing line is indented with both tabs and spaces. */
if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
}
/* newline before label will be subtracted from returned text, but
 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
newline = 2; /* Windows newline */
} else {
newline = 1;
}
CG(increment_lineno) = 1; /* For newline before label */
if (SCNG(heredoc_scan_ahead)) {
/* Scan-ahead pass: only record how the closing label is indented. */
SCNG(heredoc_indentation) = indentation;
SCNG(heredoc_indentation_uses_spaces) = (spacing == HEREDOC_USING_SPACES);
} else {
/* Leave the indentation unconsumed; ST_END_HEREDOC includes it in the
 * closing token. */
YYCURSOR -= indentation;
}
BEGIN(ST_END_HEREDOC);
goto heredoc_scan_done;
}
continue;
case '$':
/* Start of a variable or of "${": end the text token before the '$'. */
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
}
continue;
case '{':
/* "{$": end the text token before the '{'. */
if (*YYCURSOR == '$') {
break;
}
continue;
case '\\':
/* Skip the escaped character, except for a line break, which must still
 * go through the newline handling above. */
if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
YYCURSOR++;
}
ZEND_FALLTHROUGH;
default:
continue;
}
/* Reached only through a 'break' in the switch: give the terminating
 * character back and stop scanning. */
YYCURSOR--;
break;
}
heredoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
/* The value excludes the line break that precedes the closing label. */
ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
if (!SCNG(heredoc_scan_ahead) && !EG(exception) && PARSER_MODE()) {
/* True when the character immediately before this token is a line break. */
bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
/* Keep a handle on the string currently held by zendlval; it is the input
 * of the escape processing below and is released afterwards. */
zend_string *copy = Z_STR_P(zendlval);
if (!strip_multiline_string_indentation(
zendlval, heredoc_label->indentation, heredoc_label->indentation_uses_spaces,
newline_at_start, newline != 0)) {
RETURN_TOKEN(T_ERROR);
}
if (UNEXPECTED(zend_scan_escape_string(zendlval, ZSTR_VAL(copy), ZSTR_LEN(copy), 0) != SUCCESS)) {
zend_string_efree(copy);
RETURN_TOKEN(T_ERROR);
}
zend_string_efree(copy);
} else {
HANDLE_NEWLINES(yytext, yyleng - newline);
}
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
/* Literal text inside a nowdoc body.
 *
 * Scans forward until the closing label is found at the start of a line
 * (optionally indented) or the input ends, and returns the text as
 * T_ENCAPSED_AND_WHITESPACE. Unlike the heredoc rule, '$', '{' and '\\' get
 * no special treatment here. In parser mode the closing-line indentation is
 * stripped from the text. */
<ST_NOWDOC>{ANY_CHAR} {
/* The label stays on the stack here; the ST_END_HEREDOC rule pops it. */
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
/* spacing stays -1 until the first line break is seen; the indentation
 * stripping below is skipped in that case. */
int newline = 0, indentation = 0, spacing = -1;
/* Cursor beyond the limit: the input is exhausted. */
if (YYCURSOR > YYLIMIT) {
RETURN_END_TOKEN;
}
/* Step back so that the loop below also inspects the character the rule
 * itself matched. */
YYCURSOR--;
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
/* Treat "\r\n" as a single line break. */
if (*YYCURSOR == '\n') {
YYCURSOR++;
}
ZEND_FALLTHROUGH;
case '\n':
/* Measure the indentation of the new line and record whether it is made
 * of tabs, spaces, or both. */
indentation = spacing = 0;
while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
if (*YYCURSOR == '\t') {
spacing |= HEREDOC_USING_TABS;
} else {
spacing |= HEREDOC_USING_SPACES;
}
++YYCURSOR;
++indentation;
}
/* Input ended inside the indentation: return everything scanned so far,
 * with a NULL value. */
if (YYCURSOR == YYLIMIT) {
yyleng = YYCURSOR - SCNG(yy_text);
HANDLE_NEWLINES(yytext, yyleng);
ZVAL_NULL(zendlval);
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
/* Check for ending label on the next line */
if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
/* The label text is only a prefix of a longer identifier: not the
 * closing label, keep scanning. */
if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
continue;
}
/* The closing line is indented with both tabs and spaces. */
if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
}
/* newline before label will be subtracted from returned text, but
 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
newline = 2; /* Windows newline */
} else {
newline = 1;
}
CG(increment_lineno) = 1; /* For newline before label */
/* Leave the indentation unconsumed and store its width on the label;
 * ST_END_HEREDOC includes it in the closing token. */
YYCURSOR -= indentation;
heredoc_label->indentation = indentation;
BEGIN(ST_END_HEREDOC);
goto nowdoc_scan_done;
}
ZEND_FALLTHROUGH;
default:
continue;
}
}
nowdoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
/* The value excludes the line break that precedes the closing label. */
ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
if (!EG(exception) && spacing != -1 && PARSER_MODE()) {
/* True when the character immediately before this token is a line break. */
bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
if (!strip_multiline_string_indentation(
zendlval, indentation, spacing == HEREDOC_USING_SPACES,
newline_at_start, newline != 0)) {
RETURN_TOKEN(T_ERROR);
}
}
HANDLE_NEWLINES(yytext, yyleng - newline);
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
/* Catch-all for ST_IN_SCRIPTING and ST_VAR_OFFSET: reports the end of the
 * input when the cursor has moved past the limit, otherwise reports the
 * matched character as T_BAD_CHARACTER. */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
/* Cursor beyond the limit: the input is exhausted. */
if (YYCURSOR > YYLIMIT) {
RETURN_END_TOKEN;
}
RETURN_TOKEN(T_BAD_CHARACTER);
}
*/
/* Shared exit paths of the scanner, reached via goto. */

/* Copy the token text, starting at 'offset' bytes into it, into zendlval,
 * then continue with emit_token_with_val. */
emit_token_with_str:
zend_copy_value(zendlval, (yytext + offset), (yyleng - offset));
/* In parser mode, wrap the value in zendlval into an AST node tagged with the
 * line on which the token started, then continue with emit_token. */
emit_token_with_val:
if (PARSER_MODE()) {
ZEND_ASSERT(Z_TYPE_P(zendlval) != IS_UNDEF);
elem->ast = zend_ast_create_zval_with_lineno(zendlval, start_line);
}
/* Notify the on_event callback, if one is installed, and return the token. */
emit_token:
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
return token;
/* Like emit_token, but in parser mode also hands the start of the token text
 * to the parser through elem->ident. */
emit_token_with_ident:
if (PARSER_MODE()) {
elem->ident = SCNG(yy_text);
}
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
return token;
/* Whitespace: account for the newlines it contains and notify the callback.
 * In parser mode the token is not returned; scanning restarts for the next
 * token. Otherwise T_WHITESPACE is returned to the caller. */
return_whitespace:
HANDLE_NEWLINES(yytext, yyleng);
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context));
}
if (PARSER_MODE()) {
start_line = CG(zend_lineno);
goto restart;
} else {
return T_WHITESPACE;
}
/* Report the token to the callback but do not return it: refresh the start
 * line and scan the next token. */
skip_token:
if (SCNG(on_event)) {
SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
start_line = CG(zend_lineno);
goto restart;
}