php-src/Zend/zend_ini_scanner.l
Peter Kokot 8d3f8ca12a Remove unused Git attributes ident
The $Id$ keywords were used in Subversion where they can be substituted
with filename, last revision number change, last changed date, and last
user who changed it.

In Git this functionality is different and can be done with Git attribute
ident. These need to be defined manually for each file in the
.gitattributes file and are afterwards replaced with 40-character
hexadecimal blob object name which is based only on the particular file
contents.

This patch simplifies handling of $Id$ keywords by removing them since
they are not used anymore.
2018-07-25 00:53:25 +02:00

654 lines
17 KiB
Plaintext

/*
+----------------------------------------------------------------------+
| Zend Engine |
+----------------------------------------------------------------------+
| Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Zeev Suraski <zeev@zend.com> |
| Jani Taskinen <jani@php.net> |
| Marcus Boerger <helly@php.net> |
| Nuno Lopes <nlopess@php.net> |
| Scott MacVicar <scottmac@php.net> |
+----------------------------------------------------------------------+
*/
#include <errno.h>
#include "zend.h"
#include "zend_API.h"
#include "zend_globals.h"
#include <zend_ini_parser.h>
#include "zend_ini_scanner.h"
#ifdef YYDEBUG
#undef YYDEBUG
#endif
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
#include "zend_ini_scanner_defs.h"
#define YYCTYPE unsigned char
/* allow the scanner to read one null byte after the end of the string (from ZEND_MMAP_AHEAD)
* so that if will be able to terminate to match the current token (e.g. non-enclosed string) */
#define YYFILL(n) { if (YYCURSOR > YYLIMIT) return 0; }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)
#define YYGETCONDITION() SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s
#define STATE(name) yyc##name
/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE YYGETCONDITION()
#define yytext ((char*)SCNG(yy_text))
#define yyleng SCNG(yy_leng)
#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + x; \
yyleng = (unsigned int)x; } while(0)
/* #define yymore() goto yymore_restart */
/* perform sanity check. If this message is triggered you should
increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
/*!max:re2c */
#if ZEND_MMAP_AHEAD < (YYMAXFILL + 1)
# error ZEND_MMAP_AHEAD should be greater than YYMAXFILL
#endif
/* How it works (for the core ini directives):
* ===========================================
*
* 1. Scanner scans file for tokens and passes them to parser.
* 2. Parser parses the tokens and passes the name/value pairs to the callback
* function which stores them in the configuration hash table.
* 3. Later REGISTER_INI_ENTRIES() is called which triggers the actual
* registering of ini entries and uses zend_get_configuration_directive()
* to fetch the previously stored name/value pair from configuration hash table
* and registers the static ini entries which match the name to the value
* into EG(ini_directives) hash table.
* 4. PATH section entries are applied per-request from the bottom up, each overriding
* the previous one if it exists. zend_alter_ini_entry() is called for each entry.
* Settings in a PATH section are ZEND_INI_SYSTEM accessible and thus mimic the
* php_admin_* directives used within Apache httpd.conf when PHP is compiled as
* a module for Apache.
* 5. User defined ini files (like .htaccess for apache) are parsed for each request and
* stored in separate hash defined by SAPI.
*/
/* TODO: (ordered by importance :-)
* ===============================================================================
*
* - Separate constant lookup totally from plain strings (using CONSTANT pattern)
* - Add #if .. #else .. #endif and ==, !=, <, > , <=, >= operators
* - Add #include "some.ini"
* - Allow variables to refer to options also when using parse_ini_file()
*
*/
/* Globals Macros */
/* SCNG(field) is the accessor used throughout this file for scanner state. */
#define SCNG INI_SCNG
/* Scanner globals storage: a resource id in ZTS builds, a plain global struct otherwise. */
#ifdef ZTS
ZEND_API ts_rsrc_id ini_scanner_globals_id;
#else
ZEND_API zend_ini_scanner_globals ini_scanner_globals;
#endif
/* Passed as the last argument of zend_string_init() by zend_ini_copy_value(). */
#define ZEND_SYSTEM_INI CG(ini_parser_unbuffered_errors)
/* Strip leading blanks: advance the token start past every leading space or
 * tab, shrinking the token length by one for each byte skipped. */
#define EAT_LEADING_WHITESPACE() \
	for (; yyleng && (yytext[0] == ' ' || yytext[0] == '\t'); yyleng--) { \
		SCNG(yy_text)++; \
	}
/* Strip trailing blanks: shorten the token while its last byte is a space,
 * tab, CR or LF, or equals the extra character `ch`. Passing 'X' as `ch`
 * means "no extra character", so a trailing 'X' is never stripped. */
#define EAT_TRAILING_WHITESPACE_EX(ch) \
	for (; yyleng; yyleng--) { \
		const char last_ch_ = yytext[yyleng - 1]; \
		if (last_ch_ == ' ' || last_ch_ == '\t' || last_ch_ == '\r' || last_ch_ == '\n') { \
			continue; \
		} \
		if ((ch) != 'X' && last_ch_ == (ch)) { \
			continue; \
		} \
		break; \
	}
/* Strip trailing blanks only (no extra character). */
#define EAT_TRAILING_WHITESPACE() EAT_TRAILING_WHITESPACE_EX('X')
/* Store a fresh string zval built from the first `len` bytes of `str`. */
#define zend_ini_copy_value(retval, str, len) \
	ZVAL_NEW_STR((retval), zend_string_init((str), (len), ZEND_SYSTEM_INI))

/* Fill ini_lval for the current token and return its type from ini_lex().
 * In typed scanner mode the value is converted according to the token type;
 * in every other mode it is copied as a plain string. */
#define RETURN_TOKEN(type, str, len) do { \
		if (SCNG(scanner_mode) != ZEND_INI_SCANNER_TYPED) { \
			zend_ini_copy_value(ini_lval, str, len); \
		} else { \
			zend_ini_copy_typed_value(ini_lval, type, str, len); \
		} \
		return type; \
	} while (0)
/* Try to interpret str[0..str_len) as a number.
 *
 * On success stores an integer or a non-overflowing double in retval and
 * returns SUCCESS. Returns FAILURE, leaving retval untouched, when the text
 * is not numeric or only fits a double that overflowed. */
static inline int convert_to_number(zval *retval, const char *str, const int str_len)
{
	zend_long int_value;
	double float_value;
	int overflowed;

	switch (is_numeric_string_ex(str, str_len, &int_value, &float_value, 0, &overflowed)) {
		case IS_LONG:
			ZVAL_LONG(retval, int_value);
			return SUCCESS;

		case IS_DOUBLE:
			if (!overflowed) {
				ZVAL_DOUBLE(retval, float_value);
				return SUCCESS;
			}
			break;

		default:
			break;
	}

	return FAILURE;
}
/* Store a token value in retval using the PHP type implied by the token:
 * booleans for BOOL_TRUE/BOOL_FALSE, null for NULL_NULL, an integer or double
 * for TC_NUMBER when the text converts cleanly, and a string copy of
 * str[0..len) for everything else (including unconvertible numbers). */
static void zend_ini_copy_typed_value(zval *retval, const int type, const char *str, int len)
{
	if (type == BOOL_TRUE || type == BOOL_FALSE) {
		ZVAL_BOOL(retval, type == BOOL_TRUE);
		return;
	}

	if (type == NULL_NULL) {
		ZVAL_NULL(retval);
		return;
	}

	if (type == TC_NUMBER && convert_to_number(retval, str, len) == SUCCESS) {
		return;
	}

	/* Plain strings and numbers that could not be converted. */
	zend_ini_copy_value(retval, str, len);
}
/* Save the current scanner condition on the state stack, then switch to
 * new_state. Undone by yy_pop_state(). */
static void _yy_push_state(int new_state)
{
	int saved_state = YYGETCONDITION();

	zend_stack_push(&SCNG(state_stack), (void *) &saved_state);
	YYSETCONDITION(new_state);
}

/* Convenience wrapper taking the bare condition name (without the yyc prefix). */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
/* Restore the scanner condition saved by the matching _yy_push_state() call
 * and drop it from the state stack. */
static void yy_pop_state(void)
{
	const int *saved = zend_stack_top(&SCNG(state_stack));
	const int previous_state = *saved;

	zend_stack_del_top(&SCNG(state_stack));
	YYSETCONDITION(previous_state);
}
static void yy_scan_buffer(char *str, unsigned int len)
{
YYCURSOR = (YYCTYPE*)str;
SCNG(yy_start) = YYCURSOR;
YYLIMIT = YYCURSOR + len;
}
#define ini_filename SCNG(filename)
/* {{{ init_ini_scanner()
 * Reset the scanner globals for a new scan: line counter at 1, the requested
 * mode, the file handle (may be NULL), a private copy of its filename, an
 * empty condition stack and the INITIAL condition.
 * Emits a warning and returns FAILURE for an unknown scanner_mode, otherwise
 * returns SUCCESS.
 */
static int init_ini_scanner(int scanner_mode, zend_file_handle *fh)
{
	/* Sanity check */
	switch (scanner_mode) {
		case ZEND_INI_SCANNER_NORMAL:
		case ZEND_INI_SCANNER_RAW:
		case ZEND_INI_SCANNER_TYPED:
			break;

		default:
			zend_error(E_WARNING, "Invalid scanner mode");
			return FAILURE;
	}

	SCNG(lineno) = 1;
	SCNG(scanner_mode) = scanner_mode;
	SCNG(yy_in) = fh;

	/* Keep our own copy of the filename; shutdown_ini_scanner() frees it. */
	ini_filename = (fh != NULL) ? zend_strndup(fh->filename, strlen(fh->filename)) : NULL;

	zend_stack_init(&SCNG(state_stack), sizeof(int));
	BEGIN(INITIAL);

	return SUCCESS;
}
/* }}} */
/* {{{ shutdown_ini_scanner()
 * Release what init_ini_scanner() set up: the condition stack and the copied
 * filename. The filename pointer is reset afterwards so that a later
 * zend_ini_scanner_get_filename() call or a repeated shutdown never touches
 * freed memory.
 */
void shutdown_ini_scanner(void)
{
	zend_stack_destroy(&SCNG(state_stack));
	if (ini_filename) {
		free(ini_filename);
		ini_filename = NULL;
	}
}
/* }}} */
/* {{{ zend_ini_scanner_get_lineno()
 * Return the scanner's current line counter.
 */
ZEND_COLD int zend_ini_scanner_get_lineno(void)
{
	const int current_line = SCNG(lineno);

	return current_line;
}
/* }}} */
/* {{{ zend_ini_scanner_get_filename()
 * Return the name of the file being scanned, or the literal "Unknown" when
 * no filename is recorded.
 */
ZEND_COLD char *zend_ini_scanner_get_filename(void)
{
	if (!ini_filename) {
		return "Unknown";
	}

	return ini_filename;
}
/* }}} */
/* {{{ zend_ini_open_file_for_scanning()
 * Obtain the file's contents through zend_stream_fixup(), initialise the
 * scanner for scanner_mode and point it at those contents.
 * Returns FAILURE when the contents cannot be obtained or the mode is
 * invalid (in the latter case the file handle is destroyed), SUCCESS otherwise.
 */
int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode)
{
	char *contents;
	size_t contents_len;

	if (zend_stream_fixup(fh, &contents, &contents_len) == FAILURE) {
		return FAILURE;
	}

	if (init_ini_scanner(scanner_mode, fh) != FAILURE) {
		yy_scan_buffer(contents, (unsigned int) contents_len);
		return SUCCESS;
	}

	zend_file_handle_dtor(fh);
	return FAILURE;
}
/* }}} */
/* {{{ zend_ini_prepare_string_for_scanning()
 * Initialise the scanner for scanner_mode with no file handle and point it at
 * the NUL-terminated string str.
 * Returns FAILURE for an invalid mode, SUCCESS otherwise.
 */
int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode)
{
	const int input_len = (int) strlen(str);

	if (init_ini_scanner(scanner_mode, NULL) != FAILURE) {
		yy_scan_buffer(str, input_len);
		return SUCCESS;
	}

	return FAILURE;
}
/* }}} */
/* {{{ zend_ini_escape_string()
 * Copy str[0..len) into lval as a string and resolve escape sequences in
 * place: a backslash followed by a backslash, a '$', or a '"' that equals
 * quote_type collapses to the bare character (shortening the string by one).
 * Every other backslash sequence, and a lone trailing backslash, is kept
 * verbatim. Line breaks seen in the text (LF, or CR not followed by LF)
 * advance the scanner's line counter.
 */
static void zend_ini_escape_string(zval *lval, char *str, int len, char quote_type)
{
	char *src, *dst, *limit;

	zend_ini_copy_value(lval, str, len);

	/* dst never runs ahead of src, so rewriting in place is safe. */
	src = dst = Z_STRVAL_P(lval);
	limit = src + Z_STRLEN_P(lval);

	for (; src < limit; src++) {
		if (*src != '\\') {
			*dst++ = *src;
		} else {
			src++;
			if (src >= limit) {
				/* Backslash was the very last byte: keep it. */
				*dst++ = '\\';
				break;
			}

			if (*src == '\\' || *src == '$' || (*src == '"' && *src == quote_type)) {
				/* Recognised escape: drop the backslash. */
				*dst++ = *src;
				Z_STRLEN_P(lval)--;
			} else {
				/* Unknown escape: keep both bytes. */
				*dst++ = '\\';
				*dst++ = *src;
			}
		}

		if (*src == '\n' || (*src == '\r' && src[1] != '\n')) {
			SCNG(lineno)++;
		}
	}

	*dst = 0;
}
/* }}} */
/* Scan the next token from the current input buffer.
 *
 * Returns the token id: a TC_*, BOOL_* or NULL_NULL constant, END_OF_LINE, or
 * a literal character such as '=' or ']'. Returns 0 at end of input or when
 * no rule accepts the input. For value-carrying tokens the value is stored in
 * *ini_lval (via RETURN_TOKEN or zend_ini_escape_string()).
 *
 * The scanner condition (INITIAL, ST_VALUE, ST_RAW, ...) is kept in the
 * scanner globals and is switched by the rule actions below.
 */
int ini_lex(zval *ini_lval)
{
restart:
	SCNG(yy_text) = YYCURSOR;

/* yymore_restart: */
	/* detect EOF */
	if (YYCURSOR >= YYLIMIT) {
		/* A value that runs up to EOF still needs its END_OF_LINE token. */
		if (YYSTATE == STATE(ST_VALUE) || YYSTATE == STATE(ST_RAW)) {
			BEGIN(INITIAL);
			return END_OF_LINE;
		}
		return 0;
	}

	/* Eat any UTF-8 BOM we find in the first 3 bytes */
	if (YYCURSOR == SCNG(yy_start) && YYCURSOR + 3 < YYLIMIT) {
		if (memcmp(YYCURSOR, "\xef\xbb\xbf", 3) == 0) {
			YYCURSOR += 3;
			goto restart;
		}
	}
/*!re2c
re2c:yyfill:check = 0;
LNUM [0-9]+
DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
NUMBER [-]?{LNUM}|{DNUM}
ANY_CHAR (.|[\n\t])
NEWLINE ("\r"|"\n"|"\r\n")
TABS_AND_SPACES [ \t]
WHITESPACE [ \t]+
CONSTANT [a-zA-Z_][a-zA-Z0-9_]*
LABEL [^=\n\r\t;&|^$~(){}!"\[]+
TOKENS [:,.\[\]"'()&|^+-/*=%$!~<>?@{}]
OPERATORS [&|^~()!]
DOLLAR_CURLY "${"
SECTION_RAW_CHARS [^\]\n\r]
SINGLE_QUOTED_CHARS [^']
RAW_VALUE_CHARS [^\n\r;\000]
LITERAL_DOLLAR ("$"([^{\000]|("\\"{ANY_CHAR})))
VALUE_CHARS ([^$= \t\n\r;&|^~()!"'\000]|{LITERAL_DOLLAR})
SECTION_VALUE_CHARS ([^$\n\r;"'\]\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR})
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
<INITIAL>"[" { /* Section start */
	/* Enter section data lookup state */
	if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
		BEGIN(ST_SECTION_RAW);
	} else {
		BEGIN(ST_SECTION_VALUE);
	}
	return TC_SECTION;
}
<ST_VALUE,ST_SECTION_VALUE,ST_OFFSET>"'"{SINGLE_QUOTED_CHARS}+"'" { /* Raw string */
	/* Eat leading and trailing single quotes */
	if (yytext[0] == '\'' && yytext[yyleng - 1] == '\'') {
		SCNG(yy_text)++;
		yyleng = yyleng - 2;
	}
	RETURN_TOKEN(TC_RAW, yytext, yyleng);
}
<ST_SECTION_RAW,ST_SECTION_VALUE>"]"{TABS_AND_SPACES}*{NEWLINE}? { /* End of section */
	BEGIN(INITIAL);
	/* The newline is optional in this rule: only count a line when one was
	 * really consumed, otherwise the rule that later eats the newline (e.g.
	 * a trailing comment) would count the same line a second time. */
	if (yytext[yyleng - 1] == '\n' || yytext[yyleng - 1] == '\r') {
		SCNG(lineno)++;
	}
	return ']';
}
<INITIAL>{LABEL}"["{TABS_AND_SPACES}* { /* Start of option with offset */
	/* Eat leading whitespace */
	EAT_LEADING_WHITESPACE();
	/* Eat trailing whitespace and [ */
	EAT_TRAILING_WHITESPACE_EX('[');
	/* Enter offset lookup state */
	BEGIN(ST_OFFSET);
	RETURN_TOKEN(TC_OFFSET, yytext, yyleng);
}
<ST_OFFSET>{TABS_AND_SPACES}*"]" { /* End of section or an option offset */
	BEGIN(INITIAL);
	return ']';
}
<ST_DOUBLE_QUOTES,ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{DOLLAR_CURLY} { /* Variable start */
	yy_push_state(ST_VARNAME);
	return TC_DOLLAR_CURLY;
}
<ST_VARNAME>{LABEL} { /* Variable name */
	/* Eat leading whitespace */
	EAT_LEADING_WHITESPACE();
	/* Eat trailing whitespace */
	EAT_TRAILING_WHITESPACE();
	RETURN_TOKEN(TC_VARNAME, yytext, yyleng);
}
<ST_VARNAME>"}" { /* Variable end */
	yy_pop_state();
	return '}';
}
<INITIAL,ST_VALUE>("true"|"on"|"yes"){TABS_AND_SPACES}* { /* TRUE value (when used outside option value/offset this causes parse error!) */
	RETURN_TOKEN(BOOL_TRUE, "1", 1);
}
<INITIAL,ST_VALUE>("false"|"off"|"no"|"none"){TABS_AND_SPACES}* { /* FALSE value (when used outside option value/offset this causes parse error!)*/
	RETURN_TOKEN(BOOL_FALSE, "", 0);
}
<INITIAL,ST_VALUE>("null"){TABS_AND_SPACES}* {
	RETURN_TOKEN(NULL_NULL, "", 0);
}
<INITIAL>{LABEL} { /* Get option name */
	/* Eat leading whitespace */
	EAT_LEADING_WHITESPACE();
	/* Eat trailing whitespace */
	EAT_TRAILING_WHITESPACE();
	RETURN_TOKEN(TC_LABEL, yytext, yyleng);
}
<INITIAL>{TABS_AND_SPACES}*[=]{TABS_AND_SPACES}* { /* Start option value */
	if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
		BEGIN(ST_RAW);
	} else {
		BEGIN(ST_VALUE);
	}
	return '=';
}
<ST_RAW>{RAW_VALUE_CHARS} { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
	/* First ';' seen in the value, if any (possible start of a comment). */
	unsigned char *sc = NULL;
	/* Consume the rest of the line by hand. */
	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR) {
			case '\n':
			case '\r':
				goto end_raw_value_chars;
				break;
			case ';':
				if (sc == NULL) {
					sc = YYCURSOR;
				}
				/* no break */
			default:
				YYCURSOR++;
				break;
		}
	}
end_raw_value_chars:
	yyleng = YYCURSOR - SCNG(yy_text);
	/* Eat trailing semicolons */
	while (yytext[yyleng - 1] == ';') {
		yyleng--;
	}
	/* Eat leading and trailing double quotes */
	if (yyleng > 1 && yytext[0] == '"' && yytext[yyleng - 1] == '"') {
		SCNG(yy_text)++;
		yyleng = yyleng - 2;
	} else if (sc) {
		/* Unquoted value: it ends at the first ';', the rest is rescanned. */
		YYCURSOR = sc;
		yyleng = YYCURSOR - SCNG(yy_text);
	}
	RETURN_TOKEN(TC_RAW, yytext, yyleng);
}
<ST_SECTION_RAW>{SECTION_RAW_CHARS}+ { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
	RETURN_TOKEN(TC_RAW, yytext, yyleng);
}
<ST_VALUE,ST_RAW>{TABS_AND_SPACES}*{NEWLINE} { /* End of option value */
	BEGIN(INITIAL);
	SCNG(lineno)++;
	return END_OF_LINE;
}
<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{CONSTANT} { /* Get constant option value */
	RETURN_TOKEN(TC_CONSTANT, yytext, yyleng);
}
<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{NUMBER} { /* Get number option value as string */
	RETURN_TOKEN(TC_NUMBER, yytext, yyleng);
}
<INITIAL>{TOKENS} { /* Disallow these chars outside option values */
	return yytext[0];
}
<ST_VALUE>{OPERATORS}{TABS_AND_SPACES}* { /* Boolean operators */
	return yytext[0];
}
<ST_VALUE>[=] { /* Make = used in option value to trigger error */
	yyless(0);
	BEGIN(INITIAL);
	return END_OF_LINE;
}
<ST_VALUE>{VALUE_CHARS}+ { /* Get everything else as option/offset value */
	RETURN_TOKEN(TC_STRING, yytext, yyleng);
}
<ST_SECTION_VALUE,ST_OFFSET>{SECTION_VALUE_CHARS}+ { /* Get rest as section/offset value */
	RETURN_TOKEN(TC_STRING, yytext, yyleng);
}
<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{TABS_AND_SPACES}*["] { /* Double quoted '"' string start */
	yy_push_state(ST_DOUBLE_QUOTES);
	return '"';
}
<ST_DOUBLE_QUOTES>["]{TABS_AND_SPACES}* { /* Double quoted '"' string ends */
	yy_pop_state();
	return '"';
}
<ST_DOUBLE_QUOTES>[^] { /* Escape double quoted string contents */
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* Advance to the closing quote or to the start of a ${ variable. */
	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '"':
				if (YYCURSOR < YYLIMIT && YYCURSOR[-2] == '\\' && *YYCURSOR != '\r' && *YYCURSOR != '\n') {
					continue;
				}
				break;
			case '$':
				if (*YYCURSOR == '{') {
					break;
				}
				continue;
			case '\\':
				if (YYCURSOR < YYLIMIT && *YYCURSOR != '"') {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* Leave the terminating character for the next call. */
		YYCURSOR--;
		break;
	}

	yyleng = YYCURSOR - SCNG(yy_text);
	zend_ini_escape_string(ini_lval, yytext, yyleng, '"');
	return TC_QUOTED_STRING;
}
<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{WHITESPACE} {
	RETURN_TOKEN(TC_WHITESPACE, yytext, yyleng);
}
<INITIAL,ST_RAW>{TABS_AND_SPACES}+ {
	/* eat whitespace */
	goto restart;
}
<INITIAL>{TABS_AND_SPACES}*{NEWLINE} {
	SCNG(lineno)++;
	return END_OF_LINE;
}
<INITIAL,ST_VALUE,ST_RAW>{TABS_AND_SPACES}*[;][^\r\n]*{NEWLINE} { /* Comment */
	BEGIN(INITIAL);
	SCNG(lineno)++;
	return END_OF_LINE;
}
<ST_VALUE,ST_RAW>[^] { /* End of option value (if EOF is reached before EOL) */
	BEGIN(INITIAL);
	return END_OF_LINE;
}
<*>[^] {
	return 0;
}
*/
}