Implement request #68325: parse huge option for xml_parser_create (#12256)

This commit is contained in:
Niels Dossche 2023-10-22 15:44:49 +01:00 committed by GitHub
parent 06ee697cff
commit 98b08c52db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 170 additions and 13 deletions

View File

@ -141,6 +141,11 @@ PHP 8.4 UPGRADE NOTES
. The IntlDateFormatter class now exposes the new PATTERN constant
reflecting the udat API's UDAT_PATTERN.
- XML:
. Added XML_OPTION_PARSE_HUGE to allow large inputs in xml_parse and
xml_parse_into_struct.
RFC: https://wiki.php.net/rfc/xml_option_parse_huge.
========================================
11. Changes to INI File Handling
========================================

View File

@ -43,7 +43,8 @@ enum php_xml_option {
PHP_XML_OPTION_CASE_FOLDING = 1,
PHP_XML_OPTION_TARGET_ENCODING,
PHP_XML_OPTION_SKIP_TAGSTART,
PHP_XML_OPTION_SKIP_WHITE
PHP_XML_OPTION_SKIP_WHITE,
PHP_XML_OPTION_PARSE_HUGE,
};
#ifdef LIBXML_EXPAT_COMPAT

View File

@ -0,0 +1,91 @@
--TEST--
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create)
--EXTENSIONS--
xml
--SKIPIF--
<?php
if (!defined("LIBXML_VERSION")) die('skip this is a libxml2 test');
?>
--FILE--
<?php
/**
 * Prints a name followed by a newline. Names longer than 20 bytes are cut
 * to their first 20 bytes and suffixed with "..." so the output stays short.
 */
function logName(string $str) {
    $shown = strlen($str) > 20 ? substr($str, 0, 20) . "..." : $str;
    echo $shown . "\n";
}
/**
 * Creates an XML parser, prints the value of XML_OPTION_PARSE_HUGE before and
 * after setting it to $huge, and installs element handlers on it.
 *
 * The start-element handler prints "open: " followed by the (possibly
 * shortened) element name; the end-element handler does nothing.
 */
function createParser(bool $huge) {
    $xmlParser = xml_parser_create();

    echo "old option value: ";
    var_dump(xml_parser_get_option($xmlParser, XML_OPTION_PARSE_HUGE));

    xml_parser_set_option($xmlParser, XML_OPTION_PARSE_HUGE, $huge);

    echo "new option value: ";
    var_dump(xml_parser_get_option($xmlParser, XML_OPTION_PARSE_HUGE));

    // Only opening tags are reported; closing tags are silently ignored.
    $onOpen = function ($p, $tagName) {
        echo "open: ";
        logName($tagName);
    };
    $onClose = function ($p, $tagName) {
    };
    xml_set_element_handler($xmlParser, $onOpen, $onClose);

    return $xmlParser;
}
// Construct XML that is too large to parse without XML_OPTION_PARSE_HUGE
$long_text = str_repeat("A", 1000 * 1000 * 5 /* 5 MB */);
// The 5 MB string is used twice as an element name. The document is split into
// a head and a tail so that it can also be fed to the parser in two chunks.
$long_xml_head = "<?xml version=\"1.0\"?><container><$long_text/><$long_text/><second>foo</second>";
$long_xml_tail = "</container>";
// Case 1: xml_parse() with the option off. Only the head is passed and it is
// marked as final; the parse is expected to report an error.
echo "--- Parse using xml_parse (failure) ---\n";
$parser = createParser(false);
$ret = xml_parse($parser, $long_xml_head, true);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
// Case 2: xml_parse() with the option on. The document is fed in two chunks
// and only the second call is marked as final.
echo "--- Parse using xml_parse (success) ---\n";
$parser = createParser(true);
$ret = xml_parse($parser, $long_xml_head, false);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
$ret = xml_parse($parser, $long_xml_tail, true);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
// Case 3: xml_parse_into_struct() with the option off, given only the head.
echo "--- Parse using xml_parse_into_struct (failure) ---\n";
$parser = createParser(false);
$ret = xml_parse_into_struct($parser, $long_xml_head, $values, $index);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
// Case 4: xml_parse_into_struct() with the option on, given the full document.
echo "--- Parse using xml_parse_into_struct (success) ---\n";
$parser = createParser(true);
$ret = xml_parse_into_struct($parser, $long_xml_head . $long_xml_tail, $values, $index);
var_dump(count($values), count($index)); // Not printing out the raw array because the long string will be contained in them as key
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
?>
--EXPECT--
--- Parse using xml_parse (failure) ---
old option value: bool(false)
new option value: bool(false)
open: CONTAINER
ret = 0 (XML_ERR_NAME_REQUIRED)
--- Parse using xml_parse (success) ---
old option value: bool(false)
new option value: bool(true)
open: CONTAINER
open: AAAAAAAAAAAAAAAAAAAA...
open: AAAAAAAAAAAAAAAAAAAA...
open: SECOND
ret = 1 (No error)
ret = 1 (No error)
--- Parse using xml_parse_into_struct (failure) ---
old option value: bool(false)
new option value: bool(false)
open: CONTAINER
ret = 0 (XML_ERR_NAME_REQUIRED)
--- Parse using xml_parse_into_struct (success) ---
old option value: bool(false)
new option value: bool(true)
open: CONTAINER
open: AAAAAAAAAAAAAAAAAAAA...
open: AAAAAAAAAAAAAAAAAAAA...
open: SECOND
int(5)
int(3)
ret = 1 (No error)

View File

@ -0,0 +1,27 @@
--TEST--
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create - setting during parsing)
--EXTENSIONS--
xml
--SKIPIF--
<?php
if (!defined("LIBXML_VERSION")) die('skip this is a libxml2 test');
?>
--FILE--
<?php
// Try to change XML_OPTION_PARSE_HUGE from inside the start-element handler,
// i.e. while xml_parse() is still running. The test expects this to throw an
// Error ("Cannot change option XML_OPTION_PARSE_HUGE while parsing").
$parser = xml_parser_create();
xml_set_element_handler($parser, function($parser, $data) {
xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, true);
}, function($parser, $data) {
});
// A single element is enough to trigger the start-element handler once.
xml_parse($parser, "<foo/>", true);
?>
--EXPECTF--
Fatal error: Uncaught Error: Cannot change option XML_OPTION_PARSE_HUGE while parsing in %s:%d
Stack trace:
#0 %s(%d): xml_parser_set_option(Object(XMLParser), 5, true)
#1 [internal function]: {closure}(Object(XMLParser), 'FOO', Array)
#2 %s(%d): xml_parse(Object(XMLParser), '<foo/>', true)
#3 {main}
thrown in %s on line %d

View File

@ -91,6 +91,7 @@ typedef struct {
int lastwasopen;
int skipwhite;
int isparsing;
bool parsehuge;
XML_Char *baseURI;
@ -264,6 +265,28 @@ PHP_MINFO_FUNCTION(xml)
/* {{{ extension-internal functions */
/*
 * Feeds one chunk of input to the underlying parser via XML_Parse().
 *
 * Before parsing, the parser's "parsehuge" setting is applied to the
 * libxml2 parser context (only when built against the libxml2 expat
 * compatibility layer). parser->isparsing is raised for the duration of
 * the XML_Parse() call and cleared again afterwards.
 *
 * The parser must not already be parsing when this is called; this is
 * checked with ZEND_ASSERT only.
 *
 * parser:   the parser object to run
 * data:     input bytes for this chunk
 * data_len: number of bytes in data
 * is_final: forwarded to XML_Parse() as its "final chunk" argument
 *
 * Returns the value returned by XML_Parse().
 */
static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
{
	ZEND_ASSERT(!parser->isparsing);

	/* libxml2 specific options */
	/* Use #ifdef, matching the guard used in the header: the macro is not
	 * defined at all on non-libxml2 builds, and testing its value with #if
	 * would evaluate an undefined identifier there. */
#ifdef LIBXML_EXPAT_COMPAT
	/* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
	if (parser->parsehuge) {
		parser->parser->parser->options |= XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, 0);
	} else {
		parser->parser->parser->options &= ~XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
	}
#endif

	parser->isparsing = 1;
	int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
	parser->isparsing = 0;
	return ret;
}
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
if (s == NULL) {
@ -1024,6 +1047,7 @@ static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_supp
parser->target_encoding = encoding;
parser->case_folding = 1;
parser->isparsing = 0;
parser->parsehuge = false; /* It's the default for BC & DoS protection */
XML_SetUserData(parser->parser, parser);
ZVAL_COPY_VALUE(&parser->index, return_value);
@ -1283,7 +1307,6 @@ PHP_FUNCTION(xml_parse)
zval *pind;
char *data;
size_t data_len;
int ret;
bool isFinal = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
@ -1295,10 +1318,7 @@ PHP_FUNCTION(xml_parse)
zend_throw_error(NULL, "Parser must not be called recursively");
RETURN_THROWS();
}
parser->isparsing = 1;
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
parser->isparsing = 0;
RETVAL_LONG(ret);
RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
}
/* }}} */
@ -1310,7 +1330,6 @@ PHP_FUNCTION(xml_parse_into_struct)
zval *pind, *xdata, *info = NULL;
char *data;
size_t data_len;
int ret;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
RETURN_THROWS();
@ -1348,11 +1367,7 @@ PHP_FUNCTION(xml_parse_into_struct)
XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
parser->isparsing = 1;
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
parser->isparsing = 0;
RETVAL_LONG(ret);
RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
}
/* }}} */
@ -1481,6 +1496,15 @@ PHP_FUNCTION(xml_parser_set_option)
case PHP_XML_OPTION_SKIP_WHITE:
parser->skipwhite = zend_is_true(value);
break;
/* Boolean option */
case PHP_XML_OPTION_PARSE_HUGE:
/* Prevent wreaking havoc on the parser internals during parsing */
if (UNEXPECTED(parser->isparsing)) {
zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
RETURN_THROWS();
}
parser->parsehuge = zend_is_true(value);
break;
/* Integer option */
case PHP_XML_OPTION_SKIP_TAGSTART:
/* The tag start offset is stored in an int */
@ -1542,6 +1566,9 @@ PHP_FUNCTION(xml_parser_get_option)
case PHP_XML_OPTION_SKIP_WHITE:
RETURN_BOOL(parser->skipwhite);
break;
case PHP_XML_OPTION_PARSE_HUGE:
RETURN_BOOL(parser->parsehuge);
break;
case PHP_XML_OPTION_TARGET_ENCODING:
RETURN_STRING((char *)parser->target_encoding);
break;

View File

@ -133,6 +133,11 @@ const XML_OPTION_SKIP_TAGSTART = UNKNOWN;
* @cvalue PHP_XML_OPTION_SKIP_WHITE
*/
const XML_OPTION_SKIP_WHITE = UNKNOWN;
/**
* @var int
* @cvalue PHP_XML_OPTION_PARSE_HUGE
*/
const XML_OPTION_PARSE_HUGE = UNKNOWN;
/**
* @var string

3
ext/xml/xml_arginfo.h generated
View File

@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: eb168a134e8acf6f19f0cc2c9ddeae95da61045d */
* Stub hash: 69734dd8094fd69c878383d488900886d1162998 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_xml_parser_create, 0, 0, XMLParser, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
@ -166,6 +166,7 @@ static void register_xml_symbols(int module_number)
REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("XML_OPTION_PARSE_HUGE", PHP_XML_OPTION_PARSE_HUGE, CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("XML_SAX_IMPL", PHP_XML_SAX_IMPL, CONST_PERSISTENT);
}