Fix #51903: simplexml_load_file() doesn't use HTTP headers

The `encoding` attribute of the XML declaration is optional; it is good
practice to use external encoding information where available if it is
missing.  Thus, we check for `charset` info of `Content-Type` headers,
and see whether the encoding is supported.

We cater to trailing parameters and quoted-strings, but not to escaped
backslashes and quotes in quoted-strings, since no known character
encoding name contains these anyway.

Co-authored-by: Michael Wallner <mike@php.net>

Closes GH-6747.
This commit is contained in:
Christoph M. Becker 2021-03-03 19:23:39 +01:00
parent 5787f91c55
commit f901bec494
3 changed files with 89 additions and 0 deletions

3
NEWS
View File

@ -11,6 +11,9 @@ PHP NEWS
. Fixed bug #80763 (msgfmt_format() does not accept DateTime references).
(cmb)
- Libxml:
. Fixed bug #51903 (simplexml_load_file() doesn't use HTTP headers). (cmb)
- MySQLnd:
. Fixed bug #80713 (SegFault when disabling ATTR_EMULATE_PREPARES and
MySQL 8.0). (Nikita)

View File

@ -409,6 +409,54 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
return(NULL);
}
/* Check whether an external transport protocol supplied encoding information.
 *
 * Only consulted while no encoding is known yet (enc is
 * XML_CHAR_ENCODING_NONE). The stream's wrapperdata array is scanned for a
 * `Content-Type:` header; its `charset=` parameter, if present and usable, is
 * handed to xmlParseCharEncoding(). Results <= XML_CHAR_ENCODING_NONE are
 * normalized back to XML_CHAR_ENCODING_NONE. */
if (enc == XML_CHAR_ENCODING_NONE) {
	php_stream *s = (php_stream *) context;

	if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
		zval *header;

		ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
			const char buf[] = "Content-Type:";

			if (Z_TYPE_P(header) == IS_STRING &&
					!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
				/* Private copies: the header copy is cut in place below, and
				 * php_stristr() takes non-const pointers for both arguments. */
				char *needle = estrdup("charset=");
				char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
				char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);

				if (encoding) {
					char *end;

					/* Skip the parameter name and an optional opening quote. */
					encoding += sizeof("charset=")-1;
					if (*encoding == '"') {
						encoding++;
					}

					/* The value runs up to the next parameter or the end of the header. */
					end = strchr(encoding, ';');
					if (end == NULL) {
						end = encoding + strlen(encoding);
					}

					/* end == encoding-1 isn't a buffer underrun: the matched
					 * "charset=" text precedes encoding inside haystack. */
					end--;
					/* Drop trailing whitespace and an optional closing quote. */
					while (*end == ' ' || *end == '\t') {
						end--;
					}
					if (*end == '"') {
						end--;
					}

					if (encoding >= end) {
						/* No usable charset value in this header. Release the
						 * copies before moving on to the next header; skipping
						 * this cleanup would leak both allocations. */
						efree(haystack);
						efree(needle);
						continue;
					}

					*(end+1) = '\0';
					enc = xmlParseCharEncoding(encoding);
					if (enc <= XML_CHAR_ENCODING_NONE) {
						/* Unknown or unsupported name: behave as if none was given. */
						enc = XML_CHAR_ENCODING_NONE;
					}
				}

				efree(haystack);
				efree(needle);
				break; /* found content-type */
			}
		} ZEND_HASH_FOREACH_END();
	}
}
/* Allocate the Input buffer front-end. */
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) {

View File

@ -0,0 +1,38 @@
--TEST--
Bug #51903 (simplexml_load_file() doesn't use HTTP headers)
--SKIPIF--
<?php
// The test needs SimpleXML itself ...
if (!extension_loaded('simplexml')) {
    die('skip simplexml extension not available');
}
// ... and the HTTP test server helpers (include failure is silenced and turned into a skip).
if (@!include "./ext/standard/tests/http/server.inc") {
    die('skip server.inc not available');
}
http_server_skipif('tcp://127.0.0.1:12342');
?>
--FILE--
<?php
require "./ext/standard/tests/http/server.inc";

// Canned HTTP responses. None of the XML declarations carries an `encoding`
// attribute; ISO-8859-1 is announced only through the Content-Type header:
//   1. plain charset parameter
//   2. charset followed by a trailing parameter
//   3. quoted charset, followed by whitespace and a trailing parameter
$responses = [
    "data://text/plain,HTTP/1.1 200 OK\r\n"
    . "Content-Type: text/xml; charset=ISO-8859-1\r\n\r\n"
    . "<?xml version=\"1.0\"?>\n"
    . "<root>\xE4\xF6\xFC</root>\n",
    "data://text/plain,HTTP/1.1 200 OK\r\n"
    . "Content-Type: text/xml; charset=ISO-8859-1; foo=bar\r\n\r\n"
    . "<?xml version=\"1.0\"?>\n"
    . "<root>\xE4\xF6\xFC</root>\n",
    "data://text/plain,HTTP/1.1 200 OK\r\n"
    . "Content-Type: text/xml; charset=\"ISO-8859-1\" ; foo=bar\r\n\r\n"
    . "<?xml version=\"1.0\"?>\n"
    . "<root>\xE4\xF6\xFC</root>\n",
];

$serverPid = http_server('tcp://127.0.0.1:12342', $responses);

// Issue one request per canned response and print the root element's text.
foreach ($responses as $ignored) {
    $document = simplexml_load_file('http://127.0.0.1:12342/');
    echo $document, "\n";
}

http_server_kill($serverPid);
?>
--EXPECT--
äöü
äöü
äöü