Optimize and reduce memory usage of XML serialization (#14204)

The serialization process uses the system allocator and requires a copy
into request-allocated memory once finished. This patch improves this by
using smart_str to build the resulting string, reducing the number of
copies and reducing total peak memory usage.
This commit is contained in:
Niels Dossche 2024-05-12 01:57:29 +02:00 committed by GitHub
parent 8776561581
commit aa3e6eec50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 42 deletions

View File

@ -693,6 +693,7 @@ PHP 8.4 UPGRADE NOTES
. The performance of DOMNode::C14N() is greatly improved for the case without
an xpath query. This can give a time improvement of easily two orders of
magnitude for documents with tens of thousands of nodes.
. Improved performance and reduced memory consumption of XML serialization.
- FTP:
. Improved the performance of FTP uploads up to a factor of 10x for large
@ -708,6 +709,9 @@ PHP 8.4 UPGRADE NOTES
- MySQLnd:
. Improved the performance of MySQLnd quoting.
- SimpleXML:
. Improved performance and reduced memory consumption of XML serialization.
- Standard:
. Improved the performance of strpbrk().
. get_browser() is much faster now, up to 1.5x - 2.5x for some test cases.

View File

@ -250,42 +250,38 @@ PHP_METHOD(Dom_XMLDocument, createFromFile)
load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
}
static int php_new_dom_write_smart_str(void *context, const char *buffer, int len)
{
smart_str *str = context;
smart_str_appendl(str, buffer, len);
return len;
}
/*
 * Serializes `node` through dom_xml_serialize() using the given `encoding`
 * and returns the output as a zend_string, or NULL when serialization fails.
 * `format` is forwarded to dom_xml_serialize(); `doc` is not referenced in
 * the lines shown here.
 *
 * NOTE(review): this listing appears to show removed (xmlBuffer-based) and
 * added (smart_str-based) lines of a patch side by side, hence the duplicate
 * declarations and back-to-back returns — confirm against the real file.
 */
static zend_string *php_new_dom_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
{
xmlBufferPtr buf = xmlBufferCreate();
if (!buf) {
return NULL;
}
/* Output accumulator; filled by php_new_dom_write_smart_str via the IO callbacks below. */
smart_str str = {0};
/* Starts negative so that a failure to create the save context or output buffer is treated as an error below. */
int status = -1;
xmlSaveCtxtPtr ctxt = xmlSaveToBuffer(buf, encoding, XML_SAVE_AS_XML);
xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_new_dom_write_smart_str, NULL, &str, encoding, XML_SAVE_AS_XML);
if (EXPECTED(ctxt != NULL)) {
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
xmlOutputBufferPtr out = xmlOutputBufferCreateBuffer(buf, handler);
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
if (EXPECTED(out != NULL)) {
status = dom_xml_serialize(ctxt, out, node, format);
/* OR-ing keeps the result negative if any of the three steps returned a negative value. */
status |= xmlOutputBufferFlush(out);
status |= xmlOutputBufferClose(out);
} else {
/* NOTE(review): presumably the handler is only closed by hand when no output buffer took it over — confirm against libxml2 docs. */
xmlCharEncCloseFunc(handler);
}
(void) xmlSaveClose(ctxt);
xmlCharEncCloseFunc(handler);
}
if (UNEXPECTED(status < 0)) {
xmlBufferFree(buf);
/* Discard whatever partial output was accumulated. */
smart_str_free_ex(&str, false);
return NULL;
}
const xmlChar *content = xmlBufferContent(buf);
if (!content) {
xmlBufferFree(buf);
return NULL;
}
int size = xmlBufferLength(buf);
zend_string *res = zend_string_init((const char *) content, size, false);
xmlBufferFree(buf);
return res;
/* Hands the accumulated smart_str contents to the caller as a zend_string. */
return smart_str_extract(&str);
}
static zend_string *php_new_dom_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)

View File

@ -1481,43 +1481,38 @@ PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free)
return value;
}
static int php_libxml_write_smart_str(void *context, const char *buffer, int len)
{
smart_str *str = context;
smart_str_appendl(str, buffer, len);
return len;
}
/*
 * Serializes the whole document `doc` with xmlSaveDoc(), passing `options`
 * and `encoding` to the save context, and returns the output as a
 * zend_string. Returns NULL when the save context cannot be created or
 * xmlSaveDoc() reports a negative status.
 *
 * NOTE(review): this listing appears to show removed (xmlBuffer-based) and
 * added (smart_str-based) lines of a patch side by side, hence the duplicate
 * declarations and back-to-back returns — confirm against the real file.
 */
static zend_string *php_libxml_default_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)
{
xmlBufferPtr buf = xmlBufferCreate();
if (!buf) {
return NULL;
}
/* Output accumulator; filled by php_libxml_write_smart_str via the save context below. */
smart_str str = {0};
/* Encoding is handled from the encoding property set on the document */
xmlSaveCtxtPtr ctxt = xmlSaveToBuffer(buf, encoding, options);
xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_libxml_write_smart_str, NULL, &str, encoding, options);
if (!ctxt) {
xmlBufferFree(buf);
return NULL;
}
long status = xmlSaveDoc(ctxt, doc);
/* The close result is deliberately ignored; only the xmlSaveDoc() status decides success. */
(void) xmlSaveClose(ctxt);
if (status < 0) {
xmlBufferFree(buf);
/* Discard whatever partial output was accumulated. */
smart_str_free_ex(&str, false);
return NULL;
}
const xmlChar *content = xmlBufferContent(buf);
if (!content) {
xmlBufferFree(buf);
return NULL;
}
int size = xmlBufferLength(buf);
zend_string *str = zend_string_init((const char *) content, size, false);
xmlBufferFree(buf);
return str;
/* Hands the accumulated smart_str contents to the caller as a zend_string. */
return smart_str_extract(&str);
}
/*
 * Dumps `node` of `doc` with xmlNodeDumpOutput() (level 0, with `format` and
 * `encoding` passed through) and returns the output as a zend_string.
 * Returns NULL when the output buffer cannot be created or flushing fails.
 *
 * NOTE(review): this listing appears to show removed and added lines of a
 * patch side by side, with a hunk header in the middle — confirm against the
 * real file.
 */
static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
{
// TODO: should this alloc take an encoding? For now keep it NULL for BC.
xmlOutputBufferPtr buf = xmlAllocOutputBuffer(NULL);
/* Output accumulator; filled by php_libxml_write_smart_str via the output buffer below. */
smart_str str = {0};
// TODO: should this buffer take an encoding? For now keep it NULL for BC.
xmlOutputBufferPtr buf = xmlOutputBufferCreateIO(php_libxml_write_smart_str, NULL, &str, NULL);
if (!buf) {
return NULL;
}
@ -1525,16 +1520,14 @@ static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePt
xmlNodeDumpOutput(buf, doc, node, 0, format, encoding);
if (xmlOutputBufferFlush(buf) < 0) {
/* NOTE(review): xmlOutputBufferClose() may flush again and invoke the write callback on `str` after it was freed here — consider closing before freeing; confirm against libxml2 behaviour. */
smart_str_free_ex(&str, false);
xmlOutputBufferClose(buf);
return NULL;
}
const xmlChar *content = xmlOutputBufferGetContent(buf);
size_t size = xmlOutputBufferGetSize(buf);
zend_string *str = zend_string_init((const char *) content, size, false);
xmlOutputBufferClose(buf);
return str;
/* Hands the accumulated smart_str contents to the caller as a zend_string. */
return smart_str_extract(&str);
}
static zend_long php_libxml_default_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding)