Implement Dom $innerHTML property

This commit is contained in:
Niels Dossche 2024-04-12 13:57:02 +02:00
parent 162e71e165
commit 768900b180
26 changed files with 1025 additions and 81 deletions

View File

@ -32,7 +32,7 @@ if test "$PHP_DOM" != "no"; then
parentnode/tree.c parentnode/css_selectors.c \
processinginstruction.c cdatasection.c \
documentfragment.c domimplementation.c \
element.c node.c characterdata.c \
element.c node.c characterdata.c inner_html_mixin.c \
documenttype.c entity.c \
nodelist.c html_collection.c text.c comment.c \
entityreference.c \

View File

@ -10,7 +10,7 @@ if (PHP_DOM == "yes") {
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \
cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \
node.c characterdata.c documenttype.c \
entity.c nodelist.c html_collection.c text.c comment.c \
entityreference.c \

View File

@ -83,6 +83,8 @@ zend_result dom_element_class_name_write(dom_object *obj, zval *newval);
zend_result dom_element_id_read(dom_object *obj, zval *retval);
zend_result dom_element_id_write(dom_object *obj, zval *newval);
zend_result dom_element_schema_type_info_read(dom_object *obj, zval *retval);
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval);
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval);
/* entity properties */
zend_result dom_entity_public_id_read(dom_object *obj, zval *retval);

View File

@ -99,6 +99,7 @@ static zend_always_inline xmlNodePtr lexbor_libxml2_bridge_new_text_node_fast(xm
static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
lxb_dom_node_t *start_node,
xmlDocPtr lxml_doc,
xmlNodePtr root,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
@ -114,7 +115,7 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
lexbor_array_obj_init(&work_list, WORK_LIST_INIT_SIZE, sizeof(work_list_item));
for (lxb_dom_node_t *node = start_node; node != NULL; node = node->prev) {
lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, (xmlNodePtr) lxml_doc, NULL);
lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, root, NULL);
}
work_list_item *current_stack_item;
@ -316,6 +317,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(
lxb_dom_interface_node(document)->last_child,
lxml_doc,
(xmlNodePtr) lxml_doc,
compact_text_nodes,
create_default_ns,
ns_mapper
@ -328,6 +330,35 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
return LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
}
/* Converts Lexbor nodes into libxml2 nodes that are collected under a newly
 * created document fragment belonging to lxml_doc.
 *
 * start_node is handed to lexbor_libxml2_bridge_convert() as the first node to
 * convert, with the new fragment as the libxml2 parent.
 *
 * On success the fragment is stored in *fragment_out and
 * LEXBOR_LIBXML2_BRIDGE_STATUS_OK is returned. On failure the fragment is freed,
 * *fragment_out is left untouched, and the failing status is returned
 * (LEXBOR_LIBXML2_BRIDGE_STATUS_OOM when the fragment itself cannot be created). */
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
	lxb_dom_node_t *start_node,
	xmlDocPtr lxml_doc,
	xmlNodePtr *fragment_out,
	bool compact_text_nodes,
	bool create_default_ns,
	php_dom_libxml_ns_mapper *ns_mapper
)
{
	xmlNodePtr container = xmlNewDocFragment(lxml_doc);
	if (UNEXPECTED(container == NULL)) {
		return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
	}

	const lexbor_libxml2_bridge_status result = lexbor_libxml2_bridge_convert(
		start_node, lxml_doc, container, compact_text_nodes, create_default_ns, ns_mapper
	);

	if (result == LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
		/* Only hand the fragment to the caller once the conversion fully succeeded. */
		*fragment_out = container;
	} else {
		xmlFreeNode(container);
	}

	return result;
}
void lexbor_libxml2_bridge_report_errors(
const lexbor_libxml2_bridge_parse_context *ctx,
lxb_html_parser_t *parser,
@ -376,12 +407,22 @@ void lexbor_libxml2_bridge_report_errors(
*error_index_offset_tree = index;
}
/* Maps Lexbor's document compatibility mode onto the equivalent
 * php_libxml_quirks_mode value. Only the three modes handled below are valid input. */
static php_libxml_quirks_mode dom_translate_quirks_mode(lxb_dom_document_cmode_t quirks_mode)
{
	if (quirks_mode == LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS) {
		return PHP_LIBXML_NO_QUIRKS;
	}
	if (quirks_mode == LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS) {
		return PHP_LIBXML_LIMITED_QUIRKS;
	}

	/* Any other value is a programming error. */
	ZEND_ASSERT(quirks_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS);
	return PHP_LIBXML_QUIRKS;
}
/* Copies what the HTML tree builder observed while parsing (which of the
 * html/head/body tags were explicitly present, and the document's compatibility
 * mode) from the Lexbor tree into the caller-provided observations struct. */
void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations)
{
	observations->has_explicit_html_tag = tree->has_explicit_html_tag;
	observations->has_explicit_head_tag = tree->has_explicit_head_tag;
	observations->has_explicit_body_tag = tree->has_explicit_body_tag;
	/* The compatibility mode is stored as a php_libxml_quirks_mode value; the former
	 * boolean "is quirks" store was immediately overwritten and has been dropped. */
	observations->quirks_mode = dom_translate_quirks_mode(lxb_dom_interface_document(tree->document)->compat_mode);
}
#endif /* HAVE_LIBXML && HAVE_DOM */

View File

@ -47,7 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations {
bool has_explicit_html_tag;
bool has_explicit_head_tag;
bool has_explicit_body_tag;
bool quirks_mode;
php_libxml_quirks_mode quirks_mode;
} lexbor_libxml2_bridge_extracted_observations;
typedef struct _lexbor_libxml2_bridge_parse_context {
@ -73,6 +73,14 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
);
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
lxb_dom_node_t *start_node,
xmlDocPtr lxml_doc,
xmlNodePtr *fragment_out,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
);
void lexbor_libxml2_bridge_report_errors(
const lexbor_libxml2_bridge_parse_context *ctx,
lxb_html_parser_t *parser,

357
ext/dom/inner_html_mixin.c Normal file
View File

@ -0,0 +1,357 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "dom_properties.h"
#include "html5_parser.h"
#include "html5_serializer.h"
#include "xml_serializer.h"
#include "domexception.h"
#include <libxml/xmlsave.h>
#include <lexbor/dom/interfaces/element.h>
#include <lexbor/html/interfaces/document.h>
#include <lexbor/tag/tag.h>
#include <lexbor/encoding/encoding.h>
/* Spec date: 2024-04-14 */
static zend_result dom_inner_html_write_string(void *application_data, const char *buf)
{
smart_str *output = application_data;
smart_str_appends(output, buf);
return SUCCESS;
}
static zend_result dom_inner_html_write_string_len(void *application_data, const char *buf, size_t len)
{
smart_str *output = application_data;
smart_str_appendl(output, buf, len);
return SUCCESS;
}
static int dom_write_smart_str(void *context, const char *buffer, int len)
{
smart_str *str = context;
smart_str_appendl(str, buffer, len);
return len;
}
/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
 * and https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm
 *
 * Property read handler for Element::$innerHTML.
 * For nodes of an HTML document the node is serialized with the HTML5 serializer;
 * otherwise every child is serialized as XML with the well-formedness requirement
 * enabled. On success the resulting string is stored in retval and SUCCESS is
 * returned. If the XML serialization fails, a SYNTAX_ERR DOMException is thrown
 * and FAILURE is returned. */
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
{
	DOM_PROP_NODE(xmlNodePtr, node, obj);

	/* 1. Let context document be the value of node's node document. */
	const xmlDoc *context_document = node->doc;

	/* 2. If context document is an HTML document, return an HTML serialization of node. */
	if (context_document->type == XML_HTML_DOCUMENT_NODE) {
		smart_str output = {0};
		dom_html5_serialize_context ctx;
		ctx.application_data = &output;
		ctx.write_string = dom_inner_html_write_string;
		ctx.write_string_len = dom_inner_html_write_string_len;
		dom_html5_serialize(&ctx, node);
		ZVAL_STR(retval, smart_str_extract(&output));
	}
	/* 3. Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed. */
	else {
		ZEND_ASSERT(context_document->type == XML_DOCUMENT_NODE);

		/* Stays negative unless both the save context and the output buffer could be created. */
		int status = -1;
		smart_str str = {0};
		xmlSaveCtxtPtr ctxt = xmlSaveToIO(dom_write_smart_str, NULL, &str, "UTF-8", XML_SAVE_AS_XML);
		if (EXPECTED(ctxt != NULL)) {
			xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler("UTF-8");
			xmlOutputBufferPtr out = xmlOutputBufferCreateIO(dom_write_smart_str, NULL, &str, handler);
			if (EXPECTED(out != NULL)) {
				/* Note: the innerHTML mixin sets the well-formed flag to true. */
				xmlNodePtr child = node->children;
				status = 0;
				while (child != NULL && status == 0) {
					status = dom_xml_serialize(ctxt, out, child, false, true);
					child = child->next;
				}
				status |= xmlOutputBufferFlush(out);
				/* Closing the output buffer also releases the encoding handler it was created with,
				 * so the handler must not be closed a second time on this path. */
				status |= xmlOutputBufferClose(out);
			} else {
				/* No output buffer took ownership of the handler: release it ourselves. */
				xmlCharEncCloseFunc(handler);
			}
			(void) xmlSaveClose(ctxt);
		}
		if (UNEXPECTED(status < 0)) {
			smart_str_free_ex(&str, false);
			php_dom_throw_error_with_message(SYNTAX_ERR, "The resulting XML serialization is not well-formed", true);
			return FAILURE;
		}
		ZVAL_STR(retval, smart_str_extract(&str));
	}

	return SUCCESS;
}
/* Feeds input to Lexbor's chunked fragment parser, using element as the context
 * element, and returns the node produced by
 * lxb_html_document_parse_fragment_chunk_end(). Returns NULL as soon as any
 * parser call reports a status other than LXB_STATUS_OK.
 *
 * Input flagged as valid UTF-8 is passed through in a single chunk. Otherwise the
 * bytes are scanned, and every sequence the UTF-8 decoder rejects is replaced by
 * LXB_ENCODING_REPLACEMENT_BYTES before being handed to the parser. */
static lxb_dom_node_t *dom_html_fragment_lexbor_parse(lxb_html_document_t *document, lxb_dom_element_t *element, const zend_string *input)
{
	if (lxb_html_document_parse_fragment_chunk_begin(document, element) != LXB_STATUS_OK) {
		return NULL;
	}

	const lxb_encoding_data_t *utf8_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
	lxb_encoding_decode_t decoder;
	lxb_encoding_decode_init_single(&decoder, utf8_data);

	const lxb_char_t *cursor = (const lxb_char_t *) ZSTR_VAL(input);

	if (ZSTR_IS_VALID_UTF8(input)) {
		/* Known-valid input needs neither checking nor replacement of invalid sequences. */
		if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, cursor, ZSTR_LEN(input)) != LXB_STATUS_OK)) {
			return NULL;
		}
		return lxb_html_document_parse_fragment_chunk_end(document);
	}

	/* See dom_decode_encode_fast_path(), simplified version for in-memory use-case. */
	const lxb_char_t *const input_end = cursor + ZSTR_LEN(input);
	/* Start of the bytes that were accepted but not yet passed to the parser. */
	const lxb_char_t *pending_start = cursor;

	while (cursor < input_end) {
		/* Plain ASCII outside of a multi-byte sequence can be skipped without decoding. */
		if (decoder.u.utf_8.need == 0 && *cursor < 0x80) {
			cursor++;
			continue;
		}

		const lxb_char_t *sequence_start = cursor;
		const lxb_codepoint_t codepoint = lxb_encoding_decode_utf_8_single(&decoder, &cursor, input_end);
		if (EXPECTED(codepoint <= LXB_ENCODING_MAX_CODEPOINT)) {
			continue;
		}

		/* Rejected sequence: flush everything accepted before it, then emit the replacement bytes. */
		if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, pending_start, sequence_start - pending_start) != LXB_STATUS_OK)) {
			return NULL;
		}
		if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE) != LXB_STATUS_OK)) {
			return NULL;
		}
		pending_start = cursor;
	}

	/* Flush the accepted tail, if any. */
	if (cursor != pending_start) {
		if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, pending_start, cursor - pending_start) != LXB_STATUS_OK)) {
			return NULL;
		}
	}

	return lxb_html_document_parse_fragment_chunk_end(document);
}
/* Maps a php_libxml_quirks_mode value onto Lexbor's equivalent document
 * compatibility mode. Only the three modes handled below are valid input. */
static lxb_dom_document_cmode_t dom_translate_quirks_mode(php_libxml_quirks_mode quirks_mode)
{
	if (quirks_mode == PHP_LIBXML_NO_QUIRKS) {
		return LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS;
	}
	if (quirks_mode == PHP_LIBXML_LIMITED_QUIRKS) {
		return LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS;
	}

	/* Any other value is a programming error. */
	ZEND_ASSERT(quirks_mode == PHP_LIBXML_QUIRKS);
	return LXB_DOM_DOCUMENT_CMODE_QUIRKS;
}
/* https://html.spec.whatwg.org/#html-fragment-parsing-algorithm
 *
 * Parses input as an HTML fragment with context_node as the context element and
 * returns the result as a libxml2 document fragment belonging to
 * context_node->doc. quirks_mode is applied to the temporary Lexbor document
 * used for parsing.
 * Returns NULL after throwing an INVALID_STATE_ERR DOMException when the Lexbor
 * objects cannot be created, parsing fails, or the conversion to libxml2 fails. */
static xmlNodePtr dom_html_fragment_parsing_algorithm(dom_object *obj, xmlNodePtr context_node, const zend_string *input, php_libxml_quirks_mode quirks_mode)
{
	/* The whole algorithm is implemented in Lexbor, we just have to be the adapter between the
	 * data structures used in PHP and what Lexbor expects. */

	lxb_html_document_t *document = lxb_html_document_create();
	if (UNEXPECTED(document == NULL)) {
		php_dom_throw_error(INVALID_STATE_ERR, true);
		return NULL;
	}
	document->dom_document.compat_mode = dom_translate_quirks_mode(quirks_mode);

	/* Stand-in for the context element: only its tag id and namespace id are filled in below. */
	lxb_dom_element_t *element = lxb_dom_element_interface_create(&document->dom_document);
	if (UNEXPECTED(element == NULL)) {
		lxb_html_document_destroy(document);
		php_dom_throw_error(INVALID_STATE_ERR, true);
		return NULL;
	}

	const lxb_tag_data_t *tag_data = lxb_tag_data_by_name(document->dom_document.tags, (const lxb_char_t *) context_node->name, xmlStrlen(context_node->name));
	element->node.local_name = tag_data == NULL ? LXB_TAG__UNDEF : tag_data->tag_id;

	const lxb_char_t *ns_uri;
	size_t ns_uri_len;
	if (context_node->ns == NULL || context_node->ns->href == NULL) {
		ns_uri = (const lxb_char_t *) "";
		ns_uri_len = 0;
	} else {
		ns_uri = context_node->ns->href;
		ns_uri_len = xmlStrlen(ns_uri);
	}
	const lxb_ns_data_t *ns_data = lxb_ns_data_by_link(document->dom_document.ns, ns_uri, ns_uri_len);
	element->node.ns = ns_data == NULL ? LXB_NS__UNDEF : ns_data->ns_id;

	lxb_dom_node_t *node = dom_html_fragment_lexbor_parse(document, element, input);
	xmlNodePtr fragment = NULL;
	if (node != NULL) {
		/* node->last_child could be NULL, but that is allowed. */
		lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_ns_mapper(obj));
		if (UNEXPECTED(status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
			php_dom_throw_error(INVALID_STATE_ERR, true);
		}
	} else {
		php_dom_throw_error(INVALID_STATE_ERR, true);
	}

	/* The element is not released separately; presumably it is freed together with its owning document. */
	lxb_html_document_destroy(document);
	return fragment;
}
/* Pushes the qualified name of context_node into the push parser:
 * "prefix:name" when the node's namespace has a prefix, otherwise just "name". */
static void dom_xml_parser_tag_name(const xmlNode *context_node, xmlParserCtxtPtr parser)
{
	const xmlNs *ns = context_node->ns;
	const xmlChar *prefix = ns != NULL ? ns->prefix : NULL;

	if (prefix != NULL) {
		xmlParseChunk(parser, (const char *) prefix, xmlStrlen(prefix), 0);
		xmlParseChunk(parser, ":", 1, 0);
	}

	const xmlChar *local_name = context_node->name;
	xmlParseChunk(parser, (const char *) local_name, xmlStrlen(local_name), 0);
}
/* Pushes value into the parser as the content of a double-quoted attribute value.
 * The characters '&', '<' and '"' are replaced by their predefined entities so
 * that a namespace URI containing them cannot terminate or corrupt the attribute. */
static void dom_xml_parser_attr_value(xmlParserCtxtPtr parser, const char *value, size_t len)
{
	if (value == NULL) {
		return;
	}

	const char *const end = value + len;
	/* Start of the run of characters that can be passed through unchanged. */
	const char *run_start = value;

	for (const char *cursor = value; cursor < end; cursor++) {
		const char *entity;
		if (*cursor == '&') {
			entity = "&amp;";
		} else if (*cursor == '<') {
			entity = "&lt;";
		} else if (*cursor == '"') {
			entity = "&quot;";
		} else {
			continue;
		}

		if (cursor != run_start) {
			xmlParseChunk(parser, run_start, (int) (cursor - run_start), 0);
		}
		xmlParseChunk(parser, entity, (int) strlen(entity), 0);
		run_start = cursor + 1;
	}

	if (run_start != end) {
		xmlParseChunk(parser, run_start, (int) (end - run_start), 0);
	}
}

/* Feeds the push parser a synthetic document of the form
 *   <ctx xmlns:p1="..." ... xmlns="...">INPUT</ctx>
 * where ctx is the qualified name of context_node, the xmlns:* attributes are the
 * namespaces returned by php_dom_get_in_scope_ns(), and the default namespace is
 * the one found by dom_locate_a_namespace() for a NULL prefix.
 * The last chunk is pushed with the terminate flag set. Parse results are left
 * in the parser context for the caller to inspect. */
static void dom_xml_fragment_parsing_algorithm_parse(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *context_node, const zend_string *input, xmlParserCtxtPtr parser)
{
	xmlParseChunk(parser, "<", 1, 0);
	dom_xml_parser_tag_name(context_node, parser);

	/* Namespaces: we have to declare all in-scope namespaces including the default namespace */
	/* xmlns attributes */
	php_dom_in_scope_ns in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, context_node, true);
	for (size_t i = 0; i < in_scope_ns.count; i++) {
		const xmlNs *ns = in_scope_ns.list[i];
		xmlParseChunk(parser, " xmlns:", 7, 0);
		ZEND_ASSERT(ns->prefix != NULL);
		xmlParseChunk(parser, (const char *) ns->prefix, xmlStrlen(ns->prefix), 0);
		xmlParseChunk(parser, "=\"", 2, 0);
		/* The URI is arbitrary data, so it must be escaped for use inside the quotes. */
		dom_xml_parser_attr_value(parser, (const char *) ns->href, (size_t) xmlStrlen(ns->href));
		xmlParseChunk(parser, "\"", 1, 0);
	}
	php_dom_in_scope_ns_destroy(&in_scope_ns);

	/* default namespace */
	const char *default_ns = dom_locate_a_namespace(context_node, NULL);
	if (default_ns != NULL) {
		xmlParseChunk(parser, " xmlns=\"", 8, 0);
		dom_xml_parser_attr_value(parser, default_ns, strlen(default_ns));
		xmlParseChunk(parser, "\"", 1, 0);
	}

	xmlParseChunk(parser, ">", 1, 0);

	/* The fragment markup itself is passed through verbatim. */
	xmlParseChunk(parser, (const char *) ZSTR_VAL(input), ZSTR_LEN(input), 0);

	xmlParseChunk(parser, "</", 2, 0);
	dom_xml_parser_tag_name(context_node, parser);
	xmlParseChunk(parser, ">", 1, 1);
}
/* https://html.spec.whatwg.org/#xml-fragment-parsing-algorithm
 *
 * Parses input as XML in the context of context_node and returns the parsed child
 * nodes wrapped in a new document fragment that belongs to context_node->doc.
 *
 * Returns NULL on failure. A DOMException is thrown when the parser context cannot
 * be created (INVALID_STATE_ERR) and when the input is not well-formed or the
 * parsed document element is missing or has a sibling (SYNTAX_ERR).
 * NOTE(review): the paths where the parser produced no document, or where
 * xmlNewDocFragment() fails, return NULL without throwing; confirm that callers
 * can cope with a failure that has no pending exception. */
static xmlNodePtr dom_xml_fragment_parsing_algorithm(dom_object *obj, const xmlNode *context_node, const zend_string *input)
{
/* Steps 1-4 below */
xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
if (UNEXPECTED(parser == NULL)) {
php_dom_throw_error(INVALID_STATE_ERR, true);
return NULL;
}
/* This is not only good to avoid a performance cost of changing the tree, but also to work around an old bug
 * in xmlSetTreeDoc(). */
/* Replace the parser's own dictionary by the one of the context document, creating the latter on demand. */
xmlDictFree(parser->dict);
if (context_node->doc->dict == NULL) {
context_node->doc->dict = xmlDictCreate();
xmlDictSetLimit(context_node->doc->dict, XML_MAX_DICTIONARY_LIMIT);
}
parser->dict = context_node->doc->dict;
php_libxml_sanitize_parse_ctxt_options(parser);
xmlCtxtUseOptions(parser, XML_PARSE_IGNORE_ENC | XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
xmlCharEncodingHandlerPtr encoding = xmlFindCharEncodingHandler("UTF-8");
(void) xmlSwitchToEncoding(parser, encoding);
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(obj);
dom_xml_fragment_parsing_algorithm_parse(ns_mapper, context_node, input, parser);
/* 5. If there is an XML well-formedness or XML namespace well-formedness error, then throw a "SyntaxError" DOMException. */
if (!parser->wellFormed || !parser->nsWellFormed) {
/* Detach the shared dictionary before freeing the parser context; presumably so that
 * xmlFreeParserCtxt() does not release the dictionary owned by the context document. */
parser->dict = NULL;
xmlFreeDoc(parser->myDoc);
xmlFreeParserCtxt(parser);
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
return NULL;
}
/* Grab the parsed document before the parser context that references it goes away. */
xmlDocPtr doc = parser->myDoc;
xmlFreeParserCtxt(parser);
if (EXPECTED(doc != NULL)) {
/* Detach the shared dictionary from the temporary document; presumably so that the
 * xmlFreeDoc() calls below do not release the context document's dictionary. */
doc->dict = NULL;
/* 6. If the document element of the resulting Document has any sibling nodes, then throw a "SyntaxError" DOMException. */
xmlNodePtr document_element = doc->children;
if (document_element == NULL || document_element->next != NULL) {
xmlFreeDoc(doc);
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
return NULL;
}
/* 7. Return the child nodes of the document element of the resulting Document, in tree order. */
xmlNodePtr fragment = xmlNewDocFragment(context_node->doc);
if (EXPECTED(fragment != NULL)) {
xmlNodePtr child = document_element->children;
/* Yes, we have to call xmlSetTreeDoc() before xmlAddChildList(),
 * because xmlAddChildList() _only_ sets the document for the topmost elements in the subtree! */
xmlSetTreeDoc(document_element, context_node->doc);
xmlAddChildList(fragment, child);
dom_mark_namespaces_as_attributes_too(ns_mapper, doc);
/* The children now hang under the fragment: unlink them from the synthetic wrapper
 * element so that freeing the temporary document below does not free them as well. */
document_element->children = NULL;
document_element->last = NULL;
}
xmlFreeDoc(doc);
return fragment;
}
return NULL;
}
/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
 * and https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm
 *
 * Property write handler for Element::$innerHTML.
 * Parses the string in newval with the XML fragment parser when the element's
 * document is an XML document and with the HTML fragment parser otherwise, then
 * replaces all children of the element with the parsed fragment.
 * Returns FAILURE when parsing yields no fragment or when the insertion fails;
 * in the former case the existing children are left in place. */
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
{
	DOM_PROP_NODE(xmlNodePtr, context_node, obj);

	const zend_string *markup = Z_STR_P(newval);
	const bool is_xml_document = context_node->doc->type == XML_DOCUMENT_NODE;

	xmlNodePtr fragment = is_xml_document
		? dom_xml_fragment_parsing_algorithm(obj, context_node, markup)
		: dom_html_fragment_parsing_algorithm(obj, context_node, markup, obj->document->quirks_mode);
	if (fragment == NULL) {
		return FAILURE;
	}

	/* We skip the steps involving the template element as context node since we don't do special handling for that. */
	dom_remove_all_children(context_node);

	if (!php_dom_pre_insert(obj->document, fragment, context_node, NULL)) {
		return FAILURE;
	}
	return SUCCESS;
}
#endif

View File

@ -447,7 +447,7 @@ PHP_DOM_EXPORT void php_dom_libxml_reconcile_modern(php_dom_libxml_ns_mapper *ns
zend_hash_destroy(&ctx.old_ns_to_new_ns_ptr);
}
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node)
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements)
{
ZEND_ASSERT(node != NULL);
@ -464,7 +464,7 @@ PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_map
for (const xmlNode *cur = node; cur != NULL; cur = cur->parent) {
if (cur->type == XML_ELEMENT_NODE) {
/* Register namespace of element */
if (cur->ns != NULL && cur->ns->prefix != NULL) {
if (!ignore_elements && cur->ns != NULL && cur->ns->prefix != NULL) {
const char *prefix = (const char *) cur->ns->prefix;
zend_hash_str_add_ptr(&tmp_prefix_to_ns_table, prefix, strlen(prefix), cur->ns);
}

View File

@ -70,7 +70,7 @@ typedef struct _php_dom_in_scope_ns {
bool origin_is_ns_compat;
} php_dom_in_scope_ns;
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node);
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements);
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns_legacy(const xmlNode *node);
PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns);

View File

@ -1881,7 +1881,7 @@ PHP_METHOD(Dom_Node, lookupPrefix)
/* }}} end dom_node_lookup_prefix */
/* https://dom.spec.whatwg.org/#locate-a-namespace */
static const char *dom_locate_a_namespace(xmlNodePtr node, const zend_string *prefix)
const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix)
{
/* switch on the interface node implements: */
if (node->type == XML_ELEMENT_NODE) {

View File

@ -1039,6 +1039,7 @@ PHP_MINIT_FUNCTION(dom)
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "childElementCount", dom_parent_node_child_element_count, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "previousElementSibling", dom_node_previous_element_sibling_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "nextElementSibling", dom_node_next_element_sibling_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "innerHTML", dom_element_inner_html_read, dom_element_inner_html_write);
zend_hash_merge(&dom_modern_element_prop_handlers, &dom_modern_node_prop_handlers, NULL, false);
DOM_OVERWRITE_PROP_HANDLER(&dom_modern_element_prop_handlers, "textContent", dom_node_text_content_read, dom_node_text_content_write);
zend_hash_add_new_ptr(&classes, dom_modern_element_class_entry->name, &dom_modern_element_prop_handlers);

View File

@ -171,6 +171,8 @@ dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_ent
xmlDocPtr php_dom_create_html_doc(void);
xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference);
void dom_set_xml_class(php_libxml_ref_obj *document);
const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix);
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc);
bool dom_compare_value(const xmlAttr *attr, const xmlChar *value);
void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp);

View File

@ -1380,6 +1380,8 @@ namespace Dom
public function querySelectorAll(string $selectors): NodeList {}
public function closest(string $selectors): ?Element {}
public function matches(string $selectors): bool {}
public string $innerHTML;
}
class HTMLElement extends Element

View File

@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 28365949d78a2d0254cfdb0da6549e282d2eb436 */
* Stub hash: 9065d5c713a6fb879f8116821eaabc3a01a4db20 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@ -3121,6 +3121,12 @@ static zend_class_entry *register_class_Dom_Element(zend_class_entry *class_entr
zend_declare_typed_property(class_entry, property_nextElementSibling_name, &property_nextElementSibling_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_nextElementSibling_class_Dom_Element, 0, MAY_BE_NULL));
zend_string_release(property_nextElementSibling_name);
zval property_innerHTML_default_value;
ZVAL_UNDEF(&property_innerHTML_default_value);
zend_string *property_innerHTML_name = zend_string_init("innerHTML", sizeof("innerHTML") - 1, 1);
zend_declare_typed_property(class_entry, property_innerHTML_name, &property_innerHTML_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING));
zend_string_release(property_innerHTML_name);
return class_entry;
}

View File

@ -0,0 +1,65 @@
--TEST--
Test writing Element::$innerHTML on HTML documents
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\HTMLDocument::createEmpty();
$el = $dom->createElement('div');
$dom->appendChild($el);
$el->innerHTML = '<p>foo</p>';
echo $dom->saveXML(), "\n";
$el->innerHTML = '</div>';
echo $dom->saveXML(), "\n";
$el->innerHTML = '';
echo $dom->saveXML(), "\n";
$el->innerHTML = '<div></div>&nbsp;<p></p>';
echo $dom->saveXML(), "\n";
$el->innerHTML = "invalid\xffutf-8𐍈𐍈𐍈";
echo $dom->saveXML(), "\n";
// Create a non-interned string that gets the UTF-8 validity flag added
$str = str_repeat("my valid string", random_int(1, 1));
preg_match('/^.*$/u', $str);
$el->innerHTML = $str;
echo $dom->saveXML(), "\n";
$dom = DOM\HTMLDocument::createEmpty();
$el = $dom->createElement('style');
$dom->appendChild($el);
$el->innerHTML = '<p>foo</p>';
echo $dom->saveXML(), "\n";
$dom = DOM\HTMLDocument::createEmpty();
$el = $dom->createElementNS('urn:a', 'style');
$dom->appendChild($el);
$el->innerHTML = '<p>foo</p>';
echo $dom->saveXML(), "\n";
$dom = DOM\HTMLDocument::createEmpty();
$el = $dom->createElement('textarea');
$dom->appendChild($el);
$el->innerHTML = "</textarea>\0-->";
echo $dom->saveXML(), "\n";
?>
--EXPECT--
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<div xmlns="http://www.w3.org/1999/xhtml"><p>foo</p></div>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<div xmlns="http://www.w3.org/1999/xhtml"></div>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<div xmlns="http://www.w3.org/1999/xhtml"></div>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<div xmlns="http://www.w3.org/1999/xhtml"><div></div> <p></p></div>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<div xmlns="http://www.w3.org/1999/xhtml">invalid<69>utf-8𐍈𐍈𐍈</div>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<div xmlns="http://www.w3.org/1999/xhtml">my valid string</div>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<style xmlns="http://www.w3.org/1999/xhtml">&lt;p&gt;foo&lt;/p&gt;</style>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<style xmlns="urn:a"><p xmlns="">foo</p></style>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<textarea xmlns="http://www.w3.org/1999/xhtml">&lt;/textarea&gt;<3B>--&gt;</textarea>

View File

@ -0,0 +1,21 @@
--TEST--
Test reading Element::$innerHTML on HTML documents
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\HTMLDocument::createFromString('<!DOCTYPE html><html><head><title>Test</title></head><body><div></div><p>Hello, World!</p></body></html>');
var_dump($dom->getElementsByTagName('body')[0]->innerHTML);
var_dump($dom->getElementsByTagName('head')[0]->innerHTML);
var_dump($dom->getElementsByTagName('html')[0]->innerHTML);
var_dump($dom->getElementsByTagName('div')[0]->innerHTML);
var_dump($dom->getElementsByTagName('p')[0]->innerHTML);
?>
--EXPECT--
string(31) "<div></div><p>Hello, World!</p>"
string(19) "<title>Test</title>"
string(76) "<head><title>Test</title></head><body><div></div><p>Hello, World!</p></body>"
string(0) ""
string(13) "Hello, World!"

View File

@ -0,0 +1,65 @@
--TEST--
Test reading Element::$innerHTML on XML documents
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\XMLDocument::createEmpty();
// Creates a fresh, unattached <container> element in the global test document.
function createContainer() {
global $dom;
return $dom->createElement("container");
}
$container = createContainer();
$container->append("Hello, world!");
var_dump($container->innerHTML);
$container = createContainer();
$container->append($dom->createComment("This is -a- comment"));
var_dump($container->innerHTML);
$container = createContainer();
// Note: intentionally typo'd to check whether the string matching against "xml" happens correctly
// i.e. no bugs with prefix-matching only.
$container->append($dom->createProcessingInstruction("xmll", ""));
var_dump($container->innerHTML);
$container = createContainer();
$container->append($dom->createProcessingInstruction("almostmalformed", ">?"));
var_dump($container->innerHTML);
$container = createContainer();
$element = $container->appendChild(createContainer());
$element->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns', 'http://example.com/');
var_dump($container->innerHTML);
$container = createContainer();
$element = $container->appendChild(createContainer());
$element->setAttributeNS('urn:a', 'name', '');
$element->setAttributeNS('urn:b', 'name', '');
var_dump($container->innerHTML);
$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/../../book.xml');
var_dump($dom->documentElement->innerHTML);
?>
--EXPECT--
string(13) "Hello, world!"
string(26) "<!--This is -a- comment-->"
string(9) "<?xmll ?>"
string(22) "<?almostmalformed >??>"
string(12) "<container/>"
string(72) "<container xmlns:ns1="urn:a" ns1:name="" xmlns:ns2="urn:b" ns2:name=""/>"
string(167) "
<book>
<title>The Grapes of Wrath</title>
<author>John Steinbeck</author>
</book>
<book>
<title>The Pearl</title>
<author>John Steinbeck</author>
</book>
"

View File

@ -0,0 +1,108 @@
--TEST--
Test reading Element::$innerHTML on XML documents - error cases
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\XMLDocument::createEmpty();
// Creates a fresh, unattached <container> element in the global test document.
function createContainer() {
global $dom;
return $dom->createElement("container");
}
// Dumps the container's innerHTML, or prints the message of the DOMException
// raised while reading it.
function test($container) {
try {
$serialized = $container->innerHTML;
var_dump($serialized);
} catch (DOMException $exception) {
echo $exception->getMessage(), "\n";
}
}
$container = createContainer();
$container->append("Hello, \x01 world!");
test($container);
$container = createContainer();
$container->append($dom->createComment('Hello -- world'));
test($container);
$container = createContainer();
$container->append($dom->createComment('Hello world-'));
test($container);
$container = createContainer();
$container->append($dom->createComment('Hello world-'));
test($container);
$container = createContainer();
$container->append($dom->createComment("\x01"));
test($container);
$container = createContainer();
$legacy = new DOMDocument;
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo:bar', '?>')));
test($container);
$container = createContainer();
$legacy = new DOMDocument;
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo', '?>')));
test($container);
$container = createContainer();
$legacy = new DOMDocument;
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('xml', '')));
test($container);
$container = createContainer();
$legacy = new DOMDocument;
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo', "\x01")));
test($container);
$container = createContainer();
$container->append($dom->createElement("with:colon"));
test($container);
$container = createContainer();
$container->append($dom->createElementNS("http://www.w3.org/2000/xmlns/", "xmlns:colon"));
test($container);
$container = createContainer();
$element = $container->appendChild(createContainer());
$element->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:x", "http://www.w3.org/2000/xmlns/");
test($container);
$container = createContainer();
$element = $container->appendChild(createContainer());
$element->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:x", "");
test($container);
$container = createContainer();
$element = $container->appendChild(createContainer());
$element->setAttribute("with:colon", "value");
test($container);
$container = createContainer();
$element = $container->appendChild(createContainer());
$element->setAttribute("xmlns", "value");
test($container);
?>
--EXPECT--
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed
The resulting XML serialization is not well-formed

View File

@ -0,0 +1,86 @@
--TEST--
Test writing Element::$innerHTML on XML documents
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\XMLDocument::createEmpty();
$el = $dom->createElementNS('urn:a', 'root');
$dom->appendChild($el);
$el->innerHTML = '<p>foo</p><p xmlns="">bar</p>';
echo $dom->saveXML(), "\n";
$el->innerHTML = '';
echo $dom->saveXML(), "\n";
$el->innerHTML = '&amp;';
echo $dom->saveXML(), "\n";
$el->innerHTML = '&lt;foo&gt;';
echo $dom->saveXML(), "\n";
echo "----------------\n";
$dom = DOM\XMLDocument::createFromString('<root/>');
$child = $dom->documentElement->appendChild($dom->createElementNS('urn:a', 'child'));
$child->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns', 'urn:b');
$child->innerHTML = '<default/>';
echo $dom->saveXML(), "\n";
var_dump($child->namespaceURI);
var_dump($child->firstChild->namespaceURI);
echo "----------------\n";
$dom = DOM\XMLDocument::createFromString(<<<XML
<root xmlns="urn:a" xmlns:b="urn:b" xmlns:c="urn:c">
<b:child a="none" b:b="b" c:c="c"/>
<c:child a="none" b:b="b" c:c="c"/>
<?pi ?>
<!-- comment -->
<child a="none" b:b="b" c:c="c">
<![CDATA[ cdata ]]>
</child>
</root>
XML);
$dom->documentElement->innerHTML = $dom->documentElement->innerHTML;
echo $dom->saveXML(), "\n";
echo "----------------\n";
$dom->documentElement->innerHTML = <<<XML
<child b:b="b" c:c="c">
<b:child/>
<c:child/>
</child>
XML;
echo $dom->saveXML(), "\n";
?>
--EXPECT--
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="urn:a"><p>foo</p><p xmlns="">bar</p></root>
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="urn:a"/>
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="urn:a">&amp;</root>
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="urn:a">&lt;foo&gt;</root>
----------------
<?xml version="1.0" encoding="UTF-8"?>
<root><child xmlns="urn:a"><default/></child></root>
string(5) "urn:a"
string(5) "urn:a"
----------------
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="urn:a" xmlns:b="urn:b" xmlns:c="urn:c">
<b:child a="none" b:b="b" c:c="c"/>
<c:child a="none" b:b="b" c:c="c"/>
<?pi ?>
<!-- comment -->
<child xmlns="urn:a" a="none" b:b="b" c:c="c">
<![CDATA[ cdata ]]>
</child>
</root>
----------------
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="urn:a" xmlns:b="urn:b" xmlns:c="urn:c"><child b:b="b" c:c="c">
<b:child/>
<c:child/>
</child></root>

View File

@ -0,0 +1,47 @@
--TEST--
Test writing Element::$innerHTML on XML documents - error cases
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\XMLDocument::createFromString(<<<XML
<!DOCTYPE root [
<!ENTITY foo "content">
]>
<root/>
XML);
$child = $dom->documentElement->appendChild($dom->createElementNS('urn:a', 'child'));
$original = $dom->saveXML();
// Assigns $html to $child->innerHTML and prints the DOMException message if the assignment throws.
// Afterwards dumps whether the document still serializes to the $original snapshot taken before any write.
function test($child, $html) {
global $dom, $original;
try {
$child->innerHTML = $html;
} catch (DOMException $e) {
echo $e->getMessage(), "\n";
}
// Every case in this file expects bool(true): a rejected fragment leaves the document unchanged.
var_dump($dom->saveXML() === $original);
}
test($child, '&foo;');
test($child, '</root>');
test($child, '</root><foo/><!--');
test($child, '--></root><!--');
test($child, '<');
test($child, '<!ENTITY foo "content">');
?>
--EXPECT--
XML fragment is not well-formed
bool(true)
XML fragment is not well-formed
bool(true)
XML fragment is not well-formed
bool(true)
XML fragment is not well-formed
bool(true)
XML fragment is not well-formed
bool(true)
XML fragment is not well-formed
bool(true)

View File

@ -11,7 +11,7 @@ fclose($memory);
--EXPECTF--
Warning: Dom\XMLDocument::createFromFile(): Document is empty in php://memory, line: 1 in %s on line %d
Fatal error: Uncaught Exception: XML document is malformed in %s:%d
Fatal error: Uncaught DOMException: XML fragment is not well-formed in %s:%d
Stack trace:
#0 %s(%d): Dom\XMLDocument::createFromFile('php://memory')
#1 {main}

View File

@ -71,7 +71,7 @@ static bool check_options_validity(uint32_t arg_num, zend_long options)
* So in principle we could just ignore them outright.
* However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token (Date 2023-12-15)
* requires us to have the declaration as an attribute available */
static void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
{
xmlNodePtr node = doc->children;
while (node != NULL) {
@ -175,7 +175,7 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
if (UNEXPECTED(lxml_doc == NULL || lxml_doc == DOM_DOCUMENT_MALFORMED)) {
if (!EG(exception)) {
if (lxml_doc == DOM_DOCUMENT_MALFORMED) {
zend_throw_exception_ex(NULL, 0, "XML document is malformed");
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
} else {
if (mode == DOM_LOAD_FILE) {
zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source);
@ -262,7 +262,7 @@ static zend_string *php_new_dom_dump_node_to_str_ex(xmlNodePtr node, int options
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
if (EXPECTED(out != NULL)) {
status = dom_xml_serialize(ctxt, out, node, format);
status = dom_xml_serialize(ctxt, out, node, format, false);
status |= xmlOutputBufferFlush(out);
status |= xmlOutputBufferClose(out);
} else {
@ -303,7 +303,7 @@ zend_long php_new_dom_dump_node_to_file(const char *filename, xmlDocPtr doc, xml
int status = -1;
xmlSaveCtxtPtr ctxt = xmlSaveToIO(out->writecallback, NULL, stream, encoding, XML_SAVE_AS_XML);
if (EXPECTED(ctxt != NULL)) {
status = dom_xml_serialize(ctxt, out, node, format);
status = dom_xml_serialize(ctxt, out, node, format, false);
status |= xmlOutputBufferFlush(out);
(void) xmlSaveClose(ctxt);
}

View File

@ -24,6 +24,7 @@
#include "namespace_compat.h"
#include "serialize_common.h"
#include "internal_helpers.h"
#include <libxml/chvalid.h>
// TODO: implement iterative approach instead of recursive?
@ -72,7 +73,8 @@ static int dom_xml_serialization_algorithm(
xmlNodePtr node,
const xmlChar *namespace,
unsigned int *prefix_index,
int indent
int indent,
bool require_well_formed
);
static bool dom_xml_str_equals_treat_nulls_as_empty(const xmlChar *s1, const xmlChar *s2)
@ -345,12 +347,10 @@ static const xmlChar *dom_recording_the_namespace_information(
}
/* 2.3.2.4. If namespace definition is the empty string (the declarative form of having no namespace),
* then let namespace definition be null instead. */
if (*namespace_definition == '\0') {
namespace_definition = NULL;
}
* then let namespace definition be null instead.
* => This gets delayed until later down. */
size_t namespace_definition_length = namespace_definition == NULL ? 0 : strlen((const char *) namespace_definition);
size_t namespace_definition_length = strlen((const char *) namespace_definition);
/* 2.3.2.5. If prefix definition is found in map given the namespace namespace definition,
* then stop running these steps, and return to Main to visit the next attribute. */
@ -358,6 +358,11 @@ static const xmlChar *dom_recording_the_namespace_information(
continue;
}
/* Delayed step 2.3.2.4 */
if (*namespace_definition == '\0') {
namespace_definition = NULL;
}
/* 2.3.2.6. Add the prefix prefix definition to map given namespace namespace definition. */
dom_xml_ns_prefix_map_add(namespace_prefix_map, prefix_definition, false, namespace_definition, namespace_definition_length);
@ -534,15 +539,40 @@ static int dom_xml_common_text_serialization(xmlOutputBufferPtr out, const char
return xmlOutputBufferWrite(out, content - last_output, last_output);
}
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-an-element-node */
static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out, xmlNodePtr text)
/* Validates that the NUL-terminated UTF-8 string `content` consists solely of characters
 * matched by the XML Char production.
 * Returns 0 when every character is acceptable, and -1 as soon as an undecodable byte
 * sequence or a character outside of the production is encountered. */
static int dom_xml_check_char_production(const xmlChar *content)
{
	// TODO: optimization idea: fast-path for ASCII-only data
	for (const xmlChar *cursor = content; *cursor != '\0';) {
		/* In: how many bytes may be inspected; out: how many bytes the decoded character occupies
		 * (per the libxml2 documentation of xmlGetUTF8Char). */
		int char_size = 4;
		int code_point = xmlGetUTF8Char(cursor, &char_size);
		if (code_point < 0) {
			return -1;
		}
		if (!xmlIsCharQ(code_point)) {
			return -1;
		}
		cursor += char_size;
	}
	return 0;
}
/* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-text-node
 * Serializes the data of `text` into `out`. When `require_well_formed` is set, the data is first checked
 * against the XML Char production and a failed check is propagated through TRY. */
static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out, xmlNodePtr text, bool require_well_formed)
{
	const xmlChar *data = text->content;

	/* 1. If the require well-formed flag is set and node's data contains characters that are not matched by the XML Char production,
	 *    then throw an exception. */
	if (data != NULL && require_well_formed) {
		TRY(dom_xml_check_char_production(data));
	}

	return dom_xml_common_text_serialization(out, (const char *) data, false);
}
/* Returns the namespace URI (href) of `attr`, or NULL when the attribute has no namespace attached. */
static zend_always_inline const xmlChar *dom_xml_attribute_namespace(const xmlAttr *attr)
{
	if (attr->ns != NULL) {
		return attr->ns->href;
	}
	return NULL;
}
static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAttrPtr attr)
{
TRY(xmlOutputBufferWriteString(out, (const char *) attr->name));
@ -561,7 +591,28 @@ static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAtt
return xmlOutputBufferWriteLit(out, "\"");
}
/* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28 */
/* Well-formedness checks taken from the attribute serialization algorithm (steps 3.5.2.2 and 3.5.2.3).
 * Returns 0 when the value of `attr` passes both checks and -1 otherwise. The result is an int rather than
 * a boolean so that it plugs into the TRY/TRY_OR_CLEANUP interface, which exists for compatibility with
 * libxml's interfaces. */
static zend_always_inline int dom_xml_check_xmlns_attribute_requirements(const xmlAttr *attr)
{
	const char *value = (const char *) dom_get_attribute_value(attr);

	/* 3.5.2.2. If the require well-formed flag is set and the value of attr's value attribute matches the XMLNS namespace, then throw an exception */
	bool is_xmlns_namespace = strcmp(value, DOM_XMLNS_NS_URI) == 0;

	/* 3.5.2.3. If the require well-formed flag is set and the value of attr's value attribute is the empty string */
	bool is_empty = value[0] == '\0';

	return (is_xmlns_namespace || is_empty) ? -1 : 0;
}
/* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28
* This does not have a require_well_formed argument because the only way to get here is via saveXML(), which has it off. */
static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr attr)
{
if (attr->ns != NULL && attr->ns->prefix != NULL) {
@ -572,10 +623,21 @@ static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr a
}
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-a-comment-node */
static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr comment)
static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr comment, bool require_well_formed)
{
/* 1. If the require well-formed flag is set ...
* => N/A */
/* Step 1 deals with well-formed flag */
if (require_well_formed) {
/* node's data contains characters that are not matched by the XML Char production or contains "--"
* (two adjacent U+002D HYPHEN-MINUS characters) or that ends with a "-" (U+002D HYPHEN-MINUS) character,
* then throw an exception */
const xmlChar *ptr = comment->content;
if (ptr != NULL) {
TRY(dom_xml_check_char_production(ptr));
if (strstr((const char *) ptr, "--") != NULL || ptr[strlen((const char *) ptr) - 1] == '-') {
return -1;
}
}
}
TRY(xmlOutputBufferWriteLit(out, "<!--"));
if (EXPECTED(comment->content != NULL)) {
@ -585,10 +647,24 @@ static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr com
}
/* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-processinginstruction-node */
static int dom_xml_serialize_processing_instruction(xmlOutputBufferPtr out, xmlNodePtr pi)
static int dom_xml_serialize_processing_instruction(xmlOutputBufferPtr out, xmlNodePtr pi, bool require_well_formed)
{
/* Steps 1-2 deal with well-formed flag
* => N/A */
/* Steps 1-2 deal with well-formed flag */
if (require_well_formed) {
/* target contains a ":" (U+003A COLON) character or is an ASCII case-insensitive match for the string "xml", then throw an exception */
if (strchr((const char *) pi->name, ':') != NULL || strcasecmp((const char *) pi->name, "xml") == 0) {
return -1;
}
/* node's data contains characters that are not matched by the XML Char production or contains the string "?>"
* (U+003F QUESTION MARK, U+003E GREATER-THAN SIGN), then throw an exception */
if (pi->content != NULL) {
TRY(dom_xml_check_char_production(pi->content));
if (strstr((const char *) pi->content, "?>") != NULL) {
return -1;
}
}
}
TRY(xmlOutputBufferWriteLit(out, "<?"));
TRY(xmlOutputBufferWriteString(out, (const char *) pi->name));
@ -610,6 +686,23 @@ static int dom_xml_serialize_cdata_section_node(xmlOutputBufferPtr out, xmlNodeP
return xmlOutputBufferWriteLit(out, "]]>");
}
/* Builds the key under which `attr` is stored in the namespace localname set: the bare attribute name
 * when the attribute has no namespace URI, and "<href>\0<name>" otherwise.
 * The returned string is newly created; the caller has to release it. */
static zend_string *dom_xml_create_localname_set_key(const xmlAttr *attr)
{
	const char *local_name = (const char *) attr->name;
	size_t local_name_len = strlen(local_name);

	const char *ns_uri = attr->ns != NULL ? (const char *) attr->ns->href : NULL;
	if (ns_uri == NULL) {
		return zend_string_init(local_name, local_name_len, false);
	}

	/* The spec asks for a (namespaceURI, localName) tuple as key, which HashTable cannot express natively.
	 * Neither href nor name can contain an embedded NUL byte, so joining them with a \0 separator
	 * yields an unambiguous stand-in for the tuple. */
	return zend_string_concat3(
		ns_uri, strlen(ns_uri),
		"", 1, /* length 1 so that the terminating \0 of the literal is copied as the separator */
		local_name, local_name_len
	);
}
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization-of-the-attributes */
static int dom_xml_serialize_attributes(
xmlOutputBufferPtr out,
@ -617,25 +710,34 @@ static int dom_xml_serialize_attributes(
dom_xml_ns_prefix_map *map,
dom_xml_local_prefix_map *local_prefixes_map,
unsigned int *prefix_index,
bool ignore_namespace_definition_attribute
bool ignore_namespace_definition_attribute,
bool require_well_formed
)
{
/* 1. Let result be the empty string.
* => We're going to write directly to the output buffer. */
/* 2. Let localname set be a new empty namespace localname set.
* => N/A this is only required for well-formedness */
* We can do this unconditionally even if we don't use it, because this doesn't allocate memory anyway. */
HashTable localname_set;
zend_hash_init(&localname_set, 8, NULL, NULL, false);
/* 3. [LOOP] For each attribute attr in element's attributes, in the order they are specified in the element's attribute list: */
for (xmlAttrPtr attr = element->properties; attr != NULL; attr = attr->next) {
/* 3.1. If the require well-formed flag is set ...
* => N/A */
/* 3.2. Create a new tuple consisting of attr's namespaceURI attribute and localName attribute, and add it to the localname set.
* => N/A this is only required for well-formedness */
if (require_well_formed) {
zend_string *key = dom_xml_create_localname_set_key(attr);
/* 3.1. If the require well-formed flag is set and the localname set contains a tuple whose values match those of a
* new tuple consisting of attr's namespaceURI attribute and localName attribute, then throw an exception
* 3.2. Create a new tuple consisting of attr's namespaceURI attribute and localName attribute, and add it to the localname set. */
bool duplicate = zend_hash_add_empty_element(&localname_set, key) == NULL;
zend_string_release_ex(key, false);
if (duplicate) {
goto cleanup;
}
}
/* 3.3. Let attribute namespace be the value of attr's namespaceURI value. */
const xmlChar *attribute_namespace = attr->ns == NULL ? NULL : attr->ns->href;
const xmlChar *attribute_namespace = dom_xml_attribute_namespace(attr);
/* 3.4. Let candidate prefix be null. */
const xmlChar *candidate_prefix = NULL;
@ -682,10 +784,10 @@ static int dom_xml_serialize_attributes(
}
}
/* 3.5.2.2. If the require well-formed flag is set ...
* => N/A */
/* 3.5.2.3. If the require well-formed flag is set ...
* => N/A */
if (require_well_formed) {
/* 3.5.2.2 and 3.5.2.3 are done by this call. */
TRY_OR_CLEANUP(dom_xml_check_xmlns_attribute_requirements(attr));
}
/* 3.5.2.4. the attr's prefix matches the string "xmlns", then let candidate prefix be the string "xmlns". */
if (attr->ns->prefix != NULL && strcmp((const char *) attr->ns->prefix, "xmlns") == 0) {
@ -725,33 +827,47 @@ static int dom_xml_serialize_attributes(
}
/* 3.5.3.2. Append the following to result, in the order listed: */
TRY(xmlOutputBufferWriteLit(out, " xmlns:"));
TRY(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
TRY(xmlOutputBufferWriteLit(out, "=\""));
TRY(dom_xml_common_text_serialization(out, (const char *) attribute_namespace, true));
TRY(xmlOutputBufferWriteLit(out, "\""));
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, " xmlns:"));
TRY_OR_CLEANUP(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, "=\""));
TRY_OR_CLEANUP(dom_xml_common_text_serialization(out, (const char *) attribute_namespace, true));
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, "\""));
}
}
/* 3.6. Append a " " (U+0020 SPACE) to result. */
TRY(xmlOutputBufferWriteLit(out, " "));
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, " "));
/* 3.7. If candidate prefix is not null, then append to result the concatenation of candidate prefix with ":" (U+003A COLON). */
if (candidate_prefix != NULL) {
TRY(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
TRY(xmlOutputBufferWriteLit(out, ":"));
TRY_OR_CLEANUP(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, ":"));
}
/* 3.8. If the require well-formed flag is set ...
* => N/A */
if (require_well_formed) {
/* 3.8. If the require well-formed flag is set and
* this attr's localName attribute contains the character ":" (U+003A COLON)
* or does not match the XML Name production
* or equals "xmlns" and attribute namespace is null */
if (xmlValidateNCName(attr->name, /* space */ 0) != 0
|| (strcmp((const char *) attr->name, "xmlns") == 0 && dom_xml_attribute_namespace(attr) == NULL)) {
goto cleanup;
}
}
/* 3.9. Append the following strings to result, in the order listed: */
dom_xml_serialize_attribute_node_value(out, attr);
TRY_OR_CLEANUP(dom_xml_serialize_attribute_node_value(out, attr));
}
/* 4. Return the value of result.
* => We're writing directly to the output buffer. */
zend_hash_destroy(&localname_set);
return 0;
cleanup:
zend_hash_destroy(&localname_set);
return -1;
}
/* Only format output if there are no text/entityrefs/cdata nodes as children. */
@ -785,13 +901,19 @@ static int dom_xml_serialize_element_node(
dom_xml_ns_prefix_map *namespace_prefix_map,
xmlNodePtr element,
unsigned int *prefix_index,
int indent
int indent,
bool require_well_formed
)
{
bool should_format = indent >= 0 && element->children != NULL && dom_xml_should_format_element(element);
/* 1. If the require well-formed flag is set and this node's localName attribute contains
* the character ":" (U+003A COLON) or does not match the XML Name production, then throw an exception. */
if (require_well_formed) {
if (xmlValidateNCName(element->name, /* space */ 0) != 0) {
return -1;
}
}
/* 1. If the require well-formed flag is set ...
* => N/A */
bool should_format = indent >= 0 && element->children != NULL && dom_xml_should_format_element(element);
/* 2. Let markup be the string "<" (U+003C LESS-THAN SIGN). */
TRY(xmlOutputBufferWriteLit(out, "<"));
@ -863,7 +985,10 @@ static int dom_xml_serialize_element_node(
/* 12.3. If the value of prefix matches "xmlns", then run the following steps: */
if (prefix != NULL && strcmp((const char *) prefix, "xmlns") == 0) {
/* Step 1 deals with well-formedness, which we don't implement here. */
/* 12.3.1. If the require well-formed flag is set, then throw an error. */
if (require_well_formed) {
goto cleanup;
}
/* 12.3.2. Let candidate prefix be the value of prefix. */
candidate_prefix = prefix;
@ -956,7 +1081,7 @@ static int dom_xml_serialize_element_node(
/* 13. Append to markup the result of the XML serialization of node's attributes given map, prefix index,
* local prefixes map, ignore namespace definition attribute flag, and require well-formed flag. */
TRY_OR_CLEANUP(dom_xml_serialize_attributes(out, element, &map, &local_prefixes_map, prefix_index, ignore_namespace_definition_attribute));
TRY_OR_CLEANUP(dom_xml_serialize_attributes(out, element, &map, &local_prefixes_map, prefix_index, ignore_namespace_definition_attribute, require_well_formed));
/* 14. If ns is the HTML namespace, and the node's list of children is empty, and the node's localName matches
* any one of the following void elements: ... */
@ -1013,7 +1138,7 @@ static int dom_xml_serialize_element_node(
if (should_format) {
TRY_OR_CLEANUP(dom_xml_output_indents(out, indent));
}
TRY_OR_CLEANUP(dom_xml_serialization_algorithm(ctxt, out, &map, child, inherited_ns, prefix_index, indent));
TRY_OR_CLEANUP(dom_xml_serialization_algorithm(ctxt, out, &map, child, inherited_ns, prefix_index, indent, require_well_formed));
}
if (should_format) {
@ -1047,7 +1172,8 @@ static int dom_xml_serializing_a_document_fragment_node(
xmlNodePtr node,
const xmlChar *namespace,
unsigned int *prefix_index,
int indent
int indent,
bool require_well_formed
)
{
/* 1. Let markup the empty string.
@ -1056,7 +1182,7 @@ static int dom_xml_serializing_a_document_fragment_node(
/* 2. For each child child of node, in tree order, run the XML serialization algorithm on the child ... */
xmlNodePtr child = node->children;
while (child != NULL) {
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent));
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent, require_well_formed));
child = child->next;
}
@ -1073,7 +1199,8 @@ static int dom_xml_serializing_a_document_node(
xmlNodePtr node,
const xmlChar *namespace,
unsigned int *prefix_index,
int indent
int indent,
bool require_well_formed
)
{
/* 1. Let serialized document be an empty string.
@ -1092,7 +1219,7 @@ static int dom_xml_serializing_a_document_node(
/* 2. For each child child of node, in tree order, run the XML serialization algorithm on the child passing along the provided arguments,
* and append the result to serialized document. */
while (child != NULL) {
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent));
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent, require_well_formed));
child = child->next;
}
@ -1109,29 +1236,30 @@ static int dom_xml_serialization_algorithm(
xmlNodePtr node,
const xmlChar *namespace,
unsigned int *prefix_index,
int indent
int indent,
bool require_well_formed
)
{
/* If node's interface is: */
switch (node->type) {
case XML_ELEMENT_NODE:
return dom_xml_serialize_element_node(ctxt, out, namespace, namespace_prefix_map, node, prefix_index, indent);
return dom_xml_serialize_element_node(ctxt, out, namespace, namespace_prefix_map, node, prefix_index, indent, require_well_formed);
case XML_DOCUMENT_FRAG_NODE:
return dom_xml_serializing_a_document_fragment_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent);
return dom_xml_serializing_a_document_fragment_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent, require_well_formed);
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_NODE:
return dom_xml_serializing_a_document_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent);
return dom_xml_serializing_a_document_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent, require_well_formed);
case XML_TEXT_NODE:
return dom_xml_serialize_text_node(out, node);
return dom_xml_serialize_text_node(out, node, require_well_formed);
case XML_COMMENT_NODE:
return dom_xml_serialize_comment_node(out, node);
return dom_xml_serialize_comment_node(out, node, require_well_formed);
case XML_PI_NODE:
return dom_xml_serialize_processing_instruction(out, node);
return dom_xml_serialize_processing_instruction(out, node, require_well_formed);
case XML_CDATA_SECTION_NODE:
return dom_xml_serialize_cdata_section_node(out, node);
@ -1152,9 +1280,8 @@ static int dom_xml_serialization_algorithm(
ZEND_UNREACHABLE();
}
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization
* Assumes well-formed == false. */
int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format)
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization */
int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed)
{
/* 1. Let namespace be a context namespace with value null. */
const xmlChar *namespace = NULL;
@ -1171,7 +1298,7 @@ int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr no
/* 5. Return the result of running the XML serialization algorithm ... */
int indent = format ? 0 : -1;
int result = dom_xml_serialization_algorithm(ctxt, out, &namespace_prefix_map, node, namespace, &prefix_index, indent);
int result = dom_xml_serialization_algorithm(ctxt, out, &namespace_prefix_map, node, namespace, &prefix_index, indent, require_well_formed);
dom_xml_ns_prefix_map_dtor(&namespace_prefix_map);

View File

@ -22,6 +22,6 @@
#include <libxml/xmlsave.h>
#include <libxml/xmlIO.h>
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format);
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed);
#endif

View File

@ -287,7 +287,7 @@ static void php_xpath_eval(INTERNAL_FUNCTION_PARAMETERS, int type, bool modern)
if (register_node_ns && nodep != NULL) {
if (modern) {
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(&intern->dom);
in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, nodep);
in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, nodep, false);
} else {
in_scope_ns = php_dom_get_in_scope_ns_legacy(nodep);
}

View File

@ -1350,7 +1350,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object,
object->document->private_data = NULL;
object->document->class_type = PHP_LIBXML_CLASS_UNSET;
object->document->handlers = &php_libxml_default_document_handlers;
object->document->quirks_mode = false;
object->document->quirks_mode = PHP_LIBXML_NO_QUIRKS;
}
return ret_refcount;

View File

@ -95,6 +95,12 @@ typedef enum _php_libxml_class_type {
PHP_LIBXML_CLASS_MODERN = 2,
} php_libxml_class_type;
/* Quirks mode recorded per document in php_libxml_ref_obj, where it is stored in an 8-bit bit-field.
 * New documents start out as PHP_LIBXML_NO_QUIRKS, whose value 0 equals the `false` the field used to hold.
 * NOTE(review): the three values presumably mirror the HTML document modes
 * (no-quirks / quirks / limited-quirks) - confirm against the HTML parser integration. */
typedef enum php_libxml_quirks_mode {
	PHP_LIBXML_NO_QUIRKS      = 0,
	PHP_LIBXML_QUIRKS         = 1,
	PHP_LIBXML_LIMITED_QUIRKS = 2,
} php_libxml_quirks_mode;
typedef struct _php_libxml_ref_obj {
void *ptr;
libxml_doc_props *doc_props;
@ -103,7 +109,7 @@ typedef struct _php_libxml_ref_obj {
const php_libxml_document_handlers *handlers;
int refcount;
php_libxml_class_type class_type : 8;
bool quirks_mode;
php_libxml_quirks_mode quirks_mode : 8;
} php_libxml_ref_obj;
typedef struct _php_libxml_node_ptr {