mirror of
https://github.com/php/php-src.git
synced 2024-09-21 09:57:23 +00:00
Implement Dom $innerHTML property
This commit is contained in:
parent
162e71e165
commit
768900b180
@ -32,7 +32,7 @@ if test "$PHP_DOM" != "no"; then
|
||||
parentnode/tree.c parentnode/css_selectors.c \
|
||||
processinginstruction.c cdatasection.c \
|
||||
documentfragment.c domimplementation.c \
|
||||
element.c node.c characterdata.c \
|
||||
element.c node.c characterdata.c inner_html_mixin.c \
|
||||
documenttype.c entity.c \
|
||||
nodelist.c html_collection.c text.c comment.c \
|
||||
entityreference.c \
|
||||
|
@ -10,7 +10,7 @@ if (PHP_DOM == "yes") {
|
||||
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
|
||||
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
|
||||
domexception.c processinginstruction.c \
|
||||
cdatasection.c documentfragment.c domimplementation.c element.c \
|
||||
cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \
|
||||
node.c characterdata.c documenttype.c \
|
||||
entity.c nodelist.c html_collection.c text.c comment.c \
|
||||
entityreference.c \
|
||||
|
@ -83,6 +83,8 @@ zend_result dom_element_class_name_write(dom_object *obj, zval *newval);
|
||||
zend_result dom_element_id_read(dom_object *obj, zval *retval);
|
||||
zend_result dom_element_id_write(dom_object *obj, zval *newval);
|
||||
zend_result dom_element_schema_type_info_read(dom_object *obj, zval *retval);
|
||||
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval);
|
||||
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval);
|
||||
|
||||
/* entity properties */
|
||||
zend_result dom_entity_public_id_read(dom_object *obj, zval *retval);
|
||||
|
@ -99,6 +99,7 @@ static zend_always_inline xmlNodePtr lexbor_libxml2_bridge_new_text_node_fast(xm
|
||||
static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
|
||||
lxb_dom_node_t *start_node,
|
||||
xmlDocPtr lxml_doc,
|
||||
xmlNodePtr root,
|
||||
bool compact_text_nodes,
|
||||
bool create_default_ns,
|
||||
php_dom_libxml_ns_mapper *ns_mapper
|
||||
@ -114,7 +115,7 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
|
||||
lexbor_array_obj_init(&work_list, WORK_LIST_INIT_SIZE, sizeof(work_list_item));
|
||||
|
||||
for (lxb_dom_node_t *node = start_node; node != NULL; node = node->prev) {
|
||||
lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, (xmlNodePtr) lxml_doc, NULL);
|
||||
lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, root, NULL);
|
||||
}
|
||||
|
||||
work_list_item *current_stack_item;
|
||||
@ -316,6 +317,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
|
||||
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(
|
||||
lxb_dom_interface_node(document)->last_child,
|
||||
lxml_doc,
|
||||
(xmlNodePtr) lxml_doc,
|
||||
compact_text_nodes,
|
||||
create_default_ns,
|
||||
ns_mapper
|
||||
@ -328,6 +330,35 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
|
||||
return LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
|
||||
}
|
||||
|
||||
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
|
||||
lxb_dom_node_t *start_node,
|
||||
xmlDocPtr lxml_doc,
|
||||
xmlNodePtr *fragment_out,
|
||||
bool compact_text_nodes,
|
||||
bool create_default_ns,
|
||||
php_dom_libxml_ns_mapper *ns_mapper
|
||||
)
|
||||
{
|
||||
xmlNodePtr fragment = xmlNewDocFragment(lxml_doc);
|
||||
if (UNEXPECTED(fragment == NULL)) {
|
||||
return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
|
||||
}
|
||||
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(
|
||||
start_node,
|
||||
lxml_doc,
|
||||
fragment,
|
||||
compact_text_nodes,
|
||||
create_default_ns,
|
||||
ns_mapper
|
||||
);
|
||||
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
|
||||
xmlFreeNode(fragment);
|
||||
return status;
|
||||
}
|
||||
*fragment_out = fragment;
|
||||
return LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
|
||||
}
|
||||
|
||||
void lexbor_libxml2_bridge_report_errors(
|
||||
const lexbor_libxml2_bridge_parse_context *ctx,
|
||||
lxb_html_parser_t *parser,
|
||||
@ -376,12 +407,22 @@ void lexbor_libxml2_bridge_report_errors(
|
||||
*error_index_offset_tree = index;
|
||||
}
|
||||
|
||||
/* Maps Lexbor's document compatibility mode onto the equivalent php_libxml_quirks_mode value.
 * Any value other than the three listed modes hits EMPTY_SWITCH_DEFAULT_CASE(). */
static php_libxml_quirks_mode dom_translate_quirks_mode(lxb_dom_document_cmode_t quirks_mode)
{
	php_libxml_quirks_mode translated;

	switch (quirks_mode) {
		case LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS:
			translated = PHP_LIBXML_NO_QUIRKS;
			break;
		case LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS:
			translated = PHP_LIBXML_LIMITED_QUIRKS;
			break;
		case LXB_DOM_DOCUMENT_CMODE_QUIRKS:
			translated = PHP_LIBXML_QUIRKS;
			break;
		EMPTY_SWITCH_DEFAULT_CASE();
	}

	return translated;
}
|
||||
|
||||
/* Copies the parser observations PHP needs from a Lexbor tree into `observations`:
 * whether the html/head/body tags were explicitly present, and the document's quirks mode
 * translated to php_libxml_quirks_mode.
 *
 * The former boolean assignment of quirks_mode was dropped: it was immediately overwritten
 * by the translated tri-state value and therefore dead. */
void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations)
{
	observations->has_explicit_html_tag = tree->has_explicit_html_tag;
	observations->has_explicit_head_tag = tree->has_explicit_head_tag;
	observations->has_explicit_body_tag = tree->has_explicit_body_tag;
	observations->quirks_mode = dom_translate_quirks_mode(lxb_dom_interface_document(tree->document)->compat_mode);
}
|
||||
|
||||
#endif /* HAVE_LIBXML && HAVE_DOM */
|
||||
|
@ -47,7 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations {
|
||||
bool has_explicit_html_tag;
|
||||
bool has_explicit_head_tag;
|
||||
bool has_explicit_body_tag;
|
||||
bool quirks_mode;
|
||||
php_libxml_quirks_mode quirks_mode;
|
||||
} lexbor_libxml2_bridge_extracted_observations;
|
||||
|
||||
typedef struct _lexbor_libxml2_bridge_parse_context {
|
||||
@ -73,6 +73,14 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
|
||||
bool create_default_ns,
|
||||
php_dom_libxml_ns_mapper *ns_mapper
|
||||
);
|
||||
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
|
||||
lxb_dom_node_t *start_node,
|
||||
xmlDocPtr lxml_doc,
|
||||
xmlNodePtr *fragment_out,
|
||||
bool compact_text_nodes,
|
||||
bool create_default_ns,
|
||||
php_dom_libxml_ns_mapper *ns_mapper
|
||||
);
|
||||
void lexbor_libxml2_bridge_report_errors(
|
||||
const lexbor_libxml2_bridge_parse_context *ctx,
|
||||
lxb_html_parser_t *parser,
|
||||
|
357
ext/dom/inner_html_mixin.c
Normal file
357
ext/dom/inner_html_mixin.c
Normal file
@ -0,0 +1,357 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| https://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Authors: Niels Dossche <nielsdos@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
|
||||
#include "php_dom.h"
|
||||
#include "dom_properties.h"
|
||||
#include "html5_parser.h"
|
||||
#include "html5_serializer.h"
|
||||
#include "xml_serializer.h"
|
||||
#include "domexception.h"
|
||||
#include <libxml/xmlsave.h>
|
||||
#include <lexbor/dom/interfaces/element.h>
|
||||
#include <lexbor/html/interfaces/document.h>
|
||||
#include <lexbor/tag/tag.h>
|
||||
#include <lexbor/encoding/encoding.h>
|
||||
|
||||
/* Spec date: 2024-04-14 */
|
||||
|
||||
static zend_result dom_inner_html_write_string(void *application_data, const char *buf)
|
||||
{
|
||||
smart_str *output = application_data;
|
||||
smart_str_appends(output, buf);
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
/* HTML5 serializer callback: appends `len` bytes starting at `buf` to the smart_str
 * passed as `application_data`. Always returns SUCCESS. */
static zend_result dom_inner_html_write_string_len(void *application_data, const char *buf, size_t len)
{
	smart_str_appendl((smart_str *) application_data, buf, len);
	return SUCCESS;
}
|
||||
|
||||
static int dom_write_smart_str(void *context, const char *buffer, int len)
|
||||
{
|
||||
smart_str *str = context;
|
||||
smart_str_appendl(str, buffer, len);
|
||||
return len;
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
|
||||
* and https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm */
|
||||
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
|
||||
{
|
||||
DOM_PROP_NODE(xmlNodePtr, node, obj);
|
||||
|
||||
/* 1. Let context document be the value of node's node document. */
|
||||
const xmlDoc *context_document = node->doc;
|
||||
|
||||
/* 2. If context document is an HTML document, return an HTML serialization of node. */
|
||||
if (context_document->type == XML_HTML_DOCUMENT_NODE) {
|
||||
smart_str output = {0};
|
||||
dom_html5_serialize_context ctx;
|
||||
ctx.application_data = &output;
|
||||
ctx.write_string = dom_inner_html_write_string;
|
||||
ctx.write_string_len = dom_inner_html_write_string_len;
|
||||
dom_html5_serialize(&ctx, node);
|
||||
ZVAL_STR(retval, smart_str_extract(&output));
|
||||
}
|
||||
/* 3. Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed. */
|
||||
else {
|
||||
ZEND_ASSERT(context_document->type == XML_DOCUMENT_NODE);
|
||||
|
||||
int status = -1;
|
||||
smart_str str = {0};
|
||||
/* No need to check buf's return value, as xmlSaveToBuffer() will fail instead. */
|
||||
xmlSaveCtxtPtr ctxt = xmlSaveToIO(dom_write_smart_str, NULL, &str, "UTF-8", XML_SAVE_AS_XML);
|
||||
if (EXPECTED(ctxt != NULL)) {
|
||||
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler("UTF-8");
|
||||
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(dom_write_smart_str, NULL, &str, handler);
|
||||
if (EXPECTED(out != NULL)) {
|
||||
/* Note: the innerHTML mixin sets the well-formed flag to true. */
|
||||
xmlNodePtr child = node->children;
|
||||
status = 0;
|
||||
while (child != NULL && status == 0) {
|
||||
status = dom_xml_serialize(ctxt, out, child, false, true);
|
||||
child = child->next;
|
||||
}
|
||||
status |= xmlOutputBufferFlush(out);
|
||||
status |= xmlOutputBufferClose(out);
|
||||
}
|
||||
(void) xmlSaveClose(ctxt);
|
||||
xmlCharEncCloseFunc(handler);
|
||||
}
|
||||
if (UNEXPECTED(status < 0)) {
|
||||
smart_str_free_ex(&str, false);
|
||||
php_dom_throw_error_with_message(SYNTAX_ERR, "The resulting XML serialization is not well-formed", true);
|
||||
return FAILURE;
|
||||
}
|
||||
ZVAL_STR(retval, smart_str_extract(&str));
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
/* Feeds `input` through Lexbor's chunked fragment parser, using `element` as the context element.
 *
 * Input flagged as valid UTF-8 is handed over in one chunk. Any other input is scanned, and each
 * sequence that does not decode to a code point is replaced by LXB_ENCODING_REPLACEMENT_BYTES
 * before it reaches the parser.
 *
 * Returns the node produced by lxb_html_document_parse_fragment_chunk_end(), or NULL as soon as
 * one of the parser calls reports a status other than LXB_STATUS_OK. */
static lxb_dom_node_t *dom_html_fragment_lexbor_parse(lxb_html_document_t *document, lxb_dom_element_t *element, const zend_string *input)
{
	if (lxb_html_document_parse_fragment_chunk_begin(document, element) != LXB_STATUS_OK) {
		return NULL;
	}

	lxb_encoding_decode_t decoder;
	lxb_encoding_decode_init_single(&decoder, lxb_encoding_data(LXB_ENCODING_UTF_8));

	const lxb_char_t *cursor = (const lxb_char_t *) ZSTR_VAL(input);

	if (!ZSTR_IS_VALID_UTF8(input)) {
		/* See dom_decode_encode_fast_path(), simplified version for in-memory use-case. */
		const lxb_char_t *const input_end = cursor + ZSTR_LEN(input);
		/* Start of the bytes that were scanned but not yet handed to the parser. */
		const lxb_char_t *unsent = cursor;

		while (cursor < input_end) {
			/* ASCII bytes outside of a pending multi-byte sequence need no decoding. */
			if (decoder.u.utf_8.need == 0 && *cursor < 0x80) {
				cursor++;
				continue;
			}

			const lxb_char_t *sequence_start = cursor;
			const lxb_codepoint_t decoded = lxb_encoding_decode_utf_8_single(&decoder, &cursor, input_end);
			if (EXPECTED(decoded <= LXB_ENCODING_MAX_CODEPOINT)) {
				continue;
			}

			/* Hand over everything before the undecodable sequence... */
			if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, unsent, sequence_start - unsent) != LXB_STATUS_OK)) {
				return NULL;
			}

			/* ...followed by the replacement bytes instead of the sequence itself. */
			if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE) != LXB_STATUS_OK)) {
				return NULL;
			}

			unsent = cursor;
		}

		/* Hand over whatever is left after the last replacement. */
		if (cursor != unsent) {
			if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, unsent, cursor - unsent) != LXB_STATUS_OK)) {
				return NULL;
			}
		}
	} else {
		/* If we know the input is valid UTF-8, we don't have to perform checks and replace invalid sequences. */
		if (UNEXPECTED(lxb_html_document_parse_fragment_chunk(document, cursor, ZSTR_LEN(input)) != LXB_STATUS_OK)) {
			return NULL;
		}
	}

	return lxb_html_document_parse_fragment_chunk_end(document);
}
|
||||
|
||||
/* Maps a php_libxml_quirks_mode value onto the equivalent Lexbor document compatibility mode.
 * Any value other than the three listed modes hits EMPTY_SWITCH_DEFAULT_CASE(). */
static lxb_dom_document_cmode_t dom_translate_quirks_mode(php_libxml_quirks_mode quirks_mode)
{
	lxb_dom_document_cmode_t translated;

	switch (quirks_mode) {
		case PHP_LIBXML_NO_QUIRKS:
			translated = LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS;
			break;
		case PHP_LIBXML_LIMITED_QUIRKS:
			translated = LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS;
			break;
		case PHP_LIBXML_QUIRKS:
			translated = LXB_DOM_DOCUMENT_CMODE_QUIRKS;
			break;
		EMPTY_SWITCH_DEFAULT_CASE();
	}

	return translated;
}
|
||||
|
||||
/* https://html.spec.whatwg.org/#html-fragment-parsing-algorithm */
|
||||
static xmlNodePtr dom_html_fragment_parsing_algorithm(dom_object *obj, xmlNodePtr context_node, const zend_string *input, php_libxml_quirks_mode quirks_mode)
|
||||
{
|
||||
/* The whole algorithm is implemented in Lexbor, we just have to be the adapter between the
|
||||
* data structures used in PHP and what Lexbor expects. */
|
||||
|
||||
lxb_html_document_t *document = lxb_html_document_create();
|
||||
document->dom_document.compat_mode = dom_translate_quirks_mode(quirks_mode);
|
||||
lxb_dom_element_t *element = lxb_dom_element_interface_create(&document->dom_document);
|
||||
|
||||
const lxb_tag_data_t *tag_data = lxb_tag_data_by_name(document->dom_document.tags, (lxb_char_t *) context_node->name, xmlStrlen(context_node->name));
|
||||
element->node.local_name = tag_data == NULL ? LXB_TAG__UNDEF : tag_data->tag_id;
|
||||
|
||||
const lxb_char_t *ns_uri;
|
||||
size_t ns_uri_len;
|
||||
if (context_node->ns == NULL || context_node->ns->href == NULL) {
|
||||
ns_uri = (lxb_char_t *) "";
|
||||
ns_uri_len = 0;
|
||||
} else {
|
||||
ns_uri = context_node->ns->href;
|
||||
ns_uri_len = xmlStrlen(ns_uri);
|
||||
}
|
||||
const lxb_ns_data_t *ns_data = lxb_ns_data_by_link(document->dom_document.ns, ns_uri, ns_uri_len);
|
||||
element->node.ns = ns_data == NULL ? LXB_NS__UNDEF : ns_data->ns_id;
|
||||
|
||||
lxb_dom_node_t *node = dom_html_fragment_lexbor_parse(document, element, input);
|
||||
xmlNodePtr fragment = NULL;
|
||||
if (node != NULL) {
|
||||
/* node->last_child could be NULL, but that is allowed. */
|
||||
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_ns_mapper(obj));
|
||||
if (UNEXPECTED(status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
|
||||
php_dom_throw_error(INVALID_STATE_ERR, true);
|
||||
}
|
||||
} else {
|
||||
php_dom_throw_error(INVALID_STATE_ERR, true);
|
||||
}
|
||||
|
||||
lxb_html_document_destroy(document);
|
||||
|
||||
return fragment;
|
||||
}
|
||||
|
||||
/* Pushes the qualified name of `context_node` into the push parser: "prefix:" when the node's
 * namespace has a prefix, followed by the node's name. */
static void dom_xml_parser_tag_name(const xmlNode *context_node, xmlParserCtxtPtr parser)
{
	const xmlChar *prefix = context_node->ns != NULL ? context_node->ns->prefix : NULL;
	if (prefix != NULL) {
		xmlParseChunk(parser, (const char *) prefix, xmlStrlen(prefix), 0);
		xmlParseChunk(parser, ":", 1, 0);
	}

	const xmlChar *local_name = context_node->name;
	xmlParseChunk(parser, (const char *) local_name, xmlStrlen(local_name), 0);
}
|
||||
|
||||
static void dom_xml_fragment_parsing_algorithm_parse(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *context_node, const zend_string *input, xmlParserCtxtPtr parser)
|
||||
{
|
||||
xmlParseChunk(parser, "<", 1, 0);
|
||||
dom_xml_parser_tag_name(context_node, parser);
|
||||
|
||||
/* Namespaces: we have to declare all in-scope namespaces including the default namespace */
|
||||
/* xmlns attributes */
|
||||
php_dom_in_scope_ns in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, context_node, true);
|
||||
for (size_t i = 0; i < in_scope_ns.count; i++) {
|
||||
const xmlNs *ns = in_scope_ns.list[i];
|
||||
xmlParseChunk(parser, " xmlns:", 7, 0);
|
||||
ZEND_ASSERT(ns->prefix != NULL);
|
||||
xmlParseChunk(parser, (const char *) ns->prefix, xmlStrlen(ns->prefix), 0);
|
||||
xmlParseChunk(parser, "=\"", 2, 0);
|
||||
xmlParseChunk(parser, (const char *) ns->href, xmlStrlen(ns->href), 0);
|
||||
xmlParseChunk(parser, "\"", 1, 0);
|
||||
}
|
||||
php_dom_in_scope_ns_destroy(&in_scope_ns);
|
||||
/* default namespace */
|
||||
const char *default_ns = dom_locate_a_namespace(context_node, NULL);
|
||||
if (default_ns != NULL) {
|
||||
xmlParseChunk(parser, " xmlns=\"", 8, 0);
|
||||
xmlParseChunk(parser, default_ns, strlen(default_ns), 0);
|
||||
xmlParseChunk(parser, "\"", 1, 0);
|
||||
}
|
||||
|
||||
xmlParseChunk(parser, ">", 1, 0);
|
||||
|
||||
xmlParseChunk(parser, (const char *) ZSTR_VAL(input), ZSTR_LEN(input), 0);
|
||||
|
||||
xmlParseChunk(parser, "</", 2, 0);
|
||||
dom_xml_parser_tag_name(context_node, parser);
|
||||
xmlParseChunk(parser, ">", 1, 1);
|
||||
}
|
||||
|
||||
/* https://html.spec.whatwg.org/#xml-fragment-parsing-algorithm */
|
||||
static xmlNodePtr dom_xml_fragment_parsing_algorithm(dom_object *obj, const xmlNode *context_node, const zend_string *input)
|
||||
{
|
||||
/* Steps 1-4 below */
|
||||
xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
|
||||
if (UNEXPECTED(parser == NULL)) {
|
||||
php_dom_throw_error(INVALID_STATE_ERR, true);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* This is not only good to avoid a performance cost of changing the tree, but also to work around an old bug
|
||||
* in xmlSetTreeDoc(). */
|
||||
xmlDictFree(parser->dict);
|
||||
if (context_node->doc->dict == NULL) {
|
||||
context_node->doc->dict = xmlDictCreate();
|
||||
xmlDictSetLimit(context_node->doc->dict, XML_MAX_DICTIONARY_LIMIT);
|
||||
}
|
||||
parser->dict = context_node->doc->dict;
|
||||
|
||||
php_libxml_sanitize_parse_ctxt_options(parser);
|
||||
xmlCtxtUseOptions(parser, XML_PARSE_IGNORE_ENC | XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
|
||||
|
||||
xmlCharEncodingHandlerPtr encoding = xmlFindCharEncodingHandler("UTF-8");
|
||||
(void) xmlSwitchToEncoding(parser, encoding);
|
||||
|
||||
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(obj);
|
||||
dom_xml_fragment_parsing_algorithm_parse(ns_mapper, context_node, input, parser);
|
||||
|
||||
/* 5. If there is an XML well-formedness or XML namespace well-formedness error, then throw a "SyntaxError" DOMException. */
|
||||
if (!parser->wellFormed || !parser->nsWellFormed) {
|
||||
parser->dict = NULL;
|
||||
xmlFreeDoc(parser->myDoc);
|
||||
xmlFreeParserCtxt(parser);
|
||||
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
xmlDocPtr doc = parser->myDoc;
|
||||
xmlFreeParserCtxt(parser);
|
||||
|
||||
if (EXPECTED(doc != NULL)) {
|
||||
doc->dict = NULL;
|
||||
|
||||
/* 6. If the document element of the resulting Document has any sibling nodes, then throw a "SyntaxError" DOMException. */
|
||||
xmlNodePtr document_element = doc->children;
|
||||
if (document_element == NULL || document_element->next != NULL) {
|
||||
xmlFreeDoc(doc);
|
||||
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* 7. Return the child nodes of the document element of the resulting Document, in tree order. */
|
||||
xmlNodePtr fragment = xmlNewDocFragment(context_node->doc);
|
||||
if (EXPECTED(fragment != NULL)) {
|
||||
xmlNodePtr child = document_element->children;
|
||||
/* Yes, we have to call both xmlSetTreeDoc() prior to xmlAddChildList()
|
||||
* because xmlAddChildList() _only_ sets the tree for the topmost elements in the subtree! */
|
||||
xmlSetTreeDoc(document_element, context_node->doc);
|
||||
xmlAddChildList(fragment, child);
|
||||
dom_mark_namespaces_as_attributes_too(ns_mapper, doc);
|
||||
document_element->children = NULL;
|
||||
document_element->last = NULL;
|
||||
}
|
||||
xmlFreeDoc(doc);
|
||||
return fragment;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
|
||||
* and https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm */
|
||||
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
|
||||
{
|
||||
DOM_PROP_NODE(xmlNodePtr, context_node, obj);
|
||||
|
||||
xmlNodePtr fragment;
|
||||
if (context_node->doc->type == XML_DOCUMENT_NODE) {
|
||||
fragment = dom_xml_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval));
|
||||
} else {
|
||||
fragment = dom_html_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval), obj->document->quirks_mode);
|
||||
}
|
||||
|
||||
if (fragment == NULL) {
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
/* We skip the steps involving the template element as context node since we don't do special handling for that. */
|
||||
dom_remove_all_children(context_node);
|
||||
return php_dom_pre_insert(obj->document, fragment, context_node, NULL) ? SUCCESS : FAILURE;
|
||||
}
|
||||
|
||||
#endif
|
@ -447,7 +447,7 @@ PHP_DOM_EXPORT void php_dom_libxml_reconcile_modern(php_dom_libxml_ns_mapper *ns
|
||||
zend_hash_destroy(&ctx.old_ns_to_new_ns_ptr);
|
||||
}
|
||||
|
||||
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node)
|
||||
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements)
|
||||
{
|
||||
ZEND_ASSERT(node != NULL);
|
||||
|
||||
@ -464,7 +464,7 @@ PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_map
|
||||
for (const xmlNode *cur = node; cur != NULL; cur = cur->parent) {
|
||||
if (cur->type == XML_ELEMENT_NODE) {
|
||||
/* Register namespace of element */
|
||||
if (cur->ns != NULL && cur->ns->prefix != NULL) {
|
||||
if (!ignore_elements && cur->ns != NULL && cur->ns->prefix != NULL) {
|
||||
const char *prefix = (const char *) cur->ns->prefix;
|
||||
zend_hash_str_add_ptr(&tmp_prefix_to_ns_table, prefix, strlen(prefix), cur->ns);
|
||||
}
|
||||
|
@ -70,7 +70,7 @@ typedef struct _php_dom_in_scope_ns {
|
||||
bool origin_is_ns_compat;
|
||||
} php_dom_in_scope_ns;
|
||||
|
||||
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node);
|
||||
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements);
|
||||
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns_legacy(const xmlNode *node);
|
||||
PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns);
|
||||
|
||||
|
@ -1881,7 +1881,7 @@ PHP_METHOD(Dom_Node, lookupPrefix)
|
||||
/* }}} end dom_node_lookup_prefix */
|
||||
|
||||
/* https://dom.spec.whatwg.org/#locate-a-namespace */
|
||||
static const char *dom_locate_a_namespace(xmlNodePtr node, const zend_string *prefix)
|
||||
const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix)
|
||||
{
|
||||
/* switch on the interface node implements: */
|
||||
if (node->type == XML_ELEMENT_NODE) {
|
||||
|
@ -1039,6 +1039,7 @@ PHP_MINIT_FUNCTION(dom)
|
||||
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "childElementCount", dom_parent_node_child_element_count, NULL);
|
||||
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "previousElementSibling", dom_node_previous_element_sibling_read, NULL);
|
||||
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "nextElementSibling", dom_node_next_element_sibling_read, NULL);
|
||||
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "innerHTML", dom_element_inner_html_read, dom_element_inner_html_write);
|
||||
zend_hash_merge(&dom_modern_element_prop_handlers, &dom_modern_node_prop_handlers, NULL, false);
|
||||
DOM_OVERWRITE_PROP_HANDLER(&dom_modern_element_prop_handlers, "textContent", dom_node_text_content_read, dom_node_text_content_write);
|
||||
zend_hash_add_new_ptr(&classes, dom_modern_element_class_entry->name, &dom_modern_element_prop_handlers);
|
||||
|
@ -171,6 +171,8 @@ dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_ent
|
||||
xmlDocPtr php_dom_create_html_doc(void);
|
||||
xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference);
|
||||
void dom_set_xml_class(php_libxml_ref_obj *document);
|
||||
const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix);
|
||||
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc);
|
||||
bool dom_compare_value(const xmlAttr *attr, const xmlChar *value);
|
||||
void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp);
|
||||
|
||||
|
@ -1380,6 +1380,8 @@ namespace Dom
|
||||
public function querySelectorAll(string $selectors): NodeList {}
|
||||
public function closest(string $selectors): ?Element {}
|
||||
public function matches(string $selectors): bool {}
|
||||
|
||||
public string $innerHTML;
|
||||
}
|
||||
|
||||
class HTMLElement extends Element
|
||||
|
8
ext/dom/php_dom_arginfo.h
generated
8
ext/dom/php_dom_arginfo.h
generated
@ -1,5 +1,5 @@
|
||||
/* This is a generated file, edit the .stub.php file instead.
|
||||
* Stub hash: 28365949d78a2d0254cfdb0da6549e282d2eb436 */
|
||||
* Stub hash: 9065d5c713a6fb879f8116821eaabc3a01a4db20 */
|
||||
|
||||
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
|
||||
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
|
||||
@ -3121,6 +3121,12 @@ static zend_class_entry *register_class_Dom_Element(zend_class_entry *class_entr
|
||||
zend_declare_typed_property(class_entry, property_nextElementSibling_name, &property_nextElementSibling_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_nextElementSibling_class_Dom_Element, 0, MAY_BE_NULL));
|
||||
zend_string_release(property_nextElementSibling_name);
|
||||
|
||||
zval property_innerHTML_default_value;
|
||||
ZVAL_UNDEF(&property_innerHTML_default_value);
|
||||
zend_string *property_innerHTML_name = zend_string_init("innerHTML", sizeof("innerHTML") - 1, 1);
|
||||
zend_declare_typed_property(class_entry, property_innerHTML_name, &property_innerHTML_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING));
|
||||
zend_string_release(property_innerHTML_name);
|
||||
|
||||
return class_entry;
|
||||
}
|
||||
|
||||
|
65
ext/dom/tests/modern/html/parser/Element_innerHTML.phpt
Normal file
65
ext/dom/tests/modern/html/parser/Element_innerHTML.phpt
Normal file
@ -0,0 +1,65 @@
|
||||
--TEST--
|
||||
Test writing Element::$innerHTML on HTML documents
|
||||
--EXTENSIONS--
|
||||
dom
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$dom = DOM\HTMLDocument::createEmpty();
|
||||
$el = $dom->createElement('div');
|
||||
$dom->appendChild($el);
|
||||
$el->innerHTML = '<p>foo</p>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = '</div>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = '';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = '<div></div> <p></p>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = "invalid\xffutf-8𐍈𐍈𐍈";
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
// Create a non-interned string that gets the UTF-8 validity flag added
|
||||
$str = str_repeat("my valid string", random_int(1, 1));
|
||||
preg_match('/^.*$/u', $str);
|
||||
$el->innerHTML = $str;
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
$dom = DOM\HTMLDocument::createEmpty();
|
||||
$el = $dom->createElement('style');
|
||||
$dom->appendChild($el);
|
||||
$el->innerHTML = '<p>foo</p>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
$dom = DOM\HTMLDocument::createEmpty();
|
||||
$el = $dom->createElementNS('urn:a', 'style');
|
||||
$dom->appendChild($el);
|
||||
$el->innerHTML = '<p>foo</p>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
$dom = DOM\HTMLDocument::createEmpty();
|
||||
$el = $dom->createElement('textarea');
|
||||
$dom->appendChild($el);
|
||||
$el->innerHTML = "</textarea>\0-->";
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<div xmlns="http://www.w3.org/1999/xhtml"><p>foo</p></div>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<div xmlns="http://www.w3.org/1999/xhtml"></div>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<div xmlns="http://www.w3.org/1999/xhtml"></div>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<div xmlns="http://www.w3.org/1999/xhtml"><div></div> <p></p></div>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">invalid<69>utf-8𐍈𐍈𐍈</div>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">my valid string</div>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<style xmlns="http://www.w3.org/1999/xhtml"><p>foo</p></style>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<style xmlns="urn:a"><p xmlns="">foo</p></style>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<textarea xmlns="http://www.w3.org/1999/xhtml"></textarea><3B>--></textarea>
|
21
ext/dom/tests/modern/html/serializer/Element_innerHTML.phpt
Normal file
21
ext/dom/tests/modern/html/serializer/Element_innerHTML.phpt
Normal file
@ -0,0 +1,21 @@
|
||||
--TEST--
|
||||
Test reading Element::$innerHTML on HTML documents
|
||||
--EXTENSIONS--
|
||||
dom
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$dom = DOM\HTMLDocument::createFromString('<!DOCTYPE html><html><head><title>Test</title></head><body><div></div><p>Hello, World!</p></body></html>');
|
||||
var_dump($dom->getElementsByTagName('body')[0]->innerHTML);
|
||||
var_dump($dom->getElementsByTagName('head')[0]->innerHTML);
|
||||
var_dump($dom->getElementsByTagName('html')[0]->innerHTML);
|
||||
var_dump($dom->getElementsByTagName('div')[0]->innerHTML);
|
||||
var_dump($dom->getElementsByTagName('p')[0]->innerHTML);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
string(31) "<div></div><p>Hello, World!</p>"
|
||||
string(19) "<title>Test</title>"
|
||||
string(76) "<head><title>Test</title></head><body><div></div><p>Hello, World!</p></body>"
|
||||
string(0) ""
|
||||
string(13) "Hello, World!"
|
65
ext/dom/tests/modern/xml/Element_innerHTML_reading.phpt
Normal file
65
ext/dom/tests/modern/xml/Element_innerHTML_reading.phpt
Normal file
@ -0,0 +1,65 @@
|
||||
--TEST--
|
||||
Test reading Element::$innerHTML on XML documents
|
||||
--EXTENSIONS--
|
||||
dom
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$dom = DOM\XMLDocument::createEmpty();
|
||||
|
||||
// Test helper: returns a new <container> element created through the global $dom document.
// The element is only created here; this function does not insert it into any tree.
function createContainer() {
|
||||
global $dom;
|
||||
$element = $dom->createElement("container");
|
||||
return $element;
|
||||
}
|
||||
|
||||
$container = createContainer();
|
||||
$container->append("Hello, world!");
|
||||
var_dump($container->innerHTML);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createComment("This is -a- comment"));
|
||||
var_dump($container->innerHTML);
|
||||
|
||||
$container = createContainer();
|
||||
// Note: the target is intentionally misspelled as "xmll" to check that the comparison against "xml" matches the whole string,
|
||||
// i.e. that a target merely starting with "xml" is not rejected because of prefix-only matching.
|
||||
$container->append($dom->createProcessingInstruction("xmll", ""));
|
||||
var_dump($container->innerHTML);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createProcessingInstruction("almostmalformed", ">?"));
|
||||
var_dump($container->innerHTML);
|
||||
|
||||
$container = createContainer();
|
||||
$element = $container->appendChild(createContainer());
|
||||
$element->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns', 'http://example.com/');
|
||||
var_dump($container->innerHTML);
|
||||
|
||||
$container = createContainer();
|
||||
$element = $container->appendChild(createContainer());
|
||||
$element->setAttributeNS('urn:a', 'name', '');
|
||||
$element->setAttributeNS('urn:b', 'name', '');
|
||||
var_dump($container->innerHTML);
|
||||
|
||||
$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/../../book.xml');
|
||||
var_dump($dom->documentElement->innerHTML);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
string(13) "Hello, world!"
|
||||
string(26) "<!--This is -a- comment-->"
|
||||
string(9) "<?xmll ?>"
|
||||
string(22) "<?almostmalformed >??>"
|
||||
string(12) "<container/>"
|
||||
string(72) "<container xmlns:ns1="urn:a" ns1:name="" xmlns:ns2="urn:b" ns2:name=""/>"
|
||||
string(167) "
|
||||
<book>
|
||||
<title>The Grapes of Wrath</title>
|
||||
<author>John Steinbeck</author>
|
||||
</book>
|
||||
<book>
|
||||
<title>The Pearl</title>
|
||||
<author>John Steinbeck</author>
|
||||
</book>
|
||||
"
|
108
ext/dom/tests/modern/xml/Element_innerHTML_reading_errors.phpt
Normal file
108
ext/dom/tests/modern/xml/Element_innerHTML_reading_errors.phpt
Normal file
@ -0,0 +1,108 @@
|
||||
--TEST--
|
||||
Test reading Element::$innerHTML on XML documents - error cases
|
||||
--EXTENSIONS--
|
||||
dom
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$dom = DOM\XMLDocument::createEmpty();
|
||||
|
||||
// Test helper: returns a new <container> element created through the global $dom document.
// The element is only created here; this function does not insert it into any tree.
function createContainer() {
|
||||
global $dom;
|
||||
$element = $dom->createElement("container");
|
||||
return $element;
|
||||
}
|
||||
|
||||
// Test helper: dumps $container->innerHTML, or, if reading it throws a DOMException,
// prints only the exception message followed by a newline.
function test($container) {
|
||||
try {
|
||||
var_dump($container->innerHTML);
|
||||
} catch (DOMException $e) {
|
||||
echo $e->getMessage(), "\n";
|
||||
}
|
||||
}
|
||||
|
||||
$container = createContainer();
|
||||
$container->append("Hello, \x01 world!");
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createComment('Hello -- world'));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createComment('Hello world-'));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createComment('Hello world-'));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createComment("\x01"));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$legacy = new DOMDocument;
|
||||
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo:bar', '?>')));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$legacy = new DOMDocument;
|
||||
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo', '?>')));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$legacy = new DOMDocument;
|
||||
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('xml', '')));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$legacy = new DOMDocument;
|
||||
$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo', "\x01")));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createElement("with:colon"));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$container->append($dom->createElementNS("http://www.w3.org/2000/xmlns/", "xmlns:colon"));
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$element = $container->appendChild(createContainer());
|
||||
$element->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:x", "http://www.w3.org/2000/xmlns/");
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$element = $container->appendChild(createContainer());
|
||||
$element->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:x", "");
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$element = $container->appendChild(createContainer());
|
||||
$element->setAttribute("with:colon", "value");
|
||||
test($container);
|
||||
|
||||
$container = createContainer();
|
||||
$element = $container->appendChild(createContainer());
|
||||
$element->setAttribute("xmlns", "value");
|
||||
test($container);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
||||
The resulting XML serialization is not well-formed
|
86
ext/dom/tests/modern/xml/Element_innerHTML_writing.phpt
Normal file
86
ext/dom/tests/modern/xml/Element_innerHTML_writing.phpt
Normal file
@ -0,0 +1,86 @@
|
||||
--TEST--
|
||||
Test writing Element::$innerHTML on XML documents
|
||||
--EXTENSIONS--
|
||||
dom
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$dom = DOM\XMLDocument::createEmpty();
|
||||
$el = $dom->createElementNS('urn:a', 'root');
|
||||
$dom->appendChild($el);
|
||||
$el->innerHTML = '<p>foo</p><p xmlns="">bar</p>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = '';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = '&';
|
||||
echo $dom->saveXML(), "\n";
|
||||
$el->innerHTML = '<foo>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
echo "----------------\n";
|
||||
|
||||
$dom = DOM\XMLDocument::createFromString('<root/>');
|
||||
$child = $dom->documentElement->appendChild($dom->createElementNS('urn:a', 'child'));
|
||||
$child->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns', 'urn:b');
|
||||
$child->innerHTML = '<default/>';
|
||||
echo $dom->saveXML(), "\n";
|
||||
var_dump($child->namespaceURI);
|
||||
var_dump($child->firstChild->namespaceURI);
|
||||
|
||||
echo "----------------\n";
|
||||
|
||||
$dom = DOM\XMLDocument::createFromString(<<<XML
|
||||
<root xmlns="urn:a" xmlns:b="urn:b" xmlns:c="urn:c">
|
||||
<b:child a="none" b:b="b" c:c="c"/>
|
||||
<c:child a="none" b:b="b" c:c="c"/>
|
||||
<?pi ?>
|
||||
<!-- comment -->
|
||||
<child a="none" b:b="b" c:c="c">
|
||||
<![CDATA[ cdata ]]>
|
||||
</child>
|
||||
</root>
|
||||
XML);
|
||||
$dom->documentElement->innerHTML = $dom->documentElement->innerHTML;
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
echo "----------------\n";
|
||||
$dom->documentElement->innerHTML = <<<XML
|
||||
<child b:b="b" c:c="c">
|
||||
<b:child/>
|
||||
<c:child/>
|
||||
</child>
|
||||
XML;
|
||||
echo $dom->saveXML(), "\n";
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root xmlns="urn:a"><p>foo</p><p xmlns="">bar</p></root>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root xmlns="urn:a"/>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root xmlns="urn:a">&</root>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root xmlns="urn:a"><foo></root>
|
||||
----------------
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root><child xmlns="urn:a"><default/></child></root>
|
||||
string(5) "urn:a"
|
||||
string(5) "urn:a"
|
||||
----------------
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root xmlns="urn:a" xmlns:b="urn:b" xmlns:c="urn:c">
|
||||
<b:child a="none" b:b="b" c:c="c"/>
|
||||
<c:child a="none" b:b="b" c:c="c"/>
|
||||
<?pi ?>
|
||||
<!-- comment -->
|
||||
<child xmlns="urn:a" a="none" b:b="b" c:c="c">
|
||||
<![CDATA[ cdata ]]>
|
||||
</child>
|
||||
</root>
|
||||
----------------
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root xmlns="urn:a" xmlns:b="urn:b" xmlns:c="urn:c"><child b:b="b" c:c="c">
|
||||
<b:child/>
|
||||
<c:child/>
|
||||
</child></root>
|
@ -0,0 +1,47 @@
|
||||
--TEST--
|
||||
Test writing Element::$innerHTML on XML documents - error cases
|
||||
--EXTENSIONS--
|
||||
dom
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$dom = DOM\XMLDocument::createFromString(<<<XML
|
||||
<!DOCTYPE root [
|
||||
<!ENTITY foo "content">
|
||||
]>
|
||||
<root/>
|
||||
XML);
|
||||
$child = $dom->documentElement->appendChild($dom->createElementNS('urn:a', 'child'));
|
||||
$original = $dom->saveXML();
|
||||
|
||||
// Test helper: assigns $html to $child->innerHTML and prints the DOMException message if the write throws.
// Afterwards it always dumps whether the global document still serializes to $original,
// i.e. whether the attempted write left the document unchanged.
function test($child, $html) {
|
||||
global $dom, $original;
|
||||
try {
|
||||
$child->innerHTML = $html;
|
||||
} catch (DOMException $e) {
|
||||
echo $e->getMessage(), "\n";
|
||||
}
|
||||
var_dump($dom->saveXML() === $original);
|
||||
}
|
||||
|
||||
test($child, '&foo;');
|
||||
test($child, '</root>');
|
||||
test($child, '</root><foo/><!--');
|
||||
test($child, '--></root><!--');
|
||||
test($child, '<');
|
||||
test($child, '<!ENTITY foo "content">');
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
XML fragment is not well-formed
|
||||
bool(true)
|
||||
XML fragment is not well-formed
|
||||
bool(true)
|
||||
XML fragment is not well-formed
|
||||
bool(true)
|
||||
XML fragment is not well-formed
|
||||
bool(true)
|
||||
XML fragment is not well-formed
|
||||
bool(true)
|
||||
XML fragment is not well-formed
|
||||
bool(true)
|
@ -11,7 +11,7 @@ fclose($memory);
|
||||
--EXPECTF--
|
||||
Warning: Dom\XMLDocument::createFromFile(): Document is empty in php://memory, line: 1 in %s on line %d
|
||||
|
||||
Fatal error: Uncaught Exception: XML document is malformed in %s:%d
|
||||
Fatal error: Uncaught DOMException: XML fragment is not well-formed in %s:%d
|
||||
Stack trace:
|
||||
#0 %s(%d): Dom\XMLDocument::createFromFile('php://memory')
|
||||
#1 {main}
|
||||
|
@ -71,7 +71,7 @@ static bool check_options_validity(uint32_t arg_num, zend_long options)
|
||||
* So in principle we could just ignore them outright.
|
||||
* However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token (Date 2023-12-15)
|
||||
* requires us to have the declaration as an attribute available */
|
||||
static void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
|
||||
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
|
||||
{
|
||||
xmlNodePtr node = doc->children;
|
||||
while (node != NULL) {
|
||||
@ -175,7 +175,7 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
|
||||
if (UNEXPECTED(lxml_doc == NULL || lxml_doc == DOM_DOCUMENT_MALFORMED)) {
|
||||
if (!EG(exception)) {
|
||||
if (lxml_doc == DOM_DOCUMENT_MALFORMED) {
|
||||
zend_throw_exception_ex(NULL, 0, "XML document is malformed");
|
||||
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
|
||||
} else {
|
||||
if (mode == DOM_LOAD_FILE) {
|
||||
zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source);
|
||||
@ -262,7 +262,7 @@ static zend_string *php_new_dom_dump_node_to_str_ex(xmlNodePtr node, int options
|
||||
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
|
||||
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
|
||||
if (EXPECTED(out != NULL)) {
|
||||
status = dom_xml_serialize(ctxt, out, node, format);
|
||||
status = dom_xml_serialize(ctxt, out, node, format, false);
|
||||
status |= xmlOutputBufferFlush(out);
|
||||
status |= xmlOutputBufferClose(out);
|
||||
} else {
|
||||
@ -303,7 +303,7 @@ zend_long php_new_dom_dump_node_to_file(const char *filename, xmlDocPtr doc, xml
|
||||
int status = -1;
|
||||
xmlSaveCtxtPtr ctxt = xmlSaveToIO(out->writecallback, NULL, stream, encoding, XML_SAVE_AS_XML);
|
||||
if (EXPECTED(ctxt != NULL)) {
|
||||
status = dom_xml_serialize(ctxt, out, node, format);
|
||||
status = dom_xml_serialize(ctxt, out, node, format, false);
|
||||
status |= xmlOutputBufferFlush(out);
|
||||
(void) xmlSaveClose(ctxt);
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "namespace_compat.h"
|
||||
#include "serialize_common.h"
|
||||
#include "internal_helpers.h"
|
||||
#include <libxml/chvalid.h>
|
||||
|
||||
// TODO: implement iterative approach instead of recursive?
|
||||
|
||||
@ -72,7 +73,8 @@ static int dom_xml_serialization_algorithm(
|
||||
xmlNodePtr node,
|
||||
const xmlChar *namespace,
|
||||
unsigned int *prefix_index,
|
||||
int indent
|
||||
int indent,
|
||||
bool require_well_formed
|
||||
);
|
||||
|
||||
static bool dom_xml_str_equals_treat_nulls_as_empty(const xmlChar *s1, const xmlChar *s2)
|
||||
@ -345,12 +347,10 @@ static const xmlChar *dom_recording_the_namespace_information(
|
||||
}
|
||||
|
||||
/* 2.3.2.4. If namespace definition is the empty string (the declarative form of having no namespace),
|
||||
* then let namespace definition be null instead. */
|
||||
if (*namespace_definition == '\0') {
|
||||
namespace_definition = NULL;
|
||||
}
|
||||
* then let namespace definition be null instead.
|
||||
* => This gets delayed until later down. */
|
||||
|
||||
size_t namespace_definition_length = namespace_definition == NULL ? 0 : strlen((const char *) namespace_definition);
|
||||
size_t namespace_definition_length = strlen((const char *) namespace_definition);
|
||||
|
||||
/* 2.3.2.5. If prefix definition is found in map given the namespace namespace definition,
|
||||
* then stop running these steps, and return to Main to visit the next attribute. */
|
||||
@ -358,6 +358,11 @@ static const xmlChar *dom_recording_the_namespace_information(
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Delayed step 2.3.2.4 */
|
||||
if (*namespace_definition == '\0') {
|
||||
namespace_definition = NULL;
|
||||
}
|
||||
|
||||
/* 2.3.2.6. Add the prefix prefix definition to map given namespace namespace definition. */
|
||||
dom_xml_ns_prefix_map_add(namespace_prefix_map, prefix_definition, false, namespace_definition, namespace_definition_length);
|
||||
|
||||
@ -534,15 +539,40 @@ static int dom_xml_common_text_serialization(xmlOutputBufferPtr out, const char
|
||||
return xmlOutputBufferWrite(out, content - last_output, last_output);
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-an-element-node */
|
||||
static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out, xmlNodePtr text)
|
||||
/* Validates that the NUL-terminated UTF-8 string `content` only contains code points
 * accepted by xmlIsCharQ(), i.e. the XML Char production.
 * Returns 0 if the whole string passes, or -1 at the first sequence that either fails
 * to decode (xmlGetUTF8Char() returns a negative value) or is rejected by xmlIsCharQ().
 * The 0 / -1 int convention lets callers wrap the call in TRY(). */
static int dom_xml_check_char_production(const xmlChar *content)
|
||||
{
|
||||
/* Walk the string one UTF-8 sequence at a time;
|
||||
* => the first undecodable or disallowed sequence aborts the scan with -1 */
|
||||
// TODO: optimization idea: fast path for ASCII-only data
|
||||
|
||||
const xmlChar *ptr = content;
|
||||
while (*ptr != '\0') {
|
||||
/* Passed by pointer to xmlGetUTF8Char(): 4 on entry (the longest UTF-8 sequence);
 * presumably set to the number of bytes consumed on return, as it is used to advance ptr below
 * (NOTE(review): confirm against the libxml2 documentation). */
int len = 4;
|
||||
int c = xmlGetUTF8Char(ptr, &len);
|
||||
if (c < 0 || !xmlIsCharQ(c)) {
|
||||
return -1;
|
||||
}
|
||||
ptr += len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Serializes the text node `text` to `out`.
 * When `require_well_formed` is set and the node has content, the content is first validated with
 * dom_xml_check_char_production(); the check is wrapped in TRY(), which is defined outside this view
 * (presumably it propagates the failure to the caller -- confirm).
 * Returns the result of dom_xml_common_text_serialization(), called with `false` as its last argument. */
/* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-text-node */
|
||||
static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out, xmlNodePtr text, bool require_well_formed)
|
||||
{
|
||||
/* 1. If the require well-formed flag is set and node's data contains characters that are not matched by the XML Char production,
|
||||
* then throw an exception. */
|
||||
if (require_well_formed && text->content != NULL) {
|
||||
TRY(dom_xml_check_char_production(text->content));
|
||||
}
|
||||
|
||||
return dom_xml_common_text_serialization(out, (const char *) text->content, false);
|
||||
}
|
||||
|
||||
/* Returns the namespace URI of `attr` (its ns->href), or NULL when the attribute has no namespace (attr->ns is NULL). */
static zend_always_inline const xmlChar *dom_xml_attribute_namespace(const xmlAttr *attr)
|
||||
{
|
||||
return attr->ns == NULL ? NULL : attr->ns->href;
|
||||
}
|
||||
|
||||
static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAttrPtr attr)
|
||||
{
|
||||
TRY(xmlOutputBufferWriteString(out, (const char *) attr->name));
|
||||
@ -561,7 +591,28 @@ static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAtt
|
||||
return xmlOutputBufferWriteLit(out, "\"");
|
||||
}
|
||||
|
||||
/* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28 */
|
||||
/* These steps are from the attribute serialization algorithm's well-formed checks.
|
||||
* Note that this does not return a boolean but an int to be compatible with the TRY/TRY_OR_CLEANUP interface
|
||||
* that we use for compatibility with libxml's interfaces.
 * Returns -1 if the attribute's value equals the XMLNS namespace URI or is the empty string, and 0 otherwise.
 * The require well-formed flag is not checked in here: the caller is expected to test it before calling.
 * NOTE(review): the value from dom_get_attribute_value() is dereferenced unconditionally,
 * so this assumes that helper never returns NULL -- confirm. */
|
||||
static zend_always_inline int dom_xml_check_xmlns_attribute_requirements(const xmlAttr *attr)
|
||||
{
|
||||
const xmlChar *attr_value = dom_get_attribute_value(attr);
|
||||
|
||||
/* 3.5.2.2. If the require well-formed flag is set and the value of attr's value attribute matches the XMLNS namespace, then throw an exception */
|
||||
if (strcmp((const char *) attr_value, DOM_XMLNS_NS_URI) == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* 3.5.2.3. If the require well-formed flag is set and the value of attr's value attribute is the empty string */
|
||||
if (*attr_value == '\0') {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28
|
||||
* This does not have a require_well_formed argument because the only way to get here is via saveXML(), which has it off. */
|
||||
static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr attr)
|
||||
{
|
||||
if (attr->ns != NULL && attr->ns->prefix != NULL) {
|
||||
@ -572,10 +623,21 @@ static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr a
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-a-comment-node */
|
||||
static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr comment)
|
||||
static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr comment, bool require_well_formed)
|
||||
{
|
||||
/* 1. If the require well-formed flag is set ...
|
||||
* => N/A */
|
||||
/* Step 1 deals with well-formed flag */
|
||||
if (require_well_formed) {
|
||||
/* node's data contains characters that are not matched by the XML Char production or contains "--"
|
||||
* (two adjacent U+002D HYPHEN-MINUS characters) or that ends with a "-" (U+002D HYPHEN-MINUS) character,
|
||||
* then throw an exception */
|
||||
const xmlChar *ptr = comment->content;
|
||||
if (ptr != NULL) {
|
||||
TRY(dom_xml_check_char_production(ptr));
|
||||
if (strstr((const char *) ptr, "--") != NULL || ptr[strlen((const char *) ptr) - 1] == '-') {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TRY(xmlOutputBufferWriteLit(out, "<!--"));
|
||||
if (EXPECTED(comment->content != NULL)) {
|
||||
@ -585,10 +647,24 @@ static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr com
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-processinginstruction-node */
|
||||
static int dom_xml_serialize_processing_instruction(xmlOutputBufferPtr out, xmlNodePtr pi)
|
||||
static int dom_xml_serialize_processing_instruction(xmlOutputBufferPtr out, xmlNodePtr pi, bool require_well_formed)
|
||||
{
|
||||
/* Steps 1-2 deal with well-formed flag
|
||||
* => N/A */
|
||||
/* Steps 1-2 deal with well-formed flag */
|
||||
if (require_well_formed) {
|
||||
/* target contains a ":" (U+003A COLON) character or is an ASCII case-insensitive match for the string "xml", then throw an exception */
|
||||
if (strchr((const char *) pi->name, ':') != NULL || strcasecmp((const char *) pi->name, "xml") == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* node's data contains characters that are not matched by the XML Char production or contains the string "?>"
|
||||
* (U+003F QUESTION MARK, U+003E GREATER-THAN SIGN), then throw an exception */
|
||||
if (pi->content != NULL) {
|
||||
TRY(dom_xml_check_char_production(pi->content));
|
||||
if (strstr((const char *) pi->content, "?>") != NULL) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TRY(xmlOutputBufferWriteLit(out, "<?"));
|
||||
TRY(xmlOutputBufferWriteString(out, (const char *) pi->name));
|
||||
@ -610,6 +686,23 @@ static int dom_xml_serialize_cdata_section_node(xmlOutputBufferPtr out, xmlNodeP
|
||||
return xmlOutputBufferWriteLit(out, "]]>");
|
||||
}
|
||||
|
||||
/* Builds the string key that stands for the (namespace URI, local name) tuple of `attr`.
 * - No namespace (attr->ns or attr->ns->href is NULL): the key is just the local name.
 * - Otherwise: the key is href, a single \0 byte, then the local name.
 * Returns a newly created zend_string; dom_xml_serialize_attributes() releases it after inserting it into its set. */
static zend_string *dom_xml_create_localname_set_key(const xmlAttr *attr)
|
||||
{
|
||||
if (attr->ns == NULL || attr->ns->href == NULL) {
|
||||
return zend_string_init((const char *) attr->name, strlen((const char *) attr->name), false);
|
||||
}
|
||||
|
||||
/* Spec requires us to create a tuple as a key, however HashTable doesn't support that natively.
|
||||
* Fortunately, href and name cannot have embedded NUL bytes in them, so we can create a
|
||||
* "tuple" by concatenating them with each other, separated by a \0 byte.
|
||||
*/
|
||||
return zend_string_concat3(
|
||||
(const char *) attr->ns->href, strlen((const char *) attr->ns->href),
|
||||
"", 1, /* length 1 on an empty literal: copies its terminating \0 as the separator */
|
||||
(const char *) attr->name, strlen((const char *) attr->name)
|
||||
);
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization-of-the-attributes */
|
||||
static int dom_xml_serialize_attributes(
|
||||
xmlOutputBufferPtr out,
|
||||
@ -617,25 +710,34 @@ static int dom_xml_serialize_attributes(
|
||||
dom_xml_ns_prefix_map *map,
|
||||
dom_xml_local_prefix_map *local_prefixes_map,
|
||||
unsigned int *prefix_index,
|
||||
bool ignore_namespace_definition_attribute
|
||||
bool ignore_namespace_definition_attribute,
|
||||
bool require_well_formed
|
||||
)
|
||||
{
|
||||
/* 1. Let result be the empty string.
|
||||
* => We're going to write directly to the output buffer. */
|
||||
|
||||
/* 2. Let localname set be a new empty namespace localname set.
|
||||
* => N/A this is only required for well-formedness */
|
||||
* We can do this unconditionally even if we don't use it, because this doesn't allocate memory anyway. */
|
||||
HashTable localname_set;
|
||||
zend_hash_init(&localname_set, 8, NULL, NULL, false);
|
||||
|
||||
/* 3. [LOOP] For each attribute attr in element's attributes, in the order they are specified in the element's attribute list: */
|
||||
for (xmlAttrPtr attr = element->properties; attr != NULL; attr = attr->next) {
|
||||
/* 3.1. If the require well-formed flag is set ...
|
||||
* => N/A */
|
||||
|
||||
/* 3.2. Create a new tuple consisting of attr's namespaceURI attribute and localName attribute, and add it to the localname set.
|
||||
* => N/A this is only required for well-formedness */
|
||||
if (require_well_formed) {
|
||||
zend_string *key = dom_xml_create_localname_set_key(attr);
|
||||
/* 3.1. If the require well-formed flag is set and the localname set contains a tuple whose values match those of a
|
||||
* new tuple consisting of attr's namespaceURI attribute and localName attribute, then throw an exception
|
||||
* 3.2. Create a new tuple consisting of attr's namespaceURI attribute and localName attribute, and add it to the localname set. */
|
||||
bool duplicate = zend_hash_add_empty_element(&localname_set, key) == NULL;
|
||||
zend_string_release_ex(key, false);
|
||||
if (duplicate) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* 3.3. Let attribute namespace be the value of attr's namespaceURI value. */
|
||||
const xmlChar *attribute_namespace = attr->ns == NULL ? NULL : attr->ns->href;
|
||||
const xmlChar *attribute_namespace = dom_xml_attribute_namespace(attr);
|
||||
|
||||
/* 3.4. Let candidate prefix be null. */
|
||||
const xmlChar *candidate_prefix = NULL;
|
||||
@ -682,10 +784,10 @@ static int dom_xml_serialize_attributes(
|
||||
}
|
||||
}
|
||||
|
||||
/* 3.5.2.2. If the require well-formed flag is set ...
|
||||
* => N/A */
|
||||
/* 3.5.2.3. If the require well-formed flag is set ...
|
||||
* => N/A */
|
||||
if (require_well_formed) {
|
||||
/* 3.5.2.2 and 3.5.2.3 are done by this call. */
|
||||
TRY_OR_CLEANUP(dom_xml_check_xmlns_attribute_requirements(attr));
|
||||
}
|
||||
|
||||
/* 3.5.2.4. the attr's prefix matches the string "xmlns", then let candidate prefix be the string "xmlns". */
|
||||
if (attr->ns->prefix != NULL && strcmp((const char *) attr->ns->prefix, "xmlns") == 0) {
|
||||
@ -725,33 +827,47 @@ static int dom_xml_serialize_attributes(
|
||||
}
|
||||
|
||||
/* 3.5.3.2. Append the following to result, in the order listed: */
|
||||
TRY(xmlOutputBufferWriteLit(out, " xmlns:"));
|
||||
TRY(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
|
||||
TRY(xmlOutputBufferWriteLit(out, "=\""));
|
||||
TRY(dom_xml_common_text_serialization(out, (const char *) attribute_namespace, true));
|
||||
TRY(xmlOutputBufferWriteLit(out, "\""));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, " xmlns:"));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, "=\""));
|
||||
TRY_OR_CLEANUP(dom_xml_common_text_serialization(out, (const char *) attribute_namespace, true));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, "\""));
|
||||
}
|
||||
}
|
||||
|
||||
/* 3.6. Append a " " (U+0020 SPACE) to result. */
|
||||
TRY(xmlOutputBufferWriteLit(out, " "));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, " "));
|
||||
|
||||
/* 3.7. If candidate prefix is not null, then append to result the concatenation of candidate prefix with ":" (U+003A COLON). */
|
||||
if (candidate_prefix != NULL) {
|
||||
TRY(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
|
||||
TRY(xmlOutputBufferWriteLit(out, ":"));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
|
||||
TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, ":"));
|
||||
}
|
||||
|
||||
/* 3.8. If the require well-formed flag is set ...
|
||||
* => N/A */
|
||||
if (require_well_formed) {
|
||||
/* 3.8. If the require well-formed flag is set and
|
||||
* this attr's localName attribute contains the character ":" (U+003A COLON)
|
||||
* or does not match the XML Name production
|
||||
* or equals "xmlns" and attribute namespace is null */
|
||||
if (xmlValidateNCName(attr->name, /* space */ 0) != 0
|
||||
|| (strcmp((const char *) attr->name, "xmlns") == 0 && dom_xml_attribute_namespace(attr) == NULL)) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* 3.9. Append the following strings to result, in the order listed: */
|
||||
dom_xml_serialize_attribute_node_value(out, attr);
|
||||
TRY_OR_CLEANUP(dom_xml_serialize_attribute_node_value(out, attr));
|
||||
}
|
||||
|
||||
/* 4. Return the value of result.
|
||||
* => We're writing directly to the output buffer. */
|
||||
|
||||
zend_hash_destroy(&localname_set);
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
zend_hash_destroy(&localname_set);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Only format output if there are no text/entityrefs/cdata nodes as children. */
|
||||
@ -785,13 +901,19 @@ static int dom_xml_serialize_element_node(
|
||||
dom_xml_ns_prefix_map *namespace_prefix_map,
|
||||
xmlNodePtr element,
|
||||
unsigned int *prefix_index,
|
||||
int indent
|
||||
int indent,
|
||||
bool require_well_formed
|
||||
)
|
||||
{
|
||||
bool should_format = indent >= 0 && element->children != NULL && dom_xml_should_format_element(element);
|
||||
/* 1. If the require well-formed flag is set and this node's localName attribute contains
|
||||
* the character ":" (U+003A COLON) or does not match the XML Name production, then throw an exception. */
|
||||
if (require_well_formed) {
|
||||
if (xmlValidateNCName(element->name, /* space */ 0) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* 1. If the require well-formed flag is set ...
|
||||
* => N/A */
|
||||
bool should_format = indent >= 0 && element->children != NULL && dom_xml_should_format_element(element);
|
||||
|
||||
/* 2. Let markup be the string "<" (U+003C LESS-THAN SIGN). */
|
||||
TRY(xmlOutputBufferWriteLit(out, "<"));
|
||||
@ -863,7 +985,10 @@ static int dom_xml_serialize_element_node(
|
||||
|
||||
/* 12.3. If the value of prefix matches "xmlns", then run the following steps: */
|
||||
if (prefix != NULL && strcmp((const char *) prefix, "xmlns") == 0) {
|
||||
/* Step 1 deals with well-formedness, which we don't implement here. */
|
||||
/* 12.3.1. If the require well-formed flag is set, then throw an error. */
|
||||
if (require_well_formed) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* 12.3.2. Let candidate prefix be the value of prefix. */
|
||||
candidate_prefix = prefix;
|
||||
@ -956,7 +1081,7 @@ static int dom_xml_serialize_element_node(
|
||||
|
||||
/* 13. Append to markup the result of the XML serialization of node's attributes given map, prefix index,
|
||||
* local prefixes map, ignore namespace definition attribute flag, and require well-formed flag. */
|
||||
TRY_OR_CLEANUP(dom_xml_serialize_attributes(out, element, &map, &local_prefixes_map, prefix_index, ignore_namespace_definition_attribute));
|
||||
TRY_OR_CLEANUP(dom_xml_serialize_attributes(out, element, &map, &local_prefixes_map, prefix_index, ignore_namespace_definition_attribute, require_well_formed));
|
||||
|
||||
/* 14. If ns is the HTML namespace, and the node's list of children is empty, and the node's localName matches
|
||||
* any one of the following void elements: ... */
|
||||
@ -1013,7 +1138,7 @@ static int dom_xml_serialize_element_node(
|
||||
if (should_format) {
|
||||
TRY_OR_CLEANUP(dom_xml_output_indents(out, indent));
|
||||
}
|
||||
TRY_OR_CLEANUP(dom_xml_serialization_algorithm(ctxt, out, &map, child, inherited_ns, prefix_index, indent));
|
||||
TRY_OR_CLEANUP(dom_xml_serialization_algorithm(ctxt, out, &map, child, inherited_ns, prefix_index, indent, require_well_formed));
|
||||
}
|
||||
|
||||
if (should_format) {
|
||||
@ -1047,7 +1172,8 @@ static int dom_xml_serializing_a_document_fragment_node(
|
||||
xmlNodePtr node,
|
||||
const xmlChar *namespace,
|
||||
unsigned int *prefix_index,
|
||||
int indent
|
||||
int indent,
|
||||
bool require_well_formed
|
||||
)
|
||||
{
|
||||
/* 1. Let markup the empty string.
|
||||
@ -1056,7 +1182,7 @@ static int dom_xml_serializing_a_document_fragment_node(
|
||||
/* 2. For each child child of node, in tree order, run the XML serialization algorithm on the child ... */
|
||||
xmlNodePtr child = node->children;
|
||||
while (child != NULL) {
|
||||
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent));
|
||||
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent, require_well_formed));
|
||||
child = child->next;
|
||||
}
|
||||
|
||||
@ -1073,7 +1199,8 @@ static int dom_xml_serializing_a_document_node(
|
||||
xmlNodePtr node,
|
||||
const xmlChar *namespace,
|
||||
unsigned int *prefix_index,
|
||||
int indent
|
||||
int indent,
|
||||
bool require_well_formed
|
||||
)
|
||||
{
|
||||
/* 1. Let serialized document be an empty string.
|
||||
@ -1092,7 +1219,7 @@ static int dom_xml_serializing_a_document_node(
|
||||
/* 2. For each child child of node, in tree order, run the XML serialization algorithm on the child passing along the provided arguments,
|
||||
* and append the result to serialized document. */
|
||||
while (child != NULL) {
|
||||
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent));
|
||||
TRY(dom_xml_serialization_algorithm(ctxt, out, namespace_prefix_map, child, namespace, prefix_index, indent, require_well_formed));
|
||||
child = child->next;
|
||||
}
|
||||
|
||||
@ -1109,29 +1236,30 @@ static int dom_xml_serialization_algorithm(
|
||||
xmlNodePtr node,
|
||||
const xmlChar *namespace,
|
||||
unsigned int *prefix_index,
|
||||
int indent
|
||||
int indent,
|
||||
bool require_well_formed
|
||||
)
|
||||
{
|
||||
/* If node's interface is: */
|
||||
switch (node->type) {
|
||||
case XML_ELEMENT_NODE:
|
||||
return dom_xml_serialize_element_node(ctxt, out, namespace, namespace_prefix_map, node, prefix_index, indent);
|
||||
return dom_xml_serialize_element_node(ctxt, out, namespace, namespace_prefix_map, node, prefix_index, indent, require_well_formed);
|
||||
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
return dom_xml_serializing_a_document_fragment_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent);
|
||||
return dom_xml_serializing_a_document_fragment_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent, require_well_formed);
|
||||
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
case XML_DOCUMENT_NODE:
|
||||
return dom_xml_serializing_a_document_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent);
|
||||
return dom_xml_serializing_a_document_node(ctxt, out, namespace_prefix_map, node, namespace, prefix_index, indent, require_well_formed);
|
||||
|
||||
case XML_TEXT_NODE:
|
||||
return dom_xml_serialize_text_node(out, node);
|
||||
return dom_xml_serialize_text_node(out, node, require_well_formed);
|
||||
|
||||
case XML_COMMENT_NODE:
|
||||
return dom_xml_serialize_comment_node(out, node);
|
||||
return dom_xml_serialize_comment_node(out, node, require_well_formed);
|
||||
|
||||
case XML_PI_NODE:
|
||||
return dom_xml_serialize_processing_instruction(out, node);
|
||||
return dom_xml_serialize_processing_instruction(out, node, require_well_formed);
|
||||
|
||||
case XML_CDATA_SECTION_NODE:
|
||||
return dom_xml_serialize_cdata_section_node(out, node);
|
||||
@ -1152,9 +1280,8 @@ static int dom_xml_serialization_algorithm(
|
||||
ZEND_UNREACHABLE();
|
||||
}
|
||||
|
||||
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization
|
||||
* Assumes well-formed == false. */
|
||||
int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format)
|
||||
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization */
|
||||
int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed)
|
||||
{
|
||||
/* 1. Let namespace be a context namespace with value null. */
|
||||
const xmlChar *namespace = NULL;
|
||||
@ -1171,7 +1298,7 @@ int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr no
|
||||
|
||||
/* 5. Return the result of running the XML serialization algorithm ... */
|
||||
int indent = format ? 0 : -1;
|
||||
int result = dom_xml_serialization_algorithm(ctxt, out, &namespace_prefix_map, node, namespace, &prefix_index, indent);
|
||||
int result = dom_xml_serialization_algorithm(ctxt, out, &namespace_prefix_map, node, namespace, &prefix_index, indent, require_well_formed);
|
||||
|
||||
dom_xml_ns_prefix_map_dtor(&namespace_prefix_map);
|
||||
|
||||
|
@ -22,6 +22,6 @@
|
||||
#include <libxml/xmlsave.h>
|
||||
#include <libxml/xmlIO.h>
|
||||
|
||||
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format);
|
||||
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed);
|
||||
|
||||
#endif
|
||||
|
@ -287,7 +287,7 @@ static void php_xpath_eval(INTERNAL_FUNCTION_PARAMETERS, int type, bool modern)
|
||||
if (register_node_ns && nodep != NULL) {
|
||||
if (modern) {
|
||||
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(&intern->dom);
|
||||
in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, nodep);
|
||||
in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, nodep, false);
|
||||
} else {
|
||||
in_scope_ns = php_dom_get_in_scope_ns_legacy(nodep);
|
||||
}
|
||||
|
@ -1350,7 +1350,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object,
|
||||
object->document->private_data = NULL;
|
||||
object->document->class_type = PHP_LIBXML_CLASS_UNSET;
|
||||
object->document->handlers = &php_libxml_default_document_handlers;
|
||||
object->document->quirks_mode = false;
|
||||
object->document->quirks_mode = PHP_LIBXML_NO_QUIRKS;
|
||||
}
|
||||
|
||||
return ret_refcount;
|
||||
|
@ -95,6 +95,12 @@ typedef enum _php_libxml_class_type {
|
||||
PHP_LIBXML_CLASS_MODERN = 2,
|
||||
} php_libxml_class_type;
|
||||
|
||||
typedef enum php_libxml_quirks_mode {
|
||||
PHP_LIBXML_NO_QUIRKS = 0,
|
||||
PHP_LIBXML_QUIRKS,
|
||||
PHP_LIBXML_LIMITED_QUIRKS,
|
||||
} php_libxml_quirks_mode;
|
||||
|
||||
typedef struct _php_libxml_ref_obj {
|
||||
void *ptr;
|
||||
libxml_doc_props *doc_props;
|
||||
@ -103,7 +109,7 @@ typedef struct _php_libxml_ref_obj {
|
||||
const php_libxml_document_handlers *handlers;
|
||||
int refcount;
|
||||
php_libxml_class_type class_type : 8;
|
||||
bool quirks_mode;
|
||||
php_libxml_quirks_mode quirks_mode : 8;
|
||||
} php_libxml_ref_obj;
|
||||
|
||||
typedef struct _php_libxml_node_ptr {
|
||||
|
Loading…
Reference in New Issue
Block a user