php-src/ext/dom/namespace_compat.c
Niels Dossche 6980eba863
Support templated content
The template element in HTML 5 is special in the sense that it does not
add its contents into the DOM tree, but instead keeps them in a separate
shadow DOM document fragment. Interacting with the DOM tree cannot touch
the elements in the document fragment.

Closes GH-14906.
2024-07-15 11:10:51 +02:00

505 lines
17 KiB
C

/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "internal_helpers.h"
/* Magic tokens: one unique, statically allocated object per well-known namespace URI.
 * The actual value of these doesn't matter as long as they serve as a unique ID.
 * They need to be pointers because the `_private` field is a pointer; however, we can choose the contents ourselves.
 * We need to keep these at least 4-byte aligned because the pointer may be tagged (although for now 2-byte alignment works too).
 * We use a trick: we declare a struct with a double member to force the alignment.
 * The URI text is the first member of that struct, so the token pointer can also be read as the
 * NUL-terminated URI string; php_dom_ns_is_fast_ex() relies on this for its slow-path comparison. */
#define DECLARE_NS_TOKEN(name, uri) \
static const struct { \
char val[sizeof(uri)]; \
double align; \
} decl_##name = { uri, 0.0 }; \
PHP_DOM_EXPORT const php_dom_ns_magic_token *(name) = (const php_dom_ns_magic_token *) &decl_##name;
DECLARE_NS_TOKEN(php_dom_ns_is_html_magic_token, DOM_XHTML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_mathml_magic_token, DOM_MATHML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_svg_magic_token, DOM_SVG_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xlink_magic_token, DOM_XLINK_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xml_magic_token, DOM_XML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xmlns_magic_token, DOM_XMLNS_NS_URI);
/* Element destructor of a prefix map: frees the stored pointer, but only for
 * entries that are marked as owned. Unowned entries are left untouched. */
static void php_dom_libxml_ns_mapper_prefix_map_element_dtor(zval *zv)
{
	if (!DOM_Z_IS_OWNED(zv)) {
		return;
	}

	efree(Z_PTR_P(zv));
}
/* Returns the prefix map (prefix => namespace) that belongs to `*uri`, creating and
 * registering an empty one if the URI is not known to the mapper yet.
 * When the URI was already registered, `*uri` is redirected to the key string stored in the
 * mapper's table, so that callers continue with a string that lives as long as the entry. */
static HashTable *php_dom_libxml_ns_mapper_ensure_prefix_map(php_dom_libxml_ns_mapper *mapper, zend_string **uri)
{
	zval *entry = zend_hash_find(&mapper->uri_to_prefix_map, *uri);

	if (entry != NULL) {
		/* The cast from the value pointer to Bucket* only works if the value is the first member.
		 * A static assert would be preferable, but we're stuck at C99. */
		ZEND_ASSERT(XtOffsetOf(Bucket, val) == 0);
		ZEND_ASSERT(Z_TYPE_P(entry) == IS_ARRAY);

		/* Hand back the key string of the table, as that one lives long enough. */
		*uri = ((Bucket *) entry)->key;
		return Z_ARRVAL_P(entry);
	}

	/* Unknown URI: set up a fresh prefix map and register it under this URI. */
	HashTable *fresh_map = emalloc(sizeof(HashTable));
	zend_hash_init(fresh_map, 0, NULL, php_dom_libxml_ns_mapper_prefix_map_element_dtor, false);

	zval wrapped_map;
	ZVAL_ARR(&wrapped_map, fresh_map);
	zend_hash_add_new(&mapper->uri_to_prefix_map, *uri, &wrapped_map);

	return fresh_map;
}
/* Lazily fills a namespace cache slot.
 * If `*ptr` is already set it is returned as-is. Otherwise the prefix-less namespace for
 * the URI (`uri`, `length` bytes) is requested from the mapper, tagged with `token` through
 * its `_private` field, stored in `*ptr` and returned. */
static xmlNsPtr php_dom_libxml_ns_mapper_ensure_cached_ns(php_dom_libxml_ns_mapper *mapper, xmlNsPtr *ptr, const char *uri, size_t length, const php_dom_ns_magic_token *token)
{
	xmlNsPtr cached = *ptr;
	if (EXPECTED(cached != NULL)) {
		return cached;
	}

	zend_string *uri_str = zend_string_init(uri, length, false);
	cached = php_dom_libxml_ns_mapper_get_ns(mapper, NULL, uri_str);
	*ptr = cached;
	/* Store the token so later namespace checks can take the pointer-comparison fast path. */
	cached->_private = (void *) token;
	zend_string_release_ex(uri_str, false);

	return cached;
}
/* Returns the mapper's cached prefix-less namespace for the XHTML namespace URI.
 * It is created on first use and tagged with the HTML magic token. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_html_ns(php_dom_libxml_ns_mapper *mapper)
{
return php_dom_libxml_ns_mapper_ensure_cached_ns(mapper, &mapper->html_ns, DOM_XHTML_NS_URI, sizeof(DOM_XHTML_NS_URI) - 1, php_dom_ns_is_html_magic_token);
}
/* Returns the mapper's cached prefix-less namespace for the xmlns namespace URI.
 * It is created on first use and tagged with the xmlns magic token. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_prefixless_xmlns_ns(php_dom_libxml_ns_mapper *mapper)
{
return php_dom_libxml_ns_mapper_ensure_cached_ns(mapper, &mapper->prefixless_xmlns_ns, DOM_XMLNS_NS_URI, sizeof(DOM_XMLNS_NS_URI) - 1, php_dom_ns_is_xmlns_magic_token);
}
/* Allocates a zero-initialised xmlNs of type XML_LOCAL_NAMESPACE for (prefix, uri).
 * The href and prefix fields borrow the character data of the given zend_strings, they are
 * not copied. An empty prefix is represented as a NULL prefix pointer.
 * Both arguments must be non-NULL. */
static xmlNsPtr dom_create_owned_ns(zend_string *prefix, zend_string *uri)
{
	ZEND_ASSERT(prefix != NULL);
	ZEND_ASSERT(uri != NULL);

	xmlNsPtr created = emalloc(sizeof(*created));
	memset(created, 0, sizeof(*created));

	created->type = XML_LOCAL_NAMESPACE;

	/* The two strings are kept alive because they're the hash table keys that lead to this entry. */
	created->href = BAD_CAST ZSTR_VAL(uri);
	if (ZSTR_LEN(prefix) != 0) {
		created->prefix = BAD_CAST ZSTR_VAL(prefix);
	} else {
		created->prefix = NULL;
	}

	/* Note: created->context is unused in libxml2 at the moment, and if it were used it would be
	 * for LIBXML_NAMESPACE_DICT which is opt-in anyway. */
	return created;
}
/* Looks up the namespace instance for the (prefix, uri) pair in the mapper, creating it when
 * it does not exist yet.
 * A NULL prefix or uri is treated as the empty string. When both are empty there is no
 * namespace and NULL is returned. Newly created instances are stored as owned entries of
 * the prefix map. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns(php_dom_libxml_ns_mapper *mapper, zend_string *prefix, zend_string *uri)
{
	prefix = prefix != NULL ? prefix : zend_empty_string;
	uri = uri != NULL ? uri : zend_empty_string;

	if (ZSTR_LEN(uri) == 0 && ZSTR_LEN(prefix) == 0) {
		return NULL;
	}

	/* Note: this may redirect `uri` to the key string stored in the mapper. */
	HashTable *prefix_map = php_dom_libxml_ns_mapper_ensure_prefix_map(mapper, &uri);

	xmlNsPtr ns = zend_hash_find_ptr(prefix_map, prefix);
	if (ns == NULL) {
		ns = dom_create_owned_ns(prefix, uri);

		zval owned_entry;
		DOM_Z_OWNED(&owned_entry, ns);
		zend_hash_add_new(prefix_map, prefix, &owned_entry);
	}

	return ns;
}
/* Variant of php_dom_libxml_ns_mapper_get_ns() that takes the prefix as a raw buffer of
 * `prefix_len` bytes instead of a zend_string. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(php_dom_libxml_ns_mapper *mapper, const xmlChar *prefix, size_t prefix_len, zend_string *uri)
{
	if (prefix_len == 0) {
		/* Fast path: no temporary string is needed for an empty prefix. */
		return php_dom_libxml_ns_mapper_get_ns(mapper, zend_empty_string, uri);
	}

	zend_string *tmp_prefix = zend_string_init((const char *) prefix, prefix_len, false);
	xmlNsPtr result = php_dom_libxml_ns_mapper_get_ns(mapper, tmp_prefix, uri);
	zend_string_release_ex(tmp_prefix, false);

	return result;
}
/* Variant of php_dom_libxml_ns_mapper_get_ns() that takes both the prefix and the URI as raw
 * buffers with explicit lengths. Temporary zend_strings are created for the lookup and
 * released again before returning. */
static xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings_ex(php_dom_libxml_ns_mapper *mapper, const char *prefix, size_t prefix_len, const char *uri, size_t uri_len)
{
	zend_string *tmp_prefix = zend_string_init(prefix, prefix_len, false);
	zend_string *tmp_uri = zend_string_init(uri, uri_len, false);

	xmlNsPtr result = php_dom_libxml_ns_mapper_get_ns(mapper, tmp_prefix, tmp_uri);

	zend_string_release_ex(tmp_uri, false);
	zend_string_release_ex(tmp_prefix, false);

	return result;
}
/* Convenience wrapper for NUL-terminated strings: measures both strings and forwards to the
 * length-taking variant. Neither `prefix` nor `uri` may be NULL. */
static zend_always_inline xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings(php_dom_libxml_ns_mapper *mapper, const char *prefix, const char *uri)
{
	size_t prefix_len = strlen(prefix);
	size_t uri_len = strlen(uri);

	return php_dom_libxml_ns_mapper_get_ns_raw_strings_ex(mapper, prefix, prefix_len, uri, uri_len);
}
/* Same as the NUL-terminated string lookup, but a NULL prefix and/or a NULL uri is accepted
 * and treated as the empty string. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(php_dom_libxml_ns_mapper *mapper, const char *prefix, const char *uri)
{
	const char *safe_prefix = prefix != NULL ? prefix : "";
	const char *safe_uri = uri != NULL ? uri : "";

	return php_dom_libxml_ns_mapper_get_ns_raw_strings(mapper, safe_prefix, safe_uri);
}
/* Registers a namespace declaration that already exists as an xmlNs (e.g. one found on a
 * node's nsDef list) in the mapper.
 *
 * Returns the instance that is already registered for the same (prefix, href) pair if there
 * is one. Otherwise `ns` itself is registered as an unowned entry, so the prefix map's
 * destructor will not free it, and `ns` is returned.
 * `ns` must not be NULL; a NULL prefix is treated as the empty prefix. */
static xmlNsPtr php_dom_libxml_ns_mapper_store_and_normalize_parsed_ns(php_dom_libxml_ns_mapper *mapper, xmlNsPtr ns)
{
	ZEND_ASSERT(ns != NULL);

	/* The temporary string is only needed to find or create the prefix map. The call may redirect
	 * `href_str` to another string, hence the original pointer is remembered for the release. */
	zend_string *href_str = zend_string_init((const char *) ns->href, xmlStrlen(ns->href), false);
	zend_string *href_str_orig = href_str;
	HashTable *prefix_map = php_dom_libxml_ns_mapper_ensure_prefix_map(mapper, &href_str);
	zend_string_release_ex(href_str_orig, false);

	const char *prefix = (const char *) ns->prefix;
	size_t prefix_len;
	if (prefix == NULL) {
		prefix = "";
		prefix_len = 0;
	} else {
		prefix_len = xmlStrlen(ns->prefix);
	}

	/* zend_hash_str_find_ptr() already unwraps the element and yields the stored pointer, i.e.
	 * the namespace itself. It must not be interpreted as a zval a second time: doing so would
	 * read the first member of the xmlNs instead of returning the registered namespace. */
	xmlNsPtr existing = zend_hash_str_find_ptr(prefix_map, prefix, prefix_len);
	if (existing != NULL) {
		return existing;
	}

	zval new_zv;
	DOM_Z_UNOWNED(&new_zv, ns);
	zend_hash_str_add_new(prefix_map, prefix, prefix_len, &new_zv);

	return ns;
}
/* Scratch state used by php_dom_libxml_reconcile_modern() while it walks a subtree. */
typedef struct {
/* Fast lookup for created mappings. */
HashTable old_ns_to_new_ns_ptr;
/* It is common that the last created mapping will be used for a while,
* cache it too to bypass the hash table. */
xmlNsPtr last_mapped_src, last_mapped_dst;
/* Mapper from which the replacement namespace instances are obtained. */
php_dom_libxml_ns_mapper *ns_mapper;
} dom_libxml_reconcile_ctx;
/* Returns a pointer to the namespace mapper embedded in the private data of `object`. */
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_get_ns_mapper(dom_object *object)
{
return &php_dom_get_private_data(object)->ns_mapper;
}
/* Sets an attribute on `node` that represents the namespace declaration `ns`; its value is
 * the href of `ns`.
 * A prefixed declaration becomes an attribute named after the prefix, in the xmlns namespace
 * with prefix "xmlns". A prefix-less declaration becomes an attribute named "xmlns" in the
 * prefix-less xmlns namespace.
 * Returns the result of xmlSetNsProp(). */
PHP_DOM_EXPORT xmlAttrPtr php_dom_ns_compat_mark_attribute(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node, xmlNsPtr ns)
{
	const xmlChar *attr_name = ns->prefix;
	xmlNsPtr attr_ns;

	if (attr_name == NULL) {
		attr_ns = php_dom_libxml_ns_mapper_ensure_prefixless_xmlns_ns(mapper);
		attr_name = BAD_CAST "xmlns";
	} else {
		attr_ns = php_dom_libxml_ns_mapper_get_ns_raw_strings(mapper, "xmlns", DOM_XMLNS_NS_URI);
	}

	ZEND_ASSERT(attr_ns != NULL);

	return xmlSetNsProp(node, attr_ns, attr_name, ns->href);
}
/* Converts the nsDef list of `node` into namespace declaration attributes.
 * For every declaration an attribute is set on the node, the declaration is registered in the
 * mapper, unlinked from the list and handed to php_libxml_set_old_ns(). The new attributes
 * end up in front of the attributes the node already had. Afterwards nsDef is NULL.
 * Does nothing if the node has no namespace declarations. */
PHP_DOM_EXPORT void php_dom_ns_compat_mark_attribute_list(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node)
{
	xmlNsPtr decl = node->nsDef;
	if (decl == NULL) {
		return;
	}

	/* We want the declarations at the front, in order of the namespace definitions.
	 * So temporarily unlink the existing attributes and re-attach them at the end. */
	xmlAttrPtr original_attrs = node->properties;
	node->properties = NULL;

	xmlAttrPtr tail = NULL;
	while (decl != NULL) {
		tail = php_dom_ns_compat_mark_attribute(mapper, node, decl);
		php_dom_libxml_ns_mapper_store_and_normalize_parsed_ns(mapper, decl);

		/* Detach the declaration from the list before passing it on. */
		xmlNsPtr following = decl->next;
		decl->next = NULL;
		php_libxml_set_old_ns(node->doc, decl);

		decl = following;
	}

	if (tail == NULL) {
		/* Nothing added, so nothing changed. Only really possible on OOM. */
		node->properties = original_attrs;
	} else if (original_attrs != NULL) {
		/* node->properties now points to the first namespace declaration attribute;
		 * chain the original attributes behind the last one that was added. */
		tail->next = original_attrs;
		original_attrs->prev = tail;
	}

	node->nsDef = NULL;
}
/* Checks whether `ns` is the namespace identified by `magic_token`.
 * The `_private` field of the namespace serves as a cache: if it holds the token the answer
 * is known immediately. Otherwise the href is compared against the URI the token points to,
 * and on a match the token is cached, provided `_private` was still empty.
 * `ns` must not be NULL. */
PHP_DOM_EXPORT bool php_dom_ns_is_fast_ex(xmlNsPtr ns, const php_dom_ns_magic_token *magic_token)
{
	ZEND_ASSERT(ns != NULL);

	void *stored = ns->_private;

	/* Cached for fast checking. */
	if (stored == magic_token) {
		return true;
	}

	/* A non-NULL value with a clear low bit is a different token, so this cannot match.
	 * Values with the low bit set are not tokens and take the slow path. */
	if (stored != NULL && ((uintptr_t) stored & 1) == 0) {
		return false;
	}

	/* Slow path: compare the actual URI. */
	if (!xmlStrEqual(ns->href, BAD_CAST magic_token)) {
		return false;
	}

	/* Only overwrite the private data if there is nothing else stored. */
	if (stored == NULL) {
		ns->_private = (void *) magic_token;
	}

	return true;
}
/* Checks whether the namespace of `nodep` is the one identified by `magic_token`.
 * A node without a namespace never matches. `nodep` must not be NULL. */
PHP_DOM_EXPORT bool php_dom_ns_is_fast(const xmlNode *nodep, const php_dom_ns_magic_token *magic_token)
{
	ZEND_ASSERT(nodep != NULL);

	xmlNsPtr node_ns = nodep->ns;

	return node_ns != NULL && php_dom_ns_is_fast_ex(node_ns, magic_token);
}
/* Returns true only if `nodep` belongs to a document of type XML_HTML_DOCUMENT_NODE and the
 * node itself is in the HTML namespace. `nodep` must not be NULL. */
PHP_DOM_EXPORT bool php_dom_ns_is_html_and_document_is_html(const xmlNode *nodep)
{
	ZEND_ASSERT(nodep != NULL);

	if (!nodep->doc) {
		return false;
	}

	if (nodep->doc->type != XML_HTML_DOCUMENT_NODE) {
		return false;
	}

	return php_dom_ns_is_fast(nodep, php_dom_ns_is_html_magic_token);
}
/* Fixes up the namespace of an attribute after it was inserted into its parent element.
 * If a declaration with the same prefix and the same href is in scope, the attribute is
 * linked to that declaration. Otherwise, for prefixed attributes, the legacy libxml2
 * reconciliation is run on the parent; this will rename prefixes if there is a declaration
 * with the same prefix but a different uri.
 * Attributes without a namespace are left alone. `attrp` must not be NULL. */
PHP_DOM_EXPORT void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp)
{
	ZEND_ASSERT(attrp != NULL);

	xmlNsPtr attr_ns = attrp->ns;
	if (attr_ns == NULL) {
		return;
	}

	/* Try to link to an existing namespace. If that won't work, reconcile. */
	xmlNodePtr owner = attrp->parent;
	xmlNsPtr in_scope = xmlSearchNs(owner->doc, owner, attr_ns->prefix);

	if (in_scope && xmlStrEqual(in_scope->href, attr_ns->href)) {
		/* Doesn't leak because this doesn't define the declaration. */
		attrp->ns = in_scope;
		return;
	}

	if (attr_ns->prefix != NULL) {
		/* Note: explicitly use the legacy reconciliation as it mostly (i.e. as good as it gets
		 * for legacy DOM) does the right thing for attributes. */
		xmlReconciliateNs(owner->doc, owner);
	}
}
/* Replaces the namespace pointer of a single node by the instance provided by the mapper.
 * Mappings that were already established are taken from the context: first from the
 * one-entry cache, then from the hash table. A missing mapping is created on demand and
 * remembered in both. The node must have a namespace. */
static zend_always_inline void php_dom_libxml_reconcile_modern_single_node(dom_libxml_reconcile_ctx *ctx, xmlNodePtr node)
{
	xmlNsPtr old_ns = node->ns;
	ZEND_ASSERT(old_ns != NULL);

	/* Same source namespace as last time: reuse the cached result. */
	if (old_ns == ctx->last_mapped_src) {
		node->ns = ctx->last_mapped_dst;
		return;
	}

	zend_ulong key = dom_mangle_pointer_for_key(old_ns);
	xmlNsPtr mapped = zend_hash_index_find_ptr(&ctx->old_ns_to_new_ns_ptr, key);

	if (mapped == NULL) {
		/* We have to create an alternative declaration, and we'll add it to the map. */
		mapped = php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(
			ctx->ns_mapper,
			(const char *) old_ns->prefix,
			(const char *) old_ns->href
		);
		zend_hash_index_add_new_ptr(&ctx->old_ns_to_new_ns_ptr, key, mapped);
		ctx->last_mapped_src = old_ns;
		ctx->last_mapped_dst = mapped;
	}

	/* If the namespace already was the mapped one, this assignment is a no-op. */
	node->ns = mapped;
}
/* Fast path detection: returns true for a lone element (no children, no attributes) whose
 * namespace is the one at the head of its own nsDef list, which includes the case where
 * both are NULL. */
static zend_always_inline bool dom_libxml_reconcile_fast_element_skip(xmlNodePtr node)
{
	ZEND_ASSERT(node->type == XML_ELEMENT_NODE);

	if (node->children != NULL || node->properties != NULL) {
		return false;
	}

	return node->ns == node->nsDef;
}
/* Reconciles the namespace of one element and of each of its attributes.
 * Nodes without a namespace are skipped. */
static zend_always_inline void php_dom_libxml_reconcile_modern_single_element_node(dom_libxml_reconcile_ctx *ctx, xmlNodePtr node)
{
	ZEND_ASSERT(node->type == XML_ELEMENT_NODE);

	/* Since this is modern DOM, the declarations are not on the node and thus there's nothing to add from nsDef. */
	ZEND_ASSERT(node->nsDef == NULL);

	if (node->ns != NULL) {
		php_dom_libxml_reconcile_modern_single_node(ctx, node);
	}

	xmlAttrPtr attr = node->properties;
	while (attr != NULL) {
		if (attr->ns != NULL) {
			/* Attributes are passed through the node-typed helper via a cast. */
			php_dom_libxml_reconcile_modern_single_node(ctx, (xmlNodePtr) attr);
		}
		attr = attr->next;
	}
}
/* Makes the namespaces used by `node`, and for elements by its whole subtree, point to the
 * instances managed by `ns_mapper`.
 * - Attribute node: only the attribute's own namespace is replaced.
 * - Element node: the element, its attributes and all descendant elements are processed,
 *   unless the fast skip check applies.
 * - Any other node type: nothing happens. */
PHP_DOM_EXPORT void php_dom_libxml_reconcile_modern(php_dom_libxml_ns_mapper *ns_mapper, xmlNodePtr node)
{
	switch (node->type) {
		case XML_ATTRIBUTE_NODE:
			if (node->ns != NULL) {
				node->ns = php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(ns_mapper, (const char *) node->ns->prefix, (const char *) node->ns->href);
			}
			return;

		case XML_ELEMENT_NODE:
			break;

		default:
			return;
	}

	if (dom_libxml_reconcile_fast_element_skip(node)) {
		return;
	}

	dom_libxml_reconcile_ctx ctx;
	ctx.ns_mapper = ns_mapper;
	ctx.last_mapped_src = NULL;
	ctx.last_mapped_dst = NULL;
	zend_hash_init(&ctx.old_ns_to_new_ns_ptr, 0, NULL, NULL, 0);

	/* First the root of the subtree itself, then everything below it in tree order. */
	xmlNodePtr base = node;
	php_dom_libxml_reconcile_modern_single_element_node(&ctx, base);

	for (xmlNodePtr cur = base->children; cur != NULL; cur = php_dom_next_in_tree_order(cur, base)) {
		ZEND_ASSERT(cur != base);

		if (cur->type == XML_ELEMENT_NODE) {
			php_dom_libxml_reconcile_modern_single_element_node(&ctx, cur);
		}
	}

	zend_hash_destroy(&ctx.old_ns_to_new_ns_ptr);
}
/* Collects the namespaces that are in scope for `node` in a document without nsDef lists.
 * The node and its ancestors are visited from the node upwards. For every element the
 * element's own prefixed namespace (unless `ignore_elements` is set) and the namespaces
 * declared by its prefixed xmlns attributes are gathered, keyed by prefix.
 * The returned list is allocated with safe_emalloc() and flagged with origin_is_ns_compat,
 * so php_dom_in_scope_ns_destroy() releases it with efree(). `node` must not be NULL. */
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements)
{
	ZEND_ASSERT(node != NULL);

	php_dom_in_scope_ns in_scope_ns;
	in_scope_ns.origin_is_ns_compat = true;

	/* libxml fetches all nsDef items from bottom to top - left to right, ignoring prefixes already in the list.
	 * We don't have nsDef, but we can use the ns pointer (as that is necessarily in scope),
	 * and check the xmlns attributes. Entries are inserted with the add-only hash API, which is
	 * presumably what keeps the first namespace seen for a prefix. */
	HashTable tmp_prefix_to_ns_table;
	zend_hash_init(&tmp_prefix_to_ns_table, 0, NULL, NULL, false);
	zend_hash_real_init_mixed(&tmp_prefix_to_ns_table);

	for (const xmlNode *cur = node; cur != NULL; cur = cur->parent) {
		if (cur->type != XML_ELEMENT_NODE) {
			continue;
		}

		/* Register namespace of element */
		if (!ignore_elements && cur->ns != NULL && cur->ns->prefix != NULL) {
			const char *element_prefix = (const char *) cur->ns->prefix;
			zend_hash_str_add_ptr(&tmp_prefix_to_ns_table, element_prefix, strlen(element_prefix), cur->ns);
		}

		/* Register xmlns attributes */
		for (const xmlAttr *attr = cur->properties; attr != NULL; attr = attr->next) {
			if (attr->ns == NULL || attr->ns->prefix == NULL) {
				continue;
			}
			if (!php_dom_ns_is_fast_ex(attr->ns, php_dom_ns_is_xmlns_magic_token)) {
				continue;
			}
			if (attr->children == NULL || attr->children->content == NULL) {
				continue;
			}

			/* This attribute declares a namespace, get the relevant instance.
			 * The declared namespace is not the same as the namespace of this attribute (which is xmlns). */
			const char *declared_prefix = (const char *) attr->name;
			const char *declared_uri = (const char *) attr->children->content;
			xmlNsPtr declared_ns = php_dom_libxml_ns_mapper_get_ns_raw_strings(ns_mapper, declared_prefix, declared_uri);
			zend_hash_str_add_ptr(&tmp_prefix_to_ns_table, declared_prefix, strlen(declared_prefix), declared_ns);
		}
	}

	/* Flatten the table into a plain array for the caller. */
	in_scope_ns.count = zend_hash_num_elements(&tmp_prefix_to_ns_table);
	in_scope_ns.list = safe_emalloc(in_scope_ns.count, sizeof(xmlNsPtr), 0);

	size_t write_pos = 0;
	xmlNsPtr entry;
	ZEND_HASH_MAP_FOREACH_PTR(&tmp_prefix_to_ns_table, entry) {
		in_scope_ns.list[write_pos] = entry;
		write_pos++;
	} ZEND_HASH_FOREACH_END();

	zend_hash_destroy(&tmp_prefix_to_ns_table);

	return in_scope_ns;
}
/* Collects the in-scope namespaces of `node` through libxml2's xmlGetNsList().
 * The list is counted up to its NULL terminator. If libxml2 returns no list, the count is 0
 * and the list is NULL. origin_is_ns_compat is false, so php_dom_in_scope_ns_destroy()
 * releases the list with xmlFree(). `node` must not be NULL. */
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns_legacy(const xmlNode *node)
{
	ZEND_ASSERT(node != NULL);

	php_dom_in_scope_ns in_scope_ns;
	in_scope_ns.origin_is_ns_compat = false;
	in_scope_ns.count = 0;
	in_scope_ns.list = xmlGetNsList(node->doc, node);

	if (in_scope_ns.list == NULL) {
		return in_scope_ns;
	}

	for (; in_scope_ns.list[in_scope_ns.count] != NULL; in_scope_ns.count++) {
		/* Only counting the entries. */
	}

	return in_scope_ns;
}
/* Releases the list of an in-scope namespace result with the allocator that matches its
 * origin: xmlFree() for lists obtained from libxml2, efree() for lists built by the
 * namespace compatibility code. `in_scope_ns` must not be NULL. */
PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns)
{
	ZEND_ASSERT(in_scope_ns != NULL);

	if (!in_scope_ns->origin_is_ns_compat) {
		xmlFree(in_scope_ns->list);
		return;
	}

	efree(in_scope_ns->list);
}
#endif /* HAVE_LIBXML && HAVE_DOM */