diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 13e07c86661..c629b2dff9f 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -247,6 +247,7 @@ PHP 8.4 INTERNALS UPGRADE NOTES - Removed the "properties" HashTable field from php_libxml_node_object. - Added a way to attached private data to a php_libxml_ref_obj. - Added a way to fix a class type onto php_libxml_ref_obj. + - Added a way to record quirks mode in php_libxml_ref_obj. - Added php_libxml_uses_internal_errors(). - Added a way to override document handlers (e.g. serialization) with php_libxml_document_handlers. diff --git a/codecov.yml b/codecov.yml index e64d19fc4e1..a36f7f8f4e0 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,3 +1,11 @@ ignore: - - "ext/dom/lexbor/lexbor" # bundled library - - "ext/pcre/pcre2lib" # bundled library + # bundled libraries + - "ext/dom/lexbor/lexbor/core" + - "ext/dom/lexbor/lexbor/css" + - "ext/dom/lexbor/lexbor/dom" + - "ext/dom/lexbor/lexbor/encoding" + - "ext/dom/lexbor/lexbor/html" + - "ext/dom/lexbor/lexbor/ns" + - "ext/dom/lexbor/lexbor/ports" + - "ext/dom/lexbor/lexbor/tag" + - "ext/pcre/pcre2lib" diff --git a/ext/dom/config.m4 b/ext/dom/config.m4 index 0db013b8689..12b6bdba745 100644 --- a/ext/dom/config.m4 +++ b/ext/dom/config.m4 @@ -22,12 +22,14 @@ if test "$PHP_DOM" != "no"; then $LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \ $LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \ $LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c 
$LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c $LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c 
$LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \ - $LEXBOR_DIR/selectors/selectors.c \ + $LEXBOR_DIR/css/state.c $LEXBOR_DIR/css/log.c $LEXBOR_DIR/css/parser.c $LEXBOR_DIR/css/selectors/state.c $LEXBOR_DIR/css/selectors/selectors.c $LEXBOR_DIR/css/selectors/selector.c $LEXBOR_DIR/css/selectors/pseudo_state.c $LEXBOR_DIR/css/selectors/pseudo.c $LEXBOR_DIR/css/syntax/tokenizer/error.c $LEXBOR_DIR/css/syntax/state.c $LEXBOR_DIR/css/syntax/parser.c $LEXBOR_DIR/css/syntax/syntax.c $LEXBOR_DIR/css/syntax/anb.c $LEXBOR_DIR/css/syntax/tokenizer.c $LEXBOR_DIR/css/syntax/token.c $LEXBOR_DIR/css/css.c \ + $LEXBOR_DIR/selectors-adapted/selectors.c \ $LEXBOR_DIR/ns/ns.c \ $LEXBOR_DIR/tag/tag.c" PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \ xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \ - 
domexception.c parentnode.c \ + domexception.c \ + parentnode/tree.c parentnode/css_selectors.c \ processinginstruction.c cdatasection.c \ documentfragment.c domimplementation.c \ element.c node.c characterdata.c \ @@ -38,6 +40,7 @@ if test "$PHP_DOM" != "no"; then namednodemap.c xpath_callbacks.c \ $LEXBOR_SOURCES], $ext_shared,,$PHP_LEXBOR_CFLAGS) + PHP_ADD_BUILD_DIR($ext_builddir/parentnode) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ports/posix/lexbor/core) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/core) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/dom/interfaces) @@ -45,7 +48,10 @@ if test "$PHP_DOM" != "no"; then PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/tokenizer) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/interfaces) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/encoding) - PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors) + PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/selectors) + PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/tokenizer) + PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/syntax/tokenizer) + PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors-adapted) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ns) PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/tag) PHP_SUBST(DOM_SHARED_LIBADD) diff --git a/ext/dom/config.w32 b/ext/dom/config.w32 index 02a7e0a9409..87096903daa 100644 --- a/ext/dom/config.w32 +++ b/ext/dom/config.w32 @@ -9,7 +9,7 @@ if (PHP_DOM == "yes") { ) { EXTENSION("dom", "php_dom.c attr.c document.c infra.c \ xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \ - domexception.c parentnode.c processinginstruction.c \ + domexception.c processinginstruction.c \ cdatasection.c documentfragment.c domimplementation.c element.c \ node.c characterdata.c documenttype.c \ entity.c nodelist.c html_collection.c text.c comment.c \ @@ -17,6 +17,7 @@ if (PHP_DOM == "yes") { notation.c xpath.c dom_iterators.c \ namednodemap.c xpath_callbacks.c", null, "-Iext/dom/lexbor"); + 
ADD_SOURCES("ext/dom/parentnode", "tree.c css_selectors.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/dom", "interface.c", "dom"); @@ -27,7 +28,11 @@ if (PHP_DOM == "yes") { ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c 
window.c", "dom"); - ADD_SOURCES("ext/dom/lexbor/lexbor/selectors", "selectors.c", "dom"); + ADD_SOURCES("ext/dom/lexbor/lexbor/selectors-adapted", "selectors.c", "dom"); + ADD_SOURCES("ext/dom/lexbor/lexbor/css", "state.c log.c parser.c css.c", "dom"); + ADD_SOURCES("ext/dom/lexbor/lexbor/css/selectors", "state.c selectors.c selector.c pseudo_state.c pseudo.c", "dom"); + ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax", "state.c parser.c syntax.c anb.c tokenizer.c token.c", "dom"); + ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax/tokenizer", "error.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom"); ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom"); ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC "); diff --git a/ext/dom/domexception.h b/ext/dom/domexception.h index 799b4fd189a..574e164424d 100644 --- a/ext/dom/domexception.h +++ b/ext/dom/domexception.h @@ -46,4 +46,7 @@ typedef enum { VALIDATION_ERR = 16 } dom_exception_code; +void php_dom_throw_error(dom_exception_code error_code, bool strict_error); +void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error); + #endif /* DOM_EXCEPTION_H */ diff --git a/ext/dom/element.c b/ext/dom/element.c index 6bf77dcafdd..99043b7f382 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -1752,4 +1752,66 @@ out: } /* }}} end DOMElement::prepend */ +static void php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAMETERS, bool all) +{ + zend_string *selectors_str; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(selectors_str) + ZEND_PARSE_PARAMETERS_END(); + + xmlNodePtr thisp; + dom_object *intern; + zval *id; + DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern); + + if (all) { + dom_parent_node_query_selector_all(thisp, intern, return_value, selectors_str); + } else { + dom_parent_node_query_selector(thisp, intern, return_value, selectors_str); + } +} + +PHP_METHOD(Dom_Element, querySelector) +{ + 
php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAM_PASSTHRU, false); +} + +PHP_METHOD(Dom_Element, querySelectorAll) +{ + php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAM_PASSTHRU, true); +} + +PHP_METHOD(Dom_Element, matches) +{ + zend_string *selectors_str; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(selectors_str) + ZEND_PARSE_PARAMETERS_END(); + + xmlNodePtr thisp; + dom_object *intern; + zval *id; + DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern); + + dom_element_matches(thisp, intern, return_value, selectors_str); +} + +PHP_METHOD(Dom_Element, closest) +{ + zend_string *selectors_str; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(selectors_str) + ZEND_PARSE_PARAMETERS_END(); + + xmlNodePtr thisp; + dom_object *intern; + zval *id; + DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern); + + dom_element_closest(thisp, intern, return_value, selectors_str); +} + #endif diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c index 718a05a5db1..a8d75d56860 100644 --- a/ext/dom/html5_parser.c +++ b/ext/dom/html5_parser.c @@ -24,6 +24,7 @@ #include "html5_parser.h" #include #include +#include #include #include #include @@ -380,6 +381,7 @@ void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxm observations->has_explicit_html_tag = tree->has_explicit_html_tag; observations->has_explicit_head_tag = tree->has_explicit_head_tag; observations->has_explicit_body_tag = tree->has_explicit_body_tag; + observations->quirks_mode = lxb_dom_interface_document(tree->document)->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS; } #endif /* HAVE_LIBXML && HAVE_DOM */ diff --git a/ext/dom/html5_parser.h b/ext/dom/html5_parser.h index bffa6b0c8aa..6b01a187605 100644 --- a/ext/dom/html5_parser.h +++ b/ext/dom/html5_parser.h @@ -47,6 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations { bool has_explicit_html_tag; bool has_explicit_head_tag; bool has_explicit_body_tag; + bool quirks_mode; } 
lexbor_libxml2_bridge_extracted_observations; typedef struct _lexbor_libxml2_bridge_parse_context { diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c index 7d367020888..c8a805318c3 100644 --- a/ext/dom/html_document.c +++ b/ext/dom/html_document.c @@ -917,6 +917,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromString) NULL ); dom_set_xml_class(intern->document); + intern->document->quirks_mode = ctx.observations.quirks_mode; intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper); return; @@ -1137,6 +1138,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile) NULL ); dom_set_xml_class(intern->document); + intern->document->quirks_mode = ctx.observations.quirks_mode; intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper); return; diff --git a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c new file mode 100644 index 00000000000..aa11a348484 --- /dev/null +++ b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c @@ -0,0 +1,1934 @@ +/* + * Copyright (C) 2021-2024 Alexander Borisov + * + * Author: Alexander Borisov + * Adapted for PHP + libxml2 by: Niels Dossche + * Based on Lexbor 2.4.0 (upstream commit e9d35f6384de7bd8c1b79e7111bc3a44f8822967) + */ + +#include +#include +#include +#include +#include +#include + +#include "lexbor/selectors-adapted/selectors.h" +#include "ext/dom/namespace_compat.h" +#include "ext/dom/domexception.h" +#include "ext/dom/php_dom.h" + +#include + +/* Note: casting and then comparing is a bit faster on my i7-4790 */ +#define CMP_NODE_TYPE(node, ty) ((unsigned char) (node)->type == ty) + +typedef struct dom_lxb_str_wrapper { + lexbor_str_t str; + bool should_free; +} dom_lxb_str_wrapper; + +static void dom_lxb_str_wrapper_release(dom_lxb_str_wrapper *wrapper) +{ + if (wrapper->should_free) { + xmlFree(wrapper->str.data); + } +} + +static zend_always_inline bool lxb_selectors_adapted_is_matchable_child(const xmlNode *node) +{ + return 
CMP_NODE_TYPE(node, XML_ELEMENT_NODE); +} + +static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_literal(const xmlNode *node, const char *name) +{ + return strcmp((const char *) node->name, name) == 0; +} + +static zend_always_inline bool lxb_selectors_adapted_cmp_ns(const xmlNode *a, const xmlNode *b) +{ + /* Namespace URIs are not interned, hence a->href != b->href. */ + return a->ns == b->ns || (a->ns != NULL && b->ns != NULL && xmlStrEqual(a->ns->href, b->ns->href)); +} + +static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xmlNode *node, const lxb_selectors_adapted_id *id) +{ + uintptr_t ptr = (uintptr_t) node->name; + if (id->interned && (ptr & (ZEND_MM_ALIGNMENT - 1)) != 0) { + /* It cannot be a heap-allocated string because the pointer is not properly aligned for a heap allocation. + * Therefore, it must be interned into the dictionary pool. */ + return node->name == id->name; + } + + return strcmp((const char *) node->name, (const char *) id->name) == 0; +} + +static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name) +{ + const xmlAttr *attr = xmlHasProp(node, (const xmlChar *) name); + if (attr != NULL && attr->ns != NULL) { + return NULL; + } + return attr; +} + +static zend_always_inline bool lxb_selectors_adapted_has_attr(const xmlNode *node, const char *name) +{ + return lxb_selectors_adapted_attr(node, (const lxb_char_t *) name) != NULL; +} + +static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(const xmlAttr *attr) +{ + dom_lxb_str_wrapper ret; + ret.str.data = (lxb_char_t *) php_libxml_attr_value(attr, &ret.should_free); + ret.str.length = strlen((const char *) ret.str.data); + return ret; +} + +static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node) +{ + if (node->doc != NULL && node->doc->dict != NULL) { + const xmlChar *interned = 
xmlDictExists(node->doc->dict, selector->name.data, selector->name.length); + if (interned != NULL) { + entry->id.name = interned; + entry->id.interned = true; + return; + } + } + + entry->id.name = selector->name.data; + entry->id.interned = false; +} + +static zend_always_inline void lxb_selectors_adapted_set_entry_id(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node) +{ + if (entry->id.name == NULL) { + lxb_selectors_adapted_set_entry_id_ex(entry, selector, node); + } +} + +static lxb_status_t +lxb_selectors_state_tree(lxb_selectors_t *selectors, const xmlNode *root, + const lxb_css_selector_list_t *list); + +static lxb_status_t +lxb_selectors_state_run(lxb_selectors_t *selectors, const xmlNode *node, + const lxb_css_selector_list_t *list); + +static lxb_selectors_entry_t * +lxb_selectors_state_find(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry); + +static lxb_selectors_entry_t * +lxb_selectors_state_find_check(lxb_selectors_t *selectors, const xmlNode *node, + const lxb_css_selector_t *selector, + lxb_selectors_entry_t *entry); + +static lxb_selectors_entry_t * +lxb_selectors_state_pseudo_class_function(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry); + +static const xmlNode * +lxb_selectors_next_node(lxb_selectors_nested_t *main); + +static const xmlNode * +lxb_selectors_state_has_relative(const xmlNode *node, + const lxb_css_selector_t *selector); + +static lxb_selectors_entry_t * +lxb_selectors_state_after_find_has(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry); + +static lxb_selectors_entry_t * +lxb_selectors_state_after_find(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry); + +static lxb_selectors_entry_t * +lxb_selectors_state_after_nth_child(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry); + +static bool +lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, const xmlNode *node); + +static bool 
+lxb_selectors_match_element(const lxb_css_selector_t *selector, + const xmlNode *node, lxb_selectors_entry_t *entry); + +static bool +lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node, bool quirks); + +static bool +lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src, + bool quirks); + +static bool +lxb_selectors_match_attribute(const lxb_css_selector_t *selector, + const xmlNode *node, lxb_selectors_entry_t *entry); + +static bool +lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, + const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_function(const lxb_css_selector_t *selector, + const xmlNode *node); + +static bool +lxb_selectors_pseudo_element(const lxb_css_selector_t *selector, + const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_disabled(const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_first_child(const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_first_of_type(const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_last_child(const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_last_of_type(const xmlNode *node); + +static bool +lxb_selectors_pseudo_class_read_write(const xmlNode *node); + +static bool +lxb_selectors_anb_calc(const lxb_css_selector_anb_of_t *anb, size_t index); + +static lxb_status_t +lxb_selectors_cb_ok(const xmlNode *node, + lxb_css_selector_specificity_t spec, void *ctx); + +static lxb_status_t +lxb_selectors_cb_not(const xmlNode *node, + lxb_css_selector_specificity_t spec, void *ctx); + + +lxb_status_t +lxb_selectors_init(lxb_selectors_t *selectors) +{ + lxb_status_t status; + + selectors->objs = lexbor_dobject_create(); + status = lexbor_dobject_init(selectors->objs, + 128, sizeof(lxb_selectors_entry_t)); + if (status != LXB_STATUS_OK) { + return status; + } + + selectors->nested = lexbor_dobject_create(); + status = lexbor_dobject_init(selectors->nested, + 64, sizeof(lxb_selectors_nested_t)); 
+ if (status != LXB_STATUS_OK) { + return status; + } + + selectors->options = LXB_SELECTORS_OPT_DEFAULT; + + return LXB_STATUS_OK; +} + +void +lxb_selectors_clean(lxb_selectors_t *selectors) +{ + lexbor_dobject_clean(selectors->objs); + lexbor_dobject_clean(selectors->nested); +} + +void +lxb_selectors_destroy(lxb_selectors_t *selectors) +{ + selectors->objs = lexbor_dobject_destroy(selectors->objs, true); + selectors->nested = lexbor_dobject_destroy(selectors->nested, true); +} + +lxb_inline const xmlNode * +lxb_selectors_descendant(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, + const xmlNode *node) +{ + node = node->parent; + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE) + && lxb_selectors_match(selectors, entry, selector, node)) + { + return node; + } + + node = node->parent; + } + + return NULL; +} + +lxb_inline const xmlNode * +lxb_selectors_close(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, const xmlNode *node) +{ + if (lxb_selectors_match(selectors, entry, selector, node)) { + return node; + } + + return NULL; +} + +lxb_inline const xmlNode * +lxb_selectors_child(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, const xmlNode *root) +{ + root = root->parent; + + if (root != NULL && CMP_NODE_TYPE(root, XML_ELEMENT_NODE) + && lxb_selectors_match(selectors, entry, selector, root)) + { + return root; + } + + return NULL; +} + +lxb_inline const xmlNode * +lxb_selectors_sibling(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, const xmlNode *node) +{ + node = node->prev; + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + if (lxb_selectors_match(selectors, entry, selector, node)) { + return node; + } + + return NULL; + } + + node = node->prev; + } + + return NULL; +} + +lxb_inline const xmlNode * 
+lxb_selectors_following(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, const xmlNode *node) +{ + node = node->prev; + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE) && + lxb_selectors_match(selectors, entry, selector, node)) + { + return node; + } + + node = node->prev; + } + + return NULL; +} + +lxb_status_t +lxb_selectors_find(lxb_selectors_t *selectors, const xmlNode *root, + const lxb_css_selector_list_t *list, + lxb_selectors_cb_f cb, void *ctx) +{ + lxb_selectors_entry_t *entry; + lxb_selectors_nested_t nested; + + entry = lexbor_dobject_calloc(selectors->objs); + + entry->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE; + entry->selector = list->last; + + nested.parent = NULL; + nested.entry = entry; + nested.cb = cb; + nested.ctx = ctx; + + selectors->current = &nested; + selectors->status = LXB_STATUS_OK; + + return lxb_selectors_state_tree(selectors, root, list); +} + +lxb_status_t +lxb_selectors_match_node(lxb_selectors_t *selectors, const xmlNode *node, + const lxb_css_selector_list_t *list, + lxb_selectors_cb_f cb, void *ctx) +{ + lxb_status_t status; + lxb_selectors_entry_t *entry; + lxb_selectors_nested_t nested; + + if (!CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + return LXB_STATUS_OK; + } + + entry = lexbor_dobject_calloc(selectors->objs); + + entry->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE; + entry->selector = list->last; + + nested.parent = NULL; + nested.entry = entry; + nested.cb = cb; + nested.ctx = ctx; + + selectors->current = &nested; + selectors->status = LXB_STATUS_OK; + + status = lxb_selectors_state_run(selectors, node, list); + + lxb_selectors_clean(selectors); + + return status; +} + +static lxb_status_t +lxb_selectors_state_tree(lxb_selectors_t *selectors, const xmlNode *root, + const lxb_css_selector_list_t *list) +{ + lxb_status_t status; + const xmlNode *node; + +#if 0 + if (selectors->options & LXB_SELECTORS_OPT_MATCH_ROOT) { + node = root; + + if 
(CMP_NODE_TYPE(node, XML_DOCUMENT_NODE) || CMP_NODE_TYPE(node, XML_HTML_DOCUMENT_NODE) + || CMP_NODE_TYPE(node, XML_DOCUMENT_FRAG_NODE)) { + node = root->children; + } + } + else +#endif + { + node = root->children; + } + + if (node == NULL) { + goto out; + } + + do { + if (!CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + goto next; + } + + status = lxb_selectors_state_run(selectors, node, list); + if (status != LXB_STATUS_OK) { + if (status == LXB_STATUS_STOP) { + break; + } + + lxb_selectors_clean(selectors); + + return status; + } + + if (node->children != NULL) { + node = node->children; + } + else { + + next: + + while (node != root && node->next == NULL) { + node = node->parent; + } + + if (node == root) { + break; + } + + node = node->next; + } + } + while (true); + +out: + lxb_selectors_clean(selectors); + + return LXB_STATUS_OK; +} + +static lxb_status_t +lxb_selectors_state_run(lxb_selectors_t *selectors, const xmlNode *node, + const lxb_css_selector_list_t *list) +{ + lxb_selectors_entry_t *entry; + + entry = selectors->current->entry; + + entry->node = node; + selectors->state = lxb_selectors_state_find; + selectors->first = entry; + +again: + + do { + entry = selectors->state(selectors, entry); + } + while (entry != NULL); + + if (selectors->current->parent != NULL + && selectors->status == LXB_STATUS_OK) + { + entry = selectors->current->entry; + selectors->state = selectors->current->return_state; + + goto again; + } + + return selectors->status; +} + +static lxb_selectors_entry_t * +lxb_selectors_state_find(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry) +{ + const xmlNode *node; + lxb_selectors_entry_t *next; + const lxb_css_selector_t *selector; + const lxb_css_selector_anb_of_t *anb; + const lxb_css_selector_pseudo_t *pseudo; + + selector = entry->selector; + + if (selector->type == LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION) { + pseudo = &selector->u.pseudo; + + /* Optimizing. 
*/ + + switch (pseudo->type) { + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD: + anb = pseudo->data; + + if (anb->of != NULL) { + break; + } + + goto godoit; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE: + goto godoit; + + default: + break; + } + + if (entry->nested == NULL) { + next = lexbor_dobject_calloc(selectors->objs); + + next->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE; + + entry->nested = lexbor_dobject_calloc(selectors->nested); + + entry->nested->entry = next; + entry->nested->parent = selectors->current; + } + + selectors->state = lxb_selectors_state_pseudo_class_function; + selectors->current->last = entry; + selectors->current = entry->nested; + + next = entry->nested->entry; + next->node = entry->node; + + return next; + } + +godoit: + + switch (entry->combinator) { + case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT: + node = lxb_selectors_descendant(selectors, entry, + selector, entry->node); + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CLOSE: + node = lxb_selectors_close(selectors, entry, + selector, entry->node); + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CHILD: + node = lxb_selectors_child(selectors, entry, + selector, entry->node); + break; + + case LXB_CSS_SELECTOR_COMBINATOR_SIBLING: + node = lxb_selectors_sibling(selectors, entry, + selector, entry->node); + break; + + case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: + node = lxb_selectors_following(selectors, entry, + selector, entry->node); + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CELL: + default: + selectors->status = LXB_STATUS_ERROR; + return NULL; + } + + return lxb_selectors_state_find_check(selectors, node, selector, entry); +} + +static lxb_selectors_entry_t * +lxb_selectors_state_find_check(lxb_selectors_t *selectors, const xmlNode *node, + const lxb_css_selector_t *selector, + lxb_selectors_entry_t *entry) +{ + 
lxb_selectors_entry_t *next; + lxb_selectors_nested_t *current; + + if (node == NULL) { + + try_next: + + if (entry->next == NULL) { + + try_next_list: + + if (selector->list->next == NULL) { + return NULL; + } + + /* + * Try the following selectors from the selector list. + */ + + if (entry->following != NULL) { + entry->following->node = entry->node; + + if (selectors->current->parent == NULL) { + selectors->first = entry->following; + } + + return entry->following; + } + + next = lexbor_dobject_calloc(selectors->objs); + + next->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE; + next->selector = selector->list->next->last; + next->node = entry->node; + + entry->following = next; + + if (selectors->current->parent == NULL) { + selectors->first = next; + } + + return next; + } + + do { + entry = entry->next; + + while (entry->combinator == LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + if (entry->next == NULL) { + selector = entry->selector; + goto try_next; + } + + entry = entry->next; + } + + switch (entry->combinator) { + case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT: + node = entry->node->parent; + + if (node == NULL + || !CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) + { + node = NULL; + } + + break; + + case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: + node = entry->node->prev; + break; + + case LXB_CSS_SELECTOR_COMBINATOR_SIBLING: + case LXB_CSS_SELECTOR_COMBINATOR_CHILD: + case LXB_CSS_SELECTOR_COMBINATOR_CLOSE: + node = NULL; + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CELL: + default: + selectors->status = LXB_STATUS_ERROR; + return NULL; + } + } + while (node == NULL); + + entry->node = node; + + return entry; + } + + if (selector->prev == NULL) { + current = selectors->current; + + selectors->status = current->cb(current->entry->node, + selector->list->specificity, + current->ctx); + + if ((selectors->options & LXB_SELECTORS_OPT_MATCH_FIRST) == 0 + && current->parent == NULL) + { + if (selectors->status == LXB_STATUS_OK) { + entry = selectors->first; + goto try_next_list; 
+ } + } + + return NULL; + } + + if (entry->prev == NULL) { + next = lexbor_dobject_calloc(selectors->objs); + + next->combinator = selector->combinator; + next->selector = selector->prev; + next->node = node; + + next->next = entry; + entry->prev = next; + + return next; + } + + entry->prev->node = node; + + return entry->prev; +} + +static lxb_selectors_entry_t * +lxb_selectors_state_pseudo_class_function(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry) +{ + const xmlNode *node, *base; + lxb_selectors_nested_t *current; + const lxb_css_selector_list_t *list; + lxb_css_selector_anb_of_t *anb; + const lxb_css_selector_pseudo_t *pseudo; + + current = selectors->current; + + base = lxb_selectors_next_node(current); + if (base == NULL) { + goto not_found; + } + + pseudo = &current->parent->last->selector->u.pseudo; + + switch (pseudo->type) { + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_HAS: + list = (lxb_css_selector_list_t *) pseudo->data; + node = lxb_selectors_state_has_relative(base, list->first); + + if (node == NULL) { + selectors->current = selectors->current->parent; + entry = selectors->current->last; + + selectors->state = lxb_selectors_state_find; + + return lxb_selectors_state_find_check(selectors, NULL, + entry->selector, entry); + } + + current->root = base; + + current->entry->selector = list->last; + current->entry->node = node; + current->return_state = lxb_selectors_state_after_find_has; + current->cb = lxb_selectors_cb_ok; + current->ctx = &current->found; + current->found = false; + + selectors->state = lxb_selectors_state_find; + + return entry; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_CURRENT: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_IS: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_WHERE: + current->entry->selector = ((lxb_css_selector_list_t *) pseudo->data)->last; + current->entry->node = base; + current->return_state = lxb_selectors_state_after_find; + current->cb = lxb_selectors_cb_ok; + current->ctx = &current->found; + 
current->found = false; + + selectors->state = lxb_selectors_state_find; + + return entry; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NOT: + current->entry->selector = ((lxb_css_selector_list_t *) pseudo->data)->last; + current->entry->node = base; + current->return_state = lxb_selectors_state_after_find; + current->cb = lxb_selectors_cb_not; + current->ctx = ¤t->found; + current->found = true; + + selectors->state = lxb_selectors_state_find; + + return entry; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD: + anb = pseudo->data; + + current->entry->selector = anb->of->last; + current->entry->node = base; + current->return_state = lxb_selectors_state_after_nth_child; + current->cb = lxb_selectors_cb_ok; + current->ctx = ¤t->found; + current->root = base; + current->index = 0; + current->found = false; + + selectors->state = lxb_selectors_state_find; + + return entry; + + /* + * This one can only happen if the user has somehow messed up the + * selector. 
+ */ + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL: + default: + break; + } + +not_found: + + selectors->current = selectors->current->parent; + entry = selectors->current->last; + + selectors->state = lxb_selectors_state_find; + + return lxb_selectors_state_find_check(selectors, NULL, + entry->selector, entry); +} + +static const xmlNode * +lxb_selectors_next_node(lxb_selectors_nested_t *main) +{ + const xmlNode *node = main->entry->node; + + switch (main->parent->last->combinator) { + case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT: + case LXB_CSS_SELECTOR_COMBINATOR_CHILD: + if (node->parent == NULL + || !CMP_NODE_TYPE(node->parent, XML_ELEMENT_NODE)) + { + return NULL; + } + + return node->parent; + + case LXB_CSS_SELECTOR_COMBINATOR_CLOSE: + return node; + + case LXB_CSS_SELECTOR_COMBINATOR_SIBLING: + case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: + node = node->prev; + break; + + default: + return NULL; + } + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + break; + } + + node = node->prev; + } + + return node; +} + +static const xmlNode * +lxb_selectors_state_has_relative(const xmlNode *node, + const lxb_css_selector_t *selector) +{ + const xmlNode *root = node; + + switch (selector->combinator) { + case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT: + case LXB_CSS_SELECTOR_COMBINATOR_CHILD: + node = node->children; + break; + + case LXB_CSS_SELECTOR_COMBINATOR_SIBLING: + case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: + node = node->next; + break; + + default: + return NULL; + } + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + break; + } + + while (node !=root && node->next == NULL) { + node = node->parent; + } + + if (node == root) 
{ + return NULL; + } + + node = node->next; + } + + return node; +} + +static lxb_selectors_entry_t * +lxb_selectors_state_after_find_has(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry) +{ + const xmlNode *node; + lxb_selectors_entry_t *parent; + lxb_selectors_nested_t *current; + + if (selectors->current->found) { + node = selectors->current->root; + + selectors->current = selectors->current->parent; + parent = selectors->current->last; + + selectors->state = lxb_selectors_state_find; + + return lxb_selectors_state_find_check(selectors, node, + parent->selector, parent); + } + + current = selectors->current; + node = entry->node; + + switch (entry->selector->list->first->combinator) { + case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT: + if (node->children != NULL) { + node = node->children; + } + else { + + next: + + while (node != current->root && node->next == NULL) { + node = node->parent; + } + + if (node == current->root) { + goto failed; + } + + node = node->next; + } + + if (!CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + goto next; + } + + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CHILD: + case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: + node = node->next; + + while (node != NULL && !CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + node = node->next; + } + + if (node == NULL) { + goto failed; + } + + break; + + case LXB_CSS_SELECTOR_COMBINATOR_SIBLING: + goto failed; + + case LXB_CSS_SELECTOR_COMBINATOR_CLOSE: + case LXB_CSS_SELECTOR_COMBINATOR_CELL: + default: + selectors->status = LXB_STATUS_ERROR; + return NULL; + } + + entry->node = node; + selectors->state = lxb_selectors_state_find; + + return entry; + +failed: + + selectors->current = selectors->current->parent; + parent = selectors->current->last; + + selectors->state = lxb_selectors_state_find; + + return lxb_selectors_state_find_check(selectors, NULL, + parent->selector, parent); +} + + +static lxb_selectors_entry_t * +lxb_selectors_state_after_find(lxb_selectors_t *selectors, + 
lxb_selectors_entry_t *entry) +{ + const xmlNode *node; + lxb_selectors_entry_t *parent; + lxb_selectors_nested_t *current; + + current = selectors->current; + + if (current->found) { + node = entry->node; + + selectors->current = current->parent; + parent = selectors->current->last; + + selectors->state = lxb_selectors_state_find; + + return lxb_selectors_state_find_check(selectors, node, + parent->selector, parent); + } + + node = entry->node; + + switch (current->parent->last->combinator) { + case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT: + if (node->parent != NULL + && CMP_NODE_TYPE(node->parent, XML_ELEMENT_NODE)) + { + node = node->parent; + } + else { + node = NULL; + } + + break; + + case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: + node = node->prev; + + while (node != NULL && !CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + node = node->prev; + } + + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CHILD: + case LXB_CSS_SELECTOR_COMBINATOR_SIBLING: + case LXB_CSS_SELECTOR_COMBINATOR_CLOSE: + node = NULL; + break; + + case LXB_CSS_SELECTOR_COMBINATOR_CELL: + default: + selectors->status = LXB_STATUS_ERROR; + return NULL; + } + + if (node == NULL) { + selectors->current = current->parent; + parent = selectors->current->last; + + selectors->state = lxb_selectors_state_find; + + return lxb_selectors_state_find_check(selectors, node, + parent->selector, parent); + } + + entry->node = node; + selectors->state = lxb_selectors_state_find; + + return entry; +} + +static lxb_selectors_entry_t * +lxb_selectors_state_after_nth_child(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry) +{ + bool found; + const xmlNode *node; + lxb_selectors_entry_t *parent; + lxb_selectors_nested_t *current; + const lxb_css_selector_t *selector; + const lxb_css_selector_pseudo_t *pseudo; + + current = selectors->current; + selector = current->parent->last->selector; + pseudo = &selector->u.pseudo; + + node = entry->node; + + if (current->found) { + current->index += 1; + } + else if 
(current->root == node) { + node = NULL; + goto done; + } + + if (pseudo->type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD) { + node = node->prev; + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + break; + } + + node = node->prev; + } + } + else { + node = node->next; + + while (node != NULL) { + if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { + break; + } + + node = node->next; + } + } + + if (node == NULL) { + goto done; + } + + entry->node = node; + current->found = false; + selectors->state = lxb_selectors_state_find; + + return entry; + +done: + + if (current->index > 0) { + found = lxb_selectors_anb_calc(pseudo->data, current->index); + + node = (found) ? current->root : NULL; + } + + selectors->state = lxb_selectors_state_find; + selectors->current = selectors->current->parent; + + parent = selectors->current->last; + + return lxb_selectors_state_find_check(selectors, node, + parent->selector, parent); +} + +static bool +lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry, + const lxb_css_selector_t *selector, const xmlNode *node) +{ + switch (selector->type) { + case LXB_CSS_SELECTOR_TYPE_ANY: + return true; + + case LXB_CSS_SELECTOR_TYPE_ELEMENT: + return lxb_selectors_match_element(selector, node, entry); + + case LXB_CSS_SELECTOR_TYPE_ID: + return lxb_selectors_match_id(selector, node, selectors->options & LXB_SELECTORS_OPT_QUIRKS_MODE); + + case LXB_CSS_SELECTOR_TYPE_CLASS: { + const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "class"); + if (dom_attr == NULL) { + return false; + } + + dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr); + + if (trg.str.length == 0) { + dom_lxb_str_wrapper_release(&trg); + return false; + } + + bool ret = lxb_selectors_match_class(&trg.str, + &selector->name, selectors->options & LXB_SELECTORS_OPT_QUIRKS_MODE); + dom_lxb_str_wrapper_release(&trg); + return ret; + } + + case LXB_CSS_SELECTOR_TYPE_ATTRIBUTE: + return 
lxb_selectors_match_attribute(selector, node, entry); + + case LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS: + return lxb_selectors_pseudo_class(selector, node); + + case LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION: + return lxb_selectors_pseudo_class_function(selector, node); + + case LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT: + return lxb_selectors_pseudo_element(selector, node); + + case LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT_FUNCTION: + return false; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return false; +} + +static bool +lxb_selectors_match_element(const lxb_css_selector_t *selector, + const xmlNode *node, lxb_selectors_entry_t *entry) +{ + lxb_selectors_adapted_set_entry_id(entry, selector, node); + return lxb_selectors_adapted_cmp_local_name_id(node, &entry->id); +} + +static bool +lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node, bool quirks) +{ + const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "id"); + if (dom_attr == NULL) { + return false; + } + + const lexbor_str_t *src = &selector->name; + dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr); + bool ret = false; + if (trg.str.length == src->length) { + if (quirks) { + ret = lexbor_str_data_ncasecmp(trg.str.data, src->data, src->length); + } else { + ret = lexbor_str_data_ncmp(trg.str.data, src->data, src->length); + } + } + dom_lxb_str_wrapper_release(&trg); + + return ret; +} + +static bool +lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src, + bool quirks) +{ + lxb_char_t chr; + + if (target->length < src->length) { + return false; + } + + bool is_it = false; + + const lxb_char_t *data = target->data; + const lxb_char_t *pos = data; + const lxb_char_t *end = data + target->length; + + for (; data < end; data++) { + chr = *data; + + if (lexbor_utils_whitespace(chr, ==, ||)) { + + if ((size_t) (data - pos) == src->length) { + if (quirks) { + is_it = lexbor_str_data_ncasecmp(pos, src->data, src->length); + } + else 
{ + is_it = lexbor_str_data_ncmp(pos, src->data, src->length); + } + + if (is_it) { + return true; + } + } + + if ((size_t) (end - data) < src->length) { + return false; + } + + pos = data + 1; + } + } + + if ((size_t) (end - pos) == src->length && src->length != 0) { + if (quirks) { + is_it = lexbor_str_data_ncasecmp(pos, src->data, src->length); + } + else { + is_it = lexbor_str_data_ncmp(pos, src->data, src->length); + } + } + + return is_it; +} + +static bool +lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src) +{ + bool res; + bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I; + + switch (attr->match) { + case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */ + if (trg->length == src->length) { + if (ins) { + return lexbor_str_data_ncasecmp(trg->data, src->data, + src->length); + } + + return lexbor_str_data_ncmp(trg->data, src->data, + src->length); + } + + return false; + + case LXB_CSS_SELECTOR_MATCH_INCLUDE: /* ~= */ + return lxb_selectors_match_class(trg, src, ins); + + case LXB_CSS_SELECTOR_MATCH_DASH: /* |= */ + if (trg->length == src->length) { + if (ins) { + return lexbor_str_data_ncasecmp(trg->data, src->data, + src->length); + } + + return lexbor_str_data_ncmp(trg->data, src->data, + src->length); + } + + if (trg->length > src->length) { + if (ins) { + res = lexbor_str_data_ncasecmp(trg->data, + src->data, src->length); + } + else { + res = lexbor_str_data_ncmp(trg->data, + src->data, src->length); + } + + if (res && trg->data[src->length] == '-') { + return true; + } + } + + return false; + + case LXB_CSS_SELECTOR_MATCH_PREFIX: /* ^= */ + if (src->length != 0 && trg->length >= src->length) { + if (ins) { + return lexbor_str_data_ncasecmp(trg->data, src->data, + src->length); + } + + return lexbor_str_data_ncmp(trg->data, src->data, + src->length); + } + + return false; + + case LXB_CSS_SELECTOR_MATCH_SUFFIX: /* $= */ + if (src->length != 0 && trg->length >= src->length) { + 
size_t dif = trg->length - src->length; + + if (ins) { + return lexbor_str_data_ncasecmp(trg->data + dif, + src->data, src->length); + } + + return lexbor_str_data_ncmp(trg->data + dif, src->data, + src->length); + } + + return false; + + case LXB_CSS_SELECTOR_MATCH_SUBSTRING: /* *= */ + if (src->length == 0) { + return false; + } + + if (ins) { + return lexbor_str_data_ncasecmp_contain(trg->data, trg->length, + src->data, src->length); + } + + return lexbor_str_data_ncmp_contain(trg->data, trg->length, + src->data, src->length); + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return false; +} + +static bool +lxb_selectors_match_attribute(const lxb_css_selector_t *selector, + const xmlNode *node, lxb_selectors_entry_t *entry) +{ + const lxb_css_selector_attribute_t *attr = &selector->u.attribute; + + lxb_selectors_adapted_set_entry_id(entry, selector, node); + + const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, entry->id.name); + if (dom_attr == NULL) { + return false; + } + + const lexbor_str_t *src = &attr->value; + if (src->data == NULL) { + return true; + } + + dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr); + bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src); + dom_lxb_str_wrapper_release(&trg); + return res; +} + +static bool +lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, + const xmlNode *node) +{ + const lxb_css_selector_pseudo_t *pseudo = &selector->u.pseudo; + + static const lxb_char_t checkbox[] = "checkbox"; + static const size_t checkbox_length = sizeof(checkbox) / sizeof(lxb_char_t) - 1; + + static const lxb_char_t radio[] = "radio"; + static const size_t radio_length = sizeof(radio) / sizeof(lxb_char_t) - 1; + + switch (pseudo->type) { + case LXB_CSS_SELECTOR_PSEUDO_CLASS_ACTIVE: + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_ANY_LINK: + /* https://drafts.csswg.org/selectors/#the-any-link-pseudo */ + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) + && 
(lxb_selectors_adapted_cmp_local_name_literal(node, "a") + || lxb_selectors_adapted_cmp_local_name_literal(node, "area"))) + { + return lxb_selectors_adapted_has_attr(node, "href"); + } + + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_BLANK: + if (!EG(exception)) { + php_dom_throw_error_with_message(NOT_SUPPORTED_ERR, ":blank selector is not implemented because CSSWG has not yet decided its semantics (https://github.com/w3c/csswg-drafts/issues/1967)", true); + } + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_CHECKED: + /* https://drafts.csswg.org/selectors/#checked */ + if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) { + return false; + } + if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")) { + const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "type"); + if (dom_attr == NULL) { + return false; + } + + dom_lxb_str_wrapper str = lxb_selectors_adapted_attr_value(dom_attr); + bool res = false; + + if (str.str.length == 8) { + if (lexbor_str_data_ncasecmp(checkbox, str.str.data, checkbox_length)) { + res = lxb_selectors_adapted_has_attr(node, "checked"); + } + } + else if (str.str.length == 5) { + if (lexbor_str_data_ncasecmp(radio, str.str.data, radio_length)) { + res = lxb_selectors_adapted_has_attr(node, "checked"); + } + } + + dom_lxb_str_wrapper_release(&str); + + return res; + } + else if(lxb_selectors_adapted_cmp_local_name_literal(node, "option")) { + return lxb_selectors_adapted_has_attr(node, "selected"); + } + + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_CURRENT: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_DEFAULT: + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_DISABLED: + return lxb_selectors_pseudo_class_disabled(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_EMPTY: + node = node->children; + + while (node != NULL) { + /* Following https://developer.mozilla.org/en-US/docs/Web/CSS/:empty, i.e. 
what currently happens in browsers, + * not the CSS Selectors Level 4 Draft that no one implements yet. */ + if (!CMP_NODE_TYPE(node, XML_COMMENT_NODE) && !CMP_NODE_TYPE(node, XML_PI_NODE)) { + return false; + } + + node = node->next; + } + + return true; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_ENABLED: + return !lxb_selectors_pseudo_class_disabled(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_CHILD: + return lxb_selectors_pseudo_class_first_child(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_OF_TYPE: + return lxb_selectors_pseudo_class_first_of_type(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS_VISIBLE: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS_WITHIN: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FULLSCREEN: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUTURE: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_HOVER: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_IN_RANGE: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_INDETERMINATE: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_INVALID: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_CHILD: + return lxb_selectors_pseudo_class_last_child(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_OF_TYPE: + return lxb_selectors_pseudo_class_last_of_type(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_LINK: + /* https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link */ + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) + && (lxb_selectors_adapted_cmp_local_name_literal(node, "a") + || lxb_selectors_adapted_cmp_local_name_literal(node, "area"))) + { + return lxb_selectors_adapted_has_attr(node, "href"); + } + + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_LOCAL_LINK: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_ONLY_CHILD: + return lxb_selectors_pseudo_class_first_child(node) + && lxb_selectors_pseudo_class_last_child(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_ONLY_OF_TYPE: + return 
lxb_selectors_pseudo_class_first_of_type(node) + && lxb_selectors_pseudo_class_last_of_type(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_OPTIONAL: + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) + && (lxb_selectors_adapted_cmp_local_name_literal(node, "input") + || lxb_selectors_adapted_cmp_local_name_literal(node, "select") + || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea"))) + { + return !lxb_selectors_adapted_has_attr(node, "required"); + } + + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_OUT_OF_RANGE: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_PAST: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_PLACEHOLDER_SHOWN: + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) + && (lxb_selectors_adapted_cmp_local_name_literal(node, "input") + || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea"))) + { + return lxb_selectors_adapted_has_attr(node, "placeholder"); + } + + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_READ_ONLY: + return !lxb_selectors_pseudo_class_read_write(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_READ_WRITE: + return lxb_selectors_pseudo_class_read_write(node); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_REQUIRED: + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) + && (lxb_selectors_adapted_cmp_local_name_literal(node, "input") + || lxb_selectors_adapted_cmp_local_name_literal(node, "select") + || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea"))) + { + return lxb_selectors_adapted_has_attr(node, "required"); + } + + return false; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_ROOT: + return node->parent != NULL + && (node->parent->type == XML_DOCUMENT_FRAG_NODE || node->parent->type == XML_DOCUMENT_NODE + || node->parent->type == XML_HTML_DOCUMENT_NODE); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_SCOPE: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_TARGET: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_TARGET_WITHIN: + break; + + case 
LXB_CSS_SELECTOR_PSEUDO_CLASS_USER_INVALID: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_VALID: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_VISITED: + break; + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_WARNING: + break; + } + + return false; +} + +static bool +lxb_selectors_pseudo_class_function(const lxb_css_selector_t *selector, + const xmlNode *node) +{ + size_t index; + const xmlNode *base; + const lxb_css_selector_pseudo_t *pseudo; + + pseudo = &selector->u.pseudo; + + switch (pseudo->type) { + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD: + index = 0; + + if (pseudo->type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD) { + while (node != NULL) { + if (lxb_selectors_adapted_is_matchable_child(node)) + { + index++; + } + + node = node->prev; + } + } + else { + while (node != NULL) { + if (lxb_selectors_adapted_is_matchable_child(node)) + { + index++; + } + + node = node->next; + } + } + + return lxb_selectors_anb_calc(pseudo->data, index); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE: + index = 0; + base = node; + + if (pseudo->type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE) { + while (node != NULL) { + if(lxb_selectors_adapted_is_matchable_child(node) + && xmlStrEqual(node->name, base->name) + && lxb_selectors_adapted_cmp_ns(node, base)) + { + index++; + } + + node = node->prev; + } + } + else { + while (node != NULL) { + if(lxb_selectors_adapted_is_matchable_child(node) + && xmlStrEqual(node->name, base->name) + && lxb_selectors_adapted_cmp_ns(node, base)) + { + index++; + } + + node = node->next; + } + } + + return lxb_selectors_anb_calc(pseudo->data, index); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL: + default: + 
break; + } + + return false; +} + +static bool +lxb_selectors_pseudo_element(const lxb_css_selector_t *selector, + const xmlNode *node) +{ + const lxb_css_selector_pseudo_t *pseudo = &selector->u.pseudo; + + switch (pseudo->type) { + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BACKDROP: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LINE: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_INACTIVE_SELECTION: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_MARKER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_PLACEHOLDER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_SELECTION: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_SPELLING_ERROR: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TARGET_TEXT: + break; + } + + return false; +} + +/* https://html.spec.whatwg.org/multipage/semantics-other.html#concept-element-disabled */ +static bool +lxb_selectors_pseudo_class_disabled(const xmlNode *node) +{ + if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) { + return false; + } + + if (lxb_selectors_adapted_has_attr(node, "disabled") + && (lxb_selectors_adapted_cmp_local_name_literal(node, "button") + || lxb_selectors_adapted_cmp_local_name_literal(node, "input") + || lxb_selectors_adapted_cmp_local_name_literal(node, "select") + || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea") + || lxb_selectors_adapted_cmp_local_name_literal(node, "optgroup") + || lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset"))) + { + return true; + } + + if (lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")) { + const xmlNode *fieldset = node; + node = node->parent; + + while (node != NULL && lxb_selectors_adapted_is_matchable_child(node)) { + /* node is a disabled fieldset that is an ancestor of fieldset */ + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) + && 
lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset") + && lxb_selectors_adapted_has_attr(node, "disabled")) + { + /* Search first legend child and figure out if fieldset is a descendent from that. */ + const xmlNode *search_current = node->children; + do { + if (search_current->type == XML_ELEMENT_NODE + && php_dom_ns_is_fast(search_current, php_dom_ns_is_html_magic_token) + && lxb_selectors_adapted_cmp_local_name_literal(search_current, "legend")) { + /* search_current is a legend element. */ + const xmlNode *inner_search_current = fieldset; + + /* Disabled does not apply if fieldset is a descendant from search_current */ + do { + if (inner_search_current == search_current) { + return false; + } + + inner_search_current = inner_search_current->parent; + } while (inner_search_current != NULL); + + return true; + } + + search_current = search_current->next; + } while (search_current != NULL); + } + + node = node->parent; + } + } + + return false; +} + +static bool +lxb_selectors_pseudo_class_first_child(const xmlNode *node) +{ + node = node->prev; + + while (node != NULL) { + if (lxb_selectors_adapted_is_matchable_child(node)) + { + return false; + } + + node = node->prev; + } + + return true; +} + +static bool +lxb_selectors_pseudo_class_first_of_type(const xmlNode *node) +{ + const xmlNode *root = node; + node = node->prev; + + while (node) { + if (lxb_selectors_adapted_is_matchable_child(node) + && xmlStrEqual(node->name, root->name) + && lxb_selectors_adapted_cmp_ns(node, root)) + { + return false; + } + + node = node->prev; + } + + return true; +} + +static bool +lxb_selectors_pseudo_class_last_child(const xmlNode *node) +{ + node = node->next; + + while (node != NULL) { + if (lxb_selectors_adapted_is_matchable_child(node)) + { + return false; + } + + node = node->next; + } + + return true; +} + +static bool +lxb_selectors_pseudo_class_last_of_type(const xmlNode *node) +{ + const xmlNode *root = node; + node = node->next; + + while (node) { + if 
(lxb_selectors_adapted_is_matchable_child(node) + && xmlStrEqual(node->name, root->name) + && lxb_selectors_adapted_cmp_ns(node, root)) + { + return false; + } + + node = node->next; + } + + return true; +} + +/* https://drafts.csswg.org/selectors/#rw-pseudos */ +static bool +lxb_selectors_pseudo_class_read_write(const xmlNode *node) +{ + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) { + if (lxb_selectors_adapted_cmp_local_name_literal(node, "input") + || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")) { + return !lxb_selectors_adapted_has_attr(node, "readonly") && !lxb_selectors_adapted_has_attr(node, "disabled"); + } else { + const xmlAttr *attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "contenteditable"); + return attr && !dom_compare_value(attr, BAD_CAST "false"); + } + } + + return false; +} + +static bool +lxb_selectors_anb_calc(const lxb_css_selector_anb_of_t *anb, size_t index) +{ + double num; + + if (anb->anb.a == 0) { + if (anb->anb.b >= 0 && (size_t) anb->anb.b == index) { + return true; + } + } + else { + num = ((double) index - (double) anb->anb.b) / (double) anb->anb.a; + + if (num >= 0.0f && (num - trunc(num)) == 0.0f) { + return true; + } + } + + return false; +} + +static lxb_status_t +lxb_selectors_cb_ok(const xmlNode *node, + lxb_css_selector_specificity_t spec, void *ctx) +{ + *((bool *) ctx) = true; + return LXB_STATUS_OK; +} + +static lxb_status_t +lxb_selectors_cb_not(const xmlNode *node, + lxb_css_selector_specificity_t spec, void *ctx) +{ + *((bool *) ctx) = false; + return LXB_STATUS_OK; +} diff --git a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h new file mode 100644 index 00000000000..441976b1e3d --- /dev/null +++ b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h @@ -0,0 +1,254 @@ +/* + * Copyright (C) 2021-2024 Alexander Borisov + * + * Author: Alexander Borisov + * Adapted for PHP libxml2 by: Niels Dossche + */ + + +#ifndef 
LEXBOR_SELECTORS_H +#define LEXBOR_SELECTORS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "lexbor/selectors/base.h" +#include "lexbor/dom/dom.h" +#include "lexbor/css/selectors/selectors.h" +#include "lexbor/core/array_obj.h" +#include <libxml/tree.h> + + +typedef enum { + LXB_SELECTORS_OPT_DEFAULT = 0x00, + + /* + * Includes the passed (root) node in the search. + * + * By default, the root node does not participate in selector searches, + * only its children. + * + * This behavior is logical, if you have found a node and then you want to + * search for other nodes in it, you don't need to check it again. + * + * But there are cases when it is necessary for root node to participate + * in the search. That's what this option is for. + */ + LXB_SELECTORS_OPT_MATCH_ROOT = 1 << 1, + + /* + * Stop searching after the first match with any of the selectors + * in the list. + * + * By default, the callback will be triggered for each selector list. + * That is, if your node matches different selector lists, it will be + * returned multiple times in the callback. + * + * For example: + * HTML:
<div id="ok"><span>test</span></div>
+ * Selectors: div, div[id="ok"], div:has(:not(a)) + * + * The default behavior will cause three callbacks with the same node (div). + * Because it will be found by every selector in the list. + * + * This option allows you to end the element check after the first match on + * any of the selectors. That is, the callback will be called only once + * for example above. This way we get rid of duplicates in the search. + */ + LXB_SELECTORS_OPT_MATCH_FIRST = 1 << 2, + + /* Quirks mode (sigh) */ + LXB_SELECTORS_OPT_QUIRKS_MODE = 1 << 3, +} +lxb_selectors_opt_t; + +typedef struct lxb_selectors lxb_selectors_t; +typedef struct lxb_selectors_entry lxb_selectors_entry_t; +typedef struct lxb_selectors_nested lxb_selectors_nested_t; + +typedef lxb_status_t +(*lxb_selectors_cb_f)(const xmlNode *node, + lxb_css_selector_specificity_t spec, void *ctx); + +typedef lxb_selectors_entry_t * +(*lxb_selectors_state_cb_f)(lxb_selectors_t *selectors, + lxb_selectors_entry_t *entry); + +typedef struct { + const xmlChar *name; + bool interned; +} lxb_selectors_adapted_id; + +struct lxb_selectors_entry { + lxb_selectors_adapted_id id; + lxb_css_selector_combinator_t combinator; + const lxb_css_selector_t *selector; + const xmlNode *node; + lxb_selectors_entry_t *next; + lxb_selectors_entry_t *prev; + lxb_selectors_entry_t *following; + lxb_selectors_nested_t *nested; +}; + +struct lxb_selectors_nested { + lxb_selectors_entry_t *entry; + lxb_selectors_state_cb_f return_state; + + lxb_selectors_cb_f cb; + void *ctx; + + const xmlNode *root; + lxb_selectors_entry_t *last; + lxb_selectors_nested_t *parent; + + size_t index; + bool found; +}; + +struct lxb_selectors { + lxb_selectors_state_cb_f state; + lexbor_dobject_t *objs; + lexbor_dobject_t *nested; + + lxb_selectors_nested_t *current; + lxb_selectors_entry_t *first; + + lxb_selectors_opt_t options; + lxb_status_t status; +}; + + +/* + * Initialization of lxb_selectors_t object. + * + * Caches are initialized in this function. 
+ * + * @param[in] lxb_selectors_t * + * + * @return LXB_STATUS_OK if successful, otherwise an error status value. + */ +LXB_API lxb_status_t +lxb_selectors_init(lxb_selectors_t *selectors); + +/* + * Clears the object. Returns object to states as after initialization. + * + * After each call to lxb_selectors_find() and lxb_selectors_find_for_node(), + * the lxb_selectors_t object is cleared. That is, you don't need to call this + * function every time after searching by a selector. + * + * @param[in] lxb_selectors_t * + */ +LXB_API void +lxb_selectors_clean(lxb_selectors_t *selectors); + +/* + * Destroy lxb_selectors_t object. + * + * Destroying all caches. + * + * @param[in] lxb_selectors_t *. Can be NULL. + * if true: destroys the lxb_selectors_t object and all internal caches. + */ +LXB_API void +lxb_selectors_destroy(lxb_selectors_t *selectors); + +/* + * Search for nodes by selector list. + * + * Default Behavior: + * 1. The root node does not participate in the search, only its child nodes. + * 2. If a node matches multiple selector lists, a callback with that node + * will be called on each list. + * For example: + * HTML: <div id="ok"><span></span></div>
+ * Selectors: div, div[id="ok"], div:has(:not(a)) + * For each selector list, a callback with a "div" node will be called. + * + * To change the search behavior, see lxb_selectors_opt_set(). + * + * @param[in] lxb_selectors_t *. + * @param[in] const xmlNode *. The node from which the search will begin. + * @param[in] const lxb_css_selector_list_t *. Selectors List. + * @param[in] lxb_selectors_cb_f. Callback for a found node. + * @param[in] void *. Context for the callback. + * + * @return LXB_STATUS_OK if successful, otherwise an error status value. + */ +LXB_API lxb_status_t +lxb_selectors_find(lxb_selectors_t *selectors, const xmlNode *root, + const lxb_css_selector_list_t *list, + lxb_selectors_cb_f cb, void *ctx); + +/* + * Match a node to a Selectors List. + * + * In other words, the function checks which selector lists will find the + * specified node. + * + * Default Behavior: + * 1. If a node matches multiple selector lists, a callback with that node + * will be called on each list. + * For example: + * HTML: <div id="ok"><span></span></div>
+ * Node: div + * Selectors: div, div[id="ok"], div:has(:not(a)) + * For each selector list, a callback with a "div" node will be called. + * + * To change the search behavior, see lxb_selectors_opt_set(). + * + * @param[in] lxb_selectors_t *. + * @param[in] const xmlNode *. The node to match against the selector list. + * @param[in] const lxb_css_selector_list_t *. Selectors List. + * @param[in] lxb_selectors_cb_f. Callback for a found node. + * @param[in] void *. Context for the callback. + * + * @return LXB_STATUS_OK if successful, otherwise an error status value. + */ +LXB_API lxb_status_t +lxb_selectors_match_node(lxb_selectors_t *selectors, const xmlNode *node, + const lxb_css_selector_list_t *list, + lxb_selectors_cb_f cb, void *ctx); + +/* + * Inline functions. + */ + +/* + * The function sets the node search options. + * + * The given value replaces the previously stored options as a whole (plain + * assignment, nothing is OR-ed in). + * + * For more information, see lxb_selectors_opt_t. + * + * @param[in] lxb_selectors_t *. + * @param[in] lxb_selectors_opt_t. + */ +lxb_inline void +lxb_selectors_opt_set(lxb_selectors_t *selectors, lxb_selectors_opt_t opt) +{ + selectors->options = opt; +} + +/* + * Get the current selector. + * + * Function to get the selector by which the node was found. + * Use context (void *ctx) to pass the lxb_selectors_t object to the callback. + * + * @param[in] const lxb_selectors_t *. + * + * @return const lxb_css_selector_list_t *.
+ */ +lxb_inline const lxb_css_selector_list_t * +lxb_selectors_selector(const lxb_selectors_t *selectors) +{ + /* NOTE(review): current, entry and selector are dereferenced without NULL checks; presumably only valid while a search is running, i.e. from inside a callback -- confirm. */ + return selectors->current->entry->selector->list; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* LEXBOR_SELECTORS_H */ diff --git a/ext/dom/parentnode/css_selectors.c b/ext/dom/parentnode/css_selectors.c new file mode 100644 index 00000000000..ff1d07abc53 --- /dev/null +++ b/ext/dom/parentnode/css_selectors.c @@ -0,0 +1,282 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#if defined(HAVE_LIBXML) && defined(HAVE_DOM) +#include "../php_dom.h" + +#include "lexbor/css/parser.h" +#include "lexbor/selectors-adapted/selectors.h" + +// TODO: optimization idea: cache the parsed selectors in an LRU fashion?
+
+/* Callback context for querySelectorAll(): the array that collects the
+ * matched nodes and the dom_object handed to php_dom_create_object(). */
+typedef struct {
+	HashTable *list;
+	dom_object *intern;
+} dom_query_selector_all_ctx;
+
+/* Callback context for matches() and closest(): the node under test and
+ * a flag that is set once the selector engine reports exactly that node. */
+typedef struct {
+	const xmlNode *reference;
+	bool result;
+} dom_query_selector_matches_ctx;
+
+/* Returns `options` with LXB_SELECTORS_OPT_QUIRKS_MODE added when the
+ * document behind `intern` has its quirks_mode flag set; otherwise returns
+ * `options` unchanged. */
+static lxb_selectors_opt_t dom_quirks_opt(lxb_selectors_opt_t options, const dom_object *intern)
+{
+	if (intern->document != NULL && intern->document->quirks_mode) {
+		options |= LXB_SELECTORS_OPT_QUIRKS_MODE;
+	}
+	return options;
+}
+
+/* The three callbacks below are only referenced from this file, so they get
+ * internal linkage. */
+
+/* querySelector() callback: stores the first reported node in the
+ * xmlNodePtr that `ctx` points to and stops the search. */
+static lxb_status_t dom_query_selector_find_single_callback(const xmlNode *node, lxb_css_selector_specificity_t spec, void *ctx)
+{
+	xmlNodePtr *result = (xmlNodePtr *) ctx;
+	*result = (xmlNodePtr) node;
+	return LXB_STATUS_STOP;
+}
+
+/* querySelectorAll() callback: wraps the reported node in a PHP object and
+ * appends it to the result array; the search continues. */
+static lxb_status_t dom_query_selector_find_array_callback(const xmlNode *node, lxb_css_selector_specificity_t spec, void *ctx)
+{
+	dom_query_selector_all_ctx *qsa_ctx = (dom_query_selector_all_ctx *) ctx;
+	zval object;
+	php_dom_create_object((xmlNodePtr) node, &object, qsa_ctx->intern);
+	zend_hash_next_index_insert_new(qsa_ctx->list, &object);
+	return LXB_STATUS_OK;
+}
+
+/* matches()/closest() callback: records a hit and stops when the reported
+ * node is the reference node stored in the context. */
+static lxb_status_t dom_query_selector_find_matches_callback(const xmlNode *node, lxb_css_selector_specificity_t spec, void *ctx)
+{
+	dom_query_selector_matches_ctx *matches_ctx = (dom_query_selector_matches_ctx *) ctx;
+	if (node == matches_ctx->reference) {
+		matches_ctx->result = true;
+		return LXB_STATUS_STOP;
+	}
+	return LXB_STATUS_OK;
+}
+
+/* Zeroes and initialises `parser` and `selectors`, applies `options` (plus
+ * quirks mode when applicable) and parses `selectors_str`.
+ *
+ * Returns the parsed selector list, or NULL after throwing a SYNTAX_ERR DOM
+ * exception. Either way the caller is expected to release everything with
+ * dom_selector_cleanup(). */
+static lxb_css_selector_list_t *dom_parse_selector(
+	lxb_css_parser_t *parser,
+	lxb_selectors_t *selectors,
+	const zend_string *selectors_str,
+	lxb_selectors_opt_t options,
+	const dom_object *intern
+)
+{
+	lxb_status_t status;
+
+	/* NOTE(review): initialisation failures are only asserted; presumably
+	 * builds without assertions carry on regardless -- confirm whether
+	 * either init call can fail in practice. */
+	memset(parser, 0, sizeof(lxb_css_parser_t));
+	status = lxb_css_parser_init(parser, NULL);
+	ZEND_ASSERT(status == LXB_STATUS_OK);
+
+	memset(selectors, 0, sizeof(lxb_selectors_t));
+	status = lxb_selectors_init(selectors);
+	ZEND_ASSERT(status == LXB_STATUS_OK);
+	lxb_selectors_opt_set(selectors, dom_quirks_opt(options, intern));
+
+	lxb_css_selector_list_t *list = lxb_css_selectors_parse(parser, (const lxb_char_t *) ZSTR_VAL(selectors_str), ZSTR_LEN(selectors_str));
+	if (UNEXPECTED(list == NULL)) {
+		/* Surface the first parser log message, if any, in the exception text. */
+		size_t nr_of_messages = lexbor_array_obj_length(&parser->log->messages);
+		if (nr_of_messages > 0) {
+			lxb_css_log_message_t *msg = lexbor_array_obj_get(&parser->log->messages, 0);
+			char *error;
+			zend_spprintf(&error, 0, "Invalid selector (%.*s)", (int) msg->text.length, msg->text.data);
+			php_dom_throw_error_with_message(SYNTAX_ERR, error, true);
+			efree(error);
+		} else {
+			php_dom_throw_error_with_message(SYNTAX_ERR, "Invalid selector", true);
+		}
+	}
+
+	return list;
+}
+
+/* Normalises a status returned by the selector engine. LXB_STATUS_OK and
+ * LXB_STATUS_STOP (a callback ended the search early) both map to
+ * LXB_STATUS_OK; any other status raises a ValueError for argument 1 and is
+ * returned unchanged. */
+static lxb_status_t dom_check_css_execution_status(lxb_status_t status)
+{
+	if (UNEXPECTED(status != LXB_STATUS_OK && status != LXB_STATUS_STOP)) {
+		zend_argument_value_error(1, "contains an unsupported selector");
+		return status;
+	}
+	return LXB_STATUS_OK;
+}
+
+/* Releases the selector list, the selectors object and the CSS parser set up
+ * by dom_parse_selector(). Callers also pass a NULL `list` after a failed
+ * parse. */
+static void dom_selector_cleanup(lxb_css_parser_t *parser, lxb_selectors_t *selectors, lxb_css_selector_list_t *list)
+{
+	lxb_css_selector_list_destroy_memory(list);
+	lxb_selectors_destroy(selectors);
+	(void) lxb_css_parser_destroy(parser, false);
+}
+
+/* Parses `selectors_str` and runs lxb_selectors_find() below `root`, calling
+ * `cb` with `ctx` for reported nodes.
+ *
+ * Returns LXB_STATUS_OK on success. On a parse error (LXB_STATUS_ERROR) or an
+ * engine failure an exception has been thrown. */
+static lxb_status_t dom_query_selector_common(
+	const xmlNode *root,
+	const dom_object *intern,
+	const zend_string *selectors_str,
+	lxb_selectors_cb_f cb,
+	void *ctx,
+	lxb_selectors_opt_t options
+)
+{
+	lxb_status_t status;
+
+	lxb_css_parser_t parser;
+	lxb_selectors_t selectors;
+
+	lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, options, intern);
+	if (UNEXPECTED(list == NULL)) {
+		status = LXB_STATUS_ERROR;
+	} else {
+		status = lxb_selectors_find(&selectors, root, list, cb, ctx);
+		status = dom_check_css_execution_status(status);
+	}
+
+	dom_selector_cleanup(&parser, &selectors, list);
+
+	return status;
+}
+
+/* Parses `selectors_str` and matches the single node `root` against it with
+ * lxb_selectors_match_node(); `ctx` must point to a
+ * dom_query_selector_matches_ctx that receives the outcome.
+ *
+ * Returns LXB_STATUS_OK on success; otherwise an exception has been thrown. */
+static lxb_status_t dom_query_matches(
+	const xmlNode *root,
+	const dom_object *intern,
+	const zend_string *selectors_str,
+	void *ctx
+)
+{
+	lxb_status_t status;
+
+	lxb_css_parser_t parser;
+	lxb_selectors_t selectors;
+
+	lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, LXB_SELECTORS_OPT_MATCH_FIRST, intern);
+	if (UNEXPECTED(list == NULL)) {
+		status = LXB_STATUS_ERROR;
+	} else {
+		status = lxb_selectors_match_node(&selectors, root, list, dom_query_selector_find_matches_callback, ctx);
+		status = dom_check_css_execution_status(status);
+	}
+
+	dom_selector_cleanup(&parser, &selectors, list);
+
+	return status;
+}
+
+/* Parses `selectors_str` once, then tests `root` and each node reached by
+ * following ->parent until one matches.
+ *
+ * Returns the first matching node, or NULL when nothing matches or an
+ * exception was thrown (parse error or engine failure). */
+static const xmlNode *dom_query_closest(
+	const xmlNode *root,
+	const dom_object *intern,
+	const zend_string *selectors_str
+)
+{
+	const xmlNode *ret = NULL;
+
+	lxb_css_parser_t parser;
+	lxb_selectors_t selectors;
+
+	lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, LXB_SELECTORS_OPT_MATCH_FIRST, intern);
+	if (EXPECTED(list != NULL)) {
+		/* NOTE(review): the walk only stops at a NULL parent, so the topmost
+		 * ancestor (presumably the document node) is also handed to
+		 * lxb_selectors_match_node() -- confirm non-element nodes are
+		 * handled there. */
+		const xmlNode *current = root;
+		while (current != NULL) {
+			dom_query_selector_matches_ctx ctx = { current, false };
+			lxb_status_t status = lxb_selectors_match_node(&selectors, current, list, dom_query_selector_find_matches_callback, &ctx);
+			status = dom_check_css_execution_status(status);
+			if (UNEXPECTED(status != LXB_STATUS_OK)) {
+				break;
+			}
+			if (ctx.result) {
+				ret = current;
+				break;
+			}
+			current = current->parent;
+		}
+	}
+
+	dom_selector_cleanup(&parser, &selectors, list);
+
+	return ret;
+}
+
+/* https://dom.spec.whatwg.org/#dom-parentnode-queryselector
+ *
+ * Sets return_value to the first node below `thisp` that matches, or to NULL
+ * when there is none or the query failed (an exception is pending then). */
+void dom_parent_node_query_selector(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str)
+{
+	xmlNodePtr result = NULL;
+
+	if (dom_query_selector_common(
+		thisp,
+		intern,
+		selectors_str,
+		dom_query_selector_find_single_callback,
+		&result,
+		LXB_SELECTORS_OPT_MATCH_FIRST
+	) != LXB_STATUS_OK || result == NULL) {
+		RETURN_NULL();
+	} else {
+		DOM_RET_OBJ(result, intern);
+	}
+}
+
+/* https://dom.spec.whatwg.org/#dom-parentnode-queryselectorall
+ *
+ * Sets return_value to a node list holding every node below `thisp` that
+ * matches; on failure the temporary array is destroyed and an exception is
+ * pending. */
+void dom_parent_node_query_selector_all(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str)
+{
+	HashTable *list = zend_new_array(0);
+	dom_query_selector_all_ctx ctx = { list, intern };
+
+	/* LXB_SELECTORS_OPT_MATCH_FIRST: without it the engine calls the callback
+	 * once per matching selector of a comma-separated list, which would put
+	 * the same node into the result several times (e.g. "div, div[id]"). */
+	if (dom_query_selector_common(
+		thisp,
+		intern,
+		selectors_str,
+		dom_query_selector_find_array_callback,
+		&ctx,
+		LXB_SELECTORS_OPT_MATCH_FIRST
+	) != LXB_STATUS_OK) {
+		zend_array_destroy(list);
+		RETURN_THROWS();
+	} else {
+		php_dom_create_iterator(return_value, DOM_NODELIST, true);
+		dom_object *ret_obj = Z_DOMOBJ_P(return_value);
+		dom_nnodemap_object *mapptr = (dom_nnodemap_object *) ret_obj->ptr;
+		ZVAL_ARR(&mapptr->baseobj_zv, list);
+		mapptr->nodetype = DOM_NODESET;
+	}
+}
+
+/* https://dom.spec.whatwg.org/#dom-element-matches
+ *
+ * Sets return_value to whether `thisp` itself matches the selectors. */
+void dom_element_matches(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str)
+{
+	dom_query_selector_matches_ctx ctx = { thisp, false };
+
+	if (dom_query_matches(
+		thisp,
+		intern,
+		selectors_str,
+		&ctx
+	) != LXB_STATUS_OK) {
+		RETURN_THROWS();
+	} else {
+		RETURN_BOOL(ctx.result);
+	}
+}
+
+/* https://dom.spec.whatwg.org/#dom-element-closest
+ *
+ * Sets return_value to the closest matching node found by
+ * dom_query_closest(). When there is none (no match, or an exception is
+ * pending) return_value is left untouched. */
+void dom_element_closest(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str)
+{
+	const xmlNode *result = dom_query_closest(thisp, intern, selectors_str);
+	if (EXPECTED(result != NULL)) {
+		DOM_RET_OBJ((xmlNodePtr) result, intern);
+	}
+}
+
+#endif
diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode/tree.c similarity index 99% rename from ext/dom/parentnode.c rename to ext/dom/parentnode/tree.c index 2af9d9dbf8a..f1cd4fd742e 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode/tree.c @@ -1,6 +1,4 @@ /* - +----------------------------------------------------------------------+ - | PHP Version 7 | +----------------------------------------------------------------------+ | Copyright (c) The PHP Group | +----------------------------------------------------------------------+
@@ -23,9 +21,9 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) -#include "php_dom.h" -#include "internal_helpers.h" -#include "dom_properties.h" +#include "../php_dom.h" +#include "../internal_helpers.h" +#include "../dom_properties.h" /* {{{ firstElementChild DomParentNode readonly=yes diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 1978e94d1b3..ec4d3d92d5c 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -130,8 +130,6 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type); zend_object *dom_xpath_objects_new(zend_class_entry *class_type); #endif bool dom_get_strict_error(php_libxml_ref_obj *document); -void php_dom_throw_error(dom_exception_code error_code, bool strict_error); -void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error); void node_list_unlink(xmlNodePtr node); int dom_check_qname(char *qname, char **localname, char **prefix, int uri_len, int name_len); xmlNsPtr dom_get_ns(xmlNodePtr node, char *uri, int *errorcode, char *prefix); @@ -199,6 +197,10 @@ bool php_dom_fragment_insertion_hierarchy_check_replace(xmlNodePtr parent, xmlNo void php_dom_node_append(php_libxml_ref_obj *document, xmlNodePtr node, xmlNodePtr parent); bool php_dom_pre_insert(php_libxml_ref_obj *document, xmlNodePtr node, xmlNodePtr parent, xmlNodePtr insertion_point); bool php_dom_pre_insert_is_parent_invalid(xmlNodePtr parent); +void dom_parent_node_query_selector(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str); +void dom_parent_node_query_selector_all(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str); +void dom_element_matches(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str); +void dom_element_closest(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str); /* nodemap and nodelist APIs */ xmlNodePtr 
php_dom_named_node_map_get_named_item(dom_nnodemap_object *objmap, const zend_string *named, bool may_transform); diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index 76be5f62171..08313bc7b8a 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -1106,6 +1106,9 @@ namespace Dom public function append(Node|string ...$nodes): void; public function prepend(Node|string ...$nodes): void; public function replaceChildren(Node|string ...$nodes): void; + + public function querySelector(string $selectors): ?Element; + public function querySelectorAll(string $selectors): NodeList; } interface ChildNode @@ -1372,6 +1375,11 @@ namespace Dom public function prepend(Node|string ...$nodes): void {} /** @implementation-alias DOMElement::replaceChildren */ public function replaceChildren(Node|string ...$nodes): void {} + + public function querySelector(string $selectors): ?Element {} + public function querySelectorAll(string $selectors): NodeList {} + public function closest(string $selectors): ?Element {} + public function matches(string $selectors): bool {} } class HTMLElement extends Element @@ -1492,6 +1500,11 @@ namespace Dom public function prepend(Node|string ...$nodes): void {} /** @implementation-alias DOMElement::replaceChildren */ public function replaceChildren(Node|string ...$nodes): void {} + + /** @implementation-alias Dom\Element::querySelector */ + public function querySelector(string $selectors): ?Element {} + /** @implementation-alias Dom\Element::querySelectorAll */ + public function querySelectorAll(string $selectors): NodeList {} } class Entity extends Node @@ -1585,6 +1598,11 @@ namespace Dom public function importLegacyNode(\DOMNode $node, bool $deep = false): Node {} + /** @implementation-alias Dom\Element::querySelector */ + public function querySelector(string $selectors): ?Element {} + /** @implementation-alias Dom\Element::querySelectorAll */ + public function querySelectorAll(string $selectors): NodeList {} + public 
?HTMLElement $body; /** @readonly */ public ?HTMLElement $head; diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index 4d80d238a26..0284b73d959 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: fd650b1e64c4ed4ce66b0dad9c681a51cb8ff1ae */ + * Stub hash: 28365949d78a2d0254cfdb0da6549e282d2eb436 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -567,6 +567,14 @@ ZEND_END_ARG_INFO() #define arginfo_class_Dom_ParentNode_replaceChildren arginfo_class_Dom_ParentNode_append +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_ParentNode_querySelector, 0, 1, Dom\\Element, 1) + ZEND_ARG_TYPE_INFO(0, selectors, IS_STRING, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_ParentNode_querySelectorAll, 0, 1, Dom\\\116odeList, 0) + ZEND_ARG_TYPE_INFO(0, selectors, IS_STRING, 0) +ZEND_END_ARG_INFO() + #define arginfo_class_Dom_ChildNode_remove arginfo_class_DOMChildNode_remove #define arginfo_class_Dom_ChildNode_before arginfo_class_Dom_ParentNode_append @@ -836,6 +844,16 @@ ZEND_END_ARG_INFO() #define arginfo_class_Dom_Element_replaceChildren arginfo_class_Dom_ParentNode_append +#define arginfo_class_Dom_Element_querySelector arginfo_class_Dom_ParentNode_querySelector + +#define arginfo_class_Dom_Element_querySelectorAll arginfo_class_Dom_ParentNode_querySelectorAll + +#define arginfo_class_Dom_Element_closest arginfo_class_Dom_ParentNode_querySelector + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_Dom_Element_matches, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, selectors, IS_STRING, 0) +ZEND_END_ARG_INFO() + #define arginfo_class_Dom_Attr_isId arginfo_class_Dom_Node_hasChildNodes ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_Dom_CharacterData_substringData, 0, 2, IS_STRING, 0) @@ -893,6 +911,10 @@ 
ZEND_END_ARG_INFO() #define arginfo_class_Dom_DocumentFragment_replaceChildren arginfo_class_Dom_ParentNode_append +#define arginfo_class_Dom_DocumentFragment_querySelector arginfo_class_Dom_ParentNode_querySelector + +#define arginfo_class_Dom_DocumentFragment_querySelectorAll arginfo_class_Dom_ParentNode_querySelectorAll + #define arginfo_class_Dom_Document_getElementsByTagName arginfo_class_Dom_Element_getElementsByTagName #define arginfo_class_Dom_Document_getElementsByTagNameNS arginfo_class_Dom_Element_getElementsByTagNameNS @@ -988,6 +1010,10 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_Document_importLegacyNo ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deep, _IS_BOOL, 0, "false") ZEND_END_ARG_INFO() +#define arginfo_class_Dom_Document_querySelector arginfo_class_Dom_ParentNode_querySelector + +#define arginfo_class_Dom_Document_querySelectorAll arginfo_class_Dom_ParentNode_querySelectorAll + ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createEmpty, 0, 0, Dom\\HTMLDocument, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"") ZEND_END_ARG_INFO() @@ -1272,6 +1298,10 @@ ZEND_METHOD(Dom_Element, getElementsByTagNameNS); ZEND_METHOD(Dom_Element, insertAdjacentElement); ZEND_METHOD(Dom_Element, insertAdjacentText); ZEND_METHOD(Dom_Element, setIdAttributeNode); +ZEND_METHOD(Dom_Element, querySelector); +ZEND_METHOD(Dom_Element, querySelectorAll); +ZEND_METHOD(Dom_Element, closest); +ZEND_METHOD(Dom_Element, matches); ZEND_METHOD(Dom_CharacterData, appendData); ZEND_METHOD(Dom_CharacterData, insertData); ZEND_METHOD(Dom_CharacterData, deleteData); @@ -1559,6 +1589,8 @@ static const zend_function_entry class_Dom_ParentNode_methods[] = { ZEND_RAW_FENTRY("append", NULL, arginfo_class_Dom_ParentNode_append, ZEND_ACC_PUBLIC|ZEND_ACC_ABSTRACT, NULL, NULL) ZEND_RAW_FENTRY("prepend", NULL, arginfo_class_Dom_ParentNode_prepend, ZEND_ACC_PUBLIC|ZEND_ACC_ABSTRACT, NULL, NULL) ZEND_RAW_FENTRY("replaceChildren", 
NULL, arginfo_class_Dom_ParentNode_replaceChildren, ZEND_ACC_PUBLIC|ZEND_ACC_ABSTRACT, NULL, NULL) + ZEND_RAW_FENTRY("querySelector", NULL, arginfo_class_Dom_ParentNode_querySelector, ZEND_ACC_PUBLIC|ZEND_ACC_ABSTRACT, NULL, NULL) + ZEND_RAW_FENTRY("querySelectorAll", NULL, arginfo_class_Dom_ParentNode_querySelectorAll, ZEND_ACC_PUBLIC|ZEND_ACC_ABSTRACT, NULL, NULL) ZEND_FE_END }; @@ -1671,6 +1703,10 @@ static const zend_function_entry class_Dom_Element_methods[] = { ZEND_RAW_FENTRY("append", zim_DOMElement_append, arginfo_class_Dom_Element_append, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("prepend", zim_DOMElement_prepend, arginfo_class_Dom_Element_prepend, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("replaceChildren", zim_DOMElement_replaceChildren, arginfo_class_Dom_Element_replaceChildren, ZEND_ACC_PUBLIC, NULL, NULL) + ZEND_ME(Dom_Element, querySelector, arginfo_class_Dom_Element_querySelector, ZEND_ACC_PUBLIC) + ZEND_ME(Dom_Element, querySelectorAll, arginfo_class_Dom_Element_querySelectorAll, ZEND_ACC_PUBLIC) + ZEND_ME(Dom_Element, closest, arginfo_class_Dom_Element_closest, ZEND_ACC_PUBLIC) + ZEND_ME(Dom_Element, matches, arginfo_class_Dom_Element_matches, ZEND_ACC_PUBLIC) ZEND_FE_END }; @@ -1726,6 +1762,8 @@ static const zend_function_entry class_Dom_DocumentFragment_methods[] = { ZEND_RAW_FENTRY("append", zim_DOMElement_append, arginfo_class_Dom_DocumentFragment_append, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("prepend", zim_DOMElement_prepend, arginfo_class_Dom_DocumentFragment_prepend, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("replaceChildren", zim_DOMElement_replaceChildren, arginfo_class_Dom_DocumentFragment_replaceChildren, ZEND_ACC_PUBLIC, NULL, NULL) + ZEND_RAW_FENTRY("querySelector", zim_Dom_Element_querySelector, arginfo_class_Dom_DocumentFragment_querySelector, ZEND_ACC_PUBLIC, NULL, NULL) + ZEND_RAW_FENTRY("querySelectorAll", zim_Dom_Element_querySelectorAll, arginfo_class_Dom_DocumentFragment_querySelectorAll, ZEND_ACC_PUBLIC, 
NULL, NULL) ZEND_FE_END }; @@ -1773,6 +1811,8 @@ static const zend_function_entry class_Dom_Document_methods[] = { ZEND_RAW_FENTRY("prepend", zim_DOMElement_prepend, arginfo_class_Dom_Document_prepend, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("replaceChildren", zim_DOMDocument_replaceChildren, arginfo_class_Dom_Document_replaceChildren, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_ME(Dom_Document, importLegacyNode, arginfo_class_Dom_Document_importLegacyNode, ZEND_ACC_PUBLIC) + ZEND_RAW_FENTRY("querySelector", zim_Dom_Element_querySelector, arginfo_class_Dom_Document_querySelector, ZEND_ACC_PUBLIC, NULL, NULL) + ZEND_RAW_FENTRY("querySelectorAll", zim_Dom_Element_querySelectorAll, arginfo_class_Dom_Document_querySelectorAll, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_FE_END }; diff --git a/ext/dom/tests/modern/css_selectors/attribute.phpt b/ext/dom/tests/modern/css_selectors/attribute.phpt new file mode 100644 index 00000000000..05ec94ee02b --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/attribute.phpt @@ -0,0 +1,118 @@ +--TEST-- +CSS Selectors - Attribute +--EXTENSIONS-- +dom +--FILE-- + + + + + + +XML); + +echo "=== Case sensitive ===\n"; + +test_helper($dom, 'a[title]'); +test_helper($dom, 'a[title="http://example.com"]'); +test_helper($dom, 'a[title="http://example."]'); +test_helper($dom, 'a[title*="example"]'); +test_helper($dom, 'a[title*=""]'); +test_helper($dom, 'a[title^="HTTP"]'); +test_helper($dom, 'a[title^="http"]'); +test_helper($dom, 'a[title^="http"][title$=".be"]'); +test_helper($dom, 'a[title$=".com"]'); +test_helper($dom, 'a[title$=".foo"]'); +test_helper($dom, 'a[lang|="nl"]'); +test_helper($dom, 'a[lang|="nl-be"]'); +test_helper($dom, 'a[tokens~="def"]'); +test_helper($dom, 'a[tokens~="de"]'); +test_helper($dom, 'a[tokens~="def ghi"]'); + +echo "=== Case insensitive ===\n"; + +test_helper($dom, 'a[title]'); +test_helper($dom, 'a[title="http://example.COM" i]'); +test_helper($dom, 'a[title="http://EXAMPLE." 
i]'); +test_helper($dom, 'a[title*="ExAmPlE" i]'); +test_helper($dom, 'a[title^="HTTP" i]'); +test_helper($dom, 'a[title^="HTTP" i][title$=".be"]'); +test_helper($dom, 'a[title$=".COM" i]'); +test_helper($dom, 'a[lang|="NL" i]'); +test_helper($dom, 'a[lang|="NL-BE" i]'); +test_helper($dom, 'a[tokens~="DE" i]'); +test_helper($dom, 'a[tokens~="DEF" i]'); +test_helper($dom, 'a[tokens~="DEF GHI" i]'); + +?> +--EXPECT-- +=== Case sensitive === +--- Selector: a[title] --- + + + + +--- Selector: a[title="http://example.com"] --- + +--- Selector: a[title="http://example."] --- +--- Selector: a[title*="example"] --- + + + + +--- Selector: a[title*=""] --- +--- Selector: a[title^="HTTP"] --- +--- Selector: a[title^="http"] --- + + +--- Selector: a[title^="http"][title$=".be"] --- + +--- Selector: a[title$=".com"] --- + +--- Selector: a[title$=".foo"] --- +--- Selector: a[lang|="nl"] --- + + +--- Selector: a[lang|="nl-be"] --- + + +--- Selector: a[tokens~="def"] --- + +--- Selector: a[tokens~="de"] --- +--- Selector: a[tokens~="def ghi"] --- +=== Case insensitive === +--- Selector: a[title] --- + + + + +--- Selector: a[title="http://example.COM" i] --- + +--- Selector: a[title="http://EXAMPLE." i] --- +--- Selector: a[title*="ExAmPlE" i] --- + + + + +--- Selector: a[title^="HTTP" i] --- + + +--- Selector: a[title^="HTTP" i][title$=".be"] --- + +--- Selector: a[title$=".COM" i] --- + +--- Selector: a[lang|="NL" i] --- + + +--- Selector: a[lang|="NL-BE" i] --- + + +--- Selector: a[tokens~="DE" i] --- +--- Selector: a[tokens~="DEF" i] --- + +--- Selector: a[tokens~="DEF GHI" i] --- diff --git a/ext/dom/tests/modern/css_selectors/closest.phpt b/ext/dom/tests/modern/css_selectors/closest.phpt new file mode 100644 index 00000000000..363ebe9f2fc --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/closest.phpt @@ -0,0 +1,49 @@ +--TEST-- +Test DOM\Element::closest() method: legit cases +--EXTENSIONS-- +dom +--FILE-- + + +
+
+
+
+
+ +XML; + +$dom = DOM\XMLDocument::createFromString($xml); + +function test($el, $selector) { + echo "--- Selector: $selector ---\n"; + var_dump($el->closest($selector)?->getAttribute('xml:id')); +} + +test($dom->getElementById('div3'), 'div'); +test($dom->getElementById('div3'), '[class="foo"]'); +test($dom->getElementById('div3'), ':not(root)'); +test($dom->getElementById('div3'), ':not(div)'); +test($dom->getElementById('div3'), 'a'); +test($dom->getElementById('div3'), 'root :not(div[class])'); +test($dom->getElementById('div3'), 'root > :not(div[class])'); + +?> +--EXPECT-- +--- Selector: div --- +string(4) "div3" +--- Selector: [class="foo"] --- +string(4) "div1" +--- Selector: :not(root) --- +string(4) "div3" +--- Selector: :not(div) --- +NULL +--- Selector: a --- +NULL +--- Selector: root :not(div[class]) --- +string(4) "div2" +--- Selector: root > :not(div[class]) --- +NULL diff --git a/ext/dom/tests/modern/css_selectors/closest_invalid_selector.phpt b/ext/dom/tests/modern/css_selectors/closest_invalid_selector.phpt new file mode 100644 index 00000000000..81db6a72b68 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/closest_invalid_selector.phpt @@ -0,0 +1,18 @@ +--TEST-- +Test DOM\Element::closest() method: invalid selector +--EXTENSIONS-- +dom +--FILE-- +"); + +try { + var_dump($dom->documentElement->closest('@invalid')); +} catch (DOMException $e) { + echo $e->getMessage(); +} + +?> +--EXPECT-- +Invalid selector (Selectors. Unexpected token: @invalid) diff --git a/ext/dom/tests/modern/css_selectors/combinators.phpt b/ext/dom/tests/modern/css_selectors/combinators.phpt new file mode 100644 index 00000000000..b43b07f59e8 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/combinators.phpt @@ -0,0 +1,113 @@ +--TEST-- +CSS Selectors - Combinators +--EXTENSIONS-- +dom +--FILE-- + + + +

First p

+

Second p

+ +
+

Third p

+ + +
+

Fourth p

+
+
+
+

Fifth p

+
+ + + + + + + + + + + +
ABC
+ + +HTML); + +test_helper($dom, 'nonsense'); +test_helper($dom, 'p'); +test_helper($dom, 'p, img'); +test_helper($dom, 'body p'); +test_helper($dom, 'body div p'); +test_helper($dom, 'div > *'); +test_helper($dom, 'div > p'); +test_helper($dom, 'div > p + img'); +test_helper($dom, 'div > p ~ img'); +test_helper($dom, 'body > img'); +test_helper($dom, 'div.bar.baz > p'); +test_helper($dom, 'article[title].bar p'); + +try { + test_helper($dom, 'col.selected||td'); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} + +?> +--EXPECT-- +--- Selector: nonsense --- +--- Selector: p --- +

First p

+

Second p

+

Third p

+

Fourth p

+

Fifth p

+--- Selector: p, img --- +

First p

+

Second p

+ +

Third p

+ + +

Fourth p

+

Fifth p

+--- Selector: body p --- +

First p

+

Second p

+

Third p

+

Fourth p

+

Fifth p

+--- Selector: body div p --- +

Third p

+

Fourth p

+--- Selector: div > * --- +

Third p

+ + +
+

Fourth p

+
+

Fourth p

+--- Selector: div > p --- +

Third p

+

Fourth p

+--- Selector: div > p + img --- + +--- Selector: div > p ~ img --- + + +--- Selector: body > img --- + +--- Selector: div.bar.baz > p --- +

Fourth p

+--- Selector: article[title].bar p --- +

Fifth p

+--- Selector: col.selected||td --- +Dom\Document::querySelectorAll(): Argument #1 ($selectors) contains an unsupported selector diff --git a/ext/dom/tests/modern/css_selectors/entities.phpt b/ext/dom/tests/modern/css_selectors/entities.phpt new file mode 100644 index 00000000000..ff25122c95d --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/entities.phpt @@ -0,0 +1,26 @@ +--TEST-- +CSS Selectors - Handling entities +--EXTENSIONS-- +dom +--FILE-- + +]> + + + +XML); +var_dump($dom->querySelector('input:checked')->nodeName); +var_dump($dom->querySelector('input[type$="ox"]')->nodeName); +var_dump($dom->querySelector('input.aboxa')->nodeName); +var_dump($dom->querySelector('input#aboxa')->nodeName); + +?> +--EXPECT-- +string(5) "input" +string(5) "input" +string(5) "input" +string(5) "input" diff --git a/ext/dom/tests/modern/css_selectors/id.phpt b/ext/dom/tests/modern/css_selectors/id.phpt new file mode 100644 index 00000000000..21af9b1f1a6 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/id.phpt @@ -0,0 +1,35 @@ +--TEST-- +CSS Selectors - ID +--EXTENSIONS-- +dom +--FILE-- + + + + + + + +XML); + +test_helper($dom, '#test'); +test_helper($dom, '#test1'); +test_helper($dom, '#test2'); +test_helper($dom, '#test3'); +test_helper($dom, '#test4'); + +?> +--EXPECT-- +--- Selector: #test --- +--- Selector: #test1 --- + +--- Selector: #test2 --- + +--- Selector: #test3 --- + +--- Selector: #test4 --- diff --git a/ext/dom/tests/modern/css_selectors/matches_invalid_selector.phpt b/ext/dom/tests/modern/css_selectors/matches_invalid_selector.phpt new file mode 100644 index 00000000000..11cc4a942d2 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/matches_invalid_selector.phpt @@ -0,0 +1,18 @@ +--TEST-- +Test DOM\Element::matches() method: invalid selector +--EXTENSIONS-- +dom +--FILE-- +"); + +try { + var_dump($dom->documentElement->matches('@invalid')); +} catch (DOMException $e) { + echo $e->getMessage(); +} + +?> +--EXPECT-- +Invalid selector (Selectors. 
Unexpected token: @invalid) diff --git a/ext/dom/tests/modern/css_selectors/namespaces.phpt b/ext/dom/tests/modern/css_selectors/namespaces.phpt new file mode 100644 index 00000000000..df4782d1191 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/namespaces.phpt @@ -0,0 +1,54 @@ +--TEST-- +CSS Selectors - Namespaces +--EXTENSIONS-- +dom +--FILE-- + + + +
+ + + + + + +XML); + +$container = $dom->documentElement->firstElementChild; +$container->setAttribute("foo:bar", "baz"); +$container->setAttributeNS("urn:a", "a:bar", "baz"); + +test_helper($dom, 'container[align]'); +test_helper($dom, 'container[foo\\:bar]'); +test_helper($dom, 'container[a\\:bar]'); +test_helper($dom, 'container[bar]'); + +test_helper($dom, 'a:first-of-type'); +test_helper($dom, 'a:last-of-type'); + +test_failure($dom, 'container[* | bar]'); + +?> +--EXPECT-- +--- Selector: container[align] --- + +--- Selector: container[foo\:bar] --- + +--- Selector: container[a\:bar] --- +--- Selector: container[bar] --- +--- Selector: a:first-of-type --- + + + +--- Selector: a:last-of-type --- + + + +--- Selector: container[* | bar] --- +Code 12 Invalid selector (Selectors. Unexpected token: *) diff --git a/ext/dom/tests/modern/css_selectors/pseudo_classes_blank.phpt b/ext/dom/tests/modern/css_selectors/pseudo_classes_blank.phpt new file mode 100644 index 00000000000..b1be7db685d --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/pseudo_classes_blank.phpt @@ -0,0 +1,23 @@ +--TEST-- +CSS Selectors - Pseudo classes: blank +--EXTENSIONS-- +dom +--FILE-- + +XML); + +try { + test_helper($dom, ':blank'); +} catch (DOMException $e) { + echo $e->getMessage(), "\n"; +} + +?> +--EXPECT-- +--- Selector: :blank --- +:blank selector is not implemented because CSSWG has not yet decided its semantics (https://github.com/w3c/csswg-drafts/issues/1967) diff --git a/ext/dom/tests/modern/css_selectors/pseudo_classes_checked.phpt b/ext/dom/tests/modern/css_selectors/pseudo_classes_checked.phpt new file mode 100644 index 00000000000..6341386c49c --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/pseudo_classes_checked.phpt @@ -0,0 +1,27 @@ +--TEST-- +CSS Selectors - Pseudo classes: checked +--EXTENSIONS-- +dom +--FILE-- + + + + diff --git a/ext/dom/tests/modern/css_selectors/pseudo_classes_current.phpt b/ext/dom/tests/modern/css_selectors/pseudo_classes_current.phpt new 
file mode 100644 index 00000000000..101a3d397ed --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/pseudo_classes_current.phpt @@ -0,0 +1,21 @@ +--TEST-- +CSS Selectors - Pseudo classes: current +--EXTENSIONS-- +dom +--FILE-- + +
+ +XML); + +test_helper($dom, ':current(div)'); + +?> +--EXPECT-- +--- Selector: :current(div) --- +
diff --git a/ext/dom/tests/modern/css_selectors/pseudo_classes_dir.phpt b/ext/dom/tests/modern/css_selectors/pseudo_classes_dir.phpt new file mode 100644 index 00000000000..1f82f504697 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/pseudo_classes_dir.phpt @@ -0,0 +1,27 @@ +--TEST-- +CSS Selectors - Pseudo classes: dir +--EXTENSIONS-- +dom +--FILE-- + +

1

+ +

2

+ + +XML); + +test_failure($dom, ':dir(rtl)', true); +test_failure($dom, ':dir(ltr)', true); + +?> +--EXPECT-- +--- Selector: :dir(rtl) --- +Code 12 Invalid selector (Selectors. Not supported: dir) +--- Selector: :dir(ltr) --- +Code 12 Invalid selector (Selectors. Not supported: dir) diff --git a/ext/dom/tests/modern/css_selectors/pseudo_classes_disabled_enabled.phpt b/ext/dom/tests/modern/css_selectors/pseudo_classes_disabled_enabled.phpt new file mode 100644 index 00000000000..03b88456d12 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/pseudo_classes_disabled_enabled.phpt @@ -0,0 +1,107 @@ +--TEST-- +CSS Selectors - Pseudo classes: disabled/enabled +--EXTENSIONS-- +dom +--FILE-- + + + + + + + + + +
+
+
+
+
+ + +
+
+
+
+
+
+
+
+ + +