From 7defc23532a31e2d5c8dc3723b1d196dfcd85cef Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 27 Apr 2024 23:25:01 +0200 Subject: [PATCH] Import Lexbor CSS component and update the patches for it --- ext/dom/lexbor/lexbor/css/css.c | 279 ++ ext/dom/lexbor/lexbor/css/log.c | 336 ++ ext/dom/lexbor/lexbor/css/parser.c | 339 ++ ext/dom/lexbor/lexbor/css/rule.h | 4 +- ext/dom/lexbor/lexbor/css/selectors/pseudo.c | 102 + .../lexbor/css/selectors/pseudo_state.c | 313 ++ .../lexbor/lexbor/css/selectors/selector.c | 942 ++++++ .../lexbor/lexbor/css/selectors/selectors.c | 342 ++ ext/dom/lexbor/lexbor/css/selectors/state.c | 2053 ++++++++++++ ext/dom/lexbor/lexbor/css/state.c | 51 + ext/dom/lexbor/lexbor/css/syntax/anb.c | 487 +++ ext/dom/lexbor/lexbor/css/syntax/parser.c | 1795 +++++++++++ ext/dom/lexbor/lexbor/css/syntax/state.c | 2744 +++++++++++++++++ ext/dom/lexbor/lexbor/css/syntax/syntax.c | 279 ++ ext/dom/lexbor/lexbor/css/syntax/token.c | 648 ++++ ext/dom/lexbor/lexbor/css/syntax/tokenizer.c | 709 +++++ .../lexbor/css/syntax/tokenizer/error.c | 30 + .../lexbor/lexbor/html/interfaces/document.c | 15 +- .../lexbor/lexbor/html/interfaces/document.h | 2 - ext/dom/lexbor/lexbor/html/tokenizer/res.h | 0 ...nd-column-information-for-use-in-PHP.patch | 22 +- ...-added-nodes-for-options-use-in-PHP.patch} | 14 +- ...nd-data-structure-to-be-able-to-gen.patch} | 8 +- ...e-unused-upper-case-tag-static-data.patch} | 8 +- ...k-size-of-static-binary-search-tree.patch} | 14 +- ...006-Patch-out-unused-CSS-style-code.patch} | 144 +- ext/dom/lexbor/patches/README.md | 22 +- 27 files changed, 11629 insertions(+), 73 deletions(-) create mode 100644 ext/dom/lexbor/lexbor/css/css.c create mode 100644 ext/dom/lexbor/lexbor/css/log.c create mode 100644 ext/dom/lexbor/lexbor/css/parser.c create mode 100644 ext/dom/lexbor/lexbor/css/selectors/pseudo.c create mode 100644 ext/dom/lexbor/lexbor/css/selectors/pseudo_state.c create mode 100644 
ext/dom/lexbor/lexbor/css/selectors/selector.c create mode 100644 ext/dom/lexbor/lexbor/css/selectors/selectors.c create mode 100644 ext/dom/lexbor/lexbor/css/selectors/state.c create mode 100644 ext/dom/lexbor/lexbor/css/state.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/anb.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/parser.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/state.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/syntax.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/token.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/tokenizer.c create mode 100644 ext/dom/lexbor/lexbor/css/syntax/tokenizer/error.c mode change 100755 => 100644 ext/dom/lexbor/lexbor/html/tokenizer/res.h rename ext/dom/lexbor/patches/{0001-Track-implied-added-nodes-for-options-use-in-PHP.patch => 0002-Track-implied-added-nodes-for-options-use-in-PHP.patch} (90%) rename ext/dom/lexbor/patches/{0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch => 0003-Patch-utilities-and-data-structure-to-be-able-to-gen.patch} (95%) rename ext/dom/lexbor/patches/{0001-Remove-unused-upper-case-tag-static-data.patch => 0004-Remove-unused-upper-case-tag-static-data.patch} (91%) rename ext/dom/lexbor/patches/{0001-Shrink-size-of-static-binary-search-tree.patch => 0005-Shrink-size-of-static-binary-search-tree.patch} (93%) rename ext/dom/lexbor/patches/{0001-Patch-out-CSS-parser.patch => 0006-Patch-out-unused-CSS-style-code.patch} (70%) diff --git a/ext/dom/lexbor/lexbor/css/css.c b/ext/dom/lexbor/lexbor/css/css.c new file mode 100644 index 00000000000..3b6f0cb0f2f --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/css.c @@ -0,0 +1,279 @@ +/* + * Copyright (C) 2021-2022 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/css.h" + + +typedef struct { + lexbor_str_t *str; + lexbor_mraw_t *mraw; +} +lxb_css_str_ctx_t; + + +static lxb_status_t +lxb_css_str_cb(const lxb_char_t *data, size_t len, void *cb_ctx); + + +lxb_css_memory_t * 
+lxb_css_memory_create(void) +{ + return lexbor_calloc(1, sizeof(lxb_css_memory_t)); +} + +lxb_status_t +lxb_css_memory_init(lxb_css_memory_t *memory, size_t prepare_count) +{ + lxb_status_t status; + + static const size_t size_mem = lexbor_max(sizeof(lxb_css_selector_t), + sizeof(lxb_css_selector_list_t)); + + if (memory == NULL) { + return LXB_STATUS_ERROR_INCOMPLETE_OBJECT; + } + + if (prepare_count < 64) { + prepare_count = 64; + } + + if (memory->objs == NULL) { + memory->objs = lexbor_dobject_create(); + status = lexbor_dobject_init(memory->objs, prepare_count, size_mem); + if (status != LXB_STATUS_OK) { + goto failed; + } + } + + if (memory->tree == NULL) { + prepare_count = prepare_count * 96; + + memory->tree = lexbor_mraw_create(); + status = lexbor_mraw_init(memory->tree, prepare_count); + if (status != LXB_STATUS_OK) { + goto failed; + } + } + + if (memory->mraw == NULL) { + memory->mraw = lexbor_mraw_create(); + status = lexbor_mraw_init(memory->mraw, 4096); + if (status != LXB_STATUS_OK) { + goto failed; + } + } + + memory->ref_count = 1; + + return LXB_STATUS_OK; + +failed: + + (void) lxb_css_memory_destroy(memory, false); + + return status; +} + +void +lxb_css_memory_clean(lxb_css_memory_t *memory) +{ + if (memory->objs != NULL) { + lexbor_dobject_clean(memory->objs); + } + + if (memory->mraw != NULL) { + lexbor_mraw_clean(memory->mraw); + } + + if (memory->tree != NULL) { + lexbor_mraw_clean(memory->tree); + } +} + +lxb_css_memory_t * +lxb_css_memory_destroy(lxb_css_memory_t *memory, bool self_destroy) +{ + if (memory == NULL) { + return NULL; + } + + if (memory->objs != NULL) { + memory->objs = lexbor_dobject_destroy(memory->objs, true); + } + + if (memory->mraw != NULL) { + memory->mraw = lexbor_mraw_destroy(memory->mraw, true); + } + + if (memory->tree != NULL) { + memory->tree = lexbor_mraw_destroy(memory->tree, true); + } + + if (self_destroy) { + return lexbor_free(memory); + } + + return memory; +} + +lxb_css_memory_t * 
+lxb_css_memory_ref_inc(lxb_css_memory_t *memory) +{ + if (SIZE_MAX - memory->ref_count == 0) { + return NULL; + } + + memory->ref_count++; + + return memory; +} + +void +lxb_css_memory_ref_dec(lxb_css_memory_t *memory) +{ + if (memory->ref_count > 0) { + memory->ref_count--; + } +} + +lxb_css_memory_t * +lxb_css_memory_ref_dec_destroy(lxb_css_memory_t *memory) +{ + if (memory->ref_count > 0) { + memory->ref_count--; + } + + if (memory->ref_count == 0) { + return lxb_css_memory_destroy(memory, true); + } + + return memory; +} + +lxb_status_t +lxb_css_make_data(lxb_css_parser_t *parser, lexbor_str_t *str, + uintptr_t begin, uintptr_t end) +{ + size_t length, offlen, len; + const lxb_char_t *pos; + const lexbor_str_t *tmp; + + tmp = &parser->str; + + offlen = begin - parser->offset; + length = end - begin; + + if (str->data == NULL) { + (void) lexbor_str_init(str, parser->memory->mraw, length); + if (str->data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + } + + if (tmp->length > offlen) { + len = tmp->length - offlen; + + if (len >= length) { + memcpy(str->data + str->length, tmp->data + offlen, length); + goto done; + } + else { + memcpy(str->data + str->length, tmp->data + offlen, len); + } + + str->length += len; + + pos = parser->pos; + length -= len; + } + else { + pos = parser->pos + (offlen - tmp->length); + } + + memcpy(str->data + str->length, pos, length); + +done: + + str->length += length; + str->data[str->length] = '\0'; + + return LXB_STATUS_OK; +} + +lxb_char_t * +lxb_css_serialize_char_handler(const void *style, lxb_css_style_serialize_f cb, + size_t *out_length) +{ + size_t length = 0; + lxb_status_t status; + lexbor_str_t str; + + status = cb(style, lexbor_serialize_length_cb, &length); + if (status != LXB_STATUS_OK) { + goto failed; + } + + /* + 1 == '\0' */ + str.data = lexbor_malloc(length + 1); + if (str.data == NULL) { + goto failed; + } + + str.length = 0; + + status = cb(style, lexbor_serialize_copy_cb, &str); + if (status != 
LXB_STATUS_OK) { + lexbor_free(str.data); + goto failed; + } + + str.data[str.length] = '\0'; + + if (out_length != NULL) { + *out_length = str.length; + } + + return str.data; + +failed: + + if (out_length != NULL) { + *out_length = 0; + } + + return NULL; +} + +lxb_status_t +lxb_css_serialize_str_handler(const void *style, lexbor_str_t *str, + lexbor_mraw_t *mraw, + lxb_css_style_serialize_f cb) +{ + lxb_css_str_ctx_t ctx; + + ctx.str = str; + ctx.mraw = mraw; + + if (str->data == NULL) { + lexbor_str_init(str, mraw, 1); + if (str->data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + } + + return cb(style, lxb_css_str_cb, &ctx); +} + +static lxb_status_t +lxb_css_str_cb(const lxb_char_t *data, size_t len, void *cb_ctx) +{ + lxb_char_t *ptr; + lxb_css_str_ctx_t *ctx = (lxb_css_str_ctx_t *) cb_ctx; + + ptr = lexbor_str_append(ctx->str, ctx->mraw, data, len); + + return (ptr != NULL) ? LXB_STATUS_OK : LXB_STATUS_ERROR_MEMORY_ALLOCATION; +} diff --git a/ext/dom/lexbor/lexbor/css/log.c b/ext/dom/lexbor/lexbor/css/log.c new file mode 100644 index 00000000000..e9b244a450d --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/log.c @@ -0,0 +1,336 @@ +/* + * Copyright (C) 2021 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/core/print.h" +#include "lexbor/core/serialize.h" +#include "lexbor/css/log.h" + + +typedef struct { + const char *msg; + size_t length; +} +lxb_css_log_type_str_t; + + +static const lxb_css_log_type_str_t lxb_css_log_types_map[] = { + {"Info", 4}, + {"Warning", 7}, + {"Error", 5}, + {"Syntax error", 12} +}; + + +lxb_css_log_t * +lxb_css_log_create(void) +{ + return lexbor_calloc(1, sizeof(lxb_css_log_t)); +} + +lxb_status_t +lxb_css_log_init(lxb_css_log_t *log, lexbor_mraw_t *mraw) +{ + lxb_status_t status; + + if (log == NULL) { + return LXB_STATUS_ERROR_OBJECT_IS_NULL; + } + + status = lexbor_array_obj_init(&log->messages, 64, + sizeof(lxb_css_log_message_t)); + if (status != LXB_STATUS_OK) { + memset(&log->mraw, 
0, sizeof(lexbor_mraw_t)); + return status; + } + + if (mraw != NULL) { + log->mraw = mraw; + log->self_mraw = false; + return LXB_STATUS_OK; + } + + log->self_mraw = true; + + log->mraw = lexbor_mraw_create(); + + return lexbor_mraw_init(log->mraw, 4096); +} + +void +lxb_css_log_clean(lxb_css_log_t *log) +{ + if (log != NULL) { + lexbor_array_obj_clean(&log->messages); + + if (log->self_mraw) { + lexbor_mraw_clean(log->mraw); + } + } +} + +lxb_css_log_t * +lxb_css_log_destroy(lxb_css_log_t *log, bool self_destroy) +{ + if (log == NULL) { + return NULL; + } + + (void) lexbor_array_obj_destroy(&log->messages, false); + + if (log->self_mraw) { + (void) lexbor_mraw_destroy(log->mraw, true); + } + + if (self_destroy) { + log = lexbor_free(log); + } + + return log; +} + +lxb_css_log_message_t * +lxb_css_log_append(lxb_css_log_t *log, lxb_css_log_type_t type, + const lxb_char_t *str, size_t length) +{ + lxb_css_log_message_t *msg; + + msg = lexbor_array_obj_push(&log->messages); + if (msg == NULL) { + return NULL; + } + + if (lexbor_str_init(&msg->text, log->mraw, length) == NULL) { + lexbor_array_obj_pop(&log->messages); + return NULL; + } + + memcpy(msg->text.data, str, length); + msg->text.length = length; + + msg->text.data[length] = '\0'; + + msg->type = type; + + return msg; +} + +lxb_css_log_message_t * +lxb_css_log_push(lxb_css_log_t *log, lxb_css_log_type_t type, size_t length) +{ + lxb_css_log_message_t *msg; + + msg = lexbor_array_obj_push(&log->messages); + if (msg == NULL) { + return NULL; + } + + if (lexbor_str_init(&msg->text, log->mraw, length) == NULL) { + lexbor_array_obj_pop(&log->messages); + return NULL; + } + + msg->type = type; + + return msg; +} + +lxb_css_log_message_t * +lxb_css_log_format(lxb_css_log_t *log, lxb_css_log_type_t type, + const char *format, ...) 
+{ + size_t psize; + lxb_css_log_message_t *msg; + va_list va; + + va_start(va, format); + psize = lexbor_vprintf_size(format, va); + va_end(va); + + if (psize == LXB_PRINT_ERROR) { + return NULL; + } + + msg = lxb_css_log_push(log, LXB_CSS_LOG_SYNTAX_ERROR, psize); + if (msg == NULL) { + return NULL; + } + + va_start(va, format); + (void) lexbor_vsprintf(msg->text.data, psize, format, va); + va_end(va); + + msg->text.length = psize; + + return msg; +} + +lxb_css_log_message_t * +lxb_css_log_not_supported(lxb_css_log_t *log, + const char *module_name, const char *description) +{ + static const char unexpected[] = "%s. Not supported: %s"; + + return lxb_css_log_format(log, LXB_CSS_LOG_SYNTAX_ERROR, unexpected, + module_name, description); +} + +const lxb_char_t * +lxb_css_log_type_by_id(lxb_css_log_type_t type, size_t *out_length) +{ + if (out_length != NULL) { + *out_length = lxb_css_log_types_map[type].length; + } + + return (const lxb_char_t *) lxb_css_log_types_map[type].msg; +} + +lxb_status_t +lxb_css_log_serialize(lxb_css_log_t *log, lexbor_serialize_cb_f cb, void *ctx, + const lxb_char_t *indent, size_t indent_length) +{ + size_t i; + lxb_status_t status; + lxb_css_log_message_t *msg; + + if (log->messages.length == 0) { + return LXB_STATUS_OK; + } + + i = 0; + + do { + msg = lexbor_array_obj_get(&log->messages, i); + + if (indent != NULL) { + lexbor_serialize_write(cb, indent, indent_length, ctx, status); + } + + status = lxb_css_log_message_serialize(msg, cb, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + i++; + + if (i == log->messages.length) { + break; + } + + lexbor_serialize_write(cb, "\n", 1, ctx, status); + } + while (true); + + return LXB_STATUS_OK; +} + +lxb_char_t * +lxb_css_log_serialize_char(lxb_css_log_t *log, size_t *out_length, + const lxb_char_t *indent, size_t indent_length) +{ + size_t length = 0; + lxb_status_t status; + lexbor_str_t str; + + status = lxb_css_log_serialize(log, lexbor_serialize_length_cb, &length, + 
indent, indent_length); + if (status != LXB_STATUS_OK) { + goto failed; + } + + /* + 1 == '\0' */ + str.data = lexbor_malloc(length + 1); + if (str.data == NULL) { + goto failed; + } + + str.length = 0; + + status = lxb_css_log_serialize(log, lexbor_serialize_copy_cb, &str, + indent, indent_length); + if (status != LXB_STATUS_OK) { + lexbor_free(str.data); + goto failed; + } + + str.data[str.length] = '\0'; + + if (out_length != NULL) { + *out_length = str.length; + } + + return str.data; + +failed: + + if (out_length != NULL) { + *out_length = 0; + } + + return NULL; +} + + +lxb_status_t +lxb_css_log_message_serialize(lxb_css_log_message_t *msg, + lexbor_serialize_cb_f cb, void *ctx) +{ + size_t length; + lxb_status_t status; + const lxb_char_t *type_name; + + type_name = lxb_css_log_type_by_id(msg->type, &length); + + lexbor_serialize_write(cb, type_name, length, ctx, status); + lexbor_serialize_write(cb, ". ", 2, ctx, status); + lexbor_serialize_write(cb, msg->text.data, msg->text.length, ctx, status); + + return LXB_STATUS_OK; +} + +lxb_char_t * +lxb_css_log_message_serialize_char(lxb_css_log_message_t *msg, + size_t *out_length) +{ + size_t length = 0; + lxb_status_t status; + lexbor_str_t str; + + status = lxb_css_log_message_serialize(msg, lexbor_serialize_length_cb, + &length); + if (status != LXB_STATUS_OK) { + goto failed; + } + + /* + 1 == '\0' */ + str.data = lexbor_malloc(length + 1); + if (str.data == NULL) { + goto failed; + } + + str.length = 0; + + status = lxb_css_log_message_serialize(msg, lexbor_serialize_copy_cb, &str); + if (status != LXB_STATUS_OK) { + lexbor_free(str.data); + goto failed; + } + + str.data[str.length] = '\0'; + + if (out_length != NULL) { + *out_length = str.length; + } + + return str.data; + +failed: + + if (out_length != NULL) { + *out_length = 0; + } + + return NULL; +} diff --git a/ext/dom/lexbor/lexbor/css/parser.c b/ext/dom/lexbor/lexbor/css/parser.c new file mode 100644 index 00000000000..44a6a64e0cb --- /dev/null +++ 
b/ext/dom/lexbor/lexbor/css/parser.c @@ -0,0 +1,339 @@ +/* + * Copyright (C) 2021 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/parser.h" +#include "lexbor/css/state.h" +#include "lexbor/css/syntax/syntax.h" + + +lxb_css_parser_t * +lxb_css_parser_create(void) +{ + return lexbor_calloc(1, sizeof(lxb_css_parser_t)); +} + +lxb_status_t +lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz) +{ + lxb_status_t status; + static const size_t lxb_rules_length = 128; + static const size_t lxb_states_length = 1024; + + if (parser == NULL) { + return LXB_STATUS_ERROR_OBJECT_IS_NULL; + } + + /* Stack */ + parser->states_begin = lexbor_malloc(sizeof(lxb_css_parser_state_t) + * lxb_states_length); + if (parser->states_begin == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + parser->states = parser->states_begin; + parser->states_end = parser->states_begin + lxb_states_length; + + memset(parser->states, 0x00, sizeof(lxb_css_parser_state_t)); + parser->states->root = true; + + /* Syntax */ + parser->my_tkz = false; + + if (tkz == NULL) { + tkz = lxb_css_syntax_tokenizer_create(); + status = lxb_css_syntax_tokenizer_init(tkz); + if (status != LXB_STATUS_OK) { + return status; + } + + parser->my_tkz = true; + } + + /* Rules */ + parser->rules_begin = lexbor_malloc(sizeof(lxb_css_syntax_rule_t) + * lxb_rules_length); + if (parser->rules_begin == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + parser->rules_end = parser->rules_begin + lxb_rules_length; + parser->rules = parser->rules_begin; + + /* Temp */ + parser->pos = NULL; + parser->str.length = 0; + parser->str_size = 4096; + + parser->str.data = lexbor_malloc(sizeof(lxb_char_t) * parser->str_size); + if (parser->str.data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + parser->log = lxb_css_log_create(); + status = lxb_css_log_init(parser->log, NULL); + if (status != LXB_STATUS_OK) { + return status; + } + + parser->tkz = tkz; + 
parser->types_begin = NULL; + parser->types_pos = NULL; + parser->types_end = NULL; + parser->stage = LXB_CSS_PARSER_CLEAN; + parser->receive_endings = false; + parser->status = LXB_STATUS_OK; + parser->fake_null = false; + + return LXB_STATUS_OK; +} + +void +lxb_css_parser_clean(lxb_css_parser_t *parser) +{ + lxb_css_syntax_tokenizer_clean(parser->tkz); + lxb_css_log_clean(parser->log); + + parser->rules = parser->rules_begin; + parser->states = parser->states_begin; + parser->types_pos = parser->types_begin; + parser->stage = LXB_CSS_PARSER_CLEAN; + parser->status = LXB_STATUS_OK; + parser->pos = NULL; + parser->str.length = 0; + parser->fake_null = false; +} + +void +lxb_css_parser_erase(lxb_css_parser_t *parser) +{ + lxb_css_parser_clean(parser); + + if (parser->memory != NULL) { + lxb_css_memory_clean(parser->memory); + } +} + +lxb_css_parser_t * +lxb_css_parser_destroy(lxb_css_parser_t *parser, bool self_destroy) +{ + if (parser == NULL) { + return NULL; + } + + if (parser->my_tkz) { + parser->tkz = lxb_css_syntax_tokenizer_destroy(parser->tkz); + } + + parser->log = lxb_css_log_destroy(parser->log, true); + + if (parser->rules_begin != NULL) { + parser->rules_begin = lexbor_free(parser->rules_begin); + } + + if (parser->states_begin != NULL) { + parser->states_begin = lexbor_free(parser->states_begin); + } + + if (parser->types_begin != NULL) { + parser->types_begin = lexbor_free(parser->types_begin); + } + + if (parser->str.data != NULL) { + parser->str.data = lexbor_free(parser->str.data); + } + + if (self_destroy) { + return lexbor_free(parser); + } + + return parser; +} + +lxb_css_parser_state_t * +lxb_css_parser_states_push(lxb_css_parser_t *parser, + lxb_css_parser_state_f state, void *ctx, bool root) +{ + size_t length, cur_length; + lxb_css_parser_state_t *states = ++parser->states; + + if (states >= parser->states_end) { + cur_length = states - parser->states_begin; + + if (SIZE_MAX - cur_length < 1024) { + goto memory_error; + } + + length = 
cur_length + 1024; + + states = lexbor_realloc(parser->states_begin, + length * sizeof(lxb_css_parser_state_t)); + if (states == NULL) { + goto memory_error; + } + + parser->states_begin = states; + parser->states_end = states + length; + parser->states = states + cur_length; + + states = parser->states; + } + + states->state = state; + states->context = ctx; + states->root = root; + + return states; + +memory_error: + + parser->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; + + return NULL; +} + +lxb_css_parser_state_t * +lxb_css_parser_states_next(lxb_css_parser_t *parser, + lxb_css_parser_state_f next, + lxb_css_parser_state_f back, void *ctx, bool root) +{ + lxb_css_parser_state_t *state; + + state = lxb_css_parser_states_push(parser, back, ctx, root); + if (state == NULL) { + return NULL; + } + + parser->rules->state = next; + + return state; +} + +lxb_status_t +lxb_css_parser_types_push(lxb_css_parser_t *parser, + lxb_css_syntax_token_type_t type) +{ + size_t length, new_length; + lxb_css_syntax_token_type_t *tmp; + + if (parser->types_pos >= parser->types_end) { + length = parser->types_end - parser->types_begin; + + if ((SIZE_MAX - length) < 1024) { + return LXB_STATUS_ERROR_OVERFLOW; + } + + new_length = length + 1024; + + tmp = lexbor_realloc(parser->types_begin, + new_length * sizeof(lxb_css_syntax_token_type_t)); + if (tmp == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + parser->types_begin = tmp; + parser->types_end = tmp + new_length; + parser->types_pos = parser->types_begin + length; + } + + *parser->types_pos++ = type; + + return LXB_STATUS_OK; +} + +bool +lxb_css_parser_stop(lxb_css_parser_t *parser) +{ + parser->loop = false; + return true; +} + +bool +lxb_css_parser_fail(lxb_css_parser_t *parser, lxb_status_t status) +{ + parser->status = status; + parser->loop = false; + return true; +} + +bool +lxb_css_parser_unexpected(lxb_css_parser_t *parser) +{ + (void) lxb_css_parser_unexpected_status(parser); + return true; +} + +bool 
+lxb_css_parser_success(lxb_css_parser_t *parser) +{ + parser->rules->state = lxb_css_state_success; + return true; +} + +bool +lxb_css_parser_failed(lxb_css_parser_t *parser) +{ + lxb_css_syntax_rule_t *rule = parser->rules; + + rule->state = rule->cbx.cb->failed; + rule->failed = true; + + return true; +} + +lxb_status_t +lxb_css_parser_unexpected_status(lxb_css_parser_t *parser) +{ + parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA; + + parser->rules->failed = true; + + return LXB_STATUS_ERROR_UNEXPECTED_DATA; +} + +bool +lxb_css_parser_unexpected_data(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + static const char selectors[] = "Selectors"; + parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA; + + if (lxb_css_syntax_token_error(parser, token, selectors) == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return true; +} + +lxb_status_t +lxb_css_parser_unexpected_data_status(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + static const char selectors[] = "Selectors"; + parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA; + + if (lxb_css_syntax_token_error(parser, token, selectors) == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + return LXB_STATUS_ERROR_UNEXPECTED_DATA; +} + +bool +lxb_css_parser_memory_fail(lxb_css_parser_t *parser) +{ + parser->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; + parser->loop = false; + return true; +} + +lxb_status_t +lxb_css_parser_memory_fail_status(lxb_css_parser_t *parser) +{ + parser->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; + parser->loop = false; + + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; +} diff --git a/ext/dom/lexbor/lexbor/css/rule.h b/ext/dom/lexbor/lexbor/css/rule.h index f68491ee77e..bd191f9b651 100644 --- a/ext/dom/lexbor/lexbor/css/rule.h +++ b/ext/dom/lexbor/lexbor/css/rule.h @@ -339,15 +339,15 @@ lxb_css_rule_ref_dec(lxb_css_rule_t *rule) lxb_inline void lxb_css_rule_ref_dec_destroy(lxb_css_rule_t *rule) { +#if 0 if (rule->ref_count > 0) { 
rule->ref_count--; } if (rule->ref_count == 0) { -#if 0 (void) lxb_css_rule_destroy(rule, true); -#endif } +#endif } lxb_inline void diff --git a/ext/dom/lexbor/lexbor/css/selectors/pseudo.c b/ext/dom/lexbor/lexbor/css/selectors/pseudo.c new file mode 100644 index 00000000000..0468682e7d0 --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/selectors/pseudo.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2020-2022 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/css.h" +#include "lexbor/css/selectors/state.h" +#include "lexbor/css/selectors/pseudo.h" +#include "lexbor/css/selectors/pseudo_state.h" +#include "lexbor/css/selectors/pseudo_res.h" + + +const lxb_css_selectors_pseudo_data_t * +lxb_css_selector_pseudo_class_by_name(const lxb_char_t *name, size_t length) +{ + const lexbor_shs_entry_t *entry; + + entry = lexbor_shs_entry_get_lower_static(lxb_css_selectors_pseudo_class_shs, + name, length); + if (entry == NULL) { + return NULL; + } + + return entry->value; +} + +const lxb_css_selectors_pseudo_data_func_t * +lxb_css_selector_pseudo_class_function_by_name(const lxb_char_t *name, + size_t length) +{ + const lexbor_shs_entry_t *entry; + + entry = lexbor_shs_entry_get_lower_static(lxb_css_selectors_pseudo_class_function_shs, + name, length); + if (entry == NULL) { + return NULL; + } + + return entry->value; +} + +const lxb_css_selectors_pseudo_data_func_t * +lxb_css_selector_pseudo_class_function_by_id(unsigned id) +{ + return &lxb_css_selectors_pseudo_data_pseudo_class_function[id]; +} + +const lxb_css_selectors_pseudo_data_t * +lxb_css_selector_pseudo_element_by_name(const lxb_char_t *name, size_t length) +{ + const lexbor_shs_entry_t *entry; + + entry = lexbor_shs_entry_get_lower_static(lxb_css_selectors_pseudo_element_shs, + name, length); + if (entry == NULL) { + return NULL; + } + + return entry->value; +} + +const lxb_css_selectors_pseudo_data_func_t * +lxb_css_selector_pseudo_element_function_by_name(const lxb_char_t *name, + size_t 
length) +{ + const lexbor_shs_entry_t *entry; + + entry = lexbor_shs_entry_get_lower_static(lxb_css_selectors_pseudo_element_function_shs, + name, length); + if (entry == NULL) { + return NULL; + } + + return entry->value; +} + +const lxb_css_selectors_pseudo_data_func_t * +lxb_css_selector_pseudo_element_function_by_id(unsigned id) +{ + return &lxb_css_selectors_pseudo_data_pseudo_element_function[id]; +} + +const lxb_css_selectors_pseudo_data_func_t * +lxb_css_selector_pseudo_function_by_id(unsigned id, bool is_class) +{ + if (is_class) { + return &lxb_css_selectors_pseudo_data_pseudo_class_function[id]; + } + + return &lxb_css_selectors_pseudo_data_pseudo_element_function[id]; +} + +bool +lxb_css_selector_pseudo_function_can_empty(unsigned id, bool is_class) +{ + if (is_class) { + return lxb_css_selectors_pseudo_data_pseudo_class_function[id].empty; + } + + return lxb_css_selectors_pseudo_data_pseudo_element_function[id].empty; +} diff --git a/ext/dom/lexbor/lexbor/css/selectors/pseudo_state.c b/ext/dom/lexbor/lexbor/css/selectors/pseudo_state.c new file mode 100644 index 00000000000..f40b089d130 --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/selectors/pseudo_state.c @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2020-2022 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/css.h" +#include "lexbor/css/selectors/pseudo_state.h" +#include "lexbor/css/selectors/selectors.h" + + +static bool +lxb_css_selectors_state_pseudo_anb(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + +static bool +lxb_css_selectors_state_pseudo_of_begin(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + +static lxb_status_t +lxb_css_selectors_state_pseudo_of_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed); +static bool +lxb_css_selectors_state_pseudo_of_back(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + + +static const 
lxb_css_syntax_cb_components_t lxb_css_selectors_comp = { + .state = lxb_css_selectors_state_complex_list, + .block = NULL, + .failed = lxb_css_state_failed, + .end = lxb_css_selectors_state_pseudo_of_end +}; + + +lxb_inline bool +lxb_css_selectors_state_pseudo_anb_begin(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + (void) lxb_css_selectors_state_pseudo_anb(parser, token, ctx); + if (parser->status != LXB_STATUS_OK) { + parser->selectors->list = NULL; + parser->selectors->list_last = NULL; + + return lxb_css_parser_failed(parser); + } + + parser->selectors->list = NULL; + + return lxb_css_parser_success(parser); +} + + +bool +lxb_css_selectors_state_pseudo_class_function__undef(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_parser_fail(parser, LXB_STATUS_ERROR_UNEXPECTED_DATA); +} + +bool +lxb_css_selectors_state_pseudo_class_function_current(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_set(parser, lxb_css_selectors_state_complex_list); + + parser->selectors->list = NULL; + parser->selectors->list_last = NULL; + + return true; +} + +bool +lxb_css_selectors_state_pseudo_class_function_dir(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_parser_fail(parser, LXB_STATUS_ERROR_UNEXPECTED_DATA); +} + +bool +lxb_css_selectors_state_pseudo_class_function_has(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_set(parser, lxb_css_selectors_state_relative_list); + + parser->selectors->list = NULL; + parser->selectors->list_last = NULL; + + return true; +} + +bool +lxb_css_selectors_state_pseudo_class_function_is(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_set(parser, lxb_css_selectors_state_complex_list); + + parser->selectors->list = NULL; + parser->selectors->list_last = NULL; + + 
return true; +} + +bool +lxb_css_selectors_state_pseudo_class_function_lang(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_parser_fail(parser, LXB_STATUS_ERROR_UNEXPECTED_DATA); +} + +bool +lxb_css_selectors_state_pseudo_class_function_not(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_set(parser, lxb_css_selectors_state_complex_list); + + parser->selectors->list = NULL; + parser->selectors->list_last = NULL; + + return true; +} + +bool +lxb_css_selectors_state_pseudo_class_function_nth_child(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_selectors_state_pseudo_of_begin(parser, token, ctx); +} + +bool +lxb_css_selectors_state_pseudo_class_function_nth_col(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_selectors_state_pseudo_anb_begin(parser, token, ctx); +} + +bool +lxb_css_selectors_state_pseudo_class_function_nth_last_child(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_selectors_state_pseudo_of_begin(parser, token, ctx); +} + +bool +lxb_css_selectors_state_pseudo_class_function_nth_last_col(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_selectors_state_pseudo_anb_begin(parser, token, ctx); +} + +bool +lxb_css_selectors_state_pseudo_class_function_nth_last_of_type(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_selectors_state_pseudo_anb_begin(parser, token, ctx); +} + +bool +lxb_css_selectors_state_pseudo_class_function_nth_of_type(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_selectors_state_pseudo_anb_begin(parser, token, ctx); +} + +bool +lxb_css_selectors_state_pseudo_class_function_where(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + 
lxb_css_parser_state_set(parser, lxb_css_selectors_state_complex_list); + + parser->selectors->list = NULL; + parser->selectors->list_last = NULL; + + return true; +} + +bool +lxb_css_selectors_state_pseudo_element_function__undef(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return true; +} + +static bool +lxb_css_selectors_state_pseudo_anb(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_selectors_t *selectors; + lxb_css_selector_list_t *list; + lxb_css_selector_anb_of_t *anbof; + + selectors = parser->selectors; + + anbof = lexbor_mraw_alloc(parser->memory->mraw, + sizeof(lxb_css_selector_anb_of_t)); + if (anbof == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + parser->status = lxb_css_syntax_anb_handler(parser, token, &anbof->anb); + if (parser->status != LXB_STATUS_OK) { + lexbor_mraw_free(parser->memory->mraw, anbof); + return true; + } + + list = selectors->list_last; + list->last->u.pseudo.data = anbof; + + anbof->of = NULL; + + return true; +} + +static bool +lxb_css_selectors_state_pseudo_of_begin(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_syntax_rule_t *rule; + lxb_css_selectors_t *selectors; + lxb_css_selector_list_t *list; + lxb_css_syntax_token_ident_t *ident; + + static const lxb_char_t of[] = "of"; + + selectors = parser->selectors; + + (void) lxb_css_selectors_state_pseudo_anb(parser, token, ctx); + if (parser->status != LXB_STATUS_OK) { + selectors->list = NULL; + selectors->list_last = NULL; + + token = lxb_css_syntax_parser_token(parser); + if (token == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + if (token->type != LXB_CSS_SYNTAX_TOKEN__END) { + if (lxb_css_syntax_token_error(parser, token, "Selectors") == NULL) { + return lxb_css_parser_memory_fail(parser); + } + } + + return lxb_css_parser_failed(parser); + } + + list = selectors->list_last; + + selectors->list = NULL; + + 
lxb_css_parser_token_wo_ws_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + ident = lxb_css_syntax_token_ident(token); + + if (ident->length == sizeof(of) - 1 + && lexbor_str_data_ncasecmp(ident->data, of, ident->length)) + { + lxb_css_syntax_token_consume(parser->tkz); + + selectors->list = NULL; + selectors->list_last = NULL; + + token = lxb_css_syntax_parser_token(parser); + if (token == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + rule = lxb_css_syntax_parser_components_push(parser, token, + lxb_css_selectors_state_pseudo_of_back, + &lxb_css_selectors_comp, list, + LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS); + if (rule == NULL) { + lexbor_mraw_free(parser->memory->mraw, + list->last->u.pseudo.data); + return lxb_css_parser_memory_fail(parser); + } + + lxb_css_parser_state_set(parser, + lxb_css_selectors_state_complex_list); + return true; + } + } + + return lxb_css_parser_success(parser); +} + +static lxb_status_t +lxb_css_selectors_state_pseudo_of_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed) +{ + lxb_css_selector_anb_of_t *anbof; + lxb_css_selector_list_t *list = ctx; + + anbof = list->last->u.pseudo.data; + anbof->of = parser->selectors->list; + + parser->selectors->list = NULL; + + return LXB_STATUS_OK; +} + +static bool +lxb_css_selectors_state_pseudo_of_back(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + parser->selectors->list = NULL; + + return lxb_css_parser_success(parser); +} diff --git a/ext/dom/lexbor/lexbor/css/selectors/selector.c b/ext/dom/lexbor/lexbor/css/selectors/selector.c new file mode 100644 index 00000000000..e8bf96ba88c --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/selectors/selector.c @@ -0,0 +1,942 @@ +/* + * Copyright (C) 2020 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/core/serialize.h" +#include "lexbor/css/css.h" +#include "lexbor/css/selectors/selectors.h" +#include 
"lexbor/css/selectors/selector.h" +#include "lexbor/css/selectors/pseudo.h" +#include "lexbor/css/selectors/pseudo_const.h" +#include "lexbor/css/selectors/pseudo_state.h" +#include "lexbor/css/selectors/state.h" +#include "lexbor/css/selectors/pseudo_res.h" + + +typedef void +(*lxb_css_selector_destroy_f)(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); +typedef lxb_status_t +(*lxb_css_selector_serialize_f)(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); + + +static void +lxb_css_selector_destroy_undef(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); +static void +lxb_css_selector_destroy_any(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); +static void +lxb_css_selector_destroy_id(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); +static void +lxb_css_selector_destroy_attribute(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); +static void +lxb_css_selector_destroy_pseudo_class_function(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); +static void +lxb_css_selector_destroy_pseudo_element_function(lxb_css_selector_t *selector, + lxb_css_memory_t *mem); + +static lxb_status_t +lxb_css_selector_serialize_undef(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); +static lxb_status_t +lxb_css_selector_serialize_any(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); +static lxb_status_t +lxb_css_selector_serialize_id(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); +static lxb_status_t +lxb_css_selector_serialize_class(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); +static lxb_status_t +lxb_css_selector_serialize_attribute(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); +static lxb_status_t +lxb_css_selector_serialize_pseudo_class(lxb_css_selector_t *selector, + lexbor_serialize_cb_f cb, void *ctx); +static lxb_status_t +lxb_css_selector_serialize_pseudo_class_function(lxb_css_selector_t *selector, + 
lexbor_serialize_cb_f cb, void *ctx);
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_element(lxb_css_selector_t *selector,
+                                          lexbor_serialize_cb_f cb, void *ctx);
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_element_function(lxb_css_selector_t *selector,
+                                          lexbor_serialize_cb_f cb, void *ctx);
+
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_single(lxb_css_selector_t *selector,
+                                         lexbor_serialize_cb_f cb, void *ctx,
+                                         bool is_class);
+
+/*
+ * Destroy handlers, indexed by selector->type (see
+ * lxb_css_selector_destroy()).  Ten entries.  Slots 3 and 4 share
+ * lxb_css_selector_destroy_id; slots 6 and 8 use the no-op handler and line
+ * up with the plain pseudo-class / pseudo-element serializers in the table
+ * below.
+ */
+static const lxb_css_selector_destroy_f
+    lxb_selector_destroy_map[LXB_CSS_SELECTOR_TYPE__LAST_ENTRY] =
+{
+    lxb_css_selector_destroy_undef,
+    lxb_css_selector_destroy_any,
+    lxb_css_selector_destroy_any,
+    lxb_css_selector_destroy_id,
+    lxb_css_selector_destroy_id,
+    lxb_css_selector_destroy_attribute,
+    lxb_css_selector_destroy_undef,
+    lxb_css_selector_destroy_pseudo_class_function,
+    lxb_css_selector_destroy_undef,
+    lxb_css_selector_destroy_pseudo_element_function
+};
+/*
+ * Serialize handlers, indexed by selector->type (see
+ * lxb_css_selector_serialize()); same slot order as the destroy table.
+ */
+static const lxb_css_selector_serialize_f
+    lxb_selector_serialize_map[LXB_CSS_SELECTOR_TYPE__LAST_ENTRY] =
+{
+    lxb_css_selector_serialize_undef,
+    lxb_css_selector_serialize_any,
+    lxb_css_selector_serialize_any,
+    lxb_css_selector_serialize_id,
+    lxb_css_selector_serialize_class,
+    lxb_css_selector_serialize_attribute,
+    lxb_css_selector_serialize_pseudo_class,
+    lxb_css_selector_serialize_pseudo_class_function,
+    lxb_css_selector_serialize_pseudo_element,
+    lxb_css_selector_serialize_pseudo_element_function
+};
+
+/*
+ * Allocates a zeroed selector from list->memory->objs and records `list` as
+ * its owner.  The selector is not linked into the list.  Returns NULL when
+ * the allocation fails.  `list` must not be NULL.
+ */
+lxb_css_selector_t *
+lxb_css_selector_create(lxb_css_selector_list_t *list)
+{
+    lxb_css_selector_t *selector = lexbor_dobject_calloc(list->memory->objs);
+    if (selector == NULL) {
+        return NULL;
+    }
+
+    selector->list = list;
+
+    return selector;
+}
+/*
+ * Releases what the selector owns through the handler registered for its
+ * type, then returns the selector object to its list's memory.  NULL is
+ * accepted.  The selector is not unlinked from its neighbours, and
+ * selector->type is used as a table index without a range check.
+ */
+void
+lxb_css_selector_destroy(lxb_css_selector_t *selector)
+{
+    lxb_css_memory_t *memory;
+
+    if (selector != NULL) {
+        memory = selector->list->memory;
+
+        lxb_selector_destroy_map[selector->type](selector, memory);
+        lexbor_dobject_free(memory->objs, selector);
+    }
+}
+/*
+ * Destroys `selector` and every selector reachable through ->next.  The
+ * next pointer is read before the current node is destroyed.
+ */
+void
+lxb_css_selector_destroy_chain(lxb_css_selector_t *selector)
+{
+    lxb_css_selector_t *next;
+
+    while (selector != NULL) {
+        next = selector->next;
+        lxb_css_selector_destroy(selector);
+        selector = next;
+    }
+}
+/*
+ * Unlinks `selector` from its neighbours and moves list->first / list->last
+ * off it when needed.  The selector's own prev/next stay as they were and
+ * nothing is freed.
+ */
+void
+lxb_css_selector_remove(lxb_css_selector_t *selector)
+{
+    if (selector->next != NULL) {
+        selector->next->prev = selector->prev;
+    }
+
+    if (selector->prev != NULL) {
+        selector->prev->next = selector->next;
+    }
+
+    if (selector->list->first == selector) {
+        selector->list->first = selector->next;
+    }
+
+    if (selector->list->last == selector) {
+        selector->list->last = selector->prev;
+    }
+}
+/*
+ * Allocates a zeroed selector list from mem->objs and records `mem` as the
+ * memory it belongs to.  Returns NULL when the allocation fails.
+ */
+lxb_css_selector_list_t *
+lxb_css_selector_list_create(lxb_css_memory_t *mem)
+{
+    lxb_css_selector_list_t *list;
+
+    list = lexbor_dobject_calloc(mem->objs);
+    if (list == NULL) {
+        return NULL;
+    }
+
+    list->memory = mem;
+
+    return list;
+}
+/*
+ * Unlinks `list` from its prev/next neighbours.  The list's own pointers
+ * stay as they were and nothing is freed.
+ */
+void
+lxb_css_selector_list_remove(lxb_css_selector_list_t *list)
+{
+    if (list->next != NULL) {
+        list->next->prev = list->prev;
+    }
+
+    if (list->prev != NULL) {
+        list->prev->next = list->next;
+    }
+}
+/*
+ * Same as lxb_css_selector_list_remove(), and additionally moves
+ * selectors->list / selectors->list_last to the neighbouring list when they
+ * pointed at `list`.
+ */
+void
+lxb_css_selector_list_selectors_remove(lxb_css_selectors_t *selectors,
+                                       lxb_css_selector_list_t *list)
+{
+    lxb_css_selector_list_remove(list);
+
+    if (selectors->list == list) {
+        selectors->list = list->next;
+    }
+
+    if (selectors->list_last == list) {
+        selectors->list_last = list->prev;
+    }
+}
+/*
+ * Destroys every selector of the list, starting at list->first, and returns
+ * the list object to list->memory->objs.  NULL is accepted.  The list is
+ * not unlinked from its neighbours.
+ */
+void
+lxb_css_selector_list_destroy(lxb_css_selector_list_t *list)
+{
+    if (list != NULL) {
+        lxb_css_selector_destroy_chain(list->first);
+        lexbor_dobject_free(list->memory->objs, list);
+    }
+}
+/*
+ * Destroys `list` and every list reachable through ->next.
+ */
+void
+lxb_css_selector_list_destroy_chain(lxb_css_selector_list_t *list)
+{
+    lxb_css_selector_list_t *next;
+
+    while (list != NULL) {
+        next = list->next;
+        lxb_css_selector_list_destroy(list);
+        list = next;
+    }
+}
+/*
+ * Destroys the memory object the list belongs to by calling
+ * lxb_css_memory_destroy(list->memory, true).  NULL is accepted.
+ * NOTE(review): presumably this releases every object allocated from that
+ * memory, the list itself included -- confirm against lxb_css_memory_destroy.
+ */
+void
+lxb_css_selector_list_destroy_memory(lxb_css_selector_list_t *list)
+{
+    if (list != NULL) {
+        (void) lxb_css_memory_destroy(list->memory, true);
+    }
+}
+/* Destroy handler for selector types that own no extra data. */
+static void
+lxb_css_selector_destroy_undef(lxb_css_selector_t *selector,
+                               lxb_css_memory_t *mem)
+{
+    /* Do nothing. */
+}
+/*
+ * Returns the namespace and name strings of the selector to mem->mraw when
+ * they are set.  The pointers themselves are not reset.
+ */
+static void
+lxb_css_selector_destroy_any(lxb_css_selector_t *selector,
+                             lxb_css_memory_t *mem)
+{
+    if (selector->ns.data != NULL) {
+        lexbor_mraw_free(mem->mraw, selector->ns.data);
+    }
+
+    if (selector->name.data != NULL) {
+        lexbor_mraw_free(mem->mraw, selector->name.data);
+    }
+}
+/*
+ * Returns the name string of the selector to mem->mraw when it is set.
+ * Registered for two slots of the destroy table.
+ */
+static void
+lxb_css_selector_destroy_id(lxb_css_selector_t *selector,
+                            lxb_css_memory_t *mem)
+{
+    if (selector->name.data != NULL) {
+        (void) lexbor_mraw_free(mem->mraw, selector->name.data);
+    }
+}
+/*
+ * Returns the namespace, name and attribute value strings of the selector
+ * to mem->mraw, each only when set.
+ */
+static void
+lxb_css_selector_destroy_attribute(lxb_css_selector_t *selector,
+                                   lxb_css_memory_t *mem)
+{
+    if (selector->ns.data != NULL) {
+        lexbor_mraw_free(mem->mraw, selector->ns.data);
+    }
+
+    if (selector->name.data != NULL) {
+        lexbor_mraw_free(mem->mraw, selector->name.data);
+    }
+
+    if (selector->u.attribute.value.data != NULL) {
+        lexbor_mraw_free(mem->mraw, selector->u.attribute.value.data);
+    }
+}
+/*
+ * Releases the argument of a functional pseudo-class, depending on
+ * pseudo->type:
+ *   - :has(), :is(), :not(), :where(): pseudo->data is treated as a
+ *     selector list chain and destroyed as such;
+ *   - the :nth-*() family: pseudo->data is an An+B object; its `of` list
+ *     chain is destroyed and the object is returned to mem->mraw (skipped
+ *     when data is NULL);
+ *   - :current(), :dir(), :lang() and unknown types: nothing to release.
+ */
+static void
+lxb_css_selector_destroy_pseudo_class_function(lxb_css_selector_t *selector,
+                                               lxb_css_memory_t *mem)
+{
+    lxb_css_selector_anb_of_t *anbof;
+    lxb_css_selector_pseudo_t *pseudo;
+
+    pseudo = &selector->u.pseudo;
+
+    switch (pseudo->type) {
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_CURRENT:
+            break;
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR:
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_HAS:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_IS:
+            lxb_css_selector_list_destroy_chain(pseudo->data);
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG:
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NOT:
+            lxb_css_selector_list_destroy_chain(pseudo->data);
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL:
+        case
LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
+            anbof = pseudo->data;
+
+            if (anbof != NULL) {
+                lxb_css_selector_list_destroy_chain(anbof->of);
+                lexbor_mraw_free(mem->mraw, anbof);
+            }
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_WHERE:
+            lxb_css_selector_list_destroy_chain(pseudo->data);
+            break;
+
+        default:
+            break;
+    }
+}
+/* Destroy handler for functional pseudo-elements; the body is empty. */
+static void
+lxb_css_selector_destroy_pseudo_element_function(lxb_css_selector_t *selector,
+                                                 lxb_css_memory_t *mem)
+{
+
+}
+/*
+ * Serializes one selector through the handler registered for its type and
+ * returns that handler's status.  `selector` must not be NULL, and
+ * selector->type is used as a table index without a range check.
+ */
+lxb_status_t
+lxb_css_selector_serialize(lxb_css_selector_t *selector,
+                           lexbor_serialize_cb_f cb, void *ctx)
+{
+    return lxb_selector_serialize_map[selector->type](selector, cb, ctx);
+}
+/*
+ * Serializes `selector` and everything linked after it through ->next,
+ * writing the pieces to cb(data, length, ctx).
+ *
+ * A combinator is written in front of the first selector only when its
+ * value is greater than LXB_CSS_SELECTOR_COMBINATOR_CLOSE, followed by a
+ * space.  Between later selectors a zero-length combinator writes nothing,
+ * the descendant combinator writes a single space, and any other combinator
+ * is written with a space on both sides.
+ *
+ * Returns LXB_STATUS_OK for a NULL selector, LXB_STATUS_ERROR_UNEXPECTED_DATA
+ * when lxb_css_selector_combinator() does not know the combinator, and
+ * otherwise the first non-OK status produced by cb or a handler.
+ */
+lxb_status_t
+lxb_css_selector_serialize_chain(lxb_css_selector_t *selector,
+                                 lexbor_serialize_cb_f cb, void *ctx)
+{
+    size_t length;
+    lxb_char_t *data;
+    lxb_status_t status;
+
+    if (selector == NULL) {
+        return LXB_STATUS_OK;
+    }
+
+    if (selector->combinator > LXB_CSS_SELECTOR_COMBINATOR_CLOSE) {
+        data = lxb_css_selector_combinator(selector, &length);
+        if (data == NULL) {
+            return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+        }
+
+        lxb_css_selector_serialize_write(data, length);
+        lxb_css_selector_serialize_write(" ", 1);
+    }
+
+    status = lxb_css_selector_serialize(selector, cb, ctx);
+    if (status != LXB_STATUS_OK) {
+        return status;
+    }
+
+    selector = selector->next;
+
+    while (selector != NULL) {
+        data = lxb_css_selector_combinator(selector, &length);
+        if (data == NULL) {
+            return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+        }
+
+        if (length != 0) {
+            lxb_css_selector_serialize_write(" ", 1);
+
+            if (*data != ' ') { /* descendant: the space above is enough */
+                lxb_css_selector_serialize_write(data, length);
+                lxb_css_selector_serialize_write(" ", 1);
+            }
+        }
+
+        status = lxb_css_selector_serialize(selector, cb, ctx);
+        if (status != LXB_STATUS_OK) {
+            return status;
+        }
+
+        selector = selector->next;
+    }
+
+    return LXB_STATUS_OK;
+}
+
+lxb_char_t *
+lxb_css_selector_serialize_chain_char(lxb_css_selector_t *selector,
+                                      size_t *out_length)
+{
+    size_t length = 0;
+    lxb_status_t status;
+    lexbor_str_t str;
+
+    /* Pass 1: measuring pass, the callback receives &length. */
+    status = lxb_css_selector_serialize_chain(selector, lexbor_serialize_length_cb,
+                                              &length);
+    if (status != LXB_STATUS_OK) {
+        goto failed;
+    }
+
+    /* + 1 == '\0' */
+    str.data = lexbor_malloc(length + 1);
+    if (str.data == NULL) {
+        goto failed;
+    }
+
+    str.length = 0;
+
+    /* Pass 2: copying pass into the buffer sized above. */
+    status = lxb_css_selector_serialize_chain(selector, lexbor_serialize_copy_cb,
+                                              &str);
+    if (status != LXB_STATUS_OK) {
+        lexbor_free(str.data);
+        goto failed;
+    }
+
+    str.data[str.length] = '\0';
+
+    if (out_length != NULL) {
+        *out_length = str.length;
+    }
+
+    return str.data; /* lexbor_malloc()ed: release with lexbor_free() */
+
+failed:
+
+    if (out_length != NULL) {
+        *out_length = 0;
+    }
+
+    return NULL;
+}
+/*
+ * Serializes the selectors of a single list (list->first and what follows
+ * through ->next); sibling lists are not visited.  A NULL list yields
+ * LXB_STATUS_OK without any output.
+ */
+lxb_status_t
+lxb_css_selector_serialize_list(lxb_css_selector_list_t *list,
+                                lexbor_serialize_cb_f cb, void *ctx)
+{
+    if (list != NULL) {
+        return lxb_css_selector_serialize_chain(list->first, cb, ctx);
+    }
+
+    return LXB_STATUS_OK;
+}
+/*
+ * Returns a NUL-terminated, lexbor_malloc()ed string and stores its length
+ * in *out_length when out_length is not NULL.  On failure NULL is returned
+ * and *out_length is set to 0.
+ * NOTE(review): both passes call lxb_css_selector_serialize_list_chain(),
+ * so the whole chain of lists is written, exactly as in
+ * lxb_css_selector_serialize_list_chain_char(); confirm that this is
+ * intended rather than lxb_css_selector_serialize_list().
+ */
+lxb_char_t *
+lxb_css_selector_serialize_list_char(lxb_css_selector_list_t *list,
+                                     size_t *out_length)
+{
+    size_t length = 0;
+    lxb_status_t status;
+    lexbor_str_t str;
+
+    status = lxb_css_selector_serialize_list_chain(list, lexbor_serialize_length_cb,
+                                                   &length);
+    if (status != LXB_STATUS_OK) {
+        goto failed;
+    }
+
+    /* + 1 == '\0' */
+    str.data = lexbor_malloc(length + 1);
+    if (str.data == NULL) {
+        goto failed;
+    }
+
+    str.length = 0;
+
+    status = lxb_css_selector_serialize_list_chain(list, lexbor_serialize_copy_cb,
+                                                   &str);
+    if (status != LXB_STATUS_OK) {
+        lexbor_free(str.data);
+        goto failed;
+    }
+
+    str.data[str.length] = '\0';
+
+    if (out_length != NULL) {
+        *out_length = str.length;
+    }
+
+    return str.data;
+
+failed:
+
+    if (out_length != NULL) {
+        *out_length = 0;
+    }
+
+    return NULL;
+}
+/*
+ * Serializes `list` and every list linked after it through ->next, with
+ * ", " written between consecutive lists.
+ */
+lxb_status_t
+lxb_css_selector_serialize_list_chain(lxb_css_selector_list_t *list,
+                                      lexbor_serialize_cb_f cb, void *ctx)
+{
+
lxb_status_t status;
+
+    if (list == NULL) {
+        return LXB_STATUS_OK; /* nothing to write */
+    }
+
+    status = lxb_css_selector_serialize_chain(list->first, cb, ctx);
+    if (status != LXB_STATUS_OK) {
+        return status;
+    }
+
+    list = list->next;
+
+    while (list != NULL) {
+        lxb_css_selector_serialize_write(", ", 2);
+
+        status = lxb_css_selector_serialize_chain(list->first, cb, ctx);
+        if (status != LXB_STATUS_OK) {
+            return status;
+        }
+
+        list = list->next;
+    }
+
+    return LXB_STATUS_OK;
+}
+/*
+ * String-returning variant of lxb_css_selector_serialize_list_chain().
+ * Runs a measuring pass, allocates length + 1 bytes with lexbor_malloc(),
+ * runs a copying pass and NUL-terminates the result.  The length is stored
+ * in *out_length when out_length is not NULL.  On failure NULL is returned
+ * and *out_length is set to 0.
+ */
+lxb_char_t *
+lxb_css_selector_serialize_list_chain_char(lxb_css_selector_list_t *list,
+                                           size_t *out_length)
+{
+    size_t length = 0;
+    lxb_status_t status;
+    lexbor_str_t str;
+
+    status = lxb_css_selector_serialize_list_chain(list, lexbor_serialize_length_cb,
+                                                   &length);
+    if (status != LXB_STATUS_OK) {
+        goto failed;
+    }
+
+    /* + 1 == '\0' */
+    str.data = lexbor_malloc(length + 1);
+    if (str.data == NULL) {
+        goto failed;
+    }
+
+    str.length = 0;
+
+    status = lxb_css_selector_serialize_list_chain(list, lexbor_serialize_copy_cb,
+                                                   &str);
+    if (status != LXB_STATUS_OK) {
+        lexbor_free(str.data);
+        goto failed;
+    }
+
+    str.data[str.length] = '\0';
+
+    if (out_length != NULL) {
+        *out_length = str.length;
+    }
+
+    return str.data;
+
+failed:
+
+    if (out_length != NULL) {
+        *out_length = 0;
+    }
+
+    return NULL;
+}
+
+/* Serializer for slot 0 of the table: always an error. */
+static lxb_status_t
+lxb_css_selector_serialize_undef(lxb_css_selector_t *selector,
+                                 lexbor_serialize_cb_f cb, void *ctx)
+{
+    return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+}
+/*
+ * Writes "<ns>|" when a namespace string is set, then the name when one is
+ * set.  Also used by the attribute serializer for the part before the
+ * match operator.
+ */
+static lxb_status_t
+lxb_css_selector_serialize_any(lxb_css_selector_t *selector,
+                               lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_status_t status;
+
+    if (selector->ns.data != NULL) {
+        lxb_css_selector_serialize_write(selector->ns.data,
+                                         selector->ns.length);
+        lxb_css_selector_serialize_write("|", 1);
+    }
+
+    if (selector->name.data != NULL) {
+        return cb(selector->name.data, selector->name.length, ctx);
+    }
+
+    return LXB_STATUS_OK;
+}
+/* Writes "#" followed by the selector name, when a name is set. */
+static lxb_status_t
+lxb_css_selector_serialize_id(lxb_css_selector_t
*selector,
+                              lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_status_t status;
+
+    lxb_css_selector_serialize_write("#", 1);
+
+    if (selector->name.data != NULL) {
+        return cb(selector->name.data, selector->name.length, ctx);
+    }
+
+    return LXB_STATUS_OK;
+}
+/* Writes "." followed by the selector name, when a name is set. */
+static lxb_status_t
+lxb_css_selector_serialize_class(lxb_css_selector_t *selector,
+                                 lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_status_t status;
+
+    lxb_css_selector_serialize_write(".", 1);
+
+    if (selector->name.data != NULL) {
+        return cb(selector->name.data, selector->name.length, ctx);
+    }
+
+    return LXB_STATUS_OK;
+}
+/*
+ * Writes an attribute selector: "[", the optional "ns|" prefix and the
+ * name, and, when a value is present, the match operator, the value in
+ * double quotes and an optional modifier, then "]".
+ *
+ * Without a value the output is just "[name]".  An unknown match type or
+ * modifier yields LXB_STATUS_ERROR_UNEXPECTED_DATA.  Inside the value every
+ * double quote is replaced by an escape sequence; the bytes between two
+ * quotes are written in one call each.
+ */
+static lxb_status_t
+lxb_css_selector_serialize_attribute(lxb_css_selector_t *selector,
+                                     lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_char_t *p, *begin, *end;
+    lxb_status_t status;
+    lxb_css_selector_attribute_t *attr;
+
+    lxb_css_selector_serialize_write("[", 1);
+
+    status = lxb_css_selector_serialize_any(selector, cb, ctx);
+    if (status != LXB_STATUS_OK) {
+        return status;
+    }
+
+    attr = &selector->u.attribute;
+
+    if (attr->value.data == NULL) {
+        return cb((lxb_char_t *) "]", 1, ctx);
+    }
+
+    switch (attr->match) {
+        case LXB_CSS_SELECTOR_MATCH_EQUAL:
+            lxb_css_selector_serialize_write("=", 1);
+            break;
+        case LXB_CSS_SELECTOR_MATCH_INCLUDE:
+            lxb_css_selector_serialize_write("~=", 2);
+            break;
+        case LXB_CSS_SELECTOR_MATCH_DASH:
+            lxb_css_selector_serialize_write("|=", 2);
+            break;
+        case LXB_CSS_SELECTOR_MATCH_PREFIX:
+            lxb_css_selector_serialize_write("^=", 2);
+            break;
+        case LXB_CSS_SELECTOR_MATCH_SUFFIX:
+            lxb_css_selector_serialize_write("$=", 2);
+            break;
+        case LXB_CSS_SELECTOR_MATCH_SUBSTRING:
+            lxb_css_selector_serialize_write("*=", 2);
+            break;
+
+        default:
+            return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+    }
+
+    p = attr->value.data;
+    end = attr->value.data + attr->value.length;
+
+    begin = p; /* start of the run that has not been written yet */
+
+    lxb_css_selector_serialize_write("\"", 1);
+
+    while (p < end) {
+        if (*p == '"') {
+            if (begin < p) {
+                lxb_css_selector_serialize_write(begin, p - begin);
+            }
+
+
lxb_css_selector_serialize_write("\\000022", 7); /* backslash + 6 hex digits for U+0022 */
+
+            begin = p + 1; /* next run starts after the quote */
+        }
+
+        p++;
+    }
+
+    if (begin < p) { /* trailing run after the last quote */
+        lxb_css_selector_serialize_write(begin, p - begin);
+    }
+
+    lxb_css_selector_serialize_write("\"", 1);
+
+    if (attr->modifier != LXB_CSS_SELECTOR_MODIFIER_UNSET) {
+        switch (attr->modifier) {
+            case LXB_CSS_SELECTOR_MODIFIER_I:
+                lxb_css_selector_serialize_write("i", 1);
+                break;
+
+            case LXB_CSS_SELECTOR_MODIFIER_S:
+                lxb_css_selector_serialize_write("s", 1);
+                break;
+
+            default:
+                return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+        }
+    }
+
+    return cb((lxb_char_t *) "]", 1, ctx);
+}
+/* Writes ":" plus the pseudo-class name (is_class == true). */
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_class(lxb_css_selector_t *selector,
+                                        lexbor_serialize_cb_f cb, void *ctx)
+{
+    return lxb_css_selector_serialize_pseudo_single(selector, cb, ctx, true);
+}
+/*
+ * Writes ":name(", the argument and ")".  The name comes from
+ * lxb_css_selectors_pseudo_data_pseudo_class_function[], indexed by
+ * pseudo->type without a range check.
+ *
+ * Arguments: :has(), :is(), :not() and :where() write pseudo->data as a
+ * chain of selector lists; the :nth-*() family writes the An+B object (and
+ * its "of" list) when data is not NULL; :current(), :dir(), :lang() and
+ * unknown types write nothing between the parentheses.  In the branches
+ * that only break, `status` still holds the OK result of the "(" write.
+ */
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_class_function(lxb_css_selector_t *selector,
+                                                 lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_status_t status;
+    lxb_css_selector_pseudo_t *pseudo;
+    const lxb_css_selectors_pseudo_data_func_t *pfunc;
+
+    pseudo = &selector->u.pseudo;
+
+    pfunc = &lxb_css_selectors_pseudo_data_pseudo_class_function[pseudo->type];
+
+    lxb_css_selector_serialize_write(":", 1);
+    lxb_css_selector_serialize_write(pfunc->name, pfunc->length);
+    lxb_css_selector_serialize_write("(", 1);
+
+    switch (pseudo->type) {
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_CURRENT:
+            break;
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR:
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_HAS:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_IS:
+            status = lxb_css_selector_serialize_list_chain(pseudo->data,
+                                                           cb, ctx);
+            break;
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG:
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NOT:
+            status = lxb_css_selector_serialize_list_chain(pseudo->data,
+                                                           cb, ctx);
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL:
+        case
LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
+            status = LXB_STATUS_OK;
+
+            if (pseudo->data != NULL) { /* no An+B object: "()" */
+                status = lxb_css_selector_serialize_anb_of(pseudo->data,
+                                                           cb, ctx);
+            }
+            break;
+
+        case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_WHERE:
+            status = lxb_css_selector_serialize_list_chain(pseudo->data,
+                                                           cb, ctx);
+            break;
+
+        default:
+            status = LXB_STATUS_OK;
+            break;
+    }
+
+    if (status != LXB_STATUS_OK) {
+        return status;
+    }
+
+    lxb_css_selector_serialize_write(")", 1);
+
+    return LXB_STATUS_OK;
+}
+/* Writes "::" plus the pseudo-element name (is_class == false). */
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_element(lxb_css_selector_t *selector,
+                                          lexbor_serialize_cb_f cb, void *ctx)
+{
+    return lxb_css_selector_serialize_pseudo_single(selector, cb, ctx, false);
+}
+/* Functional pseudo-elements: nothing is written, LXB_STATUS_OK. */
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_element_function(lxb_css_selector_t *selector,
+                                                   lexbor_serialize_cb_f cb, void *ctx)
+{
+    return LXB_STATUS_OK;
+}
+/*
+ * Shared body of the non-functional pseudo serializers.  With is_class the
+ * prefix is ":" and the name comes from
+ * lxb_css_selectors_pseudo_data_pseudo_class[]; otherwise the prefix is
+ * "::" and the name comes from
+ * lxb_css_selectors_pseudo_data_pseudo_element[].  Both tables are indexed
+ * by pseudo->type without a range check.
+ */
+static lxb_status_t
+lxb_css_selector_serialize_pseudo_single(lxb_css_selector_t *selector,
+                                         lexbor_serialize_cb_f cb, void *ctx,
+                                         bool is_class)
+{
+    lxb_status_t status;
+    lxb_css_selector_pseudo_t *pseudo;
+    const lxb_css_selectors_pseudo_data_t *pclass;
+
+    pseudo = &selector->u.pseudo;
+
+    if (is_class) {
+        pclass = &lxb_css_selectors_pseudo_data_pseudo_class[pseudo->type];
+        lxb_css_selector_serialize_write(":", 1);
+    }
+    else {
+        pclass = &lxb_css_selectors_pseudo_data_pseudo_element[pseudo->type];
+        lxb_css_selector_serialize_write("::", 2);
+    }
+
+    lxb_css_selector_serialize_write(pclass->name, pclass->length);
+
+    return LXB_STATUS_OK;
+}
+/*
+ * Writes an An+B object and, when its `of` list is set, " of " followed by
+ * that chain of selector lists.  `anbof` must not be NULL.
+ */
+lxb_status_t
+lxb_css_selector_serialize_anb_of(lxb_css_selector_anb_of_t *anbof,
+                                  lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_status_t status;
+
+    static const lxb_char_t of[] = " of ";
+
+    status =
lxb_css_syntax_anb_serialize(&anbof->anb, cb, ctx);
+    if (status != LXB_STATUS_OK) {
+        return status;
+    }
+
+    if (anbof->of != NULL) {
+        lxb_css_selector_serialize_write(of, sizeof(of) - 1);
+
+        return lxb_css_selector_serialize_list_chain(anbof->of, cb, ctx);
+    }
+
+    return LXB_STATUS_OK;
+}
+/*
+ * Maps selector->combinator to its text: " " (descendant), "" (close),
+ * ">" (child), "+" (sibling), "~" (following), "||" (cell).  The text
+ * length is stored in *out_length when out_length is not NULL.  Unknown
+ * values return NULL with a length of 0.  The returned pointers refer to
+ * string literals and must not be modified or freed.
+ */
+lxb_char_t *
+lxb_css_selector_combinator(lxb_css_selector_t *selector, size_t *out_length)
+{
+    switch (selector->combinator) {
+        case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
+            if (out_length != NULL) {*out_length = 1;}
+            return (lxb_char_t *) " ";
+
+        case LXB_CSS_SELECTOR_COMBINATOR_CLOSE:
+            if (out_length != NULL) {*out_length = 0;}
+            return (lxb_char_t *) "";
+
+        case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
+            if (out_length != NULL) {*out_length = 1;}
+            return (lxb_char_t *) ">";
+
+        case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
+            if (out_length != NULL) {*out_length = 1;}
+            return (lxb_char_t *) "+";
+
+        case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
+            if (out_length != NULL) {*out_length = 1;}
+            return (lxb_char_t *) "~";
+
+        case LXB_CSS_SELECTOR_COMBINATOR_CELL:
+            if (out_length != NULL) {*out_length = 2;}
+            return (lxb_char_t *) "||";
+
+        default:
+            if (out_length != NULL) {*out_length = 0;}
+            return NULL;
+    }
+}
+/*
+ * Appends `selector` at the tail of `list`, updating list->first when the
+ * list was empty.  selector->next and selector->list are not touched.
+ */
+void
+lxb_css_selector_list_append(lxb_css_selector_list_t *list,
+                             lxb_css_selector_t *selector)
+{
+    selector->prev = list->last;
+
+    if (list->last != NULL) {
+        list->last->next = selector;
+    }
+    else {
+        list->first = selector;
+    }
+
+    list->last = selector;
+}
+/*
+ * Links `src` directly after `dist`.  The owning list's first/last
+ * pointers are not updated here.
+ */
+void
+lxb_css_selector_append_next(lxb_css_selector_t *dist, lxb_css_selector_t *src)
+{
+    if (dist->next != NULL) {
+        dist->next->prev = src;
+    }
+
+    src->prev = dist;
+    src->next = dist->next;
+
+    dist->next = src;
+}
+/* Links the list `src` directly after the list `dist`. */
+void
+lxb_css_selector_list_append_next(lxb_css_selector_list_t *dist,
+                                  lxb_css_selector_list_t *src)
+{
+    if (dist->next != NULL) {
+        dist->next->prev = src;
+    }
+
+    src->prev = dist;
+    src->next = dist->next;
+
+    dist->next = src;
+}
diff --git
a/ext/dom/lexbor/lexbor/css/selectors/selectors.c b/ext/dom/lexbor/lexbor/css/selectors/selectors.c
new file mode 100644
index 00000000000..52537873ef0
--- /dev/null
+++ b/ext/dom/lexbor/lexbor/css/selectors/selectors.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (C) 2020-2022 Alexander Borisov
+ *
+ * Author: Alexander Borisov
+ */
+
+#include "lexbor/core/print.h"
+#include "lexbor/css/css.h"
+
+/* Forward declarations of the static helpers used by the tables below. */
+static lxb_css_selector_list_t *
+lxb_css_selectors_parse_list(lxb_css_parser_t *parser,
+                             const lxb_css_syntax_cb_components_t *components,
+                             const lxb_char_t *data, size_t length);
+
+static lxb_status_t
+lxb_css_selectors_components_end(lxb_css_parser_t *parser,
+                                 const lxb_css_syntax_token_t *token,
+                                 void *ctx, bool failed);
+
+/*
+ * Callback sets, one per selector grammar entry point.  They differ only in
+ * the initial .state handler; .block is always NULL, .failed is always
+ * lxb_css_state_failed and .end is always lxb_css_selectors_components_end.
+ */
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_complex_list_cb = {
+    .state = lxb_css_selectors_state_complex_list,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_compound_list_cb = {
+    .state = lxb_css_selectors_state_compound_list,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_simple_list_cb = {
+    .state = lxb_css_selectors_state_simple_list,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_relative_list_cb = {
+    .state = lxb_css_selectors_state_relative_list,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_complex_cb = {
+    .state = lxb_css_selectors_state_complex,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_compound_cb = {
+    .state =
lxb_css_selectors_state_compound,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_simple_cb = {
+    .state = lxb_css_selectors_state_simple,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+static const lxb_css_syntax_cb_components_t lxb_css_selectors_relative_cb = {
+    .state = lxb_css_selectors_state_relative,
+    .block = NULL,
+    .failed = lxb_css_state_failed,
+    .end = lxb_css_selectors_components_end
+};
+
+/*
+ * Allocates a zero-filled lxb_css_selectors_t with lexbor_calloc().
+ * Returns NULL when the allocation fails.
+ */
+lxb_css_selectors_t *
+lxb_css_selectors_create(void)
+{
+    return lexbor_calloc(1, sizeof(lxb_css_selectors_t));
+}
+/*
+ * Puts `selectors` into its initial state: no lists, no parent, descendant
+ * as both the pending and the default combinator, no error flags.  Returns
+ * LXB_STATUS_ERROR_OBJECT_IS_NULL for a NULL argument, LXB_STATUS_OK
+ * otherwise.
+ */
+lxb_status_t
+lxb_css_selectors_init(lxb_css_selectors_t *selectors)
+{
+    if (selectors == NULL) {
+        return LXB_STATUS_ERROR_OBJECT_IS_NULL;
+    }
+
+    selectors->list = NULL;
+    selectors->list_last = NULL;
+    selectors->parent = NULL;
+    selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT;
+    selectors->comb_default = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT;
+    selectors->error = 0;
+    selectors->err_in_function = false;
+    selectors->failed = false;
+
+    return LXB_STATUS_OK;
+}
+/*
+ * Resets the same fields to the same values as lxb_css_selectors_init().
+ * NULL is accepted and ignored.  The lists that were referenced are only
+ * forgotten, not destroyed.
+ */
+void
+lxb_css_selectors_clean(lxb_css_selectors_t *selectors)
+{
+    if (selectors != NULL) {
+        selectors->list = NULL;
+        selectors->list_last = NULL;
+        selectors->parent = NULL;
+        selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT;
+        selectors->comb_default = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT;
+        selectors->error = 0;
+        selectors->err_in_function = false;
+        selectors->failed = false;
+    }
+}
+/*
+ * With self_destroy the object is released with lexbor_free() and that
+ * call's result is returned; otherwise `selectors` is returned unchanged.
+ * NULL in, NULL out.  Selector lists are not destroyed here.
+ */
+lxb_css_selectors_t *
+lxb_css_selectors_destroy(lxb_css_selectors_t *selectors, bool self_destroy)
+{
+    if (selectors == NULL) {
+        return NULL;
+    }
+
+    if (self_destroy) {
+        return lexbor_free(selectors);
+    }
+
+    return selectors;
+}
+/* Default entry point: same as lxb_css_selectors_parse_complex_list(). */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse(lxb_css_parser_t *parser,
+                        const lxb_char_t *data, size_t length)
+{
+    return
lxb_css_selectors_parse_complex_list(parser, data, length);
+}
+/* Parses `data` starting in the complex selector list state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_complex_list(lxb_css_parser_t *parser,
+                                     const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_complex_list_cb,
+                                        data, length);
+}
+/* Parses `data` starting in the compound selector list state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_compound_list(lxb_css_parser_t *parser,
+                                      const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_compound_list_cb,
+                                        data, length);
+}
+/* Parses `data` starting in the simple selector list state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_simple_list(lxb_css_parser_t *parser,
+                                    const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_simple_list_cb,
+                                        data, length);
+}
+/* Parses `data` starting in the relative selector list state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_relative_list(lxb_css_parser_t *parser,
+                                      const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_relative_list_cb,
+                                        data, length);
+}
+/*
+ * Readies the parser for one run.  A parser that is currently in the RUN
+ * stage is rejected with LXB_STATUS_ERROR_WRONG_ARGS; any other non-clean
+ * stage is cleaned first.  Then the tokenizer's with_comment flag is
+ * switched off, the stage becomes RUN, the parser's current memory and
+ * selectors are saved in old_memory / old_selectors and replaced by the
+ * arguments.
+ */
+static lxb_status_t
+lxb_css_selectors_parse_prepare(lxb_css_parser_t *parser,
+                                lxb_css_memory_t *memory,
+                                lxb_css_selectors_t *selectors)
+{
+    if (parser->stage != LXB_CSS_PARSER_CLEAN) {
+        if (parser->stage == LXB_CSS_PARSER_RUN) {
+            return LXB_STATUS_ERROR_WRONG_ARGS;
+        }
+
+        lxb_css_parser_clean(parser);
+    }
+
+    parser->tkz->with_comment = false;
+    parser->stage = LXB_CSS_PARSER_RUN;
+
+    parser->old_memory = parser->memory;
+    parser->old_selectors = parser->selectors;
+
+    parser->memory = memory;
+    parser->selectors = selectors;
+
+    return LXB_STATUS_OK;
+}
+/*
+ * Feeds `data` to the parser, pushes a top-level components rule that uses
+ * the given callback set and runs the syntax parser.  Returns the head of
+ * the resulting selector list chain, or NULL when the rule cannot be pushed
+ * or the run does not finish with LXB_STATUS_OK.
+ */
+static lxb_css_selector_list_t *
+lxb_css_selectors_parse_process(lxb_css_parser_t *parser,
+                                const lxb_css_syntax_cb_components_t *components,
+                                const lxb_char_t *data, size_t length)
+{
+    lxb_css_syntax_rule_t *rule;
+
+    lxb_css_parser_buffer_set(parser, data, length);
+
+    rule = lxb_css_syntax_parser_components_push(parser, NULL, NULL,
+                                                 components, NULL,
+                                                 LXB_CSS_SYNTAX_TOKEN_UNDEF);
+    if (rule
== NULL) {
+        return NULL;
+    }
+
+    parser->status = lxb_css_syntax_parser_run(parser);
+    if (parser->status != LXB_STATUS_OK) {
+        return NULL;
+    }
+
+    return parser->selectors->list;
+}
+/*
+ * Counterpart of lxb_css_selectors_parse_prepare(): marks the parser as
+ * ended and puts the saved memory and selectors objects back.
+ */
+static void
+lxb_css_selectors_parse_finish(lxb_css_parser_t *parser)
+{
+    parser->stage = LXB_CSS_PARSER_END;
+
+    parser->memory = parser->old_memory;
+    parser->selectors = parser->old_selectors;
+}
+/*
+ * Common implementation of all lxb_css_selectors_parse_*() entry points.
+ *
+ * The parser's own selectors and memory objects are used when they are
+ * set (the selectors object is cleaned first).  Missing ones are created
+ * for this call; the temporary memory is initialised with a prepare count
+ * of 256.  The temporary selectors object is always destroyed before
+ * returning.  The temporary memory is destroyed only when no list is
+ * returned; otherwise it stays alive because the returned lists were
+ * created from it.
+ *
+ * Returns the head of the parsed list chain or NULL; details are left in
+ * parser->status.
+ */
+static lxb_css_selector_list_t *
+lxb_css_selectors_parse_list(lxb_css_parser_t *parser,
+                             const lxb_css_syntax_cb_components_t *components,
+                             const lxb_char_t *data, size_t length)
+{
+    lxb_css_memory_t *memory;
+    lxb_css_selectors_t *selectors;
+    lxb_css_selector_list_t *list;
+
+    memory = parser->memory;
+    selectors = parser->selectors;
+
+    if (selectors == NULL) {
+        selectors = lxb_css_selectors_create();
+        parser->status = lxb_css_selectors_init(selectors); /* NULL-safe */
+
+        if (parser->status != LXB_STATUS_OK) {
+            (void) lxb_css_selectors_destroy(selectors, true);
+            return NULL;
+        }
+    }
+    else {
+        lxb_css_selectors_clean(selectors);
+    }
+
+    if (memory == NULL) {
+        memory = lxb_css_memory_create();
+        parser->status = lxb_css_memory_init(memory, 256);
+
+        if (parser->status != LXB_STATUS_OK) {
+            if (selectors != parser->selectors) { /* created above */
+                (void) lxb_css_selectors_destroy(selectors, true);
+            }
+
+            (void) lxb_css_memory_destroy(memory, true);
+            return NULL;
+        }
+    }
+
+    parser->status = lxb_css_selectors_parse_prepare(parser, memory, selectors);
+    if (parser->status != LXB_STATUS_OK) {
+        list = NULL;
+        goto end;
+    }
+
+    list = lxb_css_selectors_parse_process(parser, components, data, length);
+
+    lxb_css_selectors_parse_finish(parser);
+
+end:
+
+    if (list == NULL && memory != parser->memory) { /* temporary, unused */
+        (void) lxb_css_memory_destroy(memory, true);
+    }
+
+    if (selectors != parser->selectors) { /* temporary */
+        (void) lxb_css_selectors_destroy(selectors, true);
+    }
+
+    return list;
+}
+/*
+ * .end callback shared by all callback sets of this file.
+ */
+static lxb_status_t
+lxb_css_selectors_components_end(lxb_css_parser_t *parser,
+                                 const lxb_css_syntax_token_t *token,
+                                 void *ctx, bool
failed)
+{
+    lxb_css_selector_list_t *list;
+    lxb_css_selectors_t *selectors = parser->selectors;
+
+    if (failed) { /* drop the list that was being built */
+        list = selectors->list_last;
+
+        if (list != NULL) {
+            lxb_css_selector_list_selectors_remove(selectors, list);
+            lxb_css_selector_list_destroy(list);
+        }
+    }
+
+    return LXB_STATUS_OK; /* token and ctx are not used */
+}
+/* Parses `data` starting in the single complex selector state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_complex(lxb_css_parser_t *parser,
+                                const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_complex_cb,
+                                        data, length);
+}
+/* Parses `data` starting in the single compound selector state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_compound(lxb_css_parser_t *parser,
+                                 const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_compound_cb,
+                                        data, length);
+}
+/* Parses `data` starting in the single simple selector state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_simple(lxb_css_parser_t *parser,
+                               const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_simple_cb,
+                                        data, length);
+}
+/* Parses `data` starting in the single relative selector state. */
+lxb_css_selector_list_t *
+lxb_css_selectors_parse_relative(lxb_css_parser_t *parser,
+                                 const lxb_char_t *data, size_t length)
+{
+    return lxb_css_selectors_parse_list(parser, &lxb_css_selectors_relative_cb,
+                                        data, length);
+}
diff --git a/ext/dom/lexbor/lexbor/css/selectors/state.c b/ext/dom/lexbor/lexbor/css/selectors/state.c
new file mode 100644
index 00000000000..873e7d31cfd
--- /dev/null
+++ b/ext/dom/lexbor/lexbor/css/selectors/state.c
@@ -0,0 +1,2053 @@
+/*
+ * Copyright (C) 2020-2022 Alexander Borisov
+ *
+ * Author: Alexander Borisov
+ */
+
+#include "lexbor/css/parser.h"
+#include "lexbor/css/css.h"
+#include "lexbor/css/selectors/selectors.h"
+#include "lexbor/css/selectors/pseudo.h"
+#include "lexbor/css/selectors/pseudo_const.h"
+
+
+static const char lxb_css_selectors_module_name[] = "Selectors";
+
+/*
+ * Duplicates the string of the current token into `name`, using
+ * (parser)->memory->mraw.  The macro relies on `status`, `token` and
+ * `parser` being in scope at the call site, does not use its `selectors`
+ * argument, and makes the enclosing function return `status` on failure.
+ */
+#define lxb_css_selectors_state_string_dup_m(selectors, name)                 \
+    do {                                                                      \
+        (status) = lxb_css_syntax_token_string_dup(                           \
+                    lxb_css_syntax_token_string(token), (name),               \
+
(parser)->memory->mraw); \ + if ((status) != LXB_STATUS_OK) { \ + return (status); \ + } \ + } \ + while (false) + +#define lxb_css_selectors_state_append(parser, selectors, selector) \ + do { \ + (selector) = lxb_css_selector_create((selectors)->list_last); \ + if ((selector) == NULL) { \ + return lxb_css_parser_memory_fail(parser); \ + } \ + \ + lxb_css_selectors_append_next((selectors), (selector)); \ + \ + (selector)->combinator = (selectors)->combinator; \ + (selectors)->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE; \ + } \ + while (false) + +/* + * Creates a selector list from (parser)->memory, appends it to (selectors) + * and copies (selectors)->parent into the new list. On allocation failure + * the enclosing function returns lxb_css_parser_memory_fail(parser). + * Every macro argument is parenthesized so that any expression, not only + * a member access, can be passed as `selectors`. + */ +#define lxb_css_selectors_state_list_append(parser, selectors, list) \ + do { \ + (list) = lxb_css_selector_list_create((parser)->memory); \ + if ((list) == NULL) { \ + return lxb_css_parser_memory_fail(parser); \ + } \ + \ + lxb_css_selectors_list_append_next((selectors), (list)); \ + \ + (list)->parent = (selectors)->parent; \ + } \ + while (false) + + +static bool +lxb_css_selectors_state_complex_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_relative_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_relative_list_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_relative_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_relative_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool list, bool root); + +static bool +lxb_css_selectors_state_complex_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_complex_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_complex_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t
*token, + void *ctx, bool root); + +static bool +lxb_css_selectors_state_compound_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_compound_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_compound_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_compound_sub(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_compound_pseudo(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_simple_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_simple_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_simple_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static bool +lxb_css_selectors_state_simple_back(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx); + +static lxb_status_t +lxb_css_selectors_state_hash(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_selectors_state_class(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_selectors_state_element_ns(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_selectors_state_element(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_selectors_state_attribute(lxb_css_parser_t *parser); + +static lxb_status_t +lxb_css_selectors_state_ns(lxb_css_parser_t *parser, + lxb_css_selector_t *selector); + +static lxb_status_t +lxb_css_selectors_state_ns_ident(lxb_css_parser_t 
*parser, + lxb_css_selector_t *selector); + +static lxb_status_t +lxb_css_selectors_state_pseudo_class(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_selectors_state_pseudo_class_function(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_parser_state_f success); + +static lxb_status_t +lxb_css_selectors_state_pseudo_element(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_selectors_state_pseudo_element_function(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_parser_state_f success); + +static lxb_status_t +lxb_css_selectors_state_forgiving_cb(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, lxb_css_parser_state_f state, + bool failed); + +static void +lxb_css_selectors_state_restore_parent(lxb_css_selectors_t *selectors, + lxb_css_selector_list_t *last); + +static bool +lxb_css_selectors_state_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_parser_state_f state); + +static bool +lxb_css_selectors_state_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + +static const lxb_css_syntax_token_t * +lxb_css_selectors_state_function_error(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + + +lxb_inline bool +lxb_css_selectors_done(lxb_css_parser_t *parser) +{ + lxb_css_parser_states_pop(parser); + + return lxb_css_parser_states_set_back(parser); +} + +lxb_inline void +lxb_css_selectors_state_specificity_set_b(lxb_css_selectors_t *selectors) +{ + lxb_css_selector_list_t *last = selectors->list_last; + + if (selectors->parent == NULL) { + lxb_css_selector_sp_set_b(last->specificity, + lxb_css_selector_sp_b(last->specificity) + 1); + } + else if (last->specificity > LXB_CSS_SELECTOR_SP_B_MAX) { + if (selectors->combinator == LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + lxb_css_selector_sp_set_b(last->specificity, 1); + } 
+ } + else { + if (selectors->combinator != LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + last->specificity = 0; + } + + lxb_css_selector_sp_set_b(last->specificity, 1); + } +} + +/* + * Updates the "c" component of the specificity of selectors->list_last: + * incremented by one when selectors->parent is NULL, otherwise set to 1 + * depending on the current specificity value and the pending combinator. + */ +lxb_inline void +lxb_css_selectors_state_specificity_set_c(lxb_css_selectors_t *selectors) +{ + lxb_css_selector_list_t *last = selectors->list_last; + + if (selectors->parent == NULL) { + lxb_css_selector_sp_set_c(last->specificity, + lxb_css_selector_sp_c(last->specificity) + 1); + } + else if (last->specificity > LXB_CSS_SELECTOR_SP_C_MAX) { + if (selectors->combinator == LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + lxb_css_selector_sp_set_c(last->specificity, 1); + } + } + else { + if (selectors->combinator != LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + last->specificity = 0; + } + + lxb_css_selector_sp_set_c(last->specificity, 1); + } +} + +/* + * Keeps the larger specificity of the last list and its predecessor in the + * last list, then resets the predecessor's specificity to 0. + * NOTE(review): last->prev is dereferenced without a NULL check; presumably + * callers guarantee a previous list exists - confirm. + */ +lxb_inline void +lxb_css_selectors_state_func_specificity(lxb_css_selectors_t *selectors) +{ + lxb_css_selector_list_t *prev, *last; + + last = selectors->list_last; + prev = last->prev; + + if (prev->specificity > last->specificity) { + last->specificity = prev->specificity; + } + + prev->specificity = 0; +} + +/* + * Entry state: passes lxb_css_selectors_state_complex_wo_root and + * lxb_css_selectors_state_complex_list_end to lxb_css_parser_states_next() + * and returns false; a NULL result is reported through + * lxb_css_parser_memory_fail(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a complex selector list) - confirm. + */ +bool +lxb_css_selectors_state_complex_list(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_complex_wo_root, + lxb_css_selectors_state_complex_list_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* Delegates to lxb_css_selectors_state_list_end() with the complex state. */ +static bool +lxb_css_selectors_state_complex_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_selectors_state_list_end(parser, token, + lxb_css_selectors_state_complex_wo_root); +} + +/* + * Entry state: passes lxb_css_selectors_state_relative_list_wo_root and + * lxb_css_selectors_state_relative_list_end to lxb_css_parser_states_next(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a relative selector list) - confirm. + */ +bool +lxb_css_selectors_state_relative_list(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_parser_state_t *states; + + states =
lxb_css_parser_states_next(parser, + lxb_css_selectors_state_relative_list_wo_root, + lxb_css_selectors_state_relative_list_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* Delegates to lxb_css_selectors_state_list_end() with the relative-list state. */ +static bool +lxb_css_selectors_state_relative_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_selectors_state_list_end(parser, token, + lxb_css_selectors_state_relative_list_wo_root); +} + +/* + * Entry state: passes lxb_css_selectors_state_relative_wo_root and + * lxb_css_selectors_state_end to lxb_css_parser_states_next() and returns + * false; a NULL result is reported through lxb_css_parser_memory_fail(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a relative selector) - confirm. + */ +bool +lxb_css_selectors_state_relative(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_relative_wo_root, + lxb_css_selectors_state_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* Relative handler with list = true, root = false. */ +static bool +lxb_css_selectors_state_relative_list_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_selectors_state_relative_handler(parser, token, ctx, true, + false); +} + +/* Relative handler with list = false, root = false. */ +static bool +lxb_css_selectors_state_relative_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_selectors_state_relative_handler(parser, token, ctx, false, + false); +} + +/* + * Reads an optional leading combinator into selectors->combinator and then + * schedules the compound state; `list` selects which state is passed as + * `back`, `root` is forwarded to lxb_css_parser_states_next(). + */ +static bool +lxb_css_selectors_state_relative_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool list, bool root) +{ + lxb_css_parser_state_f back; + lxb_css_parser_state_t *states; + lxb_css_selectors_t *selectors = parser->selectors; + + /* Leading combinator: whitespace, '>', '+', '~' or '||'. */ + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + lxb_css_syntax_parser_consume(parser); + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT; + return true; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + switch (lxb_css_syntax_token_delim_char(token)) { + case '>': + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_CHILD; + break; + +
case '+': + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_SIBLING; + break; + + case '~': + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING; + break; + + case '|': + lxb_css_parser_token_next_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_DELIM + && lxb_css_syntax_token_delim_char(token) == '|') + { + lxb_css_syntax_parser_consume(parser); + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_CELL; + break; + } + + goto done; + + default: + goto done; + } + + break; + + default: + goto done; + } + + lxb_css_syntax_parser_consume(parser); + +done: + + /* In list mode the compound state hands back to complex_end, otherwise to end. */ + back = (list) ? lxb_css_selectors_state_complex_end + : lxb_css_selectors_state_end; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_compound_wo_root, + back, ctx, root); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return true; +} + +/* + * Entry state: passes lxb_css_selectors_state_complex_wo_root and + * lxb_css_selectors_state_end to lxb_css_parser_states_next() and returns + * false; a NULL result is reported through lxb_css_parser_memory_fail(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a complex selector) - confirm. + */ +bool +lxb_css_selectors_state_complex(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_complex_wo_root, + lxb_css_selectors_state_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* Complex handler with root = false. */ +static bool +lxb_css_selectors_state_complex_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_selectors_state_complex_handler(parser, token, ctx, false); +} + +/* + * Schedules the compound state with lxb_css_selectors_state_complex_end as + * the second state argument; `root` is forwarded unchanged. + */ +static bool +lxb_css_selectors_state_complex_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool root) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_compound_wo_root, + lxb_css_selectors_state_complex_end, + ctx, root); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +static bool +lxb_css_selectors_state_complex_end(lxb_css_parser_t *parser, +
const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_selectors_t *selectors = parser->selectors; + + /* Combinator between compounds: whitespace, '>', '+', '~' or '||'. */ + +again: + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + lxb_css_syntax_parser_consume(parser); + + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT; + + lxb_css_parser_token_m(parser, token); + goto again; + + case LXB_CSS_SYNTAX_TOKEN__END: + return lxb_css_selectors_done(parser); + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + switch (lxb_css_syntax_token_delim_char(token)) { + case '>': + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_CHILD; + break; + + case '+': + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_SIBLING; + break; + + case '~': + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING; + break; + + case '|': + lxb_css_parser_token_next_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_DELIM + && lxb_css_syntax_token_delim_char(token) == '|') + { + lxb_css_syntax_parser_consume(parser); + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_CELL; + break; + } + + goto done; + + default: + /* Any other delimiter is only accepted after whitespace. */ + if (selectors->combinator != LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT) { + goto unexpected; + } + + goto done; + } + + break; + + case LXB_CSS_SYNTAX_TOKEN_COMMA: + return lxb_css_selectors_done(parser); + + default: + /* A following compound without whitespace in between is rejected. */ + if (selectors->combinator != LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT) { + goto unexpected; + } + + goto done; + } + + lxb_css_syntax_parser_consume(parser); + +done: + + lxb_css_parser_state_set(parser, lxb_css_selectors_state_compound_handler); + + return true; + +unexpected: + + (void) lxb_css_selectors_done(parser); + + return lxb_css_parser_unexpected(parser); +} + +/* + * Entry state: passes lxb_css_selectors_state_compound_wo_root and + * lxb_css_selectors_state_compound_list_end to lxb_css_parser_states_next(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a compound selector list) - confirm. + */ +bool +lxb_css_selectors_state_compound_list(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_compound_wo_root, + lxb_css_selectors_state_compound_list_end, + ctx,
true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* Delegates to lxb_css_selectors_state_list_end() with the compound state. */ +static bool +lxb_css_selectors_state_compound_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_selectors_state_list_end(parser, token, + lxb_css_selectors_state_compound_wo_root); +} + +/* + * Entry state: passes lxb_css_selectors_state_compound_wo_root and + * lxb_css_selectors_state_end to lxb_css_parser_states_next() and returns + * false; a NULL result is reported through lxb_css_parser_memory_fail(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a compound selector) - confirm. + */ +bool +lxb_css_selectors_state_compound(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_compound_wo_root, + lxb_css_selectors_state_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* + * Appends a fresh selector list to parser->selectors and switches the + * current state to the compound handler; returns false. + */ +static bool +lxb_css_selectors_state_compound_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_selector_list_t *list; + + lxb_css_selectors_state_list_append(parser, parser->selectors, list); + + lxb_css_parser_state_set(parser, lxb_css_selectors_state_compound_handler); + + return false; +} + +/* + * Parses the first simple selector of a compound, dispatching on the token + * type; the state is set to lxb_css_selectors_state_compound_sub up front. + */ +static bool +lxb_css_selectors_state_compound_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_status_t status; + lxb_css_selectors_t *selectors; + +again: + + lxb_css_parser_state_set(parser, lxb_css_selectors_state_compound_sub); + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_HASH: + status = lxb_css_selectors_state_hash(parser, token); + break; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + switch (lxb_css_syntax_token_delim_char(token)) { + case '.': + lxb_css_syntax_parser_consume(parser); + status = lxb_css_selectors_state_class(parser, token); + break; + + case '|': + case '*': + status = lxb_css_selectors_state_element_ns(parser, token); + break; + + default: + goto unexpected; + } + + break; + + case LXB_CSS_SYNTAX_TOKEN_IDENT: + status = lxb_css_selectors_state_element(parser, token); + break; + + case
LXB_CSS_SYNTAX_TOKEN_LS_BRACKET: + lxb_css_syntax_parser_consume(parser); + status = lxb_css_selectors_state_attribute(parser); + break; + + case LXB_CSS_SYNTAX_TOKEN_COLON: + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + status = lxb_css_selectors_state_pseudo_class(parser, token); + break; + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_COLON) { + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + lxb_css_parser_state_set(parser, + lxb_css_selectors_state_compound_pseudo); + status = lxb_css_selectors_state_pseudo_element(parser, token); + break; + } + else if (token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + return lxb_css_parser_unexpected(parser); + } + + status = lxb_css_selectors_state_pseudo_element_function(parser, token, + lxb_css_selectors_state_compound_pseudo); + break; + } + else if (token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + goto unexpected; + } + + status = lxb_css_selectors_state_pseudo_class_function(parser, token, + lxb_css_selectors_state_compound_sub); + break; + + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + goto again; + + case LXB_CSS_SYNTAX_TOKEN__END: + selectors = parser->selectors; + + if (selectors->combinator > LXB_CSS_SELECTOR_COMBINATOR_CLOSE + || selectors->list_last->first == NULL) + { + goto unexpected; + } + + return lxb_css_selectors_done(parser); + + default: + goto unexpected; + } + + if (status == LXB_STATUS_OK) { + return true; + } + + if (status == LXB_STATUS_ERROR_MEMORY_ALLOCATION) { + return lxb_css_parser_memory_fail(parser); + } + +unexpected: + + (void) lxb_css_parser_states_to_root(parser); + (void) lxb_css_parser_states_set_back(parser); + + return lxb_css_parser_unexpected(parser); +} + +static bool +lxb_css_selectors_state_compound_sub(lxb_css_parser_t 
*parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_status_t status; + + /* */ + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_HASH: + status = lxb_css_selectors_state_hash(parser, token); + break; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + switch (lxb_css_syntax_token_delim_char(token)) { + case '.': + lxb_css_syntax_parser_consume(parser); + status = lxb_css_selectors_state_class(parser, token); + break; + + default: + return lxb_css_parser_states_set_back(parser); + } + + break; + + case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET: + lxb_css_syntax_parser_consume(parser); + status = lxb_css_selectors_state_attribute(parser); + break; + + case LXB_CSS_SYNTAX_TOKEN_COLON: + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + status = lxb_css_selectors_state_pseudo_class(parser, token); + break; + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_COLON) { + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + lxb_css_parser_state_set(parser, + lxb_css_selectors_state_compound_pseudo); + status = lxb_css_selectors_state_pseudo_element(parser, + token); + break; + } + else if (token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + return lxb_css_parser_unexpected(parser); + } + + status = lxb_css_selectors_state_pseudo_element_function(parser, + token, lxb_css_selectors_state_compound_pseudo); + break; + } + else if (token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + return lxb_css_parser_unexpected(parser); + } + + status = lxb_css_selectors_state_pseudo_class_function(parser, token, + lxb_css_selectors_state_compound_sub); + break; + + default: + return lxb_css_parser_states_set_back(parser); + } + + if (status == LXB_STATUS_OK) { + return true; + } + + if (status == LXB_STATUS_ERROR_MEMORY_ALLOCATION) { + return lxb_css_parser_memory_fail(parser); + } + + return lxb_css_parser_unexpected(parser); +} 
+ +/* + * State used after a pseudo-element: only further ':'-introduced pseudo + * selectors are parsed here; any other token returns through + * lxb_css_parser_states_set_back(). + */ +static bool +lxb_css_selectors_state_compound_pseudo(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_status_t status; + + if (token->type != LXB_CSS_SYNTAX_TOKEN_COLON) { + return lxb_css_parser_states_set_back(parser); + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + status = lxb_css_selectors_state_pseudo_class(parser, token); + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_COLON) { + /* Second ':' - a pseudo-element name or function must follow. */ + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + status = lxb_css_selectors_state_pseudo_element(parser, token); + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + status = lxb_css_selectors_state_pseudo_element_function(parser, token, + lxb_css_selectors_state_compound_pseudo); + } + else { + return lxb_css_parser_unexpected(parser); + } + } + else if (token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + return lxb_css_parser_unexpected(parser); + } + else { + status = lxb_css_selectors_state_pseudo_class_function(parser, token, + lxb_css_selectors_state_compound_pseudo); + } + + if (status == LXB_STATUS_OK) { + return true; + } + + if (status == LXB_STATUS_ERROR_MEMORY_ALLOCATION) { + return lxb_css_parser_memory_fail(parser); + } + + return lxb_css_parser_unexpected(parser); +} + +/* + * Entry state: passes lxb_css_selectors_state_simple_wo_root and + * lxb_css_selectors_state_simple_list_end to lxb_css_parser_states_next() + * and returns false; a NULL result is reported through + * lxb_css_parser_memory_fail(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a simple selector list) - confirm. + */ +bool +lxb_css_selectors_state_simple_list(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, lxb_css_selectors_state_simple_wo_root, + lxb_css_selectors_state_simple_list_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +static bool +lxb_css_selectors_state_simple_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return
lxb_css_selectors_state_list_end(parser, token, + lxb_css_selectors_state_simple_wo_root); +} + +/* + * Entry state: passes lxb_css_selectors_state_simple_wo_root and + * lxb_css_selectors_state_end to lxb_css_parser_states_next() and returns + * false; a NULL result is reported through lxb_css_parser_memory_fail(). + * NOTE(review): the original header text appears to have been stripped + * (presumably the grammar production for a simple selector) - confirm. + */ +bool +lxb_css_selectors_state_simple(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_t *states; + + states = lxb_css_parser_states_next(parser, + lxb_css_selectors_state_simple_wo_root, + lxb_css_selectors_state_end, + ctx, true); + if (states == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + return false; +} + +/* + * Appends a fresh selector list to parser->selectors and switches the + * current state to the simple handler; returns false. + */ +static bool +lxb_css_selectors_state_simple_wo_root(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_css_selector_list_t *list; + + lxb_css_selectors_state_list_append(parser, parser->selectors, list); + + lxb_css_parser_state_set(parser, lxb_css_selectors_state_simple_handler); + + return false; +} + +/* + * Parses exactly one simple selector, dispatching on the token type; the + * state is set to lxb_css_selectors_state_simple_back up front. + */ +static bool +lxb_css_selectors_state_simple_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + lxb_status_t status; + +again: + + lxb_css_parser_state_set(parser, lxb_css_selectors_state_simple_back); + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_HASH: + status = lxb_css_selectors_state_hash(parser, token); + break; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + switch (lxb_css_syntax_token_delim_char(token)) { + case '.': + lxb_css_syntax_parser_consume(parser); + status = lxb_css_selectors_state_class(parser, token); + break; + + case '|': + case '*': + status = lxb_css_selectors_state_element_ns(parser, token); + break; + + default: + goto unexpected; + } + + break; + + case LXB_CSS_SYNTAX_TOKEN_IDENT: + status = lxb_css_selectors_state_element(parser, token); + break; + + case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET: + lxb_css_syntax_parser_consume(parser); + status = lxb_css_selectors_state_attribute(parser); + break; + + case LXB_CSS_SYNTAX_TOKEN_COLON: + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + status =
lxb_css_selectors_state_pseudo_class(parser, token); + break; + } + else if (token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) { + goto unexpected; + } + + status = lxb_css_selectors_state_pseudo_class_function(parser, token, + lxb_css_selectors_state_simple_back); + break; + + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_m(parser, token); + goto again; + + case LXB_CSS_SYNTAX_TOKEN__END: + return lxb_css_parser_states_set_back(parser); + + default: + goto unexpected; + } + + if (status == LXB_STATUS_OK) { + return true; + } + + if (status == LXB_STATUS_ERROR_MEMORY_ALLOCATION) { + return lxb_css_parser_memory_fail(parser); + } + +unexpected: + + (void) lxb_css_parser_states_set_back(parser); + + return lxb_css_parser_unexpected(parser); +} + +static bool +lxb_css_selectors_state_simple_back(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx) +{ + return lxb_css_parser_states_set_back(parser); +} + +static lxb_status_t +lxb_css_selectors_state_hash(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + lxb_status_t status; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + lxb_css_selector_list_t *last; + + selectors = parser->selectors; + last = selectors->list_last; + + if (selectors->parent == NULL) { + lxb_css_selector_sp_set_a(last->specificity, + lxb_css_selector_sp_a(last->specificity) + 1); + } + else if (lxb_css_selector_sp_a(last->specificity) == 0) { + if (selectors->combinator != LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + last->specificity = 0; + } + + lxb_css_selector_sp_set_a(last->specificity, 1); + } + + lxb_css_selectors_state_append(parser, selectors, selector); + + selector->type = LXB_CSS_SELECTOR_TYPE_ID; + + status = lxb_css_syntax_token_string_dup(lxb_css_syntax_token_string(token), + &selector->name, parser->memory->mraw); + lxb_css_syntax_parser_consume(parser); + + return status; +} + +static lxb_status_t 
+lxb_css_selectors_state_class(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + lxb_status_t status; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + + lxb_css_parser_token_status_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + selectors = parser->selectors; + + lxb_css_selectors_state_specificity_set_b(selectors); + lxb_css_selectors_state_append(parser, selectors, selector); + + selector->type = LXB_CSS_SELECTOR_TYPE_CLASS; + + status = lxb_css_syntax_token_string_dup(lxb_css_syntax_token_string(token), + &selector->name, parser->memory->mraw); + lxb_css_syntax_parser_consume(parser); + + return status; + } + + return lxb_css_parser_unexpected_status(parser); +} + +static lxb_status_t +lxb_css_selectors_state_element_ns(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + + selectors = parser->selectors; + + lxb_css_selectors_state_append(parser, selectors, selector); + + selector->type = LXB_CSS_SELECTOR_TYPE_ANY; + + selector->name.data = lexbor_mraw_alloc(parser->memory->mraw, 2); + if (selector->name.data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + selector->name.data[0] = '*'; + selector->name.data[1] = '\0'; + selector->name.length = 1; + + if (lxb_css_syntax_token_delim_char(token) == '*') { + lxb_css_syntax_parser_consume(parser); + return lxb_css_selectors_state_ns(parser, selector); + } + + lxb_css_syntax_parser_consume(parser); + + return lxb_css_selectors_state_ns_ident(parser, selector); +} + +static lxb_status_t +lxb_css_selectors_state_element(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + lxb_status_t status; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + + selectors = parser->selectors; + + lxb_css_selectors_state_specificity_set_c(selectors); + + lxb_css_selectors_state_append(parser, selectors, selector); + + selector->type = 
LXB_CSS_SELECTOR_TYPE_ELEMENT; + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + lxb_css_syntax_parser_consume(parser); + + return lxb_css_selectors_state_ns(parser, selector); +} + + +static lxb_status_t +lxb_css_selectors_state_attribute(lxb_css_parser_t *parser) +{ + lxb_char_t modifier; + lxb_status_t status; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + const lxb_css_syntax_token_t *token; + lxb_css_selector_attribute_t *attribute; + + selectors = parser->selectors; + + lxb_css_selectors_state_append(parser, selectors, selector); + lxb_css_parser_token_status_wo_ws_m(parser, token); + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_DELIM: + if (lxb_css_syntax_token_delim_char(token) != '|') { + goto failed; + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + + if (token->type != LXB_CSS_SYNTAX_TOKEN_IDENT) { + goto failed; + } + + selector->type = LXB_CSS_SELECTOR_TYPE_ATTRIBUTE; + + selector->ns.data = lexbor_mraw_alloc(parser->memory->mraw, 2); + if (selector->ns.data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + selector->ns.data[0] = '*'; + selector->ns.data[1] = '\0'; + selector->ns.length = 1; + + lxb_css_selectors_state_string_dup_m(parser->selectors, + &selector->name); + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + break; + + case LXB_CSS_SYNTAX_TOKEN_IDENT: + selector->type = LXB_CSS_SELECTOR_TYPE_ATTRIBUTE; + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + + if (token->type != LXB_CSS_SYNTAX_TOKEN_DELIM + || lxb_css_syntax_token_delim_char(token) != '|') + { + if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) { + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + } + + break; + } + + lxb_css_syntax_parser_consume(parser); + 
lxb_css_parser_token_status_m(parser, token); + + if (token->type != LXB_CSS_SYNTAX_TOKEN_IDENT) { + attribute = &selector->u.attribute; + attribute->match = LXB_CSS_SELECTOR_MATCH_DASH; + + goto assignment; + } + + selector->ns = selector->name; + lexbor_str_clean_all(&selector->name); + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + break; + + default: + goto failed; + } + + attribute = &selector->u.attribute; + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET: + goto done; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + switch (lxb_css_syntax_token_delim_char(token)) { + case '~': + attribute->match = LXB_CSS_SELECTOR_MATCH_INCLUDE; + break; + + case '|': + attribute->match = LXB_CSS_SELECTOR_MATCH_DASH; + break; + + case '^': + attribute->match = LXB_CSS_SELECTOR_MATCH_PREFIX; + break; + + case '$': + attribute->match = LXB_CSS_SELECTOR_MATCH_SUFFIX; + break; + + case '*': + attribute->match = LXB_CSS_SELECTOR_MATCH_SUBSTRING; + break; + + case '=': + attribute->match = LXB_CSS_SELECTOR_MATCH_EQUAL; + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + goto string_or_ident; + + default: + goto failed; + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + + break; + + default: + goto failed; + } + +assignment: + + if (token->type != LXB_CSS_SYNTAX_TOKEN_DELIM + || lxb_css_syntax_token_delim_char(token) != '=') + { + goto failed; + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + +string_or_ident: + + if (token->type != LXB_CSS_SYNTAX_TOKEN_STRING + && token->type != LXB_CSS_SYNTAX_TOKEN_IDENT) + { + goto failed; + } + + lxb_css_selectors_state_string_dup_m(selectors, &attribute->value); + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + + if (token->type 
== LXB_CSS_SYNTAX_TOKEN_RS_BRACKET) { + goto done; + } + + if (token->type != LXB_CSS_SYNTAX_TOKEN_IDENT) { + goto failed; + } + + /* + * The modifier has to be the single identifier "i" or "s" (ASCII + * case-insensitive). Looking at the first byte alone would also accept + * longer identifiers such as "ignore". + */ + if (lxb_css_syntax_token_string(token)->length != 1) { + goto failed; + } + + modifier = *lxb_css_syntax_token_string(token)->data; + + switch (modifier) { + case 'i': + case 'I': + attribute->modifier = LXB_CSS_SELECTOR_MODIFIER_I; + break; + + case 's': + case 'S': + attribute->modifier = LXB_CSS_SELECTOR_MODIFIER_S; + break; + + default: + goto failed; + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + + if (token->type != LXB_CSS_SYNTAX_TOKEN_RS_BRACKET) { + goto failed; + } + +done: + + lxb_css_selectors_state_specificity_set_b(selectors); + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; + +failed: + + return lxb_css_parser_unexpected_status(parser); +} + +/* + * After an element name: if the next token is the '|' delimiter, consume it + * and continue with lxb_css_selectors_state_ns_ident(); otherwise return + * LXB_STATUS_OK and leave the selector as it is. + */ +static lxb_status_t +lxb_css_selectors_state_ns(lxb_css_parser_t *parser, + lxb_css_selector_t *selector) +{ + const lxb_css_syntax_token_t *token; + + lxb_css_parser_token_status_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_DELIM + && lxb_css_syntax_token_delim_char(token) == '|') + { + lxb_css_syntax_parser_consume(parser); + return lxb_css_selectors_state_ns_ident(parser, selector); + } + + return LXB_STATUS_OK; +} + +/* + * Parses what follows "ns|": the name parsed so far is moved into + * selector->ns and the new identifier (or '*') becomes selector->name. + */ +static lxb_status_t +lxb_css_selectors_state_ns_ident(lxb_css_parser_t *parser, + lxb_css_selector_t *selector) +{ + lxb_status_t status; + const lxb_css_syntax_token_t *token; + lxb_css_selectors_t *selectors; + + lxb_css_parser_token_status_m(parser, token); + + if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) { + selectors = parser->selectors; + + lxb_css_selectors_state_specificity_set_c(selectors); + + selector->type = LXB_CSS_SELECTOR_TYPE_ELEMENT; + + selector->ns = selector->name; + lexbor_str_clean_all(&selector->name); + + status = lxb_css_syntax_token_string_dup(lxb_css_syntax_token_string(token), + &selector->name, parser->memory->mraw); + + lxb_css_syntax_parser_consume(parser); + + return status; + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_DELIM + &&
lxb_css_syntax_token_delim_char(token) == '*') + { + lxb_css_syntax_parser_consume(parser); + + selector->type = LXB_CSS_SELECTOR_TYPE_ANY; + + selector->ns = selector->name; + + selector->name.data = lexbor_mraw_alloc(parser->memory->mraw, 2); + if (selector->name.data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + selector->name.data[0] = '*'; + selector->name.data[1] = '\0'; + selector->name.length = 1; + + return LXB_STATUS_OK; + } + + return lxb_css_parser_unexpected_status(parser); +} + +/* + * Handle a plain (non-functional) pseudo-class. + * + * Obtains `selector` through lxb_css_selectors_state_append(), marks it as + * LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS, fills selector->name through + * lxb_css_selectors_state_string_dup_m() and looks the name up with + * lxb_css_selector_pseudo_class_by_name(). + * + * An unknown name, or one of the pseudo-classes listed in the switch below + * (those are logged as not supported), returns + * lxb_css_parser_unexpected_status(). If the log entry itself cannot be + * created, the failure is recorded on the parser and parser->status is + * returned. Otherwise the pseudo id is stored in selector->u.pseudo.type, + * the token is consumed and LXB_STATUS_OK is returned. + */ +static lxb_status_t +lxb_css_selectors_state_pseudo_class(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + lxb_status_t status; + lxb_css_log_message_t *msg; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + const lxb_css_selectors_pseudo_data_t *pseudo; + + selectors = parser->selectors; + + lxb_css_selectors_state_append(parser, selectors, selector); + selector->type = LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS; + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + pseudo = lxb_css_selector_pseudo_class_by_name(selector->name.data, + selector->name.length); + if (pseudo == NULL) { + return lxb_css_parser_unexpected_status(parser); + } + + switch (pseudo->id) { + case LXB_CSS_SELECTOR_PSEUDO_CLASS_CURRENT: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_DEFAULT: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS_VISIBLE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS_WITHIN: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FULLSCREEN: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUTURE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_IN_RANGE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_INDETERMINATE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_INVALID: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_LOCAL_LINK: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_OUT_OF_RANGE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_PAST: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_SCOPE: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_TARGET: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_TARGET_WITHIN: + case
LXB_CSS_SELECTOR_PSEUDO_CLASS_USER_INVALID: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_VALID: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_VISITED: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_WARNING: + msg = lxb_css_log_not_supported(parser->log, + lxb_css_selectors_module_name, + (const char *) selector->name.data); + if (msg == NULL) { + /* + * Same pattern as the pseudo-element handlers: record the + * failure, then return parser->status rather than the + * helper's own return value. + */ + (void) lxb_css_parser_memory_fail(parser); + return parser->status; + } + + return lxb_css_parser_unexpected_status(parser); + + default: + break; + } + + selector->u.pseudo.type = pseudo->id; + selector->u.pseudo.data = NULL; + + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; +} + +static lxb_status_t +lxb_css_selectors_state_pseudo_class_function(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_parser_state_f success) +{ + lxb_status_t status; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + lxb_css_log_message_t *msg; + lxb_css_syntax_rule_t *rule; + const lxb_css_selectors_pseudo_data_func_t *func; + + selectors = parser->selectors; + + lxb_css_selectors_state_append(parser, selectors, selector); + selector->type = LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION; + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + func = lxb_css_selector_pseudo_class_function_by_name(selector->name.data, + selector->name.length); + if (func == NULL) { + return lxb_css_parser_unexpected_status(parser); + } + + switch (func->id) { + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL: + msg = lxb_css_log_not_supported(parser->log, + lxb_css_selectors_module_name, + (const char *) selector->name.data); + if (msg == NULL) { + goto failed; + } + + return lxb_css_parser_unexpected_status(parser); + + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD: + case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD: +
lxb_css_selectors_state_specificity_set_b(selectors); + break; + + default: + break; + } + + selector->u.pseudo.type = func->id; + selector->u.pseudo.data = NULL; + + selectors->combinator = func->combinator; + selectors->comb_default = func->combinator; + selectors->parent = selector; + + rule = lxb_css_syntax_parser_function_push(parser, token, success, + &func->cb, selectors->list_last); + if (rule == NULL) { + goto failed; + } + + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; + +failed: + + (void) lxb_css_parser_memory_fail(parser); + + return parser->status; +} + +static lxb_status_t +lxb_css_selectors_state_pseudo_element(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + lxb_status_t status; + lxb_css_log_message_t *msg; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + const lxb_css_selectors_pseudo_data_t *pseudo; + + selectors = parser->selectors; + + lxb_css_selectors_state_append(parser, selectors, selector); + selector->type = LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT; + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + pseudo = lxb_css_selector_pseudo_element_by_name(selector->name.data, + selector->name.length); + if (pseudo == NULL) { + return lxb_css_parser_unexpected_status(parser); + } + + switch (pseudo->id) { + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BACKDROP: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LINE: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_INACTIVE_SELECTION: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_MARKER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_PLACEHOLDER: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_SELECTION: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_SPELLING_ERROR: + case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TARGET_TEXT: + msg = lxb_css_log_not_supported(parser->log, + 
lxb_css_selectors_module_name, + (const char *) selector->name.data); + if (msg == NULL) { + (void) lxb_css_parser_memory_fail(parser); + return parser->status; + } + + return lxb_css_parser_unexpected_status(parser); + + default: + break; + } + + selector->u.pseudo.type = pseudo->id; + selector->u.pseudo.data = NULL; + + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; +} + +static lxb_status_t +lxb_css_selectors_state_pseudo_element_function(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_parser_state_f success) +{ + lxb_status_t status; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors; + lxb_css_syntax_rule_t *rule; + const lxb_css_selectors_pseudo_data_func_t *func; + + selectors = parser->selectors; + + lxb_css_selectors_state_append(parser, selectors, selector); + selector->type = LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT_FUNCTION; + + lxb_css_selectors_state_string_dup_m(selectors, &selector->name); + + func = lxb_css_selector_pseudo_element_function_by_name(selector->name.data, + selector->name.length); + if (func == NULL) { + return lxb_css_parser_unexpected_status(parser); + } + + selector->u.pseudo.type = func->id; + selector->u.pseudo.data = NULL; + + selectors->combinator = func->combinator; + selectors->comb_default = func->combinator; + selectors->parent = selector; + + rule = lxb_css_syntax_parser_function_push(parser, token, success, + &func->cb, selectors->list_last); + if (rule == NULL) { + (void) lxb_css_parser_memory_fail(parser); + return parser->status; + } + + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; +} + +lxb_inline void +lxb_css_selectors_state_restore_combinator(lxb_css_selectors_t *selectors) +{ + lxb_css_selector_t *parent; + lxb_css_selector_combinator_t comb_default; + const lxb_css_selectors_pseudo_data_func_t *data_func; + + comb_default = LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT; + + if (selectors->parent != NULL) { + parent = selectors->parent; + + if 
(parent->type == LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION) { + data_func = lxb_css_selector_pseudo_class_function_by_id(parent->u.pseudo.type); + } + else { + data_func = lxb_css_selector_pseudo_element_function_by_id(parent->u.pseudo.type); + } + + comb_default = data_func->combinator; + } + + selectors->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE; + selectors->comb_default = comb_default; +} + +lxb_status_t +lxb_css_selectors_state_function_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed) +{ + bool cy; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors = parser->selectors; + + if (token->type == LXB_CSS_SYNTAX_TOKEN__EOF) { + (void) lxb_css_log_format(parser->log, LXB_CSS_LOG_ERROR, + "%s. End Of File in pseudo function", + lxb_css_selectors_module_name); + } + + if (selectors->list_last == NULL) { + lxb_css_selectors_state_restore_parent(selectors, ctx); + goto empty; + } + + lxb_css_selectors_state_restore_parent(selectors, ctx); + + return LXB_STATUS_OK; + +empty: + + selector = selectors->list_last->last; + + cy = selector->type == LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION; + cy = lxb_css_selector_pseudo_function_can_empty(selector->u.pseudo.type, + cy); + if (cy) { + lxb_css_parser_set_ok(parser); + return LXB_STATUS_OK; + } + + (void) lxb_css_log_format(parser->log, LXB_CSS_LOG_ERROR, + "%s. 
Pseudo function can't be empty: %S()", + lxb_css_selectors_module_name, &selector->name); + + lxb_css_selector_remove(selector); + lxb_css_selector_destroy(selector); + + lxb_css_parser_failed_set_by_id(parser, -1, true); + selectors->err_in_function = true; + + return LXB_STATUS_OK; +} + +lxb_status_t +lxb_css_selectors_state_function_forgiving(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed) +{ + return lxb_css_selectors_state_forgiving_cb(parser, token, ctx, + lxb_css_selectors_state_complex_list, + failed); +} + +lxb_status_t +lxb_css_selectors_state_function_forgiving_relative(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed) +{ + return lxb_css_selectors_state_forgiving_cb(parser, token, ctx, + lxb_css_selectors_state_relative_list, + failed); +} + +static lxb_status_t +lxb_css_selectors_state_forgiving_cb(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, lxb_css_parser_state_f state, + bool failed) +{ + bool cy; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors = parser->selectors; + + lxb_css_parser_set_ok(parser); + + if (token->type == LXB_CSS_SYNTAX_TOKEN__EOF) { + (void) lxb_css_log_format(parser->log, LXB_CSS_LOG_ERROR, + "%s. 
End Of File in pseudo function", + lxb_css_selectors_module_name); + } + + if (selectors->list_last == NULL) { + lxb_css_selectors_state_restore_parent(selectors, ctx); + goto empty; + } + + if (selectors->parent->u.pseudo.type + == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_WHERE) + { + selectors->list_last->specificity = 0; + } + + lxb_css_selectors_state_restore_parent(selectors, ctx); + + return LXB_STATUS_OK; + +empty: + + selector = selectors->list_last->last; + + cy = selector->type == LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION; + cy = lxb_css_selector_pseudo_function_can_empty(selector->u.pseudo.type, + cy); + if (cy) { + return LXB_STATUS_OK; + } + + (void) lxb_css_log_format(parser->log, LXB_CSS_LOG_ERROR, + "%s. Pseudo function can't be empty: %S()", + lxb_css_selectors_module_name, &selector->name); + + lxb_css_selector_remove(selector); + lxb_css_selector_destroy(selector); + + lxb_css_parser_failed_set_by_id(parser, -1, true); + selectors->err_in_function = true; + + return LXB_STATUS_OK; +} + +static void +lxb_css_selectors_state_restore_parent(lxb_css_selectors_t *selectors, + lxb_css_selector_list_t *last) +{ + uint32_t src, dst; + + if (selectors->list_last != NULL && selectors->list_last != last) { + dst = last->specificity; + src = selectors->list_last->specificity; + + selectors->list_last = 0; + + if (last->parent == NULL) { + lxb_css_selector_sp_add_a(dst, lxb_css_selector_sp_a(src)); + lxb_css_selector_sp_add_b(dst, lxb_css_selector_sp_b(src)); + lxb_css_selector_sp_add_c(dst, lxb_css_selector_sp_c(src)); + } + else if (selectors->combinator == LXB_CSS_SELECTOR_COMBINATOR_CLOSE) { + dst |= src; + } + else if (src > dst) { + dst = src; + } + + last->specificity = dst; + } + + if (selectors->list != NULL) { + last->last->u.pseudo.data = selectors->list; + } + + selectors->list_last = last; + + /* Get first Selector in chain. 
*/ + while (last->prev != NULL) { + last = last->prev; + } + + selectors->list = last; + selectors->parent = last->parent; + + lxb_css_selectors_state_restore_combinator(selectors); +} + +static bool +lxb_css_selectors_state_list_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_parser_state_f state) +{ + lxb_css_parser_state_t *states; + lxb_css_selectors_t *selectors = parser->selectors; + + if (lxb_css_parser_is_failed(parser)) { + token = lxb_css_selectors_state_function_error(parser, token); + if (token == NULL) { + return lxb_css_parser_fail(parser, + LXB_STATUS_ERROR_MEMORY_ALLOCATION); + } + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) { + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + } + + if (selectors->parent != NULL && selectors->list_last && + selectors->list_last->prev != NULL) + { + lxb_css_selectors_state_func_specificity(selectors); + } + + if (token->type != LXB_CSS_SYNTAX_TOKEN_COMMA) { + states = lxb_css_parser_states_current(parser); + + if (states->root) { + if (token->type != LXB_CSS_SYNTAX_TOKEN__END) { + token = lxb_css_selectors_state_function_error(parser, token); + if (token == NULL) { + return lxb_css_parser_fail(parser, + LXB_STATUS_ERROR_MEMORY_ALLOCATION); + } + } + + (void) lxb_css_parser_states_pop(parser); + return lxb_css_parser_success(parser); + } + + return lxb_css_selectors_done(parser); + } + + selectors->combinator = selectors->comb_default; + + lxb_css_syntax_token_consume(parser->tkz); + lxb_css_parser_state_set(parser, state); + lxb_css_parser_set_ok(parser); + + return true; +} + +static bool +lxb_css_selectors_state_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + lxb_css_parser_state_t *states; + + if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) { + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + } + + if (lxb_css_parser_is_failed(parser)) { + token = 
lxb_css_selectors_state_function_error(parser, token); + if (token == NULL) { + return lxb_css_parser_fail(parser, + LXB_STATUS_ERROR_MEMORY_ALLOCATION); + } + } + + states = lxb_css_parser_states_current(parser); + + if (states->root) { + if (token->type != LXB_CSS_SYNTAX_TOKEN__END) { + token = lxb_css_selectors_state_function_error(parser, token); + if (token == NULL) { + return lxb_css_parser_fail(parser, + LXB_STATUS_ERROR_MEMORY_ALLOCATION); + } + } + + (void) lxb_css_parser_states_pop(parser); + return lxb_css_parser_success(parser); + } + + return lxb_css_selectors_done(parser); +} + + +static const lxb_css_syntax_token_t * +lxb_css_selectors_state_function_error(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + bool cy, comma; + lxb_css_selector_list_t *list; + lxb_css_selector_t *selector; + lxb_css_selectors_t *selectors = parser->selectors; + const lxb_css_syntax_token_t *origin; + const lxb_css_selectors_pseudo_data_func_t *func; + + cy = false; + comma = true; + list = selectors->list_last; + selector = selectors->parent; + + if (selector != NULL) { + cy = selector->type == LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION; + + func = lxb_css_selector_pseudo_function_by_id(selector->u.pseudo.type, + cy); + if (func == NULL) { + return NULL; + } + + cy = func->forgiving; + comma = func->comma; + } + + if (!selectors->err_in_function) { + origin = lxb_css_syntax_token(parser->tkz); + if (origin == NULL) { + return NULL; + } + + if (token->type != LXB_CSS_SYNTAX_TOKEN__END) { + origin = token; + } + else if (origin->type != LXB_CSS_SYNTAX_TOKEN__EOF) { + origin = NULL; + } + + if (origin != NULL) { + if (lxb_css_syntax_token_error(parser, origin, + "Selectors") == NULL) + { + return NULL; + } + } + } + + selectors->err_in_function = false; + + if (cy) { + lxb_css_selector_list_selectors_remove(selectors, list); + lxb_css_selector_list_destroy(list); + + while (token != NULL + && token->type != LXB_CSS_SYNTAX_TOKEN__END) + { + if (comma == 
true + && token->type == LXB_CSS_SYNTAX_TOKEN_COMMA + && lxb_css_parser_rule_deep(parser) == 0) + { + break; + } + + lxb_css_syntax_parser_consume(parser); + token = lxb_css_syntax_parser_token(parser); + } + + return token; + } + + lxb_css_selector_list_destroy_chain(selectors->list); + + selectors->list = NULL; + selectors->list_last = NULL; + + while (token != NULL + && token->type != LXB_CSS_SYNTAX_TOKEN__END) + { + lxb_css_syntax_parser_consume(parser); + token = lxb_css_syntax_parser_token(parser); + } + + return token; +} diff --git a/ext/dom/lexbor/lexbor/css/state.c b/ext/dom/lexbor/lexbor/css/state.c new file mode 100644 index 00000000000..15a9614948f --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/state.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2021-2022 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/state.h" +#include "lexbor/css/css.h" +#include "lexbor/css/at_rule/state.h" + + +bool +lxb_css_state_success(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + lxb_css_syntax_parser_consume(parser); + return true; + + case LXB_CSS_SYNTAX_TOKEN__END: + return true; + + default: + break; + } + + return lxb_css_parser_failed(parser); +} + +bool +lxb_css_state_failed(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + if (token->type == LXB_CSS_SYNTAX_TOKEN__END) { + return lxb_css_parser_success(parser); + } + + /* The lxb_css_syntax_parser_consume(...) locked in this state. 
*/ + + lxb_css_syntax_token_consume(parser->tkz); + + return true; +} + +bool +lxb_css_state_stop(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + return lxb_css_parser_stop(parser); +} diff --git a/ext/dom/lexbor/lexbor/css/syntax/anb.c b/ext/dom/lexbor/lexbor/css/syntax/anb.c new file mode 100644 index 00000000000..ffd1751cdab --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/syntax/anb.c @@ -0,0 +1,487 @@ +/* + * Copyright (C) 2021-2022 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/core/conv.h" +#include "lexbor/core/serialize.h" +#include "lexbor/css/css.h" +#include "lexbor/css/parser.h" +#include "lexbor/css/syntax/anb.h" + + +static bool +lxb_css_syntax_anb_state(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + +static lxb_status_t +lxb_css_syntax_anb_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed); + +static lxb_css_log_message_t * +lxb_css_syntax_anb_fail(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token); + +static lxb_status_t +lxb_css_syntax_anb_state_ident(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_anb_t *anb); + +static lxb_status_t +lxb_css_syntax_anb_state_ident_data(lxb_css_parser_t *parser, + lxb_css_syntax_anb_t *anb, + const lxb_css_syntax_token_t *token, + const lxb_char_t *data, + const lxb_char_t *end); + + +static const lxb_css_syntax_cb_pipe_t lxb_css_syntax_anb_pipe = { + .state = lxb_css_syntax_anb_state, + .block = NULL, + .failed = lxb_css_state_failed, + .end = lxb_css_syntax_anb_end +}; + + +lxb_css_syntax_anb_t +lxb_css_syntax_anb_parse(lxb_css_parser_t *parser, + const lxb_char_t *data, size_t length) +{ + lxb_status_t status; + lxb_css_syntax_anb_t anb; + lxb_css_syntax_rule_t *rule; + + memset(&anb, 0, sizeof(lxb_css_syntax_anb_t)); + + if (parser->stage != LXB_CSS_PARSER_CLEAN) { + if (parser->stage == LXB_CSS_PARSER_RUN) { + parser->status = 
LXB_STATUS_ERROR_WRONG_ARGS; + return anb; + } + + lxb_css_parser_clean(parser); + } + + lxb_css_parser_buffer_set(parser, data, length); + + rule = lxb_css_syntax_parser_pipe_push(parser, NULL, + &lxb_css_syntax_anb_pipe, &anb, + LXB_CSS_SYNTAX_TOKEN_UNDEF); + if (rule == NULL) { + return anb; + } + + parser->tkz->with_comment = false; + parser->stage = LXB_CSS_PARSER_RUN; + + status = lxb_css_syntax_parser_run(parser); + if (status != LXB_STATUS_OK) { + /* Destroy. */ + } + + parser->stage = LXB_CSS_PARSER_END; + + return anb; +} + +static bool +lxb_css_syntax_anb_state(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx) +{ + parser->status = lxb_css_syntax_anb_handler(parser, token, ctx); + + token = lxb_css_syntax_parser_token(parser); + if (token == NULL) { + return lxb_css_parser_memory_fail(parser); + } + + if (parser->status != LXB_STATUS_OK + || (token->type != LXB_CSS_SYNTAX_TOKEN__END)) + { + (void) lxb_css_syntax_anb_fail(parser, token); + } + + return lxb_css_parser_success(parser); +} + +static lxb_status_t +lxb_css_syntax_anb_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + void *ctx, bool failed) +{ + return LXB_STATUS_OK; +} + +static lxb_css_log_message_t * +lxb_css_syntax_anb_fail(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token) +{ + parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA; + + static const char anb[] = "An+B"; + + return lxb_css_syntax_token_error(parser, token, anb); +} + +lxb_status_t +lxb_css_syntax_anb_handler(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_anb_t *anb) +{ + const lxb_char_t *data, *end; + lxb_css_syntax_token_ident_t *ident; + +again: + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_DIMENSION: + if (lxb_css_syntax_token_dimension(token)->num.is_float) { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + anb->a = lexbor_conv_double_to_long(lxb_css_syntax_token_dimension(token)->num.num); + + ident = 
lxb_css_syntax_token_dimension_string(token); + + goto ident; + + case LXB_CSS_SYNTAX_TOKEN_IDENT: + return lxb_css_syntax_anb_state_ident(parser, token, anb); + + case LXB_CSS_SYNTAX_TOKEN_NUMBER: + if (lxb_css_syntax_token_number(token)->is_float) { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + anb->a = 0; + anb->b = lexbor_conv_double_to_long(lxb_css_syntax_token_number(token)->num); + break; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + if (lxb_css_syntax_token_delim(token)->character != '+') { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + + if (token->type != LXB_CSS_SYNTAX_TOKEN_IDENT) { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + anb->a = 1; + + ident = lxb_css_syntax_token_ident(token); + + goto ident; + + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_m(parser, token); + goto again; + + default: + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; + +ident: + + data = ident->data; + end = ident->data + ident->length; + + if (*data != 'n' && *data != 'N') { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + data++; + + return lxb_css_syntax_anb_state_ident_data(parser, anb, token, data, end); +} + +static lxb_status_t +lxb_css_syntax_anb_state_ident(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_anb_t *anb) +{ + size_t length; + lxb_char_t c; + const lxb_char_t *data, *end; + lxb_css_syntax_token_ident_t *ident; + + static const lxb_char_t odd[] = "odd"; + static const lxb_char_t even[] = "even"; + + ident = lxb_css_syntax_token_ident(token); + + length = ident->length; + data = ident->data; + end = data + length; + + c = *data++; + + /* 'n' or '-n' */ + + if (c == 'n' || c == 'N') { + anb->a = 1; + } + else if (c == '-') { + if (data >= end) { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + c = 
*data++; + + if (c != 'n' && c != 'N') { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + anb->a = -1; + } + else if (length == sizeof(odd) - 1 + && lexbor_str_data_ncasecmp(ident->data, odd, sizeof(odd) - 1)) + { + anb->a = 2; + anb->b = 1; + + lxb_css_syntax_parser_consume(parser); + return LXB_STATUS_OK; + } + else if (length == sizeof(even) - 1 + && lexbor_str_data_ncasecmp(ident->data, even, sizeof(even) - 1)) + { + anb->a = 2; + anb->b = 0; + + lxb_css_syntax_parser_consume(parser); + return LXB_STATUS_OK; + } + else { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + return lxb_css_syntax_anb_state_ident_data(parser, anb, token, data, end); +} + +static lxb_status_t +lxb_css_syntax_anb_state_ident_data(lxb_css_parser_t *parser, + lxb_css_syntax_anb_t *anb, + const lxb_css_syntax_token_t *token, + const lxb_char_t *data, + const lxb_char_t *end) +{ + unsigned sign; + lxb_char_t c; + const lxb_char_t *p; + + sign = 0; + + if (data >= end) { + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_NUMBER: + if (!lxb_css_syntax_token_number(token)->have_sign) { + anb->b = 0; + return LXB_STATUS_OK; + } + + break; + + case LXB_CSS_SYNTAX_TOKEN_DELIM: + c = lxb_css_syntax_token_delim(token)->character; + + switch (c) { + case '-': + sign = 1; + break; + + case '+': + sign = 2; + break; + + default: + anb->b = 0; + return LXB_STATUS_OK; + } + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + + break; + + case LXB_CSS_SYNTAX_TOKEN__EOF: + anb->b = 0; + return LXB_STATUS_OK; + + default: + anb->b = 0; + return LXB_STATUS_OK; + } + + goto number; + } + + c = *data++; + + if (c != '-') { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + if (data < end) { + p = data; + anb->b = -lexbor_conv_data_to_long(&data, end - data); + + if (anb->b > 0 || data == p || data < end) { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + goto 
done; + } + + sign = 1; + + lxb_css_syntax_parser_consume(parser); + lxb_css_parser_token_status_wo_ws_m(parser, token); + +number: + + if (token->type != LXB_CSS_SYNTAX_TOKEN_NUMBER) { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + if (lxb_css_syntax_token_number(token)->is_float + || (sign > 0 && lxb_css_syntax_token_number(token)->have_sign)) + { + return LXB_STATUS_ERROR_UNEXPECTED_DATA; + } + + anb->b = lexbor_conv_double_to_long(lxb_css_syntax_token_number(token)->num); + + if (sign == 1) { + anb->b = -anb->b; + } + +done: + + lxb_css_syntax_parser_consume(parser); + + return LXB_STATUS_OK; +} + +lxb_status_t +lxb_css_syntax_anb_serialize(lxb_css_syntax_anb_t *anb, + lexbor_serialize_cb_f cb, void *ctx) +{ + lxb_char_t buf[128]; + lxb_char_t *p, *end; + + if (anb == NULL) { + return LXB_STATUS_OK; + } + + static const lxb_char_t odd[] = "odd"; + static const lxb_char_t even[] = "even"; + + if (anb->a == 2) { + if (anb->b == 1) { + return cb(odd, sizeof(odd) - 1, ctx); + } + + if (anb->b == 0) { + return cb(even, sizeof(even) - 1, ctx); + } + } + + p = buf; + end = p + sizeof(buf); + + if (anb->a == 1) { + *p = '+'; + p++; + } + else if (anb->a == -1) { + *p = '-'; + p++; + } + else { + p += lexbor_conv_float_to_data((double) anb->a, p, end - p); + if (p >= end) { + return LXB_STATUS_ERROR_SMALL_BUFFER; + } + } + + *p = 'n'; + p++; + + if (p >= end) { + return cb(buf, p - buf, ctx); + } + + if (anb->b == 0) { + return cb(buf, p - buf, ctx); + } + + if (anb->b > 0) { + *p = '+'; + p++; + + if (p >= end) { + return LXB_STATUS_ERROR_SMALL_BUFFER; + } + } + + p += lexbor_conv_float_to_data((double) anb->b, p, end - p); + + return cb(buf, p - buf, ctx); +} + +lxb_char_t * +lxb_css_syntax_anb_serialize_char(lxb_css_syntax_anb_t *anb, size_t *out_length) +{ + size_t length = 0; + lxb_status_t status; + lexbor_str_t str; + + status = lxb_css_syntax_anb_serialize(anb, lexbor_serialize_length_cb, + &length); + if (status != LXB_STATUS_OK) { + goto failed; + } + + /* 
+ 1 == '\0' */ + str.data = lexbor_malloc(length + 1); + if (str.data == NULL) { + goto failed; + } + + str.length = 0; + + status = lxb_css_syntax_anb_serialize(anb, lexbor_serialize_copy_cb, &str); + if (status != LXB_STATUS_OK) { + lexbor_free(str.data); + goto failed; + } + + str.data[str.length] = '\0'; + + if (out_length != NULL) { + *out_length = str.length; + } + + return str.data; + +failed: + + if (out_length != NULL) { + *out_length = 0; + } + + return NULL; +} diff --git a/ext/dom/lexbor/lexbor/css/syntax/parser.c b/ext/dom/lexbor/lexbor/css/syntax/parser.c new file mode 100644 index 00000000000..f7c7728e7cf --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/syntax/parser.c @@ -0,0 +1,1795 @@ +/* + * Copyright (C) 2020-2022 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/css.h" +#include "lexbor/css/state.h" +#include "lexbor/css/syntax/parser.h" +#include "lexbor/css/syntax/syntax.h" +#include "lexbor/css/at_rule/state.h" + + +static const lxb_css_syntax_token_t lxb_css_syntax_token_terminated = +{ + .types.terminated = {.begin = NULL, .length = 0, .user_id = 0}, + .type = LXB_CSS_SYNTAX_TOKEN__END, + .offset = 0, + .cloned = false +}; + + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_list_rules(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_list_rules_at(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_list_rules_qualified(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static bool +lxb_css_syntax_parser_list_rules_back(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_at_rule(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + 
lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_qualified_rule(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_declarations(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static bool +lxb_css_syntax_parser_declarations_back(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, void *ctx); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_declarations_name(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_declarations_value(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_declarations_next(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_declarations_drop(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_declarations_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_components(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_function(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * +lxb_css_syntax_parser_block(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule); + +static const lxb_css_syntax_token_t * 
+lxb_css_syntax_parser_pipe(lxb_css_parser_t *parser,
+                           const lxb_css_syntax_token_t *token,
+                           lxb_css_syntax_rule_t *rule);
+
+/*
+ * Record `status` in parser->status and return NULL, so that a phase can
+ * report a failure and leave in a single return statement.
+ */
+lxb_inline const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_failed(lxb_css_parser_t *parser, lxb_status_t status)
+{
+    parser->status = status;
+    return NULL;
+}
+
+/*
+ * Run the parser loop for as long as parser->loop stays true.
+ *
+ * Every iteration obtains a token with lxb_css_syntax_parser_token() and
+ * hands it to the state callback of the rule on top of the stack, calling
+ * the callback again (parser->rules is re-read each time) until it returns
+ * true.  A NULL token together with parser->fake_null is not an error: the
+ * flag is cleared and the loop condition is evaluated again.  Any other
+ * NULL token ends the run.
+ *
+ * @return parser->status.
+ */
+lxb_status_t
+lxb_css_syntax_parser_run(lxb_css_parser_t *parser)
+{
+    const lxb_css_syntax_token_t *token;
+
+    parser->loop = true;
+
+    do {
+        token = lxb_css_syntax_parser_token(parser);
+        if (token == NULL) {
+            if (parser->fake_null) {
+                parser->fake_null = false;
+                continue;
+            }
+
+            return parser->status;
+        }
+
+        while (parser->rules->state(parser, token,
+                                    parser->rules->context) == false) {};
+    }
+    while (parser->loop);
+
+    return parser->status;
+}
+/*
+ * Obtain the tokenizer's current token and pass it through the phase
+ * function of the rule on top of the stack.
+ *
+ * @return the token produced by the phase, or NULL.  If the tokenizer itself
+ *         yields no token, parser->status is set to parser->tkz->status.
+ */
+const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_token(lxb_css_parser_t *parser)
+{
+    lxb_css_syntax_token_t *token;
+    lxb_css_syntax_rule_t *rule = parser->rules;
+
+    token = lxb_css_syntax_token(parser->tkz);
+    if (token == NULL) {
+        return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+    }
+
+    return rule->phase(parser, token, rule);
+}
+/*
+ * Same as lxb_css_syntax_parser_token(), but if the token is whitespace it
+ * is consumed and the following token is returned instead.  Only a single
+ * whitespace token is skipped.
+ */
+const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_token_wo_ws(lxb_css_parser_t *parser)
+{
+    const lxb_css_syntax_token_t *token;
+
+    token = lxb_css_syntax_parser_token(parser);
+
+    if (token != NULL && token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
+        lxb_css_syntax_parser_consume(parser);
+        return lxb_css_syntax_parser_token(parser);
+    }
+
+    return token;
+}
+/*
+ * Consume the tokenizer's current token, unless the rule on top of the
+ * stack has skip_consume set.
+ */
+void
+lxb_css_syntax_parser_consume(lxb_css_parser_t *parser)
+{
+    if (!parser->rules->skip_consume) {
+        lxb_css_syntax_token_consume(parser->tkz);
+    }
+}
+/*
+ * Tokenizer chunk callback that wraps parser->chunk_cb.
+ *
+ * When parser->pos is NULL the call is forwarded unchanged.  Otherwise the
+ * bytes in [parser->pos, *end) are appended to parser->str, growing the
+ * buffer with lexbor_realloc() when needed, before parser->chunk_cb is
+ * asked for more data; parser->pos is then moved to the new *data.
+ *
+ * @param tkz   tokenizer requesting data (passed through to chunk_cb).
+ * @param data  in/out: begin of the current chunk.
+ * @param end   in/out: end of the current chunk.
+ * @param ctx   the lxb_css_parser_t this callback was registered with.
+ *
+ * @return the status of parser->chunk_cb, LXB_STATUS_ERROR_OVERFLOW if the
+ *         accumulated size does not fit in size_t, or
+ *         LXB_STATUS_ERROR_MEMORY_ALLOCATION if the buffer cannot grow.
+ */
+lxb_status_t
+lxb_css_syntax_parser_tkz_cb(lxb_css_syntax_tokenizer_t *tkz,
+                             const lxb_char_t **data, const lxb_char_t **end,
+                             void *ctx)
+{
+    size_t length, size;
+    lxb_char_t *new_data;
+    lxb_status_t status;
+    lxb_css_parser_t *parser = ctx;
+
+    if (parser->pos == NULL) {
+        return parser->chunk_cb(tkz, data, end, parser->chunk_ctx);
+    }
+
+    length = (size_t) (*end - parser->pos);
+
+    /*
+     * `<=` rather than `<`: the size computed below is
+     * str.length + length + 1, so a sum of exactly SIZE_MAX would wrap to 0.
+     */
+    if (SIZE_MAX - parser->str.length <= length) {
+        return LXB_STATUS_ERROR_OVERFLOW;
+    }
+
+    if (parser->str.length + length >= parser->str_size) {
+        size = parser->str.length + length + 1;
+
+        new_data = lexbor_realloc(parser->str.data, size);
+        if (new_data == NULL) {
+            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+        }
+
+        parser->str.data = new_data;
+        parser->str_size = size;
+    }
+
+    /* Copy before chunk_cb runs; *data and *end are rewritten by it. */
+    memcpy(parser->str.data + parser->str.length, parser->pos, length);
+
+    status = parser->chunk_cb(tkz, data, end, parser->chunk_ctx);
+    parser->str.length += length;
+    parser->pos = *data;
+
+    return status;
+}
+/*
+ * Push a "list of rules" frame onto the rule stack.
+ *
+ * The frame currently on top gets its state set to lxb_css_state_success;
+ * the new frame is zeroed and starts in the
+ * lxb_css_syntax_parser_list_rules phase with cb_rules->cb.state as state
+ * callback.  If `token` is not NULL its offset is stored as the begin of
+ * the list.  parser->context is reset to NULL.
+ *
+ * @param state_back  stored in the new frame as state_back.
+ * @param ctx         stored in the new frame as context.
+ * @param top_level   stored in the new frame; enables CDO/CDC skipping.
+ * @param stop        token type stored as the frame's block_end.
+ *
+ * @return the new frame, or NULL with parser->status set when the stack
+ *         cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_list_rules_push(lxb_css_parser_t *parser,
+                                      const lxb_css_syntax_token_t *token,
+                                      lxb_css_parser_state_f state_back,
+                                      const lxb_css_syntax_cb_list_rules_t *cb_rules,
+                                      void *ctx, bool top_level,
+                                      lxb_css_syntax_token_type_t stop)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_list_rules;
+    rule->state = cb_rules->cb.state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_list_rules;
+    rule->cbx.list_rules = cb_rules;
+    rule->context = ctx;
+    rule->block_end = stop;
+    rule->top_level = top_level;
+
+    if (token != NULL) {
+        rule->u.list_rules.begin = token->offset;
+    }
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push an at-rule frame onto the rule stack.
+ *
+ * Works like the other *_push() functions: the frame on top is marked with
+ * lxb_css_state_success, a zeroed frame is added in the
+ * lxb_css_syntax_parser_at_rule phase with at_rule->state as state
+ * callback, and parser->context is reset.  If `token` is not NULL, its
+ * offset becomes the name offset and the offset just past it the prelude
+ * offset.
+ *
+ * @return the new frame, or NULL with parser->status set when the stack
+ *         cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_at_rule_push(lxb_css_parser_t *parser,
+                                   const lxb_css_syntax_token_t *token,
+                                   lxb_css_parser_state_f state_back,
+                                   const lxb_css_syntax_cb_at_rule_t *at_rule,
+                                   void *ctx, lxb_css_syntax_token_type_t stop)
+{
+    lxb_status_t status;
+
lxb_css_syntax_at_rule_offset_t *at;
+    lxb_css_syntax_rule_t *rule;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+    /* The frame being covered is marked successful before the new one is added. */
+    parser->rules->state = lxb_css_state_success;
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_at_rule;
+    rule->state = at_rule->state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_at_rule;
+    rule->cbx.at_rule = at_rule;
+    rule->context = ctx;
+    rule->block_end = stop;
+
+    if (token != NULL) {
+        at = &rule->u.at_rule;
+        /* name: offset of `token`; prelude: offset just past `token`. */
+        at->name = token->offset;
+        at->prelude = token->offset + lxb_css_syntax_token_base(token)->length;
+    }
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push a qualified-rule frame onto the rule stack.
+ *
+ * The frame on top is marked with lxb_css_state_success, then a zeroed
+ * frame is added in the lxb_css_syntax_parser_qualified_rule phase with
+ * qualified->state as state callback, `ctx` as context and `stop` as
+ * block_end.  If `token` is not NULL its offset is stored as the prelude
+ * offset.  parser->context is reset to NULL.
+ *
+ * @return the new frame, or NULL with parser->status set when the stack
+ *         cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_qualified_push(lxb_css_parser_t *parser,
+                                     const lxb_css_syntax_token_t *token,
+                                     lxb_css_parser_state_f state_back,
+                                     const lxb_css_syntax_cb_qualified_rule_t *qualified,
+                                     void *ctx, lxb_css_syntax_token_type_t stop)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_qualified_rule;
+    rule->state = qualified->state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_qualified_rule;
+    rule->cbx.qualified_rule = qualified;
+    rule->context = ctx;
+    rule->block_end = stop;
+
+    if (token != NULL) {
+        rule->u.qualified.prelude = token->offset;
+    }
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push a declarations-list frame onto the rule stack.
+ *
+ * @return the new frame, or NULL with parser->status set when the stack
+ *         cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_declarations_push(lxb_css_parser_t *parser,
+                                        const lxb_css_syntax_token_t *token,
+
lxb_css_parser_state_f state_back,
+                                        const lxb_css_syntax_cb_declarations_t *declarations,
+                                        void *ctx, lxb_css_syntax_token_type_t stop)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_declarations;
+    rule->state = declarations->cb.state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_declarations;
+    rule->cbx.declarations = declarations;
+    rule->context = ctx;
+    rule->block_end = stop;
+
+    if (token != NULL) {
+        rule->u.declarations.begin = token->offset;
+    }
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push a component-values frame onto the rule stack.
+ *
+ * The frame on top is marked with lxb_css_state_success, then a zeroed
+ * frame is added in the lxb_css_syntax_parser_components phase with
+ * comp->state as state callback, `ctx` as context and `stop` as block_end.
+ * No offsets are recorded from `token`.  parser->context is reset to NULL.
+ *
+ * @return the new frame, or NULL with parser->status set when the stack
+ *         cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_components_push(lxb_css_parser_t *parser,
+                                      const lxb_css_syntax_token_t *token,
+                                      lxb_css_parser_state_f state_back,
+                                      const lxb_css_syntax_cb_components_t *comp,
+                                      void *ctx, lxb_css_syntax_token_type_t stop)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_components;
+    rule->state = comp->state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_components;
+    rule->cbx.components = comp;
+    rule->context = ctx;
+    rule->block_end = stop;
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push a function frame onto the rule stack.
+ *
+ * `token` must be a LXB_CSS_SYNTAX_TOKEN_FUNCTION token, otherwise
+ * parser->status is set to LXB_STATUS_ERROR_WRONG_ARGS and NULL is
+ * returned.  No block_end is stored in the new frame.
+ *
+ * @return the new frame, or NULL with parser->status set on bad arguments
+ *         or when the stack cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_function_push(lxb_css_parser_t *parser,
+                                    const lxb_css_syntax_token_t *token,
+                                    lxb_css_parser_state_f state_back,
+                                    const lxb_css_syntax_cb_function_t *func,
+                                    void *ctx)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+
+    if (token == NULL || token->type != LXB_CSS_SYNTAX_TOKEN_FUNCTION) {
+        parser->status = LXB_STATUS_ERROR_WRONG_ARGS;
+        return NULL;
+    }
+    /*
+     * If the enclosing frame has a ')' on top of the type stack, drop it
+     * and lower the frame's depth.  NOTE(review): presumably this undoes
+     * the push made by the enclosing phase for this same function token;
+     * confirm.  This happens before the stack is expanded, so it is not
+     * rolled back if the expansion fails.
+     */
+    if (parser->rules > parser->rules_begin) {
+        rule = parser->rules;
+
+        if (rule->deep != 0
+            && parser->types_pos[-1] == LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS)
+        {
+            rule->deep--;
+            parser->types_pos--;
+        }
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_function;
+    rule->state = func->state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_function;
+    rule->cbx.func = func;
+    rule->context = ctx;
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push a simple-block frame onto the rule stack.
+ *
+ * The closing token stored as block_end is derived from the type of
+ * `token`: '[' maps to ']', a function token or '(' maps to ')', and '{'
+ * maps to '}'.  A NULL token or any other token type sets parser->status
+ * to LXB_STATUS_ERROR_WRONG_ARGS and returns NULL.
+ *
+ * @return the new frame, or NULL with parser->status set on bad arguments
+ *         or when the stack cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_block_push(lxb_css_parser_t *parser,
+                                 const lxb_css_syntax_token_t *token,
+                                 lxb_css_parser_state_f state_back,
+                                 const lxb_css_syntax_cb_block_t *block,
+                                 void *ctx)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+    lxb_css_syntax_token_type_t block_end;
+
+    if (token == NULL) {
+        parser->status = LXB_STATUS_ERROR_WRONG_ARGS;
+        return NULL;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            block_end = LXB_CSS_SYNTAX_TOKEN_RS_BRACKET;
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            block_end = LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS;
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            block_end = LXB_CSS_SYNTAX_TOKEN_RC_BRACKET;
+            break;
+
+        default:
+            parser->status = LXB_STATUS_ERROR_WRONG_ARGS;
+            return NULL;
+    }
+    /* Same type-stack adjustment as in lxb_css_syntax_parser_function_push(). */
+    if (parser->rules > parser->rules_begin) {
+        rule = parser->rules;
+
+        if (rule->deep != 0 && parser->types_pos[-1] == block_end) {
+            rule->deep--;
+            parser->types_pos--;
+        }
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    lxb_css_parser_offset_set(parser, token);
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_block;
+    rule->state = block->state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_block;
+    rule->cbx.block = block;
+    rule->context = ctx;
+    rule->block_end = block_end;
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Push a pipe frame onto the rule stack.
+ *
+ * Unlike the other *_push() functions this one takes no token, so no
+ * offsets are recorded and lxb_css_parser_offset_set() is not called.
+ *
+ * @return the new frame, or NULL with parser->status set when the stack
+ *         cannot be expanded.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_pipe_push(lxb_css_parser_t *parser,
+                                lxb_css_parser_state_f state_back,
+                                const lxb_css_syntax_cb_pipe_t *pipe,
+                                void *ctx, lxb_css_syntax_token_type_t stop)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rule;
+
+    status = lxb_css_syntax_stack_expand(parser, 1);
+    if (status != LXB_STATUS_OK) {
+        parser->status = status;
+        return NULL;
+    }
+
+    parser->rules->state = lxb_css_state_success;
+
+    rule = ++parser->rules;
+
+    memset(rule, 0x00, sizeof(lxb_css_syntax_rule_t));
+
+    rule->phase = lxb_css_syntax_parser_pipe;
+    rule->state = pipe->state;
+    rule->state_back = state_back;
+    rule->back = lxb_css_syntax_parser_pipe;
+    rule->cbx.pipe = pipe;
+    rule->context = ctx;
+    rule->block_end = stop;
+
+    parser->context = NULL;
+
+    return rule;
+}
+/*
+ * Pop the top frame of the rule stack.
+ *
+ * @return a pointer to the frame that was on top before the call
+ *         (post-decrement); parser->rules points to the frame beneath it
+ *         afterwards.  No bounds check is made here.
+ */
+lxb_css_syntax_rule_t *
+lxb_css_syntax_parser_stack_pop(lxb_css_parser_t *parser)
+{
+    return parser->rules--;
+}
+/*
+ * Phase function of a "list of rules" frame.
+ *
+ * A token that starts before rule->offset is handed back unchanged.
+ * Otherwise rule->offset is advanced past the token and the next phase is
+ * selected from the token type.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_list_rules(lxb_css_parser_t *parser,
+                                 const lxb_css_syntax_token_t *token,
+                                 lxb_css_syntax_rule_t *rule)
+{
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+begin:
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_WHITESPACE:
+
lxb_css_syntax_token_consume(parser->tkz);
+
+            token = lxb_css_syntax_token(parser->tkz);
+            if (token == NULL) {
+                return lxb_css_syntax_parser_failed(parser,
+                                                    parser->tkz->status);
+            }
+
+            goto begin;
+
+        case LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD:
+            rule->phase = lxb_css_syntax_parser_list_rules_at;
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+        /* CDO/CDC are skipped only in a top-level list. */
+        case LXB_CSS_SYNTAX_TOKEN_CDC:
+        case LXB_CSS_SYNTAX_TOKEN_CDO:
+            if (rule->top_level) {
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser,
+                                                        parser->tkz->status);
+                }
+
+                goto begin;
+            }
+
+            /* fall through */
+
+        default:
+            if (rule->block_end == token->type && rule->deep == 0) {
+                goto done;
+            }
+
+            rule->phase = lxb_css_syntax_parser_list_rules_qualified;
+            break;
+    }
+
+    return token;
+
+done:
+    /* End of the list: switch to the end phase and hand out the terminated token. */
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->skip_consume = true;
+
+    rule->u.list_rules.end = token->offset;
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase entered by a rule list when an at-keyword token was seen.
+ *
+ * While rule->state is not lxb_css_state_success the token is handed back
+ * unchanged.  After that an at-rule frame is pushed that reuses the list's
+ * context and block_end, with lxb_css_syntax_parser_list_rules_back as
+ * state_back.  NULL is returned in every case; on a successful push
+ * parser->fake_null is set so that lxb_css_syntax_parser_run() keeps
+ * looping instead of treating the NULL as an error.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_list_rules_at(lxb_css_parser_t *parser,
+                                    const lxb_css_syntax_token_t *token,
+                                    lxb_css_syntax_rule_t *rule)
+{
+    if (rule->state != lxb_css_state_success) {
+        return token;
+    }
+
+    rule = lxb_css_syntax_parser_at_rule_push(parser, token,
+                                              lxb_css_syntax_parser_list_rules_back,
+                                              rule->cbx.list_rules->at_rule,
+                                              rule->context, rule->block_end);
+    if (rule == NULL) {
+        return NULL;
+    }
+
+    parser->fake_null = true;
+
+    return NULL;
+}
+/*
+ * Phase entered by a rule list for any token that starts a qualified rule.
+ *
+ * Mirrors lxb_css_syntax_parser_list_rules_at(), but pushes a
+ * qualified-rule frame using cbx.list_rules->qualified_rule.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_list_rules_qualified(lxb_css_parser_t *parser,
+                                           const lxb_css_syntax_token_t *token,
+                                           lxb_css_syntax_rule_t *rule)
+{
+    if (rule->state != lxb_css_state_success) {
+        return token;
+    }
+
+    rule = lxb_css_syntax_parser_qualified_push(parser, token,
+                                                lxb_css_syntax_parser_list_rules_back,
+                                                rule->cbx.list_rules->qualified_rule,
+                                                rule->context, rule->block_end);
+    if (rule == NULL) {
+        return NULL;
+    }
+
+    parser->fake_null = true;
+
+    return NULL;
+}
+/*
+ * state_back callback handed to frames pushed from a rule list.
+ *
+ * For a LXB_CSS_SYNTAX_TOKEN__END token it returns
+ * lxb_css_parser_success(parser).  For any other token it installs
+ * cbx.list_rules->next as the state of the frame on top of the stack and
+ * returns false.  `ctx` is not used.
+ */
+static bool
+lxb_css_syntax_parser_list_rules_back(lxb_css_parser_t *parser,
+                                      const lxb_css_syntax_token_t *token, void *ctx)
+{
+    lxb_css_syntax_rule_t *rule;
+
+    if (token->type == LXB_CSS_SYNTAX_TOKEN__END) {
+        return lxb_css_parser_success(parser);
+    }
+
+    rule = parser->rules;
+    rule->state = rule->cbx.list_rules->next;
+
+    return false;
+}
+/*
+ * Phase function of an at-rule frame.
+ *
+ * Tracks nesting of (), [] and {} with the parser's type stack and
+ * rule->deep.  A '{' at depth 0 starts the rule's block; the rule ends on
+ * its block_end token at depth 0, on ';' at depth 0, on the '}' that
+ * closes depth 1, or on EOF.  A token that starts before rule->offset is
+ * handed back unchanged.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_at_rule(lxb_css_parser_t *parser,
+                              const lxb_css_syntax_token_t *token,
+                              lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        rule->skip_ending = true;
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            if (rule->deep == 0) {
+                rule->phase = lxb_css_syntax_parser_start_block;
+                /* The prelude ends at '{'; the block begins just past it. */
+                rule->u.at_rule.prelude_end = token->offset;
+                rule->u.at_rule.block = token->offset
+                                       + lxb_css_syntax_token_base(token)->length;
+
+                rule->skip_consume = true;
+
+                parser->block = rule->cbx.cb->block;
+
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser,
+                                                        parser->tkz->status);
+                }
+                /* The fetched token is only checked for failure; the caller gets the terminated token. */
+                token = &lxb_css_syntax_token_terminated;
+            }
+
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                if (rule->deep == 1) {
+                    goto done;
+                }
+
+
parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN_SEMICOLON:
+            if (rule->deep == 0) {
+                goto done;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+    /* Only the opening-bracket cases break out of the switch and reach this point. */
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->skip_consume = true;
+    /* A non-zero prelude_end means a block was opened, so this offset ends the block. */
+    if (rule->u.at_rule.prelude_end != 0) {
+        rule->u.at_rule.block_end = token->offset;
+    }
+    else {
+        rule->u.at_rule.prelude_end = token->offset;
+    }
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase function of a qualified-rule frame.
+ *
+ * Same bracket tracking as lxb_css_syntax_parser_at_rule(), except that a
+ * ';' does not end the rule.  A '{' at depth 0 starts the rule's block; the
+ * rule ends on its block_end token at depth 0, on the '}' that closes
+ * depth 1, or on EOF.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_qualified_rule(lxb_css_parser_t *parser,
+                                     const lxb_css_syntax_token_t *token,
+                                     lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+
+    /* It is necessary to avoid re-entry of the token into the phase. */
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        rule->skip_ending = true;
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            if (rule->deep == 0) {
+                rule->phase = lxb_css_syntax_parser_start_block;
+                /* The prelude ends at '{'; the block begins just past it. */
+                rule->u.qualified.prelude_end = token->offset;
+                rule->u.qualified.block = token->offset
+                                         + lxb_css_syntax_token_base(token)->length;
+
+                rule->skip_consume = true;
+
+                parser->block = rule->cbx.cb->block;
+
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser,
+                                                        parser->tkz->status);
+                }
+
+                token = &lxb_css_syntax_token_terminated;
+            }
+
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                if (rule->deep == 1) {
+                    goto done;
+                }
+
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->skip_consume = true;
+    /* Here the block offset, not prelude_end, tells whether a block was opened. */
+    if (rule->u.qualified.block != 0) {
+        rule->u.qualified.block_end = token->offset;
+    }
+    else {
+        rule->u.qualified.prelude_end = token->offset;
+    }
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase function of a declarations-list frame: finds the start of the next
+ * declaration.
+ *
+ * Semicolons and whitespace are skipped.  An ident that is followed by a
+ * colon (checked with lxb_css_syntax_tokenizer_lookup_colon()) moves the
+ * frame to the declarations_name phase and is returned.  An at-keyword
+ * pushes an at-rule frame.  Anything else marks the declaration as failed
+ * and moves the frame to the declarations_drop phase.  The block_end token
+ * at depth 0 or EOF ends the list.
+ *
+ * Apart from the ident case and the re-entry guard, NULL is returned with
+ * parser->fake_null set (for the at-keyword case only when the push
+ * succeeded), so lxb_css_syntax_parser_run() fetches the token again
+ * through the newly selected phase.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_declarations(lxb_css_parser_t *parser,
+                                   const lxb_css_syntax_token_t *token,
+                                   lxb_css_syntax_rule_t *rule)
+{
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+begin:
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        rule->skip_ending = true;
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_SEMICOLON:
+        case LXB_CSS_SYNTAX_TOKEN_WHITESPACE:
+            lxb_css_syntax_token_consume(parser->tkz);
+
+            token = lxb_css_syntax_token(parser->tkz);
+            if (token == NULL) {
+                return lxb_css_syntax_parser_failed(parser,
+                                                    parser->tkz->status);
+            }
+
+            goto begin;
+
+        case LXB_CSS_SYNTAX_TOKEN_IDENT:
+            rule->u.declarations.name_begin = token->offset;
+
+            if (lxb_css_syntax_tokenizer_lookup_colon(parser->tkz)) {
+                rule->phase = lxb_css_syntax_parser_declarations_name;
+                parser->block = rule->cbx.cb->block;
+
+                return token;
+            }
+
+            rule->state = rule->cbx.cb->failed;
+            rule->phase = lxb_css_syntax_parser_declarations_drop;
+            rule->failed = true;
+
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD:
+            rule->u.declarations.name_begin = 0;
+
+            rule = lxb_css_syntax_parser_at_rule_push(parser, token,
+                                        lxb_css_syntax_parser_declarations_back,
+                                        rule->cbx.declarations->at_rule, rule->context,
+                                        rule->block_end);
+            if (rule != NULL) {
+                parser->fake_null = true;
+            }
+
+            return NULL;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            rule->state = rule->cbx.cb->failed;
+            rule->phase = lxb_css_syntax_parser_declarations_drop;
+            rule->failed = true;
+
+            rule->u.declarations.name_begin = token->offset;
+            break;
+    }
+
+    parser->fake_null = true;
+
+    return NULL;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->state = lxb_css_state_success;
+    rule->skip_consume = true;
+
+    rule->u.declarations.name_begin = 0;
+    rule->u.declarations.end = token->offset;
+
+    parser->fake_null = true;
+
+    return NULL;
+}
+/*
+ * state_back callback handed to at-rule frames pushed from a declarations
+ * list: restores the list's own state callback on the frame on top of the
+ * stack and immediately invokes it with the same token.
+ */
+static bool
+lxb_css_syntax_parser_declarations_back(lxb_css_parser_t *parser,
+                                        const lxb_css_syntax_token_t *token, void *ctx)
+{
+    lxb_css_syntax_rule_t *rules = parser->rules;
+
+    rules->state = rules->cbx.declarations->cb.state;
+
+    return rules->state(parser, token, ctx);
+}
+/*
+ * Phase that follows a declaration name: skips optional whitespace, the
+ * colon and optional whitespace again, records the name_end and
+ * value_begin offsets, then switches the frame to the declarations_value
+ * phase with parser->block as state callback.
+ *
+ * While rule->state is not lxb_css_state_success the terminated token is
+ * returned and skip_consume is set.
+ *
+ * @return the token produced by the declarations_value phase, or NULL with
+ *         parser->status set on failure.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_declarations_name(lxb_css_parser_t *parser,
+                                        const lxb_css_syntax_token_t *token,
+                                        lxb_css_syntax_rule_t *rule)
+{
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    if (rule->state != lxb_css_state_success) {
+        rule->skip_consume = true;
+
+        return &lxb_css_syntax_token_terminated;
+    }
+
+    rule->skip_consume = false;
+
+    /* 1. Skip whitespace between the name and the colon. */
+
+    if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
+        lxb_css_syntax_token_consume(parser->tkz);
+
+        token = lxb_css_syntax_token(parser->tkz);
+        if (token == NULL) {
+            return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+        }
+    }
+
+    /* 2. The colon itself. */
+
+    if (token->type != LXB_CSS_SYNTAX_TOKEN_COLON) {
+        /*
+         * Parse error.
+         *
+         * This should be unreachable: lxb_css_syntax_parser_declarations()
+         * selects this phase only after
+         * lxb_css_syntax_tokenizer_lookup_colon() succeeded.  If it happens
+         * anyway, report it through parser->status instead of returning a
+         * bare NULL, which would leave parser->status untouched.
+         */
+
+        return lxb_css_syntax_parser_failed(parser,
+                                            LXB_STATUS_ERROR_UNEXPECTED_DATA);
+    }
+
+    rule->u.declarations.name_end = token->offset;
+
+    lxb_css_syntax_token_consume(parser->tkz);
+
+    token = lxb_css_syntax_token(parser->tkz);
+    if (token == NULL) {
+        return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+    }
+
+    /* 3. Skip whitespace between the colon and the value. */
+
+    if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
+        lxb_css_syntax_token_consume(parser->tkz);
+
+        token = lxb_css_syntax_token(parser->tkz);
+        if (token == NULL) {
+            return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+        }
+    }
+
+    rule->u.declarations.value_begin = token->offset;
+
+    /* 4. Hand over to the value phase. */
+
+    rule->phase = lxb_css_syntax_parser_declarations_value;
+    rule->state = parser->block;
+
+    return lxb_css_syntax_parser_declarations_value(parser, token, rule);
+}
+/*
+ * Phase that walks the value of a declaration.
+ *
+ * Tracks nesting of (), [] and {} with the parser's type stack and
+ * rule->deep.  At depth 0 it strips trailing whitespace and a trailing
+ * "!important" (setting rule->important and the before_important offset),
+ * and ends the value on ';' (next phase: declarations_next), on the
+ * frame's block_end token or on EOF (next phase: declarations_end).
+ *
+ * The third argument of both lookup helpers is 0x7D ('}') when the frame
+ * ends on '}', and 0x00 otherwise.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_declarations_value(lxb_css_parser_t *parser,
+                                         const lxb_css_syntax_token_t *token,
+                                         lxb_css_syntax_rule_t *rule)
+{
+    bool imp;
+    uintptr_t before_important;
+    lxb_status_t status;
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+again:
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        rule->skip_ending = true;
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_WHITESPACE:
+            if (rule->deep != 0) {
+                return token;
+            }
+
+            imp = lxb_css_syntax_tokenizer_lookup_declaration_ws_end(parser->tkz,
+                             rule->block_end,
+                             (rule->block_end == LXB_CSS_SYNTAX_TOKEN_RC_BRACKET) ? 0x7D : 0x00);
+
+            if (!imp) {
+                return token;
+            }
+
+            before_important = token->offset;
+
+            lxb_css_syntax_token_consume(parser->tkz);
+
+            token = lxb_css_syntax_token(parser->tkz);
+            if (token == NULL) {
+                return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+            }
+
+            /* Have !important? */
+
+            if (token->type == LXB_CSS_SYNTAX_TOKEN_DELIM) {
+                rule->important = true;
+                rule->u.declarations.before_important = before_important;
+
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                /* Skip important */
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+                }
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+                }
+
+                if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
+                    lxb_css_syntax_token_consume(parser->tkz);
+
+                    token = lxb_css_syntax_token(parser->tkz);
+                    if (token == NULL) {
+                        return lxb_css_syntax_parser_failed(parser,
+                                                            parser->tkz->status);
+                    }
+                }
+            }
+
+            goto again;
+
+        case LXB_CSS_SYNTAX_TOKEN_SEMICOLON:
+            if (rule->deep == 0) {
+                rule->phase = lxb_css_syntax_parser_declarations_next;
+
+                rule->u.declarations.value_end = token->offset;
+
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser,
+                                                        parser->tkz->status);
+                }
+
+                return &lxb_css_syntax_token_terminated;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN_DELIM:
+            if (lxb_css_syntax_token_delim(token)->character != '!') {
+                return token;
+            }
+
+            imp = lxb_css_syntax_tokenizer_lookup_important(parser->tkz,
+                             rule->block_end,
+                             (rule->block_end == LXB_CSS_SYNTAX_TOKEN_RC_BRACKET) ? 0x7D : 0x00);
+
+            if (!imp) {
+                return token;
+            }
+
+            rule->u.declarations.before_important = token->offset;
+            rule->important = true;
+
+            /* Consume the '!' and the token after it. */
+
+            lxb_css_syntax_token_consume(parser->tkz);
+
+            token = lxb_css_syntax_token(parser->tkz);
+            if (token == NULL) {
+                return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+            }
+
+            lxb_css_syntax_token_consume(parser->tkz);
+
+            token = lxb_css_syntax_token(parser->tkz);
+            if (token == NULL) {
+                return lxb_css_syntax_parser_failed(parser, parser->tkz->status);
+            }
+
+            if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser,
+                                                        parser->tkz->status);
+                }
+            }
+
+            goto again;
+
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_declarations_end;
+    rule->skip_consume = true;
+
+    rule->u.declarations.value_end = token->offset;
+    rule->u.declarations.end = token->offset;
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase entered after a ';' ended a declaration: reports the finished
+ * declaration and restarts the declarations phase.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_declarations_next(lxb_css_parser_t *parser,
+                                        const lxb_css_syntax_token_t *token,
+                                        lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+    lxb_css_syntax_declarations_offset_t *decl;
+    /* Until the state callback reports success, keep handing out the terminated token. */
+    if (rule->state != lxb_css_state_success) {
+        rule->skip_consume = true;
+
+        return &lxb_css_syntax_token_terminated;
+    }
+
+    status = rule->cbx.declarations->declaration_end(parser, rule->context,
+                                                     rule->important,
+                                                     rule->failed);
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+    /* Reset the frame for the next declaration; `begin` and `end` are left as they are. */
+    rule->phase = lxb_css_syntax_parser_declarations;
+    rule->state = rule->cbx.cb->state;
+
+    rule->skip_consume = false;
+    rule->important = false;
+    rule->failed = false;
+
+    decl = &rule->u.declarations;
+
+    decl->name_begin = 0;
+    decl->name_end = 0;
+    decl->value_begin = 0;
+    decl->before_important = 0;
+    decl->value_end = 0;
+
+    return lxb_css_syntax_parser_declarations(parser, token, rule);
+}
+/*
+ * Phase that skips over a declaration that was marked as failed.
+ *
+ * Tracks nesting of (), [] and {} with the parser's type stack and
+ * rule->deep.  A ';' at depth 0 moves the frame to the declarations_next
+ * phase; the frame's block_end token at depth 0 or EOF moves it to the
+ * declarations_end phase.  In both cases name_end is set to the offset of
+ * the ending token and the terminated token is returned.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_declarations_drop(lxb_css_parser_t *parser,
+                                        const lxb_css_syntax_token_t *token,
+                                        lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+
+    /* It is necessary to avoid re-entry of the token into the phase. */
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        rule->skip_ending = true;
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_SEMICOLON:
+            if (rule->deep == 0) {
+                rule->phase = lxb_css_syntax_parser_declarations_next;
+
+                rule->u.declarations.name_end = token->offset;
+
+                lxb_css_syntax_token_consume(parser->tkz);
+
+                token = lxb_css_syntax_token(parser->tkz);
+                if (token == NULL) {
+                    return lxb_css_syntax_parser_failed(parser,
+                                                        parser->tkz->status);
+                }
+
+                rule->skip_consume = true;
+
+                return &lxb_css_syntax_token_terminated;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_declarations_end;
+    rule->skip_consume = true;
+
+    rule->u.declarations.name_end = token->offset;
+    rule->u.declarations.end = token->offset;
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase that finishes a declarations list.
+ *
+ * Once rule->state is lxb_css_state_success it reports the last
+ * declaration through declaration_end(), calls the frame's end() callback,
+ * consumes the current token unless skip_ending is set, and pops the
+ * frame.  If the stack is then at or below rules_begin the remaining
+ * frame's state becomes lxb_css_state_stop and the token is returned;
+ * otherwise the frame beneath gets its phase restored from `back` and is
+ * given the token.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_declarations_end(lxb_css_parser_t *parser,
+                                       const lxb_css_syntax_token_t *token,
+                                       lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+    lxb_css_syntax_rule_t *rules;
+
+    if (rule->state != lxb_css_state_success) {
+        rule->skip_consume = true;
+
+        return &lxb_css_syntax_token_terminated;
+    }
+
+    status = rule->cbx.declarations->declaration_end(parser, rule->context,
+                                                     rule->important,
+                                                     rule->failed);
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    /* This code will be called exclusively from the lxb_css_parser_run(...). */
+
+    status = rule->cbx.cb->end(parser, token, rule->context, false);
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    if (!rule->skip_ending) {
+        lxb_css_syntax_token_consume(parser->tkz);
+
+        token = lxb_css_syntax_token(parser->tkz);
+        if (token == NULL) {
+            return lxb_css_syntax_parser_failed(parser,
+                                                parser->tkz->status);
+        }
+    }
+
+    (void) lxb_css_syntax_parser_stack_pop(parser);
+    /* From here on `rule` is the popped frame; `rules` is the frame beneath it. */
+    rules = parser->rules;
+
+    if (parser->rules <= parser->rules_begin) {
+        rules->state = lxb_css_state_stop;
+        return token;
+    }
+
+    rules->phase = rules->back;
+
+    return rules->phase(parser, token, rules);
+}
+/*
+ * Phase function of a component-values frame.
+ *
+ * Tracks nesting of (), [] and {} with the parser's type stack and
+ * rule->deep, and ends on the frame's block_end token at depth 0
+ * (skip_ending is set) or on EOF.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_components(lxb_css_parser_t *parser,
+                                 const lxb_css_syntax_token_t *token,
+                                 lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        rule->skip_ending = true;
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+
status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+    /* Only the opening-bracket cases break out of the switch and reach this point. */
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->skip_consume = true;
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase function of a function frame.
+ *
+ * Tracks nesting of (), [] and {} with the parser's type stack and
+ * rule->deep.  The frame has no block_end check: it ends on a ')' seen at
+ * depth 0 or on EOF, switching to the lxb_css_syntax_parser_end phase and
+ * returning the terminated token.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_function(lxb_css_parser_t *parser,
+                               const lxb_css_syntax_token_t *token,
+                               lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+        /* A ')' at depth 0 closes the function itself. */
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            if (rule->deep != 0) {
+                if (parser->types_pos[-1] == token->type) {
+                    parser->types_pos--;
+                    rule->deep--;
+                }
+            }
+            else {
+                goto done;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->skip_consume = true;
+
+    return &lxb_css_syntax_token_terminated;
+}
+/*
+ * Phase function of a simple-block frame.
+ *
+ * Tracks nesting of (), [] and {} with the parser's type stack and
+ * rule->deep, and ends on the frame's block_end token at depth 0 or on
+ * EOF.  Unlike the components phase it does not set skip_ending.
+ */
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_block(lxb_css_parser_t *parser,
+                            const lxb_css_syntax_token_t *token,
+                            lxb_css_syntax_rule_t *rule)
+{
+    lxb_status_t status;
+
+    if (rule->offset > token->offset) {
+        return token;
+    }
+
+    rule->offset = token->offset + lxb_css_syntax_token_base(token)->length;
+
+    if (rule->block_end == token->type && rule->deep == 0) {
+        goto done;
+    }
+
+    switch (token->type) {
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RS_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            status = lxb_css_parser_types_push(parser,
+                                               LXB_CSS_SYNTAX_TOKEN_RC_BRACKET);
+            break;
+
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+            if (rule->deep != 0 && parser->types_pos[-1] == token->type) {
+                parser->types_pos--;
+                rule->deep--;
+            }
+
+            return token;
+
+        case LXB_CSS_SYNTAX_TOKEN__EOF:
+            goto done;
+
+        default:
+            return token;
+    }
+
+    if (status != LXB_STATUS_OK) {
+        return lxb_css_syntax_parser_failed(parser, status);
+    }
+
+    rule->deep++;
+
+    return token;
+
+done:
+
+    rule->phase = lxb_css_syntax_parser_end;
+    rule->skip_consume = true;
+
+    return &lxb_css_syntax_token_terminated;
+}
+
+static const lxb_css_syntax_token_t *
+lxb_css_syntax_parser_pipe(lxb_css_parser_t *parser,
+                           const lxb_css_syntax_token_t *token,
+                           lxb_css_syntax_rule_t *rule)
+{
+    if ((rule->block_end == token->type && rule->deep == 0)
+        || token->type == LXB_CSS_SYNTAX_TOKEN__EOF)
+    {
+        rule->phase
= lxb_css_syntax_parser_end; + rule->skip_consume = true; + + return &lxb_css_syntax_token_terminated; + } + + return token; +} + +const lxb_css_syntax_token_t * +lxb_css_syntax_parser_start_block(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule) +{ + if (rule->state != lxb_css_state_success) { + rule->skip_consume = true; + + return &lxb_css_syntax_token_terminated; + } + + /* This code will be called exclusively from the lxb_css_parser_run(...). */ + + rule->skip_consume = false; + + rule->phase = rule->back; + rule->state = parser->block; + + return rule->back(parser, token, rule); +} + +const lxb_css_syntax_token_t * +lxb_css_syntax_parser_end(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + lxb_css_syntax_rule_t *rule) +{ + lxb_status_t status; + lxb_css_syntax_rule_t *rules; + lxb_css_syntax_cb_base_t *base; + + if (rule->state != lxb_css_state_success) { + rule->skip_consume = true; + + return &lxb_css_syntax_token_terminated; + } + + /* This code will be called exclusively from the lxb_css_parser_run(...). 
/*
 * Helper macros for the tokenizer state functions below.
 *
 * Every macro expands to a statement that executes `return NULL` in the
 * calling function on failure, so they may only be used inside functions
 * that return a pointer.
 *
 * All macro parameters are parenthesized in the expansions so that callers
 * may pass arbitrary expressions without precedence surprises.
 */

/*
 * Ask the tokenizer for the next chunk of input.
 * _data and _end must be lvalues; they are passed by address to
 * lxb_css_syntax_tokenizer_next_chunk(). _status receives the result.
 */
#define LXB_CSS_SYNTAX_NEXT_CHUNK(_tkz, _status, _data, _end)                 \
    do {                                                                      \
        (_status) = lxb_css_syntax_tokenizer_next_chunk((_tkz), &(_data),     \
                                                        &(_end));             \
        if ((_status) != LXB_STATUS_OK) {                                     \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)


/*
 * Append _length bytes starting at _begin to the tokenizer string buffer.
 * _status receives the result of lxb_css_syntax_string_append().
 */
#define LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, _length)         \
    do {                                                                      \
        (_status) = lxb_css_syntax_string_append((_tkz), (_begin),            \
                                                 (_length));                  \
        if ((_status) != LXB_STATUS_OK) {                                     \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)

/*
 * Append the byte range [_begin, _end) to the tokenizer string buffer.
 * Note: _begin is evaluated twice.
 */
#define LXB_CSS_SYNTAX_STR_APPEND(_tkz, _status, _begin, _end)                \
    LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin,                      \
                                  ((_end) - (_begin)))

/*
 * Create an additional delim token via lxb_css_syntax_list_append_delim().
 */
#define LXB_CSS_SYNTAX_DELIM_APPEND(_tkz, _begin, _length, _ch)               \
    do {                                                                      \
        if (lxb_css_syntax_list_append_delim((_tkz), (_begin), (_length),     \
                                             (_ch)) == NULL)                  \
        {                                                                     \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)
+lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *data, + const lxb_char_t **end, size_t *length); + + +lxb_inline lxb_status_t +lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t *tkz, size_t upto) +{ + size_t len = tkz->pos - tkz->start; + size_t size = (tkz->end - tkz->start) + upto; + + lxb_char_t *tmp = lexbor_realloc(tkz->start, size); + if (tmp == NULL) { + tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; + return tkz->status; + } + + tkz->start = tmp; + tkz->pos = tmp + len; + tkz->end = tmp + size; + + return LXB_STATUS_OK; +} + +lxb_inline lxb_status_t +lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *data, size_t length) +{ + if ((size_t) (tkz->end - tkz->pos) <= length) { + if (lxb_css_syntax_string_realloc(tkz, length + 1024) != LXB_STATUS_OK) { + return tkz->status; + } + } + + memcpy(tkz->pos, data, length); + + tkz->pos += length; + + return LXB_STATUS_OK; +} + +lxb_inline lxb_status_t +lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t *tkz) +{ + if (tkz->pos >= tkz->end) { + if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) { + return tkz->status; + } + } + + *tkz->pos = 0x00; + + return LXB_STATUS_OK; +} + +lxb_inline const lxb_char_t * +lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data) +{ + if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) { + return NULL; + } + + lxb_css_syntax_token_string(token)->data = tkz->start; + lxb_css_syntax_token_string(token)->length = tkz->pos - tkz->start; + + tkz->pos = tkz->start; + + return data; +} + +lxb_inline const lxb_char_t * +lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data) +{ + if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) { + return NULL; + } + + lxb_css_syntax_token_dimension_string(token)->data = tkz->start; + 
lxb_css_syntax_token_dimension_string(token)->length = tkz->pos - tkz->start; + + tkz->pos = tkz->start; + + return data; +} + +lxb_inline lxb_css_syntax_token_t * +lxb_css_syntax_state_token_create(lxb_css_syntax_tokenizer_t *tkz) +{ + if (tkz->prepared == 0) { + tkz->prepared = tkz->cache->length; + } + + return lxb_css_syntax_token_cached_create(tkz); +} + +/* + * Delim + */ +lxb_inline void +lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t *token, + const lxb_char_t *data, lxb_char_t ch, + size_t length) +{ + lxb_css_syntax_token_delim(token)->character = ch; + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = length; + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; +} + +lxb_inline lxb_css_syntax_token_t * +lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *data, + size_t length, lxb_char_t ch) +{ + lxb_css_syntax_token_t *delim; + + delim = lxb_css_syntax_state_token_create(tkz); + if (delim == NULL) { + return NULL; + } + + lxb_css_syntax_state_delim_set(delim, data, ch, length); + + return delim; +} + +const lxb_char_t * +lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + lxb_css_syntax_state_delim_set(token, data, *data, 1); + + return data + 1; +} + +/* + * Comment + */ +const lxb_char_t * +lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_status_t status; + const lxb_char_t *begin; + + lxb_css_syntax_token_base(token)->begin = data; + + /* Skip forward slash (/) */ + data++; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim; + } + } + + /* U+002A ASTERISK (*) */ + if (*data != 0x2A) { + goto delim; + } + + begin = ++data; + length = 2; + + do { + if (data >= end) { + if (begin < data) { + length += data - 
begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto error; + } + + begin = data; + } + + switch (*data) { + case 0x00: + if (begin < data) { + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + lexbor_str_res_ansi_replacement_character, + sizeof(lexbor_str_res_ansi_replacement_character) - 1); + data += 1; + length += data - begin; + begin = data; + + continue; + + case 0x0D: + data++; + length += data - begin; + + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + + tkz->pos[-1] = '\n'; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto error; + } + } + + if (*data != 0x0A) { + data--; + } + else { + length += 1; + } + + begin = ++data; + + continue; + + case 0x0C: + if (begin < data) { + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + (lxb_char_t *) "\n", 1); + data += 1; + length += data - begin; + begin = data; + + continue; + + /* U+002A ASTERISK (*) */ + case 0x2A: + data++; + + if (data >= end) { + length += data - begin; + + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto error; + } + + if (*data == 0x2F) { + tkz->pos--; + *tkz->pos = 0x00; + + data++; + length++; + + goto done; + } + + begin = data; + } + + /* U+002F Forward slash (/) */ + if (*data == 0x2F) { + length += data - begin; + + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, (data - 1)); + + data++; + length++; + + goto done; + } + + continue; + } + + data++; + } + while (true); + +done: + + token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT; + + lxb_css_syntax_token_base(token)->length = length; + + return lxb_css_syntax_state_string_set(tkz, token, data); + +delim: + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + lxb_css_syntax_token_base(token)->length 
/*
 * Whitespace
 *
 * Consumes a run of whitespace bytes starting at `data` and turns `token`
 * into a LXB_CSS_SYNTAX_TOKEN_WHITESPACE token.
 *
 * The consumed bytes are copied into the tokenizer string buffer with line
 * endings rewritten: CR and CR LF become a single '\n', FF becomes '\n'.
 * TAB, SPACE and LF are copied unchanged. `length` counts the source bytes
 * that were consumed and is stored as the token base length; the collected
 * string itself is attached by lxb_css_syntax_state_string_set().
 *
 * Returns the position of the first byte that is not part of the run (in
 * the chunk that is current at that point), or NULL when one of the helper
 * macros fails (chunk loading or buffer append).
 */
const lxb_char_t *
lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t *tkz,
                                lxb_css_syntax_token_t *token,
                                const lxb_char_t *data, const lxb_char_t *end)
{
    size_t length;
    lxb_status_t status;
    const lxb_char_t *begin;

    token->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;

    lxb_css_syntax_token_base(token)->begin = data;

    /* `begin` marks the start of the bytes not yet copied to the buffer. */
    begin = data;
    length = 0;

    do {
        switch (*data) {
            /* U+000D CARRIAGE RETURN */
            case 0x0D:
                data++;
                length += data - begin;

                /* Copy everything up to and including the CR ... */
                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);

                /* ... and replace the copied CR with a line feed. */
                tkz->pos[-1] = '\n';

                if (data >= end) {
                    LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                    if (data >= end) {
                        goto done;
                    }
                }

                /*
                 * A LF right after the CR is consumed but not copied (CR LF
                 * collapses to one '\n'). Otherwise step back so that the
                 * shared `data++` below lands on the current byte again.
                 *
                 * NOTE(review): when a new chunk was loaded just above,
                 * `data--` transiently points one byte before that chunk;
                 * it is never dereferenced there.
                 */
                if (*data != 0x0A) {
                    data--;
                }
                else {
                    length += 1;
                }

                begin = data + 1;
                break;

            /* U+000C FORM FEED */
            case 0x0C:
                length += (data + 1) - begin;

                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* The FF itself is replaced by a line feed in the output. */
                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              (const lxb_char_t *) "\n", 1);
                begin = data + 1;
                break;

            /* TAB, SPACE and LF are kept as they are. */
            case 0x09:
            case 0x20:
            case 0x0A:
                break;

            /* First non-whitespace byte: flush pending bytes and finish. */
            default:
                if (begin < data) {
                    length += data - begin;

                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_token_base(token)->length = length;

                return lxb_css_syntax_state_string_set(tkz, token, data);
        }

        data++;

        /* End of the current chunk: flush what is pending, then refill. */
        if (data >= end) {
            if (begin < data) {
                length += data - begin;

                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
            }

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* No more input: leave the loop and finish the token. */
                break;
            }

            begin = data;
        }
    }
    while (true);

done:

    lxb_css_syntax_token_base(token)->length = length;

    return lxb_css_syntax_state_string_set(tkz, token, data);
}
/*
 * U+0023 NUMBER SIGN (#)
 *
 * `data` points at the '#'. Depending on what follows, `token` becomes:
 *   - LXB_CSS_SYNTAX_TOKEN_HASH, when the next byte has a non-zero entry in
 *     lxb_css_syntax_res_name_map, is 0x00, or starts an escape (a '\' not
 *     followed by LF, FF or CR); the rest of the name is then read by
 *     lxb_css_syntax_state_consume_ident();
 *   - a delim token '#' otherwise. If the '#' was followed by a '\' that
 *     does not start an escape (end of input, or a newline after it), an
 *     additional delim token '\' is created as well.
 *
 * Returns the position after the consumed input, or NULL on failure of a
 * helper (chunk loading, token creation, escape handling).
 */
const lxb_char_t *
lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t *tkz,
                          lxb_css_syntax_token_t *token, const lxb_char_t *data,
                          const lxb_char_t *end)
{
    size_t length;
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *begin;
    lxb_css_syntax_token_t *delim;

    /* Remember where the token starts and step over the '#'. */
    lxb_css_syntax_token_base(token)->begin = data++;

    if (data >= end) {
        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
        if (data >= end) {
            goto delim;
        }
    }

    /* Source length consumed so far: the '#' itself. */
    length = 1;

    /* presumably a zero map entry means "not a name byte" -- TODO confirm */
    if (lxb_css_syntax_res_name_map[*data] == 0x00) {
        /* A NUL byte is still accepted as the start of the hash name. */
        if (*data == 0x00) {
            goto hash;
        }

        /* U+005C REVERSE SOLIDUS (\) */
        if (*data != 0x5C) {
            goto delim;
        }

        /* `begin` keeps the position of the '\' for a possible delim token. */
        begin = data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto push_delim;
            }
        }

        ch = *data;

        /* '\' followed by LF, FF or CR is not an escape. */
        if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
            goto push_delim;
        }

        /* Account for the '\'; the escape helper adds the rest to length. */
        length += 1;

        data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
        if (data == NULL) {
            return NULL;
        }
    }

hash:

    token->type = LXB_CSS_SYNTAX_TOKEN_HASH;

    lxb_css_syntax_token_base(token)->length = length;

    return lxb_css_syntax_state_consume_ident(tkz, token, data, end);

push_delim:

    /* Emit the stray '\' as its own delim token, then fall into '#'. */
    delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '\\');
    if (delim == NULL) {
        return NULL;
    }

delim:

    token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;

    lxb_css_syntax_token_base(token)->length = 1;
    lxb_css_syntax_token_delim(token)->character = '#';

    return data;
}
lxb_css_syntax_token_t *delim; + + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + if (*data >= 0x30 && *data <= 0x39) { + lxb_css_syntax_token_number(token)->have_sign = true; + lxb_css_syntax_token_base(token)->length = 1; + + return lxb_css_syntax_state_consume_numeric(tkz, token, data, end); + } + + /* U+002E FULL STOP (.) */ + if (*data == 0x2E) { + begin = data++; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + + if (data >= end || *data < 0x30 || *data > 0x39) { + goto push_delim; + } + + lxb_css_syntax_token_number(token)->have_sign = true; + lxb_css_syntax_token_base(token)->length = 2; + + return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer, + tkz->buffer + sizeof(tkz->buffer), + data, end); + } + + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + if (*data >= 0x30 && *data <= 0x39) { + lxb_css_syntax_token_number(token)->have_sign = true; + lxb_css_syntax_token_base(token)->length = 2; + + return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer, + tkz->buffer + sizeof(tkz->buffer), + data, end); + } + + push_delim: + + delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '.'); + if (delim == NULL) { + return NULL; + } + } + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + lxb_css_syntax_token_base(token)->length = 1; + lxb_css_syntax_token_delim(token)->character = '+'; + + return data; +} + +/* + * U+002C COMMA (,) + */ +const lxb_char_t * +lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_COMMA; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * U+002D HYPHEN-MINUS (-) + */ +const lxb_char_t * +lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + lxb_status_t status; + + 
/*
 * Decide what a U+002D HYPHEN-MINUS (-) starts. `data` points at the byte
 * right after the first '-'; the caller has already stored the token begin.
 *
 * Outcomes, in the order they are tested:
 *   - digit, or '.' followed by a digit: a numeric token is consumed, its
 *     value is negated and have_sign is set;
 *   - "-->" : LXB_CSS_SYNTAX_TOKEN_CDC;
 *   - "---", or one/two minuses followed by a name-start byte, a NUL byte or
 *     a valid escape: the minuses are copied to the string buffer and the
 *     rest is read by lxb_css_syntax_state_ident_like_not_url();
 *   - anything else: `token` becomes a delim '-', and the bytes that were
 *     looked at ('.', a second '-', a '\') are emitted as additional delim
 *     tokens.
 *
 * Returns the position after the consumed input, or NULL when a helper
 * fails.
 */
const lxb_char_t *
lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t *tkz,
                                   lxb_css_syntax_token_t *token,
                                   const lxb_char_t *data, const lxb_char_t *end)
{
    size_t length;
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *begin, *second;
    lxb_css_syntax_token_t *delim;
    lxb_css_syntax_token_number_t *number;

    /* Number of leading '-' seen so far; `minuses` is their source text. */
    unsigned minuses_len = 1;
    static const lxb_char_t minuses[3] = "---";

    /* Check for <number-token> */

    /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
    if (*data >= 0x30 && *data <= 0x39) {
        /* The sign byte is already part of the token. */
        lxb_css_syntax_token_base(token)->length = 1;

        data = lxb_css_syntax_state_consume_numeric(tkz, token, data, end);

        number = lxb_css_syntax_token_number(token);
        number->num = -number->num;

        lxb_css_syntax_token_number(token)->have_sign = true;

        return data;
    }

    /* U+002E FULL STOP (.) */
    if (*data == 0x2E) {
        /* `begin` keeps the position of the '.' for a possible delim token. */
        begin = data++;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto push_delim;
            }
        }

        /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
        if (*data >= 0x30 && *data <= 0x39) {
            /* '-' and '.' are already part of the token. */
            lxb_css_syntax_token_base(token)->length = 2;

            data = lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
                                            tkz->buffer + sizeof(tkz->buffer),
                                            data, end);

            number = lxb_css_syntax_token_number(token);
            number->num = -number->num;

            lxb_css_syntax_token_number(token)->have_sign = true;

            return data;
        }

    push_delim:

        /* "-." without a digit: '.' becomes its own delim token. */
        delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '.');
        if (delim == NULL) {
            return NULL;
        }

        goto delim;
    }

    /* Position of a possible second '-'. */
    second = data;

    /* U+002D HYPHEN-MINUS (-) */
    if (*data == 0x2D) {
        data++;

        /* Check for <CDC-token> */

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ends after "--": two separate delim tokens. */
                delim = lxb_css_syntax_list_append_delim(tkz, second, 1, '-');
                if (delim == NULL) {
                    return NULL;
                }

                goto delim;
            }
        }

        if (*data == 0x2D) {
            /* "---": all three minuses start an identifier. */
            lxb_css_syntax_token_base(token)->length = 3;
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 3);

            return lxb_css_syntax_state_ident_like_not_url(tkz, token,
                                                           ++data, end);
        }
        else if (*data == 0x3E) {
            /* "-->" */
            token->type = LXB_CSS_SYNTAX_TOKEN_CDC;

            lxb_css_syntax_token_base(token)->length = 3;

            return data + 1;
        }

        minuses_len++;
    }

    /* Check for <ident-token> */

    if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
        || *data == 0x00)
    {
        lxb_css_syntax_token_base(token)->length = minuses_len;
        LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

        return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
    }

    /* Source bytes consumed by an escape sequence, if one follows. */
    length = 0;

    /* U+005C REVERSE SOLIDUS (\) */
    if (*data == 0x5C) {
        /* `begin` keeps the position of the '\' for a possible delim token. */
        begin = data++;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_rev_solidus;
            }

            ch = *data;

            /* '\' not followed by LF, FF or CR starts an escape. */
            if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
                length += 1;
                goto ident;
            }

            goto delim_rev_solidus;
        }

        ch = *data;

        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            length += 1;
            goto ident;
        }

    delim_rev_solidus:

        /* Not an escape: emit the second '-' (if any) and the '\'. */
        if (minuses_len == 2) {
            LXB_CSS_SYNTAX_DELIM_APPEND(tkz, second, 1, '-');
        }

        LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '\\');

        goto delim;
    }

    /*
     * NOTE(review): the second '-' is appended with length 0 here but with
     * length 1 in the paths above -- confirm this is intended.
     */
    if (minuses_len == 2) {
        LXB_CSS_SYNTAX_DELIM_APPEND(tkz, second, 0, '-');
    }

delim:

    token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;

    lxb_css_syntax_token_base(token)->length = 1;
    lxb_css_syntax_token_delim(token)->character = '-';

    return data;

ident:

    /* The identifier text starts with the minuses seen so far. */
    LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

    data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
    if (data == NULL) {
        return NULL;
    }

    lxb_css_syntax_token_base(token)->length = minuses_len + length;

    return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
}
+ */ +const lxb_char_t * +lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + lxb_status_t status; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_number(token)->have_sign = false; + + data++; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim; + } + } + + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + if (*data >= 0x30 && *data <= 0x39) { + lxb_css_syntax_token_base(token)->length = 1; + + return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer, + tkz->buffer + sizeof(tkz->buffer), + data, end); + } + +delim: + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + lxb_css_syntax_token_base(token)->length = 1; + lxb_css_syntax_token_delim(token)->character = '.'; + + return data; +} + +/* + * U+003A COLON (:) + */ +const lxb_char_t * +lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_COLON; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * U+003B SEMICOLON (;) + */ +const lxb_char_t * +lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_SEMICOLON; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * U+003C LESS-THAN SIGN (<) + */ +const lxb_char_t * +lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_char_t ch; + lxb_status_t status; + const lxb_char_t *mark, *minus, *esc, *idnt; + lxb_css_syntax_token_t *ident; + + 
lxb_css_syntax_token_base(token)->begin = data++; + + if ((end - data) > 2) { + if (data[0] == '!' && data[1] == '-' && data[2] == '-') { + data += 3; + + token->type = LXB_CSS_SYNTAX_TOKEN_CDO; + lxb_css_syntax_token_base(token)->length = 4; + + return data; + } + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + lxb_css_syntax_token_base(token)->length = 1; + lxb_css_syntax_token_delim(token)->character = '<'; + + return data; + } + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim; + } + } + + /* U+0021 EXCLAMATION MARK */ + if (*data != 0x21) { + goto delim; + } + + mark = data++; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim_mark; + } + } + + /* U+002D HYPHEN-MINUS */ + if (*data != 0x2D) { + goto delim_mark; + } + + minus = data++; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!'); + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + + goto delim; + } + } + + /* U+002D HYPHEN-MINUS */ + if (*data == 0x2D) { + token->type = LXB_CSS_SYNTAX_TOKEN_CDO; + + lxb_css_syntax_token_base(token)->length = 4; + + return data + 1; + } + + length = 1; + idnt = data; + + if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) { + goto ident_with_minus; + } + + /* U+005C REVERSE SOLIDUS (\) */ + if (*data == 0x5C) { + esc = data++; + length += 1; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim_esc; + } + + ch = *data; + + if (ch != 0x0A && ch != 0x0C && ch != 0x0D) { + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + (const lxb_char_t *) "-", 1); + + data = lxb_css_syntax_state_escaped(tkz, data, &end, &length); + if (data == NULL) { + return NULL; + } + + goto ident; + } + + delim_esc: + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!'); + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + 
LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\'); + + goto delim; + } + + ch = *data--; + + if (ch == 0x0A || ch == 0x0C || ch == 0x0D) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!'); + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + + goto delim; + } + + data = lxb_css_syntax_state_escaped(tkz, data + 1, &end, &length); + if (data == NULL) { + return NULL; + } + } + else if (*data != 0x00) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!'); + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-'); + + goto delim; + } + +ident_with_minus: + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, (const lxb_char_t *) "-", 1); + +ident: + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!'); + + ident = lxb_css_syntax_state_token_create(tkz); + if (ident == NULL) { + return NULL; + } + + lxb_css_syntax_token_base(ident)->begin = idnt; + lxb_css_syntax_token_base(ident)->length = length; + + data = lxb_css_syntax_state_ident_like_not_url(tkz, ident, data, end); + if (data == NULL) { + return NULL; + } + + goto delim; + +delim_mark: + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!'); + +delim: + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + lxb_css_syntax_token_base(token)->length = 1; + lxb_css_syntax_token_delim(token)->character = '<'; + + return data; +} + +/* + * U+0040 COMMERCIAL AT (@) + */ +const lxb_char_t * +lxb_css_syntax_state_at(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_char_t ch; + lxb_status_t status; + const lxb_char_t *minus, *esc; + + unsigned minuses_len = 0; + static const lxb_char_t minuses[2] = "--"; + + token->type = LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD; + + lxb_css_syntax_token_base(token)->begin = data++; + lxb_css_syntax_token_base(token)->length = 1; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim; + } + } + + if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) { + return 
lxb_css_syntax_state_consume_ident(tkz, token, data, end); + } + + minus = data; + + /* U+002D HYPHEN-MINUS */ + if (*data == 0x2D) { + data++; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + goto delim; + } + } + + if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START + || *data == 0x00) + { + lxb_css_syntax_token_base(token)->length += 1; + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 1); + return lxb_css_syntax_state_consume_ident(tkz, token, data, end); + } + else if (*data == 0x2D) { + lxb_css_syntax_token_base(token)->length += 2; + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 2); + return lxb_css_syntax_state_consume_ident(tkz, token, + data + 1, end); + } + + minuses_len++; + } + + /* U+005C REVERSE SOLIDUS (\) */ + if (*data == 0x5C) { + esc = data++; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim_esc; + } + } + + ch = *data; + + if (ch != 0x0A && ch != 0x0C && ch != 0x0D) { + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len); + + length = 0; + + data = lxb_css_syntax_state_escaped(tkz, data, &end, &length); + if (data == NULL) { + return NULL; + } + + lxb_css_syntax_token_base(token)->length += 1 + minuses_len + length; + + return lxb_css_syntax_state_consume_ident(tkz, token, data, end); + } + + goto delim_esc; + } + else if (*data != 0x00) { + if (minuses_len != 0) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-'); + } + + goto delim; + } + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len); + + lxb_css_syntax_token_base(token)->length += minuses_len; + + return lxb_css_syntax_state_consume_ident(tkz, token, data, end); + +delim_esc: + + if (minuses_len != 0) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + } + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\'); + +delim: + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + 
lxb_css_syntax_token_base(token)->length = 1; + lxb_css_syntax_token_delim(token)->character = '@'; + + return data; +} + +/* + * U+005B LEFT SQUARE BRACKET ([) + */ +const lxb_char_t * +lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_LS_BRACKET; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * U+005C REVERSE SOLIDUS (\) + */ +const lxb_char_t * +lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_char_t ch; + lxb_status_t status; + + lxb_css_syntax_token_base(token)->begin = data++; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim; + } + } + + ch = *data; + + if (ch == 0x0A || ch == 0x0C || ch == 0x0D) { + goto delim; + } + + length = 1; + + data = lxb_css_syntax_state_escaped(tkz, data, &end, &length); + if (data == NULL) { + return NULL; + } + + lxb_css_syntax_token_base(token)->length = length; + + return lxb_css_syntax_state_ident_like(tkz, token, data, end); + +delim: + + token->type = LXB_CSS_SYNTAX_TOKEN_DELIM; + + lxb_css_syntax_token_base(token)->length = 1; + lxb_css_syntax_token_delim(token)->character = '\\'; + + return data; +} + +/* + * U+005D RIGHT SQUARE BRACKET (]) + */ +const lxb_char_t * +lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_RS_BRACKET; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * U+007B LEFT CURLY BRACKET ({) + */ +const lxb_char_t * +lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t *tkz, 
lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_LC_BRACKET; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * U+007D RIGHT CURLY BRACKET (}) + */ +const lxb_char_t * +lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + token->type = LXB_CSS_SYNTAX_TOKEN_RC_BRACKET; + + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 1; + + return data + 1; +} + +/* + * Numeric + */ +lxb_inline void +lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *start, const lxb_char_t *end) +{ + double num = lexbor_strtod_internal(start, (end - start), 0); + + token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER; + + lxb_css_syntax_token_number(token)->is_float = false; + lxb_css_syntax_token_number(token)->num = num; +} + +lxb_inline void +lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *start, const lxb_char_t *end, + bool e_is_negative, int exponent, int e_digit) +{ + if (e_is_negative) { + exponent -= e_digit; + } + else { + exponent += e_digit; + } + + double num = lexbor_strtod_internal(start, (end - start), exponent); + + token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER; + + lxb_css_syntax_token_number(token)->num = num; + lxb_css_syntax_token_number(token)->is_float = true; +} + +const lxb_char_t * +lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, + const lxb_char_t *end) +{ + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 0; + lxb_css_syntax_token_number(token)->have_sign = false; + + return 
lxb_css_syntax_state_consume_numeric(tkz, token, data, end); +} + +static const lxb_char_t * +lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, + const lxb_char_t *end) +{ + size_t length; + lxb_status_t status; + const lxb_char_t *begin; + + lxb_char_t *buf_start = tkz->buffer; + lxb_char_t *buf_end = buf_start + sizeof(tkz->buffer); + + begin = data; + length = 0; + + do { + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + if (*data < 0x30 || *data > 0x39) { + length += data - begin; + break; + } + + if (buf_start != buf_end) { + *buf_start++ = *data; + } + + if (++data == end) { + length += data - begin; + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_token_base(token)->length += length; + + lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, + buf_start); + return data; + } + + begin = data; + } + } + while (true); + + lxb_css_syntax_token_base(token)->length += length; + + /* U+002E FULL STOP (.) 
*/ + if (*data != 0x2E) { + lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, + buf_start); + + return lxb_css_syntax_state_consume_numeric_name_start(tkz, token, + data, end); + } + + begin = data++; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim; + } + } + + if (*data >= 0x30 && *data <= 0x39) { + lxb_css_syntax_token_base(token)->length += 1; + + return lxb_css_syntax_state_decimal(tkz, token, buf_start, buf_end, + data, end); + } + +delim: + + lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, buf_start); + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '.'); + + return data; +} + +static const lxb_char_t * +lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + lxb_char_t *buf_start, lxb_char_t *buf_end, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + bool e_is_negative; + int exponent, e_digit; + lxb_char_t ch, by; + lxb_status_t status; + const lxb_char_t *last, *begin; + lxb_css_syntax_token_t *t_str; + lxb_css_syntax_token_string_t *str; + + exponent = 0; + begin = data; + length = lxb_css_syntax_token_base(token)->length; + + str = lxb_css_syntax_token_dimension_string(token); + t_str = (lxb_css_syntax_token_t *) (void *) str; + + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + do { + if (buf_start != buf_end) { + *buf_start++ = *data; + exponent -= 1; + } + + data++; + + if (data >= end) { + length += data - begin; + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_token_base(token)->length = length; + + lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, + buf_start, 0, exponent, 0); + return data; + } + + begin = data; + } + } + while (*data >= 0x30 && *data <= 0x39); + + length += data - begin; + + lxb_css_syntax_token_base(token)->length = length; + lxb_css_syntax_token_base(str)->begin = data; + + ch = *data; + + /* U+0045 Latin Capital 
Letter (E) or U+0065 Latin Small Letter (e) */ + if (ch != 0x45 && ch != 0x65) { + lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, + buf_start, 0, exponent, 0); + + return lxb_css_syntax_state_consume_numeric_name_start(tkz, token, + data, end); + } + + e_digit = 0; + e_is_negative = false; + + lxb_css_syntax_token_base(t_str)->length = 1; + + if (++data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, + buf_start, 0, exponent, 0); + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1); + + token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION; + + data = lxb_css_syntax_state_dimension_set(tkz, token, data); + + lxb_css_syntax_token_base(token)->length += + lxb_css_syntax_token_base(t_str)->length; + return data; + } + } + + switch (*data) { + /* U+002D HYPHEN-MINUS (-) */ + case 0x2D: + e_is_negative = true; + /* fall through */ + + /* U+002B PLUS SIGN (+) */ + case 0x2B: + last = data++; + by = *last; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto dimension; + } + } + + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + if (*data < 0x30 || *data > 0x39) { + goto dimension; + } + + length += 1; + break; + + default: + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + if (*data < 0x30 || *data > 0x39) { + lxb_css_syntax_consume_numeric_set_float(tkz, token, + tkz->buffer, buf_start, + 0, exponent, 0); + + token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION; + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1); + + data = lxb_css_syntax_state_consume_ident(tkz, t_str, + data, end); + if (data == NULL) { /* NULL result means consume_ident() failed */ + return NULL; + } + + lxb_css_syntax_token_base(token)->length = length + + lxb_css_syntax_token_base(t_str)->length; + return data; + } + + break; + } + + length += 1; + begin = data; + + /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */ + do { + e_digit = (*data - 0x30) + e_digit * 0x0A;
+ + if (++data == end) { + length += data - begin; + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_token_base(token)->length = length; + + lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start, + e_is_negative, exponent, e_digit); + return data; + } + + begin = data; + } + } + while(*data >= 0x30 && *data <= 0x39); + + length += data - begin; + + lxb_css_syntax_token_base(token)->length = length; + + lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start, + e_is_negative, exponent, e_digit); + + return lxb_css_syntax_state_consume_numeric_name_start(tkz, token, + data, end); + +dimension: + + lxb_css_syntax_consume_numeric_set_float(tkz, token, + tkz->buffer, buf_start, + 0, exponent, 0); + + token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION; + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1); + + if (by == '-') { + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &by, 1); + + lxb_css_syntax_token_base(t_str)->length += 1; + + data = lxb_css_syntax_state_consume_ident(tkz, t_str, data, end); + + lxb_css_syntax_token_base(token)->length = length + + lxb_css_syntax_token_base(t_str)->length; + return data; + } + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, last, (data >= end), '+'); + + lxb_css_syntax_token_base(token)->length = length + + lxb_css_syntax_token_base(t_str)->length; + + return lxb_css_syntax_state_dimension_set(tkz, token, data); +} + +static const lxb_char_t * +lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, + const lxb_char_t *end) +{ + bool have_minus; + size_t length; + lxb_char_t ch; + lxb_status_t status; + const lxb_char_t *esc, *minus; + lxb_css_syntax_token_t *t_str; + lxb_css_syntax_token_string_t *str; + + str = lxb_css_syntax_token_dimension_string(token); + t_str = (lxb_css_syntax_token_t *) (void *) str; + + lxb_css_syntax_token_base(t_str)->begin = data; + + ch = *data; + + if 
(lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START + || ch == 0x00) + { + lxb_css_syntax_token_base(t_str)->length = 0; + goto dimension; + } + + /* U+0025 PERCENTAGE SIGN (%) */ + if (ch == 0x25) { + token->type = LXB_CSS_SYNTAX_TOKEN_PERCENTAGE; + + lxb_css_syntax_token_base(token)->length += 1; + + return data + 1; + } + + have_minus = false; + minus = data; + + /* U+002D HYPHEN-MINUS */ + if (ch == 0x2D) { + data++; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + return data; + } + } + + ch = *data; + + if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START + || ch == 0x2D || ch == 0x00) + { + lxb_css_syntax_token_base(t_str)->length = 1; + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + (const lxb_char_t *) "-", 1); + goto dimension; + } + + have_minus = true; + } + + esc = data; + + /* U+005C REVERSE SOLIDUS (\) */ + if (ch == 0x5C) { + data++; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto delim_rev_solidus; + } + } + + ch = *data; + + if (ch != 0x0A && ch != 0x0C && ch != 0x0D) { + length = 1; + + if (have_minus) { + length += 1; + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + (const lxb_char_t *) "-", 1); + } + + data = lxb_css_syntax_state_escaped(tkz, data, &end, &length); + if (data == NULL) { + return NULL; + } + + lxb_css_syntax_token_base(t_str)->length = length; + + goto dimension; + } + + delim_rev_solidus: + + if (have_minus) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-'); + } + + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\'); + + return data; + } + + if (have_minus) { + LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-'); + } + + return data; + +dimension: + + token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION; + + data = lxb_css_syntax_state_consume_ident(tkz, t_str, data, end); + + lxb_css_syntax_token_base(token)->length += + 
lxb_css_syntax_token_base(t_str)->length; + + return data; +} + +static const lxb_char_t * +lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_status_t status; + const lxb_char_t *begin; + + begin = data; + length = 0; + + for (;; data++) { + if (data >= end) { + if (begin < data) { + length += data - begin; + + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_token_base(token)->length += length; + + return lxb_css_syntax_state_string_set(tkz, token, data); + } + + begin = data; + } + + if (lxb_css_syntax_res_name_map[*data] == 0x00) { + + /* U+005C REVERSE SOLIDUS (\) */ + if (*data == 0x5C) { + if (begin < data) { + length += data - begin; + + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + begin = data; + + if (++data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + goto push_delim_last; + } + } + + if (*data == 0x0A || *data == 0x0C || *data == 0x0D) { + goto push_delim_last; + } + + length += 1; + + data = lxb_css_syntax_state_escaped(tkz, data, &end, &length); + if (data == NULL) { + return NULL; + } + + begin = data--; + } + else if (*data == 0x00) { + length += (data + 1) - begin; + + if (begin < data) { + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + lexbor_str_res_ansi_replacement_character, + sizeof(lexbor_str_res_ansi_replacement_character) - 1); + begin = data + 1; + } + else { + if (begin < data) { + length += data - begin; + + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + lxb_css_syntax_token_base(token)->length += length; + + return lxb_css_syntax_state_string_set(tkz, token, data); + } + } + } + + return data; + +push_delim_last: + + lxb_css_syntax_token_base(token)->length += length; + + 
LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '\\'); + + return lxb_css_syntax_state_string_set(tkz, token, data); +} + +const lxb_char_t * +lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 0; + + return lxb_css_syntax_state_ident_like(tkz, token, data, end); +} + +static const lxb_char_t * +lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_char_t ch; + lxb_status_t status; + const lxb_char_t *begin, *ws_begin; + lxb_css_syntax_token_t *ws; + lxb_css_syntax_token_string_t *str, *ws_str; + static const lxb_char_t url[] = "url"; + + data = lxb_css_syntax_state_consume_ident(tkz, token, data, end); + if (data == NULL) { + /* Failure: bail out before 'data' is compared or dereferenced. */ + return NULL; + } + + end = tkz->in_end; + + if (data >= end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + token->type = LXB_CSS_SYNTAX_TOKEN_IDENT; + return data; + } + } + + if (data < end && *data == '(') { + data++; + + lxb_css_syntax_token_base(token)->length += 1; + + str = lxb_css_syntax_token_string(token); + + if (str->length == 3 && lexbor_str_data_casecmp(str->data, url)) { + begin = data; + length = 0; + + tkz->pos += str->length + 1; + ws_begin = tkz->pos; + + do { + if (data >= end) { + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + begin = data; + goto with_ws; + } + + begin = data; + } + + ch = *data; + + if (lexbor_utils_whitespace(ch, !=, &&)) { + /* U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE (') */ + if (ch == 0x22 || ch == 0x27) { + goto with_ws; + } + + tkz->pos = tkz->start; + length += data - begin; + + lxb_css_syntax_token_base(token)->length += length; + + return
lxb_css_syntax_state_url(tkz, token, data, end); + } + + data++; + } + while (true); + } + + token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION; + + return data; + } + + token->type = LXB_CSS_SYNTAX_TOKEN_IDENT; + + return data; + +with_ws: + + token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION; + + if (ws_begin != tkz->pos || begin < data) { + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + if (tkz->pos >= tkz->end) { + if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) { + return NULL; + } + } + + str->data = tkz->start; + *tkz->pos = 0x00; + + ws = lxb_css_syntax_state_token_create(tkz); + if (ws == NULL) { + return NULL; + } + + ws->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE; + + lxb_css_syntax_token_base(ws)->begin = begin; + lxb_css_syntax_token_base(ws)->length = length; + + ws_str = lxb_css_syntax_token_string(ws); + + ws_str->data = tkz->start + str->length + 1; + ws_str->length = tkz->pos - ws_str->data; + } + + tkz->pos = tkz->start; + + return data; +} + +const lxb_char_t * +lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + lxb_css_syntax_token_base(token)->begin = data; + lxb_css_syntax_token_base(token)->length = 0; + + return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end); +} + +static const lxb_char_t * +lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + data = lxb_css_syntax_state_consume_ident(tkz, token, data, end); + if (data == NULL) { + return NULL; + } + + end = tkz->in_end; + + if (data < end && *data == '(') { + token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION; + + lxb_css_syntax_token_base(token)->length += 1; + + return data + 1; + } + + token->type = LXB_CSS_SYNTAX_TOKEN_IDENT; + + return data; +} + +/* + * URL + */ +static const lxb_char_t * 
+lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_char_t ch; + lxb_status_t status; + const lxb_char_t *begin; + + status = LXB_STATUS_OK; + + *tkz->pos = 0x00; + + begin = data; + length = 0; + + do { + if (data >= end) { + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data, + LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR); + + token->type = LXB_CSS_SYNTAX_TOKEN_URL; + + lxb_css_syntax_token_base(token)->length += length; + + return lxb_css_syntax_state_string_set(tkz, token, data); + } + + begin = data; + } + + switch (*data) { + /* U+0000 NULL (\0) */ + case 0x00: + if (begin < data) { + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, + lexbor_str_res_ansi_replacement_character, + sizeof(lexbor_str_res_ansi_replacement_character) - 1); + + data += 1; + length += data - begin; + begin = data; + + continue; + + /* U+0029 RIGHT PARENTHESIS ()) */ + case 0x29: + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + token->type = LXB_CSS_SYNTAX_TOKEN_URL; + + lxb_css_syntax_token_base(token)->length += length + 1; + + return lxb_css_syntax_state_string_set(tkz, token, data + 1); + + /* + * U+0022 QUOTATION MARK (") + * U+0027 APOSTROPHE (') + * U+0028 LEFT PARENTHESIS (() + * U+000B LINE TABULATION + * U+007F DELETE + */ + case 0x22: + case 0x27: + case 0x28: + case 0x0B: + case 0x7F: + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data, + LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR); + + lxb_css_syntax_token_base(token)->length += length + 1; + + return 
lxb_css_syntax_state_bad_url(tkz, token, data + 1, end); + + /* U+005C REVERSE SOLIDUS (\) */ + case 0x5C: + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + begin = ++data; + + if (data == end) { + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data, + LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR); + + token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL; + + lxb_css_syntax_token_base(token)->length += length + 1; + + return lxb_css_syntax_state_string_set(tkz, token, data); + } + } + + ch = *data; + + if (ch == 0x0A || ch == 0x0C || ch == 0x0D) { + lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data, + LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR); + + lxb_css_syntax_token_base(token)->length += length + 1; + + return lxb_css_syntax_state_bad_url(tkz, token, data, end); + } + + data = lxb_css_syntax_state_escaped(tkz, data, &end, &length); + if (data == NULL) { + return NULL; + } + + begin = data--; + length += 1; + + break; + + /* + * U+0009 CHARACTER TABULATION (tab) + * U+000A LINE FEED (LF) + * U+000C FORM FEED (FF) + * U+000D CARRIAGE RETURN (CR) + * U+0020 SPACE + */ + case 0x09: + case 0x0A: + case 0x0C: + case 0x0D: + case 0x20: + if (begin < data) { + length += data - begin; + LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data); + } + + begin = ++data; + length += 1; + + do { + if (data == end) { + length += data - begin; + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data, + LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR); + + token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL; + + lxb_css_syntax_token_base(token)->length += length; + + return lxb_css_syntax_state_string_set(tkz, token, data); + } + + begin = data; + } + + ch = *data; + + if (lexbor_utils_whitespace(ch, !=, &&)) { + length += data - begin; + + /* U+0029 RIGHT PARENTHESIS ()) */ + if (*data == 
0x29) { + token->type = LXB_CSS_SYNTAX_TOKEN_URL; + + lxb_css_syntax_token_base(token)->length += length + 1; + + return lxb_css_syntax_state_string_set(tkz, token, + data + 1); + } + + lxb_css_syntax_token_base(token)->length += length; + + return lxb_css_syntax_state_bad_url(tkz, token, + data, end); + } + + data++; + } + while (true); + + default: + /* + * Inclusive: + * U+0000 NULL and U+0008 BACKSPACE or + * U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE + */ + if ((*data <= 0x08) + || (*data >= 0x0E && *data <= 0x1F)) + { + lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data, + LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR); + + lxb_css_syntax_token_base(token)->length += length; + + return lxb_css_syntax_state_bad_url(tkz, token, + data + 1, end); + } + + break; + } + + data++; + } + while (true); + + return data; +} + +/* + * Bad URL + */ +static const lxb_char_t * +lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, + const lxb_char_t *data, const lxb_char_t *end) +{ + size_t length; + lxb_status_t status; + const lxb_char_t *begin; + + token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL; + + if(lxb_css_syntax_state_string_set(tkz, token, data) == NULL) { + return NULL; + } + + begin = data; + length = 0; + + do { + if (data >= end) { + length += data - begin; + + LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end); + if (data >= end) { + lxb_css_syntax_token_base(token)->length += length; + return data; + } + + begin = data; + } + + /* U+0029 RIGHT PARENTHESIS ()) */ + if (*data == 0x29) { + data++; + length += data - begin; + + lxb_css_syntax_token_base(token)->length += length; + + return data; + } + /* U+005C REVERSE SOLIDUS (\) */ + else if (*data == 0x5C) { + data++; + + if (data >= end) { + continue; + } + } + + data++; + } + while (true); + + return data; +} + +lxb_inline lxb_status_t +lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t *tkz) +{ + return lxb_css_syntax_string_append(tkz, 
lexbor_str_res_ansi_replacement_character, + sizeof(lexbor_str_res_ansi_replacement_character) - 1); +} + +static const lxb_char_t * +lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *data, + const lxb_char_t **end, size_t *length) +{ + uint32_t cp; + unsigned count; + lxb_status_t status; + + cp = 0; + + for (count = 0; count < 6; count++, data++) { + if (data >= *end) { + status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end); + if (status != LXB_STATUS_OK) { + return NULL; + } + + if (data >= *end) { + if (count == 0) { + return *end; + } + + break; + } + } + + if (lexbor_str_res_map_hex[*data] == 0xFF) { + if (count == 0) { + *length += 1; + + if (*data == 0x00) { + status = lxb_css_syntax_string_append_rep(tkz); + if (status != LXB_STATUS_OK) { + return NULL; + } + + return data + 1; + } + + status = lxb_css_syntax_string_append(tkz, data, 1); + if (status != LXB_STATUS_OK) { + return NULL; + } + + return data + 1; + } + + switch (*data) { + case 0x0D: + data++; + *length += 1; + + status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, + end); + if (status != LXB_STATUS_OK) { + return NULL; + } + + if (data >= *end) { + break; + } + + if (*data == 0x0A) { + data++; + *length += 1; + } + + break; + + case 0x09: + case 0x20: + case 0x0A: + case 0x0C: + data++; + *length += 1; + break; + } + + break; + } + + cp <<= 4; + cp |= lexbor_str_res_map_hex[*data]; + } + + if ((tkz->end - tkz->pos) < 5) { + if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) { + return NULL; + } + } + + lxb_css_syntax_codepoint_to_ascii(tkz, cp); + + *length += count; + + return data; +} + +static const lxb_char_t * +lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *data, + const lxb_char_t **end, size_t *length) +{ + lxb_status_t status; + + /* U+000D CARRIAGE RETURN */ + if (*data == 0x0D) { + data++; + *length += 1; + + if (data >= *end) { + status = lxb_css_syntax_tokenizer_next_chunk(tkz, 
&data, end); + if (status != LXB_STATUS_OK) { + return NULL; + } + + if (data >= *end) { + return data; + } + } + + /* U+000A LINE FEED */ + if (*data == 0x0A) { + data++; + *length += 1; + } + + return data; + } + + if (*data == 0x00) { + status = lxb_css_syntax_string_append_rep(tkz); + if (status != LXB_STATUS_OK) { + return NULL; + } + + *length += 1; + + return data + 1; + } + + if (*data == 0x0A || *data == 0x0C) { + *length += 1; + + return data + 1; + } + + return lxb_css_syntax_state_escaped(tkz, data, end, length); +} diff --git a/ext/dom/lexbor/lexbor/css/syntax/syntax.c b/ext/dom/lexbor/lexbor/css/syntax/syntax.c new file mode 100644 index 00000000000..8d54345ee5d --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/syntax/syntax.c @@ -0,0 +1,279 @@ +/* + * Copyright (C) 2018-2023 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/syntax/syntax.h" +#include "lexbor/css/parser.h" + +#include "lexbor/core/str.h" + +#define LEXBOR_STR_RES_MAP_HEX +#define LEXBOR_STR_RES_MAP_HEX_TO_CHAR_LOWERCASE +#define LEXBOR_STR_RES_CHAR_TO_TWO_HEX_VALUE_LOWERCASE +#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER +#include "lexbor/core/str_res.h" + +#define LXB_CSS_SYNTAX_RES_NAME_MAP +#include "lexbor/css/syntax/res.h" + + +static const lexbor_str_t lxb_str_ws = lexbor_str(" "); + + +lxb_status_t +lxb_css_syntax_parse_list_rules(lxb_css_parser_t *parser, + const lxb_css_syntax_cb_list_rules_t *cb, + const lxb_char_t *data, size_t length, + void *ctx, bool top_level) +{ + lxb_status_t status; + lxb_css_syntax_rule_t *rule; + + if (lxb_css_parser_is_running(parser)) { + parser->status = LXB_STATUS_ERROR_WRONG_STAGE; + return parser->status; + } + + lxb_css_parser_clean(parser); + + lxb_css_parser_buffer_set(parser, data, length); + + rule = lxb_css_syntax_parser_list_rules_push(parser, NULL, NULL, cb, + ctx, top_level, + LXB_CSS_SYNTAX_TOKEN_UNDEF); + if (rule == NULL) { + status = parser->status; + goto end; + } + + parser->tkz->with_comment = 
false; + parser->stage = LXB_CSS_PARSER_RUN; + + status = lxb_css_syntax_parser_run(parser); + if (status != LXB_STATUS_OK) { + /* Destroy StyleSheet. */ + } + +end: + + parser->stage = LXB_CSS_PARSER_END; + + return status; +} + +lxb_status_t +lxb_css_syntax_stack_expand(lxb_css_parser_t *parser, size_t count) +{ + size_t length, cur_len, size; + lxb_css_syntax_rule_t *p; + + if ((parser->rules + count) >= parser->rules_end) { + cur_len = parser->rules - parser->rules_begin; + + length = cur_len + count + 1024; + size = length * sizeof(lxb_css_syntax_rule_t); + + p = lexbor_realloc(parser->rules_begin, size); + if (p == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + parser->rules_begin = p; + parser->rules_end = p + length; + parser->rules = p + cur_len; + } + + return LXB_STATUS_OK; +} + +void +lxb_css_syntax_codepoint_to_ascii(lxb_css_syntax_tokenizer_t *tkz, + lxb_codepoint_t cp) +{ + /* + * Zero, or is for a surrogate, or is greater than + * the maximum allowed code point (tkz->num > 0x10FFFF). 
+ */ + if (cp == 0 || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) { + memcpy(tkz->pos, lexbor_str_res_ansi_replacement_character, 3); + + tkz->pos += 3; + *tkz->pos = '\0'; + + return; + } + + lxb_char_t *data = tkz->pos; + + if (cp <= 0x0000007F) { + /* 0xxxxxxx */ + data[0] = (lxb_char_t) cp; + + tkz->pos += 1; + } + else if (cp <= 0x000007FF) { + /* 110xxxxx 10xxxxxx */ + data[0] = (char)(0xC0 | (cp >> 6 )); + data[1] = (char)(0x80 | (cp & 0x3F)); + + tkz->pos += 2; + } + else if (cp <= 0x0000FFFF) { + /* 1110xxxx 10xxxxxx 10xxxxxx */ + data[0] = (char)(0xE0 | ((cp >> 12))); + data[1] = (char)(0x80 | ((cp >> 6 ) & 0x3F)); + data[2] = (char)(0x80 | ( cp & 0x3F)); + + tkz->pos += 3; + } + else if (cp <= 0x001FFFFF) { + /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + data[0] = (char)(0xF0 | ( cp >> 18)); + data[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + data[2] = (char)(0x80 | ((cp >> 6 ) & 0x3F)); + data[3] = (char)(0x80 | ( cp & 0x3F)); + + tkz->pos += 4; + } + + *tkz->pos = '\0'; +} + +lxb_status_t +lxb_css_syntax_ident_serialize(const lxb_char_t *data, size_t length, + lexbor_serialize_cb_f cb, void *ctx) +{ + lxb_char_t ch; + lxb_status_t status; + const char **hex_map; + const lxb_char_t *p = data, *end; + + static const lexbor_str_t str_s = lexbor_str("\\"); + + p = data; + end = data + length; + hex_map = lexbor_str_res_char_to_two_hex_value_lowercase; + + while (p < end) { + ch = *p; + + if (lxb_css_syntax_res_name_map[ch] == 0x00) { + lexbor_serialize_write(cb, data, p - data, ctx, status); + lexbor_serialize_write(cb, str_s.data, str_s.length, ctx, status); + lexbor_serialize_write(cb, hex_map[ch], 2, ctx, status); + + data = ++p; + + if (p < end && lexbor_str_res_map_hex[*p] != 0xff) { + lexbor_serialize_write(cb, lxb_str_ws.data, + lxb_str_ws.length, ctx, status); + } + + continue; + } + + p++; + } + + if (data < p) { + lexbor_serialize_write(cb, data, p - data, ctx, status); + } + + return LXB_STATUS_OK; +} + +lxb_status_t 
+lxb_css_syntax_string_serialize(const lxb_char_t *data, size_t length, + lexbor_serialize_cb_f cb, void *ctx) +{ + lxb_char_t ch; + lxb_status_t status; + const char **hex_map; + const lxb_char_t *p, *end; + + static const lexbor_str_t str_s = lexbor_str("\\"); + static const lexbor_str_t str_dk = lexbor_str("\""); + static const lexbor_str_t str_ds = lexbor_str("\\\\"); + static const lexbor_str_t str_dks = lexbor_str("\\\""); + + p = data; + end = data + length; + hex_map = lexbor_str_res_char_to_two_hex_value_lowercase; + + lexbor_serialize_write(cb, str_dk.data, str_dk.length, ctx, status); + + while (p < end) { + ch = *p; + + if (lxb_css_syntax_res_name_map[ch] == 0x00) { + switch (ch) { + case '\\': + lexbor_serialize_write(cb, data, p - data, ctx, status); + lexbor_serialize_write(cb, str_ds.data, str_ds.length, + ctx, status); + break; + + case '"': + lexbor_serialize_write(cb, data, p - data, ctx, status); + lexbor_serialize_write(cb, str_dks.data, str_dks.length, + ctx, status); + break; + + case '\n': + case '\t': + case '\r': + lexbor_serialize_write(cb, data, p - data, ctx, status); + lexbor_serialize_write(cb, str_s.data, str_s.length, + ctx, status); + lexbor_serialize_write(cb, hex_map[ch], 2, ctx, status); + + p++; + + if (p < end && lexbor_str_res_map_hex[*p] != 0xff) { + lexbor_serialize_write(cb, lxb_str_ws.data, + lxb_str_ws.length, ctx, status); + } + + data = p; + continue; + + default: + p++; + continue; + } + + data = ++p; + continue; + } + + p++; + } + + if (data < p) { + lexbor_serialize_write(cb, data, p - data, ctx, status); + } + + lexbor_serialize_write(cb, str_dk.data, str_dk.length, ctx, status); + + return LXB_STATUS_OK; +} + +lxb_status_t +lxb_css_syntax_ident_or_string_serialize(const lxb_char_t *data, size_t length, + lexbor_serialize_cb_f cb, void *ctx) +{ + const lxb_char_t *p, *end; + + p = data; + end = data + length; + + while (p < end) { + if (lxb_css_syntax_res_name_map[*p++] == 0x00) { + return 
lxb_css_syntax_string_serialize(data, length, cb, ctx); + } + } + + return cb(data, length, ctx); +} diff --git a/ext/dom/lexbor/lexbor/css/syntax/token.c b/ext/dom/lexbor/lexbor/css/syntax/token.c new file mode 100644 index 00000000000..b757acfcf4d --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/syntax/token.c @@ -0,0 +1,648 @@ +/* + * Copyright (C) 2018-2020 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/core/shs.h" +#include "lexbor/core/conv.h" +#include "lexbor/core/serialize.h" +#include "lexbor/core/print.h" + +#include "lexbor/css/parser.h" +#include "lexbor/css/syntax/token.h" +#include "lexbor/css/syntax/state.h" +#include "lexbor/css/syntax/state_res.h" + +#define LXB_CSS_SYNTAX_TOKEN_RES_NAME_SHS_MAP +#include "lexbor/css/syntax/token_res.h" + +#define LEXBOR_STR_RES_MAP_HEX +#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER +#include "lexbor/core/str_res.h" + + +lxb_css_syntax_token_t * +lxb_css_syntax_tokenizer_token(lxb_css_syntax_tokenizer_t *tkz); + +lxb_status_t +lxb_css_syntax_tokenizer_cache_push(lxb_css_syntax_tokenizer_cache_t *cache, + lxb_css_syntax_token_t *value); + + +typedef struct { + lexbor_str_t *str; + lexbor_mraw_t *mraw; +} +lxb_css_syntax_token_ctx_t; + + +static lxb_status_t +lxb_css_syntax_token_str_cb(const lxb_char_t *data, size_t len, void *ctx); + + +lxb_css_syntax_token_t * +lxb_css_syntax_token(lxb_css_syntax_tokenizer_t *tkz) +{ + if (tkz->cache_pos < tkz->cache->length + && (tkz->prepared == 0 || tkz->cache_pos < tkz->prepared)) + { + return tkz->cache->list[tkz->cache_pos]; + } + + return lxb_css_syntax_tokenizer_token(tkz); +} + +lxb_css_syntax_token_t * +lxb_css_syntax_token_next(lxb_css_syntax_tokenizer_t *tkz) +{ + return lxb_css_syntax_tokenizer_token(tkz); +} + +void +lxb_css_syntax_token_consume(lxb_css_syntax_tokenizer_t *tkz) +{ + lxb_css_syntax_token_t *token; + + if (tkz->cache_pos < tkz->cache->length) { + if (tkz->prepared != 0 && tkz->cache_pos >= tkz->prepared) { + return; + } 
+ + token = tkz->cache->list[tkz->cache_pos]; + + lxb_css_syntax_token_string_free(tkz, token); + lexbor_dobject_free(tkz->tokens, token); + + tkz->cache_pos += 1; + + if (tkz->cache_pos >= tkz->cache->length) { + tkz->cache->length = 0; + tkz->cache_pos = 0; + } + } +} + +void +lxb_css_syntax_token_consume_n(lxb_css_syntax_tokenizer_t *tkz, unsigned count) +{ + while (count != 0) { + count--; + lxb_css_syntax_token_consume(tkz); + } +} + +lxb_status_t +lxb_css_syntax_token_string_dup(lxb_css_syntax_token_string_t *token, + lexbor_str_t *str, lexbor_mraw_t *mraw) +{ + size_t length; + + length = token->length + 1; + + if (length > str->length) { + if (str->data == NULL) { + str->data = lexbor_mraw_alloc(mraw, length); + if (str->data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + str->length = 0; + } + else { + if (lexbor_str_realloc(str, mraw, length) == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + } + } + + /* + 1 = '\0' */ + memcpy(str->data, token->data, length); + + str->length = token->length; + + return LXB_STATUS_OK; +} + +lxb_status_t +lxb_css_syntax_token_string_make(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token) +{ + lxb_char_t *data; + lxb_css_syntax_token_string_t *token_string; + + if (token->type >= LXB_CSS_SYNTAX_TOKEN_IDENT + && token->type <= LXB_CSS_SYNTAX_TOKEN_WHITESPACE) + { + token_string = lxb_css_syntax_token_string(token); + goto copy; + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_DIMENSION) { + token_string = lxb_css_syntax_token_dimension_string(token); + goto copy; + } + + return LXB_STATUS_OK; + +copy: + + data = lexbor_mraw_alloc(tkz->mraw, token_string->length + 1); + if (data == NULL) { + tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; + return tkz->status; + } + + /* + 1 = '\0' */ + memcpy(data, token_string->data, token_string->length + 1); + + token_string->data = data; + token->cloned = true; + + return LXB_STATUS_OK; +} + +lxb_css_syntax_token_t * 
+lxb_css_syntax_token_cached_create(lxb_css_syntax_tokenizer_t *tkz) +{ + lxb_status_t status; + lxb_css_syntax_token_t *token; + + token = lexbor_dobject_alloc(tkz->tokens); + if (token == NULL) { + tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; + return NULL; + } + + status = lxb_css_syntax_tokenizer_cache_push(tkz->cache, token); + if (status != LXB_STATUS_OK) { + tkz->status = status; + return NULL; + } + + token->cloned = false; + + return token; +} + + +void +lxb_css_syntax_token_string_free(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_t *token) +{ + lxb_css_syntax_token_string_t *token_string; + + if (token->cloned) { + if (token->type == LXB_CSS_SYNTAX_TOKEN_DIMENSION) { + token_string = lxb_css_syntax_token_dimension_string(token); + } + else { + token_string = lxb_css_syntax_token_string(token); + } + + lexbor_mraw_free(tkz->mraw, (lxb_char_t *) token_string->data); + } +} + +const lxb_char_t * +lxb_css_syntax_token_type_name_by_id(lxb_css_syntax_token_type_t type) +{ + switch (type) { + case LXB_CSS_SYNTAX_TOKEN_IDENT: + return (lxb_char_t *) "ident"; + case LXB_CSS_SYNTAX_TOKEN_FUNCTION: + return (lxb_char_t *) "function"; + case LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD: + return (lxb_char_t *) "at-keyword"; + case LXB_CSS_SYNTAX_TOKEN_HASH: + return (lxb_char_t *) "hash"; + case LXB_CSS_SYNTAX_TOKEN_STRING: + return (lxb_char_t *) "string"; + case LXB_CSS_SYNTAX_TOKEN_BAD_STRING: + return (lxb_char_t *) "bad-string"; + case LXB_CSS_SYNTAX_TOKEN_URL: + return (lxb_char_t *) "url"; + case LXB_CSS_SYNTAX_TOKEN_BAD_URL: + return (lxb_char_t *) "bad-url"; + case LXB_CSS_SYNTAX_TOKEN_DELIM: + return (lxb_char_t *) "delim"; + case LXB_CSS_SYNTAX_TOKEN_NUMBER: + return (lxb_char_t *) "number"; + case LXB_CSS_SYNTAX_TOKEN_PERCENTAGE: + return (lxb_char_t *) "percentage"; + case LXB_CSS_SYNTAX_TOKEN_DIMENSION: + return (lxb_char_t *) "dimension"; + case LXB_CSS_SYNTAX_TOKEN_WHITESPACE: + return (lxb_char_t *) "whitespace"; + case LXB_CSS_SYNTAX_TOKEN_CDO: + 
return (lxb_char_t *) "CDO"; + case LXB_CSS_SYNTAX_TOKEN_CDC: + return (lxb_char_t *) "CDC"; + case LXB_CSS_SYNTAX_TOKEN_COLON: + return (lxb_char_t *) "colon"; + case LXB_CSS_SYNTAX_TOKEN_SEMICOLON: + return (lxb_char_t *) "semicolon"; + case LXB_CSS_SYNTAX_TOKEN_COMMA: + return (lxb_char_t *) "comma"; + case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET: + return (lxb_char_t *) "left-square-bracket"; + case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET: + return (lxb_char_t *) "right-square-bracket"; + case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS: + return (lxb_char_t *) "left-parenthesis"; + case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS: + return (lxb_char_t *) "right-parenthesis"; + case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET: + return (lxb_char_t *) "left-curly-bracket"; + case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET: + return (lxb_char_t *) "right-curly-bracket"; + case LXB_CSS_SYNTAX_TOKEN_COMMENT: + return (lxb_char_t *) "comment"; + case LXB_CSS_SYNTAX_TOKEN__EOF: + return (lxb_char_t *) "end-of-file"; + case LXB_CSS_SYNTAX_TOKEN__END: + return (lxb_char_t *) "end"; + default: + return (lxb_char_t *) "undefined"; + } +} + +lxb_css_syntax_token_type_t +lxb_css_syntax_token_type_id_by_name(const lxb_char_t *type_name, size_t len) +{ + const lexbor_shs_entry_t *entry; + + entry = lexbor_shs_entry_get_lower_static(lxb_css_syntax_token_res_name_shs_map, + type_name, len); + if (entry == NULL) { + return LXB_CSS_SYNTAX_TOKEN_UNDEF; + } + + return (lxb_css_syntax_token_type_t) (uintptr_t) entry->value; +} + + +lxb_status_t +lxb_css_syntax_token_serialize(const lxb_css_syntax_token_t *token, + lxb_css_syntax_token_cb_f cb, void *ctx) +{ + size_t len; + lxb_status_t status; + lxb_char_t buf[128]; + const lxb_css_syntax_token_string_t *str; + const lxb_css_syntax_token_dimension_t *dim; + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_DELIM: + buf[0] = token->types.delim.character; + buf[1] = 0x00; + + return cb(buf, 1, ctx); + + case LXB_CSS_SYNTAX_TOKEN_NUMBER: + len = 
lexbor_conv_float_to_data(token->types.number.num,
+                                            buf, (sizeof(buf) - 1));
+
+            buf[len] = 0x00;
+
+            return cb(buf, len, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_PERCENTAGE:
+            len = lexbor_conv_float_to_data(token->types.number.num,
+                                            buf, (sizeof(buf) - 1));
+
+            buf[len] = 0x00;
+
+            status = cb(buf, len, ctx);
+            if (status != LXB_STATUS_OK) {
+                return status;
+            }
+
+            return cb((lxb_char_t *) "%", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_CDO:
+            return cb((lxb_char_t *) "<!--", 4, ctx);  /* length must match the literal */
+
+        case LXB_CSS_SYNTAX_TOKEN_CDC:
+            return cb((lxb_char_t *) "-->", 3, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_COLON:
+            return cb((lxb_char_t *) ":", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_SEMICOLON:
+            return cb((lxb_char_t *) ";", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_COMMA:
+            return cb((lxb_char_t *) ",", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_LS_BRACKET:
+            return cb((lxb_char_t *) "[", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_RS_BRACKET:
+            return cb((lxb_char_t *) "]", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS:
+            return cb((lxb_char_t *) "(", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS:
+            return cb((lxb_char_t *) ")", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_LC_BRACKET:
+            return cb((lxb_char_t *) "{", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_RC_BRACKET:
+            return cb((lxb_char_t *) "}", 1, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_HASH:
+            status = cb((lxb_char_t *) "#", 1, ctx);
+            if (status != LXB_STATUS_OK) {
+                return status;
+            }
+
+            str = &token->types.string;
+
+            return cb(str->data, str->length, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD:
+            status = cb((lxb_char_t *) "@", 1, ctx);
+            if (status != LXB_STATUS_OK) {
+                return status;
+            }
+
+            str = &token->types.string;
+
+            return cb(str->data, str->length, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_WHITESPACE:
+        case LXB_CSS_SYNTAX_TOKEN_IDENT:
+            str = &token->types.string;
+
+            return cb(str->data, str->length, ctx);
+
+        case LXB_CSS_SYNTAX_TOKEN_FUNCTION:
+            str = &token->types.string;
+
+            status = cb(str->data, str->length, ctx);
+            if (status != LXB_STATUS_OK) {
+                return status;
+            }
+
+            return cb((lxb_char_t *) 
"(", 1, ctx); + + case LXB_CSS_SYNTAX_TOKEN_STRING: + case LXB_CSS_SYNTAX_TOKEN_BAD_STRING: { + status = cb((lxb_char_t *) "\"", 1, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + const lxb_char_t *begin = token->types.string.data; + const lxb_char_t *end = begin + token->types.string.length; + + const lxb_char_t *ptr = begin; + + for (; begin < end; begin++) { + /* 0x5C; '\'; Inverse/backward slash */ + if (*begin == 0x5C) { + begin += 1; + + status = cb(ptr, (begin - ptr), ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + if (begin == end) { + status = cb((const lxb_char_t *) "\\", 1, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + ptr = begin; + + break; + } + + begin -= 1; + ptr = begin; + } + /* 0x22; '"'; Only quotes above */ + else if (*begin == 0x22) { + if (ptr != begin) { + status = cb(ptr, (begin - ptr), ctx); + if (status != LXB_STATUS_OK) { + return status; + } + } + + status = cb((const lxb_char_t *) "\\", 1, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + ptr = begin; + } + } + + if (ptr != begin) { + status = cb(ptr, (begin - ptr), ctx); + if (status != LXB_STATUS_OK) { + return status; + } + } + + return cb((const lxb_char_t *) "\"", 1, ctx); + } + + case LXB_CSS_SYNTAX_TOKEN_URL: + case LXB_CSS_SYNTAX_TOKEN_BAD_URL: + status = cb((lxb_char_t *) "url(", 4, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + str = &token->types.string; + + status = cb(str->data, str->length, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + return cb((lxb_char_t *) ")", 1, ctx); + + case LXB_CSS_SYNTAX_TOKEN_COMMENT: + status = cb((lxb_char_t *) "/*", 2, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + str = &token->types.string; + + status = cb(str->data, str->length, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + return cb((lxb_char_t *) "*/", 2, ctx); + + case LXB_CSS_SYNTAX_TOKEN_DIMENSION: + len = 
lexbor_conv_float_to_data(token->types.number.num, + buf, (sizeof(buf) - 1)); + + buf[len] = 0x00; + + status = cb(buf, len, ctx); + if (status != LXB_STATUS_OK) { + return status; + } + + dim = &token->types.dimension; + + return cb(dim->str.data, dim->str.length, ctx); + + case LXB_CSS_SYNTAX_TOKEN__EOF: + return cb((lxb_char_t *) "END-OF-FILE", 11, ctx); + + case LXB_CSS_SYNTAX_TOKEN__END: + return cb((lxb_char_t *) "END", 3, ctx); + + default: + return LXB_STATUS_ERROR; + } +} + +lxb_status_t +lxb_css_syntax_token_serialize_str(const lxb_css_syntax_token_t *token, + lexbor_str_t *str, lexbor_mraw_t *mraw) +{ + lxb_css_syntax_token_ctx_t ctx; + + ctx.str = str; + ctx.mraw = mraw; + + if (str->data == NULL) { + lexbor_str_init(str, mraw, 1); + if (str->data == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + } + + return lxb_css_syntax_token_serialize(token, lxb_css_syntax_token_str_cb, + &ctx); +} + +static lxb_status_t +lxb_css_syntax_token_str_cb(const lxb_char_t *data, size_t len, void *cb_ctx) +{ + lxb_char_t *ptr; + lxb_css_syntax_token_ctx_t *ctx = (lxb_css_syntax_token_ctx_t *) cb_ctx; + + ptr = lexbor_str_append(ctx->str, ctx->mraw, data, len); + if (ptr == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + return LXB_STATUS_OK; +} + + +lxb_char_t * +lxb_css_syntax_token_serialize_char(const lxb_css_syntax_token_t *token, + size_t *out_length) +{ + size_t length = 0; + lxb_status_t status; + lexbor_str_t str; + + status = lxb_css_syntax_token_serialize(token, lexbor_serialize_length_cb, + &length); + if (status != LXB_STATUS_OK) { + goto failed; + } + + /* + 1 == '\0' */ + str.data = lexbor_malloc(length + 1); + if (str.data == NULL) { + goto failed; + } + + str.length = 0; + + status = lxb_css_syntax_token_serialize(token, lexbor_serialize_copy_cb, + &str); + if (status != LXB_STATUS_OK) { + lexbor_free(str.data); + goto failed; + } + + str.data[str.length] = '\0'; + + if (out_length != NULL) { + *out_length = str.length; + } + + 
return str.data; + +failed: + + if (out_length != NULL) { + *out_length = 0; + } + + return NULL; +} + +lxb_css_log_message_t * +lxb_css_syntax_token_error(lxb_css_parser_t *parser, + const lxb_css_syntax_token_t *token, + const char *module_name) +{ + lxb_char_t *name; + lxb_css_log_message_t *msg; + + static const char unexpected[] = "%s. Unexpected token: %s"; + + name = lxb_css_syntax_token_serialize_char(token, NULL); + if (name == NULL) { + return NULL; + } + + msg = lxb_css_log_format(parser->log, LXB_CSS_LOG_SYNTAX_ERROR, unexpected, + module_name, name); + + lexbor_free(name); + + return msg; +} + +/* + * No inline functions for ABI. + */ +lxb_css_syntax_token_t * +lxb_css_syntax_token_create_noi(lexbor_dobject_t *dobj) +{ + return lxb_css_syntax_token_create(dobj); +} + +void +lxb_css_syntax_token_clean_noi(lxb_css_syntax_token_t *token) +{ + lxb_css_syntax_token_clean(token); +} + +lxb_css_syntax_token_t * +lxb_css_syntax_token_destroy_noi(lxb_css_syntax_token_t *token, + lexbor_dobject_t *dobj) +{ + return lxb_css_syntax_token_destroy(token, dobj); +} + +const lxb_char_t * +lxb_css_syntax_token_type_name_noi(lxb_css_syntax_token_t *token) +{ + return lxb_css_syntax_token_type_name(token); +} + +lxb_css_syntax_token_type_t +lxb_css_syntax_token_type_noi(lxb_css_syntax_token_t *token) +{ + return lxb_css_syntax_token_type(token); +} diff --git a/ext/dom/lexbor/lexbor/css/syntax/tokenizer.c b/ext/dom/lexbor/lexbor/css/syntax/tokenizer.c new file mode 100644 index 00000000000..e8a8802e529 --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/syntax/tokenizer.c @@ -0,0 +1,709 @@ +/* + * Copyright (C) 2018-2020 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/syntax/tokenizer.h" +#include "lexbor/css/syntax/tokenizer/error.h" +#include "lexbor/css/syntax/state.h" +#include "lexbor/css/syntax/state_res.h" + +#include "lexbor/core/array.h" + +#define LEXBOR_STR_RES_MAP_LOWERCASE +#include "lexbor/core/str_res.h" + + +static const 
lxb_char_t lxb_css_syntax_tokenizer_important[] = "important"; + +static lxb_css_syntax_tokenizer_cache_t * +lxb_css_syntax_tokenizer_cache_create(void); + +static lxb_status_t +lxb_css_syntax_tokenizer_cache_init(lxb_css_syntax_tokenizer_cache_t *cache, + size_t size); + +static void +lxb_css_syntax_tokenizer_cache_clean(lxb_css_syntax_tokenizer_cache_t *cache); + +static lxb_css_syntax_tokenizer_cache_t * +lxb_css_syntax_tokenizer_cache_destroy(lxb_css_syntax_tokenizer_cache_t *cache); + +LXB_API lxb_status_t +lxb_css_syntax_tokenizer_cache_push(lxb_css_syntax_tokenizer_cache_t *cache, + lxb_css_syntax_token_t *value); + +static lxb_status_t +lxb_css_syntax_tokenizer_blank(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t **data, const lxb_char_t **end, + void *ctx); + +static bool +lxb_css_syntax_tokenizer_lookup_important_ch(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *p, + const lxb_char_t *end, + const lxb_char_t stop_ch, + lxb_css_syntax_token_type_t stop, + bool skip_first); + +static bool +lxb_css_syntax_tokenizer_lookup_important_end(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *p, + const lxb_char_t *end, + const lxb_char_t stop_ch, + lxb_css_syntax_token_type_t stop, + bool skip_first); + +static bool +lxb_css_syntax_tokenizer_lookup_important_tokens(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_type_t stop, + bool skip_first); + + +lxb_css_syntax_tokenizer_t * +lxb_css_syntax_tokenizer_create(void) +{ + return lexbor_calloc(1, sizeof(lxb_css_syntax_tokenizer_t)); +} + +lxb_status_t +lxb_css_syntax_tokenizer_init(lxb_css_syntax_tokenizer_t *tkz) +{ + lxb_status_t status; + static const unsigned tmp_size = 1024; + + if (tkz == NULL) { + return LXB_STATUS_ERROR_OBJECT_IS_NULL; + } + + /* Tokens. */ + + tkz->tokens = lexbor_dobject_create(); + status = lexbor_dobject_init(tkz->tokens, 128, + sizeof(lxb_css_syntax_token_t)); + if (status != LXB_STATUS_OK) { + return status; + } + + /* Cache for Tokens. 
*/ + + tkz->cache = lxb_css_syntax_tokenizer_cache_create(); + status = lxb_css_syntax_tokenizer_cache_init(tkz->cache, 128); + if (status != LXB_STATUS_OK) { + return status; + } + + /* Memory for text. */ + + tkz->mraw = lexbor_mraw_create(); + status = lexbor_mraw_init(tkz->mraw, 4096); + if (status != LXB_STATUS_OK) { + return status; + } + + /* Temp */ + tkz->start = lexbor_malloc(tmp_size); + if (tkz->start == NULL) { + return LXB_STATUS_ERROR_MEMORY_ALLOCATION; + } + + tkz->pos = tkz->start; + tkz->end = tkz->start + tmp_size; + + /* Parse errors */ + tkz->parse_errors = lexbor_array_obj_create(); + status = lexbor_array_obj_init(tkz->parse_errors, 16, + sizeof(lxb_css_syntax_tokenizer_error_t)); + if (status != LXB_STATUS_OK) { + return status; + } + + tkz->offset = 0; + tkz->cache_pos = 0; + tkz->prepared = 0; + + tkz->eof = false; + tkz->with_comment = false; + tkz->status = LXB_STATUS_OK; + tkz->opt = LXB_CSS_SYNTAX_TOKENIZER_OPT_UNDEF; + tkz->chunk_cb = lxb_css_syntax_tokenizer_blank; + + return LXB_STATUS_OK; +} + +lxb_status_t +lxb_css_syntax_tokenizer_clean(lxb_css_syntax_tokenizer_t *tkz) +{ + lexbor_mraw_clean(tkz->mraw); + lexbor_array_obj_clean(tkz->parse_errors); + lxb_css_syntax_tokenizer_cache_clean(tkz->cache); + lexbor_dobject_clean(tkz->tokens); + + tkz->offset = 0; + tkz->cache_pos = 0; + tkz->prepared = 0; + + tkz->eof = false; + tkz->status = LXB_STATUS_OK; + tkz->in_begin = NULL; + tkz->in_end = NULL; + tkz->pos = tkz->start; + + return LXB_STATUS_OK; +} + +lxb_css_syntax_tokenizer_t * +lxb_css_syntax_tokenizer_destroy(lxb_css_syntax_tokenizer_t *tkz) +{ + if (tkz == NULL) { + return NULL; + } + + if (tkz->tokens != NULL) { + tkz->tokens = lexbor_dobject_destroy(tkz->tokens, true); + tkz->cache = lxb_css_syntax_tokenizer_cache_destroy(tkz->cache); + } + + tkz->mraw = lexbor_mraw_destroy(tkz->mraw, true); + tkz->parse_errors = lexbor_array_obj_destroy(tkz->parse_errors, true); + + if (tkz->start != NULL) { + tkz->start = 
lexbor_free(tkz->start);
+    }
+
+    return lexbor_free(tkz);
+}
+
+static lxb_css_syntax_tokenizer_cache_t *
+lxb_css_syntax_tokenizer_cache_create(void)
+{
+    return lexbor_calloc(1, sizeof(lxb_css_syntax_tokenizer_cache_t));
+}
+
+static lxb_status_t
+lxb_css_syntax_tokenizer_cache_init(lxb_css_syntax_tokenizer_cache_t *cache,
+                                    size_t size)
+{
+    cache->length = 0;
+    cache->size = size;
+
+    cache->list = lexbor_malloc(sizeof(lxb_css_syntax_token_t *) * size);
+    if (cache->list == NULL) {
+        return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+    }
+
+    return LXB_STATUS_OK;
+}
+
+static void
+lxb_css_syntax_tokenizer_cache_clean(lxb_css_syntax_tokenizer_cache_t *cache)
+{
+    if (cache != NULL) {
+        cache->length = 0;
+    }
+}
+
+static lxb_css_syntax_tokenizer_cache_t *
+lxb_css_syntax_tokenizer_cache_destroy(lxb_css_syntax_tokenizer_cache_t *cache)
+{
+    if (cache == NULL) {
+        return NULL;
+    }
+
+    if (cache->list) {
+        lexbor_free(cache->list);
+    }
+
+    return lexbor_free(cache);
+}
+
+static lxb_css_syntax_token_t **
+lxb_css_syntax_tokenizer_cache_expand(lxb_css_syntax_tokenizer_cache_t *cache,
+                                      size_t up_to)
+{
+    size_t new_size;
+    lxb_css_syntax_token_t **list;
+
+    if (cache == NULL) {
+        return NULL;
+    }
+
+    new_size = cache->length + up_to;    /* grow by up_to slots past the used length */
+    list = lexbor_realloc(cache->list,
+                          sizeof(lxb_css_syntax_token_t *) * new_size);
+    if (list == NULL) {    /* check the allocation result, not `cache` */
+        return NULL;       /* cache->list and cache->size are not modified */
+    }
+
+    cache->list = list;
+    cache->size = new_size;
+
+    return list;
+}
+
+lxb_status_t
+lxb_css_syntax_tokenizer_cache_push(lxb_css_syntax_tokenizer_cache_t *cache,
+                                    lxb_css_syntax_token_t *value)
+{
+    if (cache->length >= cache->size) {
+        if ((lxb_css_syntax_tokenizer_cache_expand(cache, 128) == NULL)) {
+            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+        }
+    }
+
+    cache->list[ cache->length ] = value;
+    cache->length++;
+
+    return LXB_STATUS_OK;
+}
+
+
+static lxb_status_t
+lxb_css_syntax_tokenizer_blank(lxb_css_syntax_tokenizer_t *tkz,
+                               const lxb_char_t **data, const lxb_char_t **end,
+                               void *ctx)
+{
+    return 
LXB_STATUS_OK; +} + +lxb_status_t +lxb_css_syntax_tokenizer_chunk(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *data, size_t size) +{ + return tkz->status; +} + +lxb_css_syntax_token_t * +lxb_css_syntax_tokenizer_token(lxb_css_syntax_tokenizer_t *tkz) +{ + lxb_status_t status; + lxb_css_syntax_token_t *token; + const lxb_char_t *begin, *end; + + begin = tkz->in_begin; + end = tkz->in_end; + + if (tkz->prepared != 0) { + if (tkz->cache_pos < tkz->prepared) { + token = tkz->cache->list[tkz->prepared - 1]; + + status = lxb_css_syntax_token_string_make(tkz, token); + if (status != LXB_STATUS_OK) { + return NULL; + } + } + + token = tkz->cache->list[tkz->prepared]; + token->offset = tkz->offset; + + tkz->prepared += 1; + + if (tkz->prepared >= tkz->cache->length) { + tkz->prepared = 0; + } + + if (lxb_css_syntax_token_base(token)->length != 0) { + tkz->offset += lxb_css_syntax_token_base(token)->length; + token->cloned = false; + return token; + } + + if (begin >= end) { + status = lxb_css_syntax_tokenizer_next_chunk(tkz, &begin, &end); + if (status != LXB_STATUS_OK) { + return NULL; + } + + if (begin >= end) { + lxb_css_syntax_token_base(token)->length = 1; + goto done; + } + } + + if (lxb_css_syntax_token_delim(token)->character == '-') { + begin = lxb_css_syntax_state_minus_process(tkz, token, begin, end); + } + else { + begin = lxb_css_syntax_state_plus_process(tkz, token, begin, end); + } + + goto done; + } + + if (tkz->cache_pos < tkz->cache->length) { + token = tkz->cache->list[tkz->cache->length - 1]; + + status = lxb_css_syntax_token_string_make(tkz, token); + if (status != LXB_STATUS_OK) { + return NULL; + } + } + + token = lxb_css_syntax_token_cached_create(tkz); + if (token == NULL) { + return NULL; + } + + token->offset = tkz->offset; + +again: + + if (begin >= end) { + status = lxb_css_syntax_tokenizer_next_chunk(tkz, &begin, &end); + if (status != LXB_STATUS_OK) { + return NULL; + } + + if (begin >= end) { + token->type = LXB_CSS_SYNTAX_TOKEN__EOF; 
+ + lxb_css_syntax_token_base(token)->begin = begin; + lxb_css_syntax_token_base(token)->length = 0; + + token->cloned = false; + + return token; + } + } + + begin = lxb_css_syntax_state_res_map[*begin](tkz, token, begin, end); + +done: + + token->cloned = false; + + if (begin == NULL) { + return NULL; + } + + tkz->in_begin = begin; + tkz->offset += lxb_css_syntax_token_base(token)->length; + + if (token->type == LXB_CSS_SYNTAX_TOKEN_COMMENT && !tkz->with_comment) { + end = tkz->in_end; + goto again; + } + + return token; +} + +lxb_status_t +lxb_css_syntax_tokenizer_next_chunk(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t **data, const lxb_char_t **end) +{ + const lxb_char_t *begin; + + if (tkz->eof == false) { + begin = *data; + + tkz->status = tkz->chunk_cb(tkz, data, end, tkz->chunk_ctx); + if (tkz->status != LXB_STATUS_OK) { + return tkz->status; + } + + if (*data >= *end) { + *data = begin; + *end = begin; + + tkz->in_begin = begin; + tkz->in_end = begin; + + tkz->eof = true; + } + else { + tkz->in_begin = *data; + tkz->in_end = *end; + } + } + + return LXB_STATUS_OK; +} + +bool +lxb_css_syntax_tokenizer_lookup_colon(lxb_css_syntax_tokenizer_t *tkz) +{ + const lxb_char_t *p, *end; + lxb_css_syntax_token_t *token; + + if (tkz->cache_pos + 1 < tkz->cache->length) { + token = tkz->cache->list[tkz->cache_pos + 1]; + + if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) { + if (tkz->cache_pos + 2 < tkz->cache->length) { + token = tkz->cache->list[tkz->cache_pos + 2]; + + return token->type == LXB_CSS_SYNTAX_TOKEN_COLON; + } + } + else if (token->type == LXB_CSS_SYNTAX_TOKEN_COLON) { + return true; + } + + return false; + } + + p = tkz->in_begin; + end = tkz->in_end; + + do { + if (p >= end) { + token = lxb_css_syntax_token_next(tkz); + if (token == NULL) { + return false; + } + + if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) { + token = lxb_css_syntax_token_next(tkz); + if (token == NULL) { + return false; + } + } + + return token->type == 
LXB_CSS_SYNTAX_TOKEN_COLON;
+        }
+
+        switch (*p) {
+            case 0x3A:
+                return true;
+
+            case 0x0D:
+            case 0x0C:
+            case 0x09:
+            case 0x20:
+            case 0x0A:
+                p++;
+                break;
+
+            default:
+                return false;
+        }
+    }
+    while (true);
+}
+
+bool
+lxb_css_syntax_tokenizer_lookup_important(lxb_css_syntax_tokenizer_t *tkz,
+                                          lxb_css_syntax_token_type_t stop,
+                                          const lxb_char_t stop_ch)
+{
+    const lxb_char_t *p, *end;
+    lxb_css_syntax_token_t *token;
+
+    static const size_t length = sizeof(lxb_css_syntax_tokenizer_important) - 1;
+
+    p = tkz->in_begin;
+    end = tkz->in_end;
+
+    if (tkz->cache_pos + 1 < tkz->cache->length) {
+        token = tkz->cache->list[tkz->cache_pos + 1];
+
+        if (token->type != LXB_CSS_SYNTAX_TOKEN_IDENT) {  /* only an ident can spell "important" */
+            return false;
+        }
+
+        if (!(lxb_css_syntax_token_ident(token)->length == length
+            && lexbor_str_data_ncasecmp(lxb_css_syntax_token_ident(token)->data,
+                                        lxb_css_syntax_tokenizer_important,
+                                        length)))
+        {
+            return false;
+        }
+
+        if (tkz->cache_pos + 2 < tkz->cache->length) {
+            token = tkz->cache->list[tkz->cache_pos + 2];
+
+            if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
+                if (tkz->cache_pos + 3 >= tkz->cache->length) {
+                    return lxb_css_syntax_tokenizer_lookup_important_end(tkz,
+                                              p, end, stop_ch, stop, false);
+                }
+
+                token = tkz->cache->list[tkz->cache_pos + 3];
+            }
+
+            return (token->type == LXB_CSS_SYNTAX_TOKEN_SEMICOLON
+                    || token->type == stop
+                    || token->type == LXB_CSS_SYNTAX_TOKEN__EOF);
+        }
+
+        return lxb_css_syntax_tokenizer_lookup_important_end(tkz, p, end,
+                                                             stop_ch, stop, false);
+    }
+
+    return lxb_css_syntax_tokenizer_lookup_important_ch(tkz, p, end, stop_ch,
+                                                        stop, false);
+}
+
+static bool
+lxb_css_syntax_tokenizer_lookup_important_ch(lxb_css_syntax_tokenizer_t *tkz,
+                                             const lxb_char_t *p,
+                                             const lxb_char_t *end,
+                                             const lxb_char_t stop_ch,
+                                             lxb_css_syntax_token_type_t stop,
+                                             bool skip_first)
+{
+    const lxb_char_t *imp;
+
+    imp = lxb_css_syntax_tokenizer_important;
+
+    do {
+        if (p >= end) {
+            return 
lxb_css_syntax_tokenizer_lookup_important_tokens(tkz, stop, + skip_first); + } + + if (lexbor_str_res_map_lowercase[*p++] != *imp++) { + return false; + } + } + while (*imp != 0x00); + + return lxb_css_syntax_tokenizer_lookup_important_end(tkz, p, end, stop_ch, + stop, skip_first); +} + +static bool +lxb_css_syntax_tokenizer_lookup_important_end(lxb_css_syntax_tokenizer_t *tkz, + const lxb_char_t *p, + const lxb_char_t *end, + const lxb_char_t stop_ch, + lxb_css_syntax_token_type_t stop, + bool skip_first) +{ + do { + if (p >= end) { + return lxb_css_syntax_tokenizer_lookup_important_tokens(tkz, stop, + skip_first); + } + + switch (*p) { + case 0x3B: + return true; + + case 0x0D: + case 0x0C: + case 0x09: + case 0x20: + case 0x0A: + p++; + break; + + default: + return (stop_ch != 0x00 && stop_ch == *p); + } + } + while (true); +} + +static bool +lxb_css_syntax_tokenizer_lookup_important_tokens(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_type_t stop, + bool skip_first) +{ + const lxb_css_syntax_token_t *next; + + static const size_t length = sizeof(lxb_css_syntax_tokenizer_important) - 1; + + if (skip_first) { + next = lxb_css_syntax_token_next(tkz); + if (next == NULL) { + return false; + } + } + + next = lxb_css_syntax_token_next(tkz); + if (next == NULL) { + return false; + } + + if (next->type != LXB_CSS_SYNTAX_TOKEN_IDENT) { + return false; + } + + if (!(lxb_css_syntax_token_ident(next)->length == length + && lexbor_str_data_ncasecmp(lxb_css_syntax_token_ident(next)->data, + lxb_css_syntax_tokenizer_important, + length))) + { + return false; + } + + next = lxb_css_syntax_token_next(tkz); + if (next == NULL) { + return false; + } + + if (next->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) { + next = lxb_css_syntax_token_next(tkz); + if (next == NULL) { + return false; + } + } + + return (next->type == LXB_CSS_SYNTAX_TOKEN_SEMICOLON + || next->type == stop || next->type == LXB_CSS_SYNTAX_TOKEN__EOF); +} + +bool 
+lxb_css_syntax_tokenizer_lookup_declaration_ws_end(lxb_css_syntax_tokenizer_t *tkz, + lxb_css_syntax_token_type_t stop, + const lxb_char_t stop_ch) +{ + lxb_css_syntax_token_t *token; + const lxb_char_t *p, *end; + + if (tkz->cache_pos + 1 < tkz->cache->length) { + token = tkz->cache->list[tkz->cache_pos + 1]; + + switch (token->type) { + case LXB_CSS_SYNTAX_TOKEN_DELIM: + if (lxb_css_syntax_token_delim(token)->character != '!') { + return lxb_css_syntax_tokenizer_lookup_important(tkz, stop, + stop_ch); + } + + return false; + + case LXB_CSS_SYNTAX_TOKEN_SEMICOLON: + return true; + + default: + return token->type == stop_ch || + token->type == LXB_CSS_SYNTAX_TOKEN__EOF; + } + } + + p = tkz->in_begin; + end = tkz->in_end; + + do { + if (p >= end) { + return lxb_css_syntax_tokenizer_lookup_important_tokens(tkz, stop, + true); + } + + switch (*p) { + case 0x3B: + return true; + + case 0x21: + p++; + return lxb_css_syntax_tokenizer_lookup_important_ch(tkz, p, end, + stop_ch, stop, true); + + default: + return (stop_ch != 0x00 && stop_ch == *p); + } + } + while (true); +} + +/* + * No inline functions for ABI. 
+ */ +lxb_status_t +lxb_css_syntax_tokenizer_status_noi(lxb_css_syntax_tokenizer_t *tkz) +{ + return lxb_css_syntax_tokenizer_status(tkz); +} diff --git a/ext/dom/lexbor/lexbor/css/syntax/tokenizer/error.c b/ext/dom/lexbor/lexbor/css/syntax/tokenizer/error.c new file mode 100644 index 00000000000..a95723cb6da --- /dev/null +++ b/ext/dom/lexbor/lexbor/css/syntax/tokenizer/error.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2018-2019 Alexander Borisov + * + * Author: Alexander Borisov + */ + +#include "lexbor/css/syntax/tokenizer/error.h" + + +lxb_css_syntax_tokenizer_error_t * +lxb_css_syntax_tokenizer_error_add(lexbor_array_obj_t *parse_errors, + const lxb_char_t *pos, + lxb_css_syntax_tokenizer_error_id_t id) +{ + if (parse_errors == NULL) { + return NULL; + } + + lxb_css_syntax_tokenizer_error_t *entry; + + entry = lexbor_array_obj_push(parse_errors); + if (entry == NULL) { + return NULL; + } + + entry->id = id; + entry->pos = pos; + + return entry; +} diff --git a/ext/dom/lexbor/lexbor/html/interfaces/document.c b/ext/dom/lexbor/lexbor/html/interfaces/document.c index 06a9456265a..b3e46b7fb85 100644 --- a/ext/dom/lexbor/lexbor/html/interfaces/document.c +++ b/ext/dom/lexbor/lexbor/html/interfaces/document.c @@ -58,6 +58,7 @@ static lxb_html_document_css_custom_entry_t * lxb_html_document_css_customs_insert(lxb_html_document_t *document, const lxb_char_t *key, size_t length); +#if 0 static lxb_status_t lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node, lxb_css_selector_specificity_t spec, @@ -72,7 +73,6 @@ static lxb_status_t lxb_html_document_style_cb(lxb_dom_node_t *node, lxb_css_selector_specificity_t spec, void *ctx); -#if 0 static lxb_status_t lxb_html_document_done(lxb_html_document_t *document); #endif @@ -604,23 +604,30 @@ lxb_status_t lxb_html_document_style_attach(lxb_html_document_t *document, lxb_css_rule_style_t *style) { +#if 0 lxb_html_document_css_t *css = &document->css; return lxb_selectors_find(css->selectors, 
lxb_dom_interface_node(document), style->selector, lxb_html_document_style_cb, style); +#endif + return LXB_STATUS_OK; } lxb_status_t lxb_html_document_style_remove(lxb_html_document_t *document, lxb_css_rule_style_t *style) { +#if 0 lxb_html_document_css_t *css = &document->css; return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document), style->selector, lxb_html_document_style_remove_by_rule_cb, style); +#endif + return LXB_STATUS_OK; } +#if 0 static lxb_status_t lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node, lxb_css_selector_specificity_t spec, @@ -668,20 +675,25 @@ lxb_html_document_style_remove_avl_cb(lexbor_avl_t *avl, style, context->list); return LXB_STATUS_OK; } +#endif lxb_status_t lxb_html_document_style_attach_by_element(lxb_html_document_t *document, lxb_html_element_t *element, lxb_css_rule_style_t *style) { +#if 0 lxb_html_document_css_t *css = &document->css; return lxb_selectors_match_node(css->selectors, lxb_dom_interface_node(element), style->selector, lxb_html_document_style_cb, style); +#endif + return LXB_STATUS_OK; } +#if 0 static lxb_status_t lxb_html_document_style_cb(lxb_dom_node_t *node, lxb_css_selector_specificity_t spec, void *ctx) @@ -703,6 +715,7 @@ lxb_html_document_style_cb(lxb_dom_node_t *node, return lxb_html_element_style_list_append(lxb_html_interface_element(node), style->declarations, spec); } +#endif lxb_html_document_t * lxb_html_document_destroy(lxb_html_document_t *document) diff --git a/ext/dom/lexbor/lexbor/html/interfaces/document.h b/ext/dom/lexbor/lexbor/html/interfaces/document.h index 7e8d1ea2e56..827ff64f50f 100644 --- a/ext/dom/lexbor/lexbor/html/interfaces/document.h +++ b/ext/dom/lexbor/lexbor/html/interfaces/document.h @@ -20,7 +20,6 @@ extern "C" { #include "lexbor/dom/interfaces/attr.h" #include "lexbor/dom/interfaces/document.h" #include "lexbor/css/css.h" -#include "lexbor/selectors/selectors.h" typedef lxb_status_t @@ -45,7 +44,6 @@ typedef struct { lxb_css_memory_t 
*memory; lxb_css_selectors_t *css_selectors; lxb_css_parser_t *parser; - lxb_selectors_t *selectors; lexbor_avl_t *styles; lexbor_array_t *stylesheets; diff --git a/ext/dom/lexbor/lexbor/html/tokenizer/res.h b/ext/dom/lexbor/lexbor/html/tokenizer/res.h old mode 100755 new mode 100644 diff --git a/ext/dom/lexbor/patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch b/ext/dom/lexbor/patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch index 7549403ea09..2174b670bb3 100644 --- a/ext/dom/lexbor/patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch +++ b/ext/dom/lexbor/patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch @@ -1,7 +1,7 @@ -From 9d60c0fda0b51e9374a234c48df36130d2c988ee Mon Sep 17 00:00:00 2001 +From 7c7c35d8ea9f65f081564b3ad1bfe9f0db33dd69 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 26 Aug 2023 15:08:59 +0200 -Subject: [PATCH] Expose line and column information for use in PHP +Subject: [PATCH 1/6] Expose line and column information for use in PHP --- source/lexbor/dom/interfaces/node.h | 2 ++ @@ -15,7 +15,7 @@ Subject: [PATCH] Expose line and column information for use in PHP 8 files changed, 48 insertions(+), 5 deletions(-) diff --git a/source/lexbor/dom/interfaces/node.h b/source/lexbor/dom/interfaces/node.h -index 4a10197..ff9c924 100755 +index c37b790..8ac218b 100644 --- a/source/lexbor/dom/interfaces/node.h +++ b/source/lexbor/dom/interfaces/node.h @@ -58,6 +58,8 @@ struct lxb_dom_node { @@ -28,7 +28,7 @@ index 4a10197..ff9c924 100755 LXB_DOM_NODE_USER_VARIABLES #endif /* LXB_DOM_NODE_USER_VARIABLES */ diff --git a/source/lexbor/html/token.h b/source/lexbor/html/token.h -index 79accd0..0b7f4fd 100755 +index 79accd0..0b7f4fd 100644 --- a/source/lexbor/html/token.h +++ b/source/lexbor/html/token.h @@ -33,6 +33,8 @@ enum lxb_html_token_type { @@ -41,7 +41,7 @@ index 79accd0..0b7f4fd 100755 const lxb_char_t *text_start; const lxb_char_t 
*text_end; diff --git a/source/lexbor/html/tokenizer.c b/source/lexbor/html/tokenizer.c -index 741bced..0bd9aec 100755 +index 741bced..0bd9aec 100644 --- a/source/lexbor/html/tokenizer.c +++ b/source/lexbor/html/tokenizer.c @@ -91,6 +91,7 @@ lxb_html_tokenizer_init(lxb_html_tokenizer_t *tkz) @@ -90,7 +90,7 @@ index 741bced..0bd9aec 100755 return tkz->status; diff --git a/source/lexbor/html/tokenizer.h b/source/lexbor/html/tokenizer.h -index ba9602f..74bb55e 100755 +index ba9602f..74bb55e 100644 --- a/source/lexbor/html/tokenizer.h +++ b/source/lexbor/html/tokenizer.h @@ -73,6 +73,8 @@ struct lxb_html_tokenizer { @@ -103,7 +103,7 @@ index ba9602f..74bb55e 100755 /* Entities */ const lexbor_sbst_entry_static_t *entity; diff --git a/source/lexbor/html/tokenizer/state.h b/source/lexbor/html/tokenizer/state.h -index 0892846..77b86ac 100755 +index 0892846..77b86ac 100644 --- a/source/lexbor/html/tokenizer/state.h +++ b/source/lexbor/html/tokenizer/state.h @@ -90,6 +90,8 @@ extern "C" { @@ -116,7 +116,7 @@ index 0892846..77b86ac 100755 while (0) diff --git a/source/lexbor/html/tree.c b/source/lexbor/html/tree.c -index 0f067e4..bdec6a5 100755 +index 0f067e4..bdec6a5 100644 --- a/source/lexbor/html/tree.c +++ b/source/lexbor/html/tree.c @@ -434,6 +434,9 @@ lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree, @@ -152,7 +152,7 @@ index 0f067e4..bdec6a5 100755 tree->document->dom_document.text); if (tree->status != LXB_STATUS_OK) { diff --git a/source/lexbor/html/tree/error.c b/source/lexbor/html/tree/error.c -index e6e43f4..88ad8c4 100755 +index e6e43f4..88ad8c4 100644 --- a/source/lexbor/html/tree/error.c +++ b/source/lexbor/html/tree/error.c @@ -21,8 +21,9 @@ lxb_html_tree_error_add(lexbor_array_obj_t *parse_errors, @@ -168,7 +168,7 @@ index e6e43f4..88ad8c4 100755 return entry; } diff --git a/source/lexbor/html/tree/error.h b/source/lexbor/html/tree/error.h -index 2fd06cb..ed1859f 100755 +index 2fd06cb..ed1859f 100644 --- a/source/lexbor/html/tree/error.h +++ 
b/source/lexbor/html/tree/error.h @@ -97,8 +97,9 @@ lxb_html_tree_error_id_t; @@ -184,5 +184,5 @@ index 2fd06cb..ed1859f 100755 lxb_html_tree_error_t; -- -2.43.0 +2.44.0 diff --git a/ext/dom/lexbor/patches/0001-Track-implied-added-nodes-for-options-use-in-PHP.patch b/ext/dom/lexbor/patches/0002-Track-implied-added-nodes-for-options-use-in-PHP.patch similarity index 90% rename from ext/dom/lexbor/patches/0001-Track-implied-added-nodes-for-options-use-in-PHP.patch rename to ext/dom/lexbor/patches/0002-Track-implied-added-nodes-for-options-use-in-PHP.patch index c3ccc51df40..b3f56201040 100644 --- a/ext/dom/lexbor/patches/0001-Track-implied-added-nodes-for-options-use-in-PHP.patch +++ b/ext/dom/lexbor/patches/0002-Track-implied-added-nodes-for-options-use-in-PHP.patch @@ -1,7 +1,7 @@ -From 7f04b3dc1501458e7f5cd0d6e6cd05db524ae6ae Mon Sep 17 00:00:00 2001 +From ce1cb529bb5415e4ea17332731c20d2c78adc601 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:18:51 +0200 -Subject: [PATCH] Track implied added nodes for options use in PHP +Subject: [PATCH 2/6] Track implied added nodes for options use in PHP --- source/lexbor/html/tree.h | 3 +++ @@ -11,7 +11,7 @@ Subject: [PATCH] Track implied added nodes for options use in PHP 4 files changed, 8 insertions(+) diff --git a/source/lexbor/html/tree.h b/source/lexbor/html/tree.h -index 2a43f8b..d964f01 100755 +index 2a43f8b..d964f01 100644 --- a/source/lexbor/html/tree.h +++ b/source/lexbor/html/tree.h @@ -55,6 +55,9 @@ struct lxb_html_tree { @@ -25,7 +25,7 @@ index 2a43f8b..d964f01 100755 lxb_html_tree_insertion_mode_f mode; lxb_html_tree_insertion_mode_f original_mode; diff --git a/source/lexbor/html/tree/insertion_mode/after_head.c b/source/lexbor/html/tree/insertion_mode/after_head.c -index ad551b5..1448654 100755 +index ad551b5..1448654 100644 --- a/source/lexbor/html/tree/insertion_mode/after_head.c +++ b/source/lexbor/html/tree/insertion_mode/after_head.c @@ 
-71,6 +71,7 @@ lxb_html_tree_insertion_mode_after_head_open(lxb_html_tree_t *tree, @@ -37,7 +37,7 @@ index ad551b5..1448654 100755 tree->mode = lxb_html_tree_insertion_mode_in_body; diff --git a/source/lexbor/html/tree/insertion_mode/before_head.c b/source/lexbor/html/tree/insertion_mode/before_head.c -index 14621f2..cd2ac2a 100755 +index 14621f2..cd2ac2a 100644 --- a/source/lexbor/html/tree/insertion_mode/before_head.c +++ b/source/lexbor/html/tree/insertion_mode/before_head.c @@ -67,6 +67,8 @@ lxb_html_tree_insertion_mode_before_head_open(lxb_html_tree_t *tree, @@ -50,7 +50,7 @@ index 14621f2..cd2ac2a 100755 break; diff --git a/source/lexbor/html/tree/insertion_mode/before_html.c b/source/lexbor/html/tree/insertion_mode/before_html.c -index ed5e367..b078ac5 100755 +index ed5e367..b078ac5 100644 --- a/source/lexbor/html/tree/insertion_mode/before_html.c +++ b/source/lexbor/html/tree/insertion_mode/before_html.c @@ -79,6 +79,8 @@ lxb_html_tree_insertion_mode_before_html_open(lxb_html_tree_t *tree, @@ -63,5 +63,5 @@ index ed5e367..b078ac5 100755 break; -- -2.41.0 +2.44.0 diff --git a/ext/dom/lexbor/patches/0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch b/ext/dom/lexbor/patches/0003-Patch-utilities-and-data-structure-to-be-able-to-gen.patch similarity index 95% rename from ext/dom/lexbor/patches/0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch rename to ext/dom/lexbor/patches/0003-Patch-utilities-and-data-structure-to-be-able-to-gen.patch index 75135dcc447..5b32b3337d0 100644 --- a/ext/dom/lexbor/patches/0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch +++ b/ext/dom/lexbor/patches/0003-Patch-utilities-and-data-structure-to-be-able-to-gen.patch @@ -1,7 +1,7 @@ -From d9c670e2a58fbfd3670a5ca69cd583b3573e77ef Mon Sep 17 00:00:00 2001 +From 0514a1cb2f45ab6dd814118780d56a713f4925a2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:57:48 +0200 -Subject: [PATCH] 
Patch utilities and data structure to be able to generate +Subject: [PATCH 3/6] Patch utilities and data structure to be able to generate smaller lookup tables Changed the generation script to check if everything fits in 32-bits. @@ -15,7 +15,7 @@ tables in size. 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/source/lexbor/core/shs.h b/source/lexbor/core/shs.h -index 7a63a07..c84dfaa 100755 +index 7a63a07..c84dfaa 100644 --- a/source/lexbor/core/shs.h +++ b/source/lexbor/core/shs.h @@ -27,9 +27,9 @@ lexbor_shs_entry_t; @@ -98,5 +98,5 @@ index 3e75812..2370c66 100755 result.append("};") -- -2.41.0 +2.44.0 diff --git a/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch b/ext/dom/lexbor/patches/0004-Remove-unused-upper-case-tag-static-data.patch similarity index 91% rename from ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch rename to ext/dom/lexbor/patches/0004-Remove-unused-upper-case-tag-static-data.patch index 529649a369c..a022e16f36e 100644 --- a/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch +++ b/ext/dom/lexbor/patches/0004-Remove-unused-upper-case-tag-static-data.patch @@ -1,7 +1,7 @@ -From 24b52ec63eb55adb1c039e58dd3e1156f01083b2 Mon Sep 17 00:00:00 2001 +From 01aad1074657586677f05ac1998da2158c57ee74 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 29 Nov 2023 21:26:47 +0100 -Subject: [PATCH 1/2] Remove unused upper case tag static data +Subject: [PATCH 4/6] Remove unused upper case tag static data --- source/lexbor/tag/res.h | 2 ++ @@ -29,7 +29,7 @@ index c7190c5..4ad1f37 100644 static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] = { diff --git a/source/lexbor/tag/tag.c b/source/lexbor/tag/tag.c -index f8fcdf0..0571957 100755 +index f8fcdf0..0571957 100644 --- a/source/lexbor/tag/tag.c +++ b/source/lexbor/tag/tag.c @@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t 
len) @@ -49,5 +49,5 @@ index f8fcdf0..0571957 100755 /* * No inline functions for ABI. -- -2.43.0 +2.44.0 diff --git a/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch b/ext/dom/lexbor/patches/0005-Shrink-size-of-static-binary-search-tree.patch similarity index 93% rename from ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch rename to ext/dom/lexbor/patches/0005-Shrink-size-of-static-binary-search-tree.patch index 3c0af9f7a72..dfb0c0a980e 100644 --- a/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch +++ b/ext/dom/lexbor/patches/0005-Shrink-size-of-static-binary-search-tree.patch @@ -1,7 +1,7 @@ -From 7fde66f32dcfbdc5df97fbffe411c0d7fd60fa50 Mon Sep 17 00:00:00 2001 +From 16daa8e860e393ff39613b908550b0982a2210f2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 29 Nov 2023 21:29:31 +0100 -Subject: [PATCH 2/2] Shrink size of static binary search tree +Subject: [PATCH 5/6] Shrink size of static binary search tree This also makes it more efficient on the data cache. --- @@ -13,7 +13,7 @@ This also makes it more efficient on the data cache. 
5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h -index b0fbc54..40e0e91 100755 +index b0fbc54..40e0e91 100644 --- a/source/lexbor/core/sbst.h +++ b/source/lexbor/core/sbst.h @@ -19,12 +19,12 @@ extern "C" { @@ -35,7 +35,7 @@ index b0fbc54..40e0e91 100755 lexbor_sbst_entry_static_t; diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c -index 70ca391..2f3414f 100755 +index 70ca391..2f3414f 100644 --- a/source/lexbor/html/tokenizer/state.c +++ b/source/lexbor/html/tokenizer/state.c @@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz, @@ -48,7 +48,7 @@ index 70ca391..2f3414f 100755 tkz->entity_match = entry; } diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h -index b3701d5..73ab66e 100755 +index b3701d5..73ab66e 100644 --- a/utils/lexbor/html/tmp/tokenizer_res.h +++ b/utils/lexbor/html/tmp/tokenizer_res.h @@ -6,7 +6,7 @@ @@ -98,7 +98,7 @@ index ee7dcb4..7cd1335 100755 + "../../../source/lexbor/html/tokenizer/res.h", "data/entities.json"); diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py -index 3e75812..b068ea3 100755 +index 2370c66..c41e645 100755 --- a/utils/lexbor/lexbor/LXB.py +++ b/utils/lexbor/lexbor/LXB.py @@ -27,7 +27,7 @@ class Temp: @@ -111,5 +111,5 @@ index 3e75812..b068ea3 100755 fh.close() -- -2.43.0 +2.44.0 diff --git a/ext/dom/lexbor/patches/0001-Patch-out-CSS-parser.patch b/ext/dom/lexbor/patches/0006-Patch-out-unused-CSS-style-code.patch similarity index 70% rename from ext/dom/lexbor/patches/0001-Patch-out-CSS-parser.patch rename to ext/dom/lexbor/patches/0006-Patch-out-unused-CSS-style-code.patch index 9928b5f32e4..500daf5026a 100644 --- a/ext/dom/lexbor/patches/0001-Patch-out-CSS-parser.patch +++ b/ext/dom/lexbor/patches/0006-Patch-out-unused-CSS-style-code.patch @@ -1,38 +1,50 @@ -From c19debb4d26f731b39860f27073b69927aa611f0 Mon Sep 17 00:00:00 
2001 +From 168dad55b6278cd45e0f0b2aed802ce9bace3274 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> -Date: Wed, 23 Aug 2023 21:48:12 +0200 -Subject: [PATCH] Patch out CSS parser +Date: Sun, 7 Jan 2024 21:59:28 +0100 +Subject: [PATCH 6/6] Patch out unused CSS style code --- source/lexbor/css/rule.h | 2 ++ - source/lexbor/html/interfaces/document.c | 16 ++++++++++ + source/lexbor/html/interfaces/document.c | 29 +++++++++++++++++++ + source/lexbor/html/interfaces/document.h | 2 -- source/lexbor/html/interfaces/element.c | 29 +++++++++++++++++++ source/lexbor/html/interfaces/style_element.c | 6 ++++ - 4 files changed, 53 insertions(+) + 5 files changed, 66 insertions(+), 2 deletions(-) diff --git a/source/lexbor/css/rule.h b/source/lexbor/css/rule.h -index 7cc4f0b..f68491e 100644 +index 7cc4f0b..bd191f9 100644 --- a/source/lexbor/css/rule.h +++ b/source/lexbor/css/rule.h -@@ -344,7 +344,9 @@ lxb_css_rule_ref_dec_destroy(lxb_css_rule_t *rule) - } - - if (rule->ref_count == 0) { +@@ -339,6 +339,7 @@ lxb_css_rule_ref_dec(lxb_css_rule_t *rule) + lxb_inline void + lxb_css_rule_ref_dec_destroy(lxb_css_rule_t *rule) + { +#if 0 - (void) lxb_css_rule_destroy(rule, true); -+#endif + if (rule->ref_count > 0) { + rule->ref_count--; } +@@ -346,6 +347,7 @@ lxb_css_rule_ref_dec_destroy(lxb_css_rule_t *rule) + if (rule->ref_count == 0) { + (void) lxb_css_rule_destroy(rule, true); + } ++#endif } + lxb_inline void diff --git a/source/lexbor/html/interfaces/document.c b/source/lexbor/html/interfaces/document.c -index 73184f0..b4de5b8 100755 +index bd3c02b..aa305a9 100644 --- a/source/lexbor/html/interfaces/document.c +++ b/source/lexbor/html/interfaces/document.c -@@ -72,8 +72,10 @@ static lxb_status_t - lxb_html_document_style_cb(lxb_dom_node_t *node, - lxb_css_selector_specificity_t spec, void *ctx); +@@ -58,6 +58,7 @@ static lxb_html_document_css_custom_entry_t * + lxb_html_document_css_customs_insert(lxb_html_document_t *document, + const 
lxb_char_t *key, size_t length); +#if 0 + static lxb_status_t + lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node, + lxb_css_selector_specificity_t spec, +@@ -74,6 +75,7 @@ lxb_html_document_style_cb(lxb_dom_node_t *node, + static lxb_status_t lxb_html_document_done(lxb_html_document_t *document); +#endif @@ -96,7 +108,7 @@ index 73184f0..b4de5b8 100755 size_t length; lxb_css_stylesheet_t *sst; lxb_html_document_css_t *css = &document->css; -@@ -586,6 +597,7 @@ lxb_html_document_stylesheet_destroy_all(lxb_html_document_t *document, +@@ -586,29 +597,37 @@ lxb_html_document_stylesheet_destroy_all(lxb_html_document_t *document, (void) lxb_css_stylesheet_destroy(sst, destroy_memory); } @@ -104,7 +116,71 @@ index 73184f0..b4de5b8 100755 } lxb_status_t -@@ -849,6 +861,7 @@ lxb_html_document_parser_prepare(lxb_html_document_t *document) + lxb_html_document_style_attach(lxb_html_document_t *document, + lxb_css_rule_style_t *style) + { ++#if 0 + lxb_html_document_css_t *css = &document->css; + + return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document), + style->selector, lxb_html_document_style_cb, style); ++#endif ++ return LXB_STATUS_OK; + } + + lxb_status_t + lxb_html_document_style_remove(lxb_html_document_t *document, + lxb_css_rule_style_t *style) + { ++#if 0 + lxb_html_document_css_t *css = &document->css; + + return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document), + style->selector, + lxb_html_document_style_remove_by_rule_cb, style); ++#endif ++ return LXB_STATUS_OK; + } + ++#if 0 + static lxb_status_t + lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node, + lxb_css_selector_specificity_t spec, +@@ -646,20 +665,25 @@ lxb_html_document_style_remove_avl_cb(lexbor_avl_t *avl, + style, context->list); + return LXB_STATUS_OK; + } ++#endif + + lxb_status_t + lxb_html_document_style_attach_by_element(lxb_html_document_t *document, + lxb_html_element_t *element, + lxb_css_rule_style_t *style) + { ++#if 0 + 
lxb_html_document_css_t *css = &document->css; + + return lxb_selectors_match_node(css->selectors, + lxb_dom_interface_node(element), + style->selector, + lxb_html_document_style_cb, style); ++#endif ++ return LXB_STATUS_OK; + } + ++#if 0 + static lxb_status_t + lxb_html_document_style_cb(lxb_dom_node_t *node, + lxb_css_selector_specificity_t spec, void *ctx) +@@ -675,6 +699,7 @@ lxb_html_document_style_cb(lxb_dom_node_t *node, + return lxb_html_element_style_list_append(lxb_html_interface_element(node), + style->declarations, spec); + } ++#endif + + lxb_html_document_t * + lxb_html_document_destroy(lxb_html_document_t *document) +@@ -851,6 +876,7 @@ lxb_html_document_parser_prepare(lxb_html_document_t *document) return LXB_STATUS_OK; } @@ -112,7 +188,7 @@ index 73184f0..b4de5b8 100755 static lxb_status_t lxb_html_document_done(lxb_html_document_t *document) { -@@ -873,6 +886,7 @@ lxb_html_document_done(lxb_html_document_t *document) +@@ -875,6 +901,7 @@ lxb_html_document_done(lxb_html_document_t *document) return LXB_STATUS_OK; } @@ -120,7 +196,7 @@ index 73184f0..b4de5b8 100755 const lxb_char_t * lxb_html_document_title(lxb_html_document_t *document, size_t *len) -@@ -960,6 +974,7 @@ lxb_html_document_import_node(lxb_html_document_t *doc, lxb_dom_node_t *node, +@@ -962,6 +989,7 @@ lxb_html_document_import_node(lxb_html_document_t *doc, lxb_dom_node_t *node, return lxb_dom_document_import_node(&doc->dom_document, node, deep); } @@ -128,7 +204,7 @@ index 73184f0..b4de5b8 100755 static lxb_status_t lxb_html_document_event_insert(lxb_dom_node_t *node) { -@@ -1231,6 +1246,7 @@ lxb_html_document_event_set_value(lxb_dom_node_t *node, +@@ -1233,6 +1261,7 @@ lxb_html_document_event_set_value(lxb_dom_node_t *node, return lxb_html_element_style_parse(lxb_html_interface_element(node), value, length); } @@ -136,8 +212,28 @@ index 73184f0..b4de5b8 100755 /* * No inline functions for ABI. 
+diff --git a/source/lexbor/html/interfaces/document.h b/source/lexbor/html/interfaces/document.h +index 7e8d1ea..827ff64 100644 +--- a/source/lexbor/html/interfaces/document.h ++++ b/source/lexbor/html/interfaces/document.h +@@ -20,7 +20,6 @@ extern "C" { + #include "lexbor/dom/interfaces/attr.h" + #include "lexbor/dom/interfaces/document.h" + #include "lexbor/css/css.h" +-#include "lexbor/selectors/selectors.h" + + + typedef lxb_status_t +@@ -45,7 +44,6 @@ typedef struct { + lxb_css_memory_t *memory; + lxb_css_selectors_t *css_selectors; + lxb_css_parser_t *parser; +- lxb_selectors_t *selectors; + + lexbor_avl_t *styles; + lexbor_array_t *stylesheets; diff --git a/source/lexbor/html/interfaces/element.c b/source/lexbor/html/interfaces/element.c -index 229d3d7..363040c 100755 +index 229d3d7..363040c 100644 --- a/source/lexbor/html/interfaces/element.c +++ b/source/lexbor/html/interfaces/element.c @@ -38,9 +38,11 @@ static lxb_status_t @@ -306,7 +402,7 @@ index 229d3d7..363040c 100755 } +#endif diff --git a/source/lexbor/html/interfaces/style_element.c b/source/lexbor/html/interfaces/style_element.c -index 66d55c4..9a402ef 100755 +index 66d55c4..9a402ef 100644 --- a/source/lexbor/html/interfaces/style_element.c +++ b/source/lexbor/html/interfaces/style_element.c @@ -35,7 +35,9 @@ lxb_html_style_element_interface_destroy(lxb_html_style_element_t *style_element @@ -345,5 +441,5 @@ index 66d55c4..9a402ef 100755 return css->parser->status; } -- -2.41.0 +2.44.0 diff --git a/ext/dom/lexbor/patches/README.md b/ext/dom/lexbor/patches/README.md index 09a112d8da2..d10b437818c 100644 --- a/ext/dom/lexbor/patches/README.md +++ b/ext/dom/lexbor/patches/README.md @@ -9,19 +9,19 @@ This contains the following patch files in mailbox format. * 0001-Expose-line-and-column-information-for-use-in-PHP.patch A PHP specific patch to expose the line and column number to PHP. 
-* 0001-Track-implied-added-nodes-for-options-use-in-PHP.patch +* 0002-Track-implied-added-nodes-for-options-use-in-PHP.patch A PHP specific patch to track implied added nodes for options. -* 0001-Patch-out-CSS-parser.patch - A PHP specific patch to patch out the CSS parser, which is unused and hence this patch reduces the binary size. -* 0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch +* 0003-Patch-utilities-and-data-structure-to-be-able-to-gen.patch A PHP specific patch to patch utilities and data structure to be able to generate smaller lookup tables. This patch won't be upstreamed because it breaks generality of those data structures, i.e. it only works because we only use it for character encoding. -* 0001-Remove-unused-upper-case-tag-static-data.patch +* 0004-Remove-unused-upper-case-tag-static-data.patch A PHP specific patch to remove unused upper case tag static data. This shrinks the static data size. -* 0001-Shrink-size-of-static-binary-search-tree.patch +* 0005-Shrink-size-of-static-binary-search-tree.patch A PHP specific patch to shrink the size of the static binary search tree for entities. This shrinks the static data size and reduces data cache pressure. +* 0006-Patch-out-unused-CSS-style-code.patch + A PHP specific patch to remove CSS style and selector bindings from the Lexbor document. **Note** for this patch the utilities to generate the tables are also patched. Make sure to apply on a fresh Lexbor clone and run (in `lexbor/utils/encoding`): `python3 single-byte.py` and `python3 multi-byte.py` to generate the tables. @@ -31,9 +31,9 @@ This contains the following patch files in mailbox format. 
* cd into `ext/dom/lexbor/lexbor` * `git am -3 ../patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch` -* `git am -3 ../patches/0001-Track-implied-added-nodes-for-options-use-in-PHP.patch` -* `git am -3 ../patches/0001-Patch-out-CSS-parser.patch` -* `git am -3 ../patches/0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch` -* `git am -3 ../patches/0001-Remove-unused-upper-case-tag-static-data.patch` -* `git am -3 ../patches/0001-Shrink-size-of-static-binary-search-tree.patch` +* `git am -3 ../patches/0002-Track-implied-added-nodes-for-options-use-in-PHP.patch` +* `git am -3 ../patches/0003-Patch-utilities-and-data-structure-to-be-able-to-gen.patch` +* `git am -3 ../patches/0004-Remove-unused-upper-case-tag-static-data.patch` +* `git am -3 ../patches/0005-Shrink-size-of-static-binary-search-tree.patch` +* `git am -3 ../patches/0006-Patch-out-unused-CSS-style-code.patch` * `git reset HEAD~6` # 6 is the number of commits created by the above commands