Use new encoding conversion filters for mb_parse_str and php_mb_post_handler

When micro-benchmarking on relatively short ASCII strings, the new
implementation was about 30% faster than the old one.
This commit is contained in:
Alex Dowad 2022-08-03 09:21:36 +02:00
parent 98e5c4e3a3
commit aeccb139c3
2 changed files with 63 additions and 66 deletions

View File

@ -27,7 +27,6 @@
#include "main/php_output.h"
#include "ext/standard/info.h"
#include "php_variables.h"
#include "php_globals.h"
#include "rfc1867.h"
#include "php_content_types.h"
@ -173,33 +172,23 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
/* }}} */
/* {{{ mbfl_no_encoding _php_mb_encoding_handler_ex() */
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res)
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *array_ptr, char *res)
{
char *var, *val;
const char *s1, *s2;
char *strtok_buf = NULL, **val_list = NULL;
zval *array_ptr = (zval *) arg;
size_t n, num, *len_list = NULL;
size_t val_len, new_val_len;
mbfl_string string, resvar, resval;
size_t n, num = 1, *len_list = NULL;
size_t new_val_len;
const mbfl_encoding *from_encoding = NULL;
mbfl_encoding_detector *identd = NULL;
mbfl_buffer_converter *convd = NULL;
mbfl_string_init_set(&string, info->to_encoding);
mbfl_string_init_set(&resvar, info->to_encoding);
mbfl_string_init_set(&resval, info->to_encoding);
if (!res || *res == '\0') {
goto out;
}
/* count the variables(separators) contained in the "res".
* separator may contain multiple separator chars.
*/
num = 1;
for (s1=res; *s1 != '\0'; s1++) {
for (s2=info->separator; *s2 != '\0'; s2++) {
/* Count the variables contained in `res`.
 * `info->separator` may list several separator characters; any one of them delimits variables */
for (char *s1 = res; *s1; s1++) {
for (const char *s2 = info->separator; *s2; s2++) {
if (*s1 == *s2) {
num++;
}
@ -212,7 +201,6 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
/* split and decode the query */
n = 0;
strtok_buf = NULL;
var = php_strtok_r(res, info->separator, &strtok_buf);
while (var) {
val = strchr(var, '=');
@ -255,6 +243,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
if (identd != NULL) {
n = 0;
while (n < num) {
mbfl_string string;
string.val = (unsigned char *)val_list[n];
string.len = len_list[n];
if (mbfl_encoding_detector_feed(identd, &string)) {
@ -273,62 +262,40 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
}
}
convd = NULL;
if (from_encoding != &mbfl_encoding_pass) {
convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0);
if (convd != NULL) {
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
} else {
if (info->report_errors) {
php_error_docref(NULL, E_WARNING, "Unable to create converter");
}
goto out;
}
}
/* convert encoding */
string.encoding = from_encoding;
n = 0;
while (n < num) {
string.val = (unsigned char *)val_list[n];
string.len = len_list[n];
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resvar) != NULL) {
var = (char *)resvar.val;
} else {
var = val_list[n];
}
if (from_encoding != &mbfl_encoding_pass && info->to_encoding != &mbfl_encoding_pass) {
unsigned int num_errors = 0;
zend_string *converted_var = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
MBSTRG(illegalchars) += num_errors;
n++;
string.val = (unsigned char *)val_list[n];
string.len = len_list[n];
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
val = (char *)resval.val;
val_len = resval.len;
} else {
val = val_list[n];
val_len = len_list[n];
}
num_errors = 0;
zend_string *converted_val = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
MBSTRG(illegalchars) += num_errors;
n++;
/* we need val to be emalloc()ed */
val = estrndup(val, val_len);
if (sapi_module.input_filter(info->data_type, var, &val, val_len, &new_val_len)) {
/* `val` must be a pointer returned by `emalloc` */
val = estrndup(ZSTR_VAL(converted_val), ZSTR_LEN(converted_val));
if (sapi_module.input_filter(info->data_type, ZSTR_VAL(converted_var), &val, ZSTR_LEN(converted_val), &new_val_len)) {
/* add variable to symbol table */
php_register_variable_safe(ZSTR_VAL(converted_var), val, new_val_len, array_ptr);
}
zend_string_free(converted_var);
zend_string_free(converted_val);
} else {
var = val_list[n++];
val = estrndup(val_list[n], len_list[n]);
if (sapi_module.input_filter(info->data_type, var, &val, len_list[n], &new_val_len)) {
php_register_variable_safe(var, val, new_val_len, array_ptr);
}
efree(val);
if (convd != NULL){
mbfl_string_clear(&resvar);
mbfl_string_clear(&resval);
n++;
}
efree(val);
}
out:
if (convd != NULL) {
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
mbfl_buffer_converter_delete(convd);
}
if (val_list != NULL) {
efree((void *)val_list);
}

View File

@ -0,0 +1,30 @@
--TEST--
mb_parse_str() error handling
--EXTENSIONS--
mbstring
--FILE--
<?php
// Feed mb_parse_str() query strings made of bytes that are not valid UTF-8
// and dump every resulting key/value pair as hex. The expected output shows
// each offending byte coming back as 3f ('?').
mb_internal_encoding('UTF-8');
$inputs = ["\x80\x80\x80", "\xFF=\xFF"];
foreach ($inputs as $raw) {
    echo "Query: ", bin2hex($raw), "\n";
    // Start from an empty result array for every query.
    $parsed = [];
    mb_parse_str($raw, $parsed);
    foreach ($parsed as $name => $data) {
        echo bin2hex($name), "=>", bin2hex($data), "\n";
    }
}
?>
--EXPECT--
Query: 808080
3f3f3f=>
Query: ff3dff
3f=>3f