mirror of
https://github.com/php/php-src.git
synced 2024-09-22 02:17:32 +00:00
Use new encoding conversion filters for mb_parse_str and php_mb_post_handler
When micro-benchmarking on relatively short ASCII strings, the new implementation was about 30% faster than the old one.
This commit is contained in:
parent
98e5c4e3a3
commit
aeccb139c3
@ -27,7 +27,6 @@
|
||||
#include "main/php_output.h"
|
||||
#include "ext/standard/info.h"
|
||||
|
||||
#include "php_variables.h"
|
||||
#include "php_globals.h"
|
||||
#include "rfc1867.h"
|
||||
#include "php_content_types.h"
|
||||
@ -173,33 +172,23 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
|
||||
/* }}} */
|
||||
|
||||
/* {{{ const mbfl_encoding *_php_mb_encoding_handler_ex() */
|
||||
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res)
|
||||
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *array_ptr, char *res)
|
||||
{
|
||||
char *var, *val;
|
||||
const char *s1, *s2;
|
||||
char *strtok_buf = NULL, **val_list = NULL;
|
||||
zval *array_ptr = (zval *) arg;
|
||||
size_t n, num, *len_list = NULL;
|
||||
size_t val_len, new_val_len;
|
||||
mbfl_string string, resvar, resval;
|
||||
size_t n, num = 1, *len_list = NULL;
|
||||
size_t new_val_len;
|
||||
const mbfl_encoding *from_encoding = NULL;
|
||||
mbfl_encoding_detector *identd = NULL;
|
||||
mbfl_buffer_converter *convd = NULL;
|
||||
|
||||
mbfl_string_init_set(&string, info->to_encoding);
|
||||
mbfl_string_init_set(&resvar, info->to_encoding);
|
||||
mbfl_string_init_set(&resval, info->to_encoding);
|
||||
|
||||
if (!res || *res == '\0') {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* count the variables(separators) contained in the "res".
|
||||
* separator may contain multiple separator chars.
|
||||
*/
|
||||
num = 1;
|
||||
for (s1=res; *s1 != '\0'; s1++) {
|
||||
for (s2=info->separator; *s2 != '\0'; s2++) {
|
||||
/* count variables contained in `res`.
|
||||
* separator may contain multiple separator chars; any one of them delimits a variable */
|
||||
for (char *s1 = res; *s1; s1++) {
|
||||
for (const char *s2 = info->separator; *s2; s2++) {
|
||||
if (*s1 == *s2) {
|
||||
num++;
|
||||
}
|
||||
@ -212,7 +201,6 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
|
||||
|
||||
/* split and decode the query */
|
||||
n = 0;
|
||||
strtok_buf = NULL;
|
||||
var = php_strtok_r(res, info->separator, &strtok_buf);
|
||||
while (var) {
|
||||
val = strchr(var, '=');
|
||||
@ -255,6 +243,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
|
||||
if (identd != NULL) {
|
||||
n = 0;
|
||||
while (n < num) {
|
||||
mbfl_string string;
|
||||
string.val = (unsigned char *)val_list[n];
|
||||
string.len = len_list[n];
|
||||
if (mbfl_encoding_detector_feed(identd, &string)) {
|
||||
@ -273,62 +262,40 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
|
||||
}
|
||||
}
|
||||
|
||||
convd = NULL;
|
||||
if (from_encoding != &mbfl_encoding_pass) {
|
||||
convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0);
|
||||
if (convd != NULL) {
|
||||
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
|
||||
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
|
||||
} else {
|
||||
if (info->report_errors) {
|
||||
php_error_docref(NULL, E_WARNING, "Unable to create converter");
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* convert encoding */
|
||||
string.encoding = from_encoding;
|
||||
|
||||
n = 0;
|
||||
while (n < num) {
|
||||
string.val = (unsigned char *)val_list[n];
|
||||
string.len = len_list[n];
|
||||
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resvar) != NULL) {
|
||||
var = (char *)resvar.val;
|
||||
} else {
|
||||
var = val_list[n];
|
||||
}
|
||||
if (from_encoding != &mbfl_encoding_pass && info->to_encoding != &mbfl_encoding_pass) {
|
||||
unsigned int num_errors = 0;
|
||||
zend_string *converted_var = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
|
||||
MBSTRG(illegalchars) += num_errors;
|
||||
n++;
|
||||
string.val = (unsigned char *)val_list[n];
|
||||
string.len = len_list[n];
|
||||
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
|
||||
val = (char *)resval.val;
|
||||
val_len = resval.len;
|
||||
} else {
|
||||
val = val_list[n];
|
||||
val_len = len_list[n];
|
||||
}
|
||||
|
||||
num_errors = 0;
|
||||
zend_string *converted_val = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
|
||||
MBSTRG(illegalchars) += num_errors;
|
||||
n++;
|
||||
/* we need val to be emalloc()ed */
|
||||
val = estrndup(val, val_len);
|
||||
if (sapi_module.input_filter(info->data_type, var, &val, val_len, &new_val_len)) {
|
||||
|
||||
/* `val` must be a pointer returned by `emalloc` */
|
||||
val = estrndup(ZSTR_VAL(converted_val), ZSTR_LEN(converted_val));
|
||||
if (sapi_module.input_filter(info->data_type, ZSTR_VAL(converted_var), &val, ZSTR_LEN(converted_val), &new_val_len)) {
|
||||
/* add variable to symbol table */
|
||||
php_register_variable_safe(ZSTR_VAL(converted_var), val, new_val_len, array_ptr);
|
||||
}
|
||||
zend_string_free(converted_var);
|
||||
zend_string_free(converted_val);
|
||||
} else {
|
||||
var = val_list[n++];
|
||||
val = estrndup(val_list[n], len_list[n]);
|
||||
if (sapi_module.input_filter(info->data_type, var, &val, len_list[n], &new_val_len)) {
|
||||
php_register_variable_safe(var, val, new_val_len, array_ptr);
|
||||
}
|
||||
efree(val);
|
||||
|
||||
if (convd != NULL){
|
||||
mbfl_string_clear(&resvar);
|
||||
mbfl_string_clear(&resval);
|
||||
n++;
|
||||
}
|
||||
efree(val);
|
||||
}
|
||||
|
||||
out:
|
||||
if (convd != NULL) {
|
||||
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
|
||||
mbfl_buffer_converter_delete(convd);
|
||||
}
|
||||
if (val_list != NULL) {
|
||||
efree((void *)val_list);
|
||||
}
|
||||
|
30
ext/mbstring/tests/mb_parse_str_error.phpt
Normal file
30
ext/mbstring/tests/mb_parse_str_error.phpt
Normal file
@ -0,0 +1,30 @@
|
||||
--TEST--
|
||||
mb_parse_str() error handling
|
||||
--EXTENSIONS--
|
||||
mbstring
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('UTF-8');
|
||||
|
||||
$queries = array(
|
||||
"\x80\x80\x80",
|
||||
"\xFF=\xFF"
|
||||
);
|
||||
|
||||
foreach ($queries as $query) {
|
||||
echo "Query: " . bin2hex($query) . "\n";
|
||||
|
||||
$array = [];
|
||||
mb_parse_str($query, $array);
|
||||
|
||||
foreach ($array as $key => $value) {
|
||||
echo bin2hex($key) . "=>" . bin2hex($value) . "\n";
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
Query: 808080
|
||||
3f3f3f=>
|
||||
Query: ff3dff
|
||||
3f=>3f
|
Loading…
Reference in New Issue
Block a user