mirror of
https://github.com/php/php-src.git
synced 2024-09-21 18:07:23 +00:00
Use fast text conversion filters to implement mb_convert_variables
This commit is contained in:
parent
adfdfb2e1e
commit
b1954f5fd6
@ -3180,7 +3180,7 @@ next_option:
|
||||
RETVAL_STR(jp_kana_convert(str, enc, opt));
|
||||
}
|
||||
|
||||
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
|
||||
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, bool *recursion_error) /* {{{ */
|
||||
{
|
||||
mbfl_string string;
|
||||
HashTable *ht;
|
||||
@ -3196,7 +3196,7 @@ static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zv
|
||||
} else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
|
||||
if (Z_REFCOUNTED_P(var)) {
|
||||
if (Z_IS_RECURSIVE_P(var)) {
|
||||
*recursion_error = 1;
|
||||
*recursion_error = true;
|
||||
return 0;
|
||||
}
|
||||
Z_PROTECT_RECURSION_P(var);
|
||||
@ -3226,31 +3226,25 @@ static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zv
|
||||
return 0;
|
||||
} /* }}} */
|
||||
|
||||
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
|
||||
static bool mb_recursive_convert_variable(zval *var, const mbfl_encoding* from_encoding, const mbfl_encoding* to_encoding)
|
||||
{
|
||||
mbfl_string string, result, *ret;
|
||||
HashTable *ht;
|
||||
zval *entry, *orig_var;
|
||||
|
||||
orig_var = var;
|
||||
ZVAL_DEREF(var);
|
||||
|
||||
if (Z_TYPE_P(var) == IS_STRING) {
|
||||
string.val = (unsigned char *)Z_STRVAL_P(var);
|
||||
string.len = Z_STRLEN_P(var);
|
||||
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
|
||||
if (ret != NULL) {
|
||||
zval_ptr_dtor(orig_var);
|
||||
// TODO: avoid reallocation ???
|
||||
ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len);
|
||||
efree(ret->val);
|
||||
}
|
||||
zend_string *ret = php_mb_convert_encoding_ex(Z_STRVAL_P(var), Z_STRLEN_P(var), to_encoding, from_encoding);
|
||||
zval_ptr_dtor(orig_var);
|
||||
ZVAL_STR(orig_var, ret);
|
||||
} else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
|
||||
if (Z_TYPE_P(var) == IS_ARRAY) {
|
||||
SEPARATE_ARRAY(var);
|
||||
}
|
||||
if (Z_REFCOUNTED_P(var)) {
|
||||
if (Z_IS_RECURSIVE_P(var)) {
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
Z_PROTECT_RECURSION_P(var);
|
||||
}
|
||||
@ -3258,11 +3252,11 @@ static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var
|
||||
ht = HASH_OF(var);
|
||||
if (ht != NULL) {
|
||||
ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
|
||||
if (mb_recursive_convert_variable(convd, entry)) {
|
||||
if (mb_recursive_convert_variable(entry, from_encoding, to_encoding)) {
|
||||
if (Z_REFCOUNTED_P(var)) {
|
||||
Z_UNPROTECT_RECURSION_P(var);
|
||||
}
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
} ZEND_HASH_FOREACH_END();
|
||||
}
|
||||
@ -3271,8 +3265,9 @@ static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var
|
||||
Z_UNPROTECT_RECURSION_P(var);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} /* }}} */
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* {{{ Converts the string resource in variables to desired encoding */
|
||||
PHP_FUNCTION(mb_convert_variables)
|
||||
@ -3281,14 +3276,12 @@ PHP_FUNCTION(mb_convert_variables)
|
||||
zend_string *to_enc_str;
|
||||
zend_string *from_enc_str;
|
||||
HashTable *from_enc_ht;
|
||||
mbfl_string string, result;
|
||||
const mbfl_encoding *from_encoding, *to_encoding;
|
||||
mbfl_encoding_detector *identd;
|
||||
mbfl_buffer_converter *convd;
|
||||
int n, argc;
|
||||
size_t elistsz;
|
||||
const mbfl_encoding **elist;
|
||||
int recursion_error = 0;
|
||||
bool recursion_error = false;
|
||||
|
||||
ZEND_PARSE_PARAMETERS_START(3, -1)
|
||||
Z_PARAM_STR(to_enc_str)
|
||||
@ -3302,10 +3295,7 @@ PHP_FUNCTION(mb_convert_variables)
|
||||
RETURN_THROWS();
|
||||
}
|
||||
|
||||
/* initialize string */
|
||||
from_encoding = MBSTRG(current_internal_encoding);
|
||||
mbfl_string_init_set(&string, from_encoding);
|
||||
mbfl_string_init(&result);
|
||||
|
||||
/* pre-conversion encoding */
|
||||
if (from_enc_ht) {
|
||||
@ -3356,29 +3346,18 @@ PHP_FUNCTION(mb_convert_variables)
|
||||
|
||||
efree(ZEND_VOIDP(elist));
|
||||
|
||||
convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
|
||||
/* If this assertion fails this means some memory allocation failure which is a bug */
|
||||
ZEND_ASSERT(convd != NULL);
|
||||
|
||||
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
|
||||
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
|
||||
|
||||
/* convert */
|
||||
n = 0;
|
||||
while (n < argc) {
|
||||
zval *zv = &args[n];
|
||||
|
||||
ZVAL_DEREF(zv);
|
||||
recursion_error = mb_recursive_convert_variable(convd, zv);
|
||||
recursion_error = mb_recursive_convert_variable(zv, from_encoding, to_encoding);
|
||||
if (recursion_error) {
|
||||
break;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
|
||||
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
|
||||
mbfl_buffer_converter_delete(convd);
|
||||
|
||||
if (recursion_error) {
|
||||
php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
|
||||
RETURN_FALSE;
|
||||
|
@ -17,19 +17,19 @@ $sjis = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
|
||||
// JIS string (BASE64 encoded)
|
||||
$jis = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
|
||||
// EUC-JP string
|
||||
$euc_jp = '日本語テキストです。0123456789。';
|
||||
$euc_jp = mb_convert_encoding("日本語テキストです。0123456789。", 'EUC-JP', 'UTF-8');
|
||||
|
||||
// Test for single scalar
|
||||
echo "== SCALAR TEST ==\n";
|
||||
$s = $sjis;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'SJIS', $s);
|
||||
print("$encoding\n"); // SJIS
|
||||
print("$s\n"); // Converted to EUC-JP
|
||||
echo bin2hex($s), "\n"; // Converted to EUC-JP
|
||||
|
||||
$s = $jis;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'JIS', $s);
|
||||
print("$encoding\n"); // JIS
|
||||
print("$s\n"); // Converted to EUC-JP
|
||||
echo bin2hex($s), "\n"; // Converted to EUC-JP
|
||||
|
||||
$s = $euc_jp;
|
||||
$encoding = mb_convert_variables('SJIS', 'EUC-JP', $s);
|
||||
@ -47,9 +47,7 @@ $s2 = $euc_jp;
|
||||
$s3 = $euc_jp;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'auto', $s1, $s2, $s3);
|
||||
print("$encoding\n"); // EUC-JP
|
||||
print("$s1$s2$s3\n"); // Converted to EUC-JP
|
||||
|
||||
|
||||
echo bin2hex("$s1$s2$s3"), "\n"; // Converted to EUC-JP
|
||||
|
||||
// Note: Mixing encoding in array/object is not supported?
|
||||
// Test for array
|
||||
@ -58,15 +56,13 @@ $a = array($s3, $s2, $s1);
|
||||
$aa = $a;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'auto', $aa);
|
||||
print("$encoding\n"); // EUC-JP
|
||||
print("{$aa[0]}{$aa[1]}{$aa[2]}\n"); // Converted to EUC-JP
|
||||
echo bin2hex("{$aa[0]}{$aa[1]}{$aa[2]}"), "\n"; // Converted to EUC-JP
|
||||
|
||||
$a = array($s1, $s2, $s3);
|
||||
$aa = $a;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'auto', $aa);
|
||||
print("$encoding\n"); // EUC-JP
|
||||
print("{$aa[0]}{$aa[1]}{$aa[2]}\n"); // Converted to EUC-JP
|
||||
|
||||
|
||||
echo bin2hex("{$aa[0]}{$aa[1]}{$aa[2]}"), "\n"; // Converted to EUC-JP
|
||||
|
||||
// Test for object
|
||||
echo "== OBJECT TEST ==\n";
|
||||
@ -102,19 +98,17 @@ class bar
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
$o = new foo;
|
||||
$oo = $o;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'auto', $oo);
|
||||
print("$encoding\n"); // EUC-JP
|
||||
print("{$oo->s1}{$oo->s2}{$oo->s3}\n"); // Converted to EUC-JP
|
||||
echo bin2hex("{$oo->s1}{$oo->s2}{$oo->s3}"), "\n"; // Converted to EUC-JP
|
||||
|
||||
$o = new bar;
|
||||
$oo = $o;
|
||||
$encoding = mb_convert_variables('EUC-JP', 'auto', $oo);
|
||||
print("$encoding\n"); // EUC-JP
|
||||
print("{$oo->s1}{$oo->s2}{$oo->s3}\n"); // Converted to EUC-JP
|
||||
|
||||
echo bin2hex("{$oo->s1}{$oo->s2}{$oo->s3}"), "\n"; // Converted to EUC-JP
|
||||
|
||||
// Test for scalar, array and object
|
||||
echo "== SCALAR, ARRAY AND OBJECT TEST ==\n";
|
||||
@ -127,36 +121,79 @@ $oo = $o;
|
||||
|
||||
$encoding = mb_convert_variables('EUC-JP', 'auto', $s1, $s2, $s3, $aa, $oo);
|
||||
print("$encoding\n"); // EUC-JP
|
||||
print("$s1$s2$s3\n"); // Converted to EUC-JP
|
||||
print("{$aa[0]}{$aa[1]}{$aa[2]}\n"); // Converted to EUC-JP
|
||||
print("{$oo->s1}{$oo->s2}{$oo->s3}\n"); // Converted to EUC-JP
|
||||
echo bin2hex("$s1$s2$s3"), "\n"; // Converted to EUC-JP
|
||||
echo bin2hex("{$aa[0]}{$aa[1]}{$aa[2]}"), "\n"; // Converted to EUC-JP
|
||||
echo bin2hex("{$oo->s1}{$oo->s2}{$oo->s3}"), "\n"; // Converted to EUC-JP
|
||||
|
||||
echo "== DEEPLY NESTED OBJECT/ARRAY TEST ==\n";
|
||||
|
||||
class Nested
|
||||
{
|
||||
public $inner;
|
||||
|
||||
function __construct($value)
|
||||
{
|
||||
$this->inner = $value;
|
||||
}
|
||||
}
|
||||
|
||||
$deeplyNested = array(new Nested(array(new Nested(array(new Nested("BLAH"))))));
|
||||
|
||||
$encoding = mb_convert_variables('UTF-16LE', 'UTF-8', $deeplyNested);
|
||||
echo $encoding, "\n";
|
||||
echo bin2hex($deeplyNested[0]->inner[0]->inner[0]->inner), "\n";
|
||||
|
||||
echo "== INVALID STRING ENCODING TEST ==\n";
|
||||
// Make sure both that the correct invalid encoding marker is used,
|
||||
// and that the count of illegal characters is incremented
|
||||
|
||||
$illegalCount = mb_get_info('illegal_chars');
|
||||
$nested = array(new Nested("\xFF"));
|
||||
mb_substitute_character(0x25);
|
||||
mb_convert_variables('UTF-16LE', 'UTF-8', $nested);
|
||||
echo bin2hex($nested[0]->inner), "\n";
|
||||
echo "# of illegal characters detected: ", mb_get_info('illegal_chars') - $illegalCount, "\n";
|
||||
|
||||
$illegalCount = mb_get_info('illegal_chars');
|
||||
$nested = array(new Nested("\xFF"));
|
||||
mb_substitute_character(0x26);
|
||||
mb_convert_variables('UTF-16LE', 'UTF-8', $nested);
|
||||
echo bin2hex($nested[0]->inner), "\n";
|
||||
echo "# of illegal characters detected: ", mb_get_info('illegal_chars') - $illegalCount, "\n";
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
== SCALAR TEST ==
|
||||
SJIS
|
||||
日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
JIS
|
||||
日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
EUC-JP
|
||||
k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==
|
||||
EUC-JP
|
||||
GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==
|
||||
EUC-JP
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
== ARRAY TEST ==
|
||||
EUC-JP
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
EUC-JP
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
== OBJECT TEST ==
|
||||
EUC-JP
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
EUC-JP
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
== SCALAR, ARRAY AND OBJECT TEST ==
|
||||
EUC-JP
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
日本語テキストです。0123456789。日本語テキストです。0123456789。日本語テキストです。0123456789。
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
|
||||
== DEEPLY NESTED OBJECT/ARRAY TEST ==
|
||||
UTF-8
|
||||
42004c0041004800
|
||||
== INVALID STRING ENCODING TEST ==
|
||||
2500
|
||||
# of illegal characters detected: 1
|
||||
2600
|
||||
# of illegal characters detected: 1
|
||||
|
Loading…
Reference in New Issue
Block a user