mirror of
https://github.com/php/php-src.git
synced 2024-09-22 02:17:32 +00:00
- Fixed CHARSET_UNICODE_COMPAT (ISO-8859-1 is compatible in the relevant sense).
- Fixed usage of zend_multibyte_get_internal_encoding (its return cannot be cast to char*). - Changed tests to reflect that charset detection now relies on internal_encoding, not on current_internal_encoding. NOTE: This fixes the changes in rev 306077, but the fact remains that that change introduced a BC break. I assumed it was intentional.
This commit is contained in:
parent
05b2d22a00
commit
4a946a91e5
@ -148,6 +148,9 @@ UPGRADE NOTES - PHP X.Y
|
||||
behavior follows the recommendations of Unicode Technical Report #36.
|
||||
- htmlspecialchars_decode/html_entity_decode now decode &apos; if the document
|
||||
type is ENT_XML1, ENT_XHTML, or ENT_HTML5.
|
||||
- Charset detection with $charset == '' no longer turns to mbstring's
|
||||
internal encoding defined through mb_internal_encoding(). Only the encoding
|
||||
defined through the ini setting mbstring.internal_encoding is considered.
|
||||
- number_format() no longer truncates multibyte decimal points and thousand
|
||||
separators to the first byte.
|
||||
- The third parameter ($matches) to preg_match_all() is now optional. If
|
||||
|
@ -367,6 +367,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
|
||||
int i;
|
||||
enum entity_charset charset = cs_utf_8;
|
||||
int len = 0;
|
||||
const zend_encoding *zenc;
|
||||
|
||||
/* Default is now UTF-8 */
|
||||
if (charset_hint == NULL)
|
||||
@ -376,9 +377,20 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
|
||||
goto det_charset;
|
||||
}
|
||||
|
||||
charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C);
|
||||
if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
|
||||
goto det_charset;
|
||||
zenc = zend_multibyte_get_internal_encoding(TSRMLS_C);
|
||||
if (zenc != NULL) {
|
||||
charset_hint = zend_multibyte_get_encoding_name(zenc);
|
||||
if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
|
||||
if ((len == 4) /* sizeof (none|auto|pass) */ &&
|
||||
(!memcmp("pass", charset_hint, 4) ||
|
||||
!memcmp("auto", charset_hint, 4) ||
|
||||
!memcmp("auto", charset_hint, 4))) {
|
||||
charset_hint = NULL;
|
||||
len = 0;
|
||||
} else {
|
||||
goto det_charset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
charset_hint = SG(default_charset);
|
||||
|
@ -33,7 +33,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
|
||||
cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
|
||||
cs_numelems /* used to count the number of charsets */
|
||||
};
|
||||
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
|
||||
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
|
||||
#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
|
||||
#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
|
||||
|
||||
|
@ -56,7 +56,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
|
||||
cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
|
||||
cs_numelems /* used to count the number of charsets */
|
||||
};
|
||||
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
|
||||
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
|
||||
#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
|
||||
#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
|
||||
|
||||
|
@ -2,19 +2,12 @@
|
||||
htmlentities() test 5 (mbstring / cp1252)
|
||||
--INI--
|
||||
output_handler=
|
||||
mbstring.internal_encoding=cp1252
|
||||
--SKIPIF--
|
||||
<?php
|
||||
extension_loaded("mbstring") or die("skip mbstring not available\n");
|
||||
mb_internal_encoding('cp1252');
|
||||
$php_errormsg = NULL;
|
||||
@htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, '');
|
||||
if ($php_errormsg) {
|
||||
die("skip cp1252 chracter set is not supported on this platform.\n");
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('cp1252');
|
||||
print mb_internal_encoding()."\n";
|
||||
var_dump(htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, ''));
|
||||
var_dump(htmlentities("\x80\xa2\xa3\xa4\xa5", ENT_QUOTES, ''));
|
||||
|
@ -2,15 +2,10 @@
|
||||
htmlentities() test 6 (mbstring / ISO-8859-15)
|
||||
--INI--
|
||||
output_handler=
|
||||
mbstring.internal_encoding=ISO-8859-15
|
||||
--SKIPIF--
|
||||
<?php
|
||||
extension_loaded("mbstring") or die("skip mbstring not available\n");
|
||||
@mb_internal_encoding('ISO-8859-15');
|
||||
@htmlentities("\xbc\xbd\xbe", ENT_QUOTES, '');
|
||||
if (@$php_errormsg) {
|
||||
die("skip ISO-8859-15 chracter set is not supported on this platform.\n");
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('ISO-8859-15');
|
||||
|
@ -2,16 +2,10 @@
|
||||
htmlentities() test 7 (mbstring / ISO-8859-1)
|
||||
--INI--
|
||||
output_handler=
|
||||
mbstring.internal_encoding=ISO-8859-1
|
||||
--SKIPIF--
|
||||
<?php
|
||||
extension_loaded("mbstring") or die("skip mbstring not available\n");
|
||||
mb_internal_encoding('ISO-8859-1');
|
||||
$php_errormsg = NULL;
|
||||
@htmlentities("\xe4\xf6\xfc", ENT_QUOTES, '');
|
||||
if ($php_errormsg) {
|
||||
die("skip ISO-8859-1 chracter set is not supported on this platform.\n");
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('ISO-8859-1');
|
||||
|
@ -2,16 +2,11 @@
|
||||
htmlentities() test 8 (mbstring / EUC-JP)
|
||||
--INI--
|
||||
output_handler=
|
||||
error_reporting=~E_STRICT
|
||||
mbstring.internal_encoding=EUC-JP
|
||||
--SKIPIF--
|
||||
<?php
|
||||
extension_loaded("mbstring") or die("skip mbstring not available\n");
|
||||
mb_internal_encoding('EUC-JP');
|
||||
$php_errormsg = NULL;
|
||||
@htmlentities("\xa1\xa2\xa1\xa3\xa1\xa4", ENT_QUOTES, '');
|
||||
if ($php_errormsg) {
|
||||
die("skip EUC-JP chracter set is not supported on this platform.\n");
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('EUC-JP');
|
||||
|
@ -2,16 +2,11 @@
|
||||
htmlentities() test 9 (mbstring / Shift_JIS)
|
||||
--INI--
|
||||
output_handler=
|
||||
error_reporting=~E_STRICT
|
||||
mbstring.internal_encoding=Shift_JIS
|
||||
--SKIPIF--
|
||||
<?php
|
||||
extension_loaded("mbstring") or die("skip mbstring not available\n");
|
||||
mb_internal_encoding('Shift_JIS');
|
||||
$php_errormsg = NULL;
|
||||
@htmlentities("\x81\x41\x81\x42\x81\x43", ENT_QUOTES, '');
|
||||
if ($php_errormsg) {
|
||||
die("skip Shift_JIS chracter set is not supported on this platform.\n");
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('Shift_JIS');
|
||||
|
@ -2,17 +2,13 @@
|
||||
htmlentities() test 16 (mbstring / cp1251)
|
||||
--INI--
|
||||
output_handler=
|
||||
mbstring.internal_encoding=cp1251
|
||||
--SKIPIF--
|
||||
<?php
|
||||
extension_loaded("mbstring") or die("skip mbstring not available\n");
|
||||
if (!@mb_internal_encoding('cp1251') ||
|
||||
@htmlentities("\x88\xa9\xd2\xcf\xd3\xcb\xcf\xdb\xce\xd9\xca", ENT_QUOTES, '') == '') {
|
||||
die("skip cp1251 character set is not available in this build.\n");
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
mb_internal_encoding('cp1251');
|
||||
$str = "\x88\xa9\xf0\xee\xf1\xea\xee\xf8\xed\xfb\xe9";
|
||||
var_dump(bin2hex($str), bin2hex(htmlentities($str, ENT_QUOTES, '')));
|
||||
var_dump(htmlentities($str, ENT_QUOTES | ENT_HTML5, ''));
|
||||
|
Loading…
Reference in New Issue
Block a user