mirror of
https://github.com/php/php-src.git
synced 2024-09-21 01:47:25 +00:00
ext/mbstring: Update to Unicode 16
Updates UCD to Unicode 16.0 (released September 2024).
Previously: 0fdffc18, #7502, #14680
Unicode 16 adds several new character sets and case folding rules.
However, the existing ucgendat script can still parse them.
This also adds a couple of test cases to make sure the new rules for
East Asian Wide characters and case folding work correctly. These
tests fail on Unicode 15.1 and older because those versions do not
contain those rules.
This commit is contained in:
parent
71edc05139
commit
3afb96184e
1
NEWS
1
NEWS
@ -10,6 +10,7 @@ PHP NEWS
|
||||
- MBString:
|
||||
. Fixed bug GH-15824 (mb_detect_encoding(): Argument $encodings contains
|
||||
invalid encoding "UTF8"). (Yuya Hamada)
|
||||
. Updated Unicode data tables to Unicode 16.0. (Ayesh Karunaratne)
|
||||
|
||||
- Opcache:
|
||||
. Fixed bug GH-15657 (Segmentation fault in dasm_x86.h). (nielsdos)
|
||||
|
@ -966,7 +966,7 @@ PHP 8.4 UPGRADE NOTES
|
||||
. The libxml extension now requires at least libxml2 2.9.4.
|
||||
|
||||
- MBString:
|
||||
. Unicode data tables have been updated to Unicode 15.1.
|
||||
. Unicode data tables have been updated to Unicode 16.0.
|
||||
|
||||
- Mysqli:
|
||||
. The unused and undocumented constant MYSQLI_SET_CHARSET_DIR
|
||||
|
@ -28,8 +28,10 @@ static const struct {
|
||||
{ 0x23f3, 0x23f3 },
|
||||
{ 0x25fd, 0x25fe },
|
||||
{ 0x2614, 0x2615 },
|
||||
{ 0x2630, 0x2637 },
|
||||
{ 0x2648, 0x2653 },
|
||||
{ 0x267f, 0x267f },
|
||||
{ 0x268a, 0x268f },
|
||||
{ 0x2693, 0x2693 },
|
||||
{ 0x26a1, 0x26a1 },
|
||||
{ 0x26aa, 0x26ab },
|
||||
@ -63,11 +65,10 @@ static const struct {
|
||||
{ 0x3099, 0x30ff },
|
||||
{ 0x3105, 0x312f },
|
||||
{ 0x3131, 0x318e },
|
||||
{ 0x3190, 0x31e3 },
|
||||
{ 0x3190, 0x31e5 },
|
||||
{ 0x31ef, 0x321e },
|
||||
{ 0x3220, 0x3247 },
|
||||
{ 0x3250, 0x4dbf },
|
||||
{ 0x4e00, 0xa48c },
|
||||
{ 0x3250, 0xa48c },
|
||||
{ 0xa490, 0xa4c6 },
|
||||
{ 0xa960, 0xa97c },
|
||||
{ 0xac00, 0xd7a3 },
|
||||
@ -82,7 +83,7 @@ static const struct {
|
||||
{ 0x16ff0, 0x16ff1 },
|
||||
{ 0x17000, 0x187f7 },
|
||||
{ 0x18800, 0x18cd5 },
|
||||
{ 0x18d00, 0x18d08 },
|
||||
{ 0x18cff, 0x18d08 },
|
||||
{ 0x1aff0, 0x1aff3 },
|
||||
{ 0x1aff5, 0x1affb },
|
||||
{ 0x1affd, 0x1affe },
|
||||
@ -92,6 +93,8 @@ static const struct {
|
||||
{ 0x1b155, 0x1b155 },
|
||||
{ 0x1b164, 0x1b167 },
|
||||
{ 0x1b170, 0x1b2fb },
|
||||
{ 0x1d300, 0x1d356 },
|
||||
{ 0x1d360, 0x1d376 },
|
||||
{ 0x1f004, 0x1f004 },
|
||||
{ 0x1f0cf, 0x1f0cf },
|
||||
{ 0x1f18e, 0x1f18e },
|
||||
@ -132,11 +135,10 @@ static const struct {
|
||||
{ 0x1f93c, 0x1f945 },
|
||||
{ 0x1f947, 0x1f9ff },
|
||||
{ 0x1fa70, 0x1fa7c },
|
||||
{ 0x1fa80, 0x1fa88 },
|
||||
{ 0x1fa90, 0x1fabd },
|
||||
{ 0x1fabf, 0x1fac5 },
|
||||
{ 0x1face, 0x1fadb },
|
||||
{ 0x1fae0, 0x1fae8 },
|
||||
{ 0x1fa80, 0x1fa89 },
|
||||
{ 0x1fa8f, 0x1fac6 },
|
||||
{ 0x1face, 0x1fadc },
|
||||
{ 0x1fadf, 0x1fae9 },
|
||||
{ 0x1faf0, 0x1faf8 },
|
||||
{ 0x20000, 0x2fffd },
|
||||
{ 0x30000, 0x3fffd },
|
||||
|
@ -5,6 +5,8 @@ mbstring
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
echo "Char widths:\n";
|
||||
|
||||
print "ASCII (PHP): " . mb_strwidth('PHP', 'UTF-8') . "\n";
|
||||
|
||||
print "Vietnamese (Xin chào): " . mb_strwidth('Xin chào', 'UTF-8') . "\n";
|
||||
@ -18,11 +20,22 @@ print "Emoji (\u{1F418}): " . mb_strwidth("\u{1F418}", 'UTF-8') . "\n";
|
||||
// New in Unicode 15.0, width=2
|
||||
print "Emoji (\u{1F6DC}): " . mb_strwidth("\u{1F6DC}", 'UTF-8') . "\n";
|
||||
|
||||
// Changed in Unicode 16.0, U+2630...U+2637 are wide
|
||||
print "Emoji (\u{2630}): " . mb_strwidth("\u{2630}", 'UTF-8') . "\n";
|
||||
|
||||
echo "Char case changes:\n";
|
||||
|
||||
print "Upper(\u{019b}) = \u{a7dc} : ";
|
||||
var_dump(mb_strtoupper("\u{019b}", 'UTF-8') === "\u{a7dc}");
|
||||
?>
|
||||
--EXPECT--
|
||||
Char widths:
|
||||
ASCII (PHP): 3
|
||||
Vietnamese (Xin chào): 8
|
||||
Traditional Chinese (你好): 4
|
||||
Sinhalese (අයේෂ්): 5
|
||||
Emoji (🐘): 2
|
||||
Emoji (🛜): 2
|
||||
Emoji (☰): 2
|
||||
Char case changes:
|
||||
Upper(ƛ) = : bool(true)
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user