Merge branch 'PHP-8.2'

* PHP-8.2:
  Support Microsoft's "Best Fit" mappings for Windows-1252 text encoding
This commit is contained in:
Alex Dowad 2022-12-09 15:41:07 +02:00
commit 14110bff7f
3 changed files with 11 additions and 16 deletions

View File

@@ -485,10 +485,10 @@ DEF_SB_TBL(cp1251, "Windows-1251", "Windows-1251", cp1251_aliases, 0x80, cp1251_
static const char *cp1252_aliases[] = {"cp1252", NULL};
static const unsigned short cp1252_ucs_table[] = {
0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
};
DEF_SB(cp1252, "Windows-1252", "Windows-1252", cp1252_aliases);
@@ -504,7 +504,7 @@ static int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter)
}
}
CK(mbfl_filt_conv_illegal_output(c, filter));
} else if (c <= 0x7F || c >= 0xA0) {
} else if (c <= 0x7F || c >= 0xA0 || c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D) {
CK((*filter->output_function)(c, filter->data));
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
@@ -562,7 +562,7 @@ static void mb_wchar_to_cp1252(uint32_t *in, size_t len, mb_convert_buf *buf, bo
}
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
} else if (w <= 0x7F || w >= 0xA0) {
} else if (w <= 0x7F || w >= 0xA0 || w == 0x81 || w == 0x8D || w == 0x8F || w == 0x90 || w == 0x9D) {
out = mb_convert_buf_add(out, w);
} else {
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);

View File

@@ -11,11 +11,6 @@ if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
include('encoding_tests.inc');
testEncodingFromUTF16ConversionTable(__DIR__ . '/data/CP1252.txt', 'CP1252');
// Test "long" illegal character markers
mb_substitute_character("long");
convertInvalidString("\x81", "%", "CP1252", "UTF-8");
convertInvalidString("\x9D", "%", "CP1252", "UTF-8");
// Test replacement character which cannot be encoded in CP1252
mb_substitute_character(0x1234);
convertInvalidString("\x23\x45", '?', 'UTF-16BE', 'CP1252');

View File

@@ -145,7 +145,7 @@
0x7E 0x007E #TILDE
0x7F 0x007F #DELETE
0x80 0x20AC #EURO SIGN
0x81 #UNDEFINED
0x81 0x0081 #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK
0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
@@ -157,10 +157,10 @@
0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x8C 0x0152 #LATIN CAPITAL LIGATURE OE
0x8D #UNDEFINED
0x8D 0x008D #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
0x8F #UNDEFINED
0x90 #UNDEFINED
0x8F 0x008F #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
0x90 0x0090 #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
0x91 0x2018 #LEFT SINGLE QUOTATION MARK
0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
0x93 0x201C #LEFT DOUBLE QUOTATION MARK
@@ -173,7 +173,7 @@
0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x9C 0x0153 #LATIN SMALL LIGATURE OE
0x9D #UNDEFINED
0x9D 0x009D #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON
0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS
0xA0 0x00A0 #NO-BREAK SPACE