mirror of
https://github.com/php/php-src.git
synced 2024-09-21 18:07:23 +00:00
Fix new conversion filter for CP50220 (multi-codepoint kana at end of buffer)
If two codepoints that needed to be collapsed into a single kuten code were split across a buffer boundary, with one at the end of one buffer and the other at the beginning of the next, they were not converted correctly. This was discovered while fuzzing the new implementation of mb_decode_numericentity.
This commit is contained in:
parent
7559bf77d2
commit
3cf432798e
@@ -847,11 +847,27 @@ static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, b
|
||||
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
|
||||
|
||||
bool consumed = false;
|
||||
uint32_t w;
|
||||
|
||||
if (buf->state & 0xFFFF00) {
|
||||
/* Reprocess cached codepoint */
|
||||
w = buf->state >> 8;
|
||||
buf->state &= 0xFF;
|
||||
goto reprocess_codepoint;
|
||||
}
|
||||
|
||||
while (len--) {
|
||||
uint32_t w = *in++;
|
||||
w = *in++;
|
||||
reprocess_codepoint:
|
||||
|
||||
w = mbfl_convert_kana(w, len ? *in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE);
|
||||
if (w >= 0xFF61 && w <= 0xFF9F && !len && !end) {
|
||||
/* This codepoint may need to combine with the next one,
|
||||
* but the 'next one' will come in a separate buffer */
|
||||
buf->state |= w << 8;
|
||||
break;
|
||||
} else {
|
||||
w = mbfl_convert_kana(w, len ? *in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE);
|
||||
}
|
||||
|
||||
if (consumed) {
|
||||
/* Two successive codepoints were converted into one */
|
||||
|
@@ -382,6 +382,14 @@ $converted = mb_convert_encoding("\xff\xff\x00&", 'CP50220', 'UTF-16BE');
|
||||
if ($converted !== '?&')
|
||||
die("Bad handling of erroneous codepoint followed by good one (got " . bin2hex($converted) . ")");
|
||||
|
||||
// In CP50220, two codepoints can be collapsed into a single kuten code in some cases
|
||||
// This should work even on a boundary between separately processed buffers
|
||||
$shouldCollapse = "\xFF\x76\xFF\x9E";
|
||||
$expected = "\x1B\$B%,\x1B(B";
|
||||
for ($i = 0; $i < 256; $i++) {
|
||||
convertValidString(str_repeat("\x00a", $i) . $shouldCollapse, str_repeat('a', $i) . $expected, 'UTF-16BE', 'CP50220', false);
|
||||
}
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
ASCII support OK
|
||||
|
Loading…
Reference in New Issue
Block a user