mirror of
https://github.com/php/php-src.git
synced 2024-09-21 18:07:23 +00:00
Bug #81390: mb_detect_encoding should not prematurely stop processing input
As a performance optimization, mb_detect_encoding tries to stop processing the input string early once only one 'candidate' encoding remains in which the input string is valid. However, the code that counts how many candidate encodings have already been rejected was buggy. This caused mb_detect_encoding to stop processing the input prematurely when it should have continued. As a result, in the test case provided by Alec, it failed to notice that the input string was not valid in UTF-16.
This commit is contained in:
parent
ca33ab59ad
commit
c25a1ef8d0
@@ -352,9 +352,10 @@ int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *str
 	while (n--) {
 		for (int i = 0; i < num; i++) {
 			mbfl_convert_filter *filter = identd->filter_list[i];
-			if (!filter->num_illegalchar) {
+			mbfl_encoding_detector_data *data = &identd->filter_data[i];
+			if (!data->num_illegalchars) {
 				(*filter->filter_function)(*p, filter);
-				if (identd->filter_data[i].num_illegalchars) {
+				if (data->num_illegalchars) {
 					bad++;
 				}
 			}
|
||||
|
@ -34,8 +34,6 @@ $s = $euc_jp;
|
||||
$s = mb_detect_encoding($s, 'JIS,EUC-JP');
|
||||
print("EUC-JP: $s\n");
|
||||
|
||||
|
||||
|
||||
// Using Encoding List Array
|
||||
echo "== ARRAY ENCODING LIST ==\n";
|
||||
|
||||
@@ -53,6 +51,15 @@ $s = $sjis;
 $s = mb_detect_encoding($s, $a);
 print("SJIS: $s\n");
 
+$test = "CHARSET=windows-1252:Do\xeb;John";
+$encodings = ['UTF-8', 'SJIS', 'GB2312',
+	'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
+	'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
+	'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
+	'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R', 'BIG-5',
+	'ISO-2022-KR', 'ISO-2022-JP', 'UTF-16'
+];
+echo mb_detect_encoding($test, $encodings), "\n";
 
 // Using Detect Order
 echo "== DETECT ORDER ==\n";
|
||||
@@ -100,6 +107,7 @@ EUC-JP: EUC-JP
 JIS: JIS
 EUC-JP: EUC-JP
 SJIS: SJIS
+ISO-8859-1
 == DETECT ORDER ==
 JIS: JIS
 EUC-JP: EUC-JP
|
||||
|
Loading…
Reference in New Issue
Block a user