Legacy conversion code for '7bit' to '8bit' inserts error markers

The use of a special 'vtbl' for converting between '7bit' and
'8bit' text meant that '7bit' text was not converted to
wchars before going to '8bit'. As a result, the special
value MBFL_BAD_INPUT, which we use to flag an erroneous byte
sequence in input text (and which is required by functions
like mb_check_encoding), would pass directly to the output
instead of being converted to the error marker specified
by mb_substitute_character.

This issue dates back to the time when I removed the mbfl
'identify filters' and made encoding validity checking and
encoding detection rely only on the conversion filters.
This commit is contained in:
Alex Dowad 2022-08-06 11:58:14 +02:00
parent f3c8efd711
commit 983a29d3c0
3 changed files with 20 additions and 26 deletions

View File

@ -34,21 +34,18 @@
static size_t mb_7bit_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_7bit(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
const mbfl_encoding mbfl_encoding_7bit = {
const struct mbfl_convert_vtbl vtbl_7bit_wchar = {
mbfl_no_encoding_7bit,
"7bit",
"7bit",
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_7bit_any,
mbfl_filt_conv_common_flush,
NULL,
MBFL_ENCTYPE_SBCS,
NULL,
NULL,
mb_7bit_to_wchar,
mb_wchar_to_7bit
};
const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
mbfl_no_encoding_8bit,
const struct mbfl_convert_vtbl vtbl_wchar_7bit = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_7bit,
mbfl_filt_conv_common_ctor,
NULL,
@ -57,17 +54,19 @@ const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
NULL,
};
const struct mbfl_convert_vtbl vtbl_7bit_8bit = {
const mbfl_encoding mbfl_encoding_7bit = {
mbfl_no_encoding_7bit,
mbfl_no_encoding_8bit,
mbfl_filt_conv_common_ctor,
"7bit",
"7bit",
NULL,
mbfl_filt_conv_7bit_any,
mbfl_filt_conv_common_flush,
NULL,
MBFL_ENCTYPE_SBCS,
&vtbl_7bit_wchar,
&vtbl_wchar_7bit,
mb_7bit_to_wchar,
mb_wchar_to_7bit
};
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
int mbfl_filt_conv_7bit_any(int c, mbfl_convert_filter *filter)
@ -75,7 +74,6 @@ int mbfl_filt_conv_7bit_any(int c, mbfl_convert_filter *filter)
return (*filter->output_function)(c < 0x80 ? c : MBFL_BAD_INPUT, filter->data);
}
int mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter)
{
if (c >= 0 && c < 0x80) {

View File

@ -85,8 +85,6 @@ static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = {
&vtbl_uuencode_8bit,
&vtbl_8bit_qprint,
&vtbl_qprint_8bit,
&vtbl_8bit_7bit,
&vtbl_7bit_8bit,
&vtbl_pass,
NULL
};
@ -302,13 +300,11 @@ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
const struct mbfl_convert_vtbl* mbfl_convert_filter_get_vtbl(const mbfl_encoding *from, const mbfl_encoding *to)
{
if (to->no_encoding == mbfl_no_encoding_base64 ||
to->no_encoding == mbfl_no_encoding_qprint ||
to->no_encoding == mbfl_no_encoding_7bit) {
to->no_encoding == mbfl_no_encoding_qprint) {
from = &mbfl_encoding_8bit;
} else if (from->no_encoding == mbfl_no_encoding_base64 ||
from->no_encoding == mbfl_no_encoding_qprint ||
from->no_encoding == mbfl_no_encoding_uuencode ||
from->no_encoding == mbfl_no_encoding_7bit) {
from->no_encoding == mbfl_no_encoding_uuencode) {
to = &mbfl_encoding_8bit;
}
@ -353,9 +349,9 @@ zend_string* mb_fast_convert(unsigned char *in, size_t in_len, const mbfl_encodi
uint32_t wchar_buf[128];
unsigned int state = 0;
if (to == &mbfl_encoding_base64 || to == &mbfl_encoding_qprint || to == &mbfl_encoding_7bit) {
if (to == &mbfl_encoding_base64 || to == &mbfl_encoding_qprint) {
from = &mbfl_encoding_8bit;
} else if (from == &mbfl_encoding_base64 || from == &mbfl_encoding_qprint || from == &mbfl_encoding_uuencode || from == &mbfl_encoding_7bit) {
} else if (from == &mbfl_encoding_base64 || from == &mbfl_encoding_qprint || from == &mbfl_encoding_uuencode) {
to = &mbfl_encoding_8bit;
}

View File

@ -28,7 +28,7 @@ string(3) "ABC"
string(1) "%"
string(3) "ABC"
bool(false)
string(2) "%%"
string(1) "%"
7bit done
string(1) "%"
8bit done