mirror of
https://github.com/php/php-src.git
synced 2024-09-23 10:57:26 +00:00
- Bug #49785: take 5. What the hell happened to me...
This commit is contained in:
parent
5d34329dab
commit
20737bac6a
@@ -538,19 +538,18 @@ inline static unsigned int get_next_char(enum entity_charset charset,
|
|||||||
c = str[pos];
|
c = str[pos];
|
||||||
if (c < 0x80) {
|
if (c < 0x80) {
|
||||||
MB_WRITE(c);
|
MB_WRITE(c);
|
||||||
this_char = c;
|
this_char = c;
|
||||||
pos++;
|
pos++;
|
||||||
} else if (c < 0xc0) {
|
} else if (c < 0xc0) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
} else if (c < 0xe0) {
|
} else if (c < 0xe0) {
|
||||||
CHECK_LEN(pos, 2);
|
CHECK_LEN(pos, 2);
|
||||||
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
|
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
|
this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
|
||||||
if (this_char < 0x80) {
|
if (this_char < 0x80) {
|
||||||
*status = FAILURE;
|
MB_FAILURE(pos);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
MB_WRITE((unsigned char)c);
|
MB_WRITE((unsigned char)c);
|
||||||
MB_WRITE((unsigned char)str[pos + 1]);
|
MB_WRITE((unsigned char)str[pos + 1]);
|
||||||
@@ -558,14 +557,14 @@ inline static unsigned int get_next_char(enum entity_charset charset,
|
|||||||
} else if (c < 0xf0) {
|
} else if (c < 0xf0) {
|
||||||
CHECK_LEN(pos, 3);
|
CHECK_LEN(pos, 3);
|
||||||
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
|
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
|
if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
|
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
|
||||||
if (this_char < 0x800) {
|
if (this_char < 0x800) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
MB_WRITE((unsigned char)c);
|
MB_WRITE((unsigned char)c);
|
||||||
MB_WRITE((unsigned char)str[pos + 1]);
|
MB_WRITE((unsigned char)str[pos + 1]);
|
||||||
@@ -574,17 +573,17 @@ inline static unsigned int get_next_char(enum entity_charset charset,
|
|||||||
} else if (c < 0xf8) {
|
} else if (c < 0xf8) {
|
||||||
CHECK_LEN(pos, 4);
|
CHECK_LEN(pos, 4);
|
||||||
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
|
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
|
if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
|
if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
|
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
|
||||||
if (this_char < 0x10000) {
|
if (this_char < 0x10000) {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
MB_WRITE((unsigned char)c);
|
MB_WRITE((unsigned char)c);
|
||||||
MB_WRITE((unsigned char)str[pos + 1]);
|
MB_WRITE((unsigned char)str[pos + 1]);
|
||||||
@@ -592,7 +591,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
|
|||||||
MB_WRITE((unsigned char)str[pos + 3]);
|
MB_WRITE((unsigned char)str[pos + 3]);
|
||||||
pos += 4;
|
pos += 4;
|
||||||
} else {
|
} else {
|
||||||
MB_FAILURE(pos);
|
MB_FAILURE(pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@@ -35,6 +35,12 @@ var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
|
|||||||
var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
|
var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
|
||||||
var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
|
var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
|
||||||
|
|
||||||
|
echo "--\n";
|
||||||
|
// UTF-8: with ENT_IGNORE
|
||||||
|
var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
|
||||||
|
var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
|
||||||
|
var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
|
||||||
|
|
||||||
echo "--\n";
|
echo "--\n";
|
||||||
// UTF-8: alternative (invalid) UTF-8 sequence
|
// UTF-8: alternative (invalid) UTF-8 sequence
|
||||||
var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
|
var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
|
||||||
@@ -155,6 +161,10 @@ string(0) ""
|
|||||||
string(0) ""
|
string(0) ""
|
||||||
string(0) ""
|
string(0) ""
|
||||||
--
|
--
|
||||||
|
string(4) "c280"
|
||||||
|
string(6) "e0a080"
|
||||||
|
string(8) "f0908080"
|
||||||
|
--
|
||||||
string(0) ""
|
string(0) ""
|
||||||
string(0) ""
|
string(0) ""
|
||||||
string(0) ""
|
string(0) ""
|
||||||
|
Loading…
Reference in New Issue
Block a user