[DOC] Added a 4th parameter flag to htmlspecialchars() and htmlentities()

that makes the functions leave existing HTML entities unencoded. The feature is
disabled by default and can be activated by passing FALSE as the 4th parameter.
This commit is contained in:
Ilia Alshanetsky 2007-05-22 12:37:00 +00:00
parent 43fccbf2b9
commit c98cbb6020
4 changed files with 70 additions and 6 deletions

2
NEWS
View File

@ -5,6 +5,8 @@ PHP NEWS
- Optimized out a couple of per-request syscalls (Rasmus)
- Optimized digest generation in md5() and sha1() functions. (Ilia)
- Upgraded SQLite 3 to version 3.3.16 (Ilia)
- Added a 4th parameter flag to htmlspecialchars() and htmlentities() that
makes the function not encode existing html entities. (Ilia)
- Added PDO::FETCH_KEY_PAIR mode that will fetch a 2 column result set into
an associative array. (Ilia)
- Added function mysql_set_charset(). Allows connection encoding to be

View File

@ -1078,12 +1078,15 @@ empty_source:
}
/* }}} */
/* Convenience entry point: hands every argument unchanged to
 * php_escape_html_entities_ex() with the double_encode flag fixed at 1,
 * and returns whatever that call returns. */
PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
{
	zend_bool double_encode = 1;
	char *escaped;

	escaped = php_escape_html_entities_ex(old, oldlen, newlen, all, quote_style, hint_charset, double_encode TSRMLS_CC);

	return escaped;
}
/* {{{ php_escape_html_entities
*/
PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC)
{
int i, j, maxlen, len;
char *replaced;
@ -1145,8 +1148,34 @@ PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newle
int is_basic = 0;
if (this_char == '&') {
if (double_encode) {
encode_amp:
memcpy(replaced + len, "&", sizeof("&") - 1);
len += sizeof("&") - 1;
} else {
char *e = memchr(old + i, ';', len - i);
char *s = old + i + 1;
if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */
goto encode_amp;
} else {
if (*s == '#') { /* numeric entities */
s++;
while (s < e) {
if (!isdigit(*s++)) {
goto encode_amp;
}
}
} else { /* text entities */
while (s < e) {
if (!isalnum(*s++)) {
goto encode_amp;
}
}
}
replaced[len++] = '&';
}
}
is_basic = 1;
} else {
for (j = 0; basic_entities[j].charcode != 0; j++) {
@ -1193,12 +1222,13 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
int len;
long quote_style = ENT_COMPAT;
char *replaced;
zend_bool double_encode = 1;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lsb", &str, &str_len, &quote_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) {
return;
}
replaced = php_escape_html_entities(str, str_len, &len, all, quote_style, hint_charset TSRMLS_CC);
replaced = php_escape_html_entities_ex(str, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC);
RETVAL_STRINGL(replaced, len, 0);
}
/* }}} */

View File

@ -38,6 +38,7 @@ PHP_FUNCTION(html_entity_decode);
PHP_FUNCTION(get_html_translation_table);
/* Escapes `old`; equivalent to calling php_escape_html_entities_ex() with double_encode set to 1. */
PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC);
/* Same as above, with an explicit double_encode flag. When the flag is non-zero every '&' is escaped;
 * when it is zero, an '&' that starts a short "&name;" or "&#digits;" sequence is copied through as-is. */
PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC);
PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC);
#endif /* HTML_H */

View File

@ -0,0 +1,31 @@
--TEST--
htmlentities() / htmlspecialchars() "don't double encode" flag support
--FILE--
<?php
/* Inputs mixing plain text, well-formed entities, malformed entity-like
 * sequences, an over-long entity name and an unterminated numeric entity.
 * Each one is run through both functions with the 4th argument set to FALSE. */
$inputs = array(
	"abc",
	"abc&amp;sfdsa",
	"test&#043;s &amp; some more &#68;",
	"&; &amp &#a; &9;",
	"&kffjadfdhsjfhjasdhffasdfas;",
	"&#8787978789",
);

for ($idx = 0, $total = count($inputs); $idx < $total; $idx++) {
	$input = $inputs[$idx];

	var_dump(htmlentities($input, ENT_QUOTES, NULL, FALSE));
	var_dump(htmlspecialchars($input, ENT_QUOTES, NULL, FALSE));
}
?>
--EXPECT--
string(3) "abc"
string(3) "abc"
string(13) "abc&amp;sfdsa"
string(13) "abc&amp;sfdsa"
string(33) "test&#043;s &amp; some more &#68;"
string(33) "test&#043;s &amp; some more &#68;"
string(20) "&; &amp;amp &#a; &9;"
string(20) "&; &amp;amp &#a; &9;"
string(32) "&amp;kffjadfdhsjfhjasdhffasdfas;"
string(32) "&amp;kffjadfdhsjfhjasdhffasdfas;"
string(16) "&amp;#8787978789"
string(16) "&amp;#8787978789"