fix bug #37911: preg_replace_callback ignores named groups

This commit is contained in:
Nuno Lopes 2007-10-07 11:48:27 +00:00
parent 41dcd7d0b5
commit 424db6feb9
2 changed files with 115 additions and 48 deletions

View File

@ -174,6 +174,49 @@ static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
}
/* }}} */
/* {{{ static make_subpats_table */
static char **make_subpats_table(int num_subpats, int rc, pcre_cache_entry *pce)
{
pcre_extra *extra = pce->extra;
int name_cnt = 0, name_size, ni = 0;
char *name_table;
unsigned short name_idx;
char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
efree(subpat_names);
return NULL;
}
if (name_cnt > 0) {
int rc1, rc2;
rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
rc = rc2 ? rc2 : rc1;
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
efree(subpat_names);
return NULL;
}
while (ni++ < name_cnt) {
name_idx = 0xff * name_table[0] + name_table[1];
subpat_names[name_idx] = name_table + 2;
if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
efree(subpat_names);
return NULL;
}
name_table += name_size;
}
}
return subpat_names;
}
/* }}} */
/* {{{ pcre_get_compiled_regex_cache
*/
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
@ -484,7 +527,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
int g_notempty = 0; /* If the match should not be empty */
const char **stringlist; /* Holds list of subpatterns */
char *match; /* The current match */
char **subpat_names = NULL;/* Array for named subpatterns */
char **subpat_names; /* Array for named subpatterns */
int i, rc;
int subpats_order; /* Order of subpattern matches */
int offset_capture; /* Capture match offsets: yes/no */
@ -539,54 +582,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
num_subpats++;
size_offsets = num_subpats * 3;
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
/*
* Build a mapping from subpattern numbers to their names. We will always
* allocate the table, even though there may be no named subpatterns. This
* avoids somewhat more complicated logic in the inner loops.
*/
subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
memset(subpat_names, 0, sizeof(char *) * num_subpats);
{
int name_cnt = 0, name_size, ni = 0;
char *name_table;
unsigned short name_idx;
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
efree(offsets);
efree(subpat_names);
RETURN_FALSE;
}
if (name_cnt > 0) {
int rc1, rc2;
rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
rc = rc2 ? rc2 : rc1;
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
efree(offsets);
efree(subpat_names);
RETURN_FALSE;
}
while (ni++ < name_cnt) {
name_idx = 0xff * name_table[0] + name_table[1];
subpat_names[name_idx] = name_table + 2;
if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
efree(offsets);
efree(subpat_names);
RETURN_FALSE;
}
name_table += name_size;
}
}
subpat_names = make_subpats_table(num_subpats, rc, pce);
if (!subpat_names) {
RETURN_FALSE;
}
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
/* Allocate match sets array and initialize the values. */
if (global && subpats_order == PREG_PATTERN_ORDER) {
match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
@ -794,7 +802,7 @@ static int preg_get_backref(char **str, int *backref)
/* {{{ preg_do_repl_func
*/
static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC)
static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
{
zval *retval_ptr; /* Function return value */
zval **args[1]; /* Argument to pass to function */
@ -804,8 +812,12 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, int co
MAKE_STD_ZVAL(subpats);
array_init(subpats);
for (i = 0; i < count; i++)
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
}
add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
}
args[0] = &subpats;
if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
@ -944,6 +956,8 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
int exoptions = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
int *offsets; /* Array of subpattern offsets */
char **subpat_names; /* Array for named subpatterns */
int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
int new_len; /* Length of needed storage */
int alloc_len; /* Actual allocated length */
@ -987,12 +1001,24 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
}
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
return NULL;
}
size_offsets = (size_offsets + 1) * 3;
num_subpats++;
size_offsets = num_subpats * 3;
/*
* Build a mapping from subpattern numbers to their names. We will always
* allocate the table, even though there may be no named subpatterns. This
* avoids somewhat more complicated logic in the inner loops.
*/
subpat_names = make_subpats_table(num_subpats, rc, pce);
if (!subpat_names) {
return NULL;
}
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
alloc_len = 2 * subject_len + 1;
@ -1033,8 +1059,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
new_len += eval_result_len;
} else if (is_callable_replace) {
/* Use custom function to get replacement string and its length. */
eval_result_len = preg_do_repl_func(replace_val, subject, offsets,
count, &eval_result TSRMLS_CC);
eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
new_len += eval_result_len;
} else { /* do regular substitution */
walk = replace;
@ -1149,8 +1174,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
/* Advance to the next piece. */
start_offset = offsets[1];
}
efree(offsets);
efree(subpat_names);
return result;
}

View File

@ -0,0 +1,41 @@
--TEST--
Bug #37911 (preg_replace_callback ignores named groups)
--FILE--
<?php
function callback($match)
{
var_dump($match);
return $match[1].'/'.strlen($match['name']);
}
var_dump(preg_replace_callback('|(?P<name>blub)|', 'callback', 'bla blub blah'));
var_dump(preg_match('|(?P<name>blub)|', 'bla blub blah', $m));
var_dump($m);
var_dump(preg_replace_callback('|(?P<1>blub)|', 'callback', 'bla blub blah'));
?>
--EXPECTF--
array(3) {
[0]=>
string(4) "blub"
["name"]=>
string(4) "blub"
[1]=>
string(4) "blub"
}
string(15) "bla blub/4 blah"
int(1)
array(3) {
[0]=>
string(4) "blub"
["name"]=>
string(4) "blub"
[1]=>
string(4) "blub"
}
Warning: preg_replace_callback(): Numeric named subpatterns are not allowed in %sbug37911.php on line 14
NULL