php-src/ext/mbstring/php_mbregex.c
Arnaud Le Blanc 4df3dd7679
Reduce memory allocated by var_export, json_encode, serialize, and other (#8902)
smart_str uses an over-allocated string to optimize for append operations. Functions that use smart_str tend to return the over-allocated string directly. This results in unnecessary memory usage, especially for small strings.

The overhead can be up to 231 bytes for strings smaller than that, and 4095 for other strings. This can be avoided for strings smaller than `4096 - zend_string header size - 1` by reallocating the string.

This change introduces `smart_str_trim_to_size()`, and calls it in `smart_str_extract()`. Functions that use `smart_str` are updated to use `smart_str_extract()`.

Fixes GH-8896
2022-07-08 14:47:46 +02:00

1637 lines
40 KiB
C

/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
+----------------------------------------------------------------------+
*/
#include "libmbfl/config.h"
#include "php.h"
#include "php_ini.h"
#ifdef HAVE_MBREGEX
#include "zend_smart_str.h"
#include "ext/standard/info.h"
#include "php_mbregex.h"
#include "mbstring.h"
#include "libmbfl/filters/mbfilter_utf8.h"
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
#include <oniguruma.h>
#undef UChar
/*
 * Fallback shims, compiled only when ONIGURUMA_VERSION_INT is undefined or
 * lower than 60800. In that case OnigMatchParam becomes an opaque void type,
 * the match-param constructor yields NULL, the setters and the destructor
 * expand to nothing, and the *_with_param entry points forward to plain
 * onig_search()/onig_match(), dropping the trailing match-param argument.
 */
#if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800
typedef void OnigMatchParam;
#define onig_new_match_param() (NULL)
#define onig_initialize_match_param(x) (void)(x)
#define onig_set_match_stack_limit_size_of_match_param(x, y)
#define onig_set_retry_limit_in_match_of_match_param(x, y)
#define onig_free_match_param(x)
#define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
onig_search(reg, str, end, start, range, region, option)
#define onig_match_with_param(re, str, end, at, region, option, mp) \
onig_match(re, str, end, at, region, option)
#endif
/* Brings the mbstring module globals into scope; this file reads them through MBSTRG(). */
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
/* Oniguruma version formatted as "major.minor.teeny"; written by PHP_MINIT_FUNCTION(mb_regex). */
char php_mb_oniguruma_version[256];
/* State of the mbregex part of mbstring; fields are accessed through MBREX(). */
struct _zend_mb_regex_globals {
/* Encoding that current_mbctype is reset to at request shutdown. */
OnigEncoding default_mbctype;
/* Encoding used when compiling patterns. */
OnigEncoding current_mbctype;
/* libmbfl encoding matching current_mbctype; used for encoding validity checks. */
const mbfl_encoding *current_mbctype_mbfl_encoding;
/* Cache of compiled regexes keyed by the pattern text; created at request
 * start and destroyed at request shutdown. */
HashTable ht_rc;
/* Subject of the stateful search API; UNDEF while no subject is set. */
zval search_str;
/* NOTE(review): not referenced in the visible part of this file - confirm whether it is still needed. */
zval *search_str_val;
/* Byte offset into search_str at which the next stateful search starts. */
size_t search_pos;
/* Regex used by the stateful search API; taken from php_mbregex_compile_pattern(),
 * which stores the same pointer in ht_rc. */
php_mb_regex_t *search_re;
/* Match region of the last stateful search, or NULL. */
OnigRegion *search_regs;
/* Options and syntax applied when a function receives no option string. */
OnigOptionType regex_default_options;
OnigSyntaxType *regex_default_syntax;
};
/* Accesses member g of the mbregex globals hanging off the mbstring module globals. */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
/* {{{ static void php_mb_regex_free_cache() */
/* Element destructor for the regex cache: frees the compiled regex held as the element's pointer. */
static void php_mb_regex_free_cache(zval *el)
{
	php_mb_regex_t *cached = (php_mb_regex_t *)Z_PTR_P(el);

	onig_free(cached);
}
/* }}} */
/* {{{ _php_mb_regex_globals_ctor */
/*
 * Puts a freshly allocated globals structure into its default state:
 * UTF-8 as both default and current encoding, no pending search state, and
 * Ruby syntax with the MULTILINE|SINGLELINE options as defaults.
 * ht_rc is deliberately not touched here; PHP_RINIT_FUNCTION(mb_regex)
 * initialises it for every request.
 *
 * Always returns SUCCESS.
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
{
	pglobals->default_mbctype = ONIG_ENCODING_UTF8;
	pglobals->current_mbctype = ONIG_ENCODING_UTF8;
	pglobals->current_mbctype_mbfl_encoding = &mbfl_encoding_utf8;
	ZVAL_UNDEF(&pglobals->search_str);
	/* The allocator hands us unzeroed memory, so no pointer member may be
	 * left holding an indeterminate value. */
	pglobals->search_str_val = NULL;
	pglobals->search_re = (php_mb_regex_t*)NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = (OnigRegion*)NULL;
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
	return SUCCESS;
}
/* }}} */
/* {{{ php_mb_regex_globals_alloc */
/*
 * Allocates a persistent globals structure and runs the constructor on it.
 * Returns the structure, or NULL (after releasing it) if the constructor
 * reports anything other than SUCCESS.
 */
zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
{
	zend_mb_regex_globals *globals = pemalloc(sizeof(zend_mb_regex_globals), 1);

	if (_php_mb_regex_globals_ctor(globals) == SUCCESS) {
		return globals;
	}

	pefree(globals, 1);
	return NULL;
}
/* }}} */
/* {{{ php_mb_regex_globals_free */
/* Releases a globals structure obtained from php_mb_regex_globals_alloc(); NULL is accepted and ignored. */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
{
	if (pglobals != NULL) {
		pefree(pglobals, 1);
	}
}
/* }}} */
/* {{{ PHP_MINIT_FUNCTION(mb_regex) */
/* Module startup: initialises Oniguruma and records its version string in php_mb_oniguruma_version. */
PHP_MINIT_FUNCTION(mb_regex)
{
	onig_init();

	snprintf(
		php_mb_oniguruma_version,
		sizeof(php_mb_oniguruma_version),
		"%d.%d.%d",
		ONIGURUMA_VERSION_MAJOR,
		ONIGURUMA_VERSION_MINOR,
		ONIGURUMA_VERSION_TEENY
	);

	return SUCCESS;
}
/* }}} */
/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
/* Module shutdown: tears down Oniguruma; always reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mb_regex)
{
	(void) onig_end();

	return SUCCESS;
}
/* }}} */
/* {{{ PHP_RINIT_FUNCTION(mb_regex) */
/*
 * Request startup: creates the per-request cache of compiled regexes.
 * Fails if the mbregex globals were never allocated.
 */
PHP_RINIT_FUNCTION(mb_regex)
{
	if (MBSTRG(mb_regex_globals) == NULL) {
		return FAILURE;
	}

	zend_hash_init(&MBREX(ht_rc), 0, NULL, php_mb_regex_free_cache, 0);

	return SUCCESS;
}
/* }}} */
/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
/*
 * Request shutdown: restores the default encoding, drops the stateful
 * search subject/regex/region, and destroys the compiled-regex cache.
 */
PHP_RSHUTDOWN_FUNCTION(mb_regex)
{
	/* encoding goes back to the configured default */
	MBREX(current_mbctype) = MBREX(default_mbctype);
	MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(php_mb_regex_get_default_mbctype());

	/* forget the stateful search subject */
	if (!Z_ISUNDEF(MBREX(search_str))) {
		zval_ptr_dtor(&MBREX(search_str));
		ZVAL_UNDEF(&MBREX(search_str));
	}
	MBREX(search_pos) = 0;
	MBREX(search_re) = NULL;

	if (MBREX(search_regs) != NULL) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = NULL;
	}

	/* the cache owns the compiled regexes; destroying it frees them */
	zend_hash_destroy(&MBREX(ht_rc));

	return SUCCESS;
}
/* }}} */
/* {{{ PHP_MINFO_FUNCTION(mb_regex) */
/* phpinfo() section: reports that mbregex is enabled and which Oniguruma version it was built against. */
PHP_MINFO_FUNCTION(mb_regex)
{
	char version[32];

	snprintf(version, sizeof(version), "%d.%d.%d",
		ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);

	php_info_print_table_start();
	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", version);
	php_info_print_table_end();
}
/* }}} */
/*
* encoding name resolver
*/
/* {{{ encoding name map */
/* One row of the encoding-name table: a set of accepted names and the Oniguruma encoding they select. */
typedef struct _php_mb_regex_enc_name_map_t {
/* Accepted names, each NUL-terminated and stored back to back; an empty
 * string ends the list. Read as a plain C string this is the first name. */
const char *names;
/* Oniguruma encoding selected by any of the names. */
OnigEncoding code;
} php_mb_regex_enc_name_map_t;
/*
 * Table of encoding names understood by mbregex. Each entry is compiled in
 * only when the matching ONIG_ENCODING_* macro is defined. Name lookups are
 * case-insensitive (see _php_mb_regex_name2mbctype); the reverse lookup
 * returns the first name of the entry. The table ends with a sentinel whose
 * names pointer is NULL.
 */
static const php_mb_regex_enc_name_map_t enc_name_map[] = {
#ifdef ONIG_ENCODING_EUC_JP
{
"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
ONIG_ENCODING_EUC_JP
},
#endif
#ifdef ONIG_ENCODING_UTF8
{
"UTF-8\0UTF8\0",
ONIG_ENCODING_UTF8
},
#endif
#ifdef ONIG_ENCODING_UTF16_BE
{
"UTF-16\0UTF-16BE\0",
ONIG_ENCODING_UTF16_BE
},
#endif
#ifdef ONIG_ENCODING_UTF16_LE
{
"UTF-16LE\0",
ONIG_ENCODING_UTF16_LE
},
#endif
#ifdef ONIG_ENCODING_UTF32_BE
{
"UCS-4\0UTF-32\0UTF-32BE\0",
ONIG_ENCODING_UTF32_BE
},
#endif
#ifdef ONIG_ENCODING_UTF32_LE
{
"UCS-4LE\0UTF-32LE\0",
ONIG_ENCODING_UTF32_LE
},
#endif
#ifdef ONIG_ENCODING_SJIS
{
"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
ONIG_ENCODING_SJIS
},
#endif
#ifdef ONIG_ENCODING_BIG5
{
"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
ONIG_ENCODING_BIG5
},
#endif
#ifdef ONIG_ENCODING_EUC_CN
{
"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
ONIG_ENCODING_EUC_CN
},
#endif
#ifdef ONIG_ENCODING_EUC_TW
{
"EUC-TW\0EUCTW\0EUC_TW\0",
ONIG_ENCODING_EUC_TW
},
#endif
#ifdef ONIG_ENCODING_EUC_KR
{
"EUC-KR\0EUCKR\0EUC_KR\0",
ONIG_ENCODING_EUC_KR
},
#endif
#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
{
"KOI8\0KOI-8\0",
ONIG_ENCODING_KOI8
},
#endif
#ifdef ONIG_ENCODING_KOI8_R
{
"KOI8R\0KOI8-R\0KOI-8R\0",
ONIG_ENCODING_KOI8_R
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_1
{
"ISO-8859-1\0ISO8859-1\0",
ONIG_ENCODING_ISO_8859_1
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_2
{
"ISO-8859-2\0ISO8859-2\0",
ONIG_ENCODING_ISO_8859_2
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_3
{
"ISO-8859-3\0ISO8859-3\0",
ONIG_ENCODING_ISO_8859_3
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_4
{
"ISO-8859-4\0ISO8859-4\0",
ONIG_ENCODING_ISO_8859_4
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_5
{
"ISO-8859-5\0ISO8859-5\0",
ONIG_ENCODING_ISO_8859_5
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_6
{
"ISO-8859-6\0ISO8859-6\0",
ONIG_ENCODING_ISO_8859_6
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_7
{
"ISO-8859-7\0ISO8859-7\0",
ONIG_ENCODING_ISO_8859_7
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_8
{
"ISO-8859-8\0ISO8859-8\0",
ONIG_ENCODING_ISO_8859_8
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_9
{
"ISO-8859-9\0ISO8859-9\0",
ONIG_ENCODING_ISO_8859_9
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_10
{
"ISO-8859-10\0ISO8859-10\0",
ONIG_ENCODING_ISO_8859_10
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_11
{
"ISO-8859-11\0ISO8859-11\0",
ONIG_ENCODING_ISO_8859_11
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_13
{
"ISO-8859-13\0ISO8859-13\0",
ONIG_ENCODING_ISO_8859_13
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_14
{
"ISO-8859-14\0ISO8859-14\0",
ONIG_ENCODING_ISO_8859_14
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_15
{
"ISO-8859-15\0ISO8859-15\0",
ONIG_ENCODING_ISO_8859_15
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_16
{
"ISO-8859-16\0ISO8859-16\0",
ONIG_ENCODING_ISO_8859_16
},
#endif
#ifdef ONIG_ENCODING_ASCII
{
"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
ONIG_ENCODING_ASCII
},
#endif
{ NULL, ONIG_ENCODING_UNDEF }
};
/* }}} */
/* {{{ _php_mb_regex_name2mbctype */
/*
 * Resolves an encoding name to its Oniguruma encoding.
 * The comparison is case-insensitive and tries every alias of every table
 * entry. Returns ONIG_ENCODING_UNDEF for NULL, an empty string, or an
 * unknown name.
 */
static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
{
	const php_mb_regex_enc_name_map_t *entry;

	if (pname == NULL || pname[0] == '\0') {
		return ONIG_ENCODING_UNDEF;
	}

	entry = enc_name_map;
	while (entry->names != NULL) {
		/* aliases are stored back to back; an empty string ends the list */
		const char *alias = entry->names;

		while (alias[0] != '\0') {
			if (strcasecmp(alias, pname) == 0) {
				return entry->code;
			}
			alias += strlen(alias) + 1;
		}
		entry++;
	}

	return ONIG_ENCODING_UNDEF;
}
/* }}} */
/* {{{ _php_mb_regex_mbctype2name */
/*
 * Reverse lookup: returns the name list of the first table entry that maps
 * to mbctype (as a C string this is the entry's first name), or NULL when no
 * entry matches.
 */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	const php_mb_regex_enc_name_map_t *entry = enc_name_map;

	/* stops either on the matching entry or on the NULL-named sentinel */
	while (entry->names != NULL && entry->code != mbctype) {
		entry++;
	}

	return entry->names;
}
/* }}} */
/* {{{ php_mb_regex_set_mbctype */
/*
 * Switches the current regex encoding to encname.
 * Updates both the Oniguruma encoding and its libmbfl counterpart and
 * returns SUCCESS; returns FAILURE and changes nothing if mbregex does not
 * know the name.
 */
int php_mb_regex_set_mbctype(const char *encname)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(current_mbctype) = resolved;
		MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(encname);
		return SUCCESS;
	}

	return FAILURE;
}
/* }}} */
/* {{{ php_mb_regex_set_default_mbctype */
/*
 * Sets the encoding that becomes current again at request shutdown.
 * Returns FAILURE, leaving the default untouched, for an unknown name.
 */
int php_mb_regex_set_default_mbctype(const char *encname)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(default_mbctype) = resolved;
		return SUCCESS;
	}

	return FAILURE;
}
/* }}} */
/* {{{ php_mb_regex_get_mbctype */
const char *php_mb_regex_get_mbctype(void)
{
return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
}
/* }}} */
/* {{{ php_mb_regex_get_mbctype_encoding */
/* libmbfl encoding that corresponds to the current regex encoding. */
const mbfl_encoding *php_mb_regex_get_mbctype_encoding(void)
{
	const mbfl_encoding *current = MBREX(current_mbctype_mbfl_encoding);

	return current;
}
/* }}} */
/* {{{ php_mb_regex_get_default_mbctype */
const char *php_mb_regex_get_default_mbctype(void)
{
return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
}
/* }}} */
/*
* regex cache
*/
/* {{{ php_mbregex_compile_pattern */
/*
 * Returns a compiled regex for pattern, using the per-request cache.
 *
 * The pattern must be valid in the current regex encoding, otherwise a
 * warning is raised and NULL is returned. A cached regex is reused only if
 * it was compiled with the same options, encoding and syntax; otherwise the
 * pattern is compiled afresh and the cache entry is replaced. Compile errors
 * raise a warning and return NULL.
 */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigSyntaxType *syntax)
{
	OnigEncoding enc = MBREX(current_mbctype);
	php_mb_regex_t *cached;
	php_mb_regex_t *compiled = NULL;
	OnigErrorInfo err_info;
	int status;

	if (!php_mb_check_encoding(pattern, patlen, php_mb_regex_get_mbctype_encoding())) {
		php_error_docref(NULL, E_WARNING,
			"Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
		return NULL;
	}

	cached = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
	if (cached != NULL
			&& onig_get_options(cached) == options
			&& onig_get_encoding(cached) == enc
			&& onig_get_syntax(cached) == syntax) {
		/* cache hit with identical compile settings */
		return cached;
	}

	status = onig_new(&compiled, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info);
	if (status != ONIG_NORMAL) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

		onig_error_code_to_str(err_str, status, &err_info);
		php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
		return NULL;
	}

	if (cached == MBREX(search_re)) {
		/* reuse the new rc? see bug #72399 */
		MBREX(search_re) = NULL;
	}
	zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, compiled);

	return compiled;
}
/* }}} */
/* {{{ _php_mb_regex_get_option_string */
/*
 * Renders option flags and syntax as the option-letter string understood by
 * _php_mb_regex_init_options().
 *
 * As many characters as fit (including the terminating NUL) are written to
 * str, which has room for len bytes. Returns 0 when the whole string
 * including its terminator fitted, otherwise the number of bytes that would
 * have been required.
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	/* at most five option letters, one syntax letter and the terminator */
	char letters[8];
	size_t needed = 0;
	size_t i;
	char syntax_letter = 0;
	const OnigOptionType line_mask = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;

	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
		letters[needed++] = 'i';
	}
	if ((option & ONIG_OPTION_EXTEND) != 0) {
		letters[needed++] = 'x';
	}
	if ((option & line_mask) == line_mask) {
		/* both line options together are spelled 'p' */
		letters[needed++] = 'p';
	} else if ((option & ONIG_OPTION_MULTILINE) != 0) {
		letters[needed++] = 'm';
	} else if ((option & ONIG_OPTION_SINGLELINE) != 0) {
		letters[needed++] = 's';
	}
	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
		letters[needed++] = 'l';
	}
	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
		letters[needed++] = 'n';
	}

	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_letter = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_letter = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_letter = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_letter = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_letter = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_letter = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_letter = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_letter = 'd';
	}
	if (syntax_letter != 0) {
		letters[needed++] = syntax_letter;
	}
	letters[needed++] = '\0';

	/* copy only what the caller's buffer can hold */
	for (i = 0; i < needed && i < len; i++) {
		str[i] = letters[i];
	}

	return (len < needed) ? needed : 0;
}
/* }}} */
/* {{{ _php_mb_regex_init_options */
/*
 * Parses an option-letter string.
 *
 * *syntax is always reset to Ruby syntax first and then overwritten by any
 * syntax letter found. Option letters are OR-ed into *option (if option is
 * not NULL) once the whole string has been accepted. An unknown letter
 * throws a ValueError and returns false without touching *option.
 * With parg == NULL nothing but the syntax reset happens and true is
 * returned.
 */
static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option,
	OnigSyntaxType **syntax)
{
	OnigOptionType collected = 0;
	size_t idx;

	*syntax = ONIG_SYNTAX_RUBY;

	if (parg == NULL) {
		return true;
	}

	for (idx = 0; idx < narg; idx++) {
		const char letter = parg[idx];

		switch (letter) {
			/* matching options */
			case 'i': collected |= ONIG_OPTION_IGNORECASE; break;
			case 'x': collected |= ONIG_OPTION_EXTEND; break;
			case 'm': collected |= ONIG_OPTION_MULTILINE; break;
			case 's': collected |= ONIG_OPTION_SINGLELINE; break;
			case 'p': collected |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; break;
			case 'l': collected |= ONIG_OPTION_FIND_LONGEST; break;
			case 'n': collected |= ONIG_OPTION_FIND_NOT_EMPTY; break;

			/* syntax selection; the last letter wins */
			case 'j': *syntax = ONIG_SYNTAX_JAVA; break;
			case 'u': *syntax = ONIG_SYNTAX_GNU_REGEX; break;
			case 'g': *syntax = ONIG_SYNTAX_GREP; break;
			case 'c': *syntax = ONIG_SYNTAX_EMACS; break;
			case 'r': *syntax = ONIG_SYNTAX_RUBY; break;
			case 'z': *syntax = ONIG_SYNTAX_PERL; break;
			case 'b': *syntax = ONIG_SYNTAX_POSIX_BASIC; break;
			case 'd': *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break;

			default:
				zend_value_error("Option \"%c\" is not supported", letter);
				return false;
		}
	}

	if (option != NULL) {
		*option |= collected;
	}

	return true;
}
/* }}} */
/*
* Callbacks for named subpatterns
*/
/* {{{ struct mb_regex_groups_iter_args */
/* Context handed to mb_regex_groups_iter() through onig_foreach_name(). */
typedef struct mb_regex_groups_iter_args {
/* Array that receives one entry per named group. */
zval *groups;
/* Subject string the match offsets refer to, and its length in bytes. */
char *search_str;
size_t search_len;
/* Match region holding the begin/end offsets of every group. */
OnigRegion *region;
} mb_regex_groups_iter_args;
/* }}} */
/* {{{ mb_regex_groups_iter */
/*
 * onig_foreach_name() callback: stores the text captured by one named group
 * in the result array under the group's name, or false if the group did not
 * capture a non-empty span inside the subject. Always returns 0.
 */
static int
mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
{
	mb_regex_groups_iter_args *ctx = (mb_regex_groups_iter_args *) parg;
	int group, beg, end;

	/*
	 * In case of duplicate groups, keep only the last succeeding one
	 * to be consistent with preg_match with the PCRE_DUPNAMES option.
	 */
	group = onig_name_to_backref_number(reg, name, name_end, ctx->region);
	beg = ctx->region->beg[group];
	end = ctx->region->end[group];

	if (beg < 0 || beg >= end || end > ctx->search_len) {
		add_assoc_bool_ex(ctx->groups, (char *)name, name_end - name, 0);
		return 0;
	}

	add_assoc_stringl_ex(ctx->groups, (char *)name, name_end - name, &ctx->search_str[beg], end - beg);
	return 0;
}
/* }}} */
/*
* Helper for _php_mb_regex_ereg_replace_exec
*/
/* {{{ mb_regex_substitute */
/*
 * Expands the replacement template for one match and appends the result to
 * pbuf.
 *
 * Recognised escapes are \0-\9, \k<name>, \k'name', \k<number> and
 * \k'number'. Numbered references are left untouched when the regex does not
 * capture unnamed groups. Escapes that are malformed or refer to a group
 * that does not exist are copied verbatim; a valid reference to a group that
 * captured nothing contributes nothing.
 *
 * pbuf         output buffer
 * subject      string the match offsets in regs refer to
 * subject_len  length of subject in bytes
 * replace      replacement template
 * replace_len  length of replace in bytes
 * regexp       regex that produced regs
 * regs         match region of the current match
 * enc          encoding used to step through replace character by character
 *
 * The template is not required to be well-formed in enc, so every advance is
 * clamped to the end of the template; nothing beyond replace + replace_len
 * is read or copied.
 */
static inline void mb_regex_substitute(
	smart_str *pbuf,
	const char *subject,
	size_t subject_len,
	char *replace,
	size_t replace_len,
	php_mb_regex_t *regexp,
	OnigRegion *regs,
	const mbfl_encoding *enc
) {
	char *p, *sp, *eos;
	int no; /* backreference group number */
	int clen; /* byte-length of the current character */

	p = replace;
	eos = replace + replace_len;

	while (p < eos) {
		clen = (int) php_mb_mbchar_bytes(p, enc);
		if (clen != 1 || p[0] != '\\') {
			/* copy anything that's not an ascii backslash; a truncated
			 * trailing multibyte sequence is cut at the end of the template */
			if ((size_t)clen > (size_t)(eos - p)) {
				clen = (int)(eos - p);
			}
			smart_str_appendl(pbuf, p, clen);
			p += clen;
			continue;
		}

		sp = p; /* save position */
		p++;
		if (p == eos || php_mb_mbchar_bytes(p, enc) != 1) {
			/* backslash at the very end or followed by a multibyte char:
			 * emit the backslash alone */
			smart_str_appendl(pbuf, sp, p - sp);
			continue;
		}

		no = -1;
		switch (p[0]) {
			case '0':
				no = 0;
				p++;
				break;
			case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				if (!onig_noname_group_capture_is_active(regexp)) {
					/*
					 * FIXME:
					 * Oniguruma throws a compile error if numbered backrefs are used with named groups in the pattern.
					 * For now we just ignore them, but in the future we might want to raise a warning
					 * and abort the whole replace operation.
					 */
					p++;
					smart_str_appendl(pbuf, sp, p - sp);
					continue;
				}
				no = p[0] - '0';
				p++;
				break;
			case 'k':
			{
				p++;
				clen = (p < eos) ? (int) php_mb_mbchar_bytes(p, enc) : 0;
				if (clen != 1 || (p[0] != '<' && p[0] != '\'')) {
					/* not a backref delimiter: emit "\k" plus the following
					 * character, if there is one */
					if ((size_t)clen > (size_t)(eos - p)) {
						clen = (int)(eos - p);
					}
					p += clen;
					smart_str_appendl(pbuf, sp, p - sp);
					continue;
				}

				/* try to consume everything until next delimiter */
				char delim = p[0] == '<' ? '>' : '\'';
				char *name, *name_end;
				char maybe_num = 1;

				name_end = name = p + 1;
				while (name_end < eos) {
					clen = (int) php_mb_mbchar_bytes(name_end, enc);
					if (clen != 1) {
						/* a multibyte char is neither a digit nor the delimiter */
						if ((size_t)clen > (size_t)(eos - name_end)) {
							clen = (int)(eos - name_end);
						}
						name_end += clen;
						maybe_num = 0;
						continue;
					}
					if (name_end[0] == delim) break;
					/* <ctype.h> functions need an unsigned char value */
					if (maybe_num && !isdigit((unsigned char)name_end[0])) maybe_num = 0;
					name_end++;
				}

				if (name_end >= eos) {
					/* we failed to find the end delimiter: the rest of the
					 * template is literal text */
					p = eos;
					smart_str_appendl(pbuf, sp, p - sp);
					continue;
				}

				p = name_end + 1;
				if (name_end - name < 1) {
					/* the backref was empty */
					smart_str_appendl(pbuf, sp, p - sp);
					continue;
				}

				/* we have either a name or a number */
				if (maybe_num) {
					if (!onig_noname_group_capture_is_active(regexp)) {
						/* see above note on mixing numbered & named backrefs */
						smart_str_appendl(pbuf, sp, p - sp);
						continue;
					}
					if (name_end - name == 1) {
						no = name[0] - '0';
						break;
					}
					if (name[0] == '0') {
						/* 01 is not a valid number */
						break;
					}
					{
						/* a number too large for the region must stay invalid
						 * instead of wrapping into a valid index when narrowed */
						unsigned long num = strtoul(name, NULL, 10);

						no = (num < (unsigned long)regs->num_regs) ? (int)num : -1;
					}
					break;
				}

				no = onig_name_to_backref_number(regexp, (OnigUChar *)name, (OnigUChar *)name_end, regs);
				break;
			}
			default:
				/* We're not treating \ as an escape character and will interpret something like
				 * \\1 as \ followed by \1, rather than \\ followed by 1. This is because this
				 * function has not supported escaping of backslashes historically. */
				smart_str_appendl(pbuf, sp, p - sp);
				continue;
		}

		if (no < 0 || no >= regs->num_regs) {
			/* invalid group number reference, keep the escape sequence in the output */
			smart_str_appendl(pbuf, sp, p - sp);
			continue;
		}

		if (regs->beg[no] >= 0 && regs->beg[no] < regs->end[no] && (size_t)regs->end[no] <= subject_len) {
			smart_str_appendl(pbuf, subject + regs->beg[no], regs->end[no] - regs->beg[no]);
		}
	}

	if (p < eos) {
		smart_str_appendl(pbuf, p, eos - p);
	}
}
/* }}} */
/*
* php functions
*/
/* {{{ Returns the current encoding for regex as a string. */
/*
 * mb_regex_encoding([?string $encoding = null])
 * Without an argument (or with null) returns the name of the current regex
 * encoding. With an argument switches to that encoding and returns true, or
 * throws a ValueError if the name is not recognised.
 */
PHP_FUNCTION(mb_regex_encoding)
{
	char *encoding = NULL;
	size_t encoding_len;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &encoding, &encoding_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (encoding == NULL) {
		/* getter */
		const char *current = php_mb_regex_get_mbctype();

		ZEND_ASSERT(current != NULL);
		RETURN_STRING(current);
	}

	/* setter */
	if (php_mb_regex_set_mbctype(encoding) == FAILURE) {
		zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", encoding);
		RETURN_THROWS();
	}

	/* TODO Make function return previous encoding? */
	RETURN_TRUE;
}
/* }}} */
/* {{{ _php_mb_onig_search */
/*
 * onig_search() wrapper that applies the configured stack and retry limits.
 * A limit is only passed on when ZEND_LONG_UINT_OVFL() does not flag its
 * value. Returns whatever onig_search_with_param() returns.
 */
static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start,
	const OnigUChar* range, OnigRegion* region, OnigOptionType option)
{
	int status;
	OnigMatchParam *param = onig_new_match_param();

	onig_initialize_match_param(param);

	if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
		onig_set_match_stack_limit_size_of_match_param(param, (unsigned int)MBSTRG(regex_stack_limit));
	}
	if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
		onig_set_retry_limit_in_match_of_match_param(param, (unsigned int)MBSTRG(regex_retry_limit));
	}

	status = onig_search_with_param(reg, str, end, start, range, region, option, param);

	onig_free_match_param(param);

	return status;
}
/* }}} */
/* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of mb_ereg() and mb_eregi().
 *
 * Parameters from PHP: pattern, string, optional by-reference array.
 * An empty pattern throws a ValueError. Returns false if the string is not
 * valid in the current regex encoding, if the pattern cannot be compiled, or
 * if nothing matches; returns true on a match. When an array is supplied it
 * is reset to an empty array up front and, on a match, filled with one entry
 * per group (false for groups without a non-empty capture) plus entries
 * keyed by group name.
 *
 * icase: non-zero adds ONIG_OPTION_IGNORECASE to the default options.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *array = NULL;
	char *arg_pattern, *string;
	size_t arg_pattern_len, string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	bool matched = false;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_pattern, &arg_pattern_len, &string, &string_len, &array) == FAILURE) {
		RETURN_THROWS();
	}

	if (arg_pattern_len == 0) {
		zend_argument_value_error(1, "must not be empty");
		RETURN_THROWS();
	}

	if (array != NULL) {
		array = zend_try_array_init(array);
		if (!array) {
			RETURN_THROWS();
		}
	}

	if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax));
	if (re != NULL) {
		OnigUChar *subject = (OnigUChar *)string;
		OnigUChar *subject_end = (OnigUChar *)(string + string_len);

		regs = onig_region_new();

		/* actually execute the regular expression */
		if (_php_mb_onig_search(re, subject, subject_end, subject, subject_end, regs, 0) >= 0) {
			matched = true;

			if (array != NULL) {
				int group;

				for (group = 0; group < regs->num_regs; group++) {
					int beg = regs->beg[group];
					int end = regs->end[group];

					if (beg >= 0 && beg < end && (size_t)end <= string_len) {
						add_index_stringl(array, group, (char *)&string[beg], end - beg);
					} else {
						add_index_bool(array, group, 0);
					}
				}

				if (onig_number_of_names(re) > 0) {
					mb_regex_groups_iter_args args = {array, string, string_len, regs};
					onig_foreach_name(re, mb_regex_groups_iter, &args);
				}
			}
		}
	}

	if (matched) {
		RETVAL_TRUE;
	} else {
		RETVAL_FALSE;
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
/* }}} */
/* {{{ Regular expression match for multibyte string */
/* mb_ereg(): case-sensitive front end of _php_mb_regex_ereg_exec(). */
PHP_FUNCTION(mb_ereg)
{
	const int icase = 0;

	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, icase);
}
/* }}} */
/* {{{ Case-insensitive regular expression match for multibyte string */
/* mb_eregi(): case-insensitive front end of _php_mb_regex_ereg_exec(). */
PHP_FUNCTION(mb_eregi)
{
	const int icase = 1;

	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, icase);
}
/* }}} */
/* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of mb_ereg_replace(), mb_eregi_replace() and
 * mb_ereg_replace_callback().
 *
 * Parameters from PHP: pattern, replacement (string, or callable when
 * is_callable is non-zero), subject string, optional option string.
 *
 * options      option bits the caller wants forced on (e.g. IGNORECASE)
 * is_callable  non-zero: the replacement is a callback that receives the
 *              array of captured groups and whose result, converted to
 *              string, replaces the match
 *
 * Returns to PHP:
 *   - null   if the subject is not valid in the current regex encoding
 *   - false  if the pattern cannot be compiled or the search reports an error
 *   - the subject with every match replaced otherwise
 * Throws if the option string contains an unsupported letter, or if the
 * callback cannot be invoked and no exception is pending yet.
 *
 * Every exit taken after the match region has been allocated releases the
 * region and all buffers; the region is malloc'd by Oniguruma and would
 * otherwise outlive the request.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	char *arg_pattern;
	size_t arg_pattern_len;
	char *replace;
	size_t replace_len;
	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	int err, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;

	const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding();
	ZEND_ASSERT(enc != NULL);

	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|s!",
						&arg_pattern, &arg_pattern_len,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_THROWS();
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "sfs|s!",
						&arg_pattern, &arg_pattern_len,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_THROWS();
			}
		}

		if (!php_mb_check_encoding(string, string_len, enc)) {
			RETURN_NULL();
		}

		if (option_str != NULL) {
			/* Initialize option and in case of failure it means there is a value error */
			if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax)) {
				RETURN_THROWS();
			}
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}

	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	if (is_callable) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				mb_regex_substitute(pbuf, string, string_len, replace, replace_len, re, regs, enc);
			} else {
				zval args[1];
				zval subpats, retval;
				int i;

				array_init(&subpats);
				for (i = 0; i < regs->num_regs; i++) {
					add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
				}
				if (onig_number_of_names(re) > 0) {
					mb_regex_groups_iter_args args = {&subpats, string, string_len, regs};
					onig_foreach_name(re, mb_regex_groups_iter, &args);
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						zend_throw_error(NULL, "Unable to call custom replacement function");
						zval_ptr_dtor(&subpats);
						/* bail out, but not without releasing what the
						 * normal exit path below would have released */
						smart_str_free(&out_buf);
						smart_str_free(&eval_buf);
						if (description) {
							efree(description);
						}
						onig_region_free(regs, 1);
						RETURN_THROWS();
					}
				}
				zval_ptr_dtor(&subpats);
			}

			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				/* empty match at the current position: copy one byte and
				 * step over it so the loop makes progress */
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETURN_FALSE;
	}
	RETURN_STR(smart_str_extract(&out_buf));
}
/* }}} */
/* {{{ Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
/* }}} */
/* {{{ Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
}
/* }}} */
/* {{{ regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
/* }}} */
/* {{{ split multibyte string into array by regular expression */
/*
 * mb_split(string $pattern, string $string, int $limit = -1)
 * Splits the string at every match of the pattern, using the default options
 * and syntax. A positive limit caps the number of returned pieces, the last
 * piece holding the unsplit remainder. Returns false if the string is not
 * valid in the current regex encoding, if the pattern cannot be compiled, or
 * (with a warning) if the search reports an error.
 */
PHP_FUNCTION(mb_split)
{
	char *arg_pattern, *string;
	size_t arg_pattern_len, string_len;
	zend_long count = -1;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigUChar *subject, *subject_end;
	OnigUChar *pos, *chunk_pos;
	int err = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
		RETURN_THROWS();
	}

	/* the final piece is appended after the loop, so the loop may only
	 * produce limit - 1 pieces */
	if (count > 0) {
		count--;
	}

	if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
		RETURN_FALSE;
	}

	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	array_init(return_value);

	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);
	chunk_pos = pos = subject;
	regs = onig_region_new();

	/* churn through str, generating array entries as we go */
	while (count != 0 && (size_t)(pos - subject) < string_len) {
		size_t beg, end;

		err = _php_mb_onig_search(re, subject, subject_end, pos, subject_end, regs, 0);
		if (err < 0) {
			break;
		}

		beg = regs->beg[0];
		end = regs->end[0];

		if ((size_t)(pos - subject) >= end) {
			/* match ends at or before the scan position: step one byte on */
			pos++;
		} else {
			if (beg >= string_len || beg < (size_t)(chunk_pos - subject)) {
				err = -2;
				break;
			}
			/* add it to the array */
			add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
			--count;
			/* point at our new starting point */
			chunk_pos = pos = subject + end;
		}

		onig_region_free(regs, 0);
	}

	onig_region_free(regs, 1);

	/* see if we encountered an error */
	// ToDo investigate if this can actually/should happen ...
	if (err <= -2) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

		onig_error_code_to_str(err_str, err);
		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
		zend_array_destroy(Z_ARR_P(return_value));
		RETURN_FALSE;
	}

	/* otherwise we just have one last element to add to the array */
	if (subject_end > chunk_pos) {
		size_t remaining = (size_t)(subject_end - chunk_pos);

		add_next_index_stringl(return_value, (char *)chunk_pos, remaining);
	} else {
		add_next_index_stringl(return_value, "", 0);
	}
}
/* }}} */
/* {{{ Regular expression match for multibyte string */
/*
 * mb_ereg_match(string $pattern, string $string, ?string $options = null)
 * Returns true if the pattern matches starting at the first byte of the
 * string, false otherwise (including an invalidly encoded string or a
 * pattern that fails to compile). Throws a ValueError for an unsupported
 * option letter. Stack and retry limits are applied when they are positive
 * and below UINT_MAX.
 */
PHP_FUNCTION(mb_ereg_match)
{
	char *arg_pattern, *string;
	size_t arg_pattern_len, string_len;
	char *option_str = NULL;
	size_t option_str_len = 0;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigOptionType option = 0;
	OnigMatchParam *param;
	int status;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s!",
			&arg_pattern, &arg_pattern_len, &string, &string_len,
			&option_str, &option_str_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (option_str == NULL) {
		option |= MBREX(regex_default_options);
		syntax = MBREX(regex_default_syntax);
	} else if (!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax)) {
		RETURN_THROWS();
	}

	if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	param = onig_new_match_param();
	onig_initialize_match_param(param);
	if (MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) {
		onig_set_match_stack_limit_size_of_match_param(param, (unsigned int)MBSTRG(regex_stack_limit));
	}
	if (MBSTRG(regex_retry_limit) > 0 && MBSTRG(regex_retry_limit) < UINT_MAX) {
		onig_set_retry_limit_in_match_of_match_param(param, (unsigned int)MBSTRG(regex_retry_limit));
	}

	/* match */
	status = onig_match_with_param(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0, param);
	onig_free_match_param(param);

	if (status < 0) {
		RETVAL_FALSE;
	} else {
		RETVAL_TRUE;
	}
}
/* }}} */
/* regex search */
/* {{{ _php_mb_regex_ereg_search_exec */
/*
 * Common worker behind mb_ereg_search(), mb_ereg_search_pos() and
 * mb_ereg_search_regs().
 *
 * PHP arguments (both optional and nullable): pattern, options.
 * When a pattern is given it is compiled and stored in MBREX(search_re);
 * otherwise the previously stored pattern is used. The subject string and
 * the start offset are taken from MBREX(search_str) and MBREX(search_pos).
 *
 * `mode` selects what a successful search returns:
 *   1      array(match offset, match length) for the whole match
 *   2      array of captured substrings indexed by group number (false for
 *          a group whose offsets fail the bounds check); when the pattern
 *          has named groups they are additionally handed to
 *          mb_regex_groups_iter
 *   other  true
 *
 * Returns false on mismatch, on a search error (after an E_WARNING) and when
 * the pattern fails to compile. Throws when argument parsing or option
 * parsing fails, and when no pattern or no string is available.
 */
static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char *arg_pattern = NULL, *arg_options = NULL;
	size_t arg_pattern_len, arg_options_len;
	int err;
	size_t n, i, pos, len;
	/* Stored as int* in the OnigRegion struct */
	int beg, end;
	OnigOptionType option = 0;
	OnigUChar *str;
	OnigSyntaxType *syntax;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (arg_options) {
		/* A false return means option parsing raised an exception; stop
		 * here, before any search state is modified, instead of running
		 * the search with an exception pending. */
		if (!_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax)) {
			RETURN_THROWS();
		}
	} else {
		option |= MBREX(regex_default_options);
		syntax = MBREX(regex_default_syntax);
	}

	/* Registers of the previous search are obsolete from here on. */
	if (MBREX(search_regs)) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = NULL;
	}

	if (arg_pattern) {
		/* create regex pattern buffer */
		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
			RETURN_FALSE;
		}
	}

	pos = MBREX(search_pos);
	str = NULL;
	len = 0;
	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
		len = Z_STRLEN(MBREX(search_str));
	}

	if (MBREX(search_re) == NULL) {
		zend_throw_error(NULL, "No pattern was provided");
		RETURN_THROWS();
	}

	if (str == NULL) {
		zend_throw_error(NULL, "No string was provided");
		RETURN_THROWS();
	}

	MBREX(search_regs) = onig_region_new();

	/* Search forward from the stored offset to the end of the subject. */
	err = _php_mb_onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
	if (err == ONIG_MISMATCH) {
		/* Nothing left to find: park the offset at the end of the string. */
		MBREX(search_pos) = len;
		RETVAL_FALSE;
	} else if (err <= -2) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(err_str, err);
		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
		RETVAL_FALSE;
	} else {
		switch (mode) {
		case 1:
			array_init(return_value);
			beg = MBREX(search_regs)->beg[0];
			end = MBREX(search_regs)->end[0];
			add_next_index_long(return_value, beg);
			add_next_index_long(return_value, end - beg);
			break;
		case 2:
			array_init(return_value);
			n = MBREX(search_regs)->num_regs;
			for (i = 0; i < n; i++) {
				beg = MBREX(search_regs)->beg[i];
				end = MBREX(search_regs)->end[i];
				/* Only copy a group whose offsets lie inside the subject. */
				if (beg >= 0 && beg <= end && end <= len) {
					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
				} else {
					add_index_bool(return_value, i, 0);
				}
			}
			if (onig_number_of_names(MBREX(search_re)) > 0) {
				mb_regex_groups_iter_args args = {
					return_value,
					Z_STRVAL(MBREX(search_str)),
					Z_STRLEN(MBREX(search_str)),
					MBREX(search_regs)
				};
				onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
			}
			break;
		default:
			RETVAL_TRUE;
			break;
		}

		/* Continue the next search at the end of this match; if the match
		 * ends before the old offset, step one byte forward instead. */
		end = MBREX(search_regs)->end[0];
		if (pos <= end) {
			MBREX(search_pos) = end;
		} else {
			MBREX(search_pos) = pos + 1;
		}
	}

	/* Keep the registers only when the search succeeded. */
	if (err < 0) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = (OnigRegion *)NULL;
	}
}
/* }}} */
/* {{{ Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)
{
	/* Mode 0 takes the default branch of the shared worker, which yields
	 * true on a successful search. */
	const int result_mode = 0;

	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, result_mode);
}
/* }}} */
/* {{{ Regular expression search for multibyte string, returning the offset and length of the match */
PHP_FUNCTION(mb_ereg_search_pos)
{
	/* Mode 1 makes the shared worker return array(offset, length) for the
	 * whole match. */
	const int result_mode = 1;

	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, result_mode);
}
/* }}} */
/* {{{ Regular expression search for multibyte string, returning the matched substrings */
PHP_FUNCTION(mb_ereg_search_regs)
{
	/* Mode 2 makes the shared worker return the array of matched
	 * substrings, one entry per capture group. */
	const int result_mode = 2;

	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, result_mode);
}
/* }}} */
/* {{{ Initialize string and regular expression for search. */
/*
 * mb_ereg_search_init(string [, pattern [, options]])
 *
 * Stores the subject string (and, when given, the compiled pattern) used by
 * the mb_ereg_search*() family and discards the registers of any previous
 * search.
 *
 * Returns true when the string is valid in the current mbregex encoding and
 * resets the search offset to 0. Returns false when the encoding check fails
 * (the offset is then set to the string length) or when the pattern cannot
 * be compiled. Throws when argument parsing fails, when an empty pattern is
 * passed, or when option parsing fails.
 *
 * NOTE: when the pattern fails to compile, MBREX(search_re) has already been
 * overwritten with NULL and the function returns before the stored string
 * and registers are touched.
 */
PHP_FUNCTION(mb_ereg_search_init)
{
	zend_string *arg_str;
	char *arg_pattern = NULL, *arg_options = NULL;
	size_t arg_pattern_len = 0, arg_options_len = 0;
	OnigSyntaxType *syntax = NULL;
	OnigOptionType option;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|s!s!", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (arg_pattern && arg_pattern_len == 0) {
		zend_argument_value_error(2, "must not be empty");
		RETURN_THROWS();
	}

	if (arg_options) {
		option = 0;
		/* A false return means option parsing raised an exception; bail
		 * out before the stored pattern or string is replaced. */
		if (!_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax)) {
			RETURN_THROWS();
		}
	} else {
		option = MBREX(regex_default_options);
		syntax = MBREX(regex_default_syntax);
	}

	if (arg_pattern) {
		/* create regex pattern buffer */
		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
			RETURN_FALSE;
		}
	}

	/* Release the previous subject before taking a reference to the new one. */
	if (!Z_ISNULL(MBREX(search_str))) {
		zval_ptr_dtor(&MBREX(search_str));
	}

	ZVAL_STR_COPY(&MBREX(search_str), arg_str);

	if (php_mb_check_encoding(ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), php_mb_regex_get_mbctype_encoding())) {
		MBREX(search_pos) = 0;
		RETVAL_TRUE;
	} else {
		/* Badly encoded subject: place the offset at the end of the string. */
		MBREX(search_pos) = ZSTR_LEN(arg_str);
		RETVAL_FALSE;
	}

	if (MBREX(search_regs) != NULL) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = NULL;
	}
}
/* }}} */
/* {{{ Get the matched substrings of the most recent search */
/*
 * mb_ereg_search_getregs()
 *
 * Returns the capture groups recorded by the most recent successful search
 * as an array indexed by group number; a group whose offsets fail the bounds
 * check is reported as false. When the stored pattern has named groups they
 * are additionally handed to mb_regex_groups_iter.
 *
 * Returns false when there are no stored registers or no stored string.
 * Throws when called with arguments.
 */
PHP_FUNCTION(mb_ereg_search_getregs)
{
	size_t n, i, len;
	/* Stored as int* in the OnigRegion struct */
	int beg, end;
	OnigUChar *str;

	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}

	if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
		array_init(return_value);

		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
		len = Z_STRLEN(MBREX(search_str));
		n = MBREX(search_regs)->num_regs;
		for (i = 0; i < n; i++) {
			beg = MBREX(search_regs)->beg[i];
			end = MBREX(search_regs)->end[i];
			/* Only copy a group whose offsets lie inside the subject. */
			if (beg >= 0 && beg <= end && end <= len) {
				add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
			} else {
				add_index_bool(return_value, i, 0);
			}
		}
		/* search_re can be NULL while search_regs is still set:
		 * mb_ereg_search_init() overwrites search_re with the result of a
		 * failed compile and returns before it releases the registers.
		 * Skip the named-group step then instead of passing NULL to
		 * Oniguruma. */
		if (MBREX(search_re) != NULL && onig_number_of_names(MBREX(search_re)) > 0) {
			mb_regex_groups_iter_args args = {
				return_value,
				Z_STRVAL(MBREX(search_str)),
				len,
				MBREX(search_regs)
			};
			onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
		}
	} else {
		// TODO This seems to be some logical error, promote to Error
		RETVAL_FALSE;
	}
}
/* }}} */
/* {{{ Get search start position */
/*
 * mb_ereg_search_getpos()
 *
 * Returns the offset stored in MBREX(search_pos), i.e. the position at which
 * the next mb_ereg_search*() call starts. Throws when called with arguments.
 */
PHP_FUNCTION(mb_ereg_search_getpos)
{
	/* The function accepts no arguments. */
	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}

	RETVAL_LONG(MBREX(search_pos));
}
/* }}} */
/* {{{ Set search start position */
/*
 * mb_ereg_search_setpos(position)
 *
 * Sets the offset at which the next mb_ereg_search*() call starts. When a
 * search string is stored, a negative position counts back from its end and
 * the result must not exceed its length. Without a stored string only a
 * negative position is rejected.
 *
 * Always returns true on success; throws a ValueError for an out-of-range
 * position and when argument parsing fails.
 */
PHP_FUNCTION(mb_ereg_search_setpos)
{
	zend_long position;
	int has_subject;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
		RETURN_THROWS();
	}

	/* The length of the search string is only known when one is stored. */
	has_subject = !Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING;

	/* Accept negative position if length of search string can be determined */
	if (has_subject && position < 0) {
		position += Z_STRLEN(MBREX(search_str));
	}

	if (position < 0
			|| (has_subject && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
		zend_argument_value_error(1, "is out of range");
		RETURN_THROWS();
	}

	MBREX(search_pos) = position;

	// TODO Return void
	RETURN_TRUE;
}
/* }}} */
/* {{{ _php_mb_regex_set_options */
/*
 * Installs `options` and `syntax` as the module-wide mbregex defaults.
 *
 * prev_options / prev_syntax are optional out-parameters: when non-NULL they
 * receive the defaults that were in effect before the call.
 */
static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
{
	/* Save each old value before it is overwritten. */
	if (prev_options) {
		*prev_options = MBREX(regex_default_options);
	}
	MBREX(regex_default_options) = options;

	if (prev_syntax) {
		*prev_syntax = MBREX(regex_default_syntax);
	}
	MBREX(regex_default_syntax) = syntax;
}
/* }}} */
/* {{{ Set or get the default options for mbregex functions */
/*
 * mb_regex_set_options([options])
 *
 * With an option string: parses it, installs the result as the new mbregex
 * defaults and returns the option string describing the PREVIOUS defaults.
 * Without one (or with null): returns the option string describing the
 * current defaults and changes nothing.
 *
 * Throws when argument parsing or option parsing fails.
 */
PHP_FUNCTION(mb_regex_set_options)
{
	OnigOptionType reported_opt;
	OnigSyntaxType *reported_syntax;
	char *string = NULL;
	size_t string_len;
	char buf[16];

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!",
			&string, &string_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (string == NULL) {
		/* Query only: report what is currently in effect. */
		reported_opt = MBREX(regex_default_options);
		reported_syntax = MBREX(regex_default_syntax);
	} else {
		OnigOptionType new_opt = 0;
		OnigSyntaxType *new_syntax = NULL;

		if (!_php_mb_regex_init_options(string, string_len, &new_opt, &new_syntax)) {
			RETURN_THROWS();
		}
		/* Swap in the new defaults and capture the ones they replace. */
		_php_mb_regex_set_options(new_opt, new_syntax, &reported_opt, &reported_syntax);
	}

	_php_mb_regex_get_option_string(buf, sizeof(buf), reported_opt, reported_syntax);

	RETVAL_STRING(buf);
}
/* }}} */
#endif /* HAVE_MBREGEX */