php-src/ext/mbstring/php_mbregex.c
Dmitry Stogov 7fcd4064c0 Merge branch 'master' into phpng
* master: (41 commits)
  fix test - output can be chunked
  fix test
  fix test
  Fixed test for commit 997be125eb
  Add bug fix to NEWS
  Update UPGRADING according to bug fix
  fix test
  improve CURL tests to allow testing without separate server
  improve CURL tests to allow testing without separate server
  Fixed bug #67199	mb_regex_encoding mishmash
  Fix bug #67248 (imageaffinematrixget missing check of parameters)
  Fix bug #67247	spl_fixedarray_resize integer overflow
  fix news
  add tests stuff to README
  Updated NEWS
  Fix Linux specific fail in error traces (cherry-picked and fix for bug #67245) Linux apparently does not like memcpy in overlapping regions...
  - Fixed off-by-one in phar_build (patch by crrodriguez at opensuse dot org)
  - Move checking
  - Fixed missing NULL check in SimpleXMLElement::xpath()
  - Fixed missing NULL check
  ...

Conflicts:
	ext/bz2/bz2.c
	ext/gd/gd.c
	ext/mbstring/php_mbregex.c
	ext/session/tests/031.phpt
	ext/simplexml/simplexml.c
	ext/spl/spl_fixedarray.c
2014-05-13 15:24:40 +04:00

1471 lines
36 KiB
C

/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2014 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_ini.h"
#if HAVE_MBREGEX
#include "ext/standard/php_smart_str.h"
#include "ext/standard/info.h"
#include "php_mbregex.h"
#include "mbstring.h"
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
#include <oniguruma.h>
#undef UChar
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
struct _zend_mb_regex_globals {
OnigEncoding default_mbctype;
OnigEncoding current_mbctype;
HashTable ht_rc;
zval search_str;
zval *search_str_val;
unsigned int search_pos;
php_mb_regex_t *search_re;
OnigRegion *search_regs;
OnigOptionType regex_default_options;
OnigSyntaxType *regex_default_syntax;
};
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
/* {{{ static void php_mb_regex_free_cache() */
static void php_mb_regex_free_cache(zval *el) {
onig_free((php_mb_regex_t *)Z_PTR_P(el));
}
/* }}} */
/* {{{ _php_mb_regex_globals_ctor */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
pglobals->default_mbctype = ONIG_ENCODING_UTF8;
pglobals->current_mbctype = ONIG_ENCODING_UTF8;
zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1);
ZVAL_UNDEF(&pglobals->search_str);
pglobals->search_re = (php_mb_regex_t*)NULL;
pglobals->search_pos = 0;
pglobals->search_regs = (OnigRegion*)NULL;
pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
return SUCCESS;
}
/* }}} */
/* {{{ _php_mb_regex_globals_dtor */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
zend_hash_destroy(&pglobals->ht_rc);
}
/* }}} */
/* {{{ php_mb_regex_globals_alloc */
zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
{
zend_mb_regex_globals *pglobals = pemalloc(
sizeof(zend_mb_regex_globals), 1);
if (!pglobals) {
return NULL;
}
if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
pefree(pglobals, 1);
return NULL;
}
return pglobals;
}
/* }}} */
/* {{{ php_mb_regex_globals_free */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
if (!pglobals) {
return;
}
_php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
pefree(pglobals, 1);
}
/* }}} */
/* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)
{
onig_init();
return SUCCESS;
}
/* }}} */
/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)
{
onig_end();
return SUCCESS;
}
/* }}} */
/* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)
{
return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
}
/* }}} */
/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)
{
MBREX(current_mbctype) = MBREX(default_mbctype);
if (!Z_ISUNDEF(MBREX(search_str))) {
zval_ptr_dtor(&MBREX(search_str));
ZVAL_UNDEF(&MBREX(search_str));
}
MBREX(search_pos) = 0;
if (MBREX(search_regs) != NULL) {
onig_region_free(MBREX(search_regs), 1);
MBREX(search_regs) = (OnigRegion *)NULL;
}
zend_hash_clean(&MBREX(ht_rc));
return SUCCESS;
}
/* }}} */
/* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)
{
char buf[32];
php_info_print_table_start();
php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
snprintf(buf, sizeof(buf), "%d.%d.%d",
ONIGURUMA_VERSION_MAJOR,
ONIGURUMA_VERSION_MINOR,
ONIGURUMA_VERSION_TEENY);
#ifdef PHP_ONIG_BUNDLED
#ifdef USE_COMBINATION_EXPLOSION_CHECK
php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
#else /* USE_COMBINATION_EXPLOSION_CHECK */
php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
#endif /* PHP_BUNDLED_ONIG */
php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
php_info_print_table_end();
}
/* }}} */
/*
* encoding name resolver
*/
/* {{{ encoding name map */
typedef struct _php_mb_regex_enc_name_map_t {
const char *names;
OnigEncoding code;
} php_mb_regex_enc_name_map_t;
php_mb_regex_enc_name_map_t enc_name_map[] = {
#ifdef ONIG_ENCODING_EUC_JP
{
"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
ONIG_ENCODING_EUC_JP
},
#endif
#ifdef ONIG_ENCODING_UTF8
{
"UTF-8\0UTF8\0",
ONIG_ENCODING_UTF8
},
#endif
#ifdef ONIG_ENCODING_UTF16_BE
{
"UTF-16\0UTF-16BE\0",
ONIG_ENCODING_UTF16_BE
},
#endif
#ifdef ONIG_ENCODING_UTF16_LE
{
"UTF-16LE\0",
ONIG_ENCODING_UTF16_LE
},
#endif
#ifdef ONIG_ENCODING_UTF32_BE
{
"UCS-4\0UTF-32\0UTF-32BE\0",
ONIG_ENCODING_UTF32_BE
},
#endif
#ifdef ONIG_ENCODING_UTF32_LE
{
"UCS-4LE\0UTF-32LE\0",
ONIG_ENCODING_UTF32_LE
},
#endif
#ifdef ONIG_ENCODING_SJIS
{
"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
ONIG_ENCODING_SJIS
},
#endif
#ifdef ONIG_ENCODING_BIG5
{
"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
ONIG_ENCODING_BIG5
},
#endif
#ifdef ONIG_ENCODING_EUC_CN
{
"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
ONIG_ENCODING_EUC_CN
},
#endif
#ifdef ONIG_ENCODING_EUC_TW
{
"EUC-TW\0EUCTW\0EUC_TW\0",
ONIG_ENCODING_EUC_TW
},
#endif
#ifdef ONIG_ENCODING_EUC_KR
{
"EUC-KR\0EUCKR\0EUC_KR\0",
ONIG_ENCODING_EUC_KR
},
#endif
#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
{
"KOI8\0KOI-8\0",
ONIG_ENCODING_KOI8
},
#endif
#ifdef ONIG_ENCODING_KOI8_R
{
"KOI8R\0KOI8-R\0KOI-8R\0",
ONIG_ENCODING_KOI8_R
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_1
{
"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
ONIG_ENCODING_ISO_8859_1
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_2
{
"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
ONIG_ENCODING_ISO_8859_2
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_3
{
"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
ONIG_ENCODING_ISO_8859_3
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_4
{
"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
ONIG_ENCODING_ISO_8859_4
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_5
{
"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
ONIG_ENCODING_ISO_8859_5
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_6
{
"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
ONIG_ENCODING_ISO_8859_6
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_7
{
"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
ONIG_ENCODING_ISO_8859_7
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_8
{
"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
ONIG_ENCODING_ISO_8859_8
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_9
{
"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
ONIG_ENCODING_ISO_8859_9
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_10
{
"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
ONIG_ENCODING_ISO_8859_10
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_11
{
"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
ONIG_ENCODING_ISO_8859_11
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_13
{
"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
ONIG_ENCODING_ISO_8859_13
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_14
{
"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
ONIG_ENCODING_ISO_8859_14
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_15
{
"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
ONIG_ENCODING_ISO_8859_15
},
#endif
#ifdef ONIG_ENCODING_ISO_8859_16
{
"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
ONIG_ENCODING_ISO_8859_16
},
#endif
#ifdef ONIG_ENCODING_ASCII
{
"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
ONIG_ENCODING_ASCII
},
#endif
{ NULL, ONIG_ENCODING_UNDEF }
};
/* }}} */
/* {{{ php_mb_regex_name2mbctype */
static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
{
const char *p;
php_mb_regex_enc_name_map_t *mapping;
if (pname == NULL || !*pname) {
return ONIG_ENCODING_UNDEF;
}
for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
if (strcasecmp(p, pname) == 0) {
return mapping->code;
}
}
}
return ONIG_ENCODING_UNDEF;
}
/* }}} */
/* {{{ php_mb_regex_mbctype2name */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
php_mb_regex_enc_name_map_t *mapping;
for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
if (mapping->code == mbctype) {
return mapping->names;
}
}
return NULL;
}
/* }}} */
/* {{{ php_mb_regex_set_mbctype */
int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
{
OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
if (mbctype == ONIG_ENCODING_UNDEF) {
return FAILURE;
}
MBREX(current_mbctype) = mbctype;
return SUCCESS;
}
/* }}} */
/* {{{ php_mb_regex_set_default_mbctype */
int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
{
OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
if (mbctype == ONIG_ENCODING_UNDEF) {
return FAILURE;
}
MBREX(default_mbctype) = mbctype;
return SUCCESS;
}
/* }}} */
/* {{{ php_mb_regex_get_mbctype */
const char *php_mb_regex_get_mbctype(TSRMLS_D)
{
return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
}
/* }}} */
/* {{{ php_mb_regex_get_default_mbctype */
const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
{
return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
}
/* }}} */
/*
* regex cache
*/
/* {{{ php_mbregex_compile_pattern */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
{
int err_code = 0;
php_mb_regex_t *retval = NULL, *rc = NULL;
OnigErrorInfo err_info;
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
if (!rc || rc->options != options || rc->enc != enc || rc->syntax != syntax) {
if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
onig_error_code_to_str(err_str, err_code, err_info);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
retval = NULL;
goto out;
}
zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
} else if (rc) {
retval = rc;
}
out:
return retval;
}
/* }}} */
/* {{{ _php_mb_regex_get_option_string */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
size_t len_left = len;
size_t len_req = 0;
char *p = str;
char c;
if ((option & ONIG_OPTION_IGNORECASE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'i';
}
++len_req;
}
if ((option & ONIG_OPTION_EXTEND) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'x';
}
++len_req;
}
if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
if (len_left > 0) {
--len_left;
*(p++) = 'p';
}
++len_req;
} else {
if ((option & ONIG_OPTION_MULTILINE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'm';
}
++len_req;
}
if ((option & ONIG_OPTION_SINGLELINE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 's';
}
++len_req;
}
}
if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'l';
}
++len_req;
}
if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'n';
}
++len_req;
}
c = 0;
if (syntax == ONIG_SYNTAX_JAVA) {
c = 'j';
} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
c = 'u';
} else if (syntax == ONIG_SYNTAX_GREP) {
c = 'g';
} else if (syntax == ONIG_SYNTAX_EMACS) {
c = 'c';
} else if (syntax == ONIG_SYNTAX_RUBY) {
c = 'r';
} else if (syntax == ONIG_SYNTAX_PERL) {
c = 'z';
} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
c = 'b';
} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
c = 'd';
}
if (c != 0) {
if (len_left > 0) {
--len_left;
*(p++) = c;
}
++len_req;
}
if (len_left > 0) {
--len_left;
*(p++) = '\0';
}
++len_req;
if (len < len_req) {
return len_req;
}
return 0;
}
/* }}} */
/* {{{ _php_mb_regex_init_options */
static void
_php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
{
int n;
char c;
int optm = 0;
*syntax = ONIG_SYNTAX_RUBY;
if (parg != NULL) {
n = 0;
while(n < narg) {
c = parg[n++];
switch (c) {
case 'i':
optm |= ONIG_OPTION_IGNORECASE;
break;
case 'x':
optm |= ONIG_OPTION_EXTEND;
break;
case 'm':
optm |= ONIG_OPTION_MULTILINE;
break;
case 's':
optm |= ONIG_OPTION_SINGLELINE;
break;
case 'p':
optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
break;
case 'l':
optm |= ONIG_OPTION_FIND_LONGEST;
break;
case 'n':
optm |= ONIG_OPTION_FIND_NOT_EMPTY;
break;
case 'j':
*syntax = ONIG_SYNTAX_JAVA;
break;
case 'u':
*syntax = ONIG_SYNTAX_GNU_REGEX;
break;
case 'g':
*syntax = ONIG_SYNTAX_GREP;
break;
case 'c':
*syntax = ONIG_SYNTAX_EMACS;
break;
case 'r':
*syntax = ONIG_SYNTAX_RUBY;
break;
case 'z':
*syntax = ONIG_SYNTAX_PERL;
break;
case 'b':
*syntax = ONIG_SYNTAX_POSIX_BASIC;
break;
case 'd':
*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
break;
case 'e':
if (eval != NULL) *eval = 1;
break;
default:
break;
}
}
if (option != NULL) *option|=optm;
}
}
/* }}} */
/*
* php functions
*/
/* {{{ proto string mb_regex_encoding([string encoding])
Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)
{
size_t argc = ZEND_NUM_ARGS();
char *encoding;
int encoding_len;
OnigEncoding mbctype;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
return;
}
if (argc == 0) {
const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
if (retval == NULL) {
RETURN_FALSE;
}
RETURN_STRING((char *)retval);
} else if (argc == 1) {
mbctype = _php_mb_regex_name2mbctype(encoding);
if (mbctype == ONIG_ENCODING_UNDEF) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
RETURN_FALSE;
}
MBREX(current_mbctype) = mbctype;
RETURN_TRUE;
}
}
/* }}} */
/* {{{ _php_mb_regex_ereg_exec */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
zval *arg_pattern, *array;
char *string;
int string_len;
php_mb_regex_t *re;
OnigRegion *regs = NULL;
int i, match_len, beg, end;
OnigOptionType options;
char *str;
array = NULL;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
RETURN_FALSE;
}
options = MBREX(regex_default_options);
if (icase) {
options |= ONIG_OPTION_IGNORECASE;
}
/* compile the regular expression from the supplied regex */
if (Z_TYPE_P(arg_pattern) != IS_STRING) {
/* we convert numbers to integers and treat them as a string */
if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
convert_to_long_ex(arg_pattern); /* get rid of decimal places */
}
convert_to_string_ex(arg_pattern);
/* don't bother doing an extended regex with just a number */
}
if (!Z_STRVAL_P(arg_pattern) || Z_STRLEN_P(arg_pattern) == 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
RETVAL_FALSE;
goto out;
}
re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
if (re == NULL) {
RETVAL_FALSE;
goto out;
}
regs = onig_region_new();
/* actually execute the regular expression */
if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
RETVAL_FALSE;
goto out;
}
match_len = 1;
str = string;
if (array != NULL) {
ZVAL_DEREF(array);
zval_dtor(array);
array_init(array);
match_len = regs->end[0] - regs->beg[0];
for (i = 0; i < regs->num_regs; i++) {
beg = regs->beg[i];
end = regs->end[i];
if (beg >= 0 && beg < end && end <= string_len) {
add_index_stringl(array, i, (char *)&str[beg], end - beg);
} else {
add_index_bool(array, i, 0);
}
}
}
if (match_len == 0) {
match_len = 1;
}
RETVAL_LONG(match_len);
out:
if (regs != NULL) {
onig_region_free(regs, 1);
}
}
/* }}} */
/* {{{ proto int mb_ereg(string pattern, string string [, array registers])
Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)
{
_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
/* {{{ proto int mb_eregi(string pattern, string string [, array registers])
Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)
{
_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
/* {{{ _php_mb_regex_ereg_replace_exec */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
zval *arg_pattern_zval;
char *arg_pattern;
int arg_pattern_len;
char *replace;
int replace_len;
zend_fcall_info arg_replace_fci;
zend_fcall_info_cache arg_replace_fci_cache;
char *string;
int string_len;
char *p;
php_mb_regex_t *re;
OnigSyntaxType *syntax;
OnigRegion *regs = NULL;
smart_str out_buf = {0};
smart_str eval_buf = {0};
smart_str *pbuf;
int i, err, eval, n;
OnigUChar *pos;
OnigUChar *string_lim;
char *description = NULL;
char pat_buf[2];
const mbfl_encoding *enc;
{
const char *current_enc_name;
current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
if (current_enc_name == NULL ||
(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
RETURN_FALSE;
}
}
eval = 0;
{
char *option_str = NULL;
int option_str_len = 0;
if (!is_callable) {
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zss|s",
&arg_pattern_zval,
&replace, &replace_len,
&string, &string_len,
&option_str, &option_str_len) == FAILURE) {
RETURN_FALSE;
}
} else {
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zfs|s",
&arg_pattern_zval,
&arg_replace_fci, &arg_replace_fci_cache,
&string, &string_len,
&option_str, &option_str_len) == FAILURE) {
RETURN_FALSE;
}
}
if (option_str != NULL) {
_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
} else {
options |= MBREX(regex_default_options);
syntax = MBREX(regex_default_syntax);
}
}
if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
arg_pattern = Z_STRVAL_P(arg_pattern_zval);
arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
} else {
/* FIXME: this code is not multibyte aware! */
convert_to_long_ex(arg_pattern_zval);
pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
pat_buf[1] = '\0';
arg_pattern = pat_buf;
arg_pattern_len = 1;
}
/* create regex pattern buffer */
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
if (re == NULL) {
RETURN_FALSE;
}
if (eval || is_callable) {
pbuf = &eval_buf;
description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
} else {
pbuf = &out_buf;
description = NULL;
}
if (is_callable) {
if (eval) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
RETURN_FALSE;
}
}
/* do the actual work */
err = 0;
pos = (OnigUChar *)string;
string_lim = (OnigUChar*)(string + string_len);
regs = onig_region_new();
while (err >= 0) {
err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
if (err <= -2) {
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
break;
}
if (err >= 0) {
#if moriyoshi_0
if (regs->beg[0] == regs->end[0]) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
break;
}
#endif
/* copy the part of the string before the match */
smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
if (!is_callable) {
/* copy replacement and backrefs */
i = 0;
p = replace;
while (i < replace_len) {
int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
n = -1;
if ((replace_len - i) >= 2 && fwd == 1 &&
p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
n = p[1] - '0';
}
if (n >= 0 && n < regs->num_regs) {
if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
}
p += 2;
i += 2;
} else {
smart_str_appendl(pbuf, p, fwd);
p += fwd;
i += fwd;
}
}
}
if (eval) {
zval v;
/* null terminate buffer */
smart_str_0(&eval_buf);
/* do eval */
if (zend_eval_stringl(eval_buf.s->val, eval_buf.s->len, &v, description TSRMLS_CC) == FAILURE) {
efree(description);
php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.s->val);
/* zend_error() does not return in this case */
}
/* result of eval */
convert_to_string(&v);
smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
/* Clean up */
eval_buf.s->len = 0;
zval_dtor(&v);
} else if (is_callable) {
zval args[1];
zval subpats, retval;
int i;
array_init(&subpats);
for (i = 0; i < regs->num_regs; i++) {
add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
}
ZVAL_COPY_VALUE(&args[0], &subpats);
/* null terminate buffer */
smart_str_0(&eval_buf);
arg_replace_fci.param_count = 1;
arg_replace_fci.params = args;
arg_replace_fci.retval = &retval;
if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS &&
!Z_ISUNDEF(retval)) {
convert_to_string_ex(&retval);
smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
if (eval_buf.s) {
eval_buf.s->len = 0;
}
zval_ptr_dtor(&retval);
} else {
efree(description);
if (!EG(exception)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
}
}
zval_ptr_dtor(&subpats);
}
n = regs->end[0];
if ((pos - (OnigUChar *)string) < n) {
pos = (OnigUChar *)string + n;
} else {
if (pos < string_lim) {
smart_str_appendl(&out_buf, pos, 1);
}
pos++;
}
} else { /* nomatch */
/* stick that last bit of string on our output */
if (string_lim - pos > 0) {
smart_str_appendl(&out_buf, pos, string_lim - pos);
}
}
onig_region_free(regs, 0);
}
if (description) {
efree(description);
}
if (regs != NULL) {
onig_region_free(regs, 1);
}
smart_str_free(&eval_buf);
if (err <= -2) {
smart_str_free(&out_buf);
RETVAL_FALSE;
} else if (out_buf.s) {
smart_str_0(&out_buf);
RETVAL_STR(out_buf.s);
} else {
RETVAL_EMPTY_STRING();
}
}
/* }}} */
/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
/* }}} */
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
}
/* }}} */
/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
/* }}} */
/* {{{ proto array mb_split(string pattern, string string [, int limit])
split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)
{
char *arg_pattern;
int arg_pattern_len;
php_mb_regex_t *re;
OnigRegion *regs = NULL;
char *string;
OnigUChar *pos, *chunk_pos;
int string_len;
int n, err;
long count = -1;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
RETURN_FALSE;
}
if (count > 0) {
count--;
}
/* create regex pattern buffer */
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
array_init(return_value);
chunk_pos = pos = (OnigUChar *)string;
err = 0;
regs = onig_region_new();
/* churn through str, generating array entries as we go */
while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
int beg, end;
err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
if (err < 0) {
break;
}
beg = regs->beg[0], end = regs->end[0];
/* add it to the array */
if ((pos - (OnigUChar *)string) < end) {
if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
--count;
} else {
err = -2;
break;
}
/* point at our new starting point */
chunk_pos = pos = (OnigUChar *)string + end;
} else {
pos++;
}
onig_region_free(regs, 0);
}
onig_region_free(regs, 1);
/* see if we encountered an error */
if (err <= -2) {
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
zval_dtor(return_value);
RETURN_FALSE;
}
/* otherwise we just have one last element to add to the array */
n = ((OnigUChar *)(string + string_len) - chunk_pos);
if (n > 0) {
add_next_index_stringl(return_value, (char *)chunk_pos, n);
} else {
add_next_index_stringl(return_value, "", 0);
}
}
/* }}} */
/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)
{
char *arg_pattern;
int arg_pattern_len;
char *string;
int string_len;
php_mb_regex_t *re;
OnigSyntaxType *syntax;
OnigOptionType option = 0;
int err;
{
char *option_str = NULL;
int option_str_len = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
&arg_pattern, &arg_pattern_len, &string, &string_len,
&option_str, &option_str_len)==FAILURE) {
RETURN_FALSE;
}
if (option_str != NULL) {
_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
} else {
option |= MBREX(regex_default_options);
syntax = MBREX(regex_default_syntax);
}
}
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
/* match */
err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
if (err >= 0) {
RETVAL_TRUE;
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* regex search */
/* {{{ _php_mb_regex_ereg_search_exec */
static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
size_t argc = ZEND_NUM_ARGS();
char *arg_pattern, *arg_options;
int arg_pattern_len, arg_options_len;
int n, i, err, pos, len, beg, end;
OnigOptionType option;
OnigUChar *str;
OnigSyntaxType *syntax;
if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
return;
}
option = MBREX(regex_default_options);
if (argc == 2) {
option = 0;
_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
}
if (argc > 0) {
/* create regex pattern buffer */
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
}
pos = MBREX(search_pos);
str = NULL;
len = 0;
if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
len = Z_STRLEN(MBREX(search_str));
}
if (MBREX(search_re) == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
RETURN_FALSE;
}
if (str == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
RETURN_FALSE;
}
if (MBREX(search_regs)) {
onig_region_free(MBREX(search_regs), 1);
}
MBREX(search_regs) = onig_region_new();
err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
if (err == ONIG_MISMATCH) {
MBREX(search_pos) = len;
RETVAL_FALSE;
} else if (err <= -2) {
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
RETVAL_FALSE;
} else {
if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
}
switch (mode) {
case 1:
array_init(return_value);
beg = MBREX(search_regs)->beg[0];
end = MBREX(search_regs)->end[0];
add_next_index_long(return_value, beg);
add_next_index_long(return_value, end - beg);
break;
case 2:
array_init(return_value);
n = MBREX(search_regs)->num_regs;
for (i = 0; i < n; i++) {
beg = MBREX(search_regs)->beg[i];
end = MBREX(search_regs)->end[i];
if (beg >= 0 && beg <= end && end <= len) {
add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
} else {
add_index_bool(return_value, i, 0);
}
}
break;
default:
RETVAL_TRUE;
break;
}
end = MBREX(search_regs)->end[0];
if (pos < end) {
MBREX(search_pos) = end;
} else {
MBREX(search_pos) = pos + 1;
}
}
if (err < 0) {
onig_region_free(MBREX(search_regs), 1);
MBREX(search_regs) = (OnigRegion *)NULL;
}
}
/* }}} */
/* {{{ proto bool mb_ereg_search([string pattern[, string option]])
Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)
{
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)
{
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)
{
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
/* }}} */
/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)
{
size_t argc = ZEND_NUM_ARGS();
zval *arg_str;
char *arg_pattern = NULL, *arg_options = NULL;
int arg_pattern_len = 0, arg_options_len = 0;
OnigSyntaxType *syntax = NULL;
OnigOptionType option;
if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
return;
}
if (argc > 1 && arg_pattern_len == 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
RETURN_FALSE;
}
option = MBREX(regex_default_options);
syntax = MBREX(regex_default_syntax);
if (argc == 3) {
option = 0;
_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
}
if (argc > 1) {
/* create regex pattern buffer */
if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
}
if (!Z_ISNULL(MBREX(search_str))) {
zval_ptr_dtor(&MBREX(search_str));
}
ZVAL_COPY(&MBREX(search_str), arg_str);
SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
MBREX(search_pos) = 0;
if (MBREX(search_regs) != NULL) {
onig_region_free(MBREX(search_regs), 1);
MBREX(search_regs) = NULL;
}
RETURN_TRUE;
}
/* }}} */
/* {{{ proto array mb_ereg_search_getregs(void)
Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)
{
int n, i, len, beg, end;
OnigUChar *str;
if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
array_init(return_value);
str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
len = Z_STRLEN(MBREX(search_str));
n = MBREX(search_regs)->num_regs;
for (i = 0; i < n; i++) {
beg = MBREX(search_regs)->beg[i];
end = MBREX(search_regs)->end[i];
if (beg >= 0 && beg <= end && end <= len) {
add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
} else {
add_index_bool(return_value, i, 0);
}
}
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto int mb_ereg_search_getpos(void)
Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)
{
RETVAL_LONG(MBREX(search_pos));
}
/* }}} */
/* {{{ proto bool mb_ereg_search_setpos(int position)
Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)
{
long position;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
return;
}
if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN(MBREX(search_str)))) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
MBREX(search_pos) = 0;
RETURN_FALSE;
}
MBREX(search_pos) = position;
RETURN_TRUE;
}
/* }}} */
/* {{{ php_mb_regex_set_options */
static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
{
if (prev_options != NULL) {
*prev_options = MBREX(regex_default_options);
}
if (prev_syntax != NULL) {
*prev_syntax = MBREX(regex_default_syntax);
}
MBREX(regex_default_options) = options;
MBREX(regex_default_syntax) = syntax;
}
/* }}} */
/* {{{ proto string mb_regex_set_options([string options])
Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)
{
OnigOptionType opt;
OnigSyntaxType *syntax;
char *string = NULL;
int string_len;
char buf[16];
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
&string, &string_len) == FAILURE) {
RETURN_FALSE;
}
if (string != NULL) {
opt = 0;
syntax = NULL;
_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
_php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
} else {
opt = MBREX(regex_default_options);
syntax = MBREX(regex_default_syntax);
}
_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
RETVAL_STRING(buf);
}
/* }}} */
#endif /* HAVE_MBREGEX */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: fdm=marker
* vim: noet sw=4 ts=4
*/