Fix zend.multibyte oddities. Hope this will address all the known problems.

This commit is contained in:
Moriyoshi Koizumi 2011-03-06 07:00:30 +00:00
parent 6ba343aeff
commit cdb9ee0d1a
10 changed files with 643 additions and 764 deletions

View File

@ -5988,8 +5988,7 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */
/* need to re-scan if input filter changed */
if (old_input_filter != LANG_SCNG(input_filter) ||
((old_input_filter == zend_multibyte_script_encoding_filter) &&
(new_encoding != old_encoding))) {
(old_input_filter && new_encoding != old_encoding)) {
zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -120,6 +120,33 @@ do { \
BEGIN_EXTERN_C()
/* Encoding filter: converts `from` (from_length bytes, in the scanner's current
 * script encoding) into the configured internal encoding.
 *
 * The converted buffer and its length are handed back through `to` / `to_length`
 * by zend_multibyte_encoding_converter(), whose return value is passed through
 * unchanged.
 *
 * zend_multibyte_set_filter() installs this function both as the input filter
 * (internal encoding is lexer-compatible) and as the output filter (internal
 * encoding is NOT lexer-compatible), so the only precondition that holds on
 * every path is that an internal encoding is configured. Asserting lexer
 * compatibility here would abort debug builds on the output-filter path. */
static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);

	assert(internal_encoding);
	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
}
/* Encoding filter: converts `from` (from_length bytes, in the scanner's current
 * script encoding) into UTF-8, which serves as the intermediate encoding.
 * Output buffer/length are returned through `to` / `to_length`; the converter's
 * return value is passed through unchanged. */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *source = LANG_SCNG(script_encoding);
	const zend_encoding *target = zend_multibyte_encoding_utf8;

	return zend_multibyte_encoding_converter(to, to_length, from, from_length, target, source TSRMLS_CC);
}
/* Encoding filter: converts `from` (from_length bytes, in the UTF-8 intermediate
 * encoding) back into the scanner's current script encoding.
 * Output buffer/length are returned through `to` / `to_length`; the converter's
 * return value is passed through unchanged. */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *source = zend_multibyte_encoding_utf8;
	const zend_encoding *target = LANG_SCNG(script_encoding);

	return zend_multibyte_encoding_converter(to, to_length, from, from_length, target, source TSRMLS_CC);
}
/* Encoding filter: converts `from` (from_length bytes, in the UTF-8 intermediate
 * encoding) into the configured internal encoding.
 *
 * Output buffer/length are returned through `to` / `to_length`; the converter's
 * return value is passed through unchanged.
 *
 * zend_multibyte_set_filter() installs this as the output filter only when
 * neither the script nor the internal encoding is lexer-compatible, so the
 * internal encoding must NOT be asserted to be lexer-compatible here; the only
 * valid precondition is that an internal encoding is configured. */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);

	assert(internal_encoding);
	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
			internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
}
static void _yy_push_state(int new_state TSRMLS_DC)
{
zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
@ -321,7 +348,6 @@ static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
/* remove BOM */
script = (unsigned char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size);
memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size);
efree(LANG_SCNG(script_org));
LANG_SCNG(script_org) = script;
LANG_SCNG(script_org_size) -= bom_size;
@ -418,8 +444,8 @@ ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSR
if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter;
LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter;
LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
} else {
LANG_SCNG(input_filter) = NULL;
LANG_SCNG(output_filter) = NULL;
@ -427,9 +453,17 @@ ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSR
return SUCCESS;
}
/* both script and internal encodings are incompatible w/ flex */
LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter;
LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter;
if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
LANG_SCNG(output_filter) = NULL;
} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
LANG_SCNG(input_filter) = NULL;
LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
} else {
/* both script and internal encodings are incompatible w/ flex */
LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
}
return 0;
}
@ -789,21 +823,9 @@ int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
{
size_t original_offset, offset, length;
size_t length;
unsigned char *new_yy_start;
/* calculate current position */
offset = original_offset = YYCURSOR - SCNG(yy_start);
if (old_input_filter && offset > 0) {
const zend_encoding *new_encoding = SCNG(script_encoding);
zend_encoding_filter new_filter = SCNG(input_filter);
SCNG(script_encoding) = old_encoding;
SCNG(input_filter) = old_input_filter;
offset = zend_get_scanned_file_offset(TSRMLS_C);
SCNG(script_encoding) = new_encoding;
SCNG(input_filter) = new_filter;
}
/* convert and set */
if (!SCNG(input_filter)) {
if (SCNG(script_filtered)) {
@ -811,10 +833,10 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter
SCNG(script_filtered) = NULL;
}
SCNG(script_filtered_size) = 0;
length = SCNG(script_org_size) - offset;
new_yy_start = SCNG(script_org) + offset;
length = SCNG(script_org_size);
new_yy_start = SCNG(script_org);
} else {
if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC)) {
if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
}

View File

@ -1,4 +1,4 @@
/* Generated by re2c 0.13.5 on Mon Jan 3 06:07:39 2011 */
/* Generated by re2c 0.13.5 on Sun Mar 6 15:09:12 2011 */
#line 3 "Zend/zend_language_scanner_defs.h"
enum YYCONDTYPE {

View File

@ -206,26 +206,6 @@ ZEND_API int zend_multibyte_set_script_encoding_by_string(const char *new_value,
return SUCCESS;
}
/* Converts `from` (in the scanner's script encoding) to the internal encoding,
 * substituting UTF-8 as the target when no internal encoding is set or the
 * internal encoding fails the lexer-compatibility check.
 * Output buffer/length come back through `to` / `to_length`. */
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *target = zend_multibyte_get_internal_encoding(TSRMLS_C);

	if (!(target && zend_multibyte_check_lexer_compatibility(target))) {
		/* fall back to the intermediate encoding */
		target = zend_multibyte_encoding_utf8;
	}

	return zend_multibyte_encoding_converter(to, to_length, from, from_length, target, LANG_SCNG(script_encoding) TSRMLS_CC);
}
/* Converts `from` to the scanner's script encoding. The source encoding is the
 * internal encoding, or UTF-8 when no internal encoding is set or it fails the
 * lexer-compatibility check.
 * Output buffer/length come back through `to` / `to_length`. */
ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *source = zend_multibyte_get_internal_encoding(TSRMLS_C);

	if (!(source && zend_multibyte_check_lexer_compatibility(source))) {
		/* fall back to the intermediate encoding */
		source = zend_multibyte_encoding_utf8;
	}

	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
			LANG_SCNG(script_encoding), source TSRMLS_CC);
}
/*
* Local variables:
* tab-width: 4

View File

@ -75,9 +75,6 @@ ZEND_API int zend_multibyte_set_script_encoding(const zend_encoding **encoding_l
ZEND_API int zend_multibyte_set_internal_encoding(const zend_encoding *encoding TSRMLS_DC);
ZEND_API int zend_multibyte_set_script_encoding_by_string(const char *new_value, size_t new_value_length TSRMLS_DC);
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC);
ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC);
END_EXTERN_C()
#endif /* ZEND_MULTIBYTE_H */

View File

@ -99,6 +99,12 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring);
static void php_mb_populate_current_detect_order_list(TSRMLS_D);
static int php_mb_encoding_translation(TSRMLS_D);
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
/* }}} */
/* {{{ php_mb_default_identify_list */
@ -1049,7 +1055,7 @@ static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,
}
/* }}} */
static char *php_mb_rfc1867_substring(char *start, int len, char quote TSRMLS_DC)
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
{
char *result = emalloc(len + 2);
char *resp = result;
@ -1059,7 +1065,7 @@ static char *php_mb_rfc1867_substring(char *start, int len, char quote TSRMLS_DC
if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
*resp++ = start[++i];
} else {
size_t j = php_mb_gpc_mbchar_bytes(start+i TSRMLS_CC);
size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
while (j-- > 0 && i < len) {
*resp++ = start[i++];
@ -1072,9 +1078,49 @@ static char *php_mb_rfc1867_substring(char *start, int len, char quote TSRMLS_DC
return result;
}
static char *php_mb_rfc1867_getword(char *str TSRMLS_DC) /* {{{ */
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
{
while (*str && isspace(*str)) {
char *pos = *line, quote;
char *res;
while (*pos && *pos != stop) {
if ((quote = *pos) == '"' || quote == '\'') {
++pos;
while (*pos && *pos != quote) {
if (*pos == '\\' && pos[1] && pos[1] == quote) {
pos += 2;
} else {
++pos;
}
}
if (*pos) {
++pos;
}
} else {
pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
}
}
if (*pos == '\0') {
res = estrdup(*line);
*line += strlen(*line);
return res;
}
res = estrndup(*line, pos - *line);
while (*pos == stop) {
pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
}
*line = pos;
return res;
}
/* }}} */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
{
while (*str && isspace(*(unsigned char *)str)) {
++str;
}
@ -1086,29 +1132,30 @@ static char *php_mb_rfc1867_getword(char *str TSRMLS_DC) /* {{{ */
char quote = *str;
str++;
return php_mb_rfc1867_substring(str, strlen(str), quote TSRMLS_CC);
return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
} else {
char *strend = str;
while (*strend && !isspace(*strend)) {
while (*strend && !isspace(*(unsigned char *)strend)) {
++strend;
}
return php_mb_rfc1867_substring(str, strend - str, 0 TSRMLS_CC);
return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0);
}
}
/* }}} */
static char *php_mb_rfc1867_basename(char *filename TSRMLS_DC) /* {{{ */
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
{
char *s, *tmp;
const size_t filename_len = strlen(filename);
/* The \ check should technically be needed for win32 systems only where
* it is a valid path separator. However, IE in all its wisdom always sends
* the full path of the file on the user's filesystem, which means that unless
* the user does basename() they get a bogus file name. Until IE's user base drops
* to nil or the problem is fixed this code must remain enabled for all systems. */
s = php_mb_strrchr(filename, '\\' TSRMLS_CC);
if ((tmp = php_mb_strrchr(filename, '/' TSRMLS_CC)) > s) {
s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding TSRMLS_CC);
if ((tmp = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding TSRMLS_CC)) > s) {
s = tmp;
}
if (s) {
@ -1499,11 +1546,12 @@ PHP_MINIT_FUNCTION(mbstring)
php_rfc1867_set_multibyte_callbacks(
php_mb_encoding_translation,
php_mb_gpc_encoding_detector,
php_mb_gpc_encoding_converter,
php_mb_gpc_get_detect_order,
php_mb_gpc_set_input_encoding,
php_mb_rfc1867_getword,
php_mb_rfc1867_getword_conf,
php_mb_rfc1867_basename);
return SUCCESS;
}
/* }}} */
@ -1512,7 +1560,7 @@ PHP_MINIT_FUNCTION(mbstring)
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
UNREGISTER_INI_ENTRIES();
#if HAVE_MBREGEX
PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
@ -2150,7 +2198,7 @@ PHP_FUNCTION(mb_strpos)
mbfl_string haystack, needle;
char *enc_name = NULL;
int enc_name_len;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
@ -3415,7 +3463,7 @@ PHP_FUNCTION(mb_convert_variables)
const mbfl_encoding **elist;
char *to_enc;
void *ptmp;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
return;
}
@ -3958,7 +4006,7 @@ PHP_FUNCTION(mb_send_mail)
smart_str *s;
extern void mbfl_memory_device_unput(mbfl_memory_device *device);
char *pp, *ee;
/* initialize */
mbfl_memory_device_init(&device, 0, 0);
mbfl_string_init(&orig_str);
@ -4422,7 +4470,7 @@ PHP_FUNCTION(mb_check_encoding)
RETURN_FALSE;
}
}
convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
@ -4430,7 +4478,7 @@ PHP_FUNCTION(mb_check_encoding)
}
mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
mbfl_buffer_converter_illegal_substchar(convd, 0);
/* initialize string */
mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
mbfl_string_init(&result);
@ -4479,8 +4527,8 @@ static void php_mb_populate_current_detect_order_list(TSRMLS_D)
MBSTRG(current_detect_order_list_size) = nentries;
}
/* {{{ MBSTRING_API int php_mb_encoding_translation() */
MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
/* {{{ static int php_mb_encoding_translation() */
static int php_mb_encoding_translation(TSRMLS_D)
{
return MBSTRG(encoding_translation);
}
@ -4559,154 +4607,6 @@ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nby
}
/* }}} */
/* {{{ MBSTRING_API char *php_mb_strrchr() */
/* Convenience wrapper around php_mb_safe_strrchr() that searches `s` for `c`
 * and passes -1 as the byte count.
 * NOTE(review): -1 presumably tells the helper to measure the string itself -
 * confirm against php_mb_safe_strrchr(). */
MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
{
	char *last_match = php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);

	return last_match;
}
/* }}} */
/* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
/* Returns php_mb_mbchar_bytes_ex() for the character at `s`, measured in the
 * identified HTTP input encoding when one is set, otherwise in the internal
 * encoding. */
MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
{
	return MBSTRG(http_input_identify)
		? php_mb_mbchar_bytes_ex(s, MBSTRG(http_input_identify))
		: php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
}
/* }}} */
/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
/* Converts `num` strings in place from one encoding to another.
 *
 * str / len     parallel arrays of string pointers and byte lengths; on a
 *               successful conversion the old str[i] is efree()d and both
 *               entries are replaced with the converted buffer and its length.
 * encoding_to   target encoding name, or NULL for the current internal encoding.
 * encoding_from source encoding name, or NULL for the identified HTTP input
 *               encoding.
 *
 * Returns -1 when an encoding name cannot be resolved, when a converter cannot
 * be created, or when the last conversion attempted produced no result (which
 * includes num <= 0); returns 0 otherwise, including when there is nothing to
 * convert because the source encoding is unset or is the "pass" encoding. */
MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
	int i;
	mbfl_string string, result, *ret = NULL;
	const mbfl_encoding *from_encoding, *to_encoding;
	mbfl_buffer_converter *convd;

	if (encoding_to) {
		/* new encoding */
		to_encoding = mbfl_name2encoding(encoding_to);
		if (!to_encoding) {
			return -1;
		}
	} else {
		to_encoding = MBSTRG(current_internal_encoding);
	}
	if (encoding_from) {
		/* old encoding */
		from_encoding = mbfl_name2encoding(encoding_from);
		/* fail on an UNKNOWN name (the check used to be inverted and rejected
		 * every name that resolved successfully) */
		if (!from_encoding) {
			return -1;
		}
	} else {
		from_encoding = MBSTRG(http_input_identify);
	}

	/* from_encoding is a pointer, so compare it with the pass encoding object
	 * rather than with the mbfl_no_encoding_pass enum constant. The NULL guard
	 * is required because from_encoding is dereferenced just below. */
	if (!from_encoding || from_encoding == &mbfl_encoding_pass) {
		return 0;
	}

	/* initialize string */
	mbfl_string_init(&string);
	mbfl_string_init(&result);
	string.no_encoding = from_encoding->no_encoding;
	string.no_language = MBSTRG(language);

	for (i = 0; i < num; i++) {
		string.val = (unsigned char *)str[i];
		string.len = len[i];

		/* initialize converter */
		convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
		if (convd == NULL) {
			return -1;
		}
		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));

		/* do it */
		ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
		if (ret != NULL) {
			/* the converted buffer takes the place of the original string */
			efree(str[i]);
			str[i] = (char *)ret->val;
			len[i] = (int)ret->len;
		}
		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
		mbfl_buffer_converter_delete(convd);
	}

	return ret ? 0 : -1;
}
/* }}} */
/* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
*/
/* Detects the encoding of a batch of request strings and records it in
 * MBSTRG(http_input_identify).
 *
 * arg_string / arg_length  parallel arrays holding `num` strings and lengths.
 * arg_list                 optional encoding list string; when NULL, empty, or
 *                          unparsable the current detect order list is used.
 *
 * Returns SUCCESS when an encoding was chosen (or the input list is the single
 * "pass" encoding), FAILURE otherwise. */
MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
{
mbfl_string string;
const mbfl_encoding **elist;
const mbfl_encoding *encoding = NULL;
mbfl_encoding_detector *identd = NULL;
size_t size;
const mbfl_encoding **list;
/* refresh the current detect order list before it is consulted below */
php_mb_populate_current_detect_order_list(TSRMLS_C);
/* short-circuit: an input list consisting solely of "pass" needs no detection */
if (MBSTRG(http_input_list_size) == 1 && MBSTRG(http_input_list)[0] == &mbfl_encoding_pass) {
MBSTRG(http_input_identify) = &mbfl_encoding_pass;
return SUCCESS;
}
if (arg_list && strlen(arg_list)>0) {
/* make encoding list */
list = NULL;
size = 0;
php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
/* NOTE(review): `list` is never released in this function - confirm who
 * owns the array filled in by php_mb_parse_encoding_list(). */
if (size > 0 && list != NULL) {
elist = list;
} else {
elist = MBSTRG(current_detect_order_list);
size = MBSTRG(current_detect_order_list_size);
}
} else {
elist = MBSTRG(current_detect_order_list);
size = MBSTRG(current_detect_order_list_size);
}
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
identd = mbfl_encoding_detector_new2(elist, size, MBSTRG(strict_detection));
if (identd) {
int n = 0;
/* feed strings one by one; stop as soon as the detector returns non-zero */
while(n < num){
string.val = (unsigned char *)arg_string[n];
string.len = arg_length[n];
if (mbfl_encoding_detector_feed(identd, &string)) {
break;
}
n++;
}
encoding = mbfl_encoding_detector_judge2(identd);
mbfl_encoding_detector_delete(identd);
}
/* `encoding` stays NULL when no detector could be created or none was judged */
if (encoding) {
MBSTRG(http_input_identify) = encoding;
return SUCCESS;
} else {
return FAILURE;
}
}
/* }}} */
/* {{{ MBSTRING_API int php_mb_stripos()
*/
MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
@ -4784,6 +4684,19 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
}
/* }}} */
/* Reports the configured HTTP input encoding list and its entry count through
 * `list` and `list_size`. The array pointer is handed out as-is (cast to the
 * zend_encoding view); nothing is copied. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
{
	*list_size = MBSTRG(http_input_list_size);
	*list = (const zend_encoding **)MBSTRG(http_input_list);
}
/* }}} */
/* Records `encoding` as the identified HTTP input encoding.
 * The value arrives typed as zend_encoding while the slot is used as an
 * mbfl_encoding pointer elsewhere in this file, so the conversion is made
 * explicit, matching the casts used by the other callbacks in this file. */
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
	MBSTRG(http_input_identify) = (const mbfl_encoding *)encoding;
}
/* }}} */
#endif /* HAVE_MBSTRING */
/*

View File

@ -128,13 +128,10 @@ PHP_FUNCTION(mb_send_mail);
PHP_FUNCTION(mb_get_info);
PHP_FUNCTION(mb_check_encoding);
MBSTRING_API int php_mb_encoding_translation(TSRMLS_D);
MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c,
size_t nbytes, const mbfl_encoding *enc);
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c,
size_t nbytes TSRMLS_DC);
MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC);
MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length,
const char *_to_encoding,
@ -146,17 +143,11 @@ MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC)
MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc);
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC);
MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC);
MBSTRING_API int php_mb_encoding_detector_ex(const char *arg_string, int arg_length,
char *arg_list TSRMLS_DC);
MBSTRING_API int php_mb_encoding_converter_ex(char **str, int *len, const char *encoding_to,
const char *encoding_from TSRMLS_DC);
MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC);
MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC);
MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC);
/* internal use only */

View File

@ -41,85 +41,20 @@ static int dummy_encoding_translation(TSRMLS_D)
return 0;
}
static char *php_ap_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC);
static char *php_ap_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC);
static php_rfc1867_encoding_translation_t php_rfc1867_encoding_translation = dummy_encoding_translation;
static php_rfc1867_encoding_detector_t php_rfc1867_encoding_detector = NULL;
static php_rfc1867_encoding_converter_t php_rfc1867_encoding_converter = NULL;
static php_rfc1867_getword_t php_rfc1867_getword = NULL;
static php_rfc1867_get_detect_order_t php_rfc1867_get_detect_order = NULL;
static php_rfc1867_set_input_encoding_t php_rfc1867_set_input_encoding = NULL;
static php_rfc1867_getword_t php_rfc1867_getword = php_ap_getword;
static php_rfc1867_getword_conf_t php_rfc1867_getword_conf = php_ap_getword_conf;
static php_rfc1867_basename_t php_rfc1867_basename = NULL;
PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC) = NULL;
static void safe_php_register_variable(char *var, char *strval, int val_len, zval *track_vars_array, zend_bool override_protection TSRMLS_DC);
/* Flushes the stacked (name, value) pairs collected during multipart parsing.
 *
 * val_list / len_list hold num_vars entries laid out as name, value, name,
 * value, ... When the encoding detector succeeds on the whole batch, the batch
 * is converted first. Each pair is then passed through the SAPI input filter
 * and, if a callback is installed, the MULTIPART_EVENT_FORMDATA callback; pairs
 * that survive both are registered into array_ptr. Every string and both
 * arrays are freed before returning. */
static void php_flush_gpc_variables(int num_vars, char **val_list, int *len_list, zval *array_ptr TSRMLS_DC) /* {{{ */
{
	int idx;
	unsigned int filtered_len;

	if (num_vars > 0) {
		if (php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
			php_rfc1867_encoding_converter(val_list, len_list, num_vars, NULL, NULL TSRMLS_CC);
		}
	}

	for (idx = 0; idx < num_vars; idx += 2) {
		/* non-zero while the pair is still a candidate for registration */
		int accepted = sapi_module.input_filter(PARSE_POST, val_list[idx], &val_list[idx + 1], len_list[idx + 1], &filtered_len TSRMLS_CC) ? 1 : 0;

		if (accepted && php_rfc1867_callback != NULL) {
			multipart_event_formdata event_formdata;
			void *event_extra_data = NULL;

			event_formdata.post_bytes_processed = SG(read_post_bytes);
			event_formdata.name = val_list[idx];
			event_formdata.value = &val_list[idx + 1];
			event_formdata.length = filtered_len;
			event_formdata.newlength = &filtered_len;
			if (php_rfc1867_callback(MULTIPART_EVENT_FORMDATA, &event_formdata, &event_extra_data TSRMLS_CC) == FAILURE) {
				/* the callback vetoed this variable */
				accepted = 0;
			}
		}

		if (accepted) {
			safe_php_register_variable(val_list[idx], val_list[idx + 1], filtered_len, array_ptr, 0 TSRMLS_CC);
		}

		/* the pair is released whether or not it was registered */
		efree(val_list[idx]);
		efree(val_list[idx + 1]);
	}

	efree(val_list);
	efree(len_list);
}
/* }}} */
/* Grows the parallel value/length arrays by `inc` slots (rounded up to an even
 * number, since entries are stored as name/value pairs) and updates
 * *num_vars_max. Both arrays are reallocated with two spare slots beyond the
 * new maximum. */
static void php_gpc_realloc_buffer(char ***pval_list, int **plen_list, int *num_vars_max, int inc TSRMLS_DC) /* {{{ */
{
	int capacity;

	/* allow only even increments */
	inc += inc & 1;

	capacity = *num_vars_max + inc;
	*num_vars_max = capacity;

	*pval_list = (char **)erealloc(*pval_list, (capacity + 2) * sizeof(char *));
	*plen_list = (int *)erealloc(*plen_list, (capacity + 2) * sizeof(int));
}
/* }}} */
/* Appends a (param, value) pair to the stacked variable arrays, growing them by
 * 16 slots first when they are full. Both strings are duplicated with
 * estrdup() and their strlen() is recorded alongside; *num_vars advances by one
 * after each of the two entries. */
static void php_gpc_stack_variable(char *param, char *value, char ***pval_list, int **plen_list, int *num_vars, int *num_vars_max TSRMLS_DC) /* {{{ */
{
	char **entries;
	int *lengths;

	if (*num_vars >= *num_vars_max) {
		php_gpc_realloc_buffer(pval_list, plen_list, num_vars_max, 16 TSRMLS_CC);
	}

	/* fetched after the possible reallocation so the pointers are current */
	entries = *pval_list;
	lengths = *plen_list;

	entries[*num_vars] = (char *)estrdup(param);
	lengths[*num_vars] = strlen(param);
	++*num_vars;

	entries[*num_vars] = (char *)estrdup(value);
	lengths[*num_vars] = strlen(value);
	++*num_vars;
}
/* }}} */
/* The longest property name we use in an uploaded file array */
#define MAX_SIZE_OF_INDEX sizeof("[tmp_name]")
@ -283,6 +218,9 @@ typedef struct {
char *boundary_next;
int boundary_next_len;
const zend_encoding *input_encoding;
const zend_encoding **detect_order;
size_t detect_order_size;
} multipart_buffer;
typedef struct {
@ -340,7 +278,7 @@ static int multipart_buffer_eof(multipart_buffer *self TSRMLS_DC)
}
/* create new multipart_buffer structure */
static multipart_buffer *multipart_buffer_new(char *boundary, int boundary_len)
static multipart_buffer *multipart_buffer_new(char *boundary, int boundary_len TSRMLS_DC)
{
multipart_buffer *self = (multipart_buffer *) ecalloc(1, sizeof(multipart_buffer));
@ -357,6 +295,15 @@ static multipart_buffer *multipart_buffer_new(char *boundary, int boundary_len)
self->buf_begin = self->buffer;
self->bytes_in_buffer = 0;
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
php_rfc1867_get_detect_order(&self->detect_order, &self->detect_order_size TSRMLS_CC);
} else {
self->detect_order = NULL;
self->detect_order_size = 0;
}
self->input_encoding = NULL;
return self;
}
@ -467,6 +414,10 @@ static int multipart_buffer_headers(multipart_buffer *self, zend_llist *header T
char *key = line;
char *value = NULL;
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
self->input_encoding = zend_multibyte_encoding_detector(line, strlen(line), self->detect_order, self->detect_order_size TSRMLS_CC);
}
/* space in the beginning means same header */
if (!isspace(line[0])) {
value = strchr(line, ':');
@ -522,7 +473,7 @@ static char *php_mime_get_hdr_value(zend_llist header, char *key)
return NULL;
}
static char *php_ap_getword(char **line, char stop)
static char *php_ap_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC)
{
char *pos = *line, quote;
char *res;
@ -558,7 +509,7 @@ static char *php_ap_getword(char **line, char stop)
return res;
}
static char *substring_conf(char *start, int len, char quote TSRMLS_DC)
static char *substring_conf(char *start, int len, char quote)
{
char *result = emalloc(len + 1);
char *resp = result;
@ -576,7 +527,7 @@ static char *substring_conf(char *start, int len, char quote TSRMLS_DC)
return result;
}
static char *php_ap_getword_conf(char *str TSRMLS_DC)
static char *php_ap_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC)
{
while (*str && isspace(*str)) {
++str;
@ -590,17 +541,33 @@ static char *php_ap_getword_conf(char *str TSRMLS_DC)
char quote = *str;
str++;
return substring_conf(str, strlen(str), quote TSRMLS_CC);
return substring_conf(str, strlen(str), quote);
} else {
char *strend = str;
while (*strend && !isspace(*strend)) {
++strend;
}
return substring_conf(str, strend - str, 0 TSRMLS_CC);
return substring_conf(str, strend - str, 0);
}
}
/* Byte-oriented basename: returns a pointer into `path` just past the last
 * path separator, where both '\\' and '/' count as separators. When `path`
 * contains neither, `path` itself is returned. `encoding` is unused here; it is
 * only part of the signature shared with the multibyte-aware callback.
 *
 * A path containing '/' but no '\\' previously came back unchanged (the '/'
 * was only examined when a '\\' was also present), so "/a/b.txt" kept its
 * directory part. Both separators are now always considered. */
static char *php_ap_basename(const zend_encoding *encoding, char *path TSRMLS_DC)
{
	char *backslash = strrchr(path, '\\');
	char *slash = strrchr(path, '/');
	char *last_sep = backslash;

	/* pick whichever separator occurs later; avoid ordering a NULL pointer */
	if (slash && (!last_sep || slash > last_sep)) {
		last_sep = slash;
	}

	return last_sep ? last_sep + 1 : path;
}
/*
* Search for a string in a fixed-length byte string.
* If partial is true, partial matches are allowed at the end of the buffer.
@ -709,8 +676,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
int max_file_size = 0, skip_upload = 0, anonindex = 0, is_anonymous;
zval *http_post_files = NULL;
HashTable *uploaded_files = NULL;
int str_len = 0, num_vars = 0, num_vars_max = 2*10, *len_list = NULL;
char **val_list = NULL;
int str_len = 0;
multipart_buffer *mbuff;
zval *array_ptr = (zval *) arg;
int fd = -1;
@ -718,6 +684,20 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
void *event_extra_data = NULL;
unsigned int llen = 0;
int upload_cnt = INI_INT("max_file_uploads");
const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
php_rfc1867_getword_t getword;
php_rfc1867_getword_conf_t getword_conf;
php_rfc1867_basename_t _basename;
if (php_rfc1867_encoding_translation(TSRMLS_C) && internal_encoding) {
getword = php_rfc1867_getword;
getword_conf = php_rfc1867_getword_conf;
_basename = php_rfc1867_basename;
} else {
getword = php_ap_getword;
getword_conf = php_ap_getword_conf;
_basename = php_ap_basename;
}
if (SG(post_max_size) > 0 && SG(request_info).content_length > SG(post_max_size)) {
sapi_module.sapi_error(E_WARNING, "POST Content-Length of %ld bytes exceeds the limit of %ld bytes", SG(request_info).content_length, SG(post_max_size));
@ -780,11 +760,6 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
INIT_PZVAL(http_post_files);
PG(http_globals)[TRACK_VARS_FILES] = http_post_files;
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
val_list = (char **)ecalloc(num_vars_max+2, sizeof(char *));
len_list = (int *)ecalloc(num_vars_max+2, sizeof(int));
}
zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t) php_free_hdr_entry, 0);
if (php_rfc1867_callback != NULL) {
@ -817,7 +792,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
++cd;
}
while (*cd && (pair = php_ap_getword(&cd, ';')))
while (*cd && (pair = getword(mbuff->input_encoding, &cd, ';' TSRMLS_CC)))
{
char *key = NULL, *word = pair;
@ -826,41 +801,33 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
}
if (strchr(pair, '=')) {
key = php_ap_getword(&pair, '=');
key = getword(mbuff->input_encoding, &pair, '=' TSRMLS_CC);
if (!strcasecmp(key, "name")) {
if (param) {
efree(param);
}
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
param = getword_conf(mbuff->input_encoding, pair TSRMLS_CC);
if (mbuff->input_encoding && internal_encoding) {
unsigned char *new_param;
size_t new_param_len;
if ((size_t)-1 != zend_multibyte_encoding_converter(&new_param, &new_param_len, (unsigned char *)param, strlen(param), internal_encoding, mbuff->input_encoding TSRMLS_CC)) {
efree(param);
param = (char *)new_param;
}
val_list[num_vars] = pair;
len_list[num_vars] = strlen(pair);
num_vars++;
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
num_vars--;
param = php_rfc1867_getword(pair TSRMLS_CC);
} else {
param = php_ap_getword_conf(pair TSRMLS_CC);
}
} else if (!strcasecmp(key, "filename")) {
if (filename) {
efree(filename);
}
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
filename = getword_conf(mbuff->input_encoding, pair TSRMLS_CC);
if (mbuff->input_encoding && internal_encoding) {
unsigned char *new_filename;
size_t new_filename_len;
if ((size_t)-1 != zend_multibyte_encoding_converter(&new_filename, &new_filename_len, (unsigned char *)filename, strlen(filename), internal_encoding, mbuff->input_encoding TSRMLS_CC)) {
efree(filename);
filename = (char *)new_filename;
}
val_list[num_vars] = pair;
len_list[num_vars] = strlen(pair);
num_vars++;
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
num_vars--;
filename = php_rfc1867_getword(pair TSRMLS_CC);
} else {
filename = php_ap_getword_conf(pair TSRMLS_CC);
}
}
}
@ -878,12 +845,20 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
if (!value) {
value = estrdup("");
value_len = 0;
}
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
/* postpone filtering, callback call and registration */
php_gpc_stack_variable(param, value, &val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
} else if (sapi_module.input_filter(PARSE_POST, param, &value, value_len, &new_val_len TSRMLS_CC)) {
if (mbuff->input_encoding && internal_encoding) {
unsigned char *new_value;
size_t new_value_len;
if ((size_t)-1 != zend_multibyte_encoding_converter(&new_value, &new_value_len, (unsigned char *)value, value_len, internal_encoding, mbuff->input_encoding TSRMLS_CC)) {
efree(value);
value = (char *)new_value;
value_len = new_value_len;
}
}
if (sapi_module.input_filter(PARSE_POST, param, &value, value_len, &new_val_len TSRMLS_CC)) {
if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
size_t newlength = new_val_len;
@ -1135,43 +1110,23 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
snprintf(lbuf, llen, "%s_name", param);
}
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
}
val_list[num_vars] = filename;
len_list[num_vars] = strlen(filename);
num_vars++;
if (php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
str_len = strlen(filename);
php_rfc1867_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
}
s = php_rfc1867_basename(filename TSRMLS_CC);
num_vars--;
} else {
/* The \ check should technically be needed for win32 systems only where
* it is a valid path separator. However, IE in all its wisdom always sends
* the full path of the file on the user's filesystem, which means that unless
* the user does basename() they get a bogus file name. Until IE's user base drops
* to nil or the problem is fixed this code must remain enabled for all systems. */
s = strrchr(filename, '\\');
if ((tmp = strrchr(filename, '/')) > s) {
s = tmp;
}
/* The \ check should technically be needed for win32 systems only where
* it is a valid path separator. However, IE in all its wisdom always sends
* the full path of the file on the user's filesystem, which means that unless
* the user does basename() they get a bogus file name. Until IE's user base drops
* to nil or the problem is fixed this code must remain enabled for all systems. */
s = _basename(internal_encoding, filename TSRMLS_CC);
#ifdef PHP_WIN32
if (PG(magic_quotes_gpc)) {
s = s ? s : filename;
tmp = strrchr(s, '\'');
s = tmp > s ? tmp : s;
tmp = strrchr(s, '"');
s = tmp > s ? tmp : s;
}
if (PG(magic_quotes_gpc)) {
s = s ? s : filename;
tmp = strrchr(s, '\'');
s = tmp >= s ? tmp + 1: s;
tmp = strrchr(s, '"');
s = tmp >= s ? tmp + 1: s;
}
#endif
if (s) {
s++;
} else {
s = filename;
}
if (!s) {
s = filename;
}
if (!is_anonymous) {
@ -1300,10 +1255,6 @@ fileupload_done:
php_rfc1867_callback(MULTIPART_EVENT_END, &event_end, &event_extra_data TSRMLS_CC);
}
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
php_flush_gpc_variables(num_vars, val_list, len_list, array_ptr TSRMLS_CC);
}
if (lbuf) efree(lbuf);
if (abuf) efree(abuf);
if (array_index) efree(array_index);
@ -1318,15 +1269,17 @@ fileupload_done:
/*
 * Installs the multibyte-related callbacks used by the RFC 1867
 * (multipart/form-data) post handler.
 *
 * Each argument is stored, unchanged and unchecked, in the php_rfc1867_*
 * variable of the same name; the function does nothing else and returns
 * nothing.
 *
 * NOTE(review): the parameter list and body shown here contain both
 * encoding_detector/encoding_converter and
 * get_detect_order/set_input_encoding/getword_conf. This looks like the
 * pre-change and post-change signatures overlaid -- confirm against the
 * actual file which set is intended before relying on the argument order.
 */
SAPI_API void php_rfc1867_set_multibyte_callbacks(
php_rfc1867_encoding_translation_t encoding_translation,
php_rfc1867_encoding_detector_t encoding_detector,
php_rfc1867_encoding_converter_t encoding_converter,
php_rfc1867_get_detect_order_t get_detect_order,
php_rfc1867_set_input_encoding_t set_input_encoding,
php_rfc1867_getword_t getword,
php_rfc1867_getword_conf_t getword_conf,
php_rfc1867_basename_t basename) /* {{{ */
{
/* Plain pointer assignments: the last caller wins. */
php_rfc1867_encoding_translation = encoding_translation;
php_rfc1867_encoding_detector = encoding_detector;
php_rfc1867_encoding_converter = encoding_converter;
php_rfc1867_get_detect_order = get_detect_order;
php_rfc1867_set_input_encoding = set_input_encoding;
php_rfc1867_getword = getword;
php_rfc1867_getword_conf = getword_conf;
php_rfc1867_basename = basename;
}
/* }}} */

View File

@ -68,10 +68,11 @@ typedef struct _multipart_event_end {
} multipart_event_end;
/*
 * Function-pointer types for the multibyte callbacks that can be registered
 * through php_rfc1867_set_multibyte_callbacks().
 *
 * NOTE(review): php_rfc1867_getword_t and php_rfc1867_basename_t each appear
 * twice below with different signatures. Only one definition of each can be
 * valid C -- presumably the variants taking a zend_encoding are the current
 * ones; confirm against the actual header.
 */

/* Takes only the TSRM context; the post handler tests the returned int as a
 * condition to decide whether to take its encoding-translation path. */
typedef int (*php_rfc1867_encoding_translation_t)(TSRMLS_D);
/* Receives parallel arrays of strings and lengths plus their count; the post
 * handler compares the result against SUCCESS. */
typedef int (*php_rfc1867_encoding_detector_t)(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC);
/* Receives strings and lengths by pointer together with target and source
 * encoding names; presumably converts in place -- TODO confirm. */
typedef int (*php_rfc1867_encoding_converter_t)(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC);
/* Single-string variant: returns a char* derived from str. */
typedef char* (*php_rfc1867_getword_t)(char *str TSRMLS_DC);
/* Single-string variant: returns a char* derived from str. */
typedef char* (*php_rfc1867_basename_t)(char *str TSRMLS_DC);
/* Reports through the list and list_size out-parameters; no return value. */
typedef void (*php_rfc1867_get_detect_order_t)(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
/* Receives a zend_encoding; no return value. */
typedef void (*php_rfc1867_set_input_encoding_t)(const zend_encoding *encoding TSRMLS_DC);
/* Encoding-aware variant: takes the line by reference and a stop character. */
typedef char* (*php_rfc1867_getword_t)(const zend_encoding *encoding, char **line, char stop TSRMLS_DC);
/* Encoding-aware variant: takes an encoding and a string, returns a char*. */
typedef char* (*php_rfc1867_getword_conf_t)(const zend_encoding *encoding, char *str TSRMLS_DC);
/* Encoding-aware variant: takes an encoding and a string, returns a char*. */
typedef char* (*php_rfc1867_basename_t)(const zend_encoding *encoding, char *str TSRMLS_DC);
SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler);
@ -81,9 +82,10 @@ extern PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data,
/*
 * Registers the multibyte callbacks for RFC 1867 (multipart/form-data)
 * processing. One function pointer is passed per callback type declared in
 * this header.
 *
 * NOTE(review): the parameter list shown mixes encoding_detector and
 * encoding_converter with get_detect_order, set_input_encoding and
 * getword_conf; confirm the intended signature against the definition.
 */
SAPI_API void php_rfc1867_set_multibyte_callbacks(
php_rfc1867_encoding_translation_t encoding_translation,
php_rfc1867_encoding_detector_t encoding_detector,
php_rfc1867_encoding_converter_t encoding_converter,
php_rfc1867_get_detect_order_t get_detect_order,
php_rfc1867_set_input_encoding_t set_input_encoding,
php_rfc1867_getword_t getword,
php_rfc1867_getword_conf_t getword_conf,
php_rfc1867_basename_t basename);
#endif /* RFC1867_H */