* Refactor zend_multibyte facility.

Now mbstring.script_encoding is superseded by zend.script_encoding.
This commit is contained in:
Moriyoshi Koizumi 2010-12-19 16:36:37 +00:00
parent c28cac404d
commit bbf3d43c1e
33 changed files with 1740 additions and 2361 deletions

View File

@ -89,11 +89,25 @@ static ZEND_INI_MH(OnUpdateGCEnabled) /* {{{ */
}
/* }}} */
/*
 * INI modification handler for "zend.script_encoding".
 *
 * - Fails when the compiler global `multibyte` is off.
 * - Succeeds without touching anything when zend_multibyte_get_functions()
 *   returns NULL (presumably no multibyte provider registered yet -- confirm
 *   against zend_multibyte.c).
 * - Otherwise hands the raw INI string to
 *   zend_multibyte_set_script_encoding_by_string() and returns its result.
 */
static ZEND_INI_MH(OnUpdateScriptEncoding) /* {{{ */
{
	int result = FAILURE;

	if (CG(multibyte)) {
		result = SUCCESS;
		if (zend_multibyte_get_functions(TSRMLS_C)) {
			result = zend_multibyte_set_script_encoding_by_string(new_value, new_value_length TSRMLS_CC);
		}
	}

	return result;
}
/* }}} */
ZEND_INI_BEGIN()
ZEND_INI_ENTRY("error_reporting", NULL, ZEND_INI_ALL, OnUpdateErrorReporting)
STD_ZEND_INI_BOOLEAN("zend.enable_gc", "1", ZEND_INI_ALL, OnUpdateGCEnabled, gc_enabled, zend_gc_globals, gc_globals)
STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR, OnUpdateBool, multibyte, zend_compiler_globals, compiler_globals)
STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool, detect_unicode, zend_compiler_globals, compiler_globals)
ZEND_INI_ENTRY("zend.script_encoding", NULL, ZEND_INI_ALL, OnUpdateScriptEncoding)
STD_ZEND_INI_BOOLEAN("zend.detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool, detect_unicode, zend_compiler_globals, compiler_globals)
ZEND_INI_END()
@ -528,6 +542,9 @@ static void compiler_globals_dtor(zend_compiler_globals *compiler_globals TSRMLS
if (compiler_globals->static_members_table) {
free(compiler_globals->static_members_table);
}
if (compiler_globals->script_encoding_list) {
pefree(compiler_globals->script_encoding_list, 1);
}
compiler_globals->last_static_member = 0;
}
/* }}} */

View File

@ -197,9 +197,6 @@ void zend_init_compiler_data_structures(TSRMLS_D) /* {{{ */
init_compiler_declarables(TSRMLS_C);
zend_stack_init(&CG(context_stack));
CG(script_encoding_list) = NULL;
CG(script_encoding_list_size) = 0;
CG(internal_encoding) = NULL;
CG(encoding_declared) = 0;
}
/* }}} */
@ -238,10 +235,6 @@ void shutdown_compiler(TSRMLS_D) /* {{{ */
zend_hash_destroy(&CG(filenames_table));
zend_llist_destroy(&CG(open_files));
zend_stack_destroy(&CG(context_stack));
if (CG(script_encoding_list)) {
efree(CG(script_encoding_list));
}
}
/* }}} */
@ -5864,7 +5857,7 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */
CG(encoding_declared) = 1;
convert_to_string(&val->u.constant);
new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val);
new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val TSRMLS_CC);
if (!new_encoding) {
zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.constant.value.str.val);
} else {
@ -5879,6 +5872,8 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */
zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC);
}
}
} else {
zend_error(E_COMPILE_WARNING, "declare(encoding=...) ignored because Zend multibyte feature is turned off by settings");
}
zval_dtor(&val->u.constant);
} else {

View File

@ -147,14 +147,12 @@ struct _zend_compiler_globals {
HashTable interned_strings;
zend_encoding **script_encoding_list;
const zend_encoding **script_encoding_list;
size_t script_encoding_list_size;
zend_bool multibyte;
zend_bool detect_unicode;
zend_bool encoding_declared;
zend_encoding *internal_encoding;
#ifdef ZTS
zval ***static_members_table;
int last_static_member;
@ -310,8 +308,7 @@ struct _zend_php_scanner_globals {
/* input/output filters */
zend_encoding_filter input_filter;
zend_encoding_filter output_filter;
zend_encoding *script_encoding;
zend_encoding *internal_encoding;
const zend_encoding *script_encoding;
};
#endif /* ZEND_GLOBALS_H */

File diff suppressed because it is too large Load Diff

View File

@ -47,8 +47,7 @@ typedef struct _zend_lex_state {
/* input/output filters */
zend_encoding_filter input_filter;
zend_encoding_filter output_filter;
zend_encoding *script_encoding;
zend_encoding *internal_encoding;
const zend_encoding *script_encoding;
} zend_lex_state;
@ -57,6 +56,10 @@ int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2);
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC);
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC);
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC);
ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC);
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC);
ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC);
END_EXTERN_C()

View File

@ -181,16 +181,13 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
lex_state->lineno = CG(zend_lineno);
if (CG(multibyte)) {
lex_state->script_org = SCNG(script_org);
lex_state->script_org_size = SCNG(script_org_size);
lex_state->script_filtered = SCNG(script_filtered);
lex_state->script_filtered_size = SCNG(script_filtered_size);
lex_state->input_filter = SCNG(input_filter);
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
lex_state->internal_encoding = SCNG(internal_encoding);
}
lex_state->script_org = SCNG(script_org);
lex_state->script_org_size = SCNG(script_org_size);
lex_state->script_filtered = SCNG(script_filtered);
lex_state->script_filtered_size = SCNG(script_filtered_size);
lex_state->input_filter = SCNG(input_filter);
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
}
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
@ -209,24 +206,22 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
YYSETCONDITION(lex_state->yy_state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
if (CG(multibyte)) {
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
SCNG(script_org) = lex_state->script_org;
SCNG(script_org_size) = lex_state->script_org_size;
SCNG(script_filtered) = lex_state->script_filtered;
SCNG(script_filtered_size) = lex_state->script_filtered_size;
SCNG(input_filter) = lex_state->input_filter;
SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
SCNG(internal_encoding) = lex_state->internal_encoding;
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
SCNG(script_org) = lex_state->script_org;
SCNG(script_org_size) = lex_state->script_org_size;
SCNG(script_filtered) = lex_state->script_filtered;
SCNG(script_filtered_size) = lex_state->script_filtered_size;
SCNG(input_filter) = lex_state->input_filter;
SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
}
ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
@ -239,6 +234,203 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
}
}
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8 "\xef\xbb\xbf"
/*
 * Best-effort guess of the UTF-16/UTF-32 flavour of a script that has no BOM.
 *
 * Step 1 decides the code unit width: a NUL byte immediately followed by two
 * more NUL bytes is taken as evidence of UTF-32, otherwise UTF-16 is assumed.
 * Step 2 decides the byte order: the first code unit whose first byte is NUL
 * and last byte is not means big endian; the opposite means little endian.
 * When nothing conclusive is found the result is UTF-16BE.
 *
 * script      - buffer to inspect (only bytes [0, script_size) are read)
 * script_size - number of bytes in script
 *
 * Returns one of the zend_multibyte_encoding_utf{16,32}{be,le} globals.
 *
 * All reads are bounded by script_size: the scan stops as soon as fewer bytes
 * remain than the check needs, so short buffers and trailing partial code
 * units are never read past.
 */
static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
{
	const unsigned char *p;
	const unsigned char *end = script + script_size;
	size_t wchar_size = 2;
	int le = 0;

	/* utf-16 or utf-32? */
	p = script;
	/* need the NUL candidate plus two look-ahead bytes, hence "> 2" */
	while ((size_t)(end - p) > 2) {
		p = memchr(p, 0, (size_t)(end - p) - 2);
		if (!p) {
			break;
		}
		if (p[1] == '\0' && p[2] == '\0') {
			wchar_size = 4;
			break;
		}

		/* searching for UTF-32 specific byte orders, so this will do */
		if ((size_t)(end - p) < 4) {
			/* advancing would step past the end of the buffer */
			break;
		}
		p += 4;
	}

	/* BE or LE? */
	p = script;
	/* only look at complete code units */
	while ((size_t)(end - p) >= wchar_size) {
		if (p[0] == '\0' && p[wchar_size - 1] != '\0') {
			/* BE */
			le = 0;
			break;
		} else if (p[0] != '\0' && p[wchar_size - 1] == '\0') {
			/* LE */
			le = 1;
			break;
		}
		p += wchar_size;
	}

	if (wchar_size == 2) {
		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
	}
	return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
}
/*
 * Detect a Unicode encoding for the script held in LANG_SCNG(script_org).
 *
 * 1. If the buffer starts with a known BOM, the BOM is stripped (script_org is
 *    replaced by a freshly emalloc'ed copy and script_org_size is reduced) and
 *    the matching encoding is returned.
 * 2. Otherwise, if the buffer contains a NUL byte that is not preceded by a
 *    "__HALT_COMPILER ( ) ;" sequence, the width/byte order is guessed by
 *    zend_multibyte_detect_utf_encoding().
 * 3. Otherwise NULL is returned (nothing detected).
 *
 * Buffers shorter than 4 bytes are never inspected and yield NULL.
 */
static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
{
const zend_encoding *script_encoding = NULL;
int bom_size; /* only meaningful when script_encoding was set by a BOM match below */
unsigned char *script;
unsigned char *pos1, *pos2;
/* the longest BOM is 4 bytes; bail out so every memcmp() below stays in bounds */
if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
return NULL;
}
/* check out BOM; UTF-32 is tested before UTF-16 because the UTF-16LE BOM is a prefix of the UTF-32LE BOM */
if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
script_encoding = zend_multibyte_encoding_utf32be;
bom_size = sizeof(BOM_UTF32_BE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
script_encoding = zend_multibyte_encoding_utf32le;
bom_size = sizeof(BOM_UTF32_LE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
script_encoding = zend_multibyte_encoding_utf16be;
bom_size = sizeof(BOM_UTF16_BE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
script_encoding = zend_multibyte_encoding_utf16le;
bom_size = sizeof(BOM_UTF16_LE)-1;
} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
script_encoding = zend_multibyte_encoding_utf8;
bom_size = sizeof(BOM_UTF8)-1;
}
if (script_encoding) {
/* remove BOM: copy everything after it into a new buffer */
/* NOTE(review): the copy covers script_org_size+1-bom_size bytes, i.e. one byte beyond
 * script_org_size -- presumably a trailing terminator owned by script_org; confirm at the allocation site */
script = (unsigned char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size);
memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size);
efree(LANG_SCNG(script_org));
LANG_SCNG(script_org) = script;
LANG_SCNG(script_org_size) -= bom_size;
return script_encoding;
}
/* script contains NUL bytes -> auto-detection */
if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
/* check if the first NUL byte (pos1) comes after a __HALT_COMPILER(); statement */
pos2 = LANG_SCNG(script_org);
/* keep scanning while the span before the NUL byte can still hold "__HALT_COMPILER();" */
while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
/* find the next underscore before the NUL byte */
pos2 = memchr(pos2, '_', pos1 - pos2);
if (!pos2) break;
pos2++;
/* the first '_' was consumed above, so match the rest case-insensitively */
if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
pos2 += sizeof("_HALT_COMPILER")-1;
/* skip whitespace; each skip loop ends at the NUL byte at pos1 at the latest */
while (*pos2 == ' ' ||
*pos2 == '\t' ||
*pos2 == '\r' ||
*pos2 == '\n') {
pos2++;
}
if (*pos2 == '(') {
pos2++;
while (*pos2 == ' ' ||
*pos2 == '\t' ||
*pos2 == '\r' ||
*pos2 == '\n') {
pos2++;
}
if (*pos2 == ')') {
pos2++;
while (*pos2 == ' ' ||
*pos2 == '\t' ||
*pos2 == '\r' ||
*pos2 == '\n') {
pos2++;
}
if (*pos2 == ';') {
/* the NUL byte follows __HALT_COMPILER(); so no encoding is inferred from it */
return NULL;
}
}
}
}
}
/* make best effort if BOM is missing */
return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
}
/* no BOM and no NUL byte: nothing detected */
return NULL;
}
static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
{
const zend_encoding *script_encoding;
if (CG(detect_unicode)) {
/* check out bom(byte order mark) and see if containing wchars */
script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
if (script_encoding != NULL) {
/* bom or wchar detection is prior to 'script_encoding' option */
return script_encoding;
}
}
/* if no script_encoding specified, just leave alone */
if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
return NULL;
}
/* if multiple encodings specified, detect automagically */
if (CG(script_encoding_list_size) > 1) {
return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
}
return CG(script_encoding_list)[0];
}
/*
 * Select the scanner's script encoding and install the matching input/output
 * conversion filters.
 *
 * onetime_encoding - encoding to use for this script; when NULL the encoding
 *                    is chosen by zend_multibyte_find_script_encoding().
 *
 * Returns FAILURE (leaving the scanner globals untouched) when no script
 * encoding can be determined, SUCCESS otherwise.
 *
 * On success LANG_SCNG(script_encoding) is set, and the filters are:
 *   - both NULL when there is no internal encoding or it equals the script
 *     encoding, and the script encoding passes
 *     zend_multibyte_check_lexer_compatibility();
 *   - zend_multibyte_script_encoding_filter /
 *     zend_multibyte_internal_encoding_filter in every other case.
 */
ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
{
	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding : zend_multibyte_find_script_encoding(TSRMLS_C);

	if (!script_encoding) {
		return FAILURE;
	}

	/* default: no conversion in either direction */
	LANG_SCNG(script_encoding) = script_encoding;
	LANG_SCNG(input_filter) = NULL;
	LANG_SCNG(output_filter) = NULL;

	/* the compatibility check is only consulted when script and internal encodings agree */
	if ((!internal_encoding || script_encoding == internal_encoding)
			&& zend_multibyte_check_lexer_compatibility(script_encoding)) {
		/* the lexer can consume the script as is */
		return SUCCESS;
	}

	/*
	 * Either the lexer cannot read the script encoding directly, or the
	 * internal encoding differs from it: convert on the way in and on the
	 * way out.
	 */
	LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter;
	LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter;

	return SUCCESS;
}
ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
{
@ -286,13 +478,13 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
if (SCNG(script_filtered) == NULL) {
zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
"encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name);
"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
}
}
SCNG(yy_start) = SCNG(script_filtered) - offset;
yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
} else {
SCNG(yy_start) = buf - offset;
SCNG(yy_start) = (unsigned char *)buf - offset;
yy_scan_buffer(buf, size TSRMLS_CC);
}
} else {
@ -441,7 +633,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D
SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
SCNG(script_org_size) = str->value.str.len;
zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
if (!SCNG(input_filter)) {
SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
@ -615,7 +807,7 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter
/* calculate current position */
offset = original_offset = YYCURSOR - SCNG(yy_start);
if (old_input_filter && offset > 0) {
zend_encoding *new_encoding = SCNG(script_encoding);
const zend_encoding *new_encoding = SCNG(script_encoding);
zend_encoding_filter new_filter = SCNG(input_filter);
SCNG(script_encoding) = old_encoding;
SCNG(input_filter) = old_input_filter;

View File

@ -1,4 +1,4 @@
/* Generated by re2c 0.13.5 on Thu Nov 25 23:17:48 2010 */
/* Generated by re2c 0.13.5 on Mon Dec 20 01:33:18 2010 */
#line 3 "Zend/zend_language_scanner_defs.h"
enum YYCONDTYPE {

File diff suppressed because it is too large Load Diff

View File

@ -22,59 +22,62 @@
#ifndef ZEND_MULTIBYTE_H
#define ZEND_MULTIBYTE_H
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8 "\xef\xbb\xbf"
typedef struct _zend_encoding zend_encoding;
typedef size_t (*zend_encoding_filter)(unsigned char **str, size_t *str_length, const unsigned char *buf, size_t length TSRMLS_DC);
typedef char* (*zend_encoding_detector)(const unsigned char *string, size_t length, char *list TSRMLS_DC);
typedef int (*zend_encoding_converter)(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
typedef size_t (*zend_encoding_oddlen)(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
typedef int (*zend_encoding_list_checker)(const char *encoding_list TSRMLS_DC);
typedef const char* (*zend_encoding_name_getter)(TSRMLS_D);
typedef struct _zend_encoding {
zend_encoding_filter input_filter; /* escape input filter */
zend_encoding_filter output_filter; /* escape output filter */
const char *name; /* encoding name */
const char *(*aliases)[]; /* encoding name aliases */
zend_bool compatible; /* flex compatible or not */
} zend_encoding;
typedef const zend_encoding* (*zend_encoding_fetcher)(const char *encoding_name TSRMLS_DC);
typedef const char* (*zend_encoding_name_getter)(const zend_encoding *encoding);
typedef int (*zend_encoding_lexer_compatibility_checker)(const zend_encoding *encoding);
typedef const zend_encoding *(*zend_encoding_detector)(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC);
typedef size_t (*zend_encoding_converter)(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC);
typedef int (*zend_encoding_list_parser)(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC);
typedef const zend_encoding *(*zend_encoding_internal_encoding_getter)(TSRMLS_D);
typedef int (*zend_encoding_internal_encoding_setter)(const zend_encoding *encoding TSRMLS_DC);
typedef struct _zend_multibyte_functions {
const char *provider_name;
zend_encoding_fetcher encoding_fetcher;
zend_encoding_name_getter encoding_name_getter;
zend_encoding_lexer_compatibility_checker lexer_compatibility_checker;
zend_encoding_detector encoding_detector;
zend_encoding_converter encoding_converter;
zend_encoding_list_parser encoding_list_parser;
zend_encoding_internal_encoding_getter internal_encoding_getter;
zend_encoding_internal_encoding_setter internal_encoding_setter;
} zend_multibyte_functions;
/*
* zend multibyte APIs
*/
BEGIN_EXTERN_C()
/* multibyte utility functions */
ZEND_API extern zend_encoding_detector zend_multibyte_encoding_detector;
ZEND_API extern zend_encoding_converter zend_multibyte_encoding_converter;
ZEND_API extern zend_encoding_oddlen zend_multibyte_encoding_oddlen;
ZEND_API extern zend_encoding_list_checker zend_multibyte_check_encoding_list;
ZEND_API extern zend_encoding_name_getter zend_multibyte_get_internal_encoding;
ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf32be;
ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf32le;
ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf16be;
ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf16le;
ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf8;
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRMLS_DC);
ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC);
ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char *encoding_name);
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t
*to_length, const unsigned char *from, size_t from_length TSRMLS_DC);
/* multibyte utility functions */
ZEND_API int zend_multibyte_set_functions(const zend_multibyte_functions *functions TSRMLS_DC);
ZEND_API const zend_multibyte_functions *zend_multibyte_get_functions(TSRMLS_D);
ZEND_API const zend_encoding *zend_multibyte_fetch_encoding(const char *name TSRMLS_DC);
ZEND_API const char *zend_multibyte_get_encoding_name(const zend_encoding *encoding);
ZEND_API int zend_multibyte_check_lexer_compatibility(const zend_encoding *encoding);
ZEND_API const zend_encoding *zend_multibyte_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC);
ZEND_API size_t zend_multibyte_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC);
ZEND_API int zend_multibyte_parse_encoding_list(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC);
ZEND_API const zend_encoding *zend_multibyte_get_internal_encoding(TSRMLS_D);
ZEND_API const zend_encoding *zend_multibyte_get_script_encoding(TSRMLS_D);
ZEND_API int zend_multibyte_set_script_encoding(const zend_encoding **encoding_list, size_t encoding_list_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_internal_encoding(const zend_encoding *encoding TSRMLS_DC);
ZEND_API int zend_multibyte_set_script_encoding_by_string(const char *new_value, size_t new_value_length TSRMLS_DC);
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC);
ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC);
/* in zend_language_scanner.l */
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC);
ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC);
ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC);
END_EXTERN_C()
#endif /* ZEND_MULTIBYTE_H */

View File

@ -41,7 +41,7 @@ const mbfl_encoding mbfl_encoding_base64 = {
"BASE64",
NULL,
NULL,
MBFL_ENCTYPE_SBCS
MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_convert_vtbl vtbl_8bit_b64 = {

View File

@ -65,7 +65,7 @@ const mbfl_encoding mbfl_encoding_big5 = {
"BIG5",
(const char *(*)[])&mbfl_encoding_big5_aliases,
mblen_table_big5,
MBFL_ENCTYPE_MBCS
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_big5 = {

View File

@ -54,7 +54,7 @@ const mbfl_encoding mbfl_encoding_jis_ms = {
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const mbfl_encoding mbfl_encoding_cp50220 = {
@ -63,7 +63,7 @@ const mbfl_encoding mbfl_encoding_cp50220 = {
"ISO-2022-JP",
(const char *(*)[])NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const mbfl_encoding mbfl_encoding_cp50220raw = {
@ -72,7 +72,7 @@ const mbfl_encoding mbfl_encoding_cp50220raw = {
"ISO-2022-JP",
(const char *(*)[])NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const mbfl_encoding mbfl_encoding_cp50221 = {
@ -81,7 +81,7 @@ const mbfl_encoding mbfl_encoding_cp50221 = {
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const mbfl_encoding mbfl_encoding_cp50222 = {
@ -90,7 +90,7 @@ const mbfl_encoding mbfl_encoding_cp50222 = {
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_jis_ms = {

View File

@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_cp932 = {
"Shift_JIS",
(const char *(*)[])&mbfl_encoding_cp932_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_MBCS
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_cp932 = {

View File

@ -65,7 +65,7 @@ const mbfl_encoding mbfl_encoding_cp936 = {
"CP936",
(const char *(*)[])&mbfl_encoding_cp936_aliases,
mblen_table_cp936,
MBFL_ENCTYPE_MBCS
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_cp936 = {

View File

@ -70,7 +70,7 @@ const mbfl_encoding mbfl_encoding_html_ent = {
"HTML-ENTITIES",
(const char *(*)[])&mbfl_encoding_html_ent_aliases,
NULL,
MBFL_ENCTYPE_HTML_ENT
MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_convert_vtbl vtbl_wchar_html = {

View File

@ -44,7 +44,7 @@ const mbfl_encoding mbfl_encoding_hz = {
"HZ-GB-2312",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_hz = {

View File

@ -48,7 +48,7 @@ const mbfl_encoding mbfl_encoding_2022jpms = {
"ISO-2022-JP",
(const char *(*)[])&mbfl_encoding_2022jpms_aliases,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {

View File

@ -43,7 +43,7 @@ const mbfl_encoding mbfl_encoding_2022kr = {
"ISO-2022-KR",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_2022kr = {

View File

@ -46,7 +46,7 @@ const mbfl_encoding mbfl_encoding_jis = {
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const mbfl_encoding mbfl_encoding_2022jp = {
@ -55,7 +55,7 @@ const mbfl_encoding mbfl_encoding_2022jp = {
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_jis = {

View File

@ -43,7 +43,7 @@ const mbfl_encoding mbfl_encoding_qprint = {
"Quoted-Printable",
(const char *(*)[])&mbfl_encoding_qprint_aliases,
NULL,
MBFL_ENCTYPE_SBCS
MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_convert_vtbl vtbl_8bit_qprint = {

View File

@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_sjis = {
"Shift_JIS",
(const char *(*)[])&mbfl_encoding_sjis_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_MBCS
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_sjis = {

View File

@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_sjis_open = {
"Shift_JIS",
(const char *(*)[])&mbfl_encoding_sjis_open_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_MBCS
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_sjis_open = {

View File

@ -57,7 +57,7 @@ const mbfl_encoding mbfl_encoding_utf7 = {
"UTF-7",
(const char *(*)[])&mbfl_encoding_utf7_aliases,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_utf7 = {

View File

@ -127,6 +127,18 @@ mbfl_buffer_converter_new(
enum mbfl_no_encoding from,
enum mbfl_no_encoding to,
int buf_initsz)
{
const mbfl_encoding *_from = mbfl_no2encoding(from);
const mbfl_encoding *_to = mbfl_no2encoding(to);
return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz);
}
mbfl_buffer_converter *
mbfl_buffer_converter_new2(
const mbfl_encoding *from,
const mbfl_encoding *to,
int buf_initsz)
{
mbfl_buffer_converter *convd;
@ -137,14 +149,8 @@ mbfl_buffer_converter_new(
}
/* initialize */
convd->from = mbfl_no2encoding(from);
convd->to = mbfl_no2encoding(to);
if (convd->from == NULL) {
convd->from = &mbfl_encoding_pass;
}
if (convd->to == NULL) {
convd->to = &mbfl_encoding_pass;
}
convd->from = from;
convd->to = to;
/* create convert filter */
convd->filter1 = NULL;
@ -173,6 +179,7 @@ mbfl_buffer_converter_new(
return convd;
}
void
mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
{
@ -250,6 +257,12 @@ mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char
/*
 * Feed `string` into the converter `convd`.
 *
 * Thin wrapper around mbfl_buffer_converter_feed2() that passes NULL for the
 * `loc` out-parameter, so no position is reported back to the caller.
 * Returns whatever mbfl_buffer_converter_feed2() returns.
 */
int
mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
{
return mbfl_buffer_converter_feed2(convd, string, NULL);
}
int
mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc)
{
int n;
unsigned char *p;
@ -263,20 +276,27 @@ mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
/* feed data */
n = string->len;
p = string->val;
filter = convd->filter1;
if (filter != NULL) {
filter_function = filter->filter_function;
while (n > 0) {
if ((*filter_function)(*p++, filter) < 0) {
if (loc) {
*loc = p - string->val;
}
return -1;
}
n--;
}
}
if (loc) {
*loc = p - string->val;
}
return 0;
}
int
mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
{
@ -400,6 +420,49 @@ mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict
return identd;
}
mbfl_encoding_detector *
mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict)
{
mbfl_encoding_detector *identd;
int i, num;
mbfl_identify_filter *filter;
if (elist == NULL || elistsz <= 0) {
return NULL;
}
/* allocate */
identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
if (identd == NULL) {
return NULL;
}
identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
if (identd->filter_list == NULL) {
mbfl_free(identd);
return NULL;
}
/* create filters */
i = 0;
num = 0;
while (i < elistsz) {
filter = mbfl_identify_filter_new2(elist[i]);
if (filter != NULL) {
identd->filter_list[num] = filter;
num++;
}
i++;
}
identd->filter_list_size = num;
/* set strict flag */
identd->strict = strict;
return identd;
}
void
mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
{
@ -454,33 +517,32 @@ mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
return res;
}
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd)
{
mbfl_identify_filter *filter;
enum mbfl_no_encoding encoding;
const mbfl_encoding *encoding = NULL;
int n;
/* judge */
encoding = mbfl_no_encoding_invalid;
if (identd != NULL) {
n = identd->filter_list_size - 1;
while (n >= 0) {
filter = identd->filter_list[n];
if (!filter->flag) {
if (!identd->strict || !filter->status) {
encoding = filter->encoding->no_encoding;
encoding = filter->encoding;
}
}
n--;
}
/* fallback judge */
if (encoding == mbfl_no_encoding_invalid) {
if (!encoding) {
n = identd->filter_list_size - 1;
while (n >= 0) {
filter = identd->filter_list[n];
if (!filter->flag) {
encoding = filter->encoding->no_encoding;
encoding = filter->encoding;
}
n--;
}
@ -490,6 +552,12 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident
return encoding;
}
/*
 * Enum-returning variant of mbfl_encoding_detector_judge2(): yields the
 * no_encoding id of the encoding it returns, or mbfl_no_encoding_invalid when
 * it returns NULL.
 */
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
{
	const mbfl_encoding *winner = mbfl_encoding_detector_judge2(identd);

	if (winner != NULL) {
		return winner->no_encoding;
	}
	return mbfl_no_encoding_invalid;
}
/*
* encoding converter
@ -646,36 +714,88 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el
return encoding;
}
const char*
mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
const mbfl_encoding *
mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
{
int i, n, num, bad;
unsigned char *p;
mbfl_identify_filter *flist, *filter;
const mbfl_encoding *encoding;
encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
if (encoding != NULL &&
encoding->no_encoding > mbfl_no_encoding_charset_min &&
encoding->no_encoding < mbfl_no_encoding_charset_max) {
return encoding->name;
} else {
/* flist is an array of mbfl_identify_filter instances */
flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
if (flist == NULL) {
return NULL;
}
}
enum mbfl_no_encoding
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
{
const mbfl_encoding *encoding;
encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
if (encoding != NULL &&
encoding->no_encoding > mbfl_no_encoding_charset_min &&
encoding->no_encoding < mbfl_no_encoding_charset_max) {
return encoding->no_encoding;
} else {
return mbfl_no_encoding_invalid;
num = 0;
if (elist != NULL) {
for (i = 0; i < elistsz; i++) {
if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
num++;
}
}
}
}
/* feed data */
n = string->len;
p = string->val;
if (p != NULL) {
bad = 0;
while (n > 0) {
for (i = 0; i < num; i++) {
filter = &flist[i];
if (!filter->flag) {
(*filter->filter_function)(*p, filter);
if (filter->flag) {
bad++;
}
}
}
if ((num - 1) <= bad && !strict) {
break;
}
p++;
n--;
}
}
/* judge */
encoding = NULL;
for (i = 0; i < num; i++) {
filter = &flist[i];
if (!filter->flag) {
if (strict && filter->status) {
continue;
}
encoding = filter->encoding;
break;
}
}
/* fall-back judge */
if (!encoding) {
for (i = 0; i < num; i++) {
filter = &flist[i];
if (!filter->flag && (!strict || !filter->status)) {
encoding = filter->encoding;
break;
}
}
}
/* cleanup */
/* dtors should be called in reverse order */
i = num; while (--i >= 0) {
mbfl_identify_filter_cleanup(&flist[i]);
}
mbfl_free((void *)flist);
return encoding;
}
/*
* strlen

View File

@ -127,12 +127,14 @@ struct _mbfl_buffer_converter {
};
MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz);
MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new2(const mbfl_encoding *from, const mbfl_encoding *to, int buf_initsz);
MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
MBFLAPI extern void mbfl_buffer_converter_reset(mbfl_buffer_converter *convd);
MBFLAPI extern int mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
MBFLAPI extern int mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar);
MBFLAPI extern int mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n);
MBFLAPI extern int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
MBFLAPI extern int mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc);
MBFLAPI extern int mbfl_buffer_converter_flush(mbfl_buffer_converter *convd);
MBFLAPI extern mbfl_string * mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result);
MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
@ -151,9 +153,11 @@ struct _mbfl_encoding_detector {
};
MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict);
MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict);
MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd);
MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string);
MBFLAPI extern enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd);
MBFLAPI extern const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd);
/*
@ -169,12 +173,8 @@ mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_enc
MBFLAPI extern const mbfl_encoding *
mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
MBFLAPI extern const char *
mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
MBFLAPI extern enum mbfl_no_encoding
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
MBFLAPI extern const mbfl_encoding *
mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict);
/*
* strlen
*/

View File

@ -42,7 +42,8 @@
#define MBFL_ENCTYPE_MWC4BE 0x00000400
#define MBFL_ENCTYPE_MWC4LE 0x00000800
#define MBFL_ENCTYPE_SHFTCODE 0x00001000
#define MBFL_ENCTYPE_HTML_ENT 0x00002000
#define MBFL_ENCTYPE_ENC_STRM 0x00002000
#define MBFL_ENCTYPE_GL_UNSAFE 0x00004000
/* wchar plane, special character */
#define MBFL_WCSPLANE_MASK 0xffff

View File

@ -191,15 +191,37 @@ mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding)
return filter;
}
/*
 * Allocates an identify filter with mbfl_malloc() and initializes it for
 * the given encoding through mbfl_identify_filter_init2().
 * Returns the new filter, or NULL when the allocation fails or when
 * init2 returns non-zero (the allocation is released in that case).
 */
mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding)
{
	mbfl_identify_filter *created = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter));

	if (created != NULL && mbfl_identify_filter_init2(created, encoding)) {
		/* initialization failed: do not hand out a half-built filter */
		mbfl_free(created);
		created = NULL;
	}
	return created;
}
/*
 * Enum-based initializer: resolves the encoding id with mbfl_no2encoding()
 * and delegates to mbfl_identify_filter_init2(), substituting
 * &mbfl_encoding_pass when the id does not resolve to an encoding.
 * Returns whatever mbfl_identify_filter_init2() returns.
 */
int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding)
{
	const mbfl_encoding *resolved = mbfl_no2encoding(encoding);

	if (resolved == NULL) {
		resolved = &mbfl_encoding_pass;
	}
	return mbfl_identify_filter_init2(filter, resolved);
}
int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding)
{
const struct mbfl_identify_vtbl *vtbl;
/* encoding structure */
filter->encoding = mbfl_no2encoding(encoding);
if (filter->encoding == NULL) {
filter->encoding = &mbfl_encoding_pass;
}
filter->encoding = encoding;
filter->status = 0;
filter->flag = 0;

View File

@ -58,8 +58,10 @@ struct mbfl_identify_vtbl {
MBFLAPI extern const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding);
MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding);
MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new2(const mbfl_encoding *encoding);
MBFLAPI extern void mbfl_identify_filter_delete(mbfl_identify_filter *filter);
MBFLAPI extern int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding);
MBFLAPI extern int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding);
MBFLAPI void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter);
MBFLAPI extern void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter);

View File

@ -27,6 +27,7 @@
#include "php.h"
#include "php_ini.h"
#include "php_variables.h"
#include "libmbfl/mbfl/mbfilter_pass.h"
#include "mbstring.h"
#include "ext/standard/php_string.h"
#include "ext/standard/php_mail.h"
@ -56,7 +57,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
const char *c_var;
zval *array_ptr;
int free_buffer=0;
enum mbfl_no_encoding detected;
const mbfl_encoding *detected;
php_mb_encoding_handler_info_t info;
if (arg != PARSE_STRING) {
@ -136,16 +137,16 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
switch(arg) {
case PARSE_POST:
MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
MBSTRG(http_input_identify_post) = NULL;
break;
case PARSE_GET:
MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
MBSTRG(http_input_identify_get) = NULL;
break;
case PARSE_COOKIE:
MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
MBSTRG(http_input_identify_cookie) = NULL;
break;
case PARSE_STRING:
MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
MBSTRG(http_input_identify_string) = NULL;
break;
}
@ -163,7 +164,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
detected = _php_mb_encoding_handler_ex(&info, array_ptr, res TSRMLS_CC);
MBSTRG(http_input_identify) = detected;
if (detected != mbfl_no_encoding_invalid) {
if (detected) {
switch(arg){
case PARSE_POST:
MBSTRG(http_input_identify_post) = detected;
@ -191,7 +192,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
/* }}} */
/* {{{ const mbfl_encoding *_php_mb_encoding_handler_ex() */
enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC)
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC)
{
char *var, *val;
const char *s1, *s2;
@ -200,13 +201,13 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
int n, num, *len_list = NULL;
unsigned int val_len, new_val_len;
mbfl_string string, resvar, resval;
enum mbfl_no_encoding from_encoding = mbfl_no_encoding_invalid;
const mbfl_encoding *from_encoding = NULL;
mbfl_encoding_detector *identd = NULL;
mbfl_buffer_converter *convd = NULL;
mbfl_string_init_set(&string, info->to_language, info->to_encoding);
mbfl_string_init_set(&resvar, info->to_language, info->to_encoding);
mbfl_string_init_set(&resval, info->to_language, info->to_encoding);
mbfl_string_init_set(&string, info->to_language, info->to_encoding->no_encoding);
mbfl_string_init_set(&resvar, info->to_language, info->to_encoding->no_encoding);
mbfl_string_init_set(&resval, info->to_language, info->to_encoding->no_encoding);
if (!res || *res == '\0') {
goto out;
@ -257,12 +258,12 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
/* initialize converter */
if (info->num_from_encodings <= 0) {
from_encoding = mbfl_no_encoding_pass;
from_encoding = &mbfl_encoding_pass;
} else if (info->num_from_encodings == 1) {
from_encoding = info->from_encodings[0];
} else {
/* auto detect */
from_encoding = mbfl_no_encoding_invalid;
from_encoding = NULL;
/* from_encodings holds mbfl_encoding pointers (not enum ids), so the
 * pointer-based detector constructor must be used; casting the array to
 * enum mbfl_no_encoding * would make the detector read pointer bytes as ids */
identd = mbfl_encoding_detector_new2(info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection));
if (identd) {
n = 0;
@ -274,10 +275,10 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
}
n++;
}
from_encoding = mbfl_encoding_detector_judge(identd);
from_encoding = mbfl_encoding_detector_judge2(identd);
mbfl_encoding_detector_delete(identd);
}
if (from_encoding == mbfl_no_encoding_invalid) {
if (!from_encoding) {
if (info->report_errors) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
}
@ -286,8 +287,8 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
}
convd = NULL;
if (from_encoding != mbfl_no_encoding_pass) {
convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0);
if (from_encoding != &mbfl_encoding_pass) {
convd = mbfl_buffer_converter_new2(from_encoding, info->to_encoding, 0);
if (convd != NULL) {
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
@ -300,7 +301,7 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
}
/* convert encoding */
string.no_encoding = from_encoding;
string.no_encoding = from_encoding->no_encoding;
n = 0;
while (n < num) {
@ -312,10 +313,10 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
var = val_list[n];
}
n++;
string.val = val_list[n];
string.val = (unsigned char *)val_list[n];
string.len = len_list[n];
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
val = resval.val;
val = (char *)resval.val;
val_len = resval.len;
} else {
val = val_list[n];
@ -355,10 +356,10 @@ out:
/* {{{ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) */
SAPI_POST_HANDLER_FUNC(php_mb_post_handler)
{
enum mbfl_no_encoding detected;
const mbfl_encoding *detected;
php_mb_encoding_handler_info_t info;
MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
MBSTRG(http_input_identify_post) = NULL;
info.data_type = PARSE_POST;
info.separator = "&";
@ -372,7 +373,7 @@ SAPI_POST_HANDLER_FUNC(php_mb_post_handler)
detected = _php_mb_encoding_handler_ex(&info, arg, SG(request_info).post_data TSRMLS_CC);
MBSTRG(http_input_identify) = detected;
if (detected != mbfl_no_encoding_invalid) {
if (detected) {
MBSTRG(http_input_identify_post) = detected;
}
}

View File

@ -34,10 +34,10 @@ typedef struct _php_mb_encoding_handler_info_t {
const char *separator;
unsigned int report_errors: 1;
enum mbfl_no_language to_language;
enum mbfl_no_encoding to_encoding;
const mbfl_encoding *to_encoding;
enum mbfl_no_language from_language;
int num_from_encodings;
const enum mbfl_no_encoding *from_encodings;
const mbfl_encoding **from_encodings;
size_t num_from_encodings;
} php_mb_encoding_handler_info_t;
/* }}}*/
@ -47,7 +47,7 @@ SAPI_POST_HANDLER_FUNC(php_mb_post_handler);
MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data);
int _php_mb_enable_encoding_translation(int flag);
enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC);
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC);
/* }}} */
#endif /* HAVE_MBSTRING */

File diff suppressed because it is too large Load Diff

View File

@ -165,25 +165,23 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v
ZEND_BEGIN_MODULE_GLOBALS(mbstring)
char *internal_encoding_name;
enum mbfl_no_language language;
enum mbfl_no_encoding internal_encoding;
enum mbfl_no_encoding current_internal_encoding;
enum mbfl_no_encoding *script_encoding_list;
int script_encoding_list_size;
enum mbfl_no_encoding http_output_encoding;
enum mbfl_no_encoding current_http_output_encoding;
enum mbfl_no_encoding http_input_identify;
enum mbfl_no_encoding http_input_identify_get;
enum mbfl_no_encoding http_input_identify_post;
enum mbfl_no_encoding http_input_identify_cookie;
enum mbfl_no_encoding http_input_identify_string;
enum mbfl_no_encoding *http_input_list;
int http_input_list_size;
enum mbfl_no_encoding *detect_order_list;
int detect_order_list_size;
enum mbfl_no_encoding *current_detect_order_list;
int current_detect_order_list_size;
const mbfl_encoding *internal_encoding;
const mbfl_encoding *current_internal_encoding;
const mbfl_encoding *http_output_encoding;
const mbfl_encoding *current_http_output_encoding;
const mbfl_encoding *http_input_identify;
const mbfl_encoding *http_input_identify_get;
const mbfl_encoding *http_input_identify_post;
const mbfl_encoding *http_input_identify_cookie;
const mbfl_encoding *http_input_identify_string;
const mbfl_encoding **http_input_list;
size_t http_input_list_size;
const mbfl_encoding **detect_order_list;
size_t detect_order_list_size;
const mbfl_encoding **current_detect_order_list;
size_t current_detect_order_list_size;
enum mbfl_no_encoding *default_detect_order_list;
int default_detect_order_list_size;
size_t default_detect_order_list_size;
int filter_illegal_mode;
int filter_illegal_substchar;
int current_filter_illegal_mode;

View File

@ -749,7 +749,17 @@ PHPAPI void php_print_info(int flag TSRMLS_DC)
php_info_print_table_row(2, "Zend Memory Manager", is_zend_mm(TSRMLS_C) ? "enabled" : "disabled" );
php_info_print_table_row(2, "Zend Multibyte Support", CG(multibyte) ? "enabled" : "disabled");
{
const zend_multibyte_functions *functions = zend_multibyte_get_functions(TSRMLS_C);
char *descr;
if (functions) {
spprintf(&descr, 0, "provided by %s", functions->provider_name);
} else {
descr = estrdup("disabled");
}
php_info_print_table_row(2, "Zend Multibyte Support", descr);
efree(descr);
}
#if HAVE_IPV6
php_info_print_table_row(2, "IPv6 Support", "enabled" );