From bbf3d43c1ee0ad53b03c3821cd630f0746d5e954 Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Sun, 19 Dec 2010 16:36:37 +0000 Subject: [PATCH] * Refactor zend_multibyte facility. Now mbstring.script_encoding is superseded by zend.script_encoding. --- Zend/zend.c | 19 +- Zend/zend_compile.c | 11 +- Zend/zend_globals.h | 7 +- Zend/zend_language_scanner.c | 960 +++++++----- Zend/zend_language_scanner.h | 7 +- Zend/zend_language_scanner.l | 254 +++- Zend/zend_language_scanner_defs.h | 2 +- Zend/zend_multibyte.c | 1340 ++--------------- Zend/zend_multibyte.h | 83 +- .../libmbfl/filters/mbfilter_base64.c | 2 +- ext/mbstring/libmbfl/filters/mbfilter_big5.c | 2 +- .../libmbfl/filters/mbfilter_cp5022x.c | 10 +- ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 2 +- ext/mbstring/libmbfl/filters/mbfilter_cp936.c | 2 +- .../libmbfl/filters/mbfilter_htmlent.c | 2 +- ext/mbstring/libmbfl/filters/mbfilter_hz.c | 2 +- .../libmbfl/filters/mbfilter_iso2022_jp_ms.c | 2 +- .../libmbfl/filters/mbfilter_iso2022_kr.c | 2 +- ext/mbstring/libmbfl/filters/mbfilter_jis.c | 4 +- .../libmbfl/filters/mbfilter_qprint.c | 2 +- ext/mbstring/libmbfl/filters/mbfilter_sjis.c | 2 +- .../libmbfl/filters/mbfilter_sjis_open.c | 2 +- ext/mbstring/libmbfl/filters/mbfilter_utf7.c | 2 +- ext/mbstring/libmbfl/mbfl/mbfilter.c | 194 ++- ext/mbstring/libmbfl/mbfl/mbfilter.h | 12 +- ext/mbstring/libmbfl/mbfl/mbfl_consts.h | 3 +- ext/mbstring/libmbfl/mbfl/mbfl_ident.c | 30 +- ext/mbstring/libmbfl/mbfl/mbfl_ident.h | 2 + ext/mbstring/mb_gpc.c | 47 +- ext/mbstring/mb_gpc.h | 8 +- ext/mbstring/mbstring.c | 1038 ++++++------- ext/mbstring/mbstring.h | 34 +- ext/standard/info.c | 12 +- 33 files changed, 1740 insertions(+), 2361 deletions(-) diff --git a/Zend/zend.c b/Zend/zend.c index d091593ec4a..849a7edf2f4 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -89,11 +89,25 @@ static ZEND_INI_MH(OnUpdateGCEnabled) /* {{{ */ } /* }}} */ +static ZEND_INI_MH(OnUpdateScriptEncoding) /* {{{ */ +{ + if (!CG(multibyte)) { + 
return FAILURE; + } + if (!zend_multibyte_get_functions(TSRMLS_C)) { + return SUCCESS; + } + return zend_multibyte_set_script_encoding_by_string(new_value, new_value_length TSRMLS_CC); +} +/* }}} */ + + ZEND_INI_BEGIN() ZEND_INI_ENTRY("error_reporting", NULL, ZEND_INI_ALL, OnUpdateErrorReporting) STD_ZEND_INI_BOOLEAN("zend.enable_gc", "1", ZEND_INI_ALL, OnUpdateGCEnabled, gc_enabled, zend_gc_globals, gc_globals) STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR, OnUpdateBool, multibyte, zend_compiler_globals, compiler_globals) - STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool, detect_unicode, zend_compiler_globals, compiler_globals) + ZEND_INI_ENTRY("zend.script_encoding", NULL, ZEND_INI_ALL, OnUpdateScriptEncoding) + STD_ZEND_INI_BOOLEAN("zend.detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool, detect_unicode, zend_compiler_globals, compiler_globals) ZEND_INI_END() @@ -528,6 +542,9 @@ static void compiler_globals_dtor(zend_compiler_globals *compiler_globals TSRMLS if (compiler_globals->static_members_table) { free(compiler_globals->static_members_table); } + if (compiler_globals->script_encoding_list) { + pefree(compiler_globals->script_encoding_list, 1); + } compiler_globals->last_static_member = 0; } /* }}} */ diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 968fed7bd12..9b928437b04 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -197,9 +197,6 @@ void zend_init_compiler_data_structures(TSRMLS_D) /* {{{ */ init_compiler_declarables(TSRMLS_C); zend_stack_init(&CG(context_stack)); - CG(script_encoding_list) = NULL; - CG(script_encoding_list_size) = 0; - CG(internal_encoding) = NULL; CG(encoding_declared) = 0; } /* }}} */ @@ -238,10 +235,6 @@ void shutdown_compiler(TSRMLS_D) /* {{{ */ zend_hash_destroy(&CG(filenames_table)); zend_llist_destroy(&CG(open_files)); zend_stack_destroy(&CG(context_stack)); - - if (CG(script_encoding_list)) { - efree(CG(script_encoding_list)); - } } /* }}} */ @@ -5864,7 +5857,7 @@ 
void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */ CG(encoding_declared) = 1; convert_to_string(&val->u.constant); - new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val); + new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val TSRMLS_CC); if (!new_encoding) { zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.constant.value.str.val); } else { @@ -5879,6 +5872,8 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */ zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC); } } + } else { + zend_error(E_COMPILE_WARNING, "declare(encoding=...) ignored because Zend multibyte feature is turned off by settings"); } zval_dtor(&val->u.constant); } else { diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 8ec2a88c043..e6b504ebe39 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -147,14 +147,12 @@ struct _zend_compiler_globals { HashTable interned_strings; - zend_encoding **script_encoding_list; + const zend_encoding **script_encoding_list; size_t script_encoding_list_size; zend_bool multibyte; zend_bool detect_unicode; zend_bool encoding_declared; - zend_encoding *internal_encoding; - #ifdef ZTS zval ***static_members_table; int last_static_member; @@ -310,8 +308,7 @@ struct _zend_php_scanner_globals { /* input/ouput filters */ zend_encoding_filter input_filter; zend_encoding_filter output_filter; - zend_encoding *script_encoding; - zend_encoding *internal_encoding; + const zend_encoding *script_encoding; }; #endif /* ZEND_GLOBALS_H */ diff --git a/Zend/zend_language_scanner.c b/Zend/zend_language_scanner.c index 2d2429feb67..b636d2c7816 100644 --- a/Zend/zend_language_scanner.c +++ b/Zend/zend_language_scanner.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.13.5 on Thu Nov 25 23:17:48 2010 */ +/* Generated by re2c 0.13.5 on Mon Dec 20 01:33:18 2010 */ #line 1 "Zend/zend_language_scanner.l" /* 
+----------------------------------------------------------------------+ @@ -183,16 +183,13 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) lex_state->filename = zend_get_compiled_filename(TSRMLS_C); lex_state->lineno = CG(zend_lineno); - if (CG(multibyte)) { - lex_state->script_org = SCNG(script_org); - lex_state->script_org_size = SCNG(script_org_size); - lex_state->script_filtered = SCNG(script_filtered); - lex_state->script_filtered_size = SCNG(script_filtered_size); - lex_state->input_filter = SCNG(input_filter); - lex_state->output_filter = SCNG(output_filter); - lex_state->script_encoding = SCNG(script_encoding); - lex_state->internal_encoding = SCNG(internal_encoding); - } + lex_state->script_org = SCNG(script_org); + lex_state->script_org_size = SCNG(script_org_size); + lex_state->script_filtered = SCNG(script_filtered); + lex_state->script_filtered_size = SCNG(script_filtered_size); + lex_state->input_filter = SCNG(input_filter); + lex_state->output_filter = SCNG(output_filter); + lex_state->script_encoding = SCNG(script_encoding); } ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) @@ -211,24 +208,22 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) YYSETCONDITION(lex_state->yy_state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); - if (CG(multibyte)) { - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - SCNG(script_org) = lex_state->script_org; - SCNG(script_org_size) = lex_state->script_org_size; - SCNG(script_filtered) = lex_state->script_filtered; - SCNG(script_filtered_size) = lex_state->script_filtered_size; - SCNG(input_filter) = lex_state->input_filter; - SCNG(output_filter) = lex_state->output_filter; - SCNG(script_encoding) = lex_state->script_encoding; - 
SCNG(internal_encoding) = lex_state->internal_encoding; + + if (SCNG(script_org)) { + efree(SCNG(script_org)); + SCNG(script_org) = NULL; } + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_org) = lex_state->script_org; + SCNG(script_org_size) = lex_state->script_org_size; + SCNG(script_filtered) = lex_state->script_filtered; + SCNG(script_filtered_size) = lex_state->script_filtered_size; + SCNG(input_filter) = lex_state->input_filter; + SCNG(output_filter) = lex_state->output_filter; + SCNG(script_encoding) = lex_state->script_encoding; } ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) @@ -241,6 +236,203 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) } } +#define BOM_UTF32_BE "\x00\x00\xfe\xff" +#define BOM_UTF32_LE "\xff\xfe\x00\x00" +#define BOM_UTF16_BE "\xfe\xff" +#define BOM_UTF16_LE "\xff\xfe" +#define BOM_UTF8 "\xef\xbb\xbf" + +static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) +{ + const unsigned char *p; + int wchar_size = 2; + int le = 0; + + /* utf-16 or utf-32? */ + p = script; + while ((p-script) < script_size) { + p = memchr(p, 0, script_size-(p-script)-2); + if (!p) { + break; + } + if (*(p+1) == '\0' && *(p+2) == '\0') { + wchar_size = 4; + break; + } + + /* searching for UTF-32 specific byte orders, so this will do */ + p += 4; + } + + /* BE or LE? */ + p = script; + while ((p-script) < script_size) { + if (*p == '\0' && *(p+wchar_size-1) != '\0') { + /* BE */ + le = 0; + break; + } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { + /* LE* */ + le = 1; + break; + } + p += wchar_size; + } + + if (wchar_size == 2) { + return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; + } else { + return le ? 
zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) +{ + const zend_encoding *script_encoding = NULL; + int bom_size; + unsigned char *script; + unsigned char *pos1, *pos2; + + if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { + return NULL; + } + + /* check out BOM */ + if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf32be; + bom_size = sizeof(BOM_UTF32_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf32le; + bom_size = sizeof(BOM_UTF32_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf16be; + bom_size = sizeof(BOM_UTF16_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf16le; + bom_size = sizeof(BOM_UTF16_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { + script_encoding = zend_multibyte_encoding_utf8; + bom_size = sizeof(BOM_UTF8)-1; + } + + if (script_encoding) { + /* remove BOM */ + script = (unsigned char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size); + memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size); + efree(LANG_SCNG(script_org)); + LANG_SCNG(script_org) = script; + LANG_SCNG(script_org_size) -= bom_size; + + return script_encoding; + } + + /* script contains NULL bytes -> auto-detection */ + if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { + /* check if the NULL byte is after the __HALT_COMPILER(); */ + pos2 = LANG_SCNG(script_org); + + while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { + pos2 = memchr(pos2, '_', pos1 - pos2); + if (!pos2) break; + pos2++; + if (strncasecmp((char*)pos2, 
"_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { + pos2 += sizeof("_HALT_COMPILER")-1; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == '(') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ')') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ';') { + return NULL; + } + } + } + } + } + /* make best effort if BOM is missing */ + return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) +{ + const zend_encoding *script_encoding; + + if (CG(detect_unicode)) { + /* check out bom(byte order mark) and see if containing wchars */ + script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); + if (script_encoding != NULL) { + /* bom or wchar detection is prior to 'script_encoding' option */ + return script_encoding; + } + } + + /* if no script_encoding specified, just leave alone */ + if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { + return NULL; + } + + /* if multiple encodings specified, detect automagically */ + if (CG(script_encoding_list_size) > 1) { + return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); + } + + return CG(script_encoding_list)[0]; +} + +ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + const zend_encoding *script_encoding = onetime_encoding ? 
onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); + + if (!script_encoding) { + return FAILURE; + } + + /* judge input/output filter */ + LANG_SCNG(script_encoding) = script_encoding; + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + + if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { + if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ + LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; + LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; + } else { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + } + return SUCCESS; + } + + /* both script and internal encodings are incompatible w/ flex */ + LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; + LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; + + return 0; +} ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) { @@ -288,13 +480,13 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); if (SCNG(script_filtered) == NULL) { zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); } } SCNG(yy_start) = SCNG(script_filtered) - offset; yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); } else { - SCNG(yy_start) = buf - offset; + SCNG(yy_start) = (unsigned char *)buf - offset; yy_scan_buffer(buf, size TSRMLS_CC); } } else { @@ -443,7 +635,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D SCNG(script_org) = 
(unsigned char *)estrdup(str->value.str.val); SCNG(script_org_size) = str->value.str.len; - zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); + zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); if (!SCNG(input_filter)) { SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); @@ -617,7 +809,7 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter /* calculate current position */ offset = original_offset = YYCURSOR - SCNG(yy_start); if (old_input_filter && offset > 0) { - zend_encoding *new_encoding = SCNG(script_encoding); + const zend_encoding *new_encoding = SCNG(script_encoding); zend_encoding_filter new_filter = SCNG(input_filter); SCNG(script_encoding) = old_encoding; SCNG(input_filter) = old_input_filter; @@ -839,7 +1031,7 @@ restart: yymore_restart: -#line 843 "Zend/zend_language_scanner.c" +#line 1035 "Zend/zend_language_scanner.c" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -938,7 +1130,7 @@ yyc_INITIAL: yy3: YYDEBUG(3, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1631 "Zend/zend_language_scanner.l" +#line 1823 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { return 0; @@ -998,7 +1190,7 @@ inline_html: HANDLE_NEWLINES(yytext, yyleng); return T_INLINE_HTML; } -#line 1002 "Zend/zend_language_scanner.c" +#line 1194 "Zend/zend_language_scanner.c" yy4: YYDEBUG(4, *YYCURSOR); yych = *++YYCURSOR; @@ -1016,7 +1208,7 @@ yy5: yy6: YYDEBUG(6, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1619 "Zend/zend_language_scanner.l" +#line 1811 "Zend/zend_language_scanner.l" { if (CG(short_tags)) { zendlval->value.str.val = yytext; /* no copying - intentional */ @@ -1028,14 +1220,14 @@ yy6: goto inline_char_handler; } } -#line 1032 "Zend/zend_language_scanner.c" +#line 1224 "Zend/zend_language_scanner.c" yy7: YYDEBUG(7, *YYCURSOR); ++YYCURSOR; if ((yych = *YYCURSOR) == '=') goto yy43; YYDEBUG(8, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1596 
"Zend/zend_language_scanner.l" +#line 1788 "Zend/zend_language_scanner.l" { if (CG(asp_tags)) { zendlval->value.str.val = yytext; /* no copying - intentional */ @@ -1047,7 +1239,7 @@ yy7: goto inline_char_handler; } } -#line 1051 "Zend/zend_language_scanner.c" +#line 1243 "Zend/zend_language_scanner.c" yy9: YYDEBUG(9, *YYCURSOR); yych = *++YYCURSOR; @@ -1233,7 +1425,7 @@ yy35: ++YYCURSOR; YYDEBUG(38, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1552 "Zend/zend_language_scanner.l" +#line 1744 "Zend/zend_language_scanner.l" { YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); @@ -1250,7 +1442,7 @@ yy35: BEGIN(ST_IN_SCRIPTING); return T_OPEN_TAG; } -#line 1254 "Zend/zend_language_scanner.c" +#line 1446 "Zend/zend_language_scanner.c" yy39: YYDEBUG(39, *YYCURSOR); yych = *++YYCURSOR; @@ -1277,7 +1469,7 @@ yy43: ++YYCURSOR; YYDEBUG(44, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1570 "Zend/zend_language_scanner.l" +#line 1762 "Zend/zend_language_scanner.l" { if (CG(asp_tags)) { zendlval->value.str.val = yytext; /* no copying - intentional */ @@ -1289,13 +1481,13 @@ yy43: goto inline_char_handler; } } -#line 1293 "Zend/zend_language_scanner.c" +#line 1485 "Zend/zend_language_scanner.c" yy45: YYDEBUG(45, *YYCURSOR); ++YYCURSOR; YYDEBUG(46, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1583 "Zend/zend_language_scanner.l" +#line 1775 "Zend/zend_language_scanner.l" { if (CG(short_tags)) { zendlval->value.str.val = yytext; /* no copying - intentional */ @@ -1307,7 +1499,7 @@ yy45: goto inline_char_handler; } } -#line 1311 "Zend/zend_language_scanner.c" +#line 1503 "Zend/zend_language_scanner.c" yy47: YYDEBUG(47, *YYCURSOR); yych = *++YYCURSOR; @@ -1334,7 +1526,7 @@ yy50: yy51: YYDEBUG(51, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1609 "Zend/zend_language_scanner.l" +#line 1801 "Zend/zend_language_scanner.l" { zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.len 
= yyleng; @@ -1343,7 +1535,7 @@ yy51: BEGIN(ST_IN_SCRIPTING); return T_OPEN_TAG; } -#line 1347 "Zend/zend_language_scanner.c" +#line 1539 "Zend/zend_language_scanner.c" yy52: YYDEBUG(52, *YYCURSOR); ++YYCURSOR; @@ -1414,7 +1606,7 @@ yyc_ST_BACKQUOTE: yy56: YYDEBUG(56, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2082 "Zend/zend_language_scanner.l" +#line 2274 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { return 0; @@ -1455,7 +1647,7 @@ yy56: zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC); return T_ENCAPSED_AND_WHITESPACE; } -#line 1459 "Zend/zend_language_scanner.c" +#line 1651 "Zend/zend_language_scanner.c" yy57: YYDEBUG(57, *YYCURSOR); yych = *++YYCURSOR; @@ -1466,12 +1658,12 @@ yy58: ++YYCURSOR; YYDEBUG(59, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2026 "Zend/zend_language_scanner.l" +#line 2218 "Zend/zend_language_scanner.l" { BEGIN(ST_IN_SCRIPTING); return '`'; } -#line 1475 "Zend/zend_language_scanner.c" +#line 1667 "Zend/zend_language_scanner.c" yy60: YYDEBUG(60, *YYCURSOR); yych = *++YYCURSOR; @@ -1481,14 +1673,14 @@ yy61: ++YYCURSOR; YYDEBUG(62, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2013 "Zend/zend_language_scanner.l" +#line 2205 "Zend/zend_language_scanner.l" { zendlval->value.lval = (long) '{'; yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); yyless(1); return T_CURLY_OPEN; } -#line 1492 "Zend/zend_language_scanner.c" +#line 1684 "Zend/zend_language_scanner.c" yy63: YYDEBUG(63, *YYCURSOR); yyaccept = 0; @@ -1504,24 +1696,24 @@ yy63: yy65: YYDEBUG(65, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1713 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, (yytext+1), (yyleng-1)); zendlval->type = IS_STRING; return T_VARIABLE; } -#line 1514 "Zend/zend_language_scanner.c" +#line 1706 "Zend/zend_language_scanner.c" yy66: YYDEBUG(66, *YYCURSOR); ++YYCURSOR; YYDEBUG(67, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1345 
"Zend/zend_language_scanner.l" +#line 1537 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC); return T_DOLLAR_OPEN_CURLY_BRACES; } -#line 1525 "Zend/zend_language_scanner.c" +#line 1717 "Zend/zend_language_scanner.c" yy68: YYDEBUG(68, *YYCURSOR); yych = *++YYCURSOR; @@ -1535,7 +1727,7 @@ yy70: ++YYCURSOR; YYDEBUG(71, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1705 "Zend/zend_language_scanner.l" +#line 1897 "Zend/zend_language_scanner.l" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET TSRMLS_CC); @@ -1543,7 +1735,7 @@ yy70: zendlval->type = IS_STRING; return T_VARIABLE; } -#line 1547 "Zend/zend_language_scanner.c" +#line 1739 "Zend/zend_language_scanner.c" yy72: YYDEBUG(72, *YYCURSOR); yych = *++YYCURSOR; @@ -1561,7 +1753,7 @@ yy73: ++YYCURSOR; YYDEBUG(74, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1695 "Zend/zend_language_scanner.l" +#line 1887 "Zend/zend_language_scanner.l" { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); @@ -1569,7 +1761,7 @@ yy73: zendlval->type = IS_STRING; return T_VARIABLE; } -#line 1573 "Zend/zend_language_scanner.c" +#line 1765 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_DOUBLE_QUOTES: @@ -1637,7 +1829,7 @@ yy77: yy78: YYDEBUG(78, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2032 "Zend/zend_language_scanner.l" +#line 2224 "Zend/zend_language_scanner.l" { if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; @@ -1686,7 +1878,7 @@ double_quotes_scan_done: zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); return T_ENCAPSED_AND_WHITESPACE; } -#line 1690 "Zend/zend_language_scanner.c" +#line 1882 "Zend/zend_language_scanner.c" yy79: YYDEBUG(79, *YYCURSOR); yych = *++YYCURSOR; @@ -1697,12 +1889,12 @@ yy80: ++YYCURSOR; YYDEBUG(81, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2021 "Zend/zend_language_scanner.l" +#line 2213 "Zend/zend_language_scanner.l" { 
BEGIN(ST_IN_SCRIPTING); return '"'; } -#line 1706 "Zend/zend_language_scanner.c" +#line 1898 "Zend/zend_language_scanner.c" yy82: YYDEBUG(82, *YYCURSOR); yych = *++YYCURSOR; @@ -1712,14 +1904,14 @@ yy83: ++YYCURSOR; YYDEBUG(84, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2013 "Zend/zend_language_scanner.l" +#line 2205 "Zend/zend_language_scanner.l" { zendlval->value.lval = (long) '{'; yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); yyless(1); return T_CURLY_OPEN; } -#line 1723 "Zend/zend_language_scanner.c" +#line 1915 "Zend/zend_language_scanner.c" yy85: YYDEBUG(85, *YYCURSOR); yyaccept = 0; @@ -1735,24 +1927,24 @@ yy85: yy87: YYDEBUG(87, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1713 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, (yytext+1), (yyleng-1)); zendlval->type = IS_STRING; return T_VARIABLE; } -#line 1745 "Zend/zend_language_scanner.c" +#line 1937 "Zend/zend_language_scanner.c" yy88: YYDEBUG(88, *YYCURSOR); ++YYCURSOR; YYDEBUG(89, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1345 "Zend/zend_language_scanner.l" +#line 1537 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC); return T_DOLLAR_OPEN_CURLY_BRACES; } -#line 1756 "Zend/zend_language_scanner.c" +#line 1948 "Zend/zend_language_scanner.c" yy90: YYDEBUG(90, *YYCURSOR); yych = *++YYCURSOR; @@ -1766,7 +1958,7 @@ yy92: ++YYCURSOR; YYDEBUG(93, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1705 "Zend/zend_language_scanner.l" +#line 1897 "Zend/zend_language_scanner.l" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET TSRMLS_CC); @@ -1774,7 +1966,7 @@ yy92: zendlval->type = IS_STRING; return T_VARIABLE; } -#line 1778 "Zend/zend_language_scanner.c" +#line 1970 "Zend/zend_language_scanner.c" yy94: YYDEBUG(94, *YYCURSOR); yych = *++YYCURSOR; @@ -1792,7 +1984,7 @@ yy95: ++YYCURSOR; YYDEBUG(96, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1695 "Zend/zend_language_scanner.l" +#line 1887 
"Zend/zend_language_scanner.l" { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); @@ -1800,7 +1992,7 @@ yy95: zendlval->type = IS_STRING; return T_VARIABLE; } -#line 1804 "Zend/zend_language_scanner.c" +#line 1996 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_END_HEREDOC: @@ -1811,7 +2003,7 @@ yyc_ST_END_HEREDOC: ++YYCURSOR; YYDEBUG(100, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2000 "Zend/zend_language_scanner.l" +#line 2192 "Zend/zend_language_scanner.l" { YYCURSOR += CG(heredoc_len) - 1; yyleng = CG(heredoc_len); @@ -1823,7 +2015,7 @@ yyc_ST_END_HEREDOC: BEGIN(ST_IN_SCRIPTING); return T_END_HEREDOC; } -#line 1827 "Zend/zend_language_scanner.c" +#line 2019 "Zend/zend_language_scanner.c" /* *********************************** */ yyc_ST_HEREDOC: { @@ -1885,7 +2077,7 @@ yy103: yy104: YYDEBUG(104, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2124 "Zend/zend_language_scanner.l" +#line 2316 "Zend/zend_language_scanner.l" { int newline = 0; @@ -1956,7 +2148,7 @@ heredoc_scan_done: zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC); return T_ENCAPSED_AND_WHITESPACE; } -#line 1960 "Zend/zend_language_scanner.c" +#line 2152 "Zend/zend_language_scanner.c" yy105: YYDEBUG(105, *YYCURSOR); yych = *++YYCURSOR; @@ -1971,14 +2163,14 @@ yy107: ++YYCURSOR; YYDEBUG(108, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2013 "Zend/zend_language_scanner.l" +#line 2205 "Zend/zend_language_scanner.l" { zendlval->value.lval = (long) '{'; yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); yyless(1); return T_CURLY_OPEN; } -#line 1982 "Zend/zend_language_scanner.c" +#line 2174 "Zend/zend_language_scanner.c" yy109: YYDEBUG(109, *YYCURSOR); yyaccept = 0; @@ -1994,24 +2186,24 @@ yy109: yy111: YYDEBUG(111, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1713 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, (yytext+1), (yyleng-1)); 
zendlval->type = IS_STRING; return T_VARIABLE; } -#line 2004 "Zend/zend_language_scanner.c" +#line 2196 "Zend/zend_language_scanner.c" yy112: YYDEBUG(112, *YYCURSOR); ++YYCURSOR; YYDEBUG(113, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1345 "Zend/zend_language_scanner.l" +#line 1537 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC); return T_DOLLAR_OPEN_CURLY_BRACES; } -#line 2015 "Zend/zend_language_scanner.c" +#line 2207 "Zend/zend_language_scanner.c" yy114: YYDEBUG(114, *YYCURSOR); yych = *++YYCURSOR; @@ -2025,7 +2217,7 @@ yy116: ++YYCURSOR; YYDEBUG(117, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1705 "Zend/zend_language_scanner.l" +#line 1897 "Zend/zend_language_scanner.l" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET TSRMLS_CC); @@ -2033,7 +2225,7 @@ yy116: zendlval->type = IS_STRING; return T_VARIABLE; } -#line 2037 "Zend/zend_language_scanner.c" +#line 2229 "Zend/zend_language_scanner.c" yy118: YYDEBUG(118, *YYCURSOR); yych = *++YYCURSOR; @@ -2051,7 +2243,7 @@ yy119: ++YYCURSOR; YYDEBUG(120, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1695 "Zend/zend_language_scanner.l" +#line 1887 "Zend/zend_language_scanner.l" { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); @@ -2059,7 +2251,7 @@ yy119: zendlval->type = IS_STRING; return T_VARIABLE; } -#line 2063 "Zend/zend_language_scanner.c" +#line 2255 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_IN_SCRIPTING: @@ -2240,13 +2432,13 @@ yy123: yy124: YYDEBUG(124, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1736 "Zend/zend_language_scanner.l" +#line 1928 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, yytext, yyleng); zendlval->type = IS_STRING; return T_STRING; } -#line 2250 "Zend/zend_language_scanner.c" +#line 2442 "Zend/zend_language_scanner.c" yy125: YYDEBUG(125, *YYCURSOR); yych = *++YYCURSOR; @@ -2500,11 +2692,11 @@ yy137: yy138: YYDEBUG(138, *YYCURSOR); yyleng = YYCURSOR 
- SCNG(yy_text); -#line 1334 "Zend/zend_language_scanner.l" +#line 1526 "Zend/zend_language_scanner.l" { return yytext[0]; } -#line 2508 "Zend/zend_language_scanner.c" +#line 2700 "Zend/zend_language_scanner.c" yy139: YYDEBUG(139, *YYCURSOR); ++YYCURSOR; @@ -2513,7 +2705,7 @@ yy139: yy140: YYDEBUG(140, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1009 "Zend/zend_language_scanner.l" +#line 1201 "Zend/zend_language_scanner.l" { zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.len = yyleng; @@ -2521,7 +2713,7 @@ yy140: HANDLE_NEWLINES(yytext, yyleng); return T_WHITESPACE; } -#line 2525 "Zend/zend_language_scanner.c" +#line 2717 "Zend/zend_language_scanner.c" yy141: YYDEBUG(141, *YYCURSOR); yych = *++YYCURSOR; @@ -2532,11 +2724,11 @@ yy142: ++YYCURSOR; YYDEBUG(143, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1038 "Zend/zend_language_scanner.l" +#line 1230 "Zend/zend_language_scanner.l" { return T_NS_SEPARATOR; } -#line 2540 "Zend/zend_language_scanner.c" +#line 2732 "Zend/zend_language_scanner.c" yy144: YYDEBUG(144, *YYCURSOR); yych = *++YYCURSOR; @@ -2790,18 +2982,18 @@ yy167: ++YYCURSOR; YYDEBUG(168, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1339 "Zend/zend_language_scanner.l" +#line 1531 "Zend/zend_language_scanner.l" { yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); return '{'; } -#line 2799 "Zend/zend_language_scanner.c" +#line 2991 "Zend/zend_language_scanner.c" yy169: YYDEBUG(169, *YYCURSOR); ++YYCURSOR; YYDEBUG(170, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1351 "Zend/zend_language_scanner.l" +#line 1543 "Zend/zend_language_scanner.l" { RESET_DOC_COMMENT(); if (!zend_stack_is_empty(&SCNG(state_stack))) { @@ -2809,7 +3001,7 @@ yy169: } return '}'; } -#line 2813 "Zend/zend_language_scanner.c" +#line 3005 "Zend/zend_language_scanner.c" yy171: YYDEBUG(171, *YYCURSOR); yyaccept = 2; @@ -2832,7 +3024,7 @@ yy171: yy172: YYDEBUG(172, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1377 
"Zend/zend_language_scanner.l" +#line 1569 "Zend/zend_language_scanner.l" { if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ zendlval->value.lval = strtol(yytext, NULL, 0); @@ -2853,7 +3045,7 @@ yy172: zendlval->type = IS_LONG; return T_LNUMBER; } -#line 2857 "Zend/zend_language_scanner.c" +#line 3049 "Zend/zend_language_scanner.c" yy173: YYDEBUG(173, *YYCURSOR); yyaccept = 2; @@ -2881,7 +3073,7 @@ yy175: yy176: YYDEBUG(176, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1743 "Zend/zend_language_scanner.l" +#line 1935 "Zend/zend_language_scanner.l" { while (YYCURSOR < YYLIMIT) { switch (*YYCURSOR++) { @@ -2915,14 +3107,14 @@ yy176: return T_COMMENT; } -#line 2919 "Zend/zend_language_scanner.c" +#line 3111 "Zend/zend_language_scanner.c" yy177: YYDEBUG(177, *YYCURSOR); ++YYCURSOR; yy178: YYDEBUG(178, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1834 "Zend/zend_language_scanner.l" +#line 2026 "Zend/zend_language_scanner.l" { register char *s, *t; char *end; @@ -2990,14 +3182,14 @@ yy178: } return T_CONSTANT_ENCAPSED_STRING; } -#line 2994 "Zend/zend_language_scanner.c" +#line 3186 "Zend/zend_language_scanner.c" yy179: YYDEBUG(179, *YYCURSOR); ++YYCURSOR; yy180: YYDEBUG(180, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1903 "Zend/zend_language_scanner.l" +#line 2095 "Zend/zend_language_scanner.l" { int bprefix = (yytext[0] != '"') ? 
1 : 0; @@ -3038,24 +3230,24 @@ yy180: BEGIN(ST_DOUBLE_QUOTES); return '"'; } -#line 3042 "Zend/zend_language_scanner.c" +#line 3234 "Zend/zend_language_scanner.c" yy181: YYDEBUG(181, *YYCURSOR); ++YYCURSOR; YYDEBUG(182, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1994 "Zend/zend_language_scanner.l" +#line 2186 "Zend/zend_language_scanner.l" { BEGIN(ST_BACKQUOTE); return '`'; } -#line 3053 "Zend/zend_language_scanner.c" +#line 3245 "Zend/zend_language_scanner.c" yy183: YYDEBUG(183, *YYCURSOR); ++YYCURSOR; YYDEBUG(184, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2252 "Zend/zend_language_scanner.l" +#line 2444 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { return 0; @@ -3064,7 +3256,7 @@ yy183: zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); goto restart; } -#line 3068 "Zend/zend_language_scanner.c" +#line 3260 "Zend/zend_language_scanner.c" yy185: YYDEBUG(185, *YYCURSOR); ++YYCURSOR; @@ -3091,13 +3283,13 @@ yy187: yy189: YYDEBUG(189, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1438 "Zend/zend_language_scanner.l" +#line 1630 "Zend/zend_language_scanner.l" { zendlval->value.dval = zend_strtod(yytext, NULL); zendlval->type = IS_DOUBLE; return T_DNUMBER; } -#line 3101 "Zend/zend_language_scanner.c" +#line 3293 "Zend/zend_language_scanner.c" yy190: YYDEBUG(190, *YYCURSOR); yyaccept = 2; @@ -3182,7 +3374,7 @@ yy198: } YYDEBUG(200, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1398 "Zend/zend_language_scanner.l" +#line 1590 "Zend/zend_language_scanner.l" { char *hex = yytext + 2; /* Skip "0x" */ int len = yyleng - 2; @@ -3203,7 +3395,7 @@ yy198: return T_DNUMBER; } } -#line 3207 "Zend/zend_language_scanner.c" +#line 3399 "Zend/zend_language_scanner.c" yy201: YYDEBUG(201, *YYCURSOR); ++YYCURSOR; @@ -3212,7 +3404,7 @@ yy201: yy202: YYDEBUG(202, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1811 "Zend/zend_language_scanner.l" +#line 2003 
"Zend/zend_language_scanner.l" { zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.len = yyleng; @@ -3220,7 +3412,7 @@ yy202: BEGIN(INITIAL); return T_CLOSE_TAG; /* implicit ';' at php-end tag */ } -#line 3224 "Zend/zend_language_scanner.c" +#line 3416 "Zend/zend_language_scanner.c" yy203: YYDEBUG(203, *YYCURSOR); yych = *++YYCURSOR; @@ -3254,13 +3446,13 @@ yy205: yy207: YYDEBUG(207, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1713 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, (yytext+1), (yyleng-1)); zendlval->type = IS_STRING; return T_VARIABLE; } -#line 3264 "Zend/zend_language_scanner.c" +#line 3456 "Zend/zend_language_scanner.c" yy208: YYDEBUG(208, *YYCURSOR); yych = *++YYCURSOR; @@ -3274,61 +3466,61 @@ yy209: } YYDEBUG(210, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1322 "Zend/zend_language_scanner.l" +#line 1514 "Zend/zend_language_scanner.l" { return T_LOGICAL_XOR; } -#line 3282 "Zend/zend_language_scanner.c" +#line 3474 "Zend/zend_language_scanner.c" yy211: YYDEBUG(211, *YYCURSOR); ++YYCURSOR; YYDEBUG(212, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1302 "Zend/zend_language_scanner.l" +#line 1494 "Zend/zend_language_scanner.l" { return T_XOR_EQUAL; } -#line 3292 "Zend/zend_language_scanner.c" +#line 3484 "Zend/zend_language_scanner.c" yy213: YYDEBUG(213, *YYCURSOR); ++YYCURSOR; YYDEBUG(214, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1306 "Zend/zend_language_scanner.l" +#line 1498 "Zend/zend_language_scanner.l" { return T_BOOLEAN_OR; } -#line 3302 "Zend/zend_language_scanner.c" +#line 3494 "Zend/zend_language_scanner.c" yy215: YYDEBUG(215, *YYCURSOR); ++YYCURSOR; YYDEBUG(216, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1298 "Zend/zend_language_scanner.l" +#line 1490 "Zend/zend_language_scanner.l" { return T_OR_EQUAL; } -#line 3312 "Zend/zend_language_scanner.c" +#line 3504 "Zend/zend_language_scanner.c" yy217: 
YYDEBUG(217, *YYCURSOR); ++YYCURSOR; YYDEBUG(218, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1310 "Zend/zend_language_scanner.l" +#line 1502 "Zend/zend_language_scanner.l" { return T_BOOLEAN_AND; } -#line 3322 "Zend/zend_language_scanner.c" +#line 3514 "Zend/zend_language_scanner.c" yy219: YYDEBUG(219, *YYCURSOR); ++YYCURSOR; YYDEBUG(220, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1294 "Zend/zend_language_scanner.l" +#line 1486 "Zend/zend_language_scanner.l" { return T_AND_EQUAL; } -#line 3332 "Zend/zend_language_scanner.c" +#line 3524 "Zend/zend_language_scanner.c" yy221: YYDEBUG(221, *YYCURSOR); ++YYCURSOR; @@ -3337,7 +3529,7 @@ yy221: yy222: YYDEBUG(222, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1820 "Zend/zend_language_scanner.l" +#line 2012 "Zend/zend_language_scanner.l" { if (CG(asp_tags)) { BEGIN(INITIAL); @@ -3350,17 +3542,17 @@ yy222: return yytext[0]; } } -#line 3354 "Zend/zend_language_scanner.c" +#line 3546 "Zend/zend_language_scanner.c" yy223: YYDEBUG(223, *YYCURSOR); ++YYCURSOR; YYDEBUG(224, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1282 "Zend/zend_language_scanner.l" +#line 1474 "Zend/zend_language_scanner.l" { return T_MOD_EQUAL; } -#line 3364 "Zend/zend_language_scanner.c" +#line 3556 "Zend/zend_language_scanner.c" yy225: YYDEBUG(225, *YYCURSOR); yych = *++YYCURSOR; @@ -3391,11 +3583,11 @@ yy229: ++YYCURSOR; YYDEBUG(230, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1278 "Zend/zend_language_scanner.l" +#line 1470 "Zend/zend_language_scanner.l" { return T_CONCAT_EQUAL; } -#line 3399 "Zend/zend_language_scanner.c" +#line 3591 "Zend/zend_language_scanner.c" yy231: YYDEBUG(231, *YYCURSOR); yyaccept = 4; @@ -3404,7 +3596,7 @@ yy231: yy232: YYDEBUG(232, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1777 "Zend/zend_language_scanner.l" +#line 1969 "Zend/zend_language_scanner.l" { int doc_com; @@ -3438,7 +3630,7 @@ yy232: return T_COMMENT; } -#line 3442 "Zend/zend_language_scanner.c" +#line 3634 
"Zend/zend_language_scanner.c" yy233: YYDEBUG(233, *YYCURSOR); yych = *++YYCURSOR; @@ -3448,11 +3640,11 @@ yy234: ++YYCURSOR; YYDEBUG(235, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1274 "Zend/zend_language_scanner.l" +#line 1466 "Zend/zend_language_scanner.l" { return T_DIV_EQUAL; } -#line 3456 "Zend/zend_language_scanner.c" +#line 3648 "Zend/zend_language_scanner.c" yy236: YYDEBUG(236, *YYCURSOR); yych = *++YYCURSOR; @@ -3475,42 +3667,42 @@ yy239: ++YYCURSOR; YYDEBUG(240, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1270 "Zend/zend_language_scanner.l" +#line 1462 "Zend/zend_language_scanner.l" { return T_MUL_EQUAL; } -#line 3483 "Zend/zend_language_scanner.c" +#line 3675 "Zend/zend_language_scanner.c" yy241: YYDEBUG(241, *YYCURSOR); ++YYCURSOR; if ((yych = *YYCURSOR) == '=') goto yy245; YYDEBUG(242, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1330 "Zend/zend_language_scanner.l" +#line 1522 "Zend/zend_language_scanner.l" { return T_SR; } -#line 3494 "Zend/zend_language_scanner.c" +#line 3686 "Zend/zend_language_scanner.c" yy243: YYDEBUG(243, *YYCURSOR); ++YYCURSOR; YYDEBUG(244, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1258 "Zend/zend_language_scanner.l" +#line 1450 "Zend/zend_language_scanner.l" { return T_IS_GREATER_OR_EQUAL; } -#line 3504 "Zend/zend_language_scanner.c" +#line 3696 "Zend/zend_language_scanner.c" yy245: YYDEBUG(245, *YYCURSOR); ++YYCURSOR; YYDEBUG(246, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1290 "Zend/zend_language_scanner.l" +#line 1482 "Zend/zend_language_scanner.l" { return T_SR_EQUAL; } -#line 3514 "Zend/zend_language_scanner.c" +#line 3706 "Zend/zend_language_scanner.c" yy247: YYDEBUG(247, *YYCURSOR); yyaccept = 5; @@ -3521,11 +3713,11 @@ yy247: yy248: YYDEBUG(248, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1326 "Zend/zend_language_scanner.l" +#line 1518 "Zend/zend_language_scanner.l" { return T_SL; } -#line 3529 "Zend/zend_language_scanner.c" +#line 3721 
"Zend/zend_language_scanner.c" yy249: YYDEBUG(249, *YYCURSOR); yych = *++YYCURSOR; @@ -3537,22 +3729,22 @@ yy250: ++YYCURSOR; YYDEBUG(251, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1254 "Zend/zend_language_scanner.l" +#line 1446 "Zend/zend_language_scanner.l" { return T_IS_SMALLER_OR_EQUAL; } -#line 3545 "Zend/zend_language_scanner.c" +#line 3737 "Zend/zend_language_scanner.c" yy252: YYDEBUG(252, *YYCURSOR); ++YYCURSOR; yy253: YYDEBUG(253, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1250 "Zend/zend_language_scanner.l" +#line 1442 "Zend/zend_language_scanner.l" { return T_IS_NOT_EQUAL; } -#line 3556 "Zend/zend_language_scanner.c" +#line 3748 "Zend/zend_language_scanner.c" yy254: YYDEBUG(254, *YYCURSOR); yych = *++YYCURSOR; @@ -3603,11 +3795,11 @@ yy261: ++YYCURSOR; YYDEBUG(262, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1286 "Zend/zend_language_scanner.l" +#line 1478 "Zend/zend_language_scanner.l" { return T_SL_EQUAL; } -#line 3611 "Zend/zend_language_scanner.c" +#line 3803 "Zend/zend_language_scanner.c" yy263: YYDEBUG(263, *YYCURSOR); ++YYCURSOR; @@ -3712,7 +3904,7 @@ yy272: yy273: YYDEBUG(273, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1945 "Zend/zend_language_scanner.l" +#line 2137 "Zend/zend_language_scanner.l" { char *s; int bprefix = (yytext[0] != '<') ? 
1 : 0; @@ -3760,7 +3952,7 @@ yy273: return T_START_HEREDOC; } -#line 3764 "Zend/zend_language_scanner.c" +#line 3956 "Zend/zend_language_scanner.c" yy274: YYDEBUG(274, *YYCURSOR); yych = *++YYCURSOR; @@ -3800,31 +3992,31 @@ yy277: ++YYCURSOR; YYDEBUG(279, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1242 "Zend/zend_language_scanner.l" +#line 1434 "Zend/zend_language_scanner.l" { return T_IS_NOT_IDENTICAL; } -#line 3808 "Zend/zend_language_scanner.c" +#line 4000 "Zend/zend_language_scanner.c" yy280: YYDEBUG(280, *YYCURSOR); ++YYCURSOR; YYDEBUG(281, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1262 "Zend/zend_language_scanner.l" +#line 1454 "Zend/zend_language_scanner.l" { return T_PLUS_EQUAL; } -#line 3818 "Zend/zend_language_scanner.c" +#line 4010 "Zend/zend_language_scanner.c" yy282: YYDEBUG(282, *YYCURSOR); ++YYCURSOR; YYDEBUG(283, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1230 "Zend/zend_language_scanner.l" +#line 1422 "Zend/zend_language_scanner.l" { return T_INC; } -#line 3828 "Zend/zend_language_scanner.c" +#line 4020 "Zend/zend_language_scanner.c" yy284: YYDEBUG(284, *YYCURSOR); yych = *++YYCURSOR; @@ -3839,11 +4031,11 @@ yy285: } YYDEBUG(286, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1314 "Zend/zend_language_scanner.l" +#line 1506 "Zend/zend_language_scanner.l" { return T_LOGICAL_OR; } -#line 3847 "Zend/zend_language_scanner.c" +#line 4039 "Zend/zend_language_scanner.c" yy287: YYDEBUG(287, *YYCURSOR); yych = *++YYCURSOR; @@ -3922,12 +4114,12 @@ yy295: ++YYCURSOR; YYDEBUG(296, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1225 "Zend/zend_language_scanner.l" +#line 1417 "Zend/zend_language_scanner.l" { yyless((sizeof("object") - 1)); return T_OBJECT_HINT; } -#line 3931 "Zend/zend_language_scanner.c" +#line 4123 "Zend/zend_language_scanner.c" yy297: YYDEBUG(297, *YYCURSOR); yych = *++YYCURSOR; @@ -4020,7 +4212,7 @@ yy305: ++YYCURSOR; YYDEBUG(306, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1198 
"Zend/zend_language_scanner.l" +#line 1390 "Zend/zend_language_scanner.l" { if (yytext[3] == 'e') { yyless((sizeof("integer") - 1)); @@ -4031,7 +4223,7 @@ yy305: } return T_INT_HINT; } -#line 4035 "Zend/zend_language_scanner.c" +#line 4227 "Zend/zend_language_scanner.c" yy307: YYDEBUG(307, *YYCURSOR); yych = *++YYCURSOR; @@ -4045,42 +4237,42 @@ yy308: } YYDEBUG(309, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1166 "Zend/zend_language_scanner.l" +#line 1358 "Zend/zend_language_scanner.l" { return T_LIST; } -#line 4053 "Zend/zend_language_scanner.c" +#line 4245 "Zend/zend_language_scanner.c" yy310: YYDEBUG(310, *YYCURSOR); ++YYCURSOR; if ((yych = *YYCURSOR) == '=') goto yy314; YYDEBUG(311, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1246 "Zend/zend_language_scanner.l" +#line 1438 "Zend/zend_language_scanner.l" { return T_IS_EQUAL; } -#line 4064 "Zend/zend_language_scanner.c" +#line 4256 "Zend/zend_language_scanner.c" yy312: YYDEBUG(312, *YYCURSOR); ++YYCURSOR; YYDEBUG(313, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1162 "Zend/zend_language_scanner.l" +#line 1354 "Zend/zend_language_scanner.l" { return T_DOUBLE_ARROW; } -#line 4074 "Zend/zend_language_scanner.c" +#line 4266 "Zend/zend_language_scanner.c" yy314: YYDEBUG(314, *YYCURSOR); ++YYCURSOR; YYDEBUG(315, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1238 "Zend/zend_language_scanner.l" +#line 1430 "Zend/zend_language_scanner.l" { return T_IS_IDENTICAL; } -#line 4084 "Zend/zend_language_scanner.c" +#line 4276 "Zend/zend_language_scanner.c" yy316: YYDEBUG(316, *YYCURSOR); yych = *++YYCURSOR; @@ -4202,7 +4394,7 @@ yy331: } YYDEBUG(334, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1542 "Zend/zend_language_scanner.l" +#line 1734 "Zend/zend_language_scanner.l" { if (CG(current_namespace)) { *zendlval = *CG(current_namespace); @@ -4212,7 +4404,7 @@ yy331: } return T_NS_C; } -#line 4216 "Zend/zend_language_scanner.c" +#line 4408 "Zend/zend_language_scanner.c" yy335: 
YYDEBUG(335, *YYCURSOR); yych = *++YYCURSOR; @@ -4232,7 +4424,7 @@ yy336: } YYDEBUG(339, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1515 "Zend/zend_language_scanner.l" +#line 1707 "Zend/zend_language_scanner.l" { char *filename = zend_get_compiled_filename(TSRMLS_C); const size_t filename_len = strlen(filename); @@ -4259,7 +4451,7 @@ yy336: zendlval->type = IS_STRING; return T_DIR; } -#line 4263 "Zend/zend_language_scanner.c" +#line 4455 "Zend/zend_language_scanner.c" yy340: YYDEBUG(340, *YYCURSOR); yych = *++YYCURSOR; @@ -4284,13 +4476,13 @@ yy342: } YYDEBUG(345, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1497 "Zend/zend_language_scanner.l" +#line 1689 "Zend/zend_language_scanner.l" { zendlval->value.lval = CG(zend_lineno); zendlval->type = IS_LONG; return T_LINE; } -#line 4294 "Zend/zend_language_scanner.c" +#line 4486 "Zend/zend_language_scanner.c" yy346: YYDEBUG(346, *YYCURSOR); yych = *++YYCURSOR; @@ -4325,7 +4517,7 @@ yy350: } YYDEBUG(353, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1476 "Zend/zend_language_scanner.l" +#line 1668 "Zend/zend_language_scanner.l" { char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL; char *func_name = CG(active_op_array)? 
CG(active_op_array)->function_name : NULL; @@ -4346,7 +4538,7 @@ yy350: zendlval->type = IS_STRING; return T_METHOD_C; } -#line 4350 "Zend/zend_language_scanner.c" +#line 4542 "Zend/zend_language_scanner.c" yy354: YYDEBUG(354, *YYCURSOR); yych = *++YYCURSOR; @@ -4397,7 +4589,7 @@ yy361: } YYDEBUG(364, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1460 "Zend/zend_language_scanner.l" +#line 1652 "Zend/zend_language_scanner.l" { char *func_name = NULL; @@ -4413,7 +4605,7 @@ yy361: zendlval->type = IS_STRING; return T_FUNC_C; } -#line 4417 "Zend/zend_language_scanner.c" +#line 4609 "Zend/zend_language_scanner.c" yy365: YYDEBUG(365, *YYCURSOR); yych = *++YYCURSOR; @@ -4433,7 +4625,7 @@ yy366: } YYDEBUG(369, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1503 "Zend/zend_language_scanner.l" +#line 1695 "Zend/zend_language_scanner.l" { char *filename = zend_get_compiled_filename(TSRMLS_C); @@ -4445,7 +4637,7 @@ yy366: zendlval->type = IS_STRING; return T_FILE; } -#line 4449 "Zend/zend_language_scanner.c" +#line 4641 "Zend/zend_language_scanner.c" yy370: YYDEBUG(370, *YYCURSOR); yych = *++YYCURSOR; @@ -4475,7 +4667,7 @@ yy373: } YYDEBUG(376, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1444 "Zend/zend_language_scanner.l" +#line 1636 "Zend/zend_language_scanner.l" { char *class_name = NULL; @@ -4491,7 +4683,7 @@ yy373: zendlval->type = IS_STRING; return T_CLASS_C; } -#line 4495 "Zend/zend_language_scanner.c" +#line 4687 "Zend/zend_language_scanner.c" yy377: YYDEBUG(377, *YYCURSOR); yych = *++YYCURSOR; @@ -4553,11 +4745,11 @@ yy388: } YYDEBUG(389, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1130 "Zend/zend_language_scanner.l" +#line 1322 "Zend/zend_language_scanner.l" { return T_HALT_COMPILER; } -#line 4561 "Zend/zend_language_scanner.c" +#line 4753 "Zend/zend_language_scanner.c" yy390: YYDEBUG(390, *YYCURSOR); yych = *++YYCURSOR; @@ -4577,11 +4769,11 @@ yy392: } YYDEBUG(393, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1110 
"Zend/zend_language_scanner.l" +#line 1302 "Zend/zend_language_scanner.l" { return T_USE; } -#line 4585 "Zend/zend_language_scanner.c" +#line 4777 "Zend/zend_language_scanner.c" yy394: YYDEBUG(394, *YYCURSOR); yych = *++YYCURSOR; @@ -4600,11 +4792,11 @@ yy396: } YYDEBUG(397, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1158 "Zend/zend_language_scanner.l" +#line 1350 "Zend/zend_language_scanner.l" { return T_UNSET; } -#line 4608 "Zend/zend_language_scanner.c" +#line 4800 "Zend/zend_language_scanner.c" yy398: YYDEBUG(398, *YYCURSOR); ++YYCURSOR; @@ -4786,11 +4978,11 @@ yy413: ++YYCURSOR; YYDEBUG(415, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1054 "Zend/zend_language_scanner.l" +#line 1246 "Zend/zend_language_scanner.l" { return T_INT_CAST; } -#line 4794 "Zend/zend_language_scanner.c" +#line 4986 "Zend/zend_language_scanner.c" yy416: YYDEBUG(416, *YYCURSOR); yych = *++YYCURSOR; @@ -4834,11 +5026,11 @@ yy421: ++YYCURSOR; YYDEBUG(424, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1058 "Zend/zend_language_scanner.l" +#line 1250 "Zend/zend_language_scanner.l" { return T_DOUBLE_CAST; } -#line 4842 "Zend/zend_language_scanner.c" +#line 5034 "Zend/zend_language_scanner.c" yy425: YYDEBUG(425, *YYCURSOR); yych = *++YYCURSOR; @@ -4914,11 +5106,11 @@ yy436: ++YYCURSOR; YYDEBUG(439, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1066 "Zend/zend_language_scanner.l" +#line 1258 "Zend/zend_language_scanner.l" { return T_SCALAR_CAST; } -#line 4922 "Zend/zend_language_scanner.c" +#line 5114 "Zend/zend_language_scanner.c" yy440: YYDEBUG(440, *YYCURSOR); yych = *++YYCURSOR; @@ -4951,11 +5143,11 @@ yy443: ++YYCURSOR; YYDEBUG(446, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1062 "Zend/zend_language_scanner.l" +#line 1254 "Zend/zend_language_scanner.l" { return T_STRING_CAST; } -#line 4959 "Zend/zend_language_scanner.c" +#line 5151 "Zend/zend_language_scanner.c" yy447: YYDEBUG(447, *YYCURSOR); yych = *++YYCURSOR; @@ -4988,11 +5180,11 @@ yy450: 
++YYCURSOR; YYDEBUG(453, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1070 "Zend/zend_language_scanner.l" +#line 1262 "Zend/zend_language_scanner.l" { return T_ARRAY_CAST; } -#line 4996 "Zend/zend_language_scanner.c" +#line 5188 "Zend/zend_language_scanner.c" yy454: YYDEBUG(454, *YYCURSOR); yych = *++YYCURSOR; @@ -5030,11 +5222,11 @@ yy458: ++YYCURSOR; YYDEBUG(461, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1074 "Zend/zend_language_scanner.l" +#line 1266 "Zend/zend_language_scanner.l" { return T_OBJECT_CAST; } -#line 5038 "Zend/zend_language_scanner.c" +#line 5230 "Zend/zend_language_scanner.c" yy462: YYDEBUG(462, *YYCURSOR); yych = *++YYCURSOR; @@ -5075,11 +5267,11 @@ yy467: ++YYCURSOR; YYDEBUG(469, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1078 "Zend/zend_language_scanner.l" +#line 1270 "Zend/zend_language_scanner.l" { return T_BOOL_CAST; } -#line 5083 "Zend/zend_language_scanner.c" +#line 5275 "Zend/zend_language_scanner.c" yy470: YYDEBUG(470, *YYCURSOR); yych = *++YYCURSOR; @@ -5139,11 +5331,11 @@ yy478: ++YYCURSOR; YYDEBUG(481, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1082 "Zend/zend_language_scanner.l" +#line 1274 "Zend/zend_language_scanner.l" { return T_UNSET_CAST; } -#line 5147 "Zend/zend_language_scanner.c" +#line 5339 "Zend/zend_language_scanner.c" yy482: YYDEBUG(482, *YYCURSOR); yych = *++YYCURSOR; @@ -5157,11 +5349,11 @@ yy483: } YYDEBUG(484, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1050 "Zend/zend_language_scanner.l" +#line 1242 "Zend/zend_language_scanner.l" { return T_VAR; } -#line 5165 "Zend/zend_language_scanner.c" +#line 5357 "Zend/zend_language_scanner.c" yy485: YYDEBUG(485, *YYCURSOR); yych = *++YYCURSOR; @@ -5275,12 +5467,12 @@ yy497: ++YYCURSOR; YYDEBUG(498, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1193 "Zend/zend_language_scanner.l" +#line 1385 "Zend/zend_language_scanner.l" { yyless(7); return T_NUMERIC_HINT; } -#line 5284 "Zend/zend_language_scanner.c" +#line 5476 
"Zend/zend_language_scanner.c" yy499: YYDEBUG(499, *YYCURSOR); ++YYCURSOR; @@ -5289,11 +5481,11 @@ yy499: } YYDEBUG(500, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1042 "Zend/zend_language_scanner.l" +#line 1234 "Zend/zend_language_scanner.l" { return T_NEW; } -#line 5297 "Zend/zend_language_scanner.c" +#line 5489 "Zend/zend_language_scanner.c" yy501: YYDEBUG(501, *YYCURSOR); yych = *++YYCURSOR; @@ -5332,21 +5524,21 @@ yy507: } YYDEBUG(508, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1106 "Zend/zend_language_scanner.l" +#line 1298 "Zend/zend_language_scanner.l" { return T_NAMESPACE; } -#line 5340 "Zend/zend_language_scanner.c" +#line 5532 "Zend/zend_language_scanner.c" yy509: YYDEBUG(509, *YYCURSOR); ++YYCURSOR; YYDEBUG(510, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1034 "Zend/zend_language_scanner.l" +#line 1226 "Zend/zend_language_scanner.l" { return T_PAAMAYIM_NEKUDOTAYIM; } -#line 5350 "Zend/zend_language_scanner.c" +#line 5542 "Zend/zend_language_scanner.c" yy511: YYDEBUG(511, *YYCURSOR); ++YYCURSOR; @@ -5368,32 +5560,32 @@ yy513: ++YYCURSOR; YYDEBUG(514, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1266 "Zend/zend_language_scanner.l" +#line 1458 "Zend/zend_language_scanner.l" { return T_MINUS_EQUAL; } -#line 5376 "Zend/zend_language_scanner.c" +#line 5568 "Zend/zend_language_scanner.c" yy515: YYDEBUG(515, *YYCURSOR); ++YYCURSOR; YYDEBUG(516, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1234 "Zend/zend_language_scanner.l" +#line 1426 "Zend/zend_language_scanner.l" { return T_DEC; } -#line 5386 "Zend/zend_language_scanner.c" +#line 5578 "Zend/zend_language_scanner.c" yy517: YYDEBUG(517, *YYCURSOR); ++YYCURSOR; YYDEBUG(518, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1004 "Zend/zend_language_scanner.l" +#line 1196 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); return T_OBJECT_OPERATOR; } -#line 5397 "Zend/zend_language_scanner.c" +#line 5589 
"Zend/zend_language_scanner.c" yy519: YYDEBUG(519, *YYCURSOR); yych = *++YYCURSOR; @@ -5438,11 +5630,11 @@ yy524: } YYDEBUG(525, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1154 "Zend/zend_language_scanner.l" +#line 1346 "Zend/zend_language_scanner.l" { return T_PUBLIC; } -#line 5446 "Zend/zend_language_scanner.c" +#line 5638 "Zend/zend_language_scanner.c" yy526: YYDEBUG(526, *YYCURSOR); yych = *++YYCURSOR; @@ -5497,11 +5689,11 @@ yy533: } YYDEBUG(534, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1150 "Zend/zend_language_scanner.l" +#line 1342 "Zend/zend_language_scanner.l" { return T_PROTECTED; } -#line 5505 "Zend/zend_language_scanner.c" +#line 5697 "Zend/zend_language_scanner.c" yy535: YYDEBUG(535, *YYCURSOR); yych = *++YYCURSOR; @@ -5531,11 +5723,11 @@ yy539: } YYDEBUG(540, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1146 "Zend/zend_language_scanner.l" +#line 1338 "Zend/zend_language_scanner.l" { return T_PRIVATE; } -#line 5539 "Zend/zend_language_scanner.c" +#line 5731 "Zend/zend_language_scanner.c" yy541: YYDEBUG(541, *YYCURSOR); ++YYCURSOR; @@ -5544,11 +5736,11 @@ yy541: } YYDEBUG(542, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 980 "Zend/zend_language_scanner.l" +#line 1172 "Zend/zend_language_scanner.l" { return T_PRINT; } -#line 5552 "Zend/zend_language_scanner.c" +#line 5744 "Zend/zend_language_scanner.c" yy543: YYDEBUG(543, *YYCURSOR); yych = *++YYCURSOR; @@ -5573,11 +5765,11 @@ yy546: } YYDEBUG(547, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 972 "Zend/zend_language_scanner.l" +#line 1164 "Zend/zend_language_scanner.l" { return T_GOTO; } -#line 5581 "Zend/zend_language_scanner.c" +#line 5773 "Zend/zend_language_scanner.c" yy548: YYDEBUG(548, *YYCURSOR); yych = *++YYCURSOR; @@ -5601,11 +5793,11 @@ yy551: } YYDEBUG(552, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1118 "Zend/zend_language_scanner.l" +#line 1310 "Zend/zend_language_scanner.l" { return T_GLOBAL; } -#line 5609 
"Zend/zend_language_scanner.c" +#line 5801 "Zend/zend_language_scanner.c" yy553: YYDEBUG(553, *YYCURSOR); yych = *++YYCURSOR; @@ -5654,11 +5846,11 @@ yy561: } YYDEBUG(562, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 964 "Zend/zend_language_scanner.l" +#line 1156 "Zend/zend_language_scanner.l" { return T_BREAK; } -#line 5662 "Zend/zend_language_scanner.c" +#line 5854 "Zend/zend_language_scanner.c" yy563: YYDEBUG(563, *YYCURSOR); yych = *++YYCURSOR; @@ -5763,7 +5955,7 @@ yy569: ++YYCURSOR; YYDEBUG(570, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1174 "Zend/zend_language_scanner.l" +#line 1366 "Zend/zend_language_scanner.l" { if (yytext[4] == 'e') { yyless((sizeof("boolean") - 1)); @@ -5772,7 +5964,7 @@ yy569: } return T_BOOL_HINT; } -#line 5776 "Zend/zend_language_scanner.c" +#line 5968 "Zend/zend_language_scanner.c" yy571: YYDEBUG(571, *YYCURSOR); yych = *++YYCURSOR; @@ -5899,12 +6091,12 @@ yy582: ++YYCURSOR; YYDEBUG(583, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1183 "Zend/zend_language_scanner.l" +#line 1375 "Zend/zend_language_scanner.l" { yyless(6); return T_STRING_HINT; } -#line 5908 "Zend/zend_language_scanner.c" +#line 6100 "Zend/zend_language_scanner.c" yy584: YYDEBUG(584, *YYCURSOR); yych = *++YYCURSOR; @@ -5960,11 +6152,11 @@ yy591: } YYDEBUG(592, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 948 "Zend/zend_language_scanner.l" +#line 1140 "Zend/zend_language_scanner.l" { return T_SWITCH; } -#line 5968 "Zend/zend_language_scanner.c" +#line 6160 "Zend/zend_language_scanner.c" yy593: YYDEBUG(593, *YYCURSOR); yych = *++YYCURSOR; @@ -6005,11 +6197,11 @@ yy599: } YYDEBUG(600, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1134 "Zend/zend_language_scanner.l" +#line 1326 "Zend/zend_language_scanner.l" { return T_STATIC; } -#line 6013 "Zend/zend_language_scanner.c" +#line 6205 "Zend/zend_language_scanner.c" yy601: YYDEBUG(601, *YYCURSOR); yych = *++YYCURSOR; @@ -6101,12 +6293,12 @@ yy609: ++YYCURSOR; YYDEBUG(610, 
*YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1188 "Zend/zend_language_scanner.l" +#line 1380 "Zend/zend_language_scanner.l" { yyless(6); return T_SCALAR_HINT; } -#line 6110 "Zend/zend_language_scanner.c" +#line 6302 "Zend/zend_language_scanner.c" yy611: YYDEBUG(611, *YYCURSOR); yych = *++YYCURSOR; @@ -6133,11 +6325,11 @@ yy614: } YYDEBUG(615, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 944 "Zend/zend_language_scanner.l" +#line 1136 "Zend/zend_language_scanner.l" { return T_AS; } -#line 6141 "Zend/zend_language_scanner.c" +#line 6333 "Zend/zend_language_scanner.c" yy616: YYDEBUG(616, *YYCURSOR); yych = *++YYCURSOR; @@ -6156,11 +6348,11 @@ yy618: } YYDEBUG(619, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1170 "Zend/zend_language_scanner.l" +#line 1362 "Zend/zend_language_scanner.l" { return T_ARRAY; } -#line 6164 "Zend/zend_language_scanner.c" +#line 6356 "Zend/zend_language_scanner.c" yy620: YYDEBUG(620, *YYCURSOR); ++YYCURSOR; @@ -6169,11 +6361,11 @@ yy620: } YYDEBUG(621, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1318 "Zend/zend_language_scanner.l" +#line 1510 "Zend/zend_language_scanner.l" { return T_LOGICAL_AND; } -#line 6177 "Zend/zend_language_scanner.c" +#line 6369 "Zend/zend_language_scanner.c" yy622: YYDEBUG(622, *YYCURSOR); yych = *++YYCURSOR; @@ -6207,11 +6399,11 @@ yy627: } YYDEBUG(628, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1138 "Zend/zend_language_scanner.l" +#line 1330 "Zend/zend_language_scanner.l" { return T_ABSTRACT; } -#line 6215 "Zend/zend_language_scanner.c" +#line 6407 "Zend/zend_language_scanner.c" yy629: YYDEBUG(629, *YYCURSOR); yych = *++YYCURSOR; @@ -6235,11 +6427,11 @@ yy632: } YYDEBUG(633, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 904 "Zend/zend_language_scanner.l" +#line 1096 "Zend/zend_language_scanner.l" { return T_WHILE; } -#line 6243 "Zend/zend_language_scanner.c" +#line 6435 "Zend/zend_language_scanner.c" yy634: YYDEBUG(634, *YYCURSOR); ++YYCURSOR; @@ -6248,11 
+6440,11 @@ yy634: } YYDEBUG(635, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 888 "Zend/zend_language_scanner.l" +#line 1080 "Zend/zend_language_scanner.l" { return T_IF; } -#line 6256 "Zend/zend_language_scanner.c" +#line 6448 "Zend/zend_language_scanner.c" yy636: YYDEBUG(636, *YYCURSOR); yych = *++YYCURSOR; @@ -6304,11 +6496,11 @@ yy641: } YYDEBUG(642, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1122 "Zend/zend_language_scanner.l" +#line 1314 "Zend/zend_language_scanner.l" { return T_ISSET; } -#line 6312 "Zend/zend_language_scanner.c" +#line 6504 "Zend/zend_language_scanner.c" yy643: YYDEBUG(643, *YYCURSOR); yych = *++YYCURSOR; @@ -6404,11 +6596,11 @@ yy649: yy650: YYDEBUG(650, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1090 "Zend/zend_language_scanner.l" +#line 1282 "Zend/zend_language_scanner.l" { return T_INCLUDE; } -#line 6412 "Zend/zend_language_scanner.c" +#line 6604 "Zend/zend_language_scanner.c" yy651: YYDEBUG(651, *YYCURSOR); yych = *++YYCURSOR; @@ -6437,11 +6629,11 @@ yy655: } YYDEBUG(656, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1094 "Zend/zend_language_scanner.l" +#line 1286 "Zend/zend_language_scanner.l" { return T_INCLUDE_ONCE; } -#line 6445 "Zend/zend_language_scanner.c" +#line 6637 "Zend/zend_language_scanner.c" yy657: YYDEBUG(657, *YYCURSOR); yych = *++YYCURSOR; @@ -6495,11 +6687,11 @@ yy664: } YYDEBUG(665, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 988 "Zend/zend_language_scanner.l" +#line 1180 "Zend/zend_language_scanner.l" { return T_INTERFACE; } -#line 6503 "Zend/zend_language_scanner.c" +#line 6695 "Zend/zend_language_scanner.c" yy666: YYDEBUG(666, *YYCURSOR); yych = *++YYCURSOR; @@ -6549,11 +6741,11 @@ yy672: } YYDEBUG(673, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1114 "Zend/zend_language_scanner.l" +#line 1306 "Zend/zend_language_scanner.l" { return T_INSTEADOF; } -#line 6557 "Zend/zend_language_scanner.c" +#line 6749 "Zend/zend_language_scanner.c" yy674: YYDEBUG(674, 
*YYCURSOR); yych = *++YYCURSOR; @@ -6582,11 +6774,11 @@ yy678: } YYDEBUG(679, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 940 "Zend/zend_language_scanner.l" +#line 1132 "Zend/zend_language_scanner.l" { return T_INSTANCEOF; } -#line 6590 "Zend/zend_language_scanner.c" +#line 6782 "Zend/zend_language_scanner.c" yy680: YYDEBUG(680, *YYCURSOR); yych = *++YYCURSOR; @@ -6630,11 +6822,11 @@ yy687: } YYDEBUG(688, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1000 "Zend/zend_language_scanner.l" +#line 1192 "Zend/zend_language_scanner.l" { return T_IMPLEMENTS; } -#line 6638 "Zend/zend_language_scanner.c" +#line 6830 "Zend/zend_language_scanner.c" yy689: YYDEBUG(689, *YYCURSOR); yych = *++YYCURSOR; @@ -6662,11 +6854,11 @@ yy690: } YYDEBUG(692, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 876 "Zend/zend_language_scanner.l" +#line 1068 "Zend/zend_language_scanner.l" { return T_TRY; } -#line 6670 "Zend/zend_language_scanner.c" +#line 6862 "Zend/zend_language_scanner.c" yy693: YYDEBUG(693, *YYCURSOR); yych = *++YYCURSOR; @@ -6685,11 +6877,11 @@ yy695: } YYDEBUG(696, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 992 "Zend/zend_language_scanner.l" +#line 1184 "Zend/zend_language_scanner.l" { return T_TRAIT; } -#line 6693 "Zend/zend_language_scanner.c" +#line 6885 "Zend/zend_language_scanner.c" yy697: YYDEBUG(697, *YYCURSOR); yych = *++YYCURSOR; @@ -6708,11 +6900,11 @@ yy699: } YYDEBUG(700, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 884 "Zend/zend_language_scanner.l" +#line 1076 "Zend/zend_language_scanner.l" { return T_THROW; } -#line 6716 "Zend/zend_language_scanner.c" +#line 6908 "Zend/zend_language_scanner.c" yy701: YYDEBUG(701, *YYCURSOR); yych = *++YYCURSOR; @@ -6854,12 +7046,12 @@ yy715: ++YYCURSOR; YYDEBUG(716, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1220 "Zend/zend_language_scanner.l" +#line 1412 "Zend/zend_language_scanner.l" { yyless((sizeof("resource") - 1)); return T_RESOURCE_HINT; } -#line 6863 
"Zend/zend_language_scanner.c" +#line 7055 "Zend/zend_language_scanner.c" yy717: YYDEBUG(717, *YYCURSOR); yyaccept = 0; @@ -6936,7 +7128,7 @@ yy722: ++YYCURSOR; YYDEBUG(723, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1209 "Zend/zend_language_scanner.l" +#line 1401 "Zend/zend_language_scanner.l" { if (yytext[4] == 'l') { yyless((sizeof("double") - 1)); @@ -6947,7 +7139,7 @@ yy722: } return T_DOUBLE_HINT; } -#line 6951 "Zend/zend_language_scanner.c" +#line 7143 "Zend/zend_language_scanner.c" yy724: YYDEBUG(724, *YYCURSOR); yych = *++YYCURSOR; @@ -6984,11 +7176,11 @@ yy727: yy728: YYDEBUG(728, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1098 "Zend/zend_language_scanner.l" +#line 1290 "Zend/zend_language_scanner.l" { return T_REQUIRE; } -#line 6992 "Zend/zend_language_scanner.c" +#line 7184 "Zend/zend_language_scanner.c" yy729: YYDEBUG(729, *YYCURSOR); yych = *++YYCURSOR; @@ -7017,11 +7209,11 @@ yy733: } YYDEBUG(734, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1102 "Zend/zend_language_scanner.l" +#line 1294 "Zend/zend_language_scanner.l" { return T_REQUIRE_ONCE; } -#line 7025 "Zend/zend_language_scanner.c" +#line 7217 "Zend/zend_language_scanner.c" yy735: YYDEBUG(735, *YYCURSOR); yych = *++YYCURSOR; @@ -7040,11 +7232,11 @@ yy737: } YYDEBUG(738, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 872 "Zend/zend_language_scanner.l" +#line 1064 "Zend/zend_language_scanner.l" { return T_RETURN; } -#line 7048 "Zend/zend_language_scanner.c" +#line 7240 "Zend/zend_language_scanner.c" yy739: YYDEBUG(739, *YYCURSOR); yych = *++YYCURSOR; @@ -7125,11 +7317,11 @@ yy748: } YYDEBUG(749, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 968 "Zend/zend_language_scanner.l" +#line 1160 "Zend/zend_language_scanner.l" { return T_CONTINUE; } -#line 7133 "Zend/zend_language_scanner.c" +#line 7325 "Zend/zend_language_scanner.c" yy750: YYDEBUG(750, *YYCURSOR); ++YYCURSOR; @@ -7138,11 +7330,11 @@ yy750: } YYDEBUG(751, *YYCURSOR); yyleng = YYCURSOR - 
SCNG(yy_text); -#line 868 "Zend/zend_language_scanner.l" +#line 1060 "Zend/zend_language_scanner.l" { return T_CONST; } -#line 7146 "Zend/zend_language_scanner.c" +#line 7338 "Zend/zend_language_scanner.c" yy752: YYDEBUG(752, *YYCURSOR); yych = *++YYCURSOR; @@ -7167,11 +7359,11 @@ yy755: } YYDEBUG(756, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1046 "Zend/zend_language_scanner.l" +#line 1238 "Zend/zend_language_scanner.l" { return T_CLONE; } -#line 7175 "Zend/zend_language_scanner.c" +#line 7367 "Zend/zend_language_scanner.c" yy757: YYDEBUG(757, *YYCURSOR); yych = *++YYCURSOR; @@ -7185,11 +7377,11 @@ yy758: } YYDEBUG(759, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 984 "Zend/zend_language_scanner.l" +#line 1176 "Zend/zend_language_scanner.l" { return T_CLASS; } -#line 7193 "Zend/zend_language_scanner.c" +#line 7385 "Zend/zend_language_scanner.c" yy760: YYDEBUG(760, *YYCURSOR); yych = *++YYCURSOR; @@ -7209,11 +7401,11 @@ yy762: } YYDEBUG(763, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 956 "Zend/zend_language_scanner.l" +#line 1148 "Zend/zend_language_scanner.l" { return T_CASE; } -#line 7217 "Zend/zend_language_scanner.c" +#line 7409 "Zend/zend_language_scanner.c" yy764: YYDEBUG(764, *YYCURSOR); yych = *++YYCURSOR; @@ -7227,11 +7419,11 @@ yy765: } YYDEBUG(766, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 880 "Zend/zend_language_scanner.l" +#line 1072 "Zend/zend_language_scanner.l" { return T_CATCH; } -#line 7235 "Zend/zend_language_scanner.c" +#line 7427 "Zend/zend_language_scanner.c" yy767: YYDEBUG(767, *YYCURSOR); yych = *++YYCURSOR; @@ -7288,11 +7480,11 @@ yy776: } YYDEBUG(777, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 864 "Zend/zend_language_scanner.l" +#line 1056 "Zend/zend_language_scanner.l" { return T_FUNCTION; } -#line 7296 "Zend/zend_language_scanner.c" +#line 7488 "Zend/zend_language_scanner.c" yy778: YYDEBUG(778, *YYCURSOR); ++YYCURSOR; @@ -7316,11 +7508,11 @@ yy778: yy779: YYDEBUG(779, *YYCURSOR); 
yyleng = YYCURSOR - SCNG(yy_text); -#line 916 "Zend/zend_language_scanner.l" +#line 1108 "Zend/zend_language_scanner.l" { return T_FOR; } -#line 7324 "Zend/zend_language_scanner.c" +#line 7516 "Zend/zend_language_scanner.c" yy780: YYDEBUG(780, *YYCURSOR); yych = *++YYCURSOR; @@ -7344,11 +7536,11 @@ yy783: } YYDEBUG(784, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 924 "Zend/zend_language_scanner.l" +#line 1116 "Zend/zend_language_scanner.l" { return T_FOREACH; } -#line 7352 "Zend/zend_language_scanner.c" +#line 7544 "Zend/zend_language_scanner.c" yy785: YYDEBUG(785, *YYCURSOR); yych = *++YYCURSOR; @@ -7378,11 +7570,11 @@ yy789: } YYDEBUG(790, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1142 "Zend/zend_language_scanner.l" +#line 1334 "Zend/zend_language_scanner.l" { return T_FINAL; } -#line 7386 "Zend/zend_language_scanner.c" +#line 7578 "Zend/zend_language_scanner.c" yy791: YYDEBUG(791, *YYCURSOR); yych = *++YYCURSOR; @@ -7428,11 +7620,11 @@ yy793: yy794: YYDEBUG(794, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 912 "Zend/zend_language_scanner.l" +#line 1104 "Zend/zend_language_scanner.l" { return T_DO; } -#line 7436 "Zend/zend_language_scanner.c" +#line 7628 "Zend/zend_language_scanner.c" yy795: YYDEBUG(795, *YYCURSOR); yych = *++YYCURSOR; @@ -7457,11 +7649,11 @@ yy798: } YYDEBUG(799, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 860 "Zend/zend_language_scanner.l" +#line 1052 "Zend/zend_language_scanner.l" { return T_EXIT; } -#line 7465 "Zend/zend_language_scanner.c" +#line 7657 "Zend/zend_language_scanner.c" yy800: YYDEBUG(800, *YYCURSOR); yych = *++YYCURSOR; @@ -7496,11 +7688,11 @@ yy805: } YYDEBUG(806, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 960 "Zend/zend_language_scanner.l" +#line 1152 "Zend/zend_language_scanner.l" { return T_DEFAULT; } -#line 7504 "Zend/zend_language_scanner.c" +#line 7696 "Zend/zend_language_scanner.c" yy807: YYDEBUG(807, *YYCURSOR); yych = *++YYCURSOR; @@ -7524,11 +7716,11 @@ yy810: } 
YYDEBUG(811, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 932 "Zend/zend_language_scanner.l" +#line 1124 "Zend/zend_language_scanner.l" { return T_DECLARE; } -#line 7532 "Zend/zend_language_scanner.c" +#line 7724 "Zend/zend_language_scanner.c" yy812: YYDEBUG(812, *YYCURSOR); yych = *++YYCURSOR; @@ -7608,11 +7800,11 @@ yy823: } YYDEBUG(824, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 996 "Zend/zend_language_scanner.l" +#line 1188 "Zend/zend_language_scanner.l" { return T_EXTENDS; } -#line 7616 "Zend/zend_language_scanner.c" +#line 7808 "Zend/zend_language_scanner.c" yy825: YYDEBUG(825, *YYCURSOR); ++YYCURSOR; @@ -7621,11 +7813,11 @@ yy825: } YYDEBUG(826, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 856 "Zend/zend_language_scanner.l" +#line 1048 "Zend/zend_language_scanner.l" { return T_EXIT; } -#line 7629 "Zend/zend_language_scanner.c" +#line 7821 "Zend/zend_language_scanner.c" yy827: YYDEBUG(827, *YYCURSOR); yych = *++YYCURSOR; @@ -7639,11 +7831,11 @@ yy828: } YYDEBUG(829, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1086 "Zend/zend_language_scanner.l" +#line 1278 "Zend/zend_language_scanner.l" { return T_EVAL; } -#line 7647 "Zend/zend_language_scanner.c" +#line 7839 "Zend/zend_language_scanner.c" yy830: YYDEBUG(830, *YYCURSOR); yych = *++YYCURSOR; @@ -7713,11 +7905,11 @@ yy839: } YYDEBUG(840, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 908 "Zend/zend_language_scanner.l" +#line 1100 "Zend/zend_language_scanner.l" { return T_ENDWHILE; } -#line 7721 "Zend/zend_language_scanner.c" +#line 7913 "Zend/zend_language_scanner.c" yy841: YYDEBUG(841, *YYCURSOR); yych = *++YYCURSOR; @@ -7746,11 +7938,11 @@ yy845: } YYDEBUG(846, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 952 "Zend/zend_language_scanner.l" +#line 1144 "Zend/zend_language_scanner.l" { return T_ENDSWITCH; } -#line 7754 "Zend/zend_language_scanner.c" +#line 7946 "Zend/zend_language_scanner.c" yy847: YYDEBUG(847, *YYCURSOR); ++YYCURSOR; @@ -7759,11 +7951,11 
@@ yy847: } YYDEBUG(848, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 896 "Zend/zend_language_scanner.l" +#line 1088 "Zend/zend_language_scanner.l" { return T_ENDIF; } -#line 7767 "Zend/zend_language_scanner.c" +#line 7959 "Zend/zend_language_scanner.c" yy849: YYDEBUG(849, *YYCURSOR); yych = *++YYCURSOR; @@ -7792,11 +7984,11 @@ yy850: yy851: YYDEBUG(851, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 920 "Zend/zend_language_scanner.l" +#line 1112 "Zend/zend_language_scanner.l" { return T_ENDFOR; } -#line 7800 "Zend/zend_language_scanner.c" +#line 7992 "Zend/zend_language_scanner.c" yy852: YYDEBUG(852, *YYCURSOR); yych = *++YYCURSOR; @@ -7820,11 +8012,11 @@ yy855: } YYDEBUG(856, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 928 "Zend/zend_language_scanner.l" +#line 1120 "Zend/zend_language_scanner.l" { return T_ENDFOREACH; } -#line 7828 "Zend/zend_language_scanner.c" +#line 8020 "Zend/zend_language_scanner.c" yy857: YYDEBUG(857, *YYCURSOR); yych = *++YYCURSOR; @@ -7858,11 +8050,11 @@ yy862: } YYDEBUG(863, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 936 "Zend/zend_language_scanner.l" +#line 1128 "Zend/zend_language_scanner.l" { return T_ENDDECLARE; } -#line 7866 "Zend/zend_language_scanner.c" +#line 8058 "Zend/zend_language_scanner.c" yy864: YYDEBUG(864, *YYCURSOR); yych = *++YYCURSOR; @@ -7881,11 +8073,11 @@ yy866: } YYDEBUG(867, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1126 "Zend/zend_language_scanner.l" +#line 1318 "Zend/zend_language_scanner.l" { return T_EMPTY; } -#line 7889 "Zend/zend_language_scanner.c" +#line 8081 "Zend/zend_language_scanner.c" yy868: YYDEBUG(868, *YYCURSOR); yych = *++YYCURSOR; @@ -7914,11 +8106,11 @@ yy869: yy870: YYDEBUG(870, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 900 "Zend/zend_language_scanner.l" +#line 1092 "Zend/zend_language_scanner.l" { return T_ELSE; } -#line 7922 "Zend/zend_language_scanner.c" +#line 8114 "Zend/zend_language_scanner.c" yy871: YYDEBUG(871, *YYCURSOR); 
yych = *++YYCURSOR; @@ -7932,11 +8124,11 @@ yy872: } YYDEBUG(873, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 892 "Zend/zend_language_scanner.l" +#line 1084 "Zend/zend_language_scanner.l" { return T_ELSEIF; } -#line 7940 "Zend/zend_language_scanner.c" +#line 8132 "Zend/zend_language_scanner.c" yy874: YYDEBUG(874, *YYCURSOR); yych = *++YYCURSOR; @@ -7950,11 +8142,11 @@ yy875: } YYDEBUG(876, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 976 "Zend/zend_language_scanner.l" +#line 1168 "Zend/zend_language_scanner.l" { return T_ECHO; } -#line 7958 "Zend/zend_language_scanner.c" +#line 8150 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_LOOKING_FOR_PROPERTY: @@ -8027,7 +8219,7 @@ yy879: yy880: YYDEBUG(880, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1009 "Zend/zend_language_scanner.l" +#line 1201 "Zend/zend_language_scanner.l" { zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.len = yyleng; @@ -8035,7 +8227,7 @@ yy880: HANDLE_NEWLINES(yytext, yyleng); return T_WHITESPACE; } -#line 8039 "Zend/zend_language_scanner.c" +#line 8231 "Zend/zend_language_scanner.c" yy881: YYDEBUG(881, *YYCURSOR); ++YYCURSOR; @@ -8043,13 +8235,13 @@ yy881: yy882: YYDEBUG(882, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1028 "Zend/zend_language_scanner.l" +#line 1220 "Zend/zend_language_scanner.l" { yyless(0); yy_pop_state(TSRMLS_C); goto restart; } -#line 8053 "Zend/zend_language_scanner.c" +#line 8245 "Zend/zend_language_scanner.c" yy883: YYDEBUG(883, *YYCURSOR); ++YYCURSOR; @@ -8058,14 +8250,14 @@ yy883: yy884: YYDEBUG(884, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1021 "Zend/zend_language_scanner.l" +#line 1213 "Zend/zend_language_scanner.l" { yy_pop_state(TSRMLS_C); zend_copy_value(zendlval, yytext, yyleng); zendlval->type = IS_STRING; return T_STRING; } -#line 8069 "Zend/zend_language_scanner.c" +#line 8261 "Zend/zend_language_scanner.c" yy885: YYDEBUG(885, *YYCURSOR); 
yych = *++YYCURSOR; @@ -8086,11 +8278,11 @@ yy888: ++YYCURSOR; YYDEBUG(889, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1017 "Zend/zend_language_scanner.l" +#line 1209 "Zend/zend_language_scanner.l" { return T_OBJECT_OPERATOR; } -#line 8094 "Zend/zend_language_scanner.c" +#line 8286 "Zend/zend_language_scanner.c" yy890: YYDEBUG(890, *YYCURSOR); ++YYCURSOR; @@ -8160,7 +8352,7 @@ yy894: yy895: YYDEBUG(895, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1360 "Zend/zend_language_scanner.l" +#line 1552 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, yytext, yyleng); zendlval->type = IS_STRING; @@ -8168,20 +8360,20 @@ yy895: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); return T_STRING_VARNAME; } -#line 8172 "Zend/zend_language_scanner.c" +#line 8364 "Zend/zend_language_scanner.c" yy896: YYDEBUG(896, *YYCURSOR); ++YYCURSOR; YYDEBUG(897, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1369 "Zend/zend_language_scanner.l" +#line 1561 "Zend/zend_language_scanner.l" { yyless(0); yy_pop_state(TSRMLS_C); yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); goto restart; } -#line 8185 "Zend/zend_language_scanner.c" +#line 8377 "Zend/zend_language_scanner.c" yy898: YYDEBUG(898, *YYCURSOR); ++YYCURSOR; @@ -8203,7 +8395,7 @@ yyc_ST_NOWDOC: ++YYCURSOR; YYDEBUG(903, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2196 "Zend/zend_language_scanner.l" +#line 2388 "Zend/zend_language_scanner.l" { int newline = 0; @@ -8258,7 +8450,7 @@ nowdoc_scan_done: HANDLE_NEWLINES(yytext, yyleng - newline); return T_ENCAPSED_AND_WHITESPACE; } -#line 8262 "Zend/zend_language_scanner.c" +#line 8454 "Zend/zend_language_scanner.c" /* *********************************** */ yyc_ST_VAR_OFFSET: { @@ -8358,7 +8550,7 @@ yy906: yy907: YYDEBUG(907, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1419 "Zend/zend_language_scanner.l" +#line 1611 "Zend/zend_language_scanner.l" { /* Offset could be treated as a long */ if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG 
- 1 && strcmp(yytext, long_min_digits) < 0)) { zendlval->value.lval = strtol(yytext, NULL, 10); @@ -8370,7 +8562,7 @@ yy907: } return T_NUM_STRING; } -#line 8374 "Zend/zend_language_scanner.c" +#line 8566 "Zend/zend_language_scanner.c" yy908: YYDEBUG(908, *YYCURSOR); yych = *++YYCURSOR; @@ -8390,23 +8582,23 @@ yy909: yy910: YYDEBUG(910, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1724 "Zend/zend_language_scanner.l" +#line 1916 "Zend/zend_language_scanner.l" { /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */ return yytext[0]; } -#line 8399 "Zend/zend_language_scanner.c" +#line 8591 "Zend/zend_language_scanner.c" yy911: YYDEBUG(911, *YYCURSOR); ++YYCURSOR; YYDEBUG(912, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1719 "Zend/zend_language_scanner.l" +#line 1911 "Zend/zend_language_scanner.l" { yy_pop_state(TSRMLS_C); return ']'; } -#line 8410 "Zend/zend_language_scanner.c" +#line 8602 "Zend/zend_language_scanner.c" yy913: YYDEBUG(913, *YYCURSOR); yych = *++YYCURSOR; @@ -8416,14 +8608,14 @@ yy914: ++YYCURSOR; YYDEBUG(915, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1729 "Zend/zend_language_scanner.l" +#line 1921 "Zend/zend_language_scanner.l" { /* Invalid rule to return a more explicit parse error with proper line number */ yyless(0); yy_pop_state(TSRMLS_C); return T_ENCAPSED_AND_WHITESPACE; } -#line 8427 "Zend/zend_language_scanner.c" +#line 8619 "Zend/zend_language_scanner.c" yy916: YYDEBUG(916, *YYCURSOR); ++YYCURSOR; @@ -8432,19 +8624,19 @@ yy916: yy917: YYDEBUG(917, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1736 "Zend/zend_language_scanner.l" +#line 1928 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, yytext, yyleng); zendlval->type = IS_STRING; return T_STRING; } -#line 8442 "Zend/zend_language_scanner.c" +#line 8634 "Zend/zend_language_scanner.c" yy918: YYDEBUG(918, *YYCURSOR); ++YYCURSOR; YYDEBUG(919, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2252 
"Zend/zend_language_scanner.l" +#line 2444 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { return 0; @@ -8453,7 +8645,7 @@ yy918: zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); goto restart; } -#line 8457 "Zend/zend_language_scanner.c" +#line 8649 "Zend/zend_language_scanner.c" yy920: YYDEBUG(920, *YYCURSOR); ++YYCURSOR; @@ -8489,13 +8681,13 @@ yy922: yy924: YYDEBUG(924, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1713 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { zend_copy_value(zendlval, (yytext+1), (yyleng-1)); zendlval->type = IS_STRING; return T_VARIABLE; } -#line 8499 "Zend/zend_language_scanner.c" +#line 8691 "Zend/zend_language_scanner.c" yy925: YYDEBUG(925, *YYCURSOR); ++YYCURSOR; @@ -8527,14 +8719,14 @@ yy929: yy931: YYDEBUG(931, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1431 "Zend/zend_language_scanner.l" +#line 1623 "Zend/zend_language_scanner.l" { /* Offset must be treated as a string */ zendlval->value.str.val = (char *)estrndup(yytext, yyleng); zendlval->value.str.len = yyleng; zendlval->type = IS_STRING; return T_NUM_STRING; } -#line 8538 "Zend/zend_language_scanner.c" +#line 8730 "Zend/zend_language_scanner.c" yy932: YYDEBUG(932, *YYCURSOR); ++YYCURSOR; @@ -8547,6 +8739,6 @@ yy932: goto yy931; } } -#line 2261 "Zend/zend_language_scanner.l" +#line 2453 "Zend/zend_language_scanner.l" } diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index b0bbfe5b405..a3d125bedea 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -47,8 +47,7 @@ typedef struct _zend_lex_state { /* input/ouput filters */ zend_encoding_filter input_filter; zend_encoding_filter output_filter; - zend_encoding *script_encoding; - zend_encoding *internal_encoding; + const zend_encoding *script_encoding; } zend_lex_state; @@ -57,6 +56,10 @@ int zend_compare_file_handles(zend_file_handle *fh1, 
zend_file_handle *fh2); ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC); ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC); ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC); +ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC); +ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC); +ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC); +ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC); END_EXTERN_C() diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index b55237db9ab..1d54b53ad84 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -181,16 +181,13 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) lex_state->filename = zend_get_compiled_filename(TSRMLS_C); lex_state->lineno = CG(zend_lineno); - if (CG(multibyte)) { - lex_state->script_org = SCNG(script_org); - lex_state->script_org_size = SCNG(script_org_size); - lex_state->script_filtered = SCNG(script_filtered); - lex_state->script_filtered_size = SCNG(script_filtered_size); - lex_state->input_filter = SCNG(input_filter); - lex_state->output_filter = SCNG(output_filter); - lex_state->script_encoding = SCNG(script_encoding); - lex_state->internal_encoding = SCNG(internal_encoding); - } + lex_state->script_org = SCNG(script_org); + lex_state->script_org_size = SCNG(script_org_size); + lex_state->script_filtered = SCNG(script_filtered); + lex_state->script_filtered_size = SCNG(script_filtered_size); + lex_state->input_filter = SCNG(input_filter); + lex_state->output_filter = SCNG(output_filter); + lex_state->script_encoding = SCNG(script_encoding); } ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) @@ -209,24 +206,22 @@ ZEND_API void 
zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) YYSETCONDITION(lex_state->yy_state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); - if (CG(multibyte)) { - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - SCNG(script_org) = lex_state->script_org; - SCNG(script_org_size) = lex_state->script_org_size; - SCNG(script_filtered) = lex_state->script_filtered; - SCNG(script_filtered_size) = lex_state->script_filtered_size; - SCNG(input_filter) = lex_state->input_filter; - SCNG(output_filter) = lex_state->output_filter; - SCNG(script_encoding) = lex_state->script_encoding; - SCNG(internal_encoding) = lex_state->internal_encoding; + + if (SCNG(script_org)) { + efree(SCNG(script_org)); + SCNG(script_org) = NULL; } + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_org) = lex_state->script_org; + SCNG(script_org_size) = lex_state->script_org_size; + SCNG(script_filtered) = lex_state->script_filtered; + SCNG(script_filtered_size) = lex_state->script_filtered_size; + SCNG(input_filter) = lex_state->input_filter; + SCNG(output_filter) = lex_state->output_filter; + SCNG(script_encoding) = lex_state->script_encoding; } ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) @@ -239,6 +234,203 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) } } +#define BOM_UTF32_BE "\x00\x00\xfe\xff" +#define BOM_UTF32_LE "\xff\xfe\x00\x00" +#define BOM_UTF16_BE "\xfe\xff" +#define BOM_UTF16_LE "\xff\xfe" +#define BOM_UTF8 "\xef\xbb\xbf" + +static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) +{ + const unsigned char *p; + int wchar_size = 2; + int le = 0; + + /* utf-16 or utf-32? 
*/ + p = script; + while ((p-script) < script_size) { + p = memchr(p, 0, script_size-(p-script)-2); + if (!p) { + break; + } + if (*(p+1) == '\0' && *(p+2) == '\0') { + wchar_size = 4; + break; + } + + /* searching for UTF-32 specific byte orders, so this will do */ + p += 4; + } + + /* BE or LE? */ + p = script; + while ((p-script) < script_size) { + if (*p == '\0' && *(p+wchar_size-1) != '\0') { + /* BE */ + le = 0; + break; + } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { + /* LE* */ + le = 1; + break; + } + p += wchar_size; + } + + if (wchar_size == 2) { + return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; + } else { + return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) +{ + const zend_encoding *script_encoding = NULL; + int bom_size; + unsigned char *script; + unsigned char *pos1, *pos2; + + if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { + return NULL; + } + + /* check out BOM */ + if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf32be; + bom_size = sizeof(BOM_UTF32_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf32le; + bom_size = sizeof(BOM_UTF32_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf16be; + bom_size = sizeof(BOM_UTF16_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf16le; + bom_size = sizeof(BOM_UTF16_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { + script_encoding = zend_multibyte_encoding_utf8; + bom_size = sizeof(BOM_UTF8)-1; + } + + if (script_encoding) { + /* remove BOM */ + script = (unsigned 
char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size); + memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size); + efree(LANG_SCNG(script_org)); + LANG_SCNG(script_org) = script; + LANG_SCNG(script_org_size) -= bom_size; + + return script_encoding; + } + + /* script contains NULL bytes -> auto-detection */ + if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { + /* check if the NULL byte is after the __HALT_COMPILER(); */ + pos2 = LANG_SCNG(script_org); + + while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { + pos2 = memchr(pos2, '_', pos1 - pos2); + if (!pos2) break; + pos2++; + if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { + pos2 += sizeof("_HALT_COMPILER")-1; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == '(') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ')') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ';') { + return NULL; + } + } + } + } + } + /* make best effort if BOM is missing */ + return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) +{ + const zend_encoding *script_encoding; + + if (CG(detect_unicode)) { + /* check out bom(byte order mark) and see if containing wchars */ + script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); + if (script_encoding != NULL) { + /* bom or wchar detection is prior to 'script_encoding' option */ + return script_encoding; + } + } + + /* if no script_encoding specified, just leave alone */ + if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { + return NULL; + } + + /* if multiple encodings specified, detect automagically */ + if 
(CG(script_encoding_list_size) > 1) { + return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); + } + + return CG(script_encoding_list)[0]; +} + +ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); + + if (!script_encoding) { + return FAILURE; + } + + /* judge input/output filter */ + LANG_SCNG(script_encoding) = script_encoding; + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + + if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { + if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ + LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; + LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; + } else { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + } + return SUCCESS; + } + + /* both script and internal encodings are incompatible w/ flex */ + LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; + LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; + + return 0; +} ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) { @@ -286,13 +478,13 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); if (SCNG(script_filtered) == NULL) { zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); + "encoding 
\"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); } } SCNG(yy_start) = SCNG(script_filtered) - offset; yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); } else { - SCNG(yy_start) = buf - offset; + SCNG(yy_start) = (unsigned char *)buf - offset; yy_scan_buffer(buf, size TSRMLS_CC); } } else { @@ -441,7 +633,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); SCNG(script_org_size) = str->value.str.len; - zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); + zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); if (!SCNG(input_filter)) { SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); @@ -615,7 +807,7 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter /* calculate current position */ offset = original_offset = YYCURSOR - SCNG(yy_start); if (old_input_filter && offset > 0) { - zend_encoding *new_encoding = SCNG(script_encoding); + const zend_encoding *new_encoding = SCNG(script_encoding); zend_encoding_filter new_filter = SCNG(input_filter); SCNG(script_encoding) = old_encoding; SCNG(input_filter) = old_input_filter; diff --git a/Zend/zend_language_scanner_defs.h b/Zend/zend_language_scanner_defs.h index 0e06acf556e..b306a910da4 100644 --- a/Zend/zend_language_scanner_defs.h +++ b/Zend/zend_language_scanner_defs.h @@ -1,4 +1,4 @@ -/* Generated by re2c 0.13.5 on Thu Nov 25 23:17:48 2010 */ +/* Generated by re2c 0.13.5 on Mon Dec 20 01:33:18 2010 */ #line 3 "Zend/zend_language_scanner_defs.h" enum YYCONDTYPE { diff --git a/Zend/zend_multibyte.c b/Zend/zend_multibyte.c index 3ca5191c004..dec07ecdcdd 100644 --- a/Zend/zend_multibyte.c +++ b/Zend/zend_multibyte.c @@ -23,1219 +23,209 @@ #include "zend_compile.h" #include "zend_operators.h" #include "zend_multibyte.h" +#include "zend_ini.h" -static 
size_t zend_multibyte_encoding_filter(unsigned char **to, size_t *to_length, const char *to_encoding, const unsigned char *from, size_t from_length, const char *from_encoding TSRMLS_DC); -size_t sjis_input_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC); -size_t sjis_output_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC); -static char* zend_multibyte_assemble_encoding_list(zend_encoding **encoding_list, size_t encoding_list_size); -static int zend_multibyte_parse_encoding_list(const char *encoding_list, -size_t encoding_list_size, zend_encoding ***result, size_t *result_size); -static zend_encoding *zend_multibyte_find_script_encoding(zend_encoding *onetime_encoding TSRMLS_DC); -static zend_encoding *zend_multibyte_detect_unicode(TSRMLS_D); -static zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC); - -/* - * encodings - */ -static const char *ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL}; -static zend_encoding encoding_ucs2 = { - NULL, - NULL, - "UCS-2", - (const char *(*)[])&ucs2_aliases, - 0 -}; - -static zend_encoding encoding_ucs2be = { - NULL, - NULL, - "UCS-2BE", - NULL, - 0 -}; - -static zend_encoding encoding_ucs2le = { - NULL, - NULL, - "UCS-2LE", - NULL, - 0 -}; - -static const char *ucs4_aliases[] = {"ISO-10646-UCS-4", "UCS4", NULL}; -static zend_encoding encoding_ucs4 = { - NULL, - NULL, - "UCS-4", - (const char *(*)[])&ucs4_aliases, - 0 -}; - -static zend_encoding encoding_ucs4be = { - NULL, - NULL, - "UCS-4BE", - NULL, - 0 -}; - -static zend_encoding encoding_ucs4le = { - NULL, - NULL, - "UCS-4LE", - NULL, - 0 -}; - -static const char *utf32_aliases[] = {"utf32", NULL}; -static zend_encoding encoding_utf32 = { - NULL, - NULL, - "UTF-32", - (const char *(*)[])&utf32_aliases, - 0 -}; - -static zend_encoding encoding_utf32be = { - NULL, - NULL, - "UTF-32BE", - NULL, - 0 -}; - 
-static zend_encoding encoding_utf32le = { - NULL, - NULL, - "UTF-32LE", - NULL, - 0 -}; - -static const char *utf16_aliases[] = {"utf16", NULL}; -static zend_encoding encoding_utf16 = { - NULL, - NULL, - "UTF-16", - (const char *(*)[])&utf16_aliases, - 0 -}; - -static zend_encoding encoding_utf16be = { - NULL, - NULL, - "UTF-16BE", - NULL, - 0 -}; - -static zend_encoding encoding_utf16le = { - NULL, - NULL, - "UTF-16LE", - NULL, - 0 -}; - -static const char *utf8_aliases[] = {"utf8", NULL}; -static zend_encoding encoding_utf8 = { - NULL, - NULL, - "UTF-8", - (const char *(*)[])&utf8_aliases, - 1 -}; - -static const char *ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "cp367", "csASCII", NULL}; -static zend_encoding encoding_ascii = { - NULL, - NULL, - "ASCII", - (const char *(*)[])&ascii_aliases, - 1 -}; - -static const char *euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL}; -static zend_encoding encoding_euc_jp = { - NULL, - NULL, - "EUC-JP", - (const char *(*)[])&euc_jp_aliases, - 1 -}; - -static const char *sjis_aliases[] = {"x-sjis", "SJIS", "SHIFT-JIS", NULL}; -static zend_encoding encoding_sjis = { - sjis_input_filter, - sjis_output_filter, - "Shift_JIS", - (const char *(*)[])&sjis_aliases, - 0 -}; - -static const char *eucjp_win_aliases[] = {"eucJP-open", NULL}; -static zend_encoding encoding_eucjp_win = { - NULL, - NULL, - "eucJP-win", - (const char *(*)[])&eucjp_win_aliases, - 1 -}; - -static const char *sjis_win_aliases[] = {"SJIS-open", "MS_Kanji", "Windows-31J", "CP932", NULL}; -static zend_encoding encoding_sjis_win = { - /* sjis-filters does not care about diffs of Shift_JIS and CP932 */ - sjis_input_filter, - sjis_output_filter, - "SJIS-win", - (const char *(*)[])&sjis_win_aliases, - 0 -}; - -static const char *jis_aliases[] = {"ISO-2022-JP", NULL}; -static zend_encoding encoding_jis = { - NULL, - NULL, - "JIS", - (const char *(*)[])&jis_aliases, - 0 -}; 
- -static const char *euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL}; -static zend_encoding encoding_euc_cn = { - NULL, - NULL, - "EUC-CN", - (const char *(*)[])&euc_cn_aliases, - 1 -}; - -static const char *cp936_aliases[] = {"CP-936", NULL}; -static zend_encoding encoding_cp936 = { - NULL, - NULL, - "CP936", - (const char *(*)[])&cp936_aliases, - 0 -}; - -static const char *hz_aliases[] = {"HZ-GB-2312", NULL}; -static zend_encoding encoding_hz = { - NULL, - NULL, - "HZ", - (const char *(*)[])&hz_aliases, - 0 -}; - -static const char *euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; -static zend_encoding encoding_euc_tw = { - NULL, - NULL, - "EUC-TW", - (const char *(*)[])&euc_tw_aliases, - 1 -}; - -static const char *big5_aliases[] = {"BIG5", "CN-BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL}; -static zend_encoding encoding_big5 = { - NULL, - NULL, - "BIG-5", - (const char *(*)[])&big5_aliases, - 0 -}; - -static const char *euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; -static zend_encoding encoding_euc_kr = { - NULL, - NULL, - "EUC-KR", - (const char *(*)[])&euc_kr_aliases, - 1 -}; - -static const char *uhc_aliases[] = {"CP949", NULL}; -static zend_encoding encoding_uhc = { - NULL, - NULL, - "UHC", - (const char *(*)[])&uhc_aliases, - 1 -}; - -static zend_encoding encoding_2022kr = { - NULL, - NULL, - "ISO-2022-KR", - NULL, - 0 -}; - -static const char *cp1252_aliases[] = {"cp1252", NULL}; -static zend_encoding encoding_cp1252 = { - NULL, - NULL, - "Windows-1252", - (const char *(*)[])&cp1252_aliases, - 1 -}; - -static const char *iso_8859_1_aliases[] = {"ISO_8859-1", "latin1", NULL}; -static zend_encoding encoding_8859_1 = { - NULL, - NULL, - "ISO-8859-1", - (const char *(*)[])&iso_8859_1_aliases, - 1 -}; - -static const char *iso_8859_2_aliases[] = {"ISO_8859-2", "latin2", NULL}; -static zend_encoding encoding_8859_2 = { - NULL, - NULL, - "ISO-8859-2", - (const char *(*)[])&iso_8859_2_aliases, - 1 -}; - -static 
const char *iso_8859_3_aliases[] = {"ISO_8859-3", "latin3", NULL}; -static zend_encoding encoding_8859_3 = { - NULL, - NULL, - "ISO-8859-3", - (const char *(*)[])&iso_8859_3_aliases, - 1 -}; - -static const char *iso_8859_4_aliases[] = {"ISO_8859-4", "latin4", NULL}; -static zend_encoding encoding_8859_4 = { - NULL, - NULL, - "ISO-8859-4", - (const char *(*)[])&iso_8859_4_aliases, - 1 -}; - -static const char *iso_8859_5_aliases[] = {"ISO_8859-5", "cyrillic", NULL}; -static zend_encoding encoding_8859_5 = { - NULL, - NULL, - "ISO-8859-5", - (const char *(*)[])&iso_8859_5_aliases, - 1 -}; - -static const char *iso_8859_6_aliases[] = {"ISO_8859-6", "arabic", NULL}; -static zend_encoding encoding_8859_6 = { - NULL, - NULL, - "ISO-8859-6", - (const char *(*)[])&iso_8859_6_aliases, - 1 -}; - -static const char *iso_8859_7_aliases[] = {"ISO_8859-7", "greek", NULL}; -static zend_encoding encoding_8859_7 = { - NULL, - NULL, - "ISO-8859-7", - (const char *(*)[])&iso_8859_7_aliases, - 1 -}; - -static const char *iso_8859_8_aliases[] = {"ISO_8859-8", "hebrew", NULL}; -static zend_encoding encoding_8859_8 = { - NULL, - NULL, - "ISO-8859-8", - (const char *(*)[])&iso_8859_8_aliases, - 1 -}; - -static const char *iso_8859_9_aliases[] = {"ISO_8859-9", "latin5", NULL}; -static zend_encoding encoding_8859_9 = { - NULL, - NULL, - "ISO-8859-9", - (const char *(*)[])&iso_8859_9_aliases, - 1 -}; - -static const char *iso_8859_10_aliases[] = {"ISO_8859-10", "latin6", NULL}; -static zend_encoding encoding_8859_10 = { - NULL, - NULL, - "ISO-8859-10", - (const char *(*)[])&iso_8859_10_aliases, - 1 -}; - -static const char *iso_8859_13_aliases[] = {"ISO_8859-13", NULL}; -static zend_encoding encoding_8859_13 = { - NULL, - NULL, - "ISO-8859-13", - (const char *(*)[])&iso_8859_13_aliases, - 1 -}; - -static const char *iso_8859_14_aliases[] = {"ISO_8859-14", "latin8", NULL}; -static zend_encoding encoding_8859_14 = { - NULL, - NULL, - "ISO-8859-14", - (const char *(*)[])&iso_8859_14_aliases, - 
1 -}; - -static const char *iso_8859_15_aliases[] = {"ISO_8859-15", NULL}; -static zend_encoding encoding_8859_15 = { - NULL, - NULL, - "ISO-8859-15", - (const char *(*)[])&iso_8859_15_aliases, - 1 -}; - -static const char *cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL}; -static zend_encoding encoding_cp1251 = { - NULL, - NULL, - "Windows-1251", - (const char *(*)[])&cp1251_aliases, - 1 -}; - -static const char *cp866_aliases[] = {"CP866", "CP-866", "IBM-866", NULL}; -static zend_encoding encoding_cp866 = { - NULL, - NULL, - "CP866", - (const char *(*)[])&cp866_aliases, - 1 -}; - -static const char *koi8r_aliases[] = {"KOI8-R", "KOI8R", NULL}; -static zend_encoding encoding_koi8r = { - NULL, - NULL, - "KOI8-R", - (const char *(*)[])&koi8r_aliases, - 1 -}; - -static const char *koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL}; -static zend_encoding encoding_koi8u = { - NULL, - NULL, - "KOI8-U", - (const char *(*)[])&koi8u_aliases, - 1 -}; - -static const char *cp1254_aliases[] = {"cp1254", NULL}; -static zend_encoding encoding_cp1254 = { - NULL, - NULL, - "Windows-1254", - (const char *(*)[])&cp1254_aliases, - 1 -}; - -static const char *armscii8_aliases[] = { "ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL}; -static zend_encoding encoding_armscii8 = { - NULL, - NULL, - "ArmSCII-8", - (const char *(*)[])&armscii8_aliases, - 1 -}; - -static const char *cp850_aliases[] = {"IBM850", NULL}; -static zend_encoding encoding_cp850 = { - NULL, - NULL, - "CP850", - (const char *(*)[])&cp850_aliases, - 1 -}; - -static zend_encoding *zend_encoding_table[] = { - &encoding_ucs4, - &encoding_ucs4be, - &encoding_ucs4le, - &encoding_ucs2, - &encoding_ucs2be, - &encoding_ucs2le, - &encoding_utf32, - &encoding_utf32be, - &encoding_utf32le, - &encoding_utf16, - &encoding_utf16be, - &encoding_utf16le, - &encoding_utf8, - &encoding_ascii, - &encoding_euc_jp, - &encoding_sjis, - &encoding_eucjp_win, - &encoding_sjis_win, - &encoding_jis, - &encoding_cp1252, - &encoding_8859_1, - 
&encoding_8859_2, - &encoding_8859_3, - &encoding_8859_4, - &encoding_8859_5, - &encoding_8859_6, - &encoding_8859_7, - &encoding_8859_8, - &encoding_8859_9, - &encoding_8859_10, - &encoding_8859_13, - &encoding_8859_14, - &encoding_8859_15, - &encoding_euc_cn, - &encoding_cp936, - &encoding_hz, - &encoding_euc_tw, - &encoding_big5, - &encoding_euc_kr, - &encoding_uhc, - &encoding_2022kr, - &encoding_cp1251, - &encoding_cp866, - &encoding_koi8r, - &encoding_koi8u, - &encoding_armscii8, - &encoding_cp1254, - &encoding_cp850, - NULL -}; - -static char* dummy_encoding_detector(const unsigned char *string, size_t length, char *list TSRMLS_DC) +static const zend_encoding *dummy_encoding_fetcher(const char *encoding_name TSRMLS_DC) { return NULL; } -static int dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC) +static const char *dummy_encoding_name_getter(const zend_encoding *encoding) { - return -1; + return NULL; } -static size_t dummy_encoding_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC) +static int dummy_encoding_lexer_compatibility_checker(const zend_encoding *encoding) { return 0; } -static int dummy_encoding_list_checker(const char *encoding_list TSRMLS_DC) -{ - /* ignore encoding */ - return 1; -} - -static const char* dummy_get_internal_encoding(TSRMLS_D) +static const zend_encoding *dummy_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC) { return NULL; } -ZEND_API zend_encoding_detector zend_multibyte_encoding_detector = dummy_encoding_detector; -ZEND_API zend_encoding_converter zend_multibyte_encoding_converter = dummy_encoding_converter; -ZEND_API zend_encoding_oddlen zend_multibyte_encoding_oddlen = dummy_encoding_oddlen; -ZEND_API zend_encoding_list_checker zend_multibyte_check_encoding_list = dummy_encoding_list_checker; -ZEND_API 
zend_encoding_name_getter zend_multibyte_get_internal_encoding = dummy_get_internal_encoding; +static size_t dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) +{ + return (size_t)-1; +} -ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list, -size_t encoding_list_size TSRMLS_DC) +static int dummy_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) +{ + return FAILURE; +} + +static const zend_encoding *dummy_internal_encoding_getter(TSRMLS_D) +{ + return NULL; +} + +static int dummy_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC) +{ + return FAILURE; +} + +static zend_multibyte_functions multibyte_functions = { + NULL, + dummy_encoding_fetcher, + dummy_encoding_name_getter, + dummy_encoding_lexer_compatibility_checker, + dummy_encoding_detector, + dummy_encoding_converter, + dummy_encoding_list_parser, + dummy_internal_encoding_getter, + dummy_internal_encoding_setter +}; + +ZEND_API const zend_encoding *zend_multibyte_encoding_utf32be; +ZEND_API const zend_encoding *zend_multibyte_encoding_utf32le; +ZEND_API const zend_encoding *zend_multibyte_encoding_utf16be; +ZEND_API const zend_encoding *zend_multibyte_encoding_utf16le; +ZEND_API const zend_encoding *zend_multibyte_encoding_utf8; + +ZEND_API int zend_multibyte_set_functions(const zend_multibyte_functions *functions TSRMLS_DC) +{ + zend_multibyte_encoding_utf32be = functions->encoding_fetcher("UTF-32BE" TSRMLS_CC); + if (!zend_multibyte_encoding_utf32be) { + return FAILURE; + } + zend_multibyte_encoding_utf32le = functions->encoding_fetcher("UTF-32LE" TSRMLS_CC); + if (!zend_multibyte_encoding_utf32le) { + return FAILURE; + } + zend_multibyte_encoding_utf16be = functions->encoding_fetcher("UTF-16BE" TSRMLS_CC); + if 
(!zend_multibyte_encoding_utf16be) { + return FAILURE; + } + zend_multibyte_encoding_utf16le = functions->encoding_fetcher("UTF-16LE" TSRMLS_CC); + if (!zend_multibyte_encoding_utf16le) { + return FAILURE; + } + zend_multibyte_encoding_utf8 = functions->encoding_fetcher("UTF-8" TSRMLS_CC); + if (!zend_multibyte_encoding_utf8) { + return FAILURE; + } + + multibyte_functions = *functions; + + /* As zend_multibyte_set_functions() gets called after ini settings were + * populated, we need to reinitialize script_encoding here. + */ + { + const char *value = zend_ini_string("zend.script_encoding", sizeof("zend.script_encoding"), 0); + zend_multibyte_set_script_encoding_by_string(value, strlen(value) TSRMLS_CC); + } + return SUCCESS; +} + +ZEND_API const zend_multibyte_functions *zend_multibyte_get_functions(TSRMLS_D) +{ + return multibyte_functions.provider_name ? &multibyte_functions: NULL; +} + +ZEND_API const zend_encoding *zend_multibyte_fetch_encoding(const char *name TSRMLS_DC) +{ + return multibyte_functions.encoding_fetcher(name TSRMLS_CC); +} + +ZEND_API const char *zend_multibyte_get_encoding_name(const zend_encoding *encoding) +{ + return multibyte_functions.encoding_name_getter(encoding); +} + +ZEND_API int zend_multibyte_check_lexer_compatibility(const zend_encoding *encoding) +{ + return multibyte_functions.lexer_compatibility_checker(encoding); +} + +ZEND_API const zend_encoding *zend_multibyte_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC) +{ + return multibyte_functions.encoding_detector(string, length, list, list_size TSRMLS_CC); +} + +ZEND_API size_t zend_multibyte_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) +{ + return multibyte_functions.encoding_converter(to, to_length, from, from_length, encoding_to, encoding_from TSRMLS_CC); +} + +ZEND_API 
int zend_multibyte_parse_encoding_list(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) +{ + return multibyte_functions.encoding_list_parser(encoding_list, encoding_list_len, return_list, return_size, persistent TSRMLS_CC); +} + +ZEND_API const zend_encoding *zend_multibyte_get_internal_encoding(TSRMLS_D) +{ + return multibyte_functions.internal_encoding_getter(TSRMLS_C); +} + +ZEND_API const zend_encoding *zend_multibyte_get_script_encoding(TSRMLS_D) +{ + return LANG_SCNG(script_encoding); +} + +ZEND_API int zend_multibyte_set_script_encoding(const zend_encoding **encoding_list, size_t encoding_list_size TSRMLS_DC) { if (CG(script_encoding_list)) { efree(CG(script_encoding_list)); - CG(script_encoding_list) = NULL; } - CG(script_encoding_list_size) = 0; - - if (!encoding_list) { - return 0; - } - - zend_multibyte_parse_encoding_list(encoding_list, encoding_list_size, &(CG(script_encoding_list)), &(CG(script_encoding_list_size))); - - return 0; + CG(script_encoding_list) = encoding_list; + CG(script_encoding_list_size) = encoding_list_size; + return SUCCESS; } - -ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRMLS_DC) +ZEND_API int zend_multibyte_set_internal_encoding(const zend_encoding *encoding TSRMLS_DC) { - CG(internal_encoding) = zend_multibyte_fetch_encoding(encoding_name); - return 0; + return multibyte_functions.internal_encoding_setter(encoding TSRMLS_CC); } -ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC) +ZEND_API int zend_multibyte_set_script_encoding_by_string(const char *new_value, size_t new_value_length TSRMLS_DC) { - zend_multibyte_encoding_detector = encoding_detector; - zend_multibyte_encoding_converter = 
encoding_converter; - zend_multibyte_encoding_oddlen = encoding_oddlen; - zend_multibyte_check_encoding_list = encoding_list_checker; - zend_multibyte_get_internal_encoding = get_internal_encoding; - return 0; + const zend_encoding **list = 0; + size_t size = 0; + + if (!new_value) { + zend_multibyte_set_script_encoding(NULL, 0 TSRMLS_CC); + return SUCCESS; + } + + if (FAILURE == zend_multibyte_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { + return FAILURE; + } + + if (size == 0) { + pefree(list, 1); + return FAILURE; + } + + if (FAILURE == zend_multibyte_set_script_encoding(list, size TSRMLS_CC)) { + return FAILURE; + } + + return SUCCESS; } - -ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC) -{ - LANG_SCNG(script_encoding) = zend_multibyte_find_script_encoding(onetime_encoding TSRMLS_CC); - LANG_SCNG(internal_encoding) = CG(internal_encoding); - - /* judge input/output filter */ - LANG_SCNG(input_filter) = NULL; - LANG_SCNG(output_filter) = NULL; - - if (!LANG_SCNG(script_encoding)) { - return 0; - } - - if (!LANG_SCNG(internal_encoding) || LANG_SCNG(script_encoding) == LANG_SCNG(internal_encoding)) { - /* if encoding specfic filters exist, use them */ - if (LANG_SCNG(script_encoding)->input_filter && LANG_SCNG(script_encoding)->output_filter) { - LANG_SCNG(input_filter) = LANG_SCNG(script_encoding)->input_filter; - LANG_SCNG(output_filter) = LANG_SCNG(script_encoding)->output_filter; - return 0; - } - - if (!LANG_SCNG(script_encoding)->compatible) { - /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ - LANG_SCNG(internal_encoding) = LANG_SCNG(script_encoding); - LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; - LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; - return 0; - } else { - /* nothing to do in this case */ - return 0; - } - } - - /* LANG_SCNG(internal_encoding) cannot be NULL here */ - if 
(LANG_SCNG(internal_encoding)->compatible) { - LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; - return 0; - } else if (LANG_SCNG(script_encoding)->compatible) { - LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; - return 0; - } - - /* both script and internal encodings are incompatible w/ flex */ - LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter; - LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter; - - return 0; -} - - -ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char *encoding_name) -{ - int i, j; - zend_encoding *encoding; - - if (!encoding_name) { - return NULL; - } - - for (i = 0; (encoding = zend_encoding_table[i]) != NULL; i++) { - if (zend_binary_strcasecmp(encoding->name, strlen(encoding->name), encoding_name, strlen(encoding_name)) == 0) { - return encoding; - } - } - - for (i = 0; (encoding = zend_encoding_table[i]) != NULL; i++) { - if (encoding->aliases != NULL) { - for (j = 0; (*encoding->aliases)[j] != NULL; j++) { - if (zend_binary_strcasecmp((*encoding->aliases)[j], strlen((*encoding->aliases)[j]), encoding_name, strlen(encoding_name)) == 0) { - return encoding; - } - } - } - } - - return NULL; -} - - ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) { - const char *name; - - if (LANG_SCNG(internal_encoding) == NULL || LANG_SCNG(internal_encoding)->compatible == 0) { - name = "UTF-8"; - } else { - name = LANG_SCNG(internal_encoding)->name; + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + if (!internal_encoding || !zend_multibyte_check_lexer_compatibility(internal_encoding)) { + internal_encoding = zend_multibyte_encoding_utf8; } - - return zend_multibyte_encoding_filter(to, to_length, name, from, from_length, LANG_SCNG(script_encoding)->name TSRMLS_CC); + return zend_multibyte_encoding_converter(to, to_length, from, 
from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); } ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) { - const char *name; - - if (LANG_SCNG(script_encoding)->compatible == 0) { - name = "UTF-8"; - } else { - name = LANG_SCNG(script_encoding)->name; + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + const zend_encoding *script_encoding = LANG_SCNG(script_encoding); + if (!internal_encoding || !zend_multibyte_check_lexer_compatibility(internal_encoding)) { + internal_encoding = zend_multibyte_encoding_utf8; + } + if (!zend_multibyte_check_lexer_compatibility(script_encoding)) { + script_encoding = zend_multibyte_encoding_utf8; } - - return zend_multibyte_encoding_filter(to, to_length, LANG_SCNG(internal_encoding)->name, from, from_length, name TSRMLS_CC); -} - -static size_t zend_multibyte_encoding_filter(unsigned char **to, size_t *to_length, const char *to_encoding, const unsigned char *from, size_t from_length, const char *from_encoding TSRMLS_DC) -{ - size_t oddlen; - - if (zend_multibyte_encoding_converter == dummy_encoding_converter) { - return 0; - } - - oddlen = zend_multibyte_encoding_oddlen(from, from_length, from_encoding TSRMLS_CC); - if (oddlen > 0) { - from_length -= oddlen; - } - - if (zend_multibyte_encoding_converter(to, to_length, from, from_length, to_encoding, from_encoding TSRMLS_CC) != 0) { - return 0; - } - - return from_length; -} - - -/* - * Shift_JIS Input/Output Filter - */ -static const unsigned char table_sjis[] = { /* 0x80-0x9f,0xE0-0xEF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 0, 0, 0 -}; - -size_t sjis_input_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC) -{ - const unsigned char *p; - unsigned char *q; - unsigned char c1, c2; - - *buf = (unsigned char*)emalloc(sjis_length * 3 / 2 + 1); - if (!*buf) - return 0; - *length = 0; - - p = sjis; - q = *buf; - - /* convert [SJIS -> EUC-JP] (for lex scan) -- some other better ways? */ - while (*p && (p - sjis) < sjis_length) { - if (!(*p & 0x80)) { - *q++ = *p++; - continue; - } - - /* handling 8 bit code */ - if (table_sjis[*p] == 1) { - /* 1 byte kana */ - *q++ = 0x8e; - *q++ = *p++; - continue; - } - - if (!*(p+1)) { - *q++ = *p++; - break; - } - - if (table_sjis[*p] == 2) { - /* 2 byte kanji code */ - c1 = *p++; - if (!*p || (p - sjis) >= sjis_length) { - break; - } - c2 = *p++; - c1 -= (c1 <= 0x9f) ? 0x71 : 0xb1; - c1 = (c1 << 1) + 1; - if (c2 >= 0x9e) { - c2 -= 0x7e; - c1++; - } else if (c2 > 0x7f) { - c2 -= 0x20; - } else { - c2 -= 0x1f; - } - - c1 |= 0x80; - c2 |= 0x80; - - *q++ = c1; - *q++ = c2; - } else { - /* - * for user defined chars (ATTENTION) - * - * THESE ARE NOT CODE FOR CONVERSION! :-P - * (using *ILLEGALLY* 3byte EUC-JP space) - * - * we cannot perfectly (== 1 to 1) convert these chars to EUC-JP. 
- * so, these code are for perfect RESTORING in sjis_output_filter() - */ - c1 = *p++; - if (!*p || (p - sjis) >= sjis_length) { - break; - } - c2 = *p++; - *q++ = 0x8f; - /* - * MAP TO (EUC-JP): - * type A: 0xeba1 - 0xf4fe - * type B: 0xf5a1 - 0xfefe - * type C: 0xa1a1 - 0xa6fe - */ - c1 -= (c1 > 0xf9) ? (0x79+0x71) : (0x0a+0xb1); - c1 = (c1 << 1) + 1; - if (c2 >= 0x9e) { - c2 -= 0x7e; - c1++; - } else if (c2 > 0x7f) { - c2 -= 0x20; - } else { - c2 -= 0x1f; - } - - c1 |= 0x80; - c2 |= 0x80; - - *q++ = c1; - *q++ = c2; - } - } - *q = '\0'; - *length = q - *buf; - - return *length; -} - -static const unsigned char table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -size_t sjis_output_filter(unsigned char **sjis, size_t *sjis_length, const unsigned char *buf, size_t length TSRMLS_DC) -{ - unsigned char c1, c2; - unsigned char *p; - const unsigned char *q; - - if (!sjis || !sjis_length) { - return 0; - } - - /* always Shift_JIS <= EUC-JP */ - *sjis = (unsigned char*)emalloc(length+1); - if (!sjis) { - return 0; - } - p = *sjis; - q = buf; - - /* restore converted strings [EUC-JP -> Shift_JIS] */ - while (*q && (q - buf) < length) { - if (!(*q & 0x80)) { - *p++ = *q++; - continue; - } - - /* 
hankaku kana */ - if (*q == 0x8e) { - q++; - if (*q) { - *p++ = *q++; - } - continue; - } - - /* 2 byte kanji code */ - if (table_eucjp[*q] == 2) { - c1 = (*q++ & ~0x80) & 0xff; - if (*q) { - c2 = (*q++ & ~0x80) & 0xff; - } else { - q--; - break; - } - - c2 += (c1 & 0x01) ? 0x1f : 0x7d; - if (c2 >= 0x7f) { - c2++; - } - c1 = ((c1 - 0x21) >> 1) + 0x81; - if (c1 > 0x9f) { - c1 += 0x40; - } - - *p++ = c1; - *p++ = c2; - continue; - } - - if (*q == 0x8f) { - q++; - if (*q) { - c1 = (*q++ & ~0x80) & 0xff; - } else { - q--; - break; - } - if (*q) { - c2 = (*q++ & ~0x80) & 0xff; - } else { - q -= 2; - break; - } - - c2 += (c1 & 0x01) ? 0x1f : 0x7d; - if (c2 >= 0x7f) { - c2++; - } - c1 = ((c1 - 0x21) >> 1) + 0x81; - if (c1 > 0x9f) { - c1 += 0x40; - } - - if (c1 >= 0x81 && c1 <= 0x9f) { - c1 += 0x79; - } else { - c1 += 0x0a; - } - - *p++ = c1; - *p++ = c2; - continue; - } - - /* some other chars (may not happen) */ - *p++ = *q++; - } - *p = '\0'; - *sjis_length = p - *sjis; - - return q-buf; /* return length we actually read */ -} - - -static char *zend_multibyte_assemble_encoding_list(zend_encoding **encoding_list, size_t encoding_list_size) -{ - int i, list_size = 0; - const char *name; - char *list = NULL; - - if (!encoding_list || !encoding_list_size) { - return NULL; - } - - for (i = 0; i < encoding_list_size; i++) { - name = (*(encoding_list+i))->name; - if (name) { - list_size += strlen(name) + 1; - if (!list) { - list = (char*)emalloc(list_size); - if (!list) { - return NULL; - } - *list = '\0'; - } else { - list = (char*)erealloc(list, list_size); - if (!list) { - return NULL; - } - strcat(list, ","); - } - strcat(list, name); - } - } - return list; -} - - -static int zend_multibyte_parse_encoding_list(const char *encoding_list, -size_t encoding_list_size, zend_encoding ***result, size_t *result_size) -{ - int n, size; - char *p, *p1, *p2, *endp, *tmpstr; - zend_encoding **list, **entry, *encoding; - - list = NULL; - if (encoding_list == NULL || encoding_list_size 
<= 0) { - return -1; - } else { - /* copy the encoding_list string for work */ - tmpstr = (char *)estrndup(encoding_list, encoding_list_size); - if (tmpstr == NULL) { - return -1; - } - /* count the number of listed encoding names */ - endp = tmpstr + encoding_list_size; - n = 1; - p1 = tmpstr; - while ((p2 = zend_memnstr(p1, ",", 1, endp)) != NULL) { - p1 = p2 + 1; - n++; - } - size = n; - /* make list */ - list = (zend_encoding**)ecalloc(size, sizeof(zend_encoding*)); - if (list != NULL) { - entry = list; - n = 0; - p1 = tmpstr; - do { - p2 = p = zend_memnstr(p1, ",", 1, endp); - if (p == NULL) { - p = endp; - } - *p = '\0'; - /* trim spaces */ - while (p1 < p && (*p1 == ' ' || *p1 == '\t')) { - p1++; - } - p--; - while (p > p1 && (*p == ' ' || *p == '\t')) { - *p = '\0'; - p--; - } - /* convert to the encoding number and check encoding */ - encoding = zend_multibyte_fetch_encoding(p1); - if (encoding) - { - *entry++ = encoding; - n++; - } - p1 = p2 + 1; - } while (n < size && p2 != NULL); - *result = list; - *result_size = n; - } - efree(tmpstr); - } - - if (list == NULL) { - return -1; - } - - return 0; -} - - -static zend_encoding* zend_multibyte_find_script_encoding(zend_encoding *onetime_encoding TSRMLS_DC) -{ - zend_encoding *script_encoding; - char *name, *list; - - /* onetime_encoding is prior to everything */ - if (onetime_encoding != NULL) { - return onetime_encoding; - } - - if (CG(detect_unicode)) { - /* check out bom(byte order mark) and see if containing wchars */ - script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); - if (script_encoding != NULL) { - /* bom or wchar detection is prior to 'script_encoding' option */ - return script_encoding; - } - } - - /* if no script_encoding specified, just leave alone */ - if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { - return NULL; - } - - /* if multiple encodings specified, detect automagically */ - if (CG(script_encoding_list_size) > 1 && - zend_multibyte_encoding_detector != 
dummy_encoding_detector) { - list = zend_multibyte_assemble_encoding_list(CG(script_encoding_list), - CG(script_encoding_list_size)); - name = zend_multibyte_encoding_detector(LANG_SCNG(script_org), - LANG_SCNG(script_org_size), list TSRMLS_CC); - if (list) { - efree(list); - } - if (name) { - script_encoding = zend_multibyte_fetch_encoding(name); - efree(name); - } else { - script_encoding = NULL; - } - return script_encoding; - } - - return *(CG(script_encoding_list)); -} - - -static zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) -{ - zend_encoding *script_encoding = NULL; - int bom_size; - unsigned char *script; - unsigned char *pos1, *pos2; - - if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { - return NULL; - } - - /* check out BOM */ - if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { - script_encoding = &encoding_utf32be; - bom_size = sizeof(BOM_UTF32_BE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { - script_encoding = &encoding_utf32le; - bom_size = sizeof(BOM_UTF32_LE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { - script_encoding = &encoding_utf16be; - bom_size = sizeof(BOM_UTF16_BE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { - script_encoding = &encoding_utf16le; - bom_size = sizeof(BOM_UTF16_LE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { - script_encoding = &encoding_utf8; - bom_size = sizeof(BOM_UTF8)-1; - } - - if (script_encoding) { - /* remove BOM */ - script = (unsigned char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size); - memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size); - efree(LANG_SCNG(script_org)); - LANG_SCNG(script_org) = script; - LANG_SCNG(script_org_size) -= bom_size; - - return script_encoding; - } - - /* script contains NULL bytes -> auto-detection */ - if ((pos1 = 
memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { - /* check if the NULL byte is after the __HALT_COMPILER(); */ - pos2 = LANG_SCNG(script_org); - - while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { - pos2 = memchr(pos2, '_', pos1 - pos2); - if (!pos2) break; - pos2++; - if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { - pos2 += sizeof("_HALT_COMPILER")-1; - while (*pos2 == ' ' || - *pos2 == '\t' || - *pos2 == '\r' || - *pos2 == '\n') { - pos2++; - } - if (*pos2 == '(') { - pos2++; - while (*pos2 == ' ' || - *pos2 == '\t' || - *pos2 == '\r' || - *pos2 == '\n') { - pos2++; - } - if (*pos2 == ')') { - pos2++; - while (*pos2 == ' ' || - *pos2 == '\t' || - *pos2 == '\r' || - *pos2 == '\n') { - pos2++; - } - if (*pos2 == ';') { - return NULL; - } - } - } - } - } - /* make best effort if BOM is missing */ - return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); - } - - return NULL; -} - -static zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) -{ - const unsigned char *p; - int wchar_size = 2; - int le = 0; - - /* utf-16 or utf-32? */ - p = script; - while ((p-script) < script_size) { - p = memchr(p, 0, script_size-(p-script)-2); - if (!p) { - break; - } - if (*(p+1) == '\0' && *(p+2) == '\0') { - wchar_size = 4; - break; - } - - /* searching for UTF-32 specific byte orders, so this will do */ - p += 4; - } - - /* BE or LE? */ - p = script; - while ((p-script) < script_size) { - if (*p == '\0' && *(p+wchar_size-1) != '\0') { - /* BE */ - le = 0; - break; - } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { - /* LE* */ - le = 1; - break; - } - p += wchar_size; - } - - if (wchar_size == 2) { - return le ? &encoding_utf16le : &encoding_utf16be; - } else { - return le ? 
&encoding_utf32le : &encoding_utf32be; - } - - return NULL; + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +script_encoding, internal_encoding TSRMLS_CC); } /* diff --git a/Zend/zend_multibyte.h b/Zend/zend_multibyte.h index 94d8417b804..8233da6b39a 100644 --- a/Zend/zend_multibyte.h +++ b/Zend/zend_multibyte.h @@ -22,59 +22,62 @@ #ifndef ZEND_MULTIBYTE_H #define ZEND_MULTIBYTE_H -#define BOM_UTF32_BE "\x00\x00\xfe\xff" -#define BOM_UTF32_LE "\xff\xfe\x00\x00" -#define BOM_UTF16_BE "\xfe\xff" -#define BOM_UTF16_LE "\xff\xfe" -#define BOM_UTF8 "\xef\xbb\xbf" +typedef struct _zend_encoding zend_encoding; typedef size_t (*zend_encoding_filter)(unsigned char **str, size_t *str_length, const unsigned char *buf, size_t length TSRMLS_DC); -typedef char* (*zend_encoding_detector)(const unsigned char *string, size_t length, char *list TSRMLS_DC); - -typedef int (*zend_encoding_converter)(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC); - -typedef size_t (*zend_encoding_oddlen)(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC); - -typedef int (*zend_encoding_list_checker)(const char *encoding_list TSRMLS_DC); - -typedef const char* (*zend_encoding_name_getter)(TSRMLS_D); - -typedef struct _zend_encoding { - zend_encoding_filter input_filter; /* escape input filter */ - zend_encoding_filter output_filter; /* escape output filter */ - const char *name; /* encoding name */ - const char *(*aliases)[]; /* encoding name aliases */ - zend_bool compatible; /* flex compatible or not */ -} zend_encoding; +typedef const zend_encoding* (*zend_encoding_fetcher)(const char *encoding_name TSRMLS_DC); +typedef const char* (*zend_encoding_name_getter)(const zend_encoding *encoding); +typedef int (*zend_encoding_lexer_compatibility_checker)(const zend_encoding *encoding); +typedef const zend_encoding *(*zend_encoding_detector)(const 
unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC); +typedef size_t (*zend_encoding_converter)(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC); +typedef int (*zend_encoding_list_parser)(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC); +typedef const zend_encoding *(*zend_encoding_internal_encoding_getter)(TSRMLS_D); +typedef int (*zend_encoding_internal_encoding_setter)(const zend_encoding *encoding TSRMLS_DC); +typedef struct _zend_multibyte_functions { + const char *provider_name; + zend_encoding_fetcher encoding_fetcher; + zend_encoding_name_getter encoding_name_getter; + zend_encoding_lexer_compatibility_checker lexer_compatibility_checker; + zend_encoding_detector encoding_detector; + zend_encoding_converter encoding_converter; + zend_encoding_list_parser encoding_list_parser; + zend_encoding_internal_encoding_getter internal_encoding_getter; + zend_encoding_internal_encoding_setter internal_encoding_setter; +} zend_multibyte_functions; /* * zend multibyte APIs */ BEGIN_EXTERN_C() -/* multibyte utility functions */ -ZEND_API extern zend_encoding_detector zend_multibyte_encoding_detector; -ZEND_API extern zend_encoding_converter zend_multibyte_encoding_converter; -ZEND_API extern zend_encoding_oddlen zend_multibyte_encoding_oddlen; -ZEND_API extern zend_encoding_list_checker zend_multibyte_check_encoding_list; -ZEND_API extern zend_encoding_name_getter zend_multibyte_get_internal_encoding; +ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf32be; +ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf32le; +ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf16be; +ZEND_API extern const zend_encoding *zend_multibyte_encoding_utf16le; +ZEND_API extern const zend_encoding 
*zend_multibyte_encoding_utf8; -ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list, -size_t encoding_list_size TSRMLS_DC); -ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRMLS_DC); -ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC); -ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC); -ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char *encoding_name); -ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t -*to_length, const unsigned char *from, size_t from_length TSRMLS_DC); +/* multibyte utility functions */ +ZEND_API int zend_multibyte_set_functions(const zend_multibyte_functions *functions TSRMLS_DC); +ZEND_API const zend_multibyte_functions *zend_multibyte_get_functions(TSRMLS_D); + +ZEND_API const zend_encoding *zend_multibyte_fetch_encoding(const char *name TSRMLS_DC); +ZEND_API const char *zend_multibyte_get_encoding_name(const zend_encoding *encoding); +ZEND_API int zend_multibyte_check_lexer_compatibility(const zend_encoding *encoding); +ZEND_API const zend_encoding *zend_multibyte_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC); +ZEND_API size_t zend_multibyte_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC); +ZEND_API int zend_multibyte_parse_encoding_list(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC); + +ZEND_API const zend_encoding *zend_multibyte_get_internal_encoding(TSRMLS_D); +ZEND_API const zend_encoding 
*zend_multibyte_get_script_encoding(TSRMLS_D); +ZEND_API int zend_multibyte_set_script_encoding(const zend_encoding **encoding_list, size_t encoding_list_size TSRMLS_DC); +ZEND_API int zend_multibyte_set_internal_encoding(const zend_encoding *encoding TSRMLS_DC); +ZEND_API int zend_multibyte_set_script_encoding_by_string(const char *new_value, size_t new_value_length TSRMLS_DC); + +ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC); ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC); -/* in zend_language_scanner.l */ -ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC); -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC); -ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC); END_EXTERN_C() #endif /* ZEND_MULTIBYTE_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_base64.c b/ext/mbstring/libmbfl/filters/mbfilter_base64.c index 13341f9e9fa..198f38c3d2b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_base64.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_base64.c @@ -41,7 +41,7 @@ const mbfl_encoding mbfl_encoding_base64 = { "BASE64", NULL, NULL, - MBFL_ENCTYPE_SBCS + MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_convert_vtbl vtbl_8bit_b64 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.c b/ext/mbstring/libmbfl/filters/mbfilter_big5.c index fe5effe0444..aa14e3058d3 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_big5.c @@ -65,7 +65,7 @@ const mbfl_encoding mbfl_encoding_big5 = { "BIG5", (const char *(*)[])&mbfl_encoding_big5_aliases, mblen_table_big5, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct 
mbfl_identify_vtbl vtbl_identify_big5 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index 587bff88cf7..148d825559a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -54,7 +54,7 @@ const mbfl_encoding mbfl_encoding_jis_ms = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50220 = { @@ -63,7 +63,7 @@ const mbfl_encoding mbfl_encoding_cp50220 = { "ISO-2022-JP", (const char *(*)[])NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50220raw = { @@ -72,7 +72,7 @@ const mbfl_encoding mbfl_encoding_cp50220raw = { "ISO-2022-JP", (const char *(*)[])NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50221 = { @@ -81,7 +81,7 @@ const mbfl_encoding mbfl_encoding_cp50221 = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50222 = { @@ -90,7 +90,7 @@ const mbfl_encoding mbfl_encoding_cp50222 = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_jis_ms = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 6e54d53f449..40ba8496519 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_cp932 = { "Shift_JIS", (const char *(*)[])&mbfl_encoding_cp932_aliases, 
mblen_table_sjis, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_cp932 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c index 561dc3003bd..4cfaa8eb4e7 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c @@ -65,7 +65,7 @@ const mbfl_encoding mbfl_encoding_cp936 = { "CP936", (const char *(*)[])&mbfl_encoding_cp936_aliases, mblen_table_cp936, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_cp936 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index 1fe0e6b732d..56c364d867a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -70,7 +70,7 @@ const mbfl_encoding mbfl_encoding_html_ent = { "HTML-ENTITIES", (const char *(*)[])&mbfl_encoding_html_ent_aliases, NULL, - MBFL_ENCTYPE_HTML_ENT + MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_convert_vtbl vtbl_wchar_html = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.c b/ext/mbstring/libmbfl/filters/mbfilter_hz.c index 7c7eaffc070..81cea2bb3a1 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_hz.c @@ -44,7 +44,7 @@ const mbfl_encoding mbfl_encoding_hz = { "HZ-GB-2312", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_hz = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c index 1bf77172b6c..a93ee4e4bbd 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -48,7 +48,7 @@ const mbfl_encoding 
mbfl_encoding_2022jpms = { "ISO-2022-JP", (const char *(*)[])&mbfl_encoding_2022jpms_aliases, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c index 77c95c5ad2b..01c01a4477f 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c @@ -43,7 +43,7 @@ const mbfl_encoding mbfl_encoding_2022kr = { "ISO-2022-KR", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_2022kr = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 6b1aef36433..7fa1fd35b9d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -46,7 +46,7 @@ const mbfl_encoding mbfl_encoding_jis = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_2022jp = { @@ -55,7 +55,7 @@ const mbfl_encoding mbfl_encoding_2022jp = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_jis = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c index 188d088ed0a..df9752bc3b9 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c @@ -43,7 +43,7 @@ const mbfl_encoding mbfl_encoding_qprint = { "Quoted-Printable", (const char *(*)[])&mbfl_encoding_qprint_aliases, NULL, - MBFL_ENCTYPE_SBCS + MBFL_ENCTYPE_ENC_STRM | 
MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_convert_vtbl vtbl_8bit_qprint = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c index 83ef565927d..b74fca21d56 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c @@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_sjis = { "Shift_JIS", (const char *(*)[])&mbfl_encoding_sjis_aliases, mblen_table_sjis, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_sjis = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c index 38244a0ac9a..f24210c22f5 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c @@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_sjis_open = { "Shift_JIS", (const char *(*)[])&mbfl_encoding_sjis_open_aliases, mblen_table_sjis, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_sjis_open = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c index ad0205bee10..2bb1dfada13 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c @@ -57,7 +57,7 @@ const mbfl_encoding mbfl_encoding_utf7 = { "UTF-7", (const char *(*)[])&mbfl_encoding_utf7_aliases, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_utf7 = { diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index b8b1db26838..85cf59656e7 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -127,6 +127,18 @@ mbfl_buffer_converter_new( enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz) 
+{ + const mbfl_encoding *_from = mbfl_no2encoding(from); + const mbfl_encoding *_to = mbfl_no2encoding(to); + + return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz); +} + +mbfl_buffer_converter * +mbfl_buffer_converter_new2( + const mbfl_encoding *from, + const mbfl_encoding *to, + int buf_initsz) { mbfl_buffer_converter *convd; @@ -137,14 +149,8 @@ mbfl_buffer_converter_new( } /* initialize */ - convd->from = mbfl_no2encoding(from); - convd->to = mbfl_no2encoding(to); - if (convd->from == NULL) { - convd->from = &mbfl_encoding_pass; - } - if (convd->to == NULL) { - convd->to = &mbfl_encoding_pass; - } + convd->from = from; + convd->to = to; /* create convert filter */ convd->filter1 = NULL; @@ -173,6 +179,7 @@ mbfl_buffer_converter_new( return convd; } + void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd) { @@ -250,6 +257,12 @@ mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string) +{ + return mbfl_buffer_converter_feed2(convd, string, NULL); +} + +int +mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc) { int n; unsigned char *p; @@ -263,20 +276,27 @@ mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string) /* feed data */ n = string->len; p = string->val; + filter = convd->filter1; if (filter != NULL) { filter_function = filter->filter_function; while (n > 0) { if ((*filter_function)(*p++, filter) < 0) { + if (loc) { + *loc = p - string->val; + } return -1; } n--; } } - + if (loc) { + *loc = p - string->val; + } return 0; } + int mbfl_buffer_converter_flush(mbfl_buffer_converter *convd) { @@ -400,6 +420,49 @@ mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict return identd; } +mbfl_encoding_detector * +mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict) +{ + 
mbfl_encoding_detector *identd; + + int i, num; + mbfl_identify_filter *filter; + + if (elist == NULL || elistsz <= 0) { + return NULL; + } + + /* allocate */ + identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector)); + if (identd == NULL) { + return NULL; + } + identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *)); + if (identd->filter_list == NULL) { + mbfl_free(identd); + return NULL; + } + + /* create filters */ + i = 0; + num = 0; + while (i < elistsz) { + filter = mbfl_identify_filter_new2(elist[i]); + if (filter != NULL) { + identd->filter_list[num] = filter; + num++; + } + i++; + } + identd->filter_list_size = num; + + /* set strict flag */ + identd->strict = strict; + + return identd; +} + + void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd) { @@ -454,33 +517,32 @@ mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string) return res; } -enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd) +const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd) { mbfl_identify_filter *filter; - enum mbfl_no_encoding encoding; + const mbfl_encoding *encoding = NULL; int n; /* judge */ - encoding = mbfl_no_encoding_invalid; if (identd != NULL) { n = identd->filter_list_size - 1; while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { if (!identd->strict || !filter->status) { - encoding = filter->encoding->no_encoding; + encoding = filter->encoding; } } n--; } /* fallback judge */ - if (encoding == mbfl_no_encoding_invalid) { + if (!encoding) { n = identd->filter_list_size - 1; while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { - encoding = filter->encoding->no_encoding; + encoding = filter->encoding; } n--; } @@ -490,6 +552,12 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident return encoding; } +enum mbfl_no_encoding 
mbfl_encoding_detector_judge(mbfl_encoding_detector *identd) +{ + const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd); + return !encoding ? mbfl_no_encoding_invalid: encoding->no_encoding; +} + /* * encoding converter @@ -646,36 +714,88 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el return encoding; } -const char* -mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict) +const mbfl_encoding * +mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict) { + int i, n, num, bad; + unsigned char *p; + mbfl_identify_filter *flist, *filter; const mbfl_encoding *encoding; - encoding = mbfl_identify_encoding(string, elist, elistsz, strict); - if (encoding != NULL && - encoding->no_encoding > mbfl_no_encoding_charset_min && - encoding->no_encoding < mbfl_no_encoding_charset_max) { - return encoding->name; - } else { + /* flist is an array of mbfl_identify_filter instances */ + flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter)); + if (flist == NULL) { return NULL; } -} -enum mbfl_no_encoding -mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict) -{ - const mbfl_encoding *encoding; - - encoding = mbfl_identify_encoding(string, elist, elistsz, strict); - if (encoding != NULL && - encoding->no_encoding > mbfl_no_encoding_charset_min && - encoding->no_encoding < mbfl_no_encoding_charset_max) { - return encoding->no_encoding; - } else { - return mbfl_no_encoding_invalid; + num = 0; + if (elist != NULL) { + for (i = 0; i < elistsz; i++) { + if (!mbfl_identify_filter_init2(&flist[num], elist[i])) { + num++; + } + } } -} + /* feed data */ + n = string->len; + p = string->val; + + if (p != NULL) { + bad = 0; + while (n > 0) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + (*filter->filter_function)(*p, filter); + if (filter->flag) { + 
bad++; + } + } + } + if ((num - 1) <= bad && !strict) { + break; + } + p++; + n--; + } + } + + /* judge */ + encoding = NULL; + + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + if (strict && filter->status) { + continue; + } + encoding = filter->encoding; + break; + } + } + + /* fall-back judge */ + if (!encoding) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag && (!strict || !filter->status)) { + encoding = filter->encoding; + break; + } + } + } + + /* cleanup */ + /* dtors should be called in reverse order */ + i = num; while (--i >= 0) { + mbfl_identify_filter_cleanup(&flist[i]); + } + + mbfl_free((void *)flist); + + return encoding; +} /* * strlen diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index 4565fc6985a..8e073c94d2c 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -127,12 +127,14 @@ struct _mbfl_buffer_converter { }; MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz); +MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new2(const mbfl_encoding *from, const mbfl_encoding *to, int buf_initsz); MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd); MBFLAPI extern void mbfl_buffer_converter_reset(mbfl_buffer_converter *convd); MBFLAPI extern int mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode); MBFLAPI extern int mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar); MBFLAPI extern int mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n); MBFLAPI extern int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string); +MBFLAPI extern int mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc); MBFLAPI extern int 
mbfl_buffer_converter_flush(mbfl_buffer_converter *convd); MBFLAPI extern mbfl_string * mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result); MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result); @@ -151,9 +153,11 @@ struct _mbfl_encoding_detector { }; MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict); +MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict); MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd); MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string); MBFLAPI extern enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd); +MBFLAPI extern const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd); /* @@ -169,12 +173,8 @@ mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_enc MBFLAPI extern const mbfl_encoding * mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); -MBFLAPI extern const char * -mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); - -MBFLAPI extern enum mbfl_no_encoding -mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); - +MBFLAPI extern const mbfl_encoding * +mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict); /* * strlen */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h index b6c0bb2d871..05f11cdf222 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h @@ -42,7 +42,8 @@ #define MBFL_ENCTYPE_MWC4BE 0x00000400 #define MBFL_ENCTYPE_MWC4LE 0x00000800 #define MBFL_ENCTYPE_SHFTCODE 
0x00001000 -#define MBFL_ENCTYPE_HTML_ENT 0x00002000 +#define MBFL_ENCTYPE_ENC_STRM 0x00002000 +#define MBFL_ENCTYPE_GL_UNSAFE 0x00004000 /* wchar plane, special charactor */ #define MBFL_WCSPLANE_MASK 0xffff diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c index 9a898070535..0d61169af3a 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -191,15 +191,37 @@ mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding) return filter; } +mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding) +{ + mbfl_identify_filter *filter; + + /* allocate */ + filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter)); + if (filter == NULL) { + return NULL; + } + + if (mbfl_identify_filter_init2(filter, encoding)) { + mbfl_free(filter); + return NULL; + } + + return filter; +} + + int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding) +{ + const mbfl_encoding *enc = mbfl_no2encoding(encoding); + return mbfl_identify_filter_init2(filter, enc ? 
enc: &mbfl_encoding_pass); +} + +int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding) { const struct mbfl_identify_vtbl *vtbl; /* encoding structure */ - filter->encoding = mbfl_no2encoding(encoding); - if (filter->encoding == NULL) { - filter->encoding = &mbfl_encoding_pass; - } + filter->encoding = encoding; filter->status = 0; filter->flag = 0; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h index b0721fc4130..12d81cde8c6 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h @@ -58,8 +58,10 @@ struct mbfl_identify_vtbl { MBFLAPI extern const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding); MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding); +MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new2(const mbfl_encoding *encoding); MBFLAPI extern void mbfl_identify_filter_delete(mbfl_identify_filter *filter); MBFLAPI extern int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding); +MBFLAPI extern int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding); MBFLAPI void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter); MBFLAPI extern void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter); diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index acfde4d5ae9..396eb4a60ab 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -27,6 +27,7 @@ #include "php.h" #include "php_ini.h" #include "php_variables.h" +#include "libmbfl/mbfl/mbfilter_pass.h" #include "mbstring.h" #include "ext/standard/php_string.h" #include "ext/standard/php_mail.h" @@ -56,7 +57,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) const char *c_var; zval *array_ptr; int free_buffer=0; - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; 
php_mb_encoding_handler_info_t info; if (arg != PARSE_STRING) { @@ -136,16 +137,16 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) switch(arg) { case PARSE_POST: - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_post) = NULL; break; case PARSE_GET: - MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_get) = NULL; break; case PARSE_COOKIE: - MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_cookie) = NULL; break; case PARSE_STRING: - MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_string) = NULL; break; } @@ -163,7 +164,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) detected = _php_mb_encoding_handler_ex(&info, array_ptr, res TSRMLS_CC); MBSTRG(http_input_identify) = detected; - if (detected != mbfl_no_encoding_invalid) { + if (detected) { switch(arg){ case PARSE_POST: MBSTRG(http_input_identify_post) = detected; @@ -191,7 +192,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) /* }}} */ /* {{{ mbfl_no_encoding _php_mb_encoding_handler_ex() */ -enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC) +const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC) { char *var, *val; const char *s1, *s2; @@ -200,13 +201,13 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ int n, num, *len_list = NULL; unsigned int val_len, new_val_len; mbfl_string string, resvar, resval; - enum mbfl_no_encoding from_encoding = mbfl_no_encoding_invalid; + const mbfl_encoding *from_encoding = NULL; mbfl_encoding_detector *identd = NULL; mbfl_buffer_converter *convd = NULL; - mbfl_string_init_set(&string, info->to_language, info->to_encoding); - mbfl_string_init_set(&resvar, info->to_language, info->to_encoding); - 
mbfl_string_init_set(&resval, info->to_language, info->to_encoding); + mbfl_string_init_set(&string, info->to_language, info->to_encoding->no_encoding); + mbfl_string_init_set(&resvar, info->to_language, info->to_encoding->no_encoding); + mbfl_string_init_set(&resval, info->to_language, info->to_encoding->no_encoding); if (!res || *res == '\0') { goto out; @@ -257,12 +258,12 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ /* initialize converter */ if (info->num_from_encodings <= 0) { - from_encoding = mbfl_no_encoding_pass; + from_encoding = &mbfl_encoding_pass; } else if (info->num_from_encodings == 1) { from_encoding = info->from_encodings[0]; } else { /* auto detect */ - from_encoding = mbfl_no_encoding_invalid; + from_encoding = NULL; - identd = mbfl_encoding_detector_new((enum mbfl_no_encoding *)info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection)); + identd = mbfl_encoding_detector_new2(info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection)); if (identd) { n = 0; @@ -274,10 +275,10 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ } n++; } - from_encoding = mbfl_encoding_detector_judge(identd); + from_encoding = mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } - if (from_encoding == mbfl_no_encoding_invalid) { + if (!from_encoding) { if (info->report_errors) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); } @@ -286,8 +287,8 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ } convd = NULL; - if (from_encoding != mbfl_no_encoding_pass) { - convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0); + if (from_encoding != &mbfl_encoding_pass) { + convd = mbfl_buffer_converter_new2(from_encoding, info->to_encoding, 0); if (convd != NULL) { mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); @@ -300,7 +301,7 @@ enum mbfl_no_encoding 
_php_mb_encoding_handler_ex(const php_mb_encoding_handler_ } /* convert encoding */ - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; n = 0; while (n < num) { @@ -312,10 +313,10 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ var = val_list[n]; } n++; - string.val = val_list[n]; + string.val = (unsigned char *)val_list[n]; string.len = len_list[n]; if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) { - val = resval.val; + val = (char *)resval.val; val_len = resval.len; } else { val = val_list[n]; @@ -355,10 +356,10 @@ out: /* {{{ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) */ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) { - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; php_mb_encoding_handler_info_t info; - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_post) = NULL; info.data_type = PARSE_POST; info.separator = "&"; @@ -372,7 +373,7 @@ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) detected = _php_mb_encoding_handler_ex(&info, arg, SG(request_info).post_data TSRMLS_CC); MBSTRG(http_input_identify) = detected; - if (detected != mbfl_no_encoding_invalid) { + if (detected) { MBSTRG(http_input_identify_post) = detected; } } diff --git a/ext/mbstring/mb_gpc.h b/ext/mbstring/mb_gpc.h index 83090c3bc9a..ab6fcc86e06 100644 --- a/ext/mbstring/mb_gpc.h +++ b/ext/mbstring/mb_gpc.h @@ -34,10 +34,10 @@ typedef struct _php_mb_encoding_handler_info_t { const char *separator; unsigned int report_errors: 1; enum mbfl_no_language to_language; - enum mbfl_no_encoding to_encoding; + const mbfl_encoding *to_encoding; enum mbfl_no_language from_language; - int num_from_encodings; - const enum mbfl_no_encoding *from_encodings; + const mbfl_encoding **from_encodings; + size_t num_from_encodings; } php_mb_encoding_handler_info_t; /* }}}*/ @@ -47,7 +47,7 @@ SAPI_POST_HANDLER_FUNC(php_mb_post_handler); MBSTRING_API 
SAPI_TREAT_DATA_FUNC(mbstr_treat_data); int _php_mb_enable_encoding_translation(int flag); -enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC); +const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC); /* }}} */ #endif /* HAVE_MBSTRING */ diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index d4119dda973..7013ebc0da2 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -62,6 +62,7 @@ #include "ext/standard/info.h" #include "libmbfl/mbfl/mbfl_allocators.h" +#include "libmbfl/mbfl/mbfilter_pass.h" #include "php_variables.h" #include "php_globals.h" @@ -96,18 +97,15 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring) static PHP_GINIT_FUNCTION(mbstring); static PHP_GSHUTDOWN_FUNCTION(mbstring); -static const char* php_mb_internal_encoding_name(TSRMLS_D); -static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC); -static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC); -static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC); -static int php_mb_set_zend_encoding(TSRMLS_D); +static void php_mb_populate_current_detect_order_list(TSRMLS_D); + /* }}} */ /* {{{ php_mb_default_identify_list */ typedef struct _php_mb_nls_ident_list { enum mbfl_no_language lang; - const enum mbfl_no_encoding* list; - int list_size; + const enum mbfl_no_encoding *list; + size_t list_size; } php_mb_nls_ident_list; static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = { @@ -650,12 +648,12 @@ static sapi_post_entry mbstr_post_entries[] = { * of parsed encodings. 
*/ static int -php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) { - int n, l, size, bauto, ret = 1; + int size, bauto, ret = SUCCESS; + size_t n; char *p, *p1, *p2, *endp, *tmpstr; - enum mbfl_no_encoding no_encoding; - enum mbfl_no_encoding *src, *entry, *list; + const mbfl_encoding **entry, **list; list = NULL; if (value == NULL || value_length <= 0) { @@ -665,14 +663,8 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc if (return_size) { *return_size = 0; } - return 0; + return FAILURE; } else { - enum mbfl_no_encoding *identify_list; - int identify_list_size; - - identify_list = MBSTRG(default_detect_order_list); - identify_list_size = MBSTRG(default_detect_order_list_size); - /* copy the value string for work */ if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { tmpstr = (char *)estrndup(value+1, value_length-2); @@ -681,7 +673,7 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc else tmpstr = (char *)estrndup(value, value_length); if (tmpstr == NULL) { - return 0; + return FAILURE; } /* count the number of listed encoding names */ endp = tmpstr + value_length; @@ -691,9 +683,9 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc p1 = p2 + 1; n++; } - size = n + identify_list_size; + size = n + MBSTRG(default_detect_order_list_size); /* make list */ - list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); + list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); if (list != NULL) { entry = list; n = 0; @@ -717,19 +709,19 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc /* convert to the encoding number and check encoding */ if 
(strcasecmp(p1, "auto") == 0) { if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t i; bauto = 1; - l = identify_list_size; - src = identify_list; - while (l > 0) { - *entry++ = *src++; - l--; + for (i = 0; i < identify_list_size; i++) { + *entry++ = mbfl_no2encoding(*src++); n++; } } } else { - no_encoding = mbfl_name2no_encoding(p1); - if (no_encoding != mbfl_no_encoding_invalid) { - *entry++ = no_encoding; + const mbfl_encoding *encoding = mbfl_name2encoding(p1); + if (encoding) { + *entry++ = encoding; n++; } else { ret = 0; @@ -769,40 +761,26 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc } /* }}} */ -/* {{{ MBSTRING_API php_mb_check_encoding_list */ -MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) -{ - return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC); -} -/* }}} */ - /* {{{ static int php_mb_parse_encoding_array() * Return 0 if input contains any illegal encoding, otherwise 1. * Even if any illegal encoding is detected the result may contain a list * of parsed encodings. 
*/ static int -php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) { zval **hash_entry; HashTable *target_hash; - int i, n, l, size, bauto,ret = 1; - enum mbfl_no_encoding no_encoding; - enum mbfl_no_encoding *src, *list, *entry; + int i, n, size, bauto, ret = SUCCESS; + const mbfl_encoding **list, **entry; list = NULL; if (Z_TYPE_P(array) == IS_ARRAY) { - enum mbfl_no_encoding *identify_list; - int identify_list_size; - - identify_list = MBSTRG(default_detect_order_list); - identify_list_size = MBSTRG(default_detect_order_list_size); - target_hash = Z_ARRVAL_P(array); zend_hash_internal_pointer_reset(target_hash); i = zend_hash_num_elements(target_hash); - size = i + identify_list_size; - list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); + size = i + MBSTRG(default_detect_order_list_size); + list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); if (list != NULL) { entry = list; bauto = 0; @@ -814,22 +792,23 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in convert_to_string_ex(hash_entry); if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) { if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t j; + bauto = 1; - l = identify_list_size; - src = identify_list; - while (l > 0) { - *entry++ = *src++; - l--; + for (j = 0; j < identify_list_size; j++) { + *entry++ = mbfl_no2encoding(*src++); n++; } } } else { - no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry)); - if (no_encoding != mbfl_no_encoding_invalid) { - *entry++ = no_encoding; + const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry)); + if (encoding) { + *entry++ = encoding; n++; } else { 
- ret = 0; + ret = FAILURE; } } zend_hash_move_forward(target_hash); @@ -846,7 +825,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in if (return_list) { *return_list = NULL; } - ret = 0; + ret = FAILURE; } if (return_size) { *return_size = n; @@ -858,7 +837,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in if (return_size) { *return_size = 0; } - ret = 0; + ret = FAILURE; } } @@ -866,6 +845,118 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in } /* }}} */ +/* {{{ zend_multibyte interface */ +static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC) +{ + return (const zend_encoding*)mbfl_name2encoding(encoding_name); +} + +static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding) +{ + return ((const mbfl_encoding *)encoding)->name; +} + +static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding) +{ + const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding; + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + return 1; + } + if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) { + return 1; + } + return 0; +} + +static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC) +{ + mbfl_string string; + + if (!list) { + list = (const zend_encoding **)MBSTRG(current_detect_order_list); + list_size = MBSTRG(current_detect_order_list_size); + } + + mbfl_string_init(&string); + string.no_language = MBSTRG(language); + string.val = (unsigned char *)arg_string; + string.len = arg_length; + return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0); +} + +static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const 
zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) +{ + mbfl_string string, result; + mbfl_buffer_converter *convd; + int status, loc; + + /* new encoding */ + /* initialize string */ + mbfl_string_init(&string); + mbfl_string_init(&result); + string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding; + string.no_language = MBSTRG(language); + string.val = (unsigned char*)from; + string.len = from_length; + + /* initialize converter */ + convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len); + if (convd == NULL) { + return -1; + } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + + /* do it */ + status = mbfl_buffer_converter_feed2(convd, &string, &loc); + if (status) { + mbfl_buffer_converter_delete(convd); + return (size_t)-1; + } + + mbfl_buffer_converter_flush(convd); + if (!mbfl_buffer_converter_result(convd, &result)) { + mbfl_buffer_converter_delete(convd); + return (size_t)-1; + } + + *to = result.val; + *to_length = result.len; + + mbfl_buffer_converter_delete(convd); + + return loc; +} + +static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) +{ + return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC); +} + +static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D) +{ + return (const zend_encoding *)MBSTRG(internal_encoding); +} + +static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC) +{ + MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding; + return SUCCESS; +} + +static zend_multibyte_functions php_mb_zend_multibyte_functions = { + "mbstring", + 
php_mb_zend_encoding_fetcher, + php_mb_zend_encoding_name_getter, + php_mb_zend_encoding_lexer_compatibility_checker, + php_mb_zend_encoding_detector, + php_mb_zend_encoding_converter, + php_mb_zend_encoding_list_parser, + php_mb_zend_internal_encoding_getter, + php_mb_zend_internal_encoding_setter +}; +/* }}} */ + static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC); static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len); static void _php_mb_free_regex(void *opaque); @@ -940,7 +1031,7 @@ static void _php_mb_free_regex(void *opaque) #endif /* {{{ php_mb_nls_get_default_detect_order_list */ -static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size) +static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size) { size_t i; @@ -1048,23 +1139,27 @@ static PHP_INI_MH(OnUpdate_mbstring_language) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */ static PHP_INI_MH(OnUpdate_mbstring_detect_order) { - enum mbfl_no_encoding *list; - int size; + const mbfl_encoding **list; + size_t size; - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { + if (!new_value) { if (MBSTRG(detect_order_list)) { - free(MBSTRG(detect_order_list)); - } - MBSTRG(detect_order_list) = list; - MBSTRG(detect_order_list_size) = size; - } else { - if (MBSTRG(detect_order_list)) { - free(MBSTRG(detect_order_list)); - MBSTRG(detect_order_list) = NULL; + pefree(MBSTRG(detect_order_list), 1); } + MBSTRG(detect_order_list) = NULL; + MBSTRG(detect_order_list_size) = 0; + return SUCCESS; + } + + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { return FAILURE; } + if (MBSTRG(detect_order_list)) { + pefree(MBSTRG(detect_order_list), 1); + } + MBSTRG(detect_order_list) = list; + MBSTRG(detect_order_list_size) = size; return SUCCESS; } /* }}} */ 
@@ -1072,24 +1167,28 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ static PHP_INI_MH(OnUpdate_mbstring_http_input) { - enum mbfl_no_encoding *list; - int size; + const mbfl_encoding **list; + size_t size; - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { + if (!new_value) { if (MBSTRG(http_input_list)) { - free(MBSTRG(http_input_list)); - } - MBSTRG(http_input_list) = list; - MBSTRG(http_input_list_size) = size; - } else { - if (MBSTRG(http_input_list)) { - free(MBSTRG(http_input_list)); - MBSTRG(http_input_list) = NULL; + pefree(MBSTRG(http_input_list), 1); } + MBSTRG(http_input_list) = NULL; MBSTRG(http_input_list_size) = 0; + return SUCCESS; + } + + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { return FAILURE; } + if (MBSTRG(http_input_list)) { + pefree(MBSTRG(http_input_list), 1); + } + MBSTRG(http_input_list) = list; + MBSTRG(http_input_list_size) = size; + return SUCCESS; } /* }}} */ @@ -1097,20 +1196,23 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ static PHP_INI_MH(OnUpdate_mbstring_http_output) { - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; - no_encoding = mbfl_name2no_encoding(new_value); - if (no_encoding != mbfl_no_encoding_invalid) { - MBSTRG(http_output_encoding) = no_encoding; - MBSTRG(current_http_output_encoding) = no_encoding; - } else { - MBSTRG(http_output_encoding) = mbfl_no_encoding_pass; - MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass; - if (new_value != NULL && new_value_length > 0) { - return FAILURE; - } + if (new_value == NULL || new_value_length == 0) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return SUCCESS; } + encoding = mbfl_name2encoding(new_value); + if (!encoding) { + 
MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return FAILURE; + } + + MBSTRG(http_output_encoding) = encoding; + MBSTRG(current_http_output_encoding) = encoding; return SUCCESS; } /* }}} */ @@ -1118,46 +1220,44 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC) { - enum mbfl_no_encoding no_encoding; - - if (!new_value - || !*new_value - || (no_encoding = mbfl_name2no_encoding(new_value)) == mbfl_no_encoding_invalid) { + const mbfl_encoding *encoding; + + if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) { switch (MBSTRG(language)) { case mbfl_no_language_uni: - no_encoding = mbfl_no_encoding_utf8; + encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); break; case mbfl_no_language_japanese: - no_encoding = mbfl_no_encoding_euc_jp; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp); break; case mbfl_no_language_korean: - no_encoding = mbfl_no_encoding_euc_kr; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr); break; case mbfl_no_language_simplified_chinese: - no_encoding = mbfl_no_encoding_euc_cn; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn); break; case mbfl_no_language_traditional_chinese: - no_encoding = mbfl_no_encoding_euc_tw; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw); break; case mbfl_no_language_russian: - no_encoding = mbfl_no_encoding_koi8r; + encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r); break; case mbfl_no_language_german: - no_encoding = mbfl_no_encoding_8859_15; + encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15); break; case mbfl_no_language_armenian: - no_encoding = mbfl_no_encoding_armscii8; + encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8); break; case mbfl_no_language_turkish: - no_encoding = mbfl_no_encoding_8859_9; + encoding = 
mbfl_no2encoding(mbfl_no_encoding_8859_9); break; default: - no_encoding = mbfl_no_encoding_8859_1; + encoding = NULL; break; } } - MBSTRG(internal_encoding) = no_encoding; - MBSTRG(current_internal_encoding) = no_encoding; + MBSTRG(internal_encoding) = encoding; + MBSTRG(current_internal_encoding) = encoding; #if HAVE_MBREGEX { const char *enc_name = new_value; @@ -1194,33 +1294,6 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) } /* }}} */ -/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */ -static PHP_INI_MH(OnUpdate_mbstring_script_encoding) -{ - int *list, size; - - if (!CG(multibyte)) { - return FAILURE; - } - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { - if (MBSTRG(script_encoding_list) != NULL) { - free(MBSTRG(script_encoding_list)); - } - MBSTRG(script_encoding_list) = list; - MBSTRG(script_encoding_list_size) = size; - } else { - if (MBSTRG(script_encoding_list) != NULL) { - free(MBSTRG(script_encoding_list)); - } - MBSTRG(script_encoding_list) = NULL; - MBSTRG(script_encoding_list_size) = 0; - return FAILURE; - } - - return SUCCESS; -} -/* }}} */ - /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) { @@ -1263,7 +1336,7 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) { if (new_value == NULL) { - return FAILURE; + return FAILURE; } OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC); @@ -1318,7 +1391,6 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) - PHP_INI_ENTRY("mbstring.script_encoding", 
NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding) PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) @@ -1343,17 +1415,15 @@ PHP_INI_END() static PHP_GINIT_FUNCTION(mbstring) { mbstring_globals->language = mbfl_no_language_uni; - mbstring_globals->internal_encoding = mbfl_no_encoding_invalid; + mbstring_globals->internal_encoding = NULL; mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; - mbstring_globals->script_encoding_list = NULL; - mbstring_globals->script_encoding_list_size = 0; - mbstring_globals->http_output_encoding = mbfl_no_encoding_pass; - mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass; - mbstring_globals->http_input_identify = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid; + mbstring_globals->http_output_encoding = &mbfl_encoding_pass; + mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass; + mbstring_globals->http_input_identify = NULL; + mbstring_globals->http_input_identify_get = NULL; + mbstring_globals->http_input_identify_post = NULL; + mbstring_globals->http_input_identify_cookie = NULL; + mbstring_globals->http_input_identify_string = NULL; mbstring_globals->http_input_list = NULL; mbstring_globals->http_input_list_size = 0; mbstring_globals->detect_order_list = NULL; @@ -1384,9 +1454,6 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring) if (mbstring_globals->http_input_list) { free(mbstring_globals->http_input_list); } - if (mbstring_globals->script_encoding_list) { - free(mbstring_globals->script_encoding_list); - } if 
(mbstring_globals->detect_order_list) { free(mbstring_globals->detect_order_list); } @@ -1426,12 +1493,9 @@ PHP_MINIT_FUNCTION(mbstring) PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif - zend_multibyte_set_functions( - php_mb_encoding_detector, - php_mb_encoding_converter, - php_mb_oddlen, - php_mb_check_encoding_list, - php_mb_internal_encoding_name TSRMLS_CC); + if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) { + return FAILURE; + } php_rfc1867_set_multibyte_callbacks( php_mb_encoding_translation, @@ -1460,8 +1524,6 @@ PHP_MSHUTDOWN_FUNCTION(mbstring) /* {{{ PHP_RINIT_FUNCTION(mbstring) */ PHP_RINIT_FUNCTION(mbstring) { - int n; - enum mbfl_no_encoding *list=NULL, *entry; zend_function *func, *orig; const struct mb_overload_def *p; @@ -1472,22 +1534,7 @@ PHP_RINIT_FUNCTION(mbstring) MBSTRG(illegalchars) = 0; - n = 0; - if (MBSTRG(detect_order_list)) { - list = MBSTRG(detect_order_list); - n = MBSTRG(detect_order_list_size); - } - if (n <= 0) { - list = MBSTRG(default_detect_order_list); - n = MBSTRG(default_detect_order_list_size); - } - entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0); - MBSTRG(current_detect_order_list) = entry; - MBSTRG(current_detect_order_list_size) = n; - while (n > 0) { - *entry++ = *list++; - n--; - } + php_mb_populate_current_detect_order_list(TSRMLS_C); /* override original function. */ if (MBSTRG(func_overload)){ @@ -1519,10 +1566,7 @@ PHP_RINIT_FUNCTION(mbstring) #if HAVE_MBREGEX PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif - if (CG(multibyte)) { - zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); - php_mb_set_zend_encoding(TSRMLS_C); - } + zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC); return SUCCESS; } @@ -1546,11 +1590,11 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) } /* clear http input identification. 
*/ - MBSTRG(http_input_identify) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify) = NULL; + MBSTRG(http_input_identify_post) = NULL; + MBSTRG(http_input_identify_get) = NULL; + MBSTRG(http_input_identify_cookie) = NULL; + MBSTRG(http_input_identify_string) = NULL; /* clear overloaded function. */ if (MBSTRG(func_overload)){ @@ -1625,31 +1669,27 @@ PHP_FUNCTION(mb_language) Sets the current internal encoding or Returns the current internal encoding as a string */ PHP_FUNCTION(mb_internal_encoding) { - char *name = NULL; + const char *name = NULL; int name_len; - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { RETURN_FALSE; } if (name == NULL) { - name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); + name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL; if (name != NULL) { RETURN_STRING(name, 1); } else { RETURN_FALSE; } } else { - no_encoding = mbfl_name2no_encoding(name); - if (no_encoding == mbfl_no_encoding_invalid) { + encoding = mbfl_name2encoding(name); + if (!encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { - MBSTRG(current_internal_encoding) = no_encoding; - /* TODO: make independent from mbstring.encoding_translation? 
*/ - if (CG(multibyte) && MBSTRG(encoding_translation)) { - zend_multibyte_set_internal_encoding(name TSRMLS_CC); - } + MBSTRG(current_internal_encoding) = encoding; RETURN_TRUE; } } @@ -1662,10 +1702,9 @@ PHP_FUNCTION(mb_http_input) { char *typ = NULL; int typ_len; - int retname, n; - char *name, *list, *temp; - enum mbfl_no_encoding *entry; - enum mbfl_no_encoding result = mbfl_no_encoding_invalid; + int retname; + char *list, *temp; + const mbfl_encoding *result = NULL; retname = 1; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { @@ -1693,40 +1732,38 @@ PHP_FUNCTION(mb_http_input) break; case 'I': case 'i': - array_init(return_value); - entry = MBSTRG(http_input_list); - n = MBSTRG(http_input_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); + { + array_init(return_value); + const mbfl_encoding **entry = MBSTRG(http_input_list); + const size_t n = MBSTRG(http_input_list_size); + size_t i; + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); + entry++; } - entry++; - n--; + retname = 0; } - retname = 0; break; case 'L': case 'l': - entry = MBSTRG(http_input_list); - n = MBSTRG(http_input_list_size); - list = NULL; - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { + { + const mbfl_encoding **entry = MBSTRG(http_input_list); + const size_t n = MBSTRG(http_input_list_size); + size_t i; + list = NULL; + for (i = 0; i < n; i++) { if (list) { temp = list; - spprintf(&list, 0, "%s,%s", temp, name); + spprintf(&list, 0, "%s,%s", temp, (*entry)->name); efree(temp); if (!list) { break; } } else { - list = estrdup(name); + list = estrdup((*entry)->name); } + entry++; } - entry++; - n--; } if (!list) { RETURN_FALSE; @@ -1741,9 +1778,8 @@ PHP_FUNCTION(mb_http_input) } if (retname) { - if (result != mbfl_no_encoding_invalid && - (name = (char *)mbfl_no_encoding2name(result)) != NULL) { 
- RETVAL_STRING(name, 1); + if (result) { + RETVAL_STRING(result->name, 1); } else { RETVAL_FALSE; } @@ -1755,28 +1791,28 @@ PHP_FUNCTION(mb_http_input) Sets the current output_encoding or returns the current output_encoding as a string */ PHP_FUNCTION(mb_http_output) { - char *name = NULL; + const char *name = NULL; int name_len; - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) { RETURN_FALSE; } if (name == NULL) { - name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding)); + name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL; if (name != NULL) { RETURN_STRING(name, 1); } else { RETURN_FALSE; } } else { - no_encoding = mbfl_name2no_encoding(name); - if (no_encoding == mbfl_no_encoding_invalid) { + encoding = mbfl_name2encoding(name); + if (!encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { - MBSTRG(current_http_output_encoding) = no_encoding; + MBSTRG(current_http_output_encoding) = encoding; RETURN_TRUE; } } @@ -1788,32 +1824,26 @@ PHP_FUNCTION(mb_http_output) PHP_FUNCTION(mb_detect_order) { zval **arg1 = NULL; - int n, size; - enum mbfl_no_encoding *list, *entry; - char *name; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { return; } if (!arg1) { + size_t i; + size_t n = MBSTRG(current_detect_order_list_size); + const mbfl_encoding **entry = MBSTRG(current_detect_order_list); array_init(return_value); - entry = MBSTRG(current_detect_order_list); - n = MBSTRG(current_detect_order_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); entry++; - n--; } } else { - list = NULL; - size = 0; + const mbfl_encoding **list = 
NULL; + size_t size = 0; switch (Z_TYPE_PP(arg1)) { case IS_ARRAY: - if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); } @@ -1822,7 +1852,7 @@ PHP_FUNCTION(mb_detect_order) break; default: convert_to_string_ex(arg1); - if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); } @@ -1942,7 +1972,7 @@ PHP_FUNCTION(mb_parse_str) char *encstr = NULL; int encstr_len; php_mb_encoding_handler_info_t info; - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; track_vars_array = NULL; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) { @@ -1970,7 +2000,7 @@ PHP_FUNCTION(mb_parse_str) MBSTRG(http_input_identify) = detected; - RETVAL_BOOL(detected != mbfl_no_encoding_invalid); + RETVAL_BOOL(detected); if (encstr != NULL) efree(encstr); } @@ -1986,7 +2016,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_string string, result; const char *charset; char *p; - enum mbfl_no_encoding encoding; + const mbfl_encoding *encoding; int last_feed, len; unsigned char send_text_mimetype = 0; char *s, *mimetype = NULL; @@ -2005,7 +2035,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_buffer_converter_delete(MBSTRG(outconv)); MBSTRG(outconv) = NULL; } - if (encoding == mbfl_no_encoding_pass) { + if (encoding == &mbfl_encoding_pass) { RETURN_STRINGL(arg_string, arg_string_len, 1); } @@ -2027,7 +2057,7 @@ PHP_FUNCTION(mb_output_handler) /* if content-type is not yet set, set it and activate the converter */ if (SG(sapi_headers).send_default_content_type || send_text_mimetype) { - charset = mbfl_no2preferred_mime_name(encoding); + charset = encoding->mime_name; if (charset) { len = spprintf( &p, 0, "Content-Type: %s; charset=%s", 
mimetype, charset ); if (sapi_add_header(p, len, 0) != FAILURE) { @@ -2035,7 +2065,7 @@ PHP_FUNCTION(mb_output_handler) } } /* activate the converter */ - MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0); + MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0); if (send_text_mimetype){ efree(mimetype); } @@ -2056,7 +2086,7 @@ PHP_FUNCTION(mb_output_handler) /* feed the string */ mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)arg_string; string.len = arg_string_len; mbfl_buffer_converter_feed(MBSTRG(outconv), &string); @@ -2093,7 +2123,7 @@ PHP_FUNCTION(mb_strlen) string.no_language = MBSTRG(language); if (enc_name == NULL) { - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; } else { string.no_encoding = mbfl_name2no_encoding(enc_name); if (string.no_encoding == mbfl_no_encoding_invalid) { @@ -2124,9 +2154,9 @@ PHP_FUNCTION(mb_strpos) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; offset = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) { @@ -2191,9 +2221,9 @@ PHP_FUNCTION(mb_strrpos) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + 
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2280,7 +2310,7 @@ PHP_FUNCTION(mb_stripos) int n; long offset; mbfl_string haystack, needle; - char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; int from_encoding_len; n = -1; offset = 0; @@ -2309,7 +2339,7 @@ PHP_FUNCTION(mb_strripos) int n; long offset; mbfl_string haystack, needle; - const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; int from_encoding_len; n = -1; offset = 0; @@ -2341,9 +2371,9 @@ PHP_FUNCTION(mb_strstr) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2399,9 +2429,9 @@ PHP_FUNCTION(mb_strrchr) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 
needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2454,13 +2484,13 @@ PHP_FUNCTION(mb_stristr) unsigned int from_encoding_len, len, mblen; int n; mbfl_string haystack, needle, result, *ret = NULL; - const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { @@ -2512,13 +2542,13 @@ PHP_FUNCTION(mb_strrichr) zend_bool part = 0; int n, from_encoding_len, len, mblen; mbfl_string haystack, needle, result, *ret = NULL; - char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->name; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { @@ -2570,9 +2600,9 @@ PHP_FUNCTION(mb_substr_count) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) { return; @@ -2616,7 +2646,7 @@ PHP_FUNCTION(mb_substr) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (argc == 4) { string.no_encoding = mbfl_name2no_encoding(encoding); @@ -2685,7 +2715,7 @@ PHP_FUNCTION(mb_strcut) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) { return; @@ -2748,7 +2778,7 @@ PHP_FUNCTION(mb_strwidth) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { return; @@ -2787,9 +2817,9 @@ PHP_FUNCTION(mb_strimwidth) 
mbfl_string_init(&string); mbfl_string_init(&marker); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; marker.no_language = MBSTRG(language); - marker.no_encoding = MBSTRG(current_internal_encoding); + marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; marker.val = NULL; marker.len = 0; @@ -2833,9 +2863,10 @@ PHP_FUNCTION(mb_strimwidth) MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC) { mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_buffer_converter *convd; - int size, *list; + size_t size; + const mbfl_encoding **list; char *output=NULL; if (output_len) { @@ -2846,8 +2877,8 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co } /* new encoding */ if (_to_encoding && strlen(_to_encoding)) { - to_encoding = mbfl_name2no_encoding(_to_encoding); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(_to_encoding); + if (!to_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding); return NULL; } @@ -2859,7 +2890,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co mbfl_string_init(&string); mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); string.val = (unsigned char *)input; string.len = length; @@ -2871,17 +2902,17 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); if (size == 1) { from_encoding = *list; - 
string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; } else if (size > 1) { /* auto detect */ - from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection)); - if (from_encoding != mbfl_no_encoding_invalid) { - string.no_encoding = from_encoding; + from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection)); + if (from_encoding) { + string.no_encoding = from_encoding->no_encoding; } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding"); from_encoding = mbfl_no_encoding_pass; to_encoding = from_encoding; - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; } } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified"); @@ -2892,7 +2923,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co } /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter"); return NULL; @@ -2993,7 +3024,8 @@ PHP_FUNCTION(mb_convert_encoding) Returns a case-folded version of sourcestring */ PHP_FUNCTION(mb_convert_case) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; long case_mode = 0; char *newstr; @@ -3017,7 +3049,8 @@ PHP_FUNCTION(mb_convert_case) */ PHP_FUNCTION(mb_strtoupper) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; char *newstr; size_t ret_len; @@ -3040,7 +3073,8 @@ 
PHP_FUNCTION(mb_strtoupper) */ PHP_FUNCTION(mb_strtolower) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; char *newstr; size_t ret_len; @@ -3068,9 +3102,9 @@ PHP_FUNCTION(mb_detect_encoding) zval *encoding_list; mbfl_string string; - const char *ret; - enum mbfl_no_encoding *elist; - int size, *list; + const mbfl_encoding *ret; + const mbfl_encoding **elist, **list; + size_t size; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) { return; @@ -3082,7 +3116,7 @@ PHP_FUNCTION(mb_detect_encoding) if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_P(encoding_list)) { switch (Z_TYPE_P(encoding_list)) { case IS_ARRAY: - if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); list = NULL; @@ -3092,7 +3126,7 @@ PHP_FUNCTION(mb_detect_encoding) break; default: convert_to_string(encoding_list); - if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); list = NULL; @@ -3121,7 +3155,7 @@ PHP_FUNCTION(mb_detect_encoding) string.no_language = MBSTRG(language); string.val = (unsigned char *)str; string.len = str_len; - ret = mbfl_identify_encoding_name(&string, elist, size, strict); + ret = mbfl_identify_encoding2(&string, elist, size, strict); if (list != NULL) { efree((void *)list); @@ -3131,7 +3165,7 @@ PHP_FUNCTION(mb_detect_encoding) RETURN_FALSE; } - RETVAL_STRING((char *)ret, 1); + RETVAL_STRING((char *)ret->name, 1); } /* }}} */ @@ -3196,7 +3230,7 @@ PHP_FUNCTION(mb_encode_mimeheader) 
mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { return; @@ -3245,14 +3279,14 @@ PHP_FUNCTION(mb_decode_mimeheader) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) { return; } mbfl_string_init(&result); - ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)); + ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding); if (ret != NULL) { RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ } else { @@ -3274,7 +3308,7 @@ PHP_FUNCTION(mb_convert_kana) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) { return; @@ -3373,12 +3407,13 @@ PHP_FUNCTION(mb_convert_variables) zval ***args, ***stack, **var, **hash_entry, **zfrom_enc; HashTable *target_hash; mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_encoding_detector *identd; mbfl_buffer_converter *convd; - int n, to_enc_len, argc, stack_level, stack_max, elistsz; - enum mbfl_no_encoding *elist; - char *name, *to_enc; + int n, 
to_enc_len, argc, stack_level, stack_max; + size_t elistsz; + const mbfl_encoding **elist; + char *to_enc; void *ptmp; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { @@ -3386,8 +3421,8 @@ PHP_FUNCTION(mb_convert_variables) } /* new encoding */ - to_encoding = mbfl_name2no_encoding(to_enc); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(to_enc); + if (!to_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc); efree(args); RETURN_FALSE; @@ -3397,7 +3432,7 @@ PHP_FUNCTION(mb_convert_variables) mbfl_string_init(&string); mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); /* pre-conversion encoding */ @@ -3418,11 +3453,11 @@ PHP_FUNCTION(mb_convert_variables) from_encoding = *elist; } else { /* auto detect */ - from_encoding = mbfl_no_encoding_invalid; + from_encoding = NULL; stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0); stack_level = 0; - identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection)); + identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection)); if (identd != NULL) { n = 0; while (n < argc || stack_level > 0) { @@ -3475,12 +3510,12 @@ PHP_FUNCTION(mb_convert_variables) } } detect_end: - from_encoding = mbfl_encoding_detector_judge(identd); + from_encoding = mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } efree(stack); - if (from_encoding == mbfl_no_encoding_invalid) { + if (!from_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); from_encoding = mbfl_no_encoding_pass; } @@ -3491,7 +3526,7 @@ detect_end: /* create converter */ convd = NULL; if (from_encoding != mbfl_no_encoding_pass) { - convd = 
mbfl_buffer_converter_new(from_encoding, to_encoding, 0); + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); RETURN_FALSE; @@ -3573,9 +3608,8 @@ detect_end: efree(args); - name = (char *)mbfl_no_encoding2name(from_encoding); - if (name != NULL) { - RETURN_STRING(name, 1); + if (from_encoding) { + RETURN_STRING(from_encoding->name, 1); } else { RETURN_FALSE; } @@ -3602,7 +3636,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str; string.len = str_len; @@ -4055,10 +4089,10 @@ PHP_FUNCTION(mb_send_mail) orig_str.no_language = MBSTRG(language); orig_str.val = (unsigned char *)subject; orig_str.len = subject_len; - orig_str.no_encoding = MBSTRG(current_internal_encoding); - if (orig_str.no_encoding == mbfl_no_encoding_invalid - || orig_str.no_encoding == mbfl_no_encoding_pass) { - orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { + const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = encoding ? 
encoding->no_encoding: mbfl_no_encoding_invalid; } pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]")); if (pstr != NULL) { @@ -4074,11 +4108,11 @@ PHP_FUNCTION(mb_send_mail) orig_str.no_language = MBSTRG(language); orig_str.val = (unsigned char *)message; orig_str.len = (unsigned int)message_len; - orig_str.no_encoding = MBSTRG(current_internal_encoding); + orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (orig_str.no_encoding == mbfl_no_encoding_invalid - || orig_str.no_encoding == mbfl_no_encoding_pass) { - orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { + const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = encoding ? 
encoding->no_encoding: mbfl_no_encoding_invalid; } pstr = NULL; @@ -4186,13 +4220,13 @@ PHP_FUNCTION(mb_send_mail) PHP_FUNCTION(mb_get_info) { char *typ = NULL; - int typ_len, n; + int typ_len; + size_t n; char *name; const struct mb_overload_def *over_func; zval *row1, *row2; const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); - enum mbfl_no_encoding *entry; - zval *row3; + const mbfl_encoding **entry; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { RETURN_FALSE; @@ -4200,14 +4234,14 @@ PHP_FUNCTION(mb_get_info) if (!typ || !strcasecmp("all", typ)) { array_init(return_value); - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { - add_assoc_string(return_value, "internal_encoding", name, 1); + if (MBSTRG(current_internal_encoding)) { + add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { - add_assoc_string(return_value, "http_input", name, 1); + if (MBSTRG(http_input_identify)) { + add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { - add_assoc_string(return_value, "http_output", name, 1); + if (MBSTRG(current_http_output_encoding)) { + add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1); } if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1); @@ -4249,15 +4283,13 @@ PHP_FUNCTION(mb_get_info) } n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); - if(n > 0) { + if (n > 0) { + size_t i; MAKE_STD_ZVAL(row2); array_init(row2); - while (n > 0) { - if ((name = (char 
*)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row2, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(row2, (*entry)->name, 1); entry++; - n--; } add_assoc_zval(return_value, "detect_order", row2); } @@ -4275,33 +4307,17 @@ PHP_FUNCTION(mb_get_info) } else { add_assoc_string(return_value, "strict_detection", "Off", 1); } - if (CG(multibyte)) { - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - MAKE_STD_ZVAL(row3); - array_init(row3); - while (n > 0) { - if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row3, name, 1); - } - entry++; - n--; - } - add_assoc_zval(return_value, "script_encoding", row3); - } - } } else if (!strcasecmp("internal_encoding", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(current_internal_encoding)) { + RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1); } } else if (!strcasecmp("http_input", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(http_input_identify)) { + RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1); } } else if (!strcasecmp("http_output", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(current_http_output_encoding)) { + RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1); } } else if (!strcasecmp("http_output_conv_mimetypes", typ)) { if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { @@ -4349,15 +4365,11 @@ PHP_FUNCTION(mb_get_info) } else if (!strcasecmp("detect_order", typ)) { n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); - if(n > 0) { + if (n > 0) { + size_t i; 
array_init(return_value); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } - entry++; - n--; + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); } } } else if (!strcasecmp("substitute_character", typ)) { @@ -4377,22 +4389,6 @@ PHP_FUNCTION(mb_get_info) RETVAL_STRING("Off", 1); } } else { - if (CG(multibyte) && !strcasecmp("script_encoding", typ)) { - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - array_init(return_value); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } - entry++; - n--; - } - } - return; - } RETURN_FALSE; } } @@ -4407,7 +4403,7 @@ PHP_FUNCTION(mb_check_encoding) char *enc = NULL; int enc_len; mbfl_buffer_converter *convd; - enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding); + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_string string, result, *ret = NULL; long illegalchars = 0; @@ -4420,14 +4416,14 @@ PHP_FUNCTION(mb_check_encoding) } if (enc != NULL) { - no_encoding = mbfl_name2no_encoding(enc); - if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) { + encoding = mbfl_name2encoding(enc); + if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc); RETURN_FALSE; } } - convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0); + convd = mbfl_buffer_converter_new2(encoding, encoding, 0); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); RETURN_FALSE; @@ -4436,7 +4432,7 @@ PHP_FUNCTION(mb_check_encoding) mbfl_buffer_converter_illegal_substchar(convd, 0); /* initialize string */ - mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding); + mbfl_string_init_set(&string, mbfl_no_language_neutral, 
encoding->no_encoding); mbfl_string_init(&result); string.val = (unsigned char *)var; @@ -4455,6 +4451,34 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ + +/* {{{ php_mb_populate_current_detect_order_list */ +static void php_mb_populate_current_detect_order_list(TSRMLS_D) +{ + const mbfl_encoding **entry = 0; + size_t nentries; + + if (MBSTRG(current_detect_order_list)) { + return; + } + + if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) { + nentries = MBSTRG(detect_order_list_size); + entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); + memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries); + } else { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + nentries = MBSTRG(default_detect_order_list_size); + entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); + size_t i; + for (i = 0; i < nentries; i++) { + entry[i] = mbfl_no2encoding(src[i]); + } + } + MBSTRG(current_detect_order_list) = entry; + MBSTRG(current_detect_order_list_size) = nentries; +} + /* {{{ MBSTRING_API int php_mb_encoding_translation() */ MBSTRING_API int php_mb_encoding_translation(TSRMLS_D) { @@ -4483,8 +4507,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *e /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC) { - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); } /* }}} */ @@ -4532,8 +4555,7 @@ MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC) { - return php_mb_safe_strrchr_ex(s, c, nbytes, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding)); 
} /* }}} */ @@ -4548,12 +4570,10 @@ MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC) MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC) { - if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){ - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(http_input_identify))); + if (MBSTRG(http_input_identify)) { + return php_mb_mbchar_bytes_ex(s, MBSTRG(http_input_identify)); } else { - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); } } /* }}} */ @@ -4563,13 +4583,13 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co { int i; mbfl_string string, result, *ret = NULL; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_buffer_converter *convd; if (encoding_to) { /* new encoding */ - to_encoding = mbfl_name2no_encoding(encoding_to); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(encoding_to); + if (!to_encoding) { return -1; } } else { @@ -4577,8 +4597,8 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co } if (encoding_from) { /* old encoding */ - from_encoding = mbfl_name2no_encoding(encoding_from); - if (from_encoding == mbfl_no_encoding_invalid) { + from_encoding = mbfl_name2encoding(encoding_from); + if (from_encoding) { return -1; } } else { @@ -4592,7 +4612,7 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co /* initialize string */ mbfl_string_init(&string); mbfl_string_init(&result); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); for (i=0; ino_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; do { size_t len = 0; @@ -4778,176 
+4784,6 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int } /* }}} */ -/* {{{ php_mb_set_zend_encoding() */ -static int php_mb_set_zend_encoding(TSRMLS_D) -{ - /* 'd better use mbfl_memory_device? */ - char *name, *list = NULL; - int n, *entry, list_size = 0; - - /* notify script encoding to Zend Engine */ - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - list_size += strlen(name) + 1; - if (!list) { - list = (char*)emalloc(list_size); - *list = '\0'; - } else { - list = (char*)erealloc(list, list_size); - strcat(list, ","); - } - strcat(list, name); - } - entry++; - n--; - } - zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC); - if (list) { - efree(list); - } - - /* TODO: make independent from mbstring.encoding_translation? */ - if (MBSTRG(encoding_translation)) { - /* notify internal encoding to Zend Engine */ - name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); - zend_multibyte_set_internal_encoding(name TSRMLS_CC); - } - - return 0; -} -/* }}} */ - -/* {{{ char *php_mb_encoding_detector() - * Interface for Zend Engine - */ -static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC) -{ - mbfl_string string; - const char *ret; - enum mbfl_no_encoding *elist; - int size, *list; - - /* make encoding list */ - list = NULL; - size = 0; - php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC); - if (size <= 0) { - return NULL; - } - if (size > 0 && list != NULL) { - elist = list; - } else { - elist = MBSTRG(current_detect_order_list); - size = MBSTRG(current_detect_order_list_size); - } - - mbfl_string_init(&string); - string.no_language = MBSTRG(language); - string.val = (unsigned char *)arg_string; - string.len = arg_length; - ret = mbfl_identify_encoding_name(&string, elist, size, 0); - if 
(list != NULL) { - efree((void *)list); - } - if (ret != NULL) { - return estrdup(ret); - } else { - return NULL; - } -} -/* }}} */ - -/* {{{ int php_mb_encoding_converter() */ -static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC) -{ - mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; - mbfl_buffer_converter *convd; - - /* new encoding */ - to_encoding = mbfl_name2no_encoding(encoding_to); - if (to_encoding == mbfl_no_encoding_invalid) { - return -1; - } - /* old encoding */ - from_encoding = mbfl_name2no_encoding(encoding_from); - if (from_encoding == mbfl_no_encoding_invalid) { - return -1; - } - /* initialize string */ - mbfl_string_init(&string); - mbfl_string_init(&result); - string.no_encoding = from_encoding; - string.no_language = MBSTRG(language); - string.val = (unsigned char*)from; - string.len = from_length; - - /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); - if (convd == NULL) { - return -1; - } - mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); - mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); - - /* do it */ - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - if (ret != NULL) { - *to = ret->val; - *to_length = ret->len; - } - - MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); - mbfl_buffer_converter_delete(convd); - - return ret ? 0 : -1; -} -/* }}} */ - -/* {{{ int php_mb_oddlen() - * returns number of odd (e.g. 
appears only first byte of multibyte - * character) chars - */ -static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC) -{ - mbfl_string mb_string; - - mbfl_string_init(&mb_string); - mb_string.no_language = MBSTRG(language); - mb_string.no_encoding = mbfl_name2no_encoding(encoding); - mb_string.val = (unsigned char *)string; - mb_string.len = length; - - if (mb_string.no_encoding == mbfl_no_encoding_invalid) { - return 0; - } - return mbfl_oddlen(&mb_string); -} -/* }}} */ - -/* {{{ const char* php_mb_internal_encoding_name() - * returns name of internal encoding - */ -static const char* php_mb_internal_encoding_name(TSRMLS_D) -{ - const char *name = mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); - - if (!name || - !*name || - (strlen(name) == 4 && - (!memcmp("pass", name, sizeof("pass") - 1) || - !memcmp("auto", name, sizeof("auto") - 1) || - !memcmp("none", name, sizeof("none") - 1)))) { - return NULL; - } - return name; -} -/* }}} */ - - #endif /* HAVE_MBSTRING */ /* diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 77f1c9d5ef0..6eae92f4d02 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -165,25 +165,23 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v ZEND_BEGIN_MODULE_GLOBALS(mbstring) char *internal_encoding_name; enum mbfl_no_language language; - enum mbfl_no_encoding internal_encoding; - enum mbfl_no_encoding current_internal_encoding; - enum mbfl_no_encoding *script_encoding_list; - int script_encoding_list_size; - enum mbfl_no_encoding http_output_encoding; - enum mbfl_no_encoding current_http_output_encoding; - enum mbfl_no_encoding http_input_identify; - enum mbfl_no_encoding http_input_identify_get; - enum mbfl_no_encoding http_input_identify_post; - enum mbfl_no_encoding http_input_identify_cookie; - enum mbfl_no_encoding http_input_identify_string; - enum mbfl_no_encoding *http_input_list; - int http_input_list_size; 
- enum mbfl_no_encoding *detect_order_list; - int detect_order_list_size; - enum mbfl_no_encoding *current_detect_order_list; - int current_detect_order_list_size; + const mbfl_encoding *internal_encoding; + const mbfl_encoding *current_internal_encoding; + const mbfl_encoding *http_output_encoding; + const mbfl_encoding *current_http_output_encoding; + const mbfl_encoding *http_input_identify; + const mbfl_encoding *http_input_identify_get; + const mbfl_encoding *http_input_identify_post; + const mbfl_encoding *http_input_identify_cookie; + const mbfl_encoding *http_input_identify_string; + const mbfl_encoding **http_input_list; + size_t http_input_list_size; + const mbfl_encoding **detect_order_list; + size_t detect_order_list_size; + const mbfl_encoding **current_detect_order_list; + size_t current_detect_order_list_size; enum mbfl_no_encoding *default_detect_order_list; - int default_detect_order_list_size; + size_t default_detect_order_list_size; int filter_illegal_mode; int filter_illegal_substchar; int current_filter_illegal_mode; diff --git a/ext/standard/info.c b/ext/standard/info.c index 9093c8576aa..30e1a8ab995 100644 --- a/ext/standard/info.c +++ b/ext/standard/info.c @@ -749,7 +749,17 @@ PHPAPI void php_print_info(int flag TSRMLS_DC) php_info_print_table_row(2, "Zend Memory Manager", is_zend_mm(TSRMLS_C) ? "enabled" : "disabled" ); - php_info_print_table_row(2, "Zend Multibyte Support", CG(multibyte) ? "enabled" : "disabled"); + { + const zend_multibyte_functions *functions = zend_multibyte_get_functions(TSRMLS_C); + char *descr; + if (functions) { + spprintf(&descr, 0, "provided by %s", functions->provider_name); + } else { + descr = estrdup("disabled"); + } + php_info_print_table_row(2, "Zend Multibyte Support", descr); + efree(descr); + } #if HAVE_IPV6 php_info_print_table_row(2, "IPv6 Support", "enabled" );