/*
 +----------------------------------------------------------------------+
 | PHP Version 7 |
 +----------------------------------------------------------------------+
 | Copyright (c) 1997-2017 The PHP Group |
 +----------------------------------------------------------------------+
 | This source file is subject to version 3.01 of the PHP license, |
 | that is bundled with this package in the file LICENSE, and is |
 | available through the world-wide-web at the following url: |
 | http://www.php.net/license/3_01.txt |
 | If you did not receive a copy of the PHP license and are unable to |
 | obtain it through the world-wide-web, please send a note to |
 | license@php.net so we can mail you a copy immediately. |
 +----------------------------------------------------------------------+
 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
 | Rui Hirokawa <hirokawa@php.net> |
 +----------------------------------------------------------------------+
 */
/* $Id$ */
/*
 * PHP 4 Multibyte String module "mbstring"
 *
 * History:
 * 2000.5.19 Release php-4.0RC2_jstring-1.0
 * 2001.4.1 Release php4_jstring-1.0.91
 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
 */
/*
 * PHP3 Internationalization support program.
 *
 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
 * All rights reserved.
 *
 * See README_PHP3-i18n-ja for more detail.
 *
 * Authors:
 * Hironori Sato <satoh@jpnnet.com>
 * Shigeru Kanemoto <sgk@happysize.co.jp>
 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
 */
/* {{{ includes */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
2001-05-01 01:52:55 +00:00
#include "php.h"
#include "php_ini.h"
#include "php_variables.h"
2001-05-01 01:52:55 +00:00
#include "mbstring.h"
#include "ext/standard/php_string.h"
#include "ext/standard/php_mail.h"
2005-09-21 13:23:00 +00:00
#include "ext/standard/exec.h"
2001-05-01 01:52:55 +00:00
#include "ext/standard/url.h"
2001-07-20 19:37:19 +00:00
#include "main/php_output.h"
2001-05-04 10:42:54 +00:00
#include "ext/standard/info.h"
2001-05-01 01:52:55 +00:00
#include "libmbfl/mbfl/mbfl_allocators.h"
#include "libmbfl/mbfl/mbfilter_pass.h"
2001-05-01 01:52:55 +00:00
#include "php_variables.h"
#include "php_globals.h"
#include "rfc1867.h"
#include "php_content_types.h"
#include "SAPI.h"
#include "php_unicode.h"
2005-03-17 08:15:23 +00:00
#include "TSRM.h"
2001-05-01 01:52:55 +00:00
#include "mb_gpc.h"
#if HAVE_MBREGEX
#include "php_mbregex.h"
#endif
#include "zend_multibyte.h"
#if HAVE_ONIG
#include "php_onig_compat.h"
#include <oniguruma.h>
#undef UChar
#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
#include "ext/pcre/php_pcre.h"
#endif
/* }}} */
#if HAVE_MBSTRING
/* {{{ prototypes */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);
2014-12-13 22:06:14 +00:00
static void php_mb_populate_current_detect_order_list(void);
2014-12-13 22:06:14 +00:00
static int php_mb_encoding_translation(void);
2014-12-13 22:06:14 +00:00
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
2014-12-13 22:06:14 +00:00
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
/* }}} */
/* {{{ php_mb_default_identify_list */
typedef struct _php_mb_nls_ident_list {
enum mbfl_no_language lang;
const enum mbfl_no_encoding *list;
size_t list_size;
} php_mb_nls_ident_list;
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
2001-05-01 01:52:55 +00:00
mbfl_no_encoding_ascii,
mbfl_no_encoding_jis,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_sjis
};
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_cp936
};
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_big5
};
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_uhc
};
2001-05-01 01:52:55 +00:00
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_koi8r,
mbfl_no_encoding_cp1251,
mbfl_no_encoding_cp866
};
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_armscii8
};
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_cp1254,
mbfl_no_encoding_8859_9
};
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_koi8u
};
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8
};
2005-02-21 07:57:08 +00:00
static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
};
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ mb_overload_def mb_ovld[] */
/*
 * Function overloading table, terminated by an all-zero row.
 * NOTE(review): the columns look like (overload category flag, standard
 * function name, mbstring replacement, name under which the original is
 * kept) - confirm against the declaration of struct mb_overload_def.
 */
static const struct mb_overload_def mb_ovld[] = {
	/* mail() */
	{MB_OVERLOAD_MAIL,   "mail",         "mb_send_mail",    "mb_orig_mail"},

	/* string functions */
	{MB_OVERLOAD_STRING, "strlen",       "mb_strlen",       "mb_orig_strlen"},
	{MB_OVERLOAD_STRING, "strpos",       "mb_strpos",       "mb_orig_strpos"},
	{MB_OVERLOAD_STRING, "strrpos",      "mb_strrpos",      "mb_orig_strrpos"},
	{MB_OVERLOAD_STRING, "stripos",      "mb_stripos",      "mb_orig_stripos"},
	{MB_OVERLOAD_STRING, "strripos",     "mb_strripos",     "mb_orig_strripos"},
	{MB_OVERLOAD_STRING, "strstr",       "mb_strstr",       "mb_orig_strstr"},
	{MB_OVERLOAD_STRING, "strrchr",      "mb_strrchr",      "mb_orig_strrchr"},
	{MB_OVERLOAD_STRING, "stristr",      "mb_stristr",      "mb_orig_stristr"},
	{MB_OVERLOAD_STRING, "substr",       "mb_substr",       "mb_orig_substr"},
	{MB_OVERLOAD_STRING, "strtolower",   "mb_strtolower",   "mb_orig_strtolower"},
	{MB_OVERLOAD_STRING, "strtoupper",   "mb_strtoupper",   "mb_orig_strtoupper"},
	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},

	/* sentinel */
	{0, NULL, NULL, NULL}
};
/* }}} */
/* {{{ arginfo */
/*
 * Argument descriptors for the configuration-style functions; each one is
 * attached to the function of the same name in mbstring_functions[].
 * Per the Zend arginfo macros, the last argument of ZEND_BEGIN_ARG_INFO_EX
 * is the number of required arguments and the first argument of
 * ZEND_ARG_INFO is non-zero for pass-by-reference parameters.
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
ZEND_ARG_INFO(0, language)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
ZEND_ARG_INFO(0, substchar)
ZEND_END_ARG_INFO()
/* The only descriptor in this group with a required argument. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
2011-09-12 13:20:05 +00:00
/*
 * Argument descriptors for the string handling functions (parsing, output
 * handler, search, substring, width, conversion, case mapping, detection).
 */
/* "result" is declared pass-by-reference. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
ZEND_ARG_INFO(0, encoded_string)
ZEND_ARG_INFO(1, result)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
ZEND_ARG_INFO(0, contents)
ZEND_ARG_INFO(0, status)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/* Position searches: (haystack, needle[, offset[, encoding]]). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, offset)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, offset)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, offset)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, offset)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/* Substring searches: (haystack, needle[, part[, encoding]]). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, part)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, part)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, part)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, part)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
ZEND_ARG_INFO(0, haystack)
ZEND_ARG_INFO(0, needle)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/* Extraction: (str, start[, length[, encoding]]). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, start)
ZEND_ARG_INFO(0, length)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, start)
ZEND_ARG_INFO(0, length)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, start)
ZEND_ARG_INFO(0, width)
ZEND_ARG_INFO(0, trimmarker)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/* Conversion and case mapping. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, to)
ZEND_ARG_INFO(0, from)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
ZEND_ARG_INFO(0, sourcestring)
ZEND_ARG_INFO(0, mode)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
ZEND_ARG_INFO(0, sourcestring)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
ZEND_ARG_INFO(0, sourcestring)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, encoding_list)
ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()
/* Descriptor with no declared parameters. */
ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()
2008-09-13 00:30:51 +00:00
/*
 * Argument descriptors for alias lookup, MIME header handling, kana and
 * variable conversion, numeric entities, mail, info and validation, followed
 * by the first group of descriptors for the mb_ereg* regex functions.
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, charset)
ZEND_ARG_INFO(0, transfer)
ZEND_ARG_INFO(0, linefeed)
ZEND_ARG_INFO(0, indent)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
ZEND_ARG_INFO(0, str)
ZEND_ARG_INFO(0, option)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/* "vars" is variadic and declared pass-by-reference. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
ZEND_ARG_INFO(0, to)
ZEND_ARG_INFO(0, from)
ZEND_ARG_VARIADIC_INFO(1, vars)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, convmap)
ZEND_ARG_INFO(0, encoding)
ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, convmap)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
ZEND_ARG_INFO(0, to)
ZEND_ARG_INFO(0, subject)
ZEND_ARG_INFO(0, message)
ZEND_ARG_INFO(0, additional_headers)
ZEND_ARG_INFO(0, additional_parameters)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, var)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/*
 * Regex descriptors. They are defined here without any HAVE_MBREGEX guard.
 * NOTE(review): presumably consumed by PHP_MBREGEX_FUNCTION_ENTRIES - confirm.
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
/* "registers" is declared pass-by-reference. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, replacement)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, replacement)
ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()
2011-09-25 08:01:54 +00:00
/*
 * Remaining argument descriptors for the mb_ereg* regex functions
 * (callback replace, split, match, and the stateful search API).
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, callback)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, limit)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
/* Search functions: both pattern and option are optional. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
/* Descriptors with no declared parameters. */
ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
ZEND_ARG_INFO(0, position)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
/* }}} */
/* }}} */
2005-12-06 02:21:01 +00:00
/* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table of the extension: every row binds a userland function to
 * the argument descriptor of the same name defined above.
 */
const zend_function_entry mbstring_functions[] = {
	/* case mapping */
	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)

	/* settings */
	PHP_FE(mb_language,				arginfo_mb_language)
	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
	PHP_FE(mb_http_input,			arginfo_mb_http_input)
	PHP_FE(mb_http_output,			arginfo_mb_http_output)
	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)

	/* input / output helpers */
	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)

	/* string functions */
	PHP_FE(mb_strlen,				arginfo_mb_strlen)
	PHP_FE(mb_strpos,				arginfo_mb_strpos)
	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
	PHP_FE(mb_stripos,				arginfo_mb_stripos)
	PHP_FE(mb_strripos,				arginfo_mb_strripos)
	PHP_FE(mb_strstr,				arginfo_mb_strstr)
	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
	PHP_FE(mb_stristr,				arginfo_mb_stristr)
	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
	PHP_FE(mb_substr,				arginfo_mb_substr)
	PHP_FE(mb_strcut,				arginfo_mb_strcut)
	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)

	/* conversion and detection */
	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)

	/* miscellaneous */
	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
	PHP_FE(mb_get_info,				arginfo_mb_get_info)
	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)

	/* regex functions are appended only when mbregex support is built in */
#if HAVE_MBREGEX
	PHP_MBREGEX_FUNCTION_ENTRIES
#endif
	PHP_FE_END
};
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ zend_module_entry mbstring_module_entry */
2001-05-01 01:52:55 +00:00
zend_module_entry mbstring_module_entry = {
2010-12-19 17:28:57 +00:00
STANDARD_MODULE_HEADER,
2001-05-01 01:52:55 +00:00
"mbstring",
mbstring_functions,
PHP_MINIT(mbstring),
PHP_MSHUTDOWN(mbstring),
PHP_RINIT(mbstring),
PHP_RSHUTDOWN(mbstring),
PHP_MINFO(mbstring),
PHP_MBSTRING_VERSION,
2010-12-19 17:28:57 +00:00
PHP_MODULE_GLOBALS(mbstring),
PHP_GINIT(mbstring),
PHP_GSHUTDOWN(mbstring),
NULL,
STANDARD_MODULE_PROPERTIES_EX
2001-05-01 01:52:55 +00:00
};
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST content-type table using the standard form handler
 * (php_std_post_handler) for url-encoded bodies; NULL-terminated.
 */
static sapi_post_entry php_post_entries[] = {
	{
		DEFAULT_POST_CONTENT_TYPE,
		sizeof(DEFAULT_POST_CONTENT_TYPE)-1,
		sapi_read_standard_form_data,
		php_std_post_handler
	},
	{
		MULTIPART_CONTENT_TYPE,
		sizeof(MULTIPART_CONTENT_TYPE)-1,
		NULL,
		rfc1867_post_handler
	},
	{ NULL, 0, NULL, NULL }
};
/* }}} */
2001-05-01 01:52:55 +00:00
/* Only emitted when the extension is built as a loadable module. */
#if defined(COMPILE_DL_MBSTRING)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(mbstring)
#endif /* COMPILE_DL_MBSTRING */
2014-12-13 22:06:14 +00:00
/*
 * Returns the configured internal encoding name: PG(internal_encoding) when
 * it is set and non-empty, otherwise SG(default_charset) when that is set,
 * otherwise the empty string. Never returns NULL.
 */
static char *get_internal_encoding(void)
{
	if (PG(internal_encoding) != NULL && *PG(internal_encoding) != '\0') {
		return PG(internal_encoding);
	}
	if (SG(default_charset) != NULL) {
		return SG(default_charset);
	}
	return "";
}
2014-12-13 22:06:14 +00:00
/*
 * Returns the configured input encoding name: PG(input_encoding) when it is
 * set and non-empty, otherwise SG(default_charset) when that is set,
 * otherwise the empty string. Never returns NULL.
 */
static char *get_input_encoding(void)
{
	if (PG(input_encoding) != NULL && *PG(input_encoding) != '\0') {
		return PG(input_encoding);
	}
	if (SG(default_charset) != NULL) {
		return SG(default_charset);
	}
	return "";
}
2014-12-13 22:06:14 +00:00
/*
 * Returns the configured output encoding name: PG(output_encoding) when it
 * is set and non-empty, otherwise SG(default_charset) when that is set,
 * otherwise the empty string. Never returns NULL.
 */
static char *get_output_encoding(void)
{
	if (PG(output_encoding) != NULL && *PG(output_encoding) != '\0') {
		return PG(output_encoding);
	}
	if (SG(default_charset) != NULL) {
		return SG(default_charset);
	}
	return "";
}
/* {{{ allocators */
/*
 * Thin adapters that route libmbfl's allocation hooks to the Zend memory
 * manager. The first four use emalloc/erealloc/ecalloc/efree; the "p"
 * variants call the pe* macros with the persistent flag set to 1.
 */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *block = emalloc(sz);

	return block;
}

static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *block = erealloc(ptr, sz);

	return block;
}

static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *block = ecalloc(nelems, szelem);

	return block;
}

static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
}

static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}

static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *block = perealloc(ptr, sz, 1);

	return block;
}

static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
}

/* Hook table, in the positional order used by mbfl_allocators. */
static mbfl_allocators _php_mb_allocators = {
	_php_mb_allocators_malloc,
	_php_mb_allocators_realloc,
	_php_mb_allocators_calloc,
	_php_mb_allocators_free,
	_php_mb_allocators_pmalloc,
	_php_mb_allocators_prealloc,
	_php_mb_allocators_pfree
};
/* }}} */
/* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST content-type table that routes url-encoded bodies through
 * php_mb_post_handler; NULL-terminated.
 */
static sapi_post_entry mbstr_post_entries[] = {
	{
		DEFAULT_POST_CONTENT_TYPE,
		sizeof(DEFAULT_POST_CONTENT_TYPE)-1,
		sapi_read_standard_form_data,
		php_mb_post_handler
	},
	{
		MULTIPART_CONTENT_TYPE,
		sizeof(MULTIPART_CONTENT_TYPE)-1,
		NULL,
		rfc1867_post_handler
	},
	{ NULL, 0, NULL, NULL }
};
/* }}} */
/* {{{ static int php_mb_parse_encoding_list()
* Return 0 if input contains any illegal encoding, otherwise 1.
2015-01-03 09:22:58 +00:00
* Even if any illegal encoding is detected the result may contain a list
* of parsed encodings.
*/
2001-05-01 01:52:55 +00:00
static int
2014-12-13 22:06:14 +00:00
php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
2001-05-01 01:52:55 +00:00
{
2016-06-21 21:40:50 +00:00
int bauto, ret = SUCCESS;
size_t n, size;
2001-05-01 01:52:55 +00:00
char *p, *p1, *p2, *endp, *tmpstr;
const mbfl_encoding **entry, **list;
2001-05-01 01:52:55 +00:00
list = NULL;
if (value == NULL || value_length <= 0) {
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = NULL;
2002-11-03 08:50:43 +00:00
}
if (return_size) {
*return_size = 0;
2002-11-03 08:50:43 +00:00
}
return FAILURE;
2001-05-01 01:52:55 +00:00
} else {
/* copy the value string for work */
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
tmpstr = (char *)estrndup(value+1, value_length-2);
value_length -= 2;
}
else
tmpstr = (char *)estrndup(value, value_length);
2001-05-01 01:52:55 +00:00
if (tmpstr == NULL) {
return FAILURE;
2001-05-01 01:52:55 +00:00
}
/* count the number of listed encoding names */
endp = tmpstr + value_length;
n = 1;
p1 = tmpstr;
2014-04-22 14:52:59 +00:00
while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
2001-05-01 01:52:55 +00:00
p1 = p2 + 1;
n++;
}
size = n + MBSTRG(default_detect_order_list_size);
2001-05-01 01:52:55 +00:00
/* make list */
list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
2001-05-01 01:52:55 +00:00
if (list != NULL) {
entry = list;
n = 0;
bauto = 0;
p1 = tmpstr;
do {
2014-04-22 14:52:59 +00:00
p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
2001-05-01 01:52:55 +00:00
if (p == NULL) {
p = endp;
}
*p = '\0';
/* trim spaces */
while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
p1++;
}
p--;
while (p > p1 && (*p == ' ' || *p == '\t')) {
*p = '\0';
p--;
}
/* convert to the encoding number and check encoding */
if (strcasecmp(p1, "auto") == 0) {
2001-05-01 01:52:55 +00:00
if (!bauto) {
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
size_t i;
2001-05-01 01:52:55 +00:00
bauto = 1;
for (i = 0; i < identify_list_size; i++) {
*entry++ = mbfl_no2encoding(*src++);
2001-05-01 01:52:55 +00:00
n++;
}
}
} else {
const mbfl_encoding *encoding = mbfl_name2encoding(p1);
if (encoding) {
*entry++ = encoding;
n++;
} else {
ret = 0;
}
2001-05-01 01:52:55 +00:00
}
p1 = p2 + 1;
} while (n < size && p2 != NULL);
if (n > 0) {
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = list;
2002-11-03 08:50:43 +00:00
} else {
pefree(list, persistent);
2002-11-03 08:50:43 +00:00
}
2002-03-23 07:36:27 +00:00
} else {
pefree(list, persistent);
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = NULL;
2002-11-03 08:50:43 +00:00
}
ret = 0;
2002-03-23 07:36:27 +00:00
}
2002-11-03 08:50:43 +00:00
if (return_size) {
*return_size = n;
2002-11-03 08:50:43 +00:00
}
} else {
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = NULL;
2002-11-03 08:50:43 +00:00
}
if (return_size) {
*return_size = 0;
2002-11-03 08:50:43 +00:00
}
ret = 0;
2001-05-01 01:52:55 +00:00
}
efree(tmpstr);
}
return ret;
2001-05-01 01:52:55 +00:00
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ static int php_mb_parse_encoding_array()
* Return 0 if input contains any illegal encoding, otherwise 1.
2015-01-03 09:22:58 +00:00
* Even if any illegal encoding is detected the result may contain a list
* of parsed encodings.
*/
2001-05-01 01:52:55 +00:00
static int
2014-12-13 22:06:14 +00:00
php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
2001-05-01 01:52:55 +00:00
{
2014-03-23 11:45:48 +00:00
zval *hash_entry;
2001-05-01 01:52:55 +00:00
HashTable *target_hash;
int i, n, size, bauto, ret = SUCCESS;
const mbfl_encoding **list, **entry;
2001-05-01 01:52:55 +00:00
list = NULL;
if (Z_TYPE_P(array) == IS_ARRAY) {
target_hash = Z_ARRVAL_P(array);
2001-05-01 01:52:55 +00:00
i = zend_hash_num_elements(target_hash);
size = i + MBSTRG(default_detect_order_list_size);
list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
2001-05-01 01:52:55 +00:00
if (list != NULL) {
entry = list;
bauto = 0;
n = 0;
2014-05-28 13:43:11 +00:00
ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
2001-05-01 01:52:55 +00:00
convert_to_string_ex(hash_entry);
2014-03-23 11:45:48 +00:00
if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
2001-05-01 01:52:55 +00:00
if (!bauto) {
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
size_t j;
2001-05-01 01:52:55 +00:00
bauto = 1;
for (j = 0; j < identify_list_size; j++) {
*entry++ = mbfl_no2encoding(*src++);
2001-05-01 01:52:55 +00:00
n++;
}
}
} else {
2014-03-23 11:45:48 +00:00
const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
if (encoding) {
*entry++ = encoding;
n++;
} else {
ret = FAILURE;
}
2001-05-01 01:52:55 +00:00
}
i--;
2014-05-28 13:43:11 +00:00
} ZEND_HASH_FOREACH_END();
2002-03-23 07:36:27 +00:00
if (n > 0) {
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = list;
2002-11-03 08:50:43 +00:00
} else {
pefree(list, persistent);
2002-11-03 08:50:43 +00:00
}
2002-03-23 07:36:27 +00:00
} else {
pefree(list, persistent);
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = NULL;
2002-11-03 08:50:43 +00:00
}
ret = FAILURE;
2002-03-23 07:36:27 +00:00
}
2002-11-03 08:50:43 +00:00
if (return_size) {
*return_size = n;
2002-11-03 08:50:43 +00:00
}
} else {
2002-11-03 08:50:43 +00:00
if (return_list) {
*return_list = NULL;
2002-11-03 08:50:43 +00:00
}
if (return_size) {
*return_size = 0;
2002-11-03 08:50:43 +00:00
}
ret = FAILURE;
2001-05-01 01:52:55 +00:00
}
}
return ret;
2001-05-01 01:52:55 +00:00
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ zend_multibyte interface */
2014-12-13 22:06:14 +00:00
static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
{
return (const zend_encoding*)mbfl_name2encoding(encoding_name);
}
static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
{
return ((const mbfl_encoding *)encoding)->name;
}
/*
 * zend_multibyte hook: returns 1 when the encoding is flagged as single-byte
 * (MBFL_ENCTYPE_SBCS), or as multi-byte (MBFL_ENCTYPE_MBCS) without the
 * MBFL_ENCTYPE_GL_UNSAFE flag; returns 0 otherwise.
 */
static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
{
	const mbfl_encoding *enc = (const mbfl_encoding*)_encoding;
	const int is_single_byte = (enc->flag & MBFL_ENCTYPE_SBCS) != 0;
	const int is_gl_safe_multibyte =
		(enc->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS;

	return (is_single_byte || is_gl_safe_multibyte) ? 1 : 0;
}
2014-12-13 22:06:14 +00:00
/*
 * zend_multibyte hook: identify the encoding of a byte buffer.
 * When no candidate list is supplied, the current detect order
 * (MBSTRG(current_detect_order_list)) is used instead.
 */
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
{
	mbfl_string subject;
	const mbfl_encoding **candidates;
	size_t candidate_count;

	if (list) {
		candidates = (const mbfl_encoding **)list;
		candidate_count = list_size;
	} else {
		candidates = (const mbfl_encoding **)MBSTRG(current_detect_order_list);
		candidate_count = MBSTRG(current_detect_order_list_size);
	}

	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;

	return (const zend_encoding *) mbfl_identify_encoding2(&subject, candidates, candidate_count, 0);
}
2014-12-13 22:06:14 +00:00
/*
 * zend_multibyte hook: convert `from` (from_length bytes, in encoding_from)
 * to encoding_to. On success *to and *to_length receive the converter's
 * result buffer and length, and the value reported by
 * mbfl_buffer_converter_feed2() through its last argument is returned.
 * On any failure (size_t)-1 is returned and the outputs are left untouched.
 */
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
{
	const mbfl_encoding *src_enc = (const mbfl_encoding *)encoding_from;
	const mbfl_encoding *dst_enc = (const mbfl_encoding *)encoding_to;
	mbfl_string input, output;
	mbfl_buffer_converter *conv;
	int consumed;
	size_t retval = (size_t)-1;

	/* describe the input buffer */
	mbfl_string_init(&input);
	mbfl_string_init(&output);
	input.no_encoding = src_enc->no_encoding;
	input.no_language = MBSTRG(language);
	input.val = (unsigned char*)from;
	input.len = from_length;

	/* set up the converter with the current illegal-character policy */
	conv = mbfl_buffer_converter_new2(src_enc, dst_enc, input.len);
	if (conv == NULL) {
		return (size_t)-1;
	}
	mbfl_buffer_converter_illegal_mode(conv, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(conv, MBSTRG(current_filter_illegal_substchar));

	/* feed, flush and collect; any failing step keeps retval at -1 */
	if (!mbfl_buffer_converter_feed2(conv, &input, &consumed)) {
		mbfl_buffer_converter_flush(conv);
		if (mbfl_buffer_converter_result(conv, &output)) {
			*to = output.val;
			*to_length = output.len;
			retval = consumed;
		}
	}

	mbfl_buffer_converter_delete(conv);
	return retval;
}
2014-12-13 22:06:14 +00:00
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
{
2014-12-13 22:06:14 +00:00
return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
}
2014-12-13 22:06:14 +00:00
static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
{
return (const zend_encoding *)MBSTRG(internal_encoding);
}
2014-12-13 22:06:14 +00:00
/* zend_multibyte hook: store the encoding in MBSTRG(internal_encoding); always SUCCESS. */
static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
{
	const mbfl_encoding *replacement = (const mbfl_encoding *)encoding;

	MBSTRG(internal_encoding) = replacement;
	return SUCCESS;
}
/* Callback table bundling the zend_multibyte hooks defined above. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",											/* provider name */
	php_mb_zend_encoding_fetcher,						/* name -> encoding */
	php_mb_zend_encoding_name_getter,					/* encoding -> name */
	php_mb_zend_encoding_lexer_compatibility_checker,	/* lexer compatibility */
	php_mb_zend_encoding_detector,						/* detection */
	php_mb_zend_encoding_converter,						/* conversion */
	php_mb_zend_encoding_list_parser,					/* list parsing */
	php_mb_zend_internal_encoding_getter,				/* get internal encoding */
	php_mb_zend_internal_encoding_setter				/* set internal encoding */
};
/* }}} */
2014-12-13 22:06:14 +00:00
/*
 * Regex helper interface. The implementations below are selected by
 * HAVE_ONIG or HAVE_PCRE / HAVE_BUNDLED_PCRE; the compiled pattern is
 * passed around as an opaque pointer.
 */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
#if HAVE_ONIG
/* {{{ _php_mb_compile_regex */
2014-12-13 22:06:14 +00:00
static void *_php_mb_compile_regex(const char *pattern)
{
php_mb_regex_t *retval;
OnigErrorInfo err_info;
int err_code;
if ((err_code = onig_new(&retval,
(const OnigUChar *)pattern,
(const OnigUChar *)pattern + strlen(pattern),
ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err_code, err_info);
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
retval = NULL;
}
return retval;
}
/* }}} */
/* {{{ _php_mb_match_regex */
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
{
return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
(const OnigUChar*)str + str_len, (const OnigUChar *)str,
(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
}
/* }}} */
/* {{{ _php_mb_free_regex */
static void _php_mb_free_regex(void *opaque)
{
onig_free((php_mb_regex_t *)opaque);
}
/* }}} */
#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
/* {{{ _php_mb_compile_regex */
2014-12-13 22:06:14 +00:00
static void *_php_mb_compile_regex(const char *pattern)
{
pcre *retval;
const char *err_str;
int err_offset;
if (!(retval = pcre_compile(pattern,
PCRE_CASELESS, &err_str, &err_offset, NULL))) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
}
return retval;
}
/* }}} */
/* {{{ _php_mb_match_regex */
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
{
return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
0, NULL, 0) >= 0;
}
/* }}} */
/* {{{ _php_mb_free_regex */
/* Releases a regex previously returned by the PCRE _php_mb_compile_regex()
 * by handing it to pcre_free(). */
static void _php_mb_free_regex(void *opaque)
{
pcre_free(opaque);
}
/* }}} */
#endif
/* {{{ php_mb_nls_get_default_detect_order_list */
/* Looks up the default encoding detection order for `lang`.
 *
 * `*plist` / `*plist_size` are always written: first with the
 * language-neutral list, then overwritten with the language-specific list if
 * php_mb_default_identify_list has an entry for `lang`.
 *
 * Returns 1 when a language-specific entry was found, 0 when the neutral
 * list was used. */
static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
{
	const size_t n_langs = sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]);
	size_t idx = 0;

	/* Fallback: language-neutral list. */
	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);

	while (idx < n_langs) {
		if (php_mb_default_identify_list[idx].lang == lang) {
			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[idx].list;
			*plist_size = php_mb_default_identify_list[idx].list_size;
			return 1;
		}
		idx++;
	}

	return 0;
}
/* }}} */
2014-12-13 22:06:14 +00:00
/* Copies at most `len` bytes from `start` into a freshly emalloc()ed,
 * NUL-terminated buffer, stopping early at the first unescaped `quote` byte.
 *
 * A backslash followed by another backslash, or by `quote` when `quote` is
 * non-zero, is collapsed to the escaped byte. All other characters are copied
 * whole, using php_mb_mbchar_bytes_ex() for the character width so that a
 * trail byte equal to `quote` or '\\' is not misinterpreted.
 *
 * The caller owns the returned buffer. */
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote)
{
	char *result = emalloc(len + 2);
	char *out = result;
	int i = 0;

	while (i < len && start[i] != quote) {
		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
			/* Drop the backslash, keep the escaped byte. */
			*out++ = start[i + 1];
			i += 2;
		} else {
			size_t nbytes = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);

			while (nbytes-- > 0 && i < len) {
				*out++ = start[i++];
			}
		}
	}

	*out = '\0';
	return result;
}
2014-12-13 22:06:14 +00:00
/* Extracts the next word from `*line`, where words are separated by `stop`.
 *
 * Text inside single or double quotes is skipped as a unit (a backslash
 * followed by the active quote character does not end the quoted run), so a
 * `stop` byte inside quotes does not end the word. Outside quotes the scan
 * moves one multibyte character at a time, as measured by
 * php_mb_mbchar_bytes_ex().
 *
 * Returns an emalloc()ed copy of the word (caller frees). `*line` is advanced
 * past the word and any run of `stop` characters that follows it, or to the
 * terminating NUL when no `stop` was found. */
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
{
	char *pos = *line, quote;
	char *res;

	while (*pos && *pos != stop) {
		if ((quote = *pos) == '"' || quote == '\'') {
			++pos;
			while (*pos && *pos != quote) {
				if (*pos == '\\' && pos[1] && pos[1] == quote) {
					pos += 2;
				} else {
					++pos;
				}
			}
			if (*pos) {
				++pos;
			}
		} else {
			/* Advance by one character, but never step over the terminating
			 * NUL: a truncated multibyte sequence at the end of the input
			 * would otherwise push `pos` out of bounds. */
			size_t nbytes = php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);

			do {
				++pos;
			} while (nbytes-- > 1 && *pos);
		}
	}

	if (*pos == '\0') {
		res = estrdup(*line);
		*line += strlen(*line);
		return res;
	}

	res = estrndup(*line, pos - *line);

	/* Skip the separator run, with the same NUL-bounded advance as above. */
	while (*pos == stop) {
		size_t nbytes = php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);

		do {
			++pos;
		} while (nbytes-- > 1 && *pos);
	}

	*line = pos;
	return res;
}
/* }}} */
2014-12-13 22:06:14 +00:00
/* Returns an emalloc()ed copy of the first word in `str` (caller frees).
 *
 * Leading whitespace is skipped. If the word starts with a single or double
 * quote, the remainder of the string after that quote is handed to
 * php_mb_rfc1867_substring_conf() with the quote character, which stops at
 * the matching unescaped quote. Otherwise the word ends at the next
 * whitespace byte. An empty or all-whitespace input yields "". */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *word_end;

	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str == '"' || *str == '\'') {
		char quote = *str;
		char *body = str + 1;

		return php_mb_rfc1867_substring_conf(encoding, body, strlen(body), quote);
	}

	for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
		/* find the end of the unquoted word */
	}

	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0);
}
/* }}} */
2014-12-13 22:06:14 +00:00
/* Returns a pointer into `filename` just past its last path separator, or
 * `filename` itself when it contains no separator. Both '\\' and '/' count
 * as separators; they are located with php_mb_safe_strrchr_ex() using
 * `encoding`.
 *
 * The backslash check should technically be needed on win32 only, where it
 * is a valid path separator. However, IE always sends the full path of the
 * file on the user's filesystem, so without it the user would get a bogus
 * file name unless they call basename() themselves. Until IE's user base
 * drops to nil or the problem is fixed, this must stay enabled everywhere. */
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
{
	const mbfl_encoding *enc = (const mbfl_encoding *)encoding;
	const size_t filename_len = strlen(filename);
	char *last_bslash = php_mb_safe_strrchr_ex(filename, '\\', filename_len, enc);
	char *last_slash = php_mb_safe_strrchr_ex(filename, '/', filename_len, enc);
	char *sep;

	if (last_bslash && last_slash) {
		/* Both present: the right-most separator wins. */
		sep = (last_bslash > last_slash) ? last_bslash : last_slash;
	} else if (last_bslash) {
		sep = last_bslash;
	} else {
		sep = last_slash;
	}

	return sep ? sep + 1 : filename;
}
/* }}} */
/* {{{ php.ini directive handler */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
static PHP_INI_MH(OnUpdate_mbstring_language)
{
enum mbfl_no_language no_language;
no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
if (no_language == mbfl_no_language_invalid) {
MBSTRG(language) = mbfl_no_language_neutral;
return FAILURE;
}
MBSTRG(language) = no_language;
php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
return SUCCESS;
}
/* }}} */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
2001-05-01 01:52:55 +00:00
static PHP_INI_MH(OnUpdate_mbstring_detect_order)
{
const mbfl_encoding **list;
size_t size;
2001-05-01 01:52:55 +00:00
if (!new_value) {
if (MBSTRG(detect_order_list)) {
pefree(MBSTRG(detect_order_list), 1);
}
MBSTRG(detect_order_list) = NULL;
MBSTRG(detect_order_list_size) = 0;
return SUCCESS;
}
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
2001-05-01 01:52:55 +00:00
return FAILURE;
}
if (MBSTRG(detect_order_list)) {
pefree(MBSTRG(detect_order_list), 1);
}
MBSTRG(detect_order_list) = list;
MBSTRG(detect_order_list_size) = size;
2001-05-01 01:52:55 +00:00
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
2001-05-01 01:52:55 +00:00
static PHP_INI_MH(OnUpdate_mbstring_http_input)
{
const mbfl_encoding **list;
size_t size;
2001-05-01 01:52:55 +00:00
if (!new_value || !ZSTR_VAL(new_value)) {
if (MBSTRG(http_input_list)) {
pefree(MBSTRG(http_input_list), 1);
}
2014-12-13 22:06:14 +00:00
if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
MBSTRG(http_input_list) = list;
MBSTRG(http_input_list_size) = size;
return SUCCESS;
}
MBSTRG(http_input_list) = NULL;
MBSTRG(http_input_list_size) = 0;
return SUCCESS;
}
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
2001-05-01 01:52:55 +00:00
return FAILURE;
}
if (MBSTRG(http_input_list)) {
pefree(MBSTRG(http_input_list), 1);
}
MBSTRG(http_input_list) = list;
MBSTRG(http_input_list_size) = size;
if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
2014-12-13 22:06:14 +00:00
php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
}
2001-05-01 01:52:55 +00:00
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
2001-05-01 01:52:55 +00:00
static PHP_INI_MH(OnUpdate_mbstring_http_output)
{
const mbfl_encoding *encoding;
2001-05-01 01:52:55 +00:00
if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
2014-12-13 22:06:14 +00:00
encoding = mbfl_name2encoding(get_output_encoding());
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
return SUCCESS;
}
} else {
encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
return FAILURE;
}
}
MBSTRG(http_output_encoding) = encoding;
MBSTRG(current_http_output_encoding) = encoding;
if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
2014-12-13 22:06:14 +00:00
php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
2001-05-01 01:52:55 +00:00
}
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ _php_mb_ini_mbstring_internal_encoding_set */
2014-12-13 22:06:14 +00:00
/* Applies `new_value` as the mbstring internal encoding.
 *
 * Both the configured and the current internal encoding are set to the
 * encoding named by `new_value`, or to UTF-8 when the name is NULL, empty
 * (`new_value_length` == 0) or unknown to mbfl.
 *
 * When mbregex is compiled in, the default and current regex character types
 * are updated as well; if the regex engine does not accept the name, both
 * fall back to "UTF-8".
 *
 * Always returns SUCCESS. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length)
{
	const mbfl_encoding *encoding;

	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
		/* falls back to UTF-8 if an unknown encoding name is given */
		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
	}
	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;

#if HAVE_MBREGEX
	{
		const char *enc_name = new_value;

		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
			/* falls back to UTF-8 if an unknown encoding name is given */
			enc_name = "UTF-8";
			php_mb_regex_set_default_mbctype(enc_name);
		}
		/* Use the name that was actually accepted above, so the current
		 * ctype follows the same UTF-8 fallback as the default ctype instead
		 * of being handed the rejected name again. */
		php_mb_regex_set_mbctype(enc_name);
	}
#endif

	return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
{
if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
2014-12-13 22:06:14 +00:00
php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
}
2014-12-13 22:06:14 +00:00
if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
2010-07-08 07:40:02 +00:00
return FAILURE;
}
if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
if (new_value && ZSTR_LEN(new_value)) {
return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
} else {
2014-12-13 22:06:14 +00:00
return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
}
} else {
2008-08-03 11:58:15 +00:00
/* the corresponding mbstring globals needs to be set according to the
* ini value in the later stage because it never falls back to the
* default value if 1. no value for mbstring.internal_encoding is given,
* 2. mbstring.language directive is processed in per-dir or runtime
* context and 3. call to the handler for mbstring.language is done
* after mbstring.internal_encoding is handled. */
return SUCCESS;
}
}
/* }}} */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
2001-05-01 01:52:55 +00:00
static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
{
2006-03-12 07:54:42 +00:00
int c;
char *endptr = NULL;
2001-05-01 01:52:55 +00:00
if (new_value != NULL) {
if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
2001-05-01 01:52:55 +00:00
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
} else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
2001-05-01 01:52:55 +00:00
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
} else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2001-05-01 01:52:55 +00:00
} else {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
if (ZSTR_LEN(new_value) > 0) {
c = strtol(ZSTR_VAL(new_value), &endptr, 0);
2006-03-12 07:54:42 +00:00
if (*endptr == '\0') {
MBSTRG(filter_illegal_substchar) = c;
MBSTRG(current_filter_illegal_substchar) = c;
2006-03-12 07:54:42 +00:00
}
}
2001-05-01 01:52:55 +00:00
}
2008-08-04 21:06:13 +00:00
} else {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
2001-05-01 01:52:55 +00:00
}
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
{
if (new_value == NULL) {
return FAILURE;
}
2014-12-13 22:06:14 +00:00
OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
if (MBSTRG(encoding_translation)) {
2014-12-13 22:06:14 +00:00
sapi_unregister_post_entry(php_post_entries);
sapi_register_post_entries(mbstr_post_entries);
} else {
2014-12-13 22:06:14 +00:00
sapi_unregister_post_entry(mbstr_post_entries);
sapi_register_post_entries(php_post_entries);
}
return SUCCESS;
}
/* }}} */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
{
zend_string *tmp;
void *re = NULL;
if (!new_value) {
2008-08-04 21:06:13 +00:00
new_value = entry->orig_value;
}
tmp = php_trim(new_value, NULL, 0, 3);
if (ZSTR_LEN(tmp) > 0) {
if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
zend_string_release(tmp);
return FAILURE;
}
}
if (MBSTRG(http_output_conv_mimetypes)) {
_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
}
MBSTRG(http_output_conv_mimetypes) = re;
zend_string_release(tmp);
return SUCCESS;
}
/* }}} */
/* }}} */
/* {{{ php.ini directive registration */
2001-05-01 01:52:55 +00:00
/* php.ini directives of the mbstring extension, in registration order.
 * Entries declared with the STD_ variants additionally name the
 * zend_mbstring_globals member that receives the value. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL,
		PHP_INI_ALL,
		OnUpdate_mbstring_internal_encoding,
		internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
		PHP_INI_SYSTEM,
		OnUpdateLong,
		func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): declared with the BOOLEAN macro but updated through
	 * OnUpdateLong; confirm this matches the declared type of
	 * strict_detection in zend_mbstring_globals. */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ module global initialize handler */
static PHP_GINIT_FUNCTION(mbstring)
2001-05-01 01:52:55 +00:00
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
ZEND_TSRMLS_CACHE_UPDATE();
#endif
2006-06-16 16:45:46 +00:00
mbstring_globals->language = mbfl_no_language_uni;
mbstring_globals->internal_encoding = NULL;
2006-06-16 16:45:46 +00:00
mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
mbstring_globals->http_input_identify = NULL;
mbstring_globals->http_input_identify_get = NULL;
mbstring_globals->http_input_identify_post = NULL;
mbstring_globals->http_input_identify_cookie = NULL;
mbstring_globals->http_input_identify_string = NULL;
2006-06-16 16:45:46 +00:00
mbstring_globals->http_input_list = NULL;
mbstring_globals->http_input_list_size = 0;
mbstring_globals->detect_order_list = NULL;
mbstring_globals->detect_order_list_size = 0;
mbstring_globals->current_detect_order_list = NULL;
mbstring_globals->current_detect_order_list_size = 0;
mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
mbstring_globals->illegalchars = 0;
mbstring_globals->func_overload = 0;
mbstring_globals->encoding_translation = 0;
mbstring_globals->strict_detection = 0;
mbstring_globals->outconv = NULL;
mbstring_globals->http_output_conv_mimetypes = NULL;
#if HAVE_MBREGEX
2014-12-13 22:06:14 +00:00
mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
#endif
2001-05-01 01:52:55 +00:00
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ PHP_GSHUTDOWN_FUNCTION */
static PHP_GSHUTDOWN_FUNCTION(mbstring)
2002-10-08 17:20:10 +00:00
{
if (mbstring_globals->http_input_list) {
free(mbstring_globals->http_input_list);
}
if (mbstring_globals->detect_order_list) {
free(mbstring_globals->detect_order_list);
}
if (mbstring_globals->http_output_conv_mimetypes) {
_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
}
2002-10-08 17:20:10 +00:00
#if HAVE_MBREGEX
2014-12-13 22:06:14 +00:00
php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
2002-10-08 17:20:10 +00:00
#endif
}
/* }}} */
2002-10-08 17:20:10 +00:00
/* {{{ PHP_MINIT_FUNCTION(mbstring) */
2001-05-01 01:52:55 +00:00
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
ZEND_TSRMLS_CACHE_UPDATE();
#endif
__mbfl_allocators = &_php_mb_allocators;
2001-05-01 01:52:55 +00:00
REGISTER_INI_ENTRIES();
/* This is a global handler. Should not be set in a per-request handler. */
2014-12-13 22:06:14 +00:00
sapi_register_treat_data(mbstr_treat_data);
/* Post handlers are stored in the thread-local context. */
2002-11-03 08:50:43 +00:00
if (MBSTRG(encoding_translation)) {
2014-12-13 22:06:14 +00:00
sapi_register_post_entries(mbstr_post_entries);
}
2001-05-01 01:52:55 +00:00
2014-08-25 17:24:55 +00:00
REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
2014-08-25 17:24:55 +00:00
REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
2002-10-08 17:20:10 +00:00
#if HAVE_MBREGEX
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
2002-10-08 17:20:10 +00:00
#endif
2014-12-13 22:06:14 +00:00
if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
return FAILURE;
}
php_rfc1867_set_multibyte_callbacks(
php_mb_encoding_translation,
php_mb_gpc_get_detect_order,
php_mb_gpc_set_input_encoding,
php_mb_rfc1867_getword,
php_mb_rfc1867_getword_conf,
php_mb_rfc1867_basename);
2001-05-01 01:52:55 +00:00
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
2001-05-01 01:52:55 +00:00
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
UNREGISTER_INI_ENTRIES();
Fixed the UTF-8 and long path support in the streams on Windows. Since long the default PHP charset is UTF-8, however the Windows part is out of step with this important point. The current implementation in PHP doesn't technically permit to handle UTF-8 filepath and several other things. Till now, only the ANSI compatible APIs are being used. Here is more about it https://msdn.microsoft.com/en-us/library/windows/desktop/dd317752%28v=vs.85%29.aspx The patch fixes not only issues with multibyte filenames under incompatible codepages, but indirectly also issues with some other multibyte encodings like BIG5, Shift-JIS, etc. by providing a clean way to access filenames in UTF-8. Below is a small list of issues from the bug tracker, that are getting fixed: https://bugs.php.net/63401 https://bugs.php.net/41199 https://bugs.php.net/50203 https://bugs.php.net/71509 https://bugs.php.net/64699 https://bugs.php.net/64506 https://bugs.php.net/30195 https://bugs.php.net/65358 https://bugs.php.net/61315 https://bugs.php.net/70943 https://bugs.php.net/70903 https://bugs.php.net/63593 https://bugs.php.net/54977 https://bugs.php.net/54028 https://bugs.php.net/43148 https://bugs.php.net/30730 https://bugs.php.net/33350 https://bugs.php.net/35300 https://bugs.php.net/46990 https://bugs.php.net/61309 https://bugs.php.net/69333 https://bugs.php.net/45517 https://bugs.php.net/70551 https://bugs.php.net/50197 https://bugs.php.net/72200 https://bugs.php.net/37672 Yet more related tickets can for sure be found - on bugs.php.net, Stackoverflow and Github. Some of the bugs are pretty recent, some descend to early 2000th, but the user comments in there last even till today. Just for example, bug #30195 was opened in 2004, the latest comment in there was made in 2014. It is certain, that these bugs descend not only to pure PHP use cases, but get also redirected from the popular PHP based projects. 
Given the modern systems (and those supported by PHP) are always based on NTFS, there is no excuse to keep these issues unresolved. The internalization approach on Windows is in many ways different from UNIX and Linux, while it supports and is based on Unicode. It depends on the current system code page, APIs used and exact kind how the binary was compiled The locale doesn't affect the way Unicode or ANSI API work. PHP in particular is being compiled without _UNICODE defined and this is conditioned by the way we handle strings. Here is more about it https://msdn.microsoft.com/en-us/library/tsbaswba.aspx However, with any system code page ANSI functions automatically convert paths to UTF-16. Paths in some encodings incompatible with the current system code page, won't work correctly with ANSI APIs. PHP till now only uses the ANSI Windows APIs. For example, on a system with the current code page 1252, the paths in cp1252 are supported and transparently converted to UTF-16 by the ANSI functions. Once one wants to handle a filepath encoded with cp932 on that particular system, an ANSI or a POSIX compatible function used in PHP will produce an erroneous result. When trying to convert that cp932 path to UTF-8 and passing to the ANSI functions, an ANSI function would likely interpret the UTF-8 string as some string in the current code page and create a filepath that represents every single byte of the UTF-8 string. These behaviors are not only broken but also disregard the documented INI settings. This patch solves the issies with the multibyte paths on Windows by intelligently enforcing the usage of the Unicode aware APIs. For functions expect Unicode (fe CreateFileW, FindFirstFileW, etc.), arguments will be converted to UTF-16 wide chars. 
For functions returning Unicode aware data (fe GetCurrentDirectoryW, etc.), resulting wide string is converted back to char's depending on the current PHP charset settings, either to the current ANSI codepage (this is the behavior prior to this patch) or to UTF-8 (the default behavior). In a particular case, users might have to explicitly set internal_encoding or default_charset, if filenames in ANSI codepage are necessary. Current tests show no regressions and witness that this will be an exotic case, the current default UTF-8 encoding is compatible with any supported system. The dependency libraries are long switching to Unicode APIs, so some tests were also added for extensions not directly related to streams. At large, the patch brings over 150 related tests into the core. Those target and was run on various environments with European, Asian, etc. codepages. General PHP frameworks was tested and showed no regressions. The impact on the current C code base is low, the most places affected are the Windows only places in the three files tsrm_win32.c, zend_virtual_cwd.c and plain_wrapper.c. The actual implementation of the most of the wide char supporting functionality is in win32/ioutil.* and win32/codepage.*, several low level functionsare extended in place to avoid reimplementation for now. No performance impact was sighted. As previously mentioned, the ANSI APIs used prior the patch perform Unicode conversions internally. Using the Unicode APIs directly while doing custom conversions just retains the status quo. The ways to optimize it are open (fe. by implementing caching for the strings converted to wide variants). The long path implementation is user transparent. If a path exceeds the length of _MAX_PATH, it'll be automatically prefixed with \\?\. The MAXPATHLEN is set to 2048 bytes. Appreciation to Pierre Joye, Matt Ficken, @algo13 and others for tips, ideas and testing. Thanks.
2016-06-20 07:32:19 +00:00
zend_multibyte_restore_functions();
#if HAVE_MBREGEX
PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
2002-10-08 17:20:10 +00:00
#endif
2001-05-01 01:52:55 +00:00
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ PHP_RINIT_FUNCTION(mbstring) */
2001-05-01 01:52:55 +00:00
PHP_RINIT_FUNCTION(mbstring)
{
zend_function *func, *orig;
const struct mb_overload_def *p;
2001-05-01 01:52:55 +00:00
MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
2008-08-04 21:06:13 +00:00
MBSTRG(illegalchars) = 0;
2014-12-13 22:06:14 +00:00
php_mb_populate_current_detect_order_list();
2001-05-01 01:52:55 +00:00
/* override original function. */
if (MBSTRG(func_overload)){
p = &(mb_ovld[0]);
2015-01-03 09:22:58 +00:00
CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
while (p->type > 0) {
2015-01-03 09:22:58 +00:00
if ((MBSTRG(func_overload) & p->type) == p->type &&
2014-08-26 22:53:24 +00:00
!zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
) {
2014-03-23 11:45:48 +00:00
func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
2015-01-03 09:22:58 +00:00
2014-03-23 11:45:48 +00:00
if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
2014-12-13 22:06:14 +00:00
php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
return FAILURE;
} else {
2014-04-09 08:16:21 +00:00
ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
2014-08-26 22:53:24 +00:00
function_add_ref(orig);
2002-11-12 22:05:18 +00:00
2014-04-09 08:16:21 +00:00
if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
2014-12-13 22:06:14 +00:00
php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
return FAILURE;
}
2014-08-26 22:53:24 +00:00
function_add_ref(func);
}
}
p++;
}
}
#if HAVE_MBREGEX
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
2014-12-13 22:06:14 +00:00
zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
2001-05-01 01:52:55 +00:00
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
2001-05-01 01:52:55 +00:00
PHP_RSHUTDOWN_FUNCTION(mbstring)
{
const struct mb_overload_def *p;
zend_function *orig;
2001-05-01 01:52:55 +00:00
if (MBSTRG(current_detect_order_list) != NULL) {
efree(MBSTRG(current_detect_order_list));
MBSTRG(current_detect_order_list) = NULL;
MBSTRG(current_detect_order_list_size) = 0;
}
if (MBSTRG(outconv) != NULL) {
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_delete(MBSTRG(outconv));
2001-05-01 01:52:55 +00:00
MBSTRG(outconv) = NULL;
}
/* clear http input identification. */
MBSTRG(http_input_identify) = NULL;
MBSTRG(http_input_identify_post) = NULL;
MBSTRG(http_input_identify_get) = NULL;
MBSTRG(http_input_identify_cookie) = NULL;
MBSTRG(http_input_identify_string) = NULL;
/* clear overloaded function. */
if (MBSTRG(func_overload)){
p = &(mb_ovld[0]);
2008-06-13 14:48:33 +00:00
while (p->type > 0) {
2015-01-03 09:22:58 +00:00
if ((MBSTRG(func_overload) & p->type) == p->type &&
2014-03-23 11:45:48 +00:00
(orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {
2015-01-03 09:22:58 +00:00
2014-04-09 08:16:21 +00:00
zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
2014-08-26 22:53:24 +00:00
function_add_ref(orig);
2014-03-23 11:45:48 +00:00
zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
2008-06-13 14:48:33 +00:00
}
p++;
}
CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
}
#if HAVE_MBREGEX
PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
2001-05-01 01:52:55 +00:00
return SUCCESS;
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ PHP_MINFO_FUNCTION(mbstring) */
2001-05-01 01:52:55 +00:00
PHP_MINFO_FUNCTION(mbstring)
{
php_info_print_table_start();
2002-10-25 08:31:27 +00:00
php_info_print_table_row(2, "Multibyte Support", "enabled");
php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
2015-01-03 09:22:58 +00:00
php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
2011-08-02 03:18:53 +00:00
{
char tmp[256];
snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
php_info_print_table_row(2, "libmbfl version", tmp);
}
#if HAVE_ONIG
2015-03-12 12:28:43 +00:00
{
char tmp[256];
snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
php_info_print_table_row(2, "oniguruma version", tmp);
}
#endif
2001-05-01 01:52:55 +00:00
php_info_print_table_end();
php_info_print_table_start();
php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
php_info_print_table_end();
#if HAVE_MBREGEX
PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
#endif
2001-05-01 01:52:55 +00:00
DISPLAY_INI_ENTRIES();
}
/* }}} */
2001-05-01 01:52:55 +00:00
2001-05-08 14:33:20 +00:00
/* {{{ proto string mb_language([string language])
   Sets the current language or Returns the current language as a string */
2001-05-08 14:33:20 +00:00
PHP_FUNCTION(mb_language)
{
zend_string *name = NULL;
2001-05-08 14:33:20 +00:00
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
return;
}
if (name == NULL) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
} else {
2014-08-25 17:24:55 +00:00
zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
RETVAL_FALSE;
2001-05-08 14:33:20 +00:00
} else {
RETVAL_TRUE;
2001-05-08 14:33:20 +00:00
}
2014-08-25 17:24:55 +00:00
zend_string_release(ini_name);
2001-05-08 14:33:20 +00:00
}
}
/* }}} */
/* {{{ proto string mb_internal_encoding([string encoding])
   Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)
2001-05-01 01:52:55 +00:00
{
const char *name = NULL;
2014-08-27 15:31:24 +00:00
size_t name_len;
const mbfl_encoding *encoding;
2001-05-01 01:52:55 +00:00
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
return;
}
if (name == NULL) {
name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
2001-05-01 01:52:55 +00:00
if (name != NULL) {
2014-03-23 11:45:48 +00:00
RETURN_STRING(name);
2001-05-01 01:52:55 +00:00
} else {
RETURN_FALSE;
}
} else {
encoding = mbfl_name2encoding(name);
if (!encoding) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
} else {
MBSTRG(current_internal_encoding) = encoding;
2001-05-01 01:52:55 +00:00
RETURN_TRUE;
}
}
}
/* }}} */
/* {{{ proto mixed mb_http_input([string type])
2002-01-07 06:58:01 +00:00
Returns the input encoding */
PHP_FUNCTION(mb_http_input)
2001-05-01 01:52:55 +00:00
{
char *typ = NULL;
size_t typ_len;
int retname;
char *list, *temp;
const mbfl_encoding *result = NULL;
2001-05-01 01:52:55 +00:00
retname = 1;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
return;
}
if (typ == NULL) {
result = MBSTRG(http_input_identify);
} else {
switch (*typ) {
2001-05-01 01:52:55 +00:00
case 'G':
case 'g':
result = MBSTRG(http_input_identify_get);
break;
case 'P':
case 'p':
result = MBSTRG(http_input_identify_post);
break;
case 'C':
case 'c':
result = MBSTRG(http_input_identify_cookie);
break;
case 'S':
case 's':
result = MBSTRG(http_input_identify_string);
break;
2001-05-01 01:52:55 +00:00
case 'I':
case 'i':
{
const mbfl_encoding **entry = MBSTRG(http_input_list);
const size_t n = MBSTRG(http_input_list_size);
size_t i;
2010-12-20 14:38:08 +00:00
array_init(return_value);
for (i = 0; i < n; i++) {
2014-04-15 11:40:40 +00:00
add_next_index_string(return_value, (*entry)->name);
entry++;
2001-05-01 01:52:55 +00:00
}
retname = 0;
2001-05-01 01:52:55 +00:00
}
break;
case 'L':
case 'l':
{
const mbfl_encoding **entry = MBSTRG(http_input_list);
const size_t n = MBSTRG(http_input_list_size);
size_t i;
list = NULL;
for (i = 0; i < n; i++) {
if (list) {
temp = list;
spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
efree(temp);
2015-01-03 09:22:58 +00:00
if (!list) {
break;
2002-11-03 08:50:43 +00:00
}
} else {
list = estrdup((*entry)->name);
}
entry++;
}
}
2002-11-03 08:50:43 +00:00
if (!list) {
RETURN_FALSE;
2002-11-03 08:50:43 +00:00
}
2014-03-23 11:45:48 +00:00
RETVAL_STRING(list);
efree(list);
retname = 0;
break;
2001-05-01 01:52:55 +00:00
default:
result = MBSTRG(http_input_identify);
break;
}
}
if (retname) {
if (result) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING(result->name);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
}
/* }}} */
/* {{{ proto string mb_http_output([string encoding])
2002-01-07 06:58:01 +00:00
Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)
2001-05-01 01:52:55 +00:00
{
const char *name = NULL;
2014-08-27 17:25:28 +00:00
size_t name_len;
const mbfl_encoding *encoding;
2001-05-01 01:52:55 +00:00
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
2014-03-23 11:45:48 +00:00
return;
}
if (name == NULL) {
name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
2001-05-01 01:52:55 +00:00
if (name != NULL) {
2014-03-23 11:45:48 +00:00
RETURN_STRING(name);
2001-05-01 01:52:55 +00:00
} else {
RETURN_FALSE;
}
} else {
encoding = mbfl_name2encoding(name);
if (!encoding) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
} else {
MBSTRG(current_http_output_encoding) = encoding;
2001-05-01 01:52:55 +00:00
RETURN_TRUE;
}
}
}
/* }}} */
2002-08-13 04:37:28 +00:00
/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
2002-01-07 06:58:01 +00:00
Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)
2001-05-01 01:52:55 +00:00
{
2014-03-23 11:45:48 +00:00
zval *arg1 = NULL;
2001-05-01 01:52:55 +00:00
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
return;
}
2009-02-11 14:26:24 +00:00
if (!arg1) {
size_t i;
size_t n = MBSTRG(current_detect_order_list_size);
const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
array_init(return_value);
for (i = 0; i < n; i++) {
2014-04-15 11:40:40 +00:00
add_next_index_string(return_value, (*entry)->name);
2001-05-01 01:52:55 +00:00
entry++;
}
} else {
const mbfl_encoding **list = NULL;
size_t size = 0;
2014-03-23 11:45:48 +00:00
switch (Z_TYPE_P(arg1)) {
case IS_ARRAY:
2014-12-13 22:06:14 +00:00
if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
2014-03-23 11:45:48 +00:00
if (list) {
efree(list);
}
RETURN_FALSE;
}
2014-03-23 11:45:48 +00:00
break;
default:
convert_to_string_ex(arg1);
2014-12-13 22:06:14 +00:00
if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
2014-03-23 11:45:48 +00:00
if (list) {
efree(list);
}
RETURN_FALSE;
}
2014-03-23 11:45:48 +00:00
break;
2001-05-01 01:52:55 +00:00
}
2001-05-01 01:52:55 +00:00
if (list == NULL) {
RETURN_FALSE;
2001-05-01 01:52:55 +00:00
}
if (MBSTRG(current_detect_order_list)) {
efree(MBSTRG(current_detect_order_list));
}
MBSTRG(current_detect_order_list) = list;
MBSTRG(current_detect_order_list_size) = size;
RETURN_TRUE;
2001-05-01 01:52:55 +00:00
}
}
/* }}} */
/* {{{ proto mixed mb_substitute_character([mixed substchar])
2002-01-07 06:58:01 +00:00
Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)
2001-05-01 01:52:55 +00:00
{
2014-03-23 11:45:48 +00:00
zval *arg1 = NULL;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
return;
}
2001-05-01 01:52:55 +00:00
2009-02-11 14:26:24 +00:00
if (!arg1) {
2001-05-01 01:52:55 +00:00
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2014-03-23 11:45:48 +00:00
RETURN_STRING("none");
2002-11-03 08:50:43 +00:00
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2014-03-23 11:45:48 +00:00
RETURN_STRING("long");
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2014-03-23 11:45:48 +00:00
RETURN_STRING("entity");
2001-05-01 01:52:55 +00:00
} else {
2014-08-25 17:24:55 +00:00
RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2001-05-01 01:52:55 +00:00
}
} else {
2002-03-23 07:36:27 +00:00
RETVAL_TRUE;
2014-03-23 11:45:48 +00:00
switch (Z_TYPE_P(arg1)) {
case IS_STRING:
2014-08-25 17:24:55 +00:00
if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2014-03-23 11:45:48 +00:00
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2014-08-25 17:24:55 +00:00
} else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2014-03-23 11:45:48 +00:00
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2014-08-25 17:24:55 +00:00
} else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2014-03-23 11:45:48 +00:00
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
} else {
2014-08-25 19:51:49 +00:00
convert_to_long_ex(arg1);
2014-08-25 17:24:55 +00:00
if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2014-03-23 11:45:48 +00:00
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2014-08-25 17:24:55 +00:00
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2014-03-23 11:45:48 +00:00
} else {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown character.");
2014-03-23 11:45:48 +00:00
RETURN_FALSE;
}
}
break;
default:
2014-08-25 19:51:49 +00:00
convert_to_long_ex(arg1);
2014-08-25 17:24:55 +00:00
if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2002-03-23 07:36:27 +00:00
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2014-08-25 17:24:55 +00:00
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2002-03-23 07:36:27 +00:00
} else {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown character.");
RETURN_FALSE;
2002-03-23 07:36:27 +00:00
}
2014-03-23 11:45:48 +00:00
break;
2001-05-01 01:52:55 +00:00
}
}
}
/* }}} */
/* {{{ proto string mb_preferred_mime_name(string encoding)
2002-01-07 06:58:01 +00:00
Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)
2001-05-01 01:52:55 +00:00
{
enum mbfl_no_encoding no_encoding;
char *name = NULL;
2014-08-27 17:25:28 +00:00
size_t name_len;
2001-05-01 01:52:55 +00:00
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
return;
} else {
no_encoding = mbfl_name2no_encoding(name);
2001-05-01 01:52:55 +00:00
if (no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2001-05-01 01:52:55 +00:00
RETVAL_FALSE;
} else {
const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
if (preferred_name == NULL || *preferred_name == '\0') {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2001-05-01 01:52:55 +00:00
RETVAL_FALSE;
} else {
2014-03-23 11:45:48 +00:00
RETVAL_STRING((char *)preferred_name);
2001-05-01 01:52:55 +00:00
}
}
}
}
/* }}} */
/* Byte-range predicates; each evaluates to 1 or 0. The argument is expanded
 * several times, so it must be free of side effects.
 *   IS_SJIS1(c): c lies in 0x81-0x9f or 0xe0-0xf5.
 *   IS_SJIS2(c): c lies in 0x40-0x7e or 0x80-0xfc.
 * NOTE(review): judging by the names these look like Shift_JIS first/second
 * byte tests -- confirm against the code that uses them. */
#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
2011-09-11 12:12:24 +00:00
/* {{{ proto bool mb_parse_str(string encoded_string [, array result])
Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)
2001-05-01 01:52:55 +00:00
{
2009-02-09 15:34:01 +00:00
zval *track_vars_array = NULL;
2005-02-10 14:11:06 +00:00
char *encstr = NULL;
size_t encstr_len;
php_mb_encoding_handler_info_t info;
const mbfl_encoding *detected;
2001-05-01 01:52:55 +00:00
track_vars_array = NULL;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
if (track_vars_array != NULL) {
2011-09-11 12:12:24 +00:00
/* Clear out the array */
zval_dtor(track_vars_array);
array_init(track_vars_array);
2001-05-01 01:52:55 +00:00
}
encstr = estrndup(encstr, encstr_len);
info.data_type = PARSE_STRING;
2015-01-03 09:22:58 +00:00
info.separator = PG(arg_separator).input;
info.report_errors = 1;
info.to_encoding = MBSTRG(current_internal_encoding);
info.to_language = MBSTRG(language);
info.from_encodings = MBSTRG(http_input_list);
2015-01-03 09:22:58 +00:00
info.num_from_encodings = MBSTRG(http_input_list_size);
info.from_language = MBSTRG(language);
2011-09-11 12:12:24 +00:00
if (track_vars_array != NULL) {
2014-12-13 22:06:14 +00:00
detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2011-09-11 12:12:24 +00:00
} else {
zval tmp;
zend_array *symbol_table;
if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
efree(encstr);
return;
}
symbol_table = zend_rebuild_symbol_table();
ZVAL_ARR(&tmp, symbol_table);
2015-01-03 09:22:58 +00:00
detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2011-09-11 12:12:24 +00:00
}
MBSTRG(http_input_identify) = detected;
RETVAL_BOOL(detected);
if (encstr != NULL) efree(encstr);
2001-05-01 01:52:55 +00:00
}
/* }}} */
/* {{{ proto string mb_output_handler(string contents, int status)
2001-05-01 01:52:55 +00:00
Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)
2001-05-01 01:52:55 +00:00
{
char *arg_string;
size_t arg_string_len;
2014-08-25 17:24:55 +00:00
zend_long arg_status;
mbfl_string string, result;
const char *charset;
char *p;
const mbfl_encoding *encoding;
int last_feed, len;
unsigned char send_text_mimetype = 0;
char *s, *mimetype = NULL;
2001-05-01 01:52:55 +00:00
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
encoding = MBSTRG(current_http_output_encoding);
/* start phase only */
if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
/* delete the converter just in case. */
if (MBSTRG(outconv)) {
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_delete(MBSTRG(outconv));
MBSTRG(outconv) = NULL;
}
if (encoding == &mbfl_encoding_pass) {
2014-03-23 11:45:48 +00:00
RETURN_STRINGL(arg_string, arg_string_len);
}
/* analyze mime type */
if (SG(sapi_headers).mimetype &&
_php_mb_match_regex(
MBSTRG(http_output_conv_mimetypes),
SG(sapi_headers).mimetype,
strlen(SG(sapi_headers).mimetype))) {
if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
mimetype = estrdup(SG(sapi_headers).mimetype);
} else {
mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
}
send_text_mimetype = 1;
} else if (SG(sapi_headers).send_default_content_type) {
2002-03-23 07:36:27 +00:00
mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
}
/* if content-type is not yet set, set it and activate the converter */
if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
charset = encoding->mime_name;
if (charset) {
2015-01-03 09:22:58 +00:00
len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2002-11-03 08:50:43 +00:00
if (sapi_add_header(p, len, 0) != FAILURE) {
SG(sapi_headers).send_default_content_type = 0;
2002-11-03 08:50:43 +00:00
}
}
/* activate the converter */
MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
if (send_text_mimetype){
efree(mimetype);
}
}
}
/* just return if the converter is not activated. */
if (MBSTRG(outconv) == NULL) {
2014-03-23 11:45:48 +00:00
RETURN_STRINGL(arg_string, arg_string_len);
2001-05-01 01:52:55 +00:00
}
/* flag */
last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
/* mode */
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2015-01-03 09:22:58 +00:00
/* feed the string */
mbfl_string_init(&string);
/* these are not needed. convd has encoding info.
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
*/
2002-11-02 13:38:32 +00:00
string.val = (unsigned char *)arg_string;
string.len = arg_string_len;
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2002-11-03 08:50:43 +00:00
if (last_feed) {
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_flush(MBSTRG(outconv));
2015-01-03 09:22:58 +00:00
}
/* get the converter output, and return it */
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
2014-03-23 15:38:32 +00:00
efree(result.val);
2015-01-03 09:22:58 +00:00
/* delete the converter if it is the last feed. */
if (last_feed) {
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_delete(MBSTRG(outconv));
2001-05-01 01:52:55 +00:00
MBSTRG(outconv) = NULL;
}
}
/* }}} */
2002-01-07 06:58:01 +00:00
/* {{{ proto int mb_strlen(string str [, string encoding])
2001-05-01 01:52:55 +00:00
Get character numbers of a string */
PHP_FUNCTION(mb_strlen)
2001-05-01 01:52:55 +00:00
{
int n;
mbfl_string string;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, string_len;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
return;
2002-03-23 07:36:27 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
return;
}
string.len = (uint32_t)string_len;
string.no_language = MBSTRG(language);
if (enc_name == NULL) {
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
} else {
string.no_encoding = mbfl_name2no_encoding(enc_name);
2002-11-03 08:50:43 +00:00
if (string.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
2003-08-23 08:59:47 +00:00
n = mbfl_strlen(&string);
2001-05-01 01:52:55 +00:00
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2001-05-01 01:52:55 +00:00
Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)
2001-05-01 01:52:55 +00:00
{
int n, reverse = 0;
zend_long offset = 0, slen;
2001-05-01 01:52:55 +00:00
mbfl_string haystack, needle;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, haystack_len, needle_len;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2014-03-23 11:45:48 +00:00
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2002-11-03 08:50:43 +00:00
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
slen = mbfl_strlen(&haystack);
if (offset < 0) {
offset += slen;
}
if (offset < 0 || offset > slen) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
if (needle.len == 0) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
2003-08-23 08:59:47 +00:00
n = mbfl_strpos(&haystack, &needle, offset, reverse);
2001-05-01 01:52:55 +00:00
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
2001-05-01 01:52:55 +00:00
} else {
2002-03-23 07:36:27 +00:00
switch (-n) {
case 1:
break;
case 2:
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2002-03-23 07:36:27 +00:00
break;
case 4:
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2002-03-23 07:36:27 +00:00
break;
case 8:
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_NOTICE, "Argument is empty");
2002-03-23 07:36:27 +00:00
break;
default:
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
break;
2002-03-23 07:36:27 +00:00
}
2001-05-01 01:52:55 +00:00
RETVAL_FALSE;
}
}
/* }}} */
2006-07-17 04:48:17 +00:00
/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)
2001-05-01 01:52:55 +00:00
{
int n;
mbfl_string haystack, needle;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, haystack_len, needle_len;
2014-03-23 11:45:48 +00:00
zval *zoffset = NULL;
2006-07-17 04:48:17 +00:00
long offset = 0, str_flg;
char *enc_name2 = NULL;
int enc_name_len2;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2014-03-23 11:45:48 +00:00
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
2009-03-06 15:48:30 +00:00
if (zoffset) {
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(zoffset) == IS_STRING) {
enc_name2 = Z_STRVAL_P(zoffset);
2014-08-25 17:24:55 +00:00
enc_name_len2 = Z_STRLEN_P(zoffset);
2006-07-17 04:48:17 +00:00
str_flg = 1;
if (enc_name2 != NULL) {
switch (*enc_name2) {
2014-03-23 11:45:48 +00:00
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case ' ':
case '-':
case '.':
break;
default :
str_flg = 0;
break;
2006-07-17 04:48:17 +00:00
}
}
2009-03-06 15:48:30 +00:00
if (str_flg) {
2014-08-25 19:51:49 +00:00
convert_to_long_ex(zoffset);
2014-08-25 17:24:55 +00:00
offset = Z_LVAL_P(zoffset);
2006-07-17 04:48:17 +00:00
} else {
enc_name = enc_name2;
enc_name_len = enc_name_len2;
}
} else {
2014-08-25 19:51:49 +00:00
convert_to_long_ex(zoffset);
2014-08-25 17:24:55 +00:00
offset = Z_LVAL_P(zoffset);
2006-07-17 04:48:17 +00:00
}
}
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2002-11-03 08:50:43 +00:00
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
if (haystack.len <= 0) {
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
if (needle.len <= 0) {
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
{
int haystack_char_len = mbfl_strlen(&haystack);
if ((offset > 0 && offset > haystack_char_len) ||
(offset < 0 && -offset > haystack_char_len)) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
RETURN_FALSE;
}
}
2006-07-17 04:48:17 +00:00
n = mbfl_strpos(&haystack, &needle, offset, 1);
2001-05-01 01:52:55 +00:00
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
2006-05-30 15:43:09 +00:00
/* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)
{
int n = -1;
zend_long offset = 0;
2008-07-24 13:46:50 +00:00
mbfl_string haystack, needle;
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2016-03-07 21:01:44 +00:00
size_t from_encoding_len, haystack_len, needle_len;
2006-05-30 15:43:09 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
if (needle.len == 0) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2006-05-30 15:43:09 +00:00
RETURN_FALSE;
}
2014-12-13 22:06:14 +00:00
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2006-05-30 15:43:09 +00:00
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
2006-05-30 15:43:09 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)
{
int n = -1;
zend_long offset = 0;
2008-07-24 13:46:50 +00:00
mbfl_string haystack, needle;
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2016-03-07 21:01:44 +00:00
size_t from_encoding_len, haystack_len, needle_len;
2006-05-30 15:43:09 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
return;
2006-05-30 15:43:09 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
2014-12-13 22:06:14 +00:00
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2006-05-30 15:43:09 +00:00
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
2006-05-30 15:43:09 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
2006-05-30 15:45:35 +00:00
/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)
{
int n, len, mblen;
mbfl_string haystack, needle, result, *ret = NULL;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, haystack_len, needle_len;
2006-05-30 15:45:35 +00:00
zend_bool part = 0;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2006-05-30 15:45:35 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
return;
2006-05-30 15:45:35 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
2006-05-30 15:45:35 +00:00
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2006-05-30 15:45:35 +00:00
RETURN_FALSE;
}
}
if (needle.len <= 0) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2006-05-30 15:45:35 +00:00
RETURN_FALSE;
}
n = mbfl_strpos(&haystack, &needle, 0, 0);
if (n >= 0) {
mblen = mbfl_strlen(&haystack);
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:45:35 +00:00
} else {
RETVAL_FALSE;
}
} else {
len = (mblen - n);
ret = mbfl_substr(&haystack, &result, n, len);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:45:35 +00:00
} else {
RETVAL_FALSE;
}
}
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)
{
int n, len, mblen;
mbfl_string haystack, needle, result, *ret = NULL;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, haystack_len, needle_len;
2006-05-30 15:45:35 +00:00
zend_bool part = 0;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2006-05-30 15:45:35 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
return;
2006-05-30 15:45:35 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
2006-05-30 15:45:35 +00:00
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2006-05-30 15:45:35 +00:00
RETURN_FALSE;
}
}
if (haystack.len <= 0) {
RETURN_FALSE;
}
if (needle.len <= 0) {
RETURN_FALSE;
}
n = mbfl_strpos(&haystack, &needle, 0, 1);
if (n >= 0) {
mblen = mbfl_strlen(&haystack);
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:45:35 +00:00
} else {
RETVAL_FALSE;
}
} else {
len = (mblen - n);
ret = mbfl_substr(&haystack, &result, n, len);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:45:35 +00:00
} else {
RETVAL_FALSE;
}
}
} else {
RETVAL_FALSE;
}
}
/* }}} */
2006-05-30 15:47:53 +00:00
/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)
{
zend_bool part = 0;
2016-03-07 21:01:44 +00:00
size_t from_encoding_len, len, mblen, haystack_len, needle_len;
int n;
2006-05-30 15:47:53 +00:00
mbfl_string haystack, needle, result, *ret = NULL;
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2006-05-30 15:47:53 +00:00
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2006-05-30 15:47:53 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
return;
2006-05-30 15:47:53 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
2008-07-24 13:46:50 +00:00
if (!needle.len) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Empty delimiter");
RETURN_FALSE;
}
2006-05-30 15:47:53 +00:00
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2006-05-30 15:47:53 +00:00
RETURN_FALSE;
}
2014-12-13 22:06:14 +00:00
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2006-05-30 15:47:53 +00:00
if (n <0) {
RETURN_FALSE;
}
mblen = mbfl_strlen(&haystack);
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:47:53 +00:00
} else {
RETVAL_FALSE;
}
} else {
len = (mblen - n);
ret = mbfl_substr(&haystack, &result, n, len);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocaton ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:47:53 +00:00
} else {
RETVAL_FALSE;
}
}
}
2007-01-12 12:06:45 +00:00
/* }}} */
2006-05-30 15:47:53 +00:00
/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)
{
zend_bool part = 0;
2014-08-27 17:25:28 +00:00
int n, len, mblen;
2016-03-07 21:01:44 +00:00
size_t from_encoding_len, haystack_len, needle_len;
2006-05-30 15:47:53 +00:00
mbfl_string haystack, needle, result, *ret = NULL;
const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2006-05-30 15:47:53 +00:00
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2006-05-30 15:47:53 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
return;
2006-05-30 15:47:53 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
2006-05-30 15:47:53 +00:00
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2006-05-30 15:47:53 +00:00
RETURN_FALSE;
}
2014-12-13 22:06:14 +00:00
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2006-05-30 15:47:53 +00:00
if (n <0) {
RETURN_FALSE;
}
mblen = mbfl_strlen(&haystack);
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:47:53 +00:00
} else {
RETVAL_FALSE;
}
} else {
len = (mblen - n);
ret = mbfl_substr(&haystack, &result, n, len);
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2006-05-30 15:47:53 +00:00
} else {
RETVAL_FALSE;
}
}
}
2007-01-12 12:06:45 +00:00
/* }}} */
2006-05-30 15:47:53 +00:00
/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)
{
int n;
mbfl_string haystack, needle;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, haystack_len, needle_len;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
return;
} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
return;
}
haystack.len = (uint32_t)haystack_len;
needle.len = (uint32_t)needle_len;
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
RETURN_FALSE;
}
}
if (needle.len <= 0) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Empty substring");
RETURN_FALSE;
}
2003-08-23 08:59:47 +00:00
n = mbfl_substr_count(&haystack, &needle);
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2002-01-07 06:58:01 +00:00
Returns part of a string */
PHP_FUNCTION(mb_substr)
2001-05-01 01:52:55 +00:00
{
char *str, *encoding = NULL;
2014-08-25 17:24:55 +00:00
zend_long from, len;
2014-08-27 17:25:28 +00:00
int mblen;
size_t str_len, encoding_len;
zend_bool len_is_null = 1;
2001-05-01 01:52:55 +00:00
mbfl_string string, result, *ret;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
return;
}
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2001-05-01 01:52:55 +00:00
if (encoding) {
string.no_encoding = mbfl_name2no_encoding(encoding);
2001-05-01 01:52:55 +00:00
if (string.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
string.val = (unsigned char *)str;
string.len = str_len;
2001-05-01 01:52:55 +00:00
if (len_is_null) {
len = str_len;
2001-05-01 01:52:55 +00:00
}
/* measures length */
mblen = 0;
if (from < 0 || len < 0) {
2003-08-23 08:59:47 +00:00
mblen = mbfl_strlen(&string);
2001-05-01 01:52:55 +00:00
}
/* if "from" position is negative, count start position from the end
* of the string
*/
if (from < 0) {
from = mblen + from;
if (from < 0) {
from = 0;
}
}
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
if (len < 0) {
len = (mblen - from) + len;
if (len < 0) {
len = 0;
}
}
if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
&& (from >= mbfl_strlen(&string))) {
RETURN_FALSE;
}
if (from > INT_MAX) {
from = INT_MAX;
}
if (len > INT_MAX) {
len = INT_MAX;
}
2003-08-23 08:59:47 +00:00
ret = mbfl_substr(&string, &result, from, len);
if (NULL == ret) {
RETURN_FALSE;
2001-05-01 01:52:55 +00:00
}
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 15:38:32 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
efree(ret->val);
2001-05-01 01:52:55 +00:00
}
/* }}} */
/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2002-01-07 06:58:01 +00:00
Returns part of a string */
PHP_FUNCTION(mb_strcut)
2001-05-01 01:52:55 +00:00
{
char *encoding = NULL;
2014-08-25 17:24:55 +00:00
zend_long from, len;
2016-03-07 21:01:44 +00:00
size_t encoding_len, string_len;
zend_bool len_is_null = 1;
2001-05-01 01:52:55 +00:00
mbfl_string string, result, *ret;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2001-05-01 01:52:55 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2008-07-24 13:46:50 +00:00
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
return;
}
string.len = (uint32_t)string_len;
if (encoding) {
string.no_encoding = mbfl_name2no_encoding(encoding);
2001-05-01 01:52:55 +00:00
if (string.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
if (len_is_null) {
2008-07-24 13:46:50 +00:00
len = string.len;
2001-05-01 01:52:55 +00:00
}
/* if "from" position is negative, count start position from the end
* of the string
*/
if (from < 0) {
2008-07-24 13:46:50 +00:00
from = string.len + from;
2001-05-01 01:52:55 +00:00
if (from < 0) {
from = 0;
}
}
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
if (len < 0) {
2008-07-24 13:46:50 +00:00
len = (string.len - from) + len;
2001-05-01 01:52:55 +00:00
if (len < 0) {
len = 0;
}
}
2008-07-25 14:04:54 +00:00
if ((unsigned int)from > string.len) {
RETURN_FALSE;
}
2003-08-23 08:59:47 +00:00
ret = mbfl_strcut(&string, &result, from, len);
if (ret == NULL) {
RETURN_FALSE;
2001-05-01 01:52:55 +00:00
}
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 15:38:32 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
efree(ret->val);
2001-05-01 01:52:55 +00:00
}
/* }}} */
2002-01-07 06:58:01 +00:00
/* {{{ proto int mb_strwidth(string str [, string encoding])
Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)
2001-05-01 01:52:55 +00:00
{
int n;
mbfl_string string;
char *enc_name = NULL;
2016-03-07 21:01:44 +00:00
size_t enc_name_len, string_len;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2001-05-01 01:52:55 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
return;
}
string.len = (uint32_t)string_len;
if (enc_name != NULL) {
string.no_encoding = mbfl_name2no_encoding(enc_name);
2002-11-03 08:50:43 +00:00
if (string.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
2003-08-23 08:59:47 +00:00
n = mbfl_strwidth(&string);
2001-05-01 01:52:55 +00:00
if (n >= 0) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(n);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2001-05-01 01:52:55 +00:00
Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)
2001-05-01 01:52:55 +00:00
{
char *str, *trimmarker = NULL, *encoding = NULL;
zend_long from, width, swidth;
size_t str_len, trimmarker_len, encoding_len;
2001-05-01 01:52:55 +00:00
mbfl_string string, result, marker, *ret;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
return;
}
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
mbfl_string_init(&marker);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
marker.no_language = MBSTRG(language);
marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2001-05-01 01:52:55 +00:00
marker.val = NULL;
marker.len = 0;
if (encoding) {
string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2001-05-01 01:52:55 +00:00
if (string.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
string.val = (unsigned char *)str;
string.len = str_len;
2001-05-01 01:52:55 +00:00
if ((from < 0) || (width < 0)) {
swidth = mbfl_strwidth(&string);
}
if (from < 0) {
from += swidth;
}
if (from < 0 || (size_t)from > str_len) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Start position is out of range");
2002-03-23 07:36:27 +00:00
RETURN_FALSE;
}
if (width < 0) {
width = swidth + width - from;
}
if (width < 0) {
php_error_docref(NULL, E_WARNING, "Width is out of range");
2002-03-23 07:36:27 +00:00
RETURN_FALSE;
}
if (trimmarker) {
marker.val = (unsigned char *)trimmarker;
marker.len = trimmarker_len;
2001-05-01 01:52:55 +00:00
}
2003-08-23 08:59:47 +00:00
ret = mbfl_strimwidth(&string, &marker, &result, from, width);
if (ret == NULL) {
RETURN_FALSE;
2001-05-01 01:52:55 +00:00
}
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2014-03-23 15:38:32 +00:00
efree(ret->val);
2001-05-01 01:52:55 +00:00
}
/* }}} */
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2014-12-13 22:06:14 +00:00
MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
2001-05-01 01:52:55 +00:00
{
mbfl_string string, result, *ret;
const mbfl_encoding *from_encoding, *to_encoding;
2001-05-01 01:52:55 +00:00
mbfl_buffer_converter *convd;
size_t size;
const mbfl_encoding **list;
2002-04-07 18:09:31 +00:00
char *output=NULL;
2001-05-01 01:52:55 +00:00
if (output_len) {
*output_len = 0;
}
2003-08-19 14:48:33 +00:00
if (!input) {
2002-04-05 01:23:22 +00:00
return NULL;
2001-05-01 01:52:55 +00:00
}
/* new encoding */
if (_to_encoding && strlen(_to_encoding)) {
to_encoding = mbfl_name2encoding(_to_encoding);
if (!to_encoding) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2002-04-05 01:23:22 +00:00
return NULL;
}
} else {
to_encoding = MBSTRG(current_internal_encoding);
2001-05-01 01:52:55 +00:00
}
/* initialize string */
mbfl_string_init(&string);
mbfl_string_init(&result);
from_encoding = MBSTRG(current_internal_encoding);
string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
2002-11-02 13:38:32 +00:00
string.val = (unsigned char *)input;
2002-04-05 01:23:22 +00:00
string.len = length;
2001-05-01 01:52:55 +00:00
/* pre-conversion encoding */
2002-04-05 01:23:22 +00:00
if (_from_encodings) {
2001-05-01 01:52:55 +00:00
list = NULL;
size = 0;
2014-12-13 22:06:14 +00:00
php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
2001-05-01 01:52:55 +00:00
if (size == 1) {
from_encoding = *list;
string.no_encoding = from_encoding->no_encoding;
2001-05-01 01:52:55 +00:00
} else if (size > 1) {
/* auto detect */
from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
if (from_encoding) {
string.no_encoding = from_encoding->no_encoding;
2001-05-01 01:52:55 +00:00
} else {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
2011-03-14 16:00:59 +00:00
from_encoding = &mbfl_encoding_pass;
2001-05-01 01:52:55 +00:00
to_encoding = from_encoding;
string.no_encoding = from_encoding->no_encoding;
2001-05-01 01:52:55 +00:00
}
} else {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
2001-05-01 01:52:55 +00:00
}
if (list != NULL) {
efree((void *)list);
}
}
/* initialize converter */
convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
2001-05-01 01:52:55 +00:00
if (convd == NULL) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
2002-04-05 01:23:22 +00:00
return NULL;
2001-05-01 01:52:55 +00:00
}
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2001-05-01 01:52:55 +00:00
/* do it */
2003-08-23 08:59:47 +00:00
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2002-04-05 01:23:22 +00:00
if (ret) {
if (output_len) {
*output_len = ret->len;
}
2002-11-02 13:38:32 +00:00
output = (char *)ret->val;
2001-05-01 01:52:55 +00:00
}
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_delete(convd);
2002-04-05 01:23:22 +00:00
return output;
2001-05-01 01:52:55 +00:00
}
/* }}} */
2002-04-05 01:23:22 +00:00
/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)
{
char *arg_str, *arg_new;
size_t str_len, new_len;
zval *arg_old = NULL;
2002-11-02 13:38:32 +00:00
size_t size, l, n;
char *_from_encodings = NULL, *ret, *s_free = NULL;
2002-04-05 01:23:22 +00:00
2014-03-23 11:45:48 +00:00
zval *hash_entry;
2002-04-05 01:23:22 +00:00
HashTable *target_hash;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
return;
}
2002-04-05 01:23:22 +00:00
if (arg_old) {
switch (Z_TYPE_P(arg_old)) {
case IS_ARRAY:
target_hash = Z_ARRVAL_P(arg_old);
_from_encodings = NULL;
2014-05-28 13:43:11 +00:00
ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
convert_to_string_ex(hash_entry);
if ( _from_encodings) {
l = strlen(_from_encodings);
n = strlen(Z_STRVAL_P(hash_entry));
_from_encodings = erealloc(_from_encodings, l+n+2);
memcpy(_from_encodings + l, ",", 1);
2014-08-25 17:24:55 +00:00
memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
} else {
_from_encodings = estrdup(Z_STRVAL_P(hash_entry));
}
2014-05-28 13:43:11 +00:00
} ZEND_HASH_FOREACH_END();
if (_from_encodings != NULL && !strlen(_from_encodings)) {
efree(_from_encodings);
_from_encodings = NULL;
}
s_free = _from_encodings;
break;
default:
convert_to_string(arg_old);
_from_encodings = Z_STRVAL_P(arg_old);
break;
2002-04-05 01:23:22 +00:00
}
}
/* new encoding */
2014-12-13 22:06:14 +00:00
ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
2002-04-05 01:23:22 +00:00
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
efree(ret);
2002-04-05 01:23:22 +00:00
} else {
2003-08-19 14:48:33 +00:00
RETVAL_FALSE;
2002-04-05 01:23:22 +00:00
}
2002-04-05 01:23:22 +00:00
if ( s_free) {
efree(s_free);
}
}
/* }}} */
/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)
{
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
char *str;
2014-08-27 17:25:28 +00:00
size_t str_len, from_encoding_len;
2014-08-25 17:24:55 +00:00
zend_long case_mode = 0;
char *newstr;
size_t ret_len;
RETVAL_FALSE;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
&case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
return;
}
2014-12-13 22:06:14 +00:00
newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
2002-11-03 08:50:43 +00:00
if (newstr) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL(newstr, ret_len);
efree(newstr);
2015-01-03 09:22:58 +00:00
}
}
/* }}} */
2002-11-09 23:10:02 +00:00
/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
* Returns an uppercased version of sourcestring
*/
PHP_FUNCTION(mb_strtoupper)
{
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
char *str;
2014-08-27 17:25:28 +00:00
size_t str_len, from_encoding_len;
char *newstr;
size_t ret_len;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
&from_encoding, &from_encoding_len) == FAILURE) {
return;
}
2014-12-13 22:06:14 +00:00
newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
2002-11-03 08:50:43 +00:00
if (newstr) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-24 04:37:05 +00:00
RETVAL_STRINGL(newstr, ret_len);
efree(newstr);
return;
2002-11-03 08:50:43 +00:00
}
RETURN_FALSE;
}
/* }}} */
2002-11-09 23:10:02 +00:00
/* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
* Returns a lowercased version of sourcestring
*/
PHP_FUNCTION(mb_strtolower)
{
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
char *str;
2014-08-27 17:25:28 +00:00
size_t str_len, from_encoding_len;
char *newstr;
size_t ret_len;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
&from_encoding, &from_encoding_len) == FAILURE) {
return;
}
2014-12-13 22:06:14 +00:00
newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
2002-11-03 08:50:43 +00:00
if (newstr) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-24 04:37:05 +00:00
RETVAL_STRINGL(newstr, ret_len);
efree(newstr);
return;
2002-11-03 08:50:43 +00:00
}
RETURN_FALSE;
}
/* }}} */
/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
2002-01-07 06:58:01 +00:00
Returns the detected encoding of the given string (as a string) */
PHP_FUNCTION(mb_detect_encoding)
2001-05-01 01:52:55 +00:00
{
char *str;
2014-08-27 17:25:28 +00:00
size_t str_len;
zend_bool strict=0;
zval *encoding_list = NULL;
2001-05-01 01:52:55 +00:00
mbfl_string string;
const mbfl_encoding *ret;
const mbfl_encoding **elist, **list;
size_t size;
2001-05-01 01:52:55 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
/* make encoding list */
list = NULL;
size = 0;
if (encoding_list) {
switch (Z_TYPE_P(encoding_list)) {
2001-05-01 01:52:55 +00:00
case IS_ARRAY:
2014-12-13 22:06:14 +00:00
if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
if (list) {
efree(list);
list = NULL;
size = 0;
}
}
2001-05-01 01:52:55 +00:00
break;
default:
convert_to_string(encoding_list);
2014-12-13 22:06:14 +00:00
if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
if (list) {
efree(list);
list = NULL;
size = 0;
}
}
2001-05-01 01:52:55 +00:00
break;
}
if (size <= 0) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Illegal argument");
2001-05-01 01:52:55 +00:00
}
}
if (ZEND_NUM_ARGS() < 3) {
2008-07-25 14:04:54 +00:00
strict = (zend_bool)MBSTRG(strict_detection);
}
2001-05-01 01:52:55 +00:00
if (size > 0 && list != NULL) {
2002-05-24 22:30:24 +00:00
elist = list;
2001-05-01 01:52:55 +00:00
} else {
2002-05-24 22:30:24 +00:00
elist = MBSTRG(current_detect_order_list);
2001-05-01 01:52:55 +00:00
size = MBSTRG(current_detect_order_list_size);
}
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.val = (unsigned char *)str;
string.len = str_len;
ret = mbfl_identify_encoding2(&string, elist, size, strict);
2001-05-01 01:52:55 +00:00
if (list != NULL) {
efree((void *)list);
}
if (ret == NULL) {
RETURN_FALSE;
2001-05-01 01:52:55 +00:00
}
2014-03-23 11:45:48 +00:00
RETVAL_STRING((char *)ret->name);
2001-05-01 01:52:55 +00:00
}
/* }}} */
/* {{{ proto mixed mb_list_encodings()
Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)
{
const mbfl_encoding **encodings;
const mbfl_encoding *encoding;
int i;
if (zend_parse_parameters_none() == FAILURE) {
return;
}
array_init(return_value);
i = 0;
encodings = mbfl_get_supported_encodings();
while ((encoding = encodings[i++]) != NULL) {
2014-04-15 11:40:40 +00:00
add_next_index_string(return_value, (char *) encoding->name);
}
}
/* }}} */
2008-09-13 00:30:51 +00:00
/* {{{ proto array mb_encoding_aliases(string encoding)
Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)
{
const mbfl_encoding *encoding;
char *name = NULL;
2014-08-27 17:25:28 +00:00
size_t name_len;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
return;
}
2008-09-13 00:30:51 +00:00
encoding = mbfl_name2encoding(name);
if (!encoding) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
RETURN_FALSE;
}
2008-09-13 00:30:51 +00:00
array_init(return_value);
if (encoding->aliases != NULL) {
const char **alias;
for (alias = *encoding->aliases; *alias; ++alias) {
2014-04-15 11:40:40 +00:00
add_next_index_string(return_value, (char *)*alias);
}
}
}
/* }}} */
/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
2002-01-07 06:58:01 +00:00
Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)
2001-05-01 01:52:55 +00:00
{
enum mbfl_no_encoding charset, transenc;
mbfl_string string, result, *ret;
char *charset_name = NULL;
2014-08-27 17:25:28 +00:00
size_t charset_name_len;
char *trans_enc_name = NULL;
2014-08-27 17:25:28 +00:00
size_t trans_enc_name_len;
char *linefeed = "\r\n";
2016-03-07 21:01:44 +00:00
size_t linefeed_len, string_len;
2014-08-25 17:24:55 +00:00
zend_long indent = 0;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
return;
}
string.len = (uint32_t)string_len;
2001-05-01 01:52:55 +00:00
charset = mbfl_no_encoding_pass;
transenc = mbfl_no_encoding_base64;
if (charset_name != NULL) {
charset = mbfl_name2no_encoding(charset_name);
2001-05-01 01:52:55 +00:00
if (charset == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
} else {
const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
if (lang != NULL) {
charset = lang->mail_charset;
transenc = lang->mail_header_encoding;
2001-05-01 01:52:55 +00:00
}
}
if (trans_enc_name != NULL) {
if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
2001-05-01 01:52:55 +00:00
transenc = mbfl_no_encoding_base64;
} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
2001-05-01 01:52:55 +00:00
transenc = mbfl_no_encoding_qprint;
}
}
mbfl_string_init(&result);
ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
2001-05-01 01:52:55 +00:00
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2014-03-24 04:37:05 +00:00
efree(ret->val);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
/* {{{ proto string mb_decode_mimeheader(string string)
2002-01-07 06:58:01 +00:00
Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)
2001-05-01 01:52:55 +00:00
{
mbfl_string string, result, *ret;
2016-03-07 21:01:44 +00:00
size_t string_len;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
return;
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
return;
}
string.len = (uint32_t)string_len;
mbfl_string_init(&result);
ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
2001-05-01 01:52:55 +00:00
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2014-03-24 04:37:05 +00:00
efree(ret->val);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
2002-01-07 06:58:01 +00:00
/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
2001-05-06 22:25:14 +00:00
Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)
2001-05-01 01:52:55 +00:00
{
int opt, i;
2001-05-01 01:52:55 +00:00
mbfl_string string, result, *ret;
char *optstr = NULL;
2014-08-27 17:25:28 +00:00
size_t optstr_len;
char *encname = NULL;
2016-03-07 21:01:44 +00:00
size_t encname_len, string_len;
2001-05-01 01:52:55 +00:00
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2001-05-01 01:52:55 +00:00
2016-03-07 21:01:44 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
2016-03-07 21:01:44 +00:00
if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
return;
}
string.len = (uint32_t)string_len;
2001-05-01 01:52:55 +00:00
/* option */
if (optstr != NULL) {
char *p = optstr;
int n = optstr_len;
2001-05-01 01:52:55 +00:00
i = 0;
opt = 0;
while (i < n) {
i++;
switch (*p++) {
case 'A':
opt |= 0x1;
break;
case 'a':
opt |= 0x10;
break;
case 'R':
opt |= 0x2;
break;
case 'r':
opt |= 0x20;
break;
case 'N':
opt |= 0x4;
break;
case 'n':
opt |= 0x40;
break;
case 'S':
opt |= 0x8;
break;
case 's':
opt |= 0x80;
break;
case 'K':
opt |= 0x100;
break;
case 'k':
opt |= 0x1000;
break;
case 'H':
opt |= 0x200;
break;
case 'h':
opt |= 0x2000;
break;
case 'V':
opt |= 0x800;
break;
case 'C':
opt |= 0x10000;
break;
case 'c':
opt |= 0x20000;
break;
case 'M':
opt |= 0x100000;
break;
case 'm':
opt |= 0x200000;
break;
}
}
} else {
opt = 0x900;
}
/* encoding */
if (encname != NULL) {
string.no_encoding = mbfl_name2no_encoding(encname);
if (string.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encname);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
}
2003-08-23 08:59:47 +00:00
ret = mbfl_ja_jp_hantozen(&string, &result, opt);
2001-05-01 01:52:55 +00:00
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2014-03-24 05:57:59 +00:00
efree(ret->val);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
}
/* }}} */
#define PHP_MBSTR_STACK_BLOCK_SIZE 32
2001-05-01 01:52:55 +00:00
/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
2001-05-06 22:25:14 +00:00
Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)
2001-05-01 01:52:55 +00:00
{
zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
2001-05-01 01:52:55 +00:00
HashTable *target_hash;
mbfl_string string, result, *ret;
const mbfl_encoding *from_encoding, *to_encoding;
2001-05-01 01:52:55 +00:00
mbfl_encoding_detector *identd;
mbfl_buffer_converter *convd;
int n, argc, stack_level, stack_max;
size_t to_enc_len;
size_t elistsz;
const mbfl_encoding **elist;
char *to_enc;
2015-01-03 09:22:58 +00:00
void *ptmp;
int recursion_error = 0;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
/* new encoding */
to_encoding = mbfl_name2encoding(to_enc);
if (!to_encoding) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
}
/* initialize string */
mbfl_string_init(&string);
mbfl_string_init(&result);
from_encoding = MBSTRG(current_internal_encoding);
string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
2001-05-01 01:52:55 +00:00
/* pre-conversion encoding */
elist = NULL;
elistsz = 0;
2014-03-23 11:45:48 +00:00
switch (Z_TYPE_P(zfrom_enc)) {
case IS_ARRAY:
2014-12-13 22:06:14 +00:00
php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
2014-03-23 11:45:48 +00:00
break;
default:
convert_to_string_ex(zfrom_enc);
2014-12-13 22:06:14 +00:00
php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
2014-03-23 11:45:48 +00:00
break;
2001-05-01 01:52:55 +00:00
}
2014-03-23 11:45:48 +00:00
2001-05-01 01:52:55 +00:00
if (elistsz <= 0) {
2011-03-14 16:00:59 +00:00
from_encoding = &mbfl_encoding_pass;
2001-05-01 01:52:55 +00:00
} else if (elistsz == 1) {
from_encoding = *elist;
} else {
/* auto detect */
from_encoding = NULL;
stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
2014-03-23 11:45:48 +00:00
stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
stack_level = 0;
identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
if (identd != NULL) {
n = 0;
while (n < argc || stack_level > 0) {
if (stack_level <= 0) {
2014-03-23 11:45:48 +00:00
var = &args[n++];
2014-03-23 15:38:32 +00:00
ZVAL_DEREF(var);
SEPARATE_ZVAL_NOREF(var);
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
target_hash = HASH_OF(var);
if (target_hash != NULL) {
zend_hash_internal_pointer_reset(target_hash);
}
}
} else {
stack_level--;
2014-03-23 11:45:48 +00:00
var = &stack[stack_level];
}
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
target_hash = HASH_OF(var);
if (target_hash != NULL) {
2014-03-23 11:45:48 +00:00
while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
if (!Z_IMMUTABLE_P(var)) {
if (++target_hash->u.v.nApplyCount > 1) {
--target_hash->u.v.nApplyCount;
recursion_error = 1;
goto detect_end;
}
}
zend_hash_move_forward(target_hash);
if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
hash_entry = Z_INDIRECT_P(hash_entry);
}
ZVAL_DEREF(hash_entry);
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
if (stack_level >= stack_max) {
stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
2014-03-23 11:45:48 +00:00
ptmp = erealloc(stack, sizeof(zval) * stack_max);
stack = (zval *)ptmp;
}
2014-03-23 11:45:48 +00:00
ZVAL_COPY_VALUE(&stack[stack_level], var);
stack_level++;
var = hash_entry;
2014-03-23 11:45:48 +00:00
target_hash = HASH_OF(var);
if (target_hash != NULL) {
zend_hash_internal_pointer_reset(target_hash);
continue;
}
2014-03-23 11:45:48 +00:00
} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
2014-08-25 17:24:55 +00:00
string.len = Z_STRLEN_P(hash_entry);
2003-08-23 08:59:47 +00:00
if (mbfl_encoding_detector_feed(identd, &string)) {
goto detect_end; /* complete detecting */
2001-05-01 01:52:55 +00:00
}
}
}
}
2014-03-23 11:45:48 +00:00
} else if (Z_TYPE_P(var) == IS_STRING) {
string.val = (unsigned char *)Z_STRVAL_P(var);
2014-08-25 17:24:55 +00:00
string.len = Z_STRLEN_P(var);
2003-08-23 08:59:47 +00:00
if (mbfl_encoding_detector_feed(identd, &string)) {
goto detect_end; /* complete detecting */
}
}
}
detect_end:
from_encoding = mbfl_encoding_detector_judge2(identd);
2003-08-23 08:59:47 +00:00
mbfl_encoding_detector_delete(identd);
}
if (recursion_error) {
while(stack_level-- && (var = &stack[stack_level])) {
if (!Z_IMMUTABLE_P(var)) {
if (HASH_OF(var)->u.v.nApplyCount > 1) {
HASH_OF(var)->u.v.nApplyCount--;
}
}
}
efree(stack);
if (elist != NULL) {
efree((void *)elist);
}
2016-11-21 22:53:37 +00:00
php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
RETURN_FALSE;
}
efree(stack);
if (!from_encoding) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
2011-03-14 16:00:59 +00:00
from_encoding = &mbfl_encoding_pass;
}
}
if (elist != NULL) {
efree((void *)elist);
}
/* create converter */
convd = NULL;
2011-03-14 16:00:59 +00:00
if (from_encoding != &mbfl_encoding_pass) {
convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
if (convd == NULL) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unable to create converter");
RETURN_FALSE;
}
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
}
/* convert */
if (convd != NULL) {
stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
2014-03-23 11:45:48 +00:00
stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
stack_level = 0;
n = 0;
while (n < argc || stack_level > 0) {
if (stack_level <= 0) {
2014-03-23 11:45:48 +00:00
var = &args[n++];
2014-03-23 15:38:32 +00:00
ZVAL_DEREF(var);
SEPARATE_ZVAL_NOREF(var);
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
target_hash = HASH_OF(var);
if (target_hash != NULL) {
zend_hash_internal_pointer_reset(target_hash);
2001-05-01 01:52:55 +00:00
}
}
} else {
stack_level--;
2014-03-23 11:45:48 +00:00
var = &stack[stack_level];
}
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
target_hash = HASH_OF(var);
if (target_hash != NULL) {
while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
zend_hash_move_forward(target_hash);
if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
}
hash_entry = hash_entry_ptr;
ZVAL_DEREF(hash_entry);
2014-03-23 11:45:48 +00:00
if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
if (!Z_IMMUTABLE_P(hash_entry)) {
if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) {
--(HASH_OF(hash_entry)->u.v.nApplyCount);
recursion_error = 1;
goto conv_end;
}
}
if (stack_level >= stack_max) {
stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
2014-03-23 11:45:48 +00:00
ptmp = erealloc(stack, sizeof(zval) * stack_max);
stack = (zval *)ptmp;
}
2014-03-23 11:45:48 +00:00
ZVAL_COPY_VALUE(&stack[stack_level], var);
stack_level++;
var = hash_entry;
SEPARATE_ZVAL(hash_entry);
2014-03-23 11:45:48 +00:00
target_hash = HASH_OF(var);
if (target_hash != NULL) {
zend_hash_internal_pointer_reset(target_hash);
continue;
}
2014-03-23 11:45:48 +00:00
} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
2014-08-25 17:24:55 +00:00
string.len = Z_STRLEN_P(hash_entry);
2003-08-23 08:59:47 +00:00
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
if (ret != NULL) {
zval_ptr_dtor(hash_entry_ptr);
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2014-03-23 11:45:48 +00:00
}
}
}
}
2014-03-23 11:45:48 +00:00
} else if (Z_TYPE_P(var) == IS_STRING) {
string.val = (unsigned char *)Z_STRVAL_P(var);
2014-08-25 17:24:55 +00:00
string.len = Z_STRLEN_P(var);
2014-03-23 11:45:48 +00:00
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
if (ret != NULL) {
zval_ptr_dtor(var);
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
ZVAL_STRINGL(var, (char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
}
2001-05-01 01:52:55 +00:00
}
}
conv_end:
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2003-08-23 08:59:47 +00:00
mbfl_buffer_converter_delete(convd);
if (recursion_error) {
while(stack_level-- && (var = &stack[stack_level])) {
if (!Z_IMMUTABLE_P(var)) {
if (HASH_OF(var)->u.v.nApplyCount > 1) {
HASH_OF(var)->u.v.nApplyCount--;
}
}
}
efree(stack);
2016-11-21 22:53:37 +00:00
php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
RETURN_FALSE;
}
efree(stack);
2001-05-01 01:52:55 +00:00
}
if (from_encoding) {
2014-03-23 11:45:48 +00:00
RETURN_STRING(from_encoding->name);
2001-05-01 01:52:55 +00:00
} else {
RETURN_FALSE;
}
}
/* }}} */
/* {{{ HTML numeric entity */
/* {{{ static void php_mb_numericentity_exec() */
2001-05-01 01:52:55 +00:00
static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
2001-05-01 01:52:55 +00:00
{
char *str, *encoding = NULL;
2014-08-27 17:25:28 +00:00
size_t str_len, encoding_len;
2014-03-23 11:45:48 +00:00
zval *zconvmap, *hash_entry;
2001-05-01 01:52:55 +00:00
HashTable *target_hash;
int i, *convmap, *mapelm, mapsize=0;
zend_bool is_hex = 0;
2001-05-01 01:52:55 +00:00
mbfl_string string, result, *ret;
enum mbfl_no_encoding no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
string.val = (unsigned char *)str;
string.len = str_len;
2001-05-01 01:52:55 +00:00
/* encoding */
if (encoding && encoding_len > 0) {
no_encoding = mbfl_name2no_encoding(encoding);
2001-05-01 01:52:55 +00:00
if (no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2001-05-01 01:52:55 +00:00
RETURN_FALSE;
} else {
string.no_encoding = no_encoding;
}
}
if (type == 0 && is_hex) {
type = 2; /* output in hex format */
}
2001-05-01 01:52:55 +00:00
/* conversion map */
convmap = NULL;
if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
target_hash = Z_ARRVAL_P(zconvmap);
2001-05-01 01:52:55 +00:00
i = zend_hash_num_elements(target_hash);
if (i > 0) {
2003-05-01 21:08:31 +00:00
convmap = (int *)safe_emalloc(i, sizeof(int), 0);
mapelm = convmap;
mapsize = 0;
2014-05-28 13:43:11 +00:00
ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
2014-08-25 19:51:49 +00:00
convert_to_long_ex(hash_entry);
2014-08-25 17:24:55 +00:00
*mapelm++ = Z_LVAL_P(hash_entry);
mapsize++;
2014-05-28 13:43:11 +00:00
} ZEND_HASH_FOREACH_END();
2001-05-01 01:52:55 +00:00
}
}
if (convmap == NULL) {
RETURN_FALSE;
}
mapsize /= 4;
2003-08-23 08:59:47 +00:00
ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
2001-05-01 01:52:55 +00:00
if (ret != NULL) {
2014-04-15 17:56:30 +00:00
// TODO: avoid reallocation ???
2014-03-23 11:45:48 +00:00
RETVAL_STRINGL((char *)ret->val, ret->len);
2014-03-23 15:38:32 +00:00
efree(ret->val);
2001-05-01 01:52:55 +00:00
} else {
RETVAL_FALSE;
}
efree((void *)convmap);
}
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
2002-01-07 06:58:01 +00:00
Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
2001-05-01 01:52:55 +00:00
{
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2001-05-01 01:52:55 +00:00
}
/* }}} */
/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
2001-05-06 22:25:14 +00:00
Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)
2001-05-01 01:52:55 +00:00
{
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2001-05-01 01:52:55 +00:00
}
/* }}} */
/* }}} */
2001-05-01 01:52:55 +00:00
/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
 *  Sends an email message with MIME scheme
 */

/*
 * For use inside a loop that walks `str` with index `pos`: when str[pos]
 * begins a folded-header separator (CR LF followed by a space or tab), move
 * pos past the CR LF and any further run of spaces/tabs, then `continue` the
 * enclosing loop. Otherwise does nothing.
 *
 * This is deliberately a bare `if` rather than do { } while (0): the
 * `continue` has to act on the caller's loop.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {	\
		pos += 2;											\
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {							\
			pos++;											\
		}												\
		continue;											\
	}

/*
 * Replaces every NUL byte in the `len` bytes starting at `str` with a space.
 * Expects `char *pp, *ee;` to be declared in the calling scope. The last line
 * must not end in a line continuation, or the following source line would
 * become part of the macro.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	pp = str;					\
	ee = pp + len;					\
	while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
		*pp = ' ';				\
	}
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
const char *ps;
size_t icnt;
int state = 0;
int crlf_state = -1;
char *token = NULL;
size_t token_pos = 0;
2014-04-11 04:53:35 +00:00
zend_string *fld_name, *fld_val;
ps = str;
icnt = str_len;
2014-04-11 04:53:35 +00:00
fld_name = fld_val = NULL;
/*
* C o n t e n t - T y p e : t e x t / h t m l \r\n
2015-01-03 09:22:58 +00:00
* ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
* state 0 1 2 3
*
* C o n t e n t - T y p e : t e x t / h t m l \r\n
2015-01-03 09:22:58 +00:00
* ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
* crlf_state -1 0 1 -1
*
*/
while (icnt > 0) {
switch (*ps) {
case ':':
if (crlf_state == 1) {
2014-04-11 04:53:35 +00:00
token_pos++;
}
if (state == 0 || state == 1) {
if(token && token_pos > 0) {
fld_name = zend_string_init(token, token_pos, 0);
}
state = 2;
} else {
2014-04-11 04:53:35 +00:00
token_pos++;
}
crlf_state = 0;
break;
case '\n':
if (crlf_state == -1) {
goto out;
}
crlf_state = -1;
break;
case '\r':
if (crlf_state == 1) {
2014-04-11 04:53:35 +00:00
token_pos++;
} else {
crlf_state = 1;
}
break;
case ' ': case '\t':
if (crlf_state == -1) {
if (state == 3) {
/* continuing from the previous line */
state = 4;
} else {
/* simply skipping this new line */
state = 5;
}
} else {
if (crlf_state == 1) {
2014-04-11 04:53:35 +00:00
token_pos++;
}
if (state == 1 || state == 3) {
2014-04-11 04:53:35 +00:00
token_pos++;
}
}
crlf_state = 0;
break;
default:
switch (state) {
case 0:
2014-04-11 04:53:35 +00:00
token = (char*)ps;
token_pos = 0;
state = 1;
break;
2015-01-03 09:22:58 +00:00
case 2:
if (crlf_state != -1) {
2014-04-11 04:53:35 +00:00
token = (char*)ps;
token_pos = 0;
state = 3;
break;
}
/* break is missing intentionally */
case 3:
if (crlf_state == -1) {
if(token && token_pos > 0) {
fld_val = zend_string_init(token, token_pos, 0);
}
2014-04-11 04:53:35 +00:00
if (fld_name != NULL && fld_val != NULL) {
zval val;
/* FIXME: some locale free implementation is
* really required here,,, */
php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
2014-04-11 04:53:35 +00:00
ZVAL_STR(&val, fld_val);
2014-04-11 04:53:35 +00:00
zend_hash_update(ht, fld_name, &val);
2014-08-25 17:24:55 +00:00
zend_string_release(fld_name);
}
2014-04-11 04:53:35 +00:00
fld_name = fld_val = NULL;
token = (char*)ps;
token_pos = 0;
state = 1;
}
break;
case 4:
2014-04-11 04:53:35 +00:00
token_pos++;
state = 3;
break;
}
if (crlf_state == 1) {
2014-04-11 04:53:35 +00:00
token_pos++;
}
2014-04-11 04:53:35 +00:00
token_pos++;
crlf_state = 0;
break;
}
ps++, icnt--;
}
out:
if (state == 2) {
2014-04-11 04:53:35 +00:00
token = "";
token_pos = 0;
state = 3;
}
if (state == 3) {
if(token && token_pos > 0) {
fld_val = zend_string_init(token, token_pos, 0);
}
2014-04-11 04:53:35 +00:00
if (fld_name != NULL && fld_val != NULL) {
zval val;
/* FIXME: some locale free implementation is
* really required here,,, */
php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
2014-04-11 04:53:35 +00:00
ZVAL_STR(&val, fld_val);
2014-04-11 04:53:35 +00:00
zend_hash_update(ht, fld_name, &val);
2014-08-25 17:24:55 +00:00
zend_string_release(fld_name);
}
}
return state;
}
PHP_FUNCTION(mb_send_mail)
2001-05-01 01:52:55 +00:00
{
int n;
2008-07-24 13:46:50 +00:00
char *to = NULL;
size_t to_len;
2008-07-24 13:46:50 +00:00
char *message = NULL;
size_t message_len;
2008-07-24 13:46:50 +00:00
char *headers = NULL;
size_t headers_len;
2008-07-24 13:46:50 +00:00
char *subject = NULL;
2014-03-23 11:45:48 +00:00
zend_string *extra_cmd = NULL;
size_t subject_len;
int i;
char *to_r = NULL;
char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
struct {
int cnt_type:1;
int cnt_trans_enc:1;
} suppressed_hdrs = { 0, 0 };
2008-07-24 13:46:50 +00:00
char *message_buf = NULL, *subject_buf = NULL, *p;
2001-05-01 01:52:55 +00:00
mbfl_string orig_str, conv_str;
mbfl_string *pstr; /* pointer to mbfl string for return value */
enum mbfl_no_encoding
2010-12-19 17:28:57 +00:00
tran_cs, /* transfar text charset */
head_enc, /* header transfar encoding */
body_enc; /* body transfar encoding */
2001-05-01 01:52:55 +00:00
mbfl_memory_device device; /* automatic allocateable buffer for additional header */
const mbfl_language *lang;
2001-05-01 01:52:55 +00:00
int err = 0;
HashTable ht_headers;
2014-04-11 04:53:35 +00:00
zval *s;
2003-08-23 08:59:47 +00:00
extern void mbfl_memory_device_unput(mbfl_memory_device *device);
char *pp, *ee;
2001-05-01 01:52:55 +00:00
/* initialize */
2003-08-23 08:59:47 +00:00
mbfl_memory_device_init(&device, 0, 0);
2001-05-01 01:52:55 +00:00
mbfl_string_init(&orig_str);
mbfl_string_init(&conv_str);
/* character-set, transfer-encoding */
tran_cs = mbfl_no_encoding_utf8;
head_enc = mbfl_no_encoding_base64;
body_enc = mbfl_no_encoding_base64;
lang = mbfl_no2language(MBSTRG(language));
if (lang != NULL) {
tran_cs = lang->mail_charset;
head_enc = lang->mail_header_encoding;
body_enc = lang->mail_body_encoding;
2001-05-01 01:52:55 +00:00
}
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
return;
2001-05-01 01:52:55 +00:00
}
/* ASCIIZ check */
MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
if (headers) {
MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
}
if (extra_cmd) {
MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
}
2014-04-11 04:53:35 +00:00
zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
if (headers != NULL) {
_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
}
if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
char *tmp;
char *param_name;
char *charset = NULL;
ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
2014-04-11 04:53:35 +00:00
p = strchr(Z_STRVAL_P(s), ';');
if (p != NULL) {
/* skipping the padded spaces */
do {
++p;
} while (*p == ' ' || *p == '\t');
if (*p != '\0') {
if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
if (strcasecmp(param_name, "charset") == 0) {
enum mbfl_no_encoding _tran_cs = tran_cs;
2015-01-03 09:22:58 +00:00
charset = php_strtok_r(NULL, "= \"", &tmp);
if (charset != NULL) {
_tran_cs = mbfl_name2no_encoding(charset);
}
if (_tran_cs == mbfl_no_encoding_invalid) {
2015-01-03 09:22:58 +00:00
php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
_tran_cs = mbfl_no_encoding_ascii;
}
tran_cs = _tran_cs;
}
}
}
}
suppressed_hdrs.cnt_type = 1;
}
if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
enum mbfl_no_encoding _body_enc;
ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
2014-04-11 04:53:35 +00:00
_body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
switch (_body_enc) {
case mbfl_no_encoding_base64:
case mbfl_no_encoding_7bit:
case mbfl_no_encoding_8bit:
body_enc = _body_enc;
break;
default:
2015-01-03 09:22:58 +00:00
php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
body_enc = mbfl_no_encoding_8bit;
break;
}
suppressed_hdrs.cnt_trans_enc = 1;
}
2001-05-01 01:52:55 +00:00
/* To: */
if (to != NULL) {
2010-12-19 17:28:57 +00:00
if (to_len > 0) {
to_r = estrndup(to, to_len);
for (; to_len; to_len--) {
if (!isspace((unsigned char) to_r[to_len - 1])) {
break;
}
to_r[to_len - 1] = '\0';
}
for (i = 0; to_r[i]; i++) {
if (iscntrl((unsigned char) to_r[i])) {
/* According to RFC 822, section 3.1.1 long headers may be separated into
* parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
* To prevent these separators from being replaced with a space, we use the
* SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
*/
SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
to_r[i] = ' ';
}
2010-12-19 17:28:57 +00:00
}
} else {
to_r = to;
}
} else {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Missing To: field");
2001-05-01 01:52:55 +00:00
err = 1;
}
/* Subject: */
2014-09-19 08:09:37 +00:00
if (subject != NULL) {
orig_str.no_language = MBSTRG(language);
orig_str.val = (unsigned char *)subject;
orig_str.len = subject_len;
orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
2001-05-01 01:52:55 +00:00
}
2003-08-23 08:59:47 +00:00
pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
2001-05-01 01:52:55 +00:00
if (pstr != NULL) {
2002-11-02 13:38:32 +00:00
subject_buf = subject = (char *)pstr->val;
2001-05-01 01:52:55 +00:00
}
} else {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Missing Subject: field");
2001-05-01 01:52:55 +00:00
err = 1;
}
/* message body */
if (message != NULL) {
orig_str.no_language = MBSTRG(language);
2008-07-24 13:46:50 +00:00
orig_str.val = (unsigned char *)message;
orig_str.len = (unsigned int)message_len;
orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
2001-05-01 01:52:55 +00:00
}
pstr = NULL;
{
mbfl_string tmpstr;
2003-08-23 08:59:47 +00:00
if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
tmpstr.no_encoding=mbfl_no_encoding_8bit;
2003-08-23 08:59:47 +00:00
pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
efree(tmpstr.val);
}
}
2001-05-01 01:52:55 +00:00
if (pstr != NULL) {
2002-11-02 13:38:32 +00:00
message_buf = message = (char *)pstr->val;
2001-05-01 01:52:55 +00:00
}
} else {
/* this is not really an error, so it is allowed. */
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Empty message body");
2001-05-01 01:52:55 +00:00
message = NULL;
}
/* other headers */
#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
if (headers != NULL) {
p = headers;
n = headers_len;
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strncat(&device, p, n);
if (n > 0 && p[n - 1] != '\n') {
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strncat(&device, "\n", 1);
2001-05-01 01:52:55 +00:00
}
}
2014-03-23 11:45:48 +00:00
if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
mbfl_memory_device_strncat(&device, "\n", 1);
}
if (!suppressed_hdrs.cnt_type) {
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
p = (char *)mbfl_no2preferred_mime_name(tran_cs);
if (p != NULL) {
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
mbfl_memory_device_strcat(&device, p);
}
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strncat(&device, "\n", 1);
2001-05-01 01:52:55 +00:00
}
if (!suppressed_hdrs.cnt_trans_enc) {
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
p = (char *)mbfl_no2preferred_mime_name(body_enc);
if (p == NULL) {
p = "7bit";
}
2003-08-23 08:59:47 +00:00
mbfl_memory_device_strcat(&device, p);
mbfl_memory_device_strncat(&device, "\n", 1);
2001-05-01 01:52:55 +00:00
}
2003-08-23 08:59:47 +00:00
mbfl_memory_device_unput(&device);
mbfl_memory_device_output('\0', &device);
2002-11-02 13:38:32 +00:00
headers = (char *)device.buffer;
2001-05-01 01:52:55 +00:00
if (force_extra_parameters) {
extra_cmd = php_escape_shell_cmd(force_extra_parameters);
} else if (extra_cmd) {
extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
2015-01-03 09:22:58 +00:00
}
if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
2001-05-01 01:52:55 +00:00
RETVAL_TRUE;
} else {
RETVAL_FALSE;
}
if (extra_cmd) {
2014-08-25 17:24:55 +00:00
zend_string_release(extra_cmd);
}
2014-03-23 11:45:48 +00:00
if (to_r != to) {
efree(to_r);
}
2001-05-01 01:52:55 +00:00
if (subject_buf) {
efree((void *)subject_buf);
}
if (message_buf) {
efree((void *)message_buf);
}
2003-08-23 08:59:47 +00:00
mbfl_memory_device_clear(&device);
zend_hash_destroy(&ht_headers);
2001-05-01 01:52:55 +00:00
}
#undef SKIP_LONG_HEADER_SEP_MBSTRING
#undef MAIL_ASCIIZ_CHECK_MBSTRING
#undef PHP_MBSTR_MAIL_MIME_HEADER1
#undef PHP_MBSTR_MAIL_MIME_HEADER2
#undef PHP_MBSTR_MAIL_MIME_HEADER3
#undef PHP_MBSTR_MAIL_MIME_HEADER4
/* }}} */
/* {{{ proto mixed mb_get_info([string type])
2002-01-07 06:58:01 +00:00
Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)
{
char *typ = NULL;
size_t typ_len;
size_t n;
char *name;
const struct mb_overload_def *over_func;
2014-03-23 11:45:48 +00:00
zval row1, row2;
const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
const mbfl_encoding **entry;
2014-12-13 22:06:14 +00:00
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
2014-03-23 11:45:48 +00:00
return;
}
2005-01-28 02:00:39 +00:00
if (!typ || !strcasecmp("all", typ)) {
array_init(return_value);
if (MBSTRG(current_internal_encoding)) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
}
if (MBSTRG(http_input_identify)) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
}
if (MBSTRG(current_http_output_encoding)) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
}
2014-03-24 04:37:05 +00:00
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "http_output_conv_mimetypes", name);
}
2014-08-25 17:24:55 +00:00
add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
if (MBSTRG(func_overload)){
over_func = &(mb_ovld[0]);
2014-03-23 11:45:48 +00:00
array_init(&row1);
while (over_func->type > 0) {
if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
2014-04-15 11:40:40 +00:00
add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
}
over_func++;
}
2014-03-23 11:45:48 +00:00
add_assoc_zval(return_value, "func_overload_list", &row1);
} else {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "func_overload_list", "no overload");
}
2006-02-11 02:26:07 +00:00
if (lang != NULL) {
if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "mail_charset", name);
}
if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "mail_header_encoding", name);
}
if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "mail_body_encoding", name);
}
2006-02-11 02:26:07 +00:00
}
2014-08-25 17:24:55 +00:00
add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
if (MBSTRG(encoding_translation)) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "encoding_translation", "On");
} else {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "encoding_translation", "Off");
}
if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "language", name);
2015-01-03 09:22:58 +00:00
}
n = MBSTRG(current_detect_order_list_size);
entry = MBSTRG(current_detect_order_list);
if (n > 0) {
size_t i;
2014-03-23 11:45:48 +00:00
array_init(&row2);
for (i = 0; i < n; i++) {
2014-04-15 11:40:40 +00:00
add_next_index_string(&row2, (*entry)->name);
entry++;
}
2014-03-23 11:45:48 +00:00
add_assoc_zval(return_value, "detect_order", &row2);
}
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "substitute_character", "none");
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "substitute_character", "long");
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "substitute_character", "entity");
} else {
2014-08-25 17:24:55 +00:00
add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
}
if (MBSTRG(strict_detection)) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "strict_detection", "On");
} else {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, "strict_detection", "Off");
}
} else if (!strcasecmp("internal_encoding", typ)) {
if (MBSTRG(current_internal_encoding)) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
2015-01-03 09:22:58 +00:00
}
} else if (!strcasecmp("http_input", typ)) {
if (MBSTRG(http_input_identify)) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
2015-01-03 09:22:58 +00:00
}
} else if (!strcasecmp("http_output", typ)) {
if (MBSTRG(current_http_output_encoding)) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
2015-01-03 09:22:58 +00:00
}
} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
2014-03-24 04:37:05 +00:00
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING(name);
}
} else if (!strcasecmp("func_overload", typ)) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(MBSTRG(func_overload));
} else if (!strcasecmp("func_overload_list", typ)) {
if (MBSTRG(func_overload)){
over_func = &(mb_ovld[0]);
array_init(return_value);
while (over_func->type > 0) {
if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
2014-04-15 11:40:40 +00:00
add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
}
over_func++;
}
} else {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("no overload");
}
} else if (!strcasecmp("mail_charset", typ)) {
if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING(name);
}
} else if (!strcasecmp("mail_header_encoding", typ)) {
if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING(name);
}
} else if (!strcasecmp("mail_body_encoding", typ)) {
if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING(name);
}
2006-03-21 02:20:15 +00:00
} else if (!strcasecmp("illegal_chars", typ)) {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(MBSTRG(illegalchars));
} else if (!strcasecmp("encoding_translation", typ)) {
if (MBSTRG(encoding_translation)) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("On");
} else {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("Off");
}
} else if (!strcasecmp("language", typ)) {
if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING(name);
2015-01-03 09:22:58 +00:00
}
} else if (!strcasecmp("detect_order", typ)) {
n = MBSTRG(current_detect_order_list_size);
entry = MBSTRG(current_detect_order_list);
if (n > 0) {
size_t i;
array_init(return_value);
for (i = 0; i < n; i++) {
2014-04-15 11:40:40 +00:00
add_next_index_string(return_value, (*entry)->name);
2011-03-14 16:00:59 +00:00
entry++;
}
}
} else if (!strcasecmp("substitute_character", typ)) {
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("none");
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("long");
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("entity");
} else {
2014-08-25 17:24:55 +00:00
RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
}
} else if (!strcasecmp("strict_detection", typ)) {
if (MBSTRG(strict_detection)) {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("On");
} else {
2014-03-23 11:45:48 +00:00
RETVAL_STRING("Off");
}
} else {
RETURN_FALSE;
}
}
/* }}} */
2015-03-01 12:43:09 +00:00
MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
2006-03-21 07:56:28 +00:00
{
const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
2015-03-01 12:43:09 +00:00
mbfl_buffer_converter *convd;
2006-03-21 07:56:28 +00:00
mbfl_string string, result, *ret = NULL;
long illegalchars = 0;
2015-03-01 12:43:09 +00:00
if (input == NULL) {
return MBSTRG(illegalchars) == 0;
2006-03-21 07:56:28 +00:00
}
if (enc != NULL) {
encoding = mbfl_name2encoding(enc);
if (!encoding || encoding == &mbfl_encoding_pass) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
2015-03-01 12:43:09 +00:00
return 0;
2006-03-21 07:56:28 +00:00
}
}
convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
2015-03-01 12:43:09 +00:00
2006-03-21 07:56:28 +00:00
if (convd == NULL) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unable to create converter");
2015-03-01 12:43:09 +00:00
return 0;
2015-01-03 09:22:58 +00:00
}
2015-03-01 12:43:09 +00:00
2008-09-12 23:35:01 +00:00
mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
2015-01-03 09:22:58 +00:00
mbfl_buffer_converter_illegal_substchar(convd, 0);
2006-03-21 07:56:28 +00:00
/* initialize string */
mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
2006-03-21 07:56:28 +00:00
mbfl_string_init(&result);
2015-03-01 12:43:09 +00:00
string.val = (unsigned char *) input;
string.len = length;
2006-03-21 07:56:28 +00:00
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
illegalchars = mbfl_buffer_illegalchars(convd);
mbfl_buffer_converter_delete(convd);
if (ret != NULL) {
2008-09-12 23:35:01 +00:00
if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
2015-03-01 12:43:09 +00:00
mbfl_string_clear(&result);
return 1;
}
2015-03-01 12:43:09 +00:00
2008-09-12 23:35:01 +00:00
mbfl_string_clear(&result);
2006-03-21 07:56:28 +00:00
}
2015-03-01 12:43:09 +00:00
return 0;
}
/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
   Check if the string is valid for the specified encoding.

   Both arguments are optional. The parsed values are forwarded unchanged to
   php_mb_check_encoding(), and its result is returned as a boolean. */
PHP_FUNCTION(mb_check_encoding)
{
	char *var = NULL;
	/* The lengths start at 0 so that omitted optional arguments never leave
	 * them indeterminate; var_len is passed on by value even when var is NULL. */
	size_t var_len = 0;
	char *enc = NULL;
	size_t enc_len = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
		return;
	}

	RETURN_BOOL(php_mb_check_encoding(var, var_len, enc));
}
/* }}} */
/* {{{ php_mb_populate_current_detect_order_list */
2014-12-13 22:06:14 +00:00
static void php_mb_populate_current_detect_order_list(void)
{
const mbfl_encoding **entry = 0;
size_t nentries;
if (MBSTRG(current_detect_order_list)) {
return;
}
if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
nentries = MBSTRG(detect_order_list_size);
entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
} else {
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
2010-12-20 14:38:08 +00:00
size_t i;
nentries = MBSTRG(default_detect_order_list_size);
entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
for (i = 0; i < nentries; i++) {
entry[i] = mbfl_no2encoding(src[i]);
}
}
MBSTRG(current_detect_order_list) = entry;
MBSTRG(current_detect_order_list_size) = nentries;
}
2014-03-23 11:45:48 +00:00
/* }}} */
/* {{{ static int php_mb_encoding_translation() */
2015-01-03 09:22:58 +00:00
static int php_mb_encoding_translation(void)
{
return MBSTRG(encoding_translation);
}
/* }}} */
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex()
 * Return the byte length of the character starting at `s` in encoding `enc`.
 *
 * - MBCS encodings: looked up in enc->mblen_table using the first byte of
 *   `s`, when both the table and `s` are available.
 * - WCS2 (BE/LE) encodings: 2.
 * - WCS4 (BE/LE) encodings: 4.
 * - Anything else, including enc == NULL: 1.
 */
MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
{
	if (enc == NULL) {
		return 1;
	}

	if (enc->flag & MBFL_ENCTYPE_MBCS) {
		if (enc->mblen_table != NULL && s != NULL) {
			return enc->mblen_table[*(const unsigned char *)s];
		}
		return 1;
	}

	if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
		return 2;
	}

	if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
		return 4;
	}

	return 1;
}
/* }}} */
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
2014-12-13 22:06:14 +00:00
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
{
return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
}
/* }}} */
/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex()
 * Find the last character in `s` whose first byte equals the low byte of `c`,
 * stepping character by character using php_mb_mbchar_bytes_ex() so that only
 * the leading byte of each character is compared.
 *
 * nbytes == (size_t)-1: `s` is scanned up to its terminating NUL.
 * otherwise:            exactly `nbytes` bytes are scanned.
 *
 * Returns a pointer into `s`, or NULL when there is no match, when a
 * character would extend past `nbytes`, or when a character width of zero is
 * reported (which would otherwise prevent the scan from advancing).
 */
MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
{
	const unsigned char target = (unsigned char)c;
	const char *p = s;
	char *last = NULL;

	if (nbytes == (size_t)-1) {
		/* Bytes still to skip before the next character boundary. */
		size_t pending = 0;

		for (; *p != '\0'; ++p) {
			if (pending == 0) {
				if ((unsigned char)*p == target) {
					last = (char *)p;
				}
				pending = php_mb_mbchar_bytes_ex(p, enc);
				if (pending == 0) {
					return NULL; /* something is going wrong! */
				}
			}
			--pending;
		}
	} else {
		size_t remaining = nbytes;

		while (remaining > 0) {
			size_t char_len;

			if ((unsigned char)*p == target) {
				last = (char *)p;
			}
			char_len = php_mb_mbchar_bytes_ex(p, enc);
			/* A zero width would never consume input (endless loop); this
			 * mirrors the guard in the NUL-terminated branch above. */
			if (char_len == 0 || remaining < char_len) {
				return NULL;
			}
			p += char_len;
			remaining -= char_len;
		}
	}

	return last;
}
/* }}} */
/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
2014-12-13 22:06:14 +00:00
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
{
return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
}
/* }}} */
2006-05-30 15:43:09 +00:00
/* {{{ MBSTRING_API int php_mb_stripos()
*/
2014-12-13 22:06:14 +00:00
MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
2006-05-30 15:43:09 +00:00
{
int n;
mbfl_string haystack, needle;
n = -1;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2006-05-30 15:43:09 +00:00
do {
2008-07-24 13:46:50 +00:00
size_t len = 0;
2014-12-13 22:06:14 +00:00
haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
2008-07-24 13:46:50 +00:00
haystack.len = len;
2006-05-30 15:43:09 +00:00
if (!haystack.val) {
break;
}
if (haystack.len <= 0) {
break;
}
2014-12-13 22:06:14 +00:00
needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
2008-07-24 13:46:50 +00:00
needle.len = len;
2006-05-30 15:43:09 +00:00
if (!needle.val) {
break;
}
if (needle.len <= 0) {
break;
}
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2006-05-30 15:43:09 +00:00
break;
}
{
int haystack_char_len = mbfl_strlen(&haystack);
2015-01-03 09:22:58 +00:00
if (mode) {
if ((offset > 0 && offset > haystack_char_len) ||
(offset < 0 && -offset > haystack_char_len)) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
break;
}
} else {
if (offset < 0) {
offset += (long)haystack_char_len;
}
if (offset < 0 || offset > haystack_char_len) {
2014-12-13 22:06:14 +00:00
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
break;
}
}
2006-05-30 15:43:09 +00:00
}
n = mbfl_strpos(&haystack, &needle, offset, mode);
} while(0);
2008-07-24 13:46:50 +00:00
if (haystack.val) {
2006-05-30 15:43:09 +00:00
efree(haystack.val);
}
2008-07-24 13:46:50 +00:00
if (needle.val) {
2006-05-30 15:43:09 +00:00
efree(needle.val);
}
2008-07-24 13:46:50 +00:00
return n;
2006-05-30 15:43:09 +00:00
}
/* }}} */
2014-12-13 22:06:14 +00:00
/* Report MBSTRG(http_input_list) and its size through the two out-parameters. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
{
	const zend_encoding **encodings = (const zend_encoding **)MBSTRG(http_input_list);
	size_t count = MBSTRG(http_input_list_size);

	*list = encodings;
	*list_size = count;
}
/* }}} */
2014-12-13 22:06:14 +00:00
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
{
2011-03-14 16:00:59 +00:00
MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
}
/* }}} */
2001-05-01 01:52:55 +00:00
#endif /* HAVE_MBSTRING */
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: fdm=marker
 * vim: noet sw=4 ts=4
 */