mirror of
https://github.com/php/php-src.git
synced 2024-09-21 18:07:23 +00:00
Use fast conversion filters to implement php_mb_ord
Even for single-character strings, this is about 50% faster for ASCII, UTF-8, and UTF-16. For long strings, the performance gain is enormous, since the old code would convert the ENTIRE string, just to pick out the first codepoint.
This commit is contained in:
parent
9468fa7ff2
commit
880803a21e
@ -143,6 +143,10 @@ typedef struct {
|
||||
typedef size_t (*mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state);
|
||||
typedef void (*mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end);
|
||||
|
||||
/* When converting encoded text to a buffer of wchars (Unicode codepoints) using `mb_to_wchar_fn`,
|
||||
* the buffer must be at least this size (to work with all supported text encodings) */
|
||||
#define MBSTRING_MIN_WCHAR_BUFSIZE 5
|
||||
|
||||
static inline void mb_convert_buf_init(mb_convert_buf *buf, size_t initsize, uint32_t repl_char, unsigned int err_mode)
|
||||
{
|
||||
buf->state = buf->errors = 0;
|
||||
|
@ -3993,29 +3993,17 @@ static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string
|
||||
return -2;
|
||||
}
|
||||
|
||||
{
|
||||
mbfl_wchar_device dev;
|
||||
mbfl_convert_filter *filter;
|
||||
zend_long cp;
|
||||
/* Some legacy text encodings have a minimum required wchar buffer size;
|
||||
* the ones which need the most are SJIS-Mac, UTF-7, and UTF7-IMAP */
|
||||
uint32_t wchar_buf[MBSTRING_MIN_WCHAR_BUFSIZE];
|
||||
unsigned int state = 0;
|
||||
size_t out_len = enc->to_wchar((unsigned char**)&str, &str_len, wchar_buf, MBSTRING_MIN_WCHAR_BUFSIZE, &state);
|
||||
ZEND_ASSERT(out_len <= MBSTRING_MIN_WCHAR_BUFSIZE);
|
||||
|
||||
mbfl_wchar_device_init(&dev);
|
||||
filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
|
||||
/* If this assertion fails this means some memory allocation failure which is a bug */
|
||||
ZEND_ASSERT(filter != NULL);
|
||||
|
||||
mbfl_convert_filter_feed_string(filter, (unsigned char*)str, str_len);
|
||||
mbfl_convert_filter_flush(filter);
|
||||
|
||||
if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] == MBFL_BAD_INPUT) {
|
||||
cp = -1;
|
||||
} else {
|
||||
cp = dev.buffer[0];
|
||||
}
|
||||
|
||||
mbfl_convert_filter_delete(filter);
|
||||
mbfl_wchar_device_clear(&dev);
|
||||
return cp;
|
||||
if (!out_len || wchar_buf[0] == MBFL_BAD_INPUT) {
|
||||
return -1;
|
||||
}
|
||||
return wchar_buf[0];
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user