mirror of
https://github.com/php/php-src.git
synced 2024-09-21 18:07:23 +00:00
Optimize mb_str{,im}width for performance
Rather than doing a linear search of a table of fullwidth codepoint ranges for every input character: 1) short-cut the search if the codepoint is below the first such range; 2) otherwise, do a binary (rather than linear) search.
This commit is contained in:
parent
f4365d2c26
commit
0b32a15eb0
@ -14,6 +14,8 @@
|
||||
* which should be displayed as double-width.
|
||||
*/
|
||||
|
||||
#define FIRST_DOUBLEWIDTH_CODEPOINT 0x1100
|
||||
|
||||
static const struct {
|
||||
int begin;
|
||||
int end;
|
||||
|
@ -1203,31 +1203,33 @@ mbfl_strcut(
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* strwidth
|
||||
*/
|
||||
static size_t is_fullwidth(int c)
|
||||
/* Some East Asian characters, when printed at a terminal (or the like), require double
|
||||
* the usual amount of horizontal space. We call these "fullwidth" characters. */
|
||||
static size_t character_width(int c)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (c < mbfl_eaw_table[0].begin) {
|
||||
return 0;
|
||||
if (c < FIRST_DOUBLEWIDTH_CODEPOINT) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
|
||||
if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
|
||||
return 1;
|
||||
/* Do a binary search to see if we fall in any of the fullwidth ranges */
|
||||
int lo = 0, hi = sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]);
|
||||
while (lo < hi) {
|
||||
int probe = (lo + hi) / 2;
|
||||
if (c < mbfl_eaw_table[probe].begin) {
|
||||
hi = probe;
|
||||
} else if (c > mbfl_eaw_table[probe].end) {
|
||||
lo = probe + 1;
|
||||
} else {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
filter_count_width(int c, void* data)
|
||||
static int filter_count_width(int c, void* data)
|
||||
{
|
||||
(*(size_t *)data) += (is_fullwidth(c) ? 2: 1);
|
||||
(*(size_t *)data) += character_width(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1289,7 +1291,7 @@ collector_strimwidth(int c, void* data)
|
||||
break;
|
||||
default:
|
||||
if (pc->outchar >= pc->from) {
|
||||
pc->outwidth += (is_fullwidth(c) ? 2: 1);
|
||||
pc->outwidth += character_width(c);
|
||||
|
||||
if (pc->outwidth > pc->width) {
|
||||
if (pc->status == 0) {
|
||||
|
@ -700,7 +700,7 @@ function generateMPH(array $map, bool $fast) {
|
||||
}
|
||||
|
||||
function generateEastAsianWidthData(array $wideRanges) {
|
||||
$result = <<<'HEADER'
|
||||
$result = <<<'HEADER'
|
||||
/* This file was generated by ext/mbstring/ucgendat/ucgendat.php.
|
||||
*
|
||||
* DO NOT EDIT THIS FILE!
|
||||
@ -717,12 +717,17 @@ function generateEastAsianWidthData(array $wideRanges) {
|
||||
* which should be displayed as double-width.
|
||||
*/
|
||||
|
||||
HEADER;
|
||||
|
||||
$result .= "\n#define FIRST_DOUBLEWIDTH_CODEPOINT 0x" . dechex($wideRanges[0]->start) . "\n\n";
|
||||
|
||||
$result .= <<<'TABLESTART'
|
||||
static const struct {
|
||||
int begin;
|
||||
int end;
|
||||
} mbfl_eaw_table[] = {
|
||||
|
||||
HEADER;
|
||||
TABLESTART;
|
||||
|
||||
foreach ($wideRanges as $range) {
|
||||
$startCode = dechex($range->start);
|
||||
|
Loading…
Reference in New Issue
Block a user