Combine punctuation into one character group

We're not currently interested in distinguishing between
individual punctuation types, so just merge everything into one
general category to make the property lookup more efficient.
This commit is contained in:
Nikita Popov 2021-08-24 19:20:06 +02:00
parent d2073179e3
commit f458b16041
3 changed files with 169 additions and 273 deletions

View File

@ -50,33 +50,29 @@
#define UC_LT 16 /* Letter, Titlecase */
#define UC_LM 17 /* Letter, Modifier */
#define UC_LO 18 /* Letter, Other */
#define UC_PC 19 /* Punctuation, Connector */
#define UC_PD 20 /* Punctuation, Dash */
#define UC_PS 21 /* Punctuation, Open */
#define UC_PE 22 /* Punctuation, Close */
#define UC_PO 23 /* Punctuation, Other */
#define UC_SM 24 /* Symbol, Math */
#define UC_SC 25 /* Symbol, Currency */
#define UC_SK 26 /* Symbol, Modifier */
#define UC_SO 27 /* Symbol, Other */
#define UC_L 28 /* Left-To-Right */
#define UC_R 29 /* Right-To-Left */
#define UC_EN 30 /* European Number */
#define UC_ES 31 /* European Number Separator */
#define UC_ET 32 /* European Number Terminator */
#define UC_AN 33 /* Arabic Number */
#define UC_CS 34 /* Common Number Separator */
#define UC_B 35 /* Block Separator */
#define UC_S 36 /* Segment Separator */
#define UC_WS 37 /* Whitespace */
#define UC_ON 38 /* Other Neutrals */
#define UC_PI 39 /* Punctuation, Initial */
#define UC_PF 40 /* Punctuation, Final */
#define UC_AL 41 /* Arabic Letter */
#define UC_SM 19 /* Symbol, Math */
#define UC_SC 20 /* Symbol, Currency */
#define UC_SK 21 /* Symbol, Modifier */
#define UC_SO 22 /* Symbol, Other */
#define UC_L 23 /* Left-To-Right */
#define UC_R 24 /* Right-To-Left */
#define UC_EN 25 /* European Number */
#define UC_ES 26 /* European Number Separator */
#define UC_ET 27 /* European Number Terminator */
#define UC_AN 28 /* Arabic Number */
#define UC_CS 29 /* Common Number Separator */
#define UC_B 30 /* Block Separator */
#define UC_S 31 /* Segment Separator */
#define UC_WS 32 /* Whitespace */
#define UC_ON 33 /* Other Neutrals */
#define UC_AL 34 /* Arabic Letter */
/* Merged property categories */
#define UC_P 35 /* Punctuation (Pc, Pd, Ps, Pe, Po, Pi and Pf folded into one class) */
/* Derived properties from DerivedCoreProperties.txt */
#define UC_CASED 42
#define UC_CASE_IGNORABLE 43
#define UC_CASED 36
#define UC_CASE_IGNORABLE 37
MBSTRING_API bool php_unicode_is_prop(unsigned long code, ...);
@ -119,15 +115,15 @@ static inline int php_unicode_is_upper(unsigned long code) {
#define php_unicode_is_alnum(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, UC_ND, -1)
#define php_unicode_is_cntrl(cc) php_unicode_is_prop(cc, UC_CC, UC_CF, -1)
#define php_unicode_is_blank(cc) php_unicode_is_prop1(cc, UC_ZS)
#define php_unicode_is_punct(cc) php_unicode_is_prop(cc, UC_PD, UC_PS, UC_PE, UC_PO, UC_PI, UC_PF, -1)
#define php_unicode_is_graph(cc) php_unicode_is_prop(cc, UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_PC, UC_PD, \
UC_PS, UC_PE, UC_PO, UC_SM, UC_SM, UC_SC, UC_SK, \
UC_SO, UC_PI, UC_PF, -1)
#define php_unicode_is_print(cc) php_unicode_is_prop(cc, UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_PC, UC_PD, \
UC_PS, UC_PE, UC_PO, UC_SM, UC_SM, UC_SC, UC_SK, \
UC_SO, UC_ZS, UC_PI, UC_PF, -1)
/* True for any punctuation character. UC_P is the single merged punctuation
 * class, so every punctuation category is matched, Connector (Pc) included. */
#define php_unicode_is_punct(cc) php_unicode_is_prop1(cc, UC_P)
/* True for graphic characters: any mark (Mn, Mc, Me), number (Nd, Nl, No),
 * letter (Lu, Ll, Lt, Lm, Lo), punctuation (merged UC_P) or symbol
 * (Sm, Sc, Sk, So) category. Separator (Z*) and other (C*) categories are
 * not part of the list. Each category is listed exactly once. */
#define php_unicode_is_graph(cc) php_unicode_is_prop(cc, \
	UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
	UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_P, \
	UC_SM, UC_SC, UC_SK, UC_SO, -1)
/* True for printable characters: any mark (Mn, Mc, Me), number (Nd, Nl, No),
 * letter (Lu, Ll, Lt, Lm, Lo), punctuation (merged UC_P) or symbol
 * (Sm, Sc, Sk, So) category, plus space separators (Zs). Each category is
 * listed exactly once. */
#define php_unicode_is_print(cc) php_unicode_is_prop(cc, \
	UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
	UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_P, \
	UC_SM, UC_SC, UC_SK, UC_SO, UC_ZS, -1)
#define php_unicode_is_title(cc) php_unicode_is_prop1(cc, UC_LT)
#define php_unicode_is_isocntrl(cc) php_unicode_is_prop1(cc, UC_CC)
@ -136,10 +132,6 @@ static inline int php_unicode_is_upper(unsigned long code) {
#define php_unicode_is_symbol(cc) php_unicode_is_prop(cc, UC_SM, UC_SC, UC_SO, UC_SK, -1)
#define php_unicode_is_number(cc) php_unicode_is_prop(cc, UC_ND, UC_NO, UC_NL, -1)
#define php_unicode_is_nonspacing(cc) php_unicode_is_prop1(cc, UC_MN)
#define php_unicode_is_openpunct(cc) php_unicode_is_prop1(cc, UC_PS)
#define php_unicode_is_closepunct(cc) php_unicode_is_prop1(cc, UC_PE)
#define php_unicode_is_initialpunct(cc) php_unicode_is_prop1(cc, UC_PI)
#define php_unicode_is_finalpunct(cc) php_unicode_is_prop1(cc, UC_PF)
/*
* Directionality macros.
@ -157,8 +149,6 @@ static inline int php_unicode_is_upper(unsigned long code) {
#define php_unicode_is_mark(cc) php_unicode_is_prop(cc, UC_MN, UC_MC, UC_ME, -1)
#define php_unicode_is_modif(cc) php_unicode_is_prop1(cc, UC_LM)
#define php_unicode_is_letnum(cc) php_unicode_is_prop1(cc, UC_NL)
#define php_unicode_is_connect(cc) php_unicode_is_prop1(cc, UC_PC)
#define php_unicode_is_dash(cc) php_unicode_is_prop1(cc, UC_PD)
#define php_unicode_is_math(cc) php_unicode_is_prop1(cc, UC_SM)
#define php_unicode_is_currency(cc) php_unicode_is_prop1(cc, UC_SC)
#define php_unicode_is_modifsymbol(cc) php_unicode_is_prop1(cc, UC_SK)
@ -170,10 +160,6 @@ static inline int php_unicode_is_upper(unsigned long code) {
#define php_unicode_is_lsep(cc) php_unicode_is_prop1(cc, UC_ZL)
#define php_unicode_is_psep(cc) php_unicode_is_prop1(cc, UC_ZP)
#define php_unicode_is_identstart(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LT, UC_LO, UC_NL, -1)
#define php_unicode_is_identpart(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LT, UC_LO, UC_NL, \
UC_MN, UC_MC, UC_ND, UC_PC, UC_CF, -1)
/*
* Other miscellaneous character property macros.
*/

View File

@ -103,11 +103,10 @@ class UnicodeData {
"Mn", "Mc", "Me", "Nd", "Nl", "No",
"Zs", "Zl", "Zp", "Cc", "Cf", "Cs",
"Co", "Cn", "Lu", "Ll", "Lt", "Lm",
"Lo", "Pc", "Pd", "Ps", "Pe", "Po",
"Sm", "Sc", "Sk", "So", "L", "R",
"EN", "ES", "ET", "AN", "CS", "B",
"S", "WS", "ON", "Pi", "Pf", "AL",
"Cased", "Case_Ignorable"
"Lo", "Sm", "Sc", "Sk", "So", "L",
"R", "EN", "ES", "ET", "AN", "CS",
"B", "S", "WS", "ON", "AL",
"P", "Cased", "Case_Ignorable"
]);
$this->numProps = count($this->propIndexes);
@ -131,6 +130,12 @@ class UnicodeData {
$prop = "ON";
}
/* Merge all punctuation into a single category for efficiency of access.
* We're currently not interested in distinguishing different kinds of punctuation. */
if (in_array($prop, ["Pc", "Pd", "Ps", "Pe", "Po", "Pi", "Pf"])) {
$prop = "P";
}
if (!isset($this->propIndexes[$prop])) {
throw new Exception("Unknown property $prop");
}

View File

@ -10,15 +10,14 @@
* the project's page doesn't seem to be live anymore, so you can use
* OpenLDAP's modified copy (look in libraries/liblunicode/ucdata) */
static const unsigned short _ucprop_size = 44;
static const unsigned short _ucprop_size = 38;
static const unsigned short _ucprop_offsets[] = {
0x0000, 0x028e, 0x03ec, 0x03f6, 0x0470, 0x0488, 0x0516, 0x0524,
0x0526, 0x0528, 0x052c, 0x0554, 0x0556, 0x055c, 0x055c, 0x0a58,
0x0f62, 0x0f76, 0x0ff0, 0x13c2, 0x13ce, 0x13f2, 0x1488, 0x1518,
0x1682, 0x1702, 0x172c, 0x1768, 0x18d4, 0x1e50, 0x1ede, 0x1ef8,
0x1f0a, 0x1f3a, 0x1f48, 0x1f62, 0x1f6c, 0x1f72, 0x1f80, 0x236e,
0x2384, 0x2398, 0x2414, 0x252e, 0x2862, 0x0000, 0x0000, 0x0000
0x0f62, 0x0f76, 0x0ff0, 0x13c2, 0x1442, 0x146c, 0x14a8, 0x1614,
0x1b90, 0x1c1e, 0x1c38, 0x1c4a, 0x1c7a, 0x1c88, 0x1ca2, 0x1cac,
0x1cb2, 0x1cc0, 0x20ae, 0x212a, 0x229c, 0x23b6, 0x26ea, 0x0000
};
static const unsigned int _ucprop_ranges[] = {
@ -1286,183 +1285,7 @@ static const unsigned int _ucprop_ranges[] = {
0x00020000, 0x0002a6dd, 0x0002a700, 0x0002b734,
0x0002b740, 0x0002b81d, 0x0002b820, 0x0002cea1,
0x0002ceb0, 0x0002ebe0, 0x0002f800, 0x0002fa1d,
0x00030000, 0x0003134a, 0x0000005f, 0x0000005f,
0x0000203f, 0x00002040, 0x00002054, 0x00002054,
0x0000fe33, 0x0000fe34, 0x0000fe4d, 0x0000fe4f,
0x0000ff3f, 0x0000ff3f, 0x0000002d, 0x0000002d,
0x0000058a, 0x0000058a, 0x000005be, 0x000005be,
0x00001400, 0x00001400, 0x00001806, 0x00001806,
0x00002010, 0x00002015, 0x00002e17, 0x00002e17,
0x00002e1a, 0x00002e1a, 0x00002e3a, 0x00002e3b,
0x00002e40, 0x00002e40, 0x0000301c, 0x0000301c,
0x00003030, 0x00003030, 0x000030a0, 0x000030a0,
0x0000fe31, 0x0000fe32, 0x0000fe58, 0x0000fe58,
0x0000fe63, 0x0000fe63, 0x0000ff0d, 0x0000ff0d,
0x00010ead, 0x00010ead, 0x00000028, 0x00000028,
0x0000005b, 0x0000005b, 0x0000007b, 0x0000007b,
0x00000f3a, 0x00000f3a, 0x00000f3c, 0x00000f3c,
0x0000169b, 0x0000169b, 0x0000201a, 0x0000201a,
0x0000201e, 0x0000201e, 0x00002045, 0x00002045,
0x0000207d, 0x0000207d, 0x0000208d, 0x0000208d,
0x00002308, 0x00002308, 0x0000230a, 0x0000230a,
0x00002329, 0x00002329, 0x00002768, 0x00002768,
0x0000276a, 0x0000276a, 0x0000276c, 0x0000276c,
0x0000276e, 0x0000276e, 0x00002770, 0x00002770,
0x00002772, 0x00002772, 0x00002774, 0x00002774,
0x000027c5, 0x000027c5, 0x000027e6, 0x000027e6,
0x000027e8, 0x000027e8, 0x000027ea, 0x000027ea,
0x000027ec, 0x000027ec, 0x000027ee, 0x000027ee,
0x00002983, 0x00002983, 0x00002985, 0x00002985,
0x00002987, 0x00002987, 0x00002989, 0x00002989,
0x0000298b, 0x0000298b, 0x0000298d, 0x0000298d,
0x0000298f, 0x0000298f, 0x00002991, 0x00002991,
0x00002993, 0x00002993, 0x00002995, 0x00002995,
0x00002997, 0x00002997, 0x000029d8, 0x000029d8,
0x000029da, 0x000029da, 0x000029fc, 0x000029fc,
0x00002e22, 0x00002e22, 0x00002e24, 0x00002e24,
0x00002e26, 0x00002e26, 0x00002e28, 0x00002e28,
0x00002e42, 0x00002e42, 0x00003008, 0x00003008,
0x0000300a, 0x0000300a, 0x0000300c, 0x0000300c,
0x0000300e, 0x0000300e, 0x00003010, 0x00003010,
0x00003014, 0x00003014, 0x00003016, 0x00003016,
0x00003018, 0x00003018, 0x0000301a, 0x0000301a,
0x0000301d, 0x0000301d, 0x0000fd3f, 0x0000fd3f,
0x0000fe17, 0x0000fe17, 0x0000fe35, 0x0000fe35,
0x0000fe37, 0x0000fe37, 0x0000fe39, 0x0000fe39,
0x0000fe3b, 0x0000fe3b, 0x0000fe3d, 0x0000fe3d,
0x0000fe3f, 0x0000fe3f, 0x0000fe41, 0x0000fe41,
0x0000fe43, 0x0000fe43, 0x0000fe47, 0x0000fe47,
0x0000fe59, 0x0000fe59, 0x0000fe5b, 0x0000fe5b,
0x0000fe5d, 0x0000fe5d, 0x0000ff08, 0x0000ff08,
0x0000ff3b, 0x0000ff3b, 0x0000ff5b, 0x0000ff5b,
0x0000ff5f, 0x0000ff5f, 0x0000ff62, 0x0000ff62,
0x00000029, 0x00000029, 0x0000005d, 0x0000005d,
0x0000007d, 0x0000007d, 0x00000f3b, 0x00000f3b,
0x00000f3d, 0x00000f3d, 0x0000169c, 0x0000169c,
0x00002046, 0x00002046, 0x0000207e, 0x0000207e,
0x0000208e, 0x0000208e, 0x00002309, 0x00002309,
0x0000230b, 0x0000230b, 0x0000232a, 0x0000232a,
0x00002769, 0x00002769, 0x0000276b, 0x0000276b,
0x0000276d, 0x0000276d, 0x0000276f, 0x0000276f,
0x00002771, 0x00002771, 0x00002773, 0x00002773,
0x00002775, 0x00002775, 0x000027c6, 0x000027c6,
0x000027e7, 0x000027e7, 0x000027e9, 0x000027e9,
0x000027eb, 0x000027eb, 0x000027ed, 0x000027ed,
0x000027ef, 0x000027ef, 0x00002984, 0x00002984,
0x00002986, 0x00002986, 0x00002988, 0x00002988,
0x0000298a, 0x0000298a, 0x0000298c, 0x0000298c,
0x0000298e, 0x0000298e, 0x00002990, 0x00002990,
0x00002992, 0x00002992, 0x00002994, 0x00002994,
0x00002996, 0x00002996, 0x00002998, 0x00002998,
0x000029d9, 0x000029d9, 0x000029db, 0x000029db,
0x000029fd, 0x000029fd, 0x00002e23, 0x00002e23,
0x00002e25, 0x00002e25, 0x00002e27, 0x00002e27,
0x00002e29, 0x00002e29, 0x00003009, 0x00003009,
0x0000300b, 0x0000300b, 0x0000300d, 0x0000300d,
0x0000300f, 0x0000300f, 0x00003011, 0x00003011,
0x00003015, 0x00003015, 0x00003017, 0x00003017,
0x00003019, 0x00003019, 0x0000301b, 0x0000301b,
0x0000301e, 0x0000301f, 0x0000fd3e, 0x0000fd3e,
0x0000fe18, 0x0000fe18, 0x0000fe36, 0x0000fe36,
0x0000fe38, 0x0000fe38, 0x0000fe3a, 0x0000fe3a,
0x0000fe3c, 0x0000fe3c, 0x0000fe3e, 0x0000fe3e,
0x0000fe40, 0x0000fe40, 0x0000fe42, 0x0000fe42,
0x0000fe44, 0x0000fe44, 0x0000fe48, 0x0000fe48,
0x0000fe5a, 0x0000fe5a, 0x0000fe5c, 0x0000fe5c,
0x0000fe5e, 0x0000fe5e, 0x0000ff09, 0x0000ff09,
0x0000ff3d, 0x0000ff3d, 0x0000ff5d, 0x0000ff5d,
0x0000ff60, 0x0000ff60, 0x0000ff63, 0x0000ff63,
0x00000021, 0x00000023, 0x00000025, 0x00000027,
0x0000002a, 0x0000002a, 0x0000002c, 0x0000002c,
0x0000002e, 0x0000002f, 0x0000003a, 0x0000003b,
0x0000003f, 0x00000040, 0x0000005c, 0x0000005c,
0x000000a1, 0x000000a1, 0x000000a7, 0x000000a7,
0x000000b6, 0x000000b7, 0x000000bf, 0x000000bf,
0x0000037e, 0x0000037e, 0x00000387, 0x00000387,
0x0000055a, 0x0000055f, 0x00000589, 0x00000589,
0x000005c0, 0x000005c0, 0x000005c3, 0x000005c3,
0x000005c6, 0x000005c6, 0x000005f3, 0x000005f4,
0x00000609, 0x0000060a, 0x0000060c, 0x0000060d,
0x0000061b, 0x0000061b, 0x0000061e, 0x0000061f,
0x0000066a, 0x0000066d, 0x000006d4, 0x000006d4,
0x00000700, 0x0000070d, 0x000007f7, 0x000007f9,
0x00000830, 0x0000083e, 0x0000085e, 0x0000085e,
0x00000964, 0x00000965, 0x00000970, 0x00000970,
0x000009fd, 0x000009fd, 0x00000a76, 0x00000a76,
0x00000af0, 0x00000af0, 0x00000c77, 0x00000c77,
0x00000c84, 0x00000c84, 0x00000df4, 0x00000df4,
0x00000e4f, 0x00000e4f, 0x00000e5a, 0x00000e5b,
0x00000f04, 0x00000f12, 0x00000f14, 0x00000f14,
0x00000f85, 0x00000f85, 0x00000fd0, 0x00000fd4,
0x00000fd9, 0x00000fda, 0x0000104a, 0x0000104f,
0x000010fb, 0x000010fb, 0x00001360, 0x00001368,
0x0000166e, 0x0000166e, 0x000016eb, 0x000016ed,
0x00001735, 0x00001736, 0x000017d4, 0x000017d6,
0x000017d8, 0x000017da, 0x00001800, 0x00001805,
0x00001807, 0x0000180a, 0x00001944, 0x00001945,
0x00001a1e, 0x00001a1f, 0x00001aa0, 0x00001aa6,
0x00001aa8, 0x00001aad, 0x00001b5a, 0x00001b60,
0x00001bfc, 0x00001bff, 0x00001c3b, 0x00001c3f,
0x00001c7e, 0x00001c7f, 0x00001cc0, 0x00001cc7,
0x00001cd3, 0x00001cd3, 0x00002016, 0x00002017,
0x00002020, 0x00002027, 0x00002030, 0x00002038,
0x0000203b, 0x0000203e, 0x00002041, 0x00002043,
0x00002047, 0x00002051, 0x00002053, 0x00002053,
0x00002055, 0x0000205e, 0x00002cf9, 0x00002cfc,
0x00002cfe, 0x00002cff, 0x00002d70, 0x00002d70,
0x00002e00, 0x00002e01, 0x00002e06, 0x00002e08,
0x00002e0b, 0x00002e0b, 0x00002e0e, 0x00002e16,
0x00002e18, 0x00002e19, 0x00002e1b, 0x00002e1b,
0x00002e1e, 0x00002e1f, 0x00002e2a, 0x00002e2e,
0x00002e30, 0x00002e39, 0x00002e3c, 0x00002e3f,
0x00002e41, 0x00002e41, 0x00002e43, 0x00002e4f,
0x00002e52, 0x00002e52, 0x00003001, 0x00003003,
0x0000303d, 0x0000303d, 0x000030fb, 0x000030fb,
0x0000a4fe, 0x0000a4ff, 0x0000a60d, 0x0000a60f,
0x0000a673, 0x0000a673, 0x0000a67e, 0x0000a67e,
0x0000a6f2, 0x0000a6f7, 0x0000a874, 0x0000a877,
0x0000a8ce, 0x0000a8cf, 0x0000a8f8, 0x0000a8fa,
0x0000a8fc, 0x0000a8fc, 0x0000a92e, 0x0000a92f,
0x0000a95f, 0x0000a95f, 0x0000a9c1, 0x0000a9cd,
0x0000a9de, 0x0000a9df, 0x0000aa5c, 0x0000aa5f,
0x0000aade, 0x0000aadf, 0x0000aaf0, 0x0000aaf1,
0x0000abeb, 0x0000abeb, 0x0000fe10, 0x0000fe16,
0x0000fe19, 0x0000fe19, 0x0000fe30, 0x0000fe30,
0x0000fe45, 0x0000fe46, 0x0000fe49, 0x0000fe4c,
0x0000fe50, 0x0000fe52, 0x0000fe54, 0x0000fe57,
0x0000fe5f, 0x0000fe61, 0x0000fe68, 0x0000fe68,
0x0000fe6a, 0x0000fe6b, 0x0000ff01, 0x0000ff03,
0x0000ff05, 0x0000ff07, 0x0000ff0a, 0x0000ff0a,
0x0000ff0c, 0x0000ff0c, 0x0000ff0e, 0x0000ff0f,
0x0000ff1a, 0x0000ff1b, 0x0000ff1f, 0x0000ff20,
0x0000ff3c, 0x0000ff3c, 0x0000ff61, 0x0000ff61,
0x0000ff64, 0x0000ff65, 0x00010100, 0x00010102,
0x0001039f, 0x0001039f, 0x000103d0, 0x000103d0,
0x0001056f, 0x0001056f, 0x00010857, 0x00010857,
0x0001091f, 0x0001091f, 0x0001093f, 0x0001093f,
0x00010a50, 0x00010a58, 0x00010a7f, 0x00010a7f,
0x00010af0, 0x00010af6, 0x00010b39, 0x00010b3f,
0x00010b99, 0x00010b9c, 0x00010f55, 0x00010f59,
0x00011047, 0x0001104d, 0x000110bb, 0x000110bc,
0x000110be, 0x000110c1, 0x00011140, 0x00011143,
0x00011174, 0x00011175, 0x000111c5, 0x000111c8,
0x000111cd, 0x000111cd, 0x000111db, 0x000111db,
0x000111dd, 0x000111df, 0x00011238, 0x0001123d,
0x000112a9, 0x000112a9, 0x0001144b, 0x0001144f,
0x0001145a, 0x0001145b, 0x0001145d, 0x0001145d,
0x000114c6, 0x000114c6, 0x000115c1, 0x000115d7,
0x00011641, 0x00011643, 0x00011660, 0x0001166c,
0x0001173c, 0x0001173e, 0x0001183b, 0x0001183b,
0x00011944, 0x00011946, 0x000119e2, 0x000119e2,
0x00011a3f, 0x00011a46, 0x00011a9a, 0x00011a9c,
0x00011a9e, 0x00011aa2, 0x00011c41, 0x00011c45,
0x00011c70, 0x00011c71, 0x00011ef7, 0x00011ef8,
0x00011fff, 0x00011fff, 0x00012470, 0x00012474,
0x00016a6e, 0x00016a6f, 0x00016af5, 0x00016af5,
0x00016b37, 0x00016b3b, 0x00016b44, 0x00016b44,
0x00016e97, 0x00016e9a, 0x00016fe2, 0x00016fe2,
0x0001bc9f, 0x0001bc9f, 0x0001da87, 0x0001da8b,
0x0001e95e, 0x0001e95f, 0x0000002b, 0x0000002b,
0x00030000, 0x0003134a, 0x0000002b, 0x0000002b,
0x0000003c, 0x0000003e, 0x0000007c, 0x0000007c,
0x0000007e, 0x0000007e, 0x000000ac, 0x000000ac,
0x000000b1, 0x000000b1, 0x000000d7, 0x000000d7,
@ -2289,48 +2112,130 @@ static const unsigned int _ucprop_ranges[] = {
0x0001fac0, 0x0001fac2, 0x0001fad0, 0x0001fad6,
0x0001fb00, 0x0001fb92, 0x0001fb94, 0x0001fbca,
0x000e0001, 0x000e0001, 0x000e0020, 0x000e007f,
0x000e0100, 0x000e01ef, 0x000000ab, 0x000000ab,
0x00002018, 0x00002018, 0x0000201b, 0x0000201c,
0x0000201f, 0x0000201f, 0x00002039, 0x00002039,
0x00002e02, 0x00002e02, 0x00002e04, 0x00002e04,
0x00002e09, 0x00002e09, 0x00002e0c, 0x00002e0c,
0x00002e1c, 0x00002e1c, 0x00002e20, 0x00002e20,
0x000000bb, 0x000000bb, 0x00002019, 0x00002019,
0x0000201d, 0x0000201d, 0x0000203a, 0x0000203a,
0x00002e03, 0x00002e03, 0x00002e05, 0x00002e05,
0x00002e0a, 0x00002e0a, 0x00002e0d, 0x00002e0d,
0x00002e1d, 0x00002e1d, 0x00002e21, 0x00002e21,
0x00000608, 0x00000608, 0x0000060b, 0x0000060b,
0x0000060d, 0x0000060d, 0x0000061b, 0x0000061c,
0x0000061e, 0x0000064a, 0x0000066d, 0x0000066f,
0x00000671, 0x000006d5, 0x000006e5, 0x000006e6,
0x000006ee, 0x000006ef, 0x000006fa, 0x0000070d,
0x0000070f, 0x00000710, 0x00000712, 0x0000072f,
0x0000074d, 0x000007a5, 0x000007b1, 0x000007b1,
0x00000860, 0x0000086a, 0x000008a0, 0x000008b4,
0x000008b6, 0x000008c7, 0x0000fb50, 0x0000fbc1,
0x0000fbd3, 0x0000fd3d, 0x0000fd50, 0x0000fd8f,
0x0000fd92, 0x0000fdc7, 0x0000fdf0, 0x0000fdfc,
0x0000fe70, 0x0000fe74, 0x0000fe76, 0x0000fefc,
0x00010d00, 0x00010d23, 0x00010f30, 0x00010f45,
0x00010f51, 0x00010f59, 0x0001ec71, 0x0001ecb4,
0x0001ed01, 0x0001ed3d, 0x0001ee00, 0x0001ee03,
0x0001ee05, 0x0001ee1f, 0x0001ee21, 0x0001ee22,
0x0001ee24, 0x0001ee24, 0x0001ee27, 0x0001ee27,
0x0001ee29, 0x0001ee32, 0x0001ee34, 0x0001ee37,
0x0001ee39, 0x0001ee39, 0x0001ee3b, 0x0001ee3b,
0x0001ee42, 0x0001ee42, 0x0001ee47, 0x0001ee47,
0x0001ee49, 0x0001ee49, 0x0001ee4b, 0x0001ee4b,
0x0001ee4d, 0x0001ee4f, 0x0001ee51, 0x0001ee52,
0x0001ee54, 0x0001ee54, 0x0001ee57, 0x0001ee57,
0x0001ee59, 0x0001ee59, 0x0001ee5b, 0x0001ee5b,
0x0001ee5d, 0x0001ee5d, 0x0001ee5f, 0x0001ee5f,
0x0001ee61, 0x0001ee62, 0x0001ee64, 0x0001ee64,
0x0001ee67, 0x0001ee6a, 0x0001ee6c, 0x0001ee72,
0x0001ee74, 0x0001ee77, 0x0001ee79, 0x0001ee7c,
0x0001ee7e, 0x0001ee7e, 0x0001ee80, 0x0001ee89,
0x0001ee8b, 0x0001ee9b, 0x0001eea1, 0x0001eea3,
0x0001eea5, 0x0001eea9, 0x0001eeab, 0x0001eebb,
0x000e0100, 0x000e01ef, 0x00000608, 0x00000608,
0x0000060b, 0x0000060b, 0x0000060d, 0x0000060d,
0x0000061b, 0x0000061c, 0x0000061e, 0x0000064a,
0x0000066d, 0x0000066f, 0x00000671, 0x000006d5,
0x000006e5, 0x000006e6, 0x000006ee, 0x000006ef,
0x000006fa, 0x0000070d, 0x0000070f, 0x00000710,
0x00000712, 0x0000072f, 0x0000074d, 0x000007a5,
0x000007b1, 0x000007b1, 0x00000860, 0x0000086a,
0x000008a0, 0x000008b4, 0x000008b6, 0x000008c7,
0x0000fb50, 0x0000fbc1, 0x0000fbd3, 0x0000fd3d,
0x0000fd50, 0x0000fd8f, 0x0000fd92, 0x0000fdc7,
0x0000fdf0, 0x0000fdfc, 0x0000fe70, 0x0000fe74,
0x0000fe76, 0x0000fefc, 0x00010d00, 0x00010d23,
0x00010f30, 0x00010f45, 0x00010f51, 0x00010f59,
0x0001ec71, 0x0001ecb4, 0x0001ed01, 0x0001ed3d,
0x0001ee00, 0x0001ee03, 0x0001ee05, 0x0001ee1f,
0x0001ee21, 0x0001ee22, 0x0001ee24, 0x0001ee24,
0x0001ee27, 0x0001ee27, 0x0001ee29, 0x0001ee32,
0x0001ee34, 0x0001ee37, 0x0001ee39, 0x0001ee39,
0x0001ee3b, 0x0001ee3b, 0x0001ee42, 0x0001ee42,
0x0001ee47, 0x0001ee47, 0x0001ee49, 0x0001ee49,
0x0001ee4b, 0x0001ee4b, 0x0001ee4d, 0x0001ee4f,
0x0001ee51, 0x0001ee52, 0x0001ee54, 0x0001ee54,
0x0001ee57, 0x0001ee57, 0x0001ee59, 0x0001ee59,
0x0001ee5b, 0x0001ee5b, 0x0001ee5d, 0x0001ee5d,
0x0001ee5f, 0x0001ee5f, 0x0001ee61, 0x0001ee62,
0x0001ee64, 0x0001ee64, 0x0001ee67, 0x0001ee6a,
0x0001ee6c, 0x0001ee72, 0x0001ee74, 0x0001ee77,
0x0001ee79, 0x0001ee7c, 0x0001ee7e, 0x0001ee7e,
0x0001ee80, 0x0001ee89, 0x0001ee8b, 0x0001ee9b,
0x0001eea1, 0x0001eea3, 0x0001eea5, 0x0001eea9,
0x0001eeab, 0x0001eebb, 0x00000021, 0x00000023,
0x00000025, 0x0000002a, 0x0000002c, 0x0000002f,
0x0000003a, 0x0000003b, 0x0000003f, 0x00000040,
0x0000005b, 0x0000005d, 0x0000005f, 0x0000005f,
0x0000007b, 0x0000007b, 0x0000007d, 0x0000007d,
0x000000a1, 0x000000a1, 0x000000a7, 0x000000a7,
0x000000ab, 0x000000ab, 0x000000b6, 0x000000b7,
0x000000bb, 0x000000bb, 0x000000bf, 0x000000bf,
0x0000037e, 0x0000037e, 0x00000387, 0x00000387,
0x0000055a, 0x0000055f, 0x00000589, 0x0000058a,
0x000005be, 0x000005be, 0x000005c0, 0x000005c0,
0x000005c3, 0x000005c3, 0x000005c6, 0x000005c6,
0x000005f3, 0x000005f4, 0x00000609, 0x0000060a,
0x0000060c, 0x0000060d, 0x0000061b, 0x0000061b,
0x0000061e, 0x0000061f, 0x0000066a, 0x0000066d,
0x000006d4, 0x000006d4, 0x00000700, 0x0000070d,
0x000007f7, 0x000007f9, 0x00000830, 0x0000083e,
0x0000085e, 0x0000085e, 0x00000964, 0x00000965,
0x00000970, 0x00000970, 0x000009fd, 0x000009fd,
0x00000a76, 0x00000a76, 0x00000af0, 0x00000af0,
0x00000c77, 0x00000c77, 0x00000c84, 0x00000c84,
0x00000df4, 0x00000df4, 0x00000e4f, 0x00000e4f,
0x00000e5a, 0x00000e5b, 0x00000f04, 0x00000f12,
0x00000f14, 0x00000f14, 0x00000f3a, 0x00000f3d,
0x00000f85, 0x00000f85, 0x00000fd0, 0x00000fd4,
0x00000fd9, 0x00000fda, 0x0000104a, 0x0000104f,
0x000010fb, 0x000010fb, 0x00001360, 0x00001368,
0x00001400, 0x00001400, 0x0000166e, 0x0000166e,
0x0000169b, 0x0000169c, 0x000016eb, 0x000016ed,
0x00001735, 0x00001736, 0x000017d4, 0x000017d6,
0x000017d8, 0x000017da, 0x00001800, 0x0000180a,
0x00001944, 0x00001945, 0x00001a1e, 0x00001a1f,
0x00001aa0, 0x00001aa6, 0x00001aa8, 0x00001aad,
0x00001b5a, 0x00001b60, 0x00001bfc, 0x00001bff,
0x00001c3b, 0x00001c3f, 0x00001c7e, 0x00001c7f,
0x00001cc0, 0x00001cc7, 0x00001cd3, 0x00001cd3,
0x00002010, 0x00002027, 0x00002030, 0x00002043,
0x00002045, 0x00002051, 0x00002053, 0x0000205e,
0x0000207d, 0x0000207e, 0x0000208d, 0x0000208e,
0x00002308, 0x0000230b, 0x00002329, 0x0000232a,
0x00002768, 0x00002775, 0x000027c5, 0x000027c6,
0x000027e6, 0x000027ef, 0x00002983, 0x00002998,
0x000029d8, 0x000029db, 0x000029fc, 0x000029fd,
0x00002cf9, 0x00002cfc, 0x00002cfe, 0x00002cff,
0x00002d70, 0x00002d70, 0x00002e00, 0x00002e2e,
0x00002e30, 0x00002e4f, 0x00002e52, 0x00002e52,
0x00003001, 0x00003003, 0x00003008, 0x00003011,
0x00003014, 0x0000301f, 0x00003030, 0x00003030,
0x0000303d, 0x0000303d, 0x000030a0, 0x000030a0,
0x000030fb, 0x000030fb, 0x0000a4fe, 0x0000a4ff,
0x0000a60d, 0x0000a60f, 0x0000a673, 0x0000a673,
0x0000a67e, 0x0000a67e, 0x0000a6f2, 0x0000a6f7,
0x0000a874, 0x0000a877, 0x0000a8ce, 0x0000a8cf,
0x0000a8f8, 0x0000a8fa, 0x0000a8fc, 0x0000a8fc,
0x0000a92e, 0x0000a92f, 0x0000a95f, 0x0000a95f,
0x0000a9c1, 0x0000a9cd, 0x0000a9de, 0x0000a9df,
0x0000aa5c, 0x0000aa5f, 0x0000aade, 0x0000aadf,
0x0000aaf0, 0x0000aaf1, 0x0000abeb, 0x0000abeb,
0x0000fd3e, 0x0000fd3f, 0x0000fe10, 0x0000fe19,
0x0000fe30, 0x0000fe52, 0x0000fe54, 0x0000fe61,
0x0000fe63, 0x0000fe63, 0x0000fe68, 0x0000fe68,
0x0000fe6a, 0x0000fe6b, 0x0000ff01, 0x0000ff03,
0x0000ff05, 0x0000ff0a, 0x0000ff0c, 0x0000ff0f,
0x0000ff1a, 0x0000ff1b, 0x0000ff1f, 0x0000ff20,
0x0000ff3b, 0x0000ff3d, 0x0000ff3f, 0x0000ff3f,
0x0000ff5b, 0x0000ff5b, 0x0000ff5d, 0x0000ff5d,
0x0000ff5f, 0x0000ff65, 0x00010100, 0x00010102,
0x0001039f, 0x0001039f, 0x000103d0, 0x000103d0,
0x0001056f, 0x0001056f, 0x00010857, 0x00010857,
0x0001091f, 0x0001091f, 0x0001093f, 0x0001093f,
0x00010a50, 0x00010a58, 0x00010a7f, 0x00010a7f,
0x00010af0, 0x00010af6, 0x00010b39, 0x00010b3f,
0x00010b99, 0x00010b9c, 0x00010ead, 0x00010ead,
0x00010f55, 0x00010f59, 0x00011047, 0x0001104d,
0x000110bb, 0x000110bc, 0x000110be, 0x000110c1,
0x00011140, 0x00011143, 0x00011174, 0x00011175,
0x000111c5, 0x000111c8, 0x000111cd, 0x000111cd,
0x000111db, 0x000111db, 0x000111dd, 0x000111df,
0x00011238, 0x0001123d, 0x000112a9, 0x000112a9,
0x0001144b, 0x0001144f, 0x0001145a, 0x0001145b,
0x0001145d, 0x0001145d, 0x000114c6, 0x000114c6,
0x000115c1, 0x000115d7, 0x00011641, 0x00011643,
0x00011660, 0x0001166c, 0x0001173c, 0x0001173e,
0x0001183b, 0x0001183b, 0x00011944, 0x00011946,
0x000119e2, 0x000119e2, 0x00011a3f, 0x00011a46,
0x00011a9a, 0x00011a9c, 0x00011a9e, 0x00011aa2,
0x00011c41, 0x00011c45, 0x00011c70, 0x00011c71,
0x00011ef7, 0x00011ef8, 0x00011fff, 0x00011fff,
0x00012470, 0x00012474, 0x00016a6e, 0x00016a6f,
0x00016af5, 0x00016af5, 0x00016b37, 0x00016b3b,
0x00016b44, 0x00016b44, 0x00016e97, 0x00016e9a,
0x00016fe2, 0x00016fe2, 0x0001bc9f, 0x0001bc9f,
0x0001da87, 0x0001da8b, 0x0001e95e, 0x0001e95f,
0x00000041, 0x0000005a, 0x00000061, 0x0000007a,
0x000000aa, 0x000000aa, 0x000000b5, 0x000000b5,
0x000000ba, 0x000000ba, 0x000000c0, 0x000000d6,