MFH (updated libmbfl for 1.3.0.).

This commit is contained in:
Rui Hirokawa 2011-08-02 03:50:10 +00:00
parent eb180ac30b
commit c5a650e815
20 changed files with 4078 additions and 25 deletions

View File

@ -264,6 +264,8 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_qprint.c
libmbfl/filters/mbfilter_sjis.c
libmbfl/filters/mbfilter_sjis_open.c
libmbfl/filters/mbfilter_sjis_mobile.c
libmbfl/filters/mbfilter_sjis_mac.c
libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c
libmbfl/filters/mbfilter_ucs2.c
libmbfl/filters/mbfilter_ucs4.c

View File

@ -35,7 +35,7 @@ if (PHP_MBSTRING != "no") {
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
mbfilter_koi8u.c mbfilter_cp1254.c \
mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c \
mbfilter_cp5022x.c mbfilter_sjis_open.c \
mbfilter_cp5022x.c mbfilter_sjis_open.c mbfilter_sjis_mobile.c mbfilter_sjis_mac.c \
mbfilter_tl_jisx0201_jisx0208.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \

View File

@ -1,6 +1,7 @@
EXTRA_DIST=mk_sb_tbl.awk
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
PERL=perl
libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_hz.c \
@ -33,6 +34,8 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_base64.c \
mbfilter_sjis.c \
mbfilter_sjis_open.c \
mbfilter_sjis_mobile.c \
mbfilter_sjis_mac.c \
mbfilter_7bit.c \
mbfilter_qprint.c \
mbfilter_ucs4.c \
@ -102,6 +105,8 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_qprint.h \
mbfilter_sjis.h \
mbfilter_sjis_open.h \
mbfilter_sjis_mobile.h \
mbfilter_sjis_mac.h \
mbfilter_ucs2.h \
mbfilter_ucs4.h \
mbfilter_uhc.h \
@ -143,7 +148,9 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
unicode_table_armscii8.h \
unicode_table_cp850.h \
unicode_table_uhc.h \
translit_kana_jisx0201_jisx0208.h
translit_kana_jisx0201_jisx0208.h \
emoji2uni.h \
sjis_mac2uni.h
mbfilter_iso8859_2.c: unicode_table_iso8859_2.h
@ -238,8 +245,14 @@ unicode_table_iso8859_16.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_16_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLEISO8859_16_H -f mk_sb_tbl.awk 8859-16.TXT > $@
EmojiSources.txt :
$(FETCH_VIA_FTP) ftp://ftp.unicode.org/Public/UNIDATA/$@
emoji2uni.h : mk_emoji_tbl.pl
$(PERL) mk_emoji_tbl.pl EmojiSources.txt
unidata: 8859-1.TXT 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT 8859-6.TXT \
8859-7.TXT 8859-8.TXT 8859-9.TXT 8859-10.TXT 8859-11.TXT 8859-13.TXT \
8859-14.TXT 8859-15.TXT 8859-16.TXT
8859-14.TXT 8859-15.TXT 8859-16.TXT EmojiSources.txt
.PHONY: unidata

File diff suppressed because it is too large Load Diff

View File

@ -250,7 +250,7 @@ retry:
CK((*filter->output_function)(0x203e, filter->data));
} else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */
CK((*filter->output_function)(0xff40 + c, filter->data));
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x93) { /* kanji first char */
filter->cache = c;
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
@ -282,10 +282,7 @@ retry:
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
} else if (s >= 94 * 94 && s < 114 * 94) {
/* user-defined => PUA (Microsoft extended) */
w = (s & 0xff) + ((s >> 8) - 94) * 94 + 0xe000;
} else if (s >= 212 * 94 && s < 222 * 94) {
/* user-defined => PUA (G3 85 - 94 Ku) */
w = (s & 0xff) + ((s >> 8) - 212) * 94 + 0xe000 + 10 * 94;
w = s - 94*94 + 0xe000;
} else {
w = 0;
}

View File

@ -0,0 +1,865 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_sjis_open.c
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_sjis_mac.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_sjis_mac(int c, mbfl_identify_filter *filter);
/*
 * Character byte length for SJIS-mac, indexed by the first byte of a
 * character.  Only the bytes that the decoder in this file accepts as the
 * lead byte of a two-byte character (0x81-0x9f and 0xe0-0xfc) are marked 2.
 * 0x80, 0xfd, 0xfe and 0xff are complete single-byte characters in this
 * encoding (decoded to U+005C, U+00A9, U+2122 and U+2026 U+F87F), so they
 * must be reported as length 1 just like ASCII and half-width kana.
 */
static const unsigned char mblen_table_sjis[] = { /* 0x81-0x9f,0xE0-0xFC */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
/* NULL-terminated list of alternative names accepted for this encoding. */
static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL};
/*
 * Encoding descriptor for SJIS-mac.  It ties together the encoding id, the
 * two name strings, the alias list above and the per-lead-byte length table.
 * NOTE(review): "SJIS-mac" / "Shift_JIS" are presumably the canonical name
 * and the MIME name -- confirm against the mbfl_encoding field order.
 */
const mbfl_encoding mbfl_encoding_sjis_mac = {
mbfl_no_encoding_sjis_mac,
"SJIS-mac",
"Shift_JIS",
(const char *(*)[])&mbfl_encoding_sjis_mac_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
/*
 * Identify-filter table for SJIS-mac: the shared common constructor and
 * destructor plus this file's per-byte checker mbfl_filt_ident_sjis_mac().
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_mac = {
mbfl_no_encoding_sjis_mac,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_sjis_mac
};
/*
 * Conversion table SJIS-mac => wchar.  The decoder keeps no output pending
 * between calls, so the shared common flush function is used.
 */
const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = {
mbfl_no_encoding_sjis_mac,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_sjis_mac_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion table wchar => SJIS-mac.  The encoder may hold back a character
 * while waiting for a following modifier, so it installs its own flush
 * function, mbfl_filt_conv_sjis_mac_flush(), instead of the common one.
 */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_sjis_mac,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_sjis_mac,
mbfl_filt_conv_sjis_mac_flush
};
/*
 * Evaluate `statement` once and make the enclosing function return -1 when
 * the result is negative.  Only usable inside functions returning int.
 */
#define CK(statement)	\
	do {	\
		if ((statement) < 0) {	\
			return (-1);	\
		}	\
	} while (0)
/*
 * Turn a JIS byte pair (c1, c2) into the Shift_JIS lead byte s1 and trail
 * byte s2.  c1 and c2 are evaluated several times, so pass side-effect free
 * expressions; s1 and s2 must be assignable int lvalues distinct from c1/c2.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
	do {	\
		s1 = ((c1) - 1) >> 1;	\
		s1 += ((c1) < 0x5f) ? 0x71 : 0xb1;	\
		if ((c1) & 1) {	\
			/* odd row: trail bytes skip 0x7f */	\
			s2 = ((c2) < 0x60) ? ((c2) + 0x1f) : ((c2) + 0x20);	\
		} else {	\
			/* even row: second half of the trail byte range */	\
			s2 = (c2) + 0x7e;	\
		}	\
	} while (0)
/*
 * Inverse of SJIS_ENCODE: split a Shift_JIS lead byte c1 and trail byte c2
 * into the JIS byte pair (s1, s2).  s1 and s2 must be assignable int lvalues
 * distinct from c1/c2.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
	do {	\
		s1 = c1;	\
		s1 -= (s1 < 0xa0) ? 0x81 : 0xc1;	\
		s1 = (s1 << 1) + 0x21;	\
		s2 = c2;	\
		if (s2 >= 0x9f) {	\
			/* upper trail range selects the even row */	\
			s1++;	\
			s2 -= 0x7e;	\
		} else if (s2 < 0x7f) {	\
			s2 -= 0x1f;	\
		} else {	\
			s2 -= 0x20;	\
		}	\
	} while (0)
#include "sjis_mac2uni.h"
/*
 * Contiguous runs used by the SJIS-mac decoder.  Each row is
 * {first index, last index, first code point}; an index s inside the run
 * decodes to s - first index + first code point.  "index" is the linear
 * position (row * 94 + cell) the decoder computes from the byte pair.
 * The decoder walks exactly 7 rows, so keep the loop bound in sync.
 */
const int code_tbl[][3] = {
{0x02f0, 0x0303, 0x2460},
{0x030e, 0x0321, 0x2474},
{0x032c, 0x0334, 0x2776},
{0x0341, 0x0349, 0x2488},
{0x034e, 0x0359, 0x2160},
{0x0362, 0x036d, 0x2170},
{0x038a, 0x03a3, 0x249c},
};
/*
 * Index ranges {first, last} that are decoded through a per-range lookup
 * array.  Row i pairs with code_map[i]; the decoder reads
 * code_map[i][s - first].  The decoder walks exactly 8 rows.
 */
const int code_ofst_tbl[] [2]= {
{0x03ac, 0x03c9},
{0x0406, 0x0420},
{0x0432, 0x0441},
{0x0468, 0x0480},
{0x04b8, 0x04e8},
{0x050c, 0x0551},
{0x1ed9, 0x1f18},
{0x1ff2, 0x20a5},
};
/*
 * Lookup arrays for the ranges in code_ofst_tbl, in the same order.
 * The arrays themselves are not defined in this file (presumably they come
 * from sjis_mac2uni.h -- confirm).  The decoder treats values >= 0x10000 as
 * a base code point in the low 16 bits plus a flag bit selecting a trailing
 * modifier (0x10000 => U+F87A, 0x20000 => U+F87E, 0x40000 => U+F87F,
 * 0x80000 => U+20DD).
 */
const int *code_map[] = {
sjis_mac2wchar1, sjis_mac2wchar2, sjis_mac2wchar3, sjis_mac2wchar4,
sjis_mac2wchar5, sjis_mac2wchar6, sjis_mac2wchar7, sjis_mac2wchar8};
/*
 * Codes that map to a sequence of code points.  Each row is
 * {index, hint, cp1, cp2, cp3, cp4}: hint U+F860 is followed by two code
 * points, U+F861 by three and U+F862 by four; unused slots are 0.
 * Row order matters: the encoder addresses the rows by position
 * (rows 0-4 are U+F860, rows 5-7 are U+F861, rows 8-11 are U+F862).
 */
const int code_tbl_m[][6] = {
{0x0340, 0xf860, 0x0030, 0x002e, 0x0000, 0x0000},
{0x03c9, 0xf860, 0x0054, 0x0042, 0x0000, 0x0000},
{0x035c, 0xf860, 0x0058, 0x0056, 0x0000, 0x0000},
{0x0370, 0xf860, 0x0078, 0x0076, 0x0000, 0x0000},
{0x0439, 0xf860, 0x2193, 0x2191, 0x0000, 0x0000},
{0x0409, 0xf861, 0x0046, 0x0041, 0x0058, 0x0000},
{0x035b, 0xf861, 0x0058, 0x0049, 0x0056, 0x0000},
{0x036f, 0xf861, 0x0078, 0x0069, 0x0076, 0x0000},
{0x035a, 0xf862, 0x0058, 0x0049, 0x0049, 0x0049},
{0x036e, 0xf862, 0x0078, 0x0069, 0x0069, 0x0069},
{0x0522, 0xf862, 0x6709, 0x9650, 0x4f1a, 0x793e},
{0x0523, 0xf862, 0x8ca1, 0x56e3, 0x6cd5, 0x4eba},
};
/*
 * Base code points that the encoder holds back because a following modifier
 * may select a special form.  Entries are grouped by modifier and the
 * encoder addresses the groups by fixed offsets: 0-33 (U+F87E),
 * 34-36 (U+F87F), 37-39 (U+20DD), 40-43 (U+F87A).  s_form_sjis_tbl and
 * s_form_sjis_fallback_tbl are parallel arrays with the same indexing.
 * U+2026 appears twice (index 2 and index 34); linear searches hit index 2.
 */
const int s_form_tbl[] = {
0x2010,0x2016,0x2026,
0x3001,0x3002,0x301c,0x3041,0x3043,0x3045,0x3047,0x3049,
0x3063,0x3083,0x3085,0x3087,0x308e,0x30a1,0x30a3,0x30a5,
0x30a7,0x30a9,0x30c3,0x30e3,0x30e5,0x30e7,0x30ee,0x30f5,
0x30f6,0x30fc,0xff1d,0xff3b,0xff3d,0xff5c,0xffe3, // vertical f87e (34)
0x2026,0xff47,0xff4d, // halfwidth f87f (3)
0x5927,0x5c0f,0x63a7, // enclosing circle 20dd (3)
0x21e6,0x21e7,0x21e8,0x21e9, // black arrow f87a (4)
};
/*
 * Output codes for "base code point + matching modifier", parallel to
 * s_form_tbl.  The encoder writes these values as raw bytes (one byte when
 * the value is below 0x100, otherwise high byte then low byte) without
 * passing them through SJIS_ENCODE.
 */
const int s_form_sjis_tbl[] = {
0xeb5d,0xeb61,0xeb63,
0xeb41,0xeb42,0xeb60,0xec9f,0xeca1,0xeca3,0xeca5,0xeca7,
0xecc1,0xece1,0xece3,0xece5,0xecec,0xed40,0xed42,0xed44,
0xed46,0xed48,0xed62,0xed83,0xed85,0xed87,0xed8e,0xed95,
0xed96,0xeb5b,0xeb81,0xeb6d,0xeb6e,0xeb62,0xeb50, // vertical
0x00ff,0x864b,0x8645, // halfwidth
0x8791,0x8792,0x879d, // enclosing circle
0x86d4,0x86d5,0x86d3,0x86d6, // black arrow
};
/*
 * Output codes for a held-back base code point that was NOT followed by its
 * modifier, parallel to s_form_tbl.  Used by the encoder when the next
 * character does not complete the pair and by the flush function when input
 * ends while a base code point is still pending.  Written as raw bytes.
 */
const int s_form_sjis_fallback_tbl[] = {
0x815d,0x8161,0x8163,
0x8141,0x8142,0x8160,0x829f,0x82a1,0x82a3,0x82a5,0x82a7,
0x82c1,0x82e1,0x82e3,0x82e5,0x82ec,0x8340,0x8342,0x8344,
0x8346,0x8348,0x8362,0x8383,0x8385,0x8387,0x838e,0x8395,
0x8396,0x815b,0x8181,0x816d,0x816e,0x8162,0x8150, // vertical
0x815d,0x8287,0x828d, // halfwidth
0x91e5,0x8fac,0x8d54, // enclosing circle
0x86d0,0x86d1,0x86cf,0x86d2, // arrow
};
/*
 * Contiguous runs used by the encoder.  Each row is
 * {first code point, last code point, first index}; a code point c inside
 * the run encodes to index c - first code point + first index.
 */
const int wchar2sjis_mac_r_tbl[][3] = {
{0x2160, 0x216b, 0x034e},
{0x2170, 0x217b, 0x0362},
{0x2460, 0x2473, 0x02f0},
{0x2474, 0x2487, 0x030e},
{0x2488, 0x2490, 0x0341},
{0x249c, 0x24b5, 0x038a},
{0x2776, 0x277e, 0x032c},
{0x30f7, 0x30fa, 0x054e},
{0x32a4, 0x32a9, 0x04ba},
};
/*
 * Code point ranges {first, last} that are encoded through a per-range
 * lookup array.  Row i pairs with wchar2sjis_mac_code_map[i]; the encoder
 * reads wchar2sjis_mac_code_map[i][c - first].
 */
const int wchar2sjis_mac_r_map[][2] = {
{0x2660, 0x2667},
{0x322a, 0x3243},
{0x3296, 0x329e},
{0x3300, 0x33d4},
{0xfe30, 0xfe44},
};
/*
 * Lookup arrays for the ranges in wchar2sjis_mac_r_map, in the same order.
 * The arrays are not defined in this file (presumably sjis_mac2uni.h --
 * confirm).
 */
const int *wchar2sjis_mac_code_map[] = {
wchar2sjis_mac4, wchar2sjis_mac7, wchar2sjis_mac8, wchar2sjis_mac9, wchar2sjis_mac10};
/*
 * Isolated code points handled by the encoder: {code point, index}.
 * The encoder masks the index with 0xffff when it reads this table.
 * NOTE(review): the U+2110 row holds the five-digit literal 0x21ef5, which
 * the mask reduces to 0x1ef5; every other row is a four-digit index.
 * Confirm that this value is intended.
 */
const int wchar2sjis_mac_wchar_tbl[][2] = {
{0x2109, 0x03c2},
{0x2110, 0x21ef5},
{0x2113, 0x03bc},
{0x2116, 0x0406},
{0x2121, 0x0408},
{0x21c4, 0x0437},
{0x21c5, 0x0438},
{0x21c6, 0x0436},
{0x21e6, 0x043b},
{0x21e7, 0x043c},
{0x21e8, 0x043a},
{0x21e9, 0x043d},
{0x221f, 0x0525},
{0x222e, 0x0524},
{0x22bf, 0x0526},
{0x260e, 0x041f},
{0x261c, 0x0433},
{0x261d, 0x0434},
{0x261e, 0x0432},
{0x261f, 0x0435},
{0x3004, 0x0420},
{0x301d, 0x0538},
{0x301f, 0x0539},
{0x3020, 0x041e},
{0x3094, 0x054c},
};
/*
 * SJIS-mac => wchar
 *
 * Consumes one input byte `c` per call and passes the decoded code point(s)
 * to filter->output_function.  filter->status is 0 when idle and 1 while the
 * lead byte of a two-byte character is parked in filter->cache; any other
 * status is reset to 0.
 *
 * Returns c, or -1 as soon as an output call reports a negative result.
 */
int
mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
{
	int idx, k, count;
	int lead, pos, row, cell, wc;

	if (filter->status == 0) {
		if (c == 0x5c) {
			/* byte 0x5c is the yen sign in this encoding */
			CK((*filter->output_function)(0x00a5, filter->data));
		} else if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xe0) {	/* half-width kana */
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			switch (c) {
			case 0x80:	/* backslash */
				CK((*filter->output_function)(0x005c, filter->data));
				break;
			case 0xa0:	/* no-break space */
				CK((*filter->output_function)(0x00a0, filter->data));
				break;
			case 0xfd:	/* copyright sign */
				CK((*filter->output_function)(0x00a9, filter->data));
				break;
			case 0xfe:	/* trade mark sign */
				CK((*filter->output_function)(0x2122, filter->data));
				break;
			case 0xff:	/* ellipsis followed by the U+F87F modifier */
				CK((*filter->output_function)(0x2026, filter->data));
				CK((*filter->output_function)(0xf87f, filter->data));
				break;
			default:	/* anything else is passed through tagged */
				wc = c & MBFL_WCSGROUP_MASK;
				wc |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(wc, filter->data));
				break;
			}
		}
		return c;
	}

	if (filter->status != 1) {
		filter->status = 0;
		return c;
	}

	/* second byte of a two-byte character */
	filter->status = 0;
	lead = filter->cache;

	if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
		wc = 0;
		SJIS_DECODE(lead, c, row, cell);
		pos = (row - 0x21)*94 + cell - 0x21;

		/* punctuation whose mapping differs from the JIS X 0208 table */
		switch (pos) {
		case 0x1c: wc = 0x2014; break;	/* EM DASH */
		case 0x1f: wc = 0xff3c; break;	/* FULLWIDTH REVERSE SOLIDUS */
		case 0x20: wc = 0x301c; break;	/* WAVE DASH */
		case 0x21: wc = 0x2016; break;	/* DOUBLE VERTICAL LINE */
		case 0x3c: wc = 0x2212; break;	/* MINUS SIGN */
		case 0x50: wc = 0x00a2; break;	/* CENT SIGN */
		case 0x51: wc = 0x00a3; break;	/* POUND SIGN */
		case 0x89: wc = 0x00ac; break;	/* NOT SIGN */
		default: break;
		}

		/* apple gaiji area 0x8540 - 0x886d: contiguous runs */
		if (wc == 0) {
			for (idx = 0; idx < 7; idx++) {
				if (pos < code_tbl[idx][0] || pos > code_tbl[idx][1]) {
					continue;
				}
				wc = pos - code_tbl[idx][0] + code_tbl[idx][2];
				break;
			}
		}

		/* codes that expand to a hint plus two to four code points */
		if (wc == 0) {
			for (idx = 0; idx < 12; idx++) {
				if (pos != code_tbl_m[idx][0]) {
					continue;
				}
				switch (code_tbl_m[idx][1]) {
				case 0xf860: count = 4; break;
				case 0xf861: count = 5; break;
				default:     count = 6; break;
				}
				/* emit all but the last element now; the last one goes out below */
				for (k = 1; k < count - 1; k++) {
					CK((*filter->output_function)(code_tbl_m[idx][k], filter->data));
				}
				wc = code_tbl_m[idx][count - 1];
				break;
			}
		}

		/* ranges served by per-range lookup arrays */
		if (wc == 0) {
			for (idx = 0; idx < 8; idx++) {
				if (pos < code_ofst_tbl[idx][0] || pos > code_ofst_tbl[idx][1]) {
					continue;
				}
				wc = code_map[idx][pos - code_ofst_tbl[idx][0]];
				if (wc >= 0x10000) {
					/* low 16 bits: base code point; flag bit: trailing modifier */
					CK((*filter->output_function)(wc & 0xffff, filter->data));
					if (wc & 0x10000) {
						wc = 0xf87a;
					} else if (wc & 0x20000) {
						wc = 0xf87e;
					} else if (wc & 0x40000) {
						wc = 0xf87f;
					} else if (wc & 0x80000) {
						wc = 0x20dd;
					} else {
						wc = 0;
					}
				}
				break;
			}
		}

		if (wc == 0 && pos >= 0 && pos < jisx0208_ucs_table_size) {	/* X 0208 */
			wc = jisx0208_ucs_table[pos];
		}

		if (wc <= 0) {
			/* no mapping: hand the JIS pair on, tagged with the CP932 plane */
			wc = (row << 8) | cell;
			wc &= MBFL_WCSPLANE_MASK;
			wc |= MBFL_WCSPLANE_WINCP932;
		}
		CK((*filter->output_function)(wc, filter->data));
	} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
		CK((*filter->output_function)(c, filter->data));
	} else {
		wc = (lead << 8) | c;
		wc &= MBFL_WCSGROUP_MASK;
		wc |= MBFL_WCSGROUP_THROUGH;
		CK((*filter->output_function)(wc, filter->data));
	}

	return c;
}
/*
 * wchar => SJIS-mac
 *
 * Consumes one code point `c` per call and writes the encoded byte(s) to
 * filter->output_function.  Returns c, or -1 as soon as an output call
 * reports a negative result.
 *
 * filter->status:
 *   0    idle
 *   1    filter->cache holds a base code point from s_form_tbl that may be
 *        completed by a following modifier (U+F87A, U+F87E, U+F87F, U+20DD)
 *   2    filter->cache holds a hint U+F860/U+F861/U+F862; the first code
 *        point of the sequence is expected
 *   3-5  the 2nd/3rd/4th code point of the sequence is expected; the low 16
 *        bits of filter->cache hold the previous code point and bits 16-18
 *        identify the hint (0x10000 = F860, 0x20000 = F861, 0x40000 = F862)
 *
 * Code points that cannot be encoded are reported through
 * mbfl_filt_conv_illegal_output() unless illegal_mode is NONE.
 */
int
mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
{
	int i;
	int c1, c2, s1, s2, mode;

	s1 = 0;
	s2 = 0;

	/* a1: U+0000 -> U+046F */
	/* a2: U+2000 -> U+30FF */
	/* i:  U+4E00 -> U+9FFF */
	/* r:  U+FF00 -> U+FFFF */

	switch (filter->status) {
	case 1:
		c1 = filter->cache;
		filter->cache = 0;
		filter->status = 0;

		s1 = 0;
		s2 = 0;

		/* Does c complete a "base + modifier" pair with the held-back c1? */
		if (c == 0xf87a) {
			for (i=0;i<4;i++) {
				if (c1 == s_form_tbl[i+34+3+3]) {
					s1 = s_form_sjis_tbl[i+34+3+3];
					break;
				}
			}
			if (s1 <= 0) {
				s2 = c1;
			}
		} else if (c == 0x20dd) {
			for (i=0;i<3;i++) {
				if (c1 == s_form_tbl[i+34+3]) {
					s1 = s_form_sjis_tbl[i+34+3];
					break;
				}
			}
			if (s1 <= 0) {
				s2 = c1;
			}
		} else if (c == 0xf87f) {
			for (i=0;i<3;i++) {
				if (c1 == s_form_tbl[i+34]) {
					s1 = s_form_sjis_tbl[i+34];
					break;
				}
			}
			if (s1 <= 0) {
				s2 = c1; s1 = -1;
			}
		} else if (c == 0xf87e) {
			for (i=0;i<34;i++) {
				if (c1 == s_form_tbl[i]) {
					s1 = s_form_sjis_tbl[i];
					break;
				}
			}
			if (s1 <= 0) {
				s2 = c1; s1 = -1;
			}
		} else {
			s2 = c1;
			s1 = c;
		}

		/* No pair: s2 carries c1, which is emitted in its plain form. */
		if (s2 > 0) {
			for (i=0;i<sizeof(s_form_tbl)/sizeof(int);i++) {
				if (c1 == s_form_tbl[i]) {
					s1 = s_form_sjis_fallback_tbl[i];
					break;
				}
			}
		}

		if (s1 >= 0) {
			/* table values are final byte values, written without SJIS_ENCODE */
			if (s1 < 0x100) {
				CK((*filter->output_function)(s1, filter->data));
			} else {
				CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
				CK((*filter->output_function)(s1 & 0xff, filter->data));
			}
		} else {
			if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		}

		if (s2 <= 0 || s1 == -1) {
			break;
		}

		/*
		 * c did not pair with c1 and still has to be encoded on its own.
		 * Start from a clean slate: case 0 only assigns s1 when c falls in
		 * one of its table ranges and reads s2 as a flag, so leftovers from
		 * the fallback above would otherwise be encoded in place of c.
		 */
		s1 = 0;
		s2 = 0;
		/* fallthrough */

	case 0:
		if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
			s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
			if (c == 0x5c) {
				s1 = 0x80;
			} else if (c == 0xa9) {
				s1 = 0xfd;
			}
		} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
			s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
			if (c == 0x2122) {
				s1 = 0xfe;
			} else if (c == 0x2014) {
				s1 = 0x213d;
			} else if (c == 0x2116) {
				s1 = 0x2c1d;
			}
		} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
			s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
		} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
			s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
		}

		if (c >= 0x2000) {
			/* hold back characters that may be completed by a modifier */
			for (i=0;i<sizeof(s_form_tbl)/sizeof(int);i++) {
				if (c == s_form_tbl[i]) {
					filter->status = 1;
					filter->cache = c;
					return c;
				}
			}

			/* a hint announces a multi-character sequence */
			if (c == 0xf860 || c == 0xf861 || c == 0xf862) {
				filter->status = 2;
				filter->cache = c;
				return c;
			}
		}

		if (s1 <= 0) {
			c1 = c & ~MBFL_WCSPLANE_MASK;
			if (c1 == MBFL_WCSPLANE_WINCP932) {
				s1 = c & MBFL_WCSPLANE_MASK;
				s2 = 1;
			} else if (c1 == MBFL_WCSPLANE_JIS0208) {
				s1 = c & MBFL_WCSPLANE_MASK;
			} else if (c1 == MBFL_WCSPLANE_JIS0212) {
				s1 = c & MBFL_WCSPLANE_MASK;
				s1 |= 0x8080;
			} else if (c == 0xa0) {
				s1 = 0x00a0;
			} else if (c == 0xa5) {		/* YEN SIGN */
				s1 = 0x005c;
			} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
				s1 = 0x2140;
			}
		}

		if (s1 <= 0) {
			/* Apple-specific tables; these yield a linear index, not a JIS pair */
			for (i=0; i<sizeof(wchar2sjis_mac_r_tbl)/(3*sizeof(int));i++) {
				if (c >= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) {
					s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2];
					break;
				}
			}

			if (s1 <= 0) {
				for (i=0; i<sizeof(wchar2sjis_mac_r_map)/(2*sizeof(int));i++) {
					if (c >= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) {
						s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]];
						break;
					}
				}
			}

			if (s1 <= 0) {
				for (i=0; i<sizeof(wchar2sjis_mac_wchar_tbl)/(2*sizeof(int));i++) {
					if ( c == wchar2sjis_mac_wchar_tbl[i][0]) {
						s1 = wchar2sjis_mac_wchar_tbl[i][1] & 0xffff;
						break;
					}
				}
			}

			if (s1 > 0) {
				/* linear index => JIS byte pair */
				c1 = s1/94+0x21;
				c2 = s1-94*(c1-0x21)+0x21;
				s1 = (c1 << 8) | c2;
				s2 = 1;
			}
		}

		if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {	/* not found or X 0212 */
			s1 = -1;
			c1 = 0;

			if (c == 0) {
				s1 = 0;
			} else if (s1 <= 0) {
				s1 = -1;
			}
		}

		if (s1 >= 0) {
			if (s1 < 0x100) {	/* latin or kana */
				CK((*filter->output_function)(s1, filter->data));
			} else {	/* kanji */
				c1 = (s1 >> 8) & 0xff;
				c2 = s1 & 0xff;
				SJIS_ENCODE(c1, c2, s1, s2);
				CK((*filter->output_function)(s1, filter->data));
				CK((*filter->output_function)(s2, filter->data));
			}
		} else {
			if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		}
		break;

	case 2:
		/* first code point after a hint */
		c1 = filter->cache;
		filter->cache = 0;
		filter->status = 0;
		if (c1 == 0xf860) {
			for (i=0; i<5; i++) {
				if (c == code_tbl_m[i][2]) {
					filter->cache = c | 0x10000;
					filter->status = 3;
					break;
				}
			}
		} else if (c1 == 0xf861) {
			for (i=0; i<3; i++) {
				if (c == code_tbl_m[i+5][2]) {
					filter->cache = c | 0x20000;
					filter->status = 3;
					break;
				}
			}
		} else if (c1 == 0xf862) {
			for (i=0; i<4; i++) {
				if (c == code_tbl_m[i+5+3][2]) {
					filter->cache = c | 0x40000;
					filter->status = 3;
					break;
				}
			}
		}

		if (filter->status == 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c1, filter));
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		break;

	case 3:
		/* second code point of a sequence */
		s1 = 0;
		c1 = filter->cache & 0xffff;
		mode = (filter->cache & 0xf0000) >> 16;

		filter->cache = 0;
		filter->status = 0;

		if (mode == 0x1) {
			/* U+F860 sequences are complete after two code points */
			for (i=0; i<5; i++) {
				if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) {
					s1 = code_tbl_m[i][0];
					break;
				}
			}

			if (s1 > 0) {
				c1 = s1/94+0x21;
				c2 = s1-94*(c1-0x21)+0x21;
				SJIS_ENCODE(c1, c2, s1, s2);
				CK((*filter->output_function)(s1, filter->data));
				CK((*filter->output_function)(s2, filter->data));
			}

			if (s1 <= 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(0xf860, filter));
				CK(mbfl_filt_conv_illegal_output(c1, filter));
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		} else if (mode == 0x2) {
			for (i=0; i<3; i++) {
				if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) {
					filter->cache = c | 0x20000;
					filter->status = 4;
					break;
				}
			}

			/* report a broken sequence instead of dropping it silently */
			if (filter->status == 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(0xf861, filter));
				CK(mbfl_filt_conv_illegal_output(c1, filter));
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		} else if (mode == 0x4) {
			for (i=0; i<4; i++) {
				if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) {
					filter->cache = c | 0x40000;
					filter->status = 4;
					break;
				}
			}

			/* report a broken sequence instead of dropping it silently */
			if (filter->status == 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(0xf862, filter));
				CK(mbfl_filt_conv_illegal_output(c1, filter));
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		}
		break;

	case 4:
		/* third code point of a sequence */
		s1 = 0;
		c1 = filter->cache & 0xffff;
		mode = (filter->cache & 0xf0000) >> 16;

		filter->cache = 0;
		filter->status = 0;

		if (mode == 0x2) {
			/* U+F861 sequences are complete after three code points */
			for (i=0; i<3; i++) {
				if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) {
					s1 = code_tbl_m[i+5][0];
					break;
				}
			}

			if (s1 > 0) {
				c1 = s1/94+0x21;
				c2 = s1-94*(c1-0x21)+0x21;
				SJIS_ENCODE(c1, c2, s1, s2);
				CK((*filter->output_function)(s1, filter->data));
				CK((*filter->output_function)(s2, filter->data));
			}

			if (s1 <= 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(0xf861, filter));
				/* the first code point is no longer cached; recover it from the table */
				for (i=0; i<3; i++) {
					if (c1 == code_tbl_m[i+5][3]) {
						CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter));
						break;
					}
				}
				CK(mbfl_filt_conv_illegal_output(c1, filter));
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		} else if (mode == 0x4) {
			for (i=0; i<4; i++) {
				if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) {
					filter->cache = c | 0x40000;
					filter->status = 5;
					break;
				}
			}

			/* report a broken sequence instead of dropping it silently */
			if (filter->status == 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(0xf862, filter));
				/* the first code point is no longer cached; recover it from the table */
				for (i=0; i<4; i++) {
					if (c1 == code_tbl_m[i+8][3]) {
						CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+8][2], filter));
						break;
					}
				}
				CK(mbfl_filt_conv_illegal_output(c1, filter));
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		}
		break;

	case 5:
		/* fourth code point of a U+F862 sequence */
		s1 = 0;
		c1 = filter->cache & 0xffff;
		mode = (filter->cache & 0xf0000) >> 16;

		filter->cache = 0;
		filter->status = 0;

		if (mode == 0x4) {
			for (i=0; i<4; i++) {
				if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) {
					s1 = code_tbl_m[i+8][0];
					break;
				}
			}

			if (s1 > 0) {
				c1 = s1/94+0x21;
				c2 = s1-94*(c1-0x21)+0x21;
				SJIS_ENCODE(c1, c2, s1, s2);
				CK((*filter->output_function)(s1, filter->data));
				CK((*filter->output_function)(s2, filter->data));
			}

			if (s1 <= 0 && filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
				CK(mbfl_filt_conv_illegal_output(0xf862, filter));
				/* the first two code points are no longer cached; recover them from the table */
				for (i=0; i<4; i++) {
					if (c1 == code_tbl_m[i+8][4]) {
						CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter));
						CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter));
						break;
					}
				}
				CK(mbfl_filt_conv_illegal_output(c1, filter));
				CK(mbfl_filt_conv_illegal_output(c, filter));
			}
		}
		break;

	default:
		filter->status = 0;
		break;
	}
	return c;
}
/*
 * Flush for the wchar => SJIS-mac encoder.
 *
 * When a base code point is still held back (status 1), its plain form from
 * s_form_sjis_fallback_tbl is written as two bytes.  The state is then
 * cleared and the flush request is forwarded to the next stage, if any.
 *
 * Returns the result of the downstream flush function, 0 when there is
 * none, or -1 if writing the held-back character fails.
 */
int
mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter)
{
	int idx;
	int pending;
	int fallback = 0;

	if (filter->status == 1 && filter->cache > 0) {
		pending = filter->cache;
		for (idx = 0; idx < sizeof(s_form_tbl)/sizeof(int); idx++) {
			if (s_form_tbl[idx] != pending) {
				continue;
			}
			fallback = s_form_sjis_fallback_tbl[idx];
			break;
		}
		if (fallback > 0) {
			CK((*filter->output_function)((fallback >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(fallback & 0xff, filter->data));
		}
	}

	filter->cache = 0;
	filter->status = 0;

	if (filter->flush_function == NULL) {
		return 0;
	}
	return (*filter->flush_function)(filter->data);
}
/*
 * Identify filter for SJIS-mac: inspects one byte `c` per call and sets
 * filter->flag to 1 when the byte cannot occur at this position.
 * filter->status is non-zero while a trail byte is expected.  Returns c.
 */
static int mbfl_filt_ident_sjis_mac(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* trail byte: must lie in 0x40-0xfc and must not be 0x7f */
		int trail_ok = (c >= 0x40 && c <= 0xfc && c != 0x7f);

		if (!trail_ok) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if ((c >= 0 && c < 0x80) || (c > 0xa0 && c < 0xe0)) {
		/* latin or half-width kana: fine as a single byte */
		return c;
	}

	if (c > 0x80 && c < 0xfd && c != 0xa0) {
		/* lead byte of a two-byte character */
		filter->status = 1;
	} else {
		filter->flag = 1;
	}
	return c;
}

View File

@ -0,0 +1,45 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_sjis_open.c
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
/* Public declarations for the SJIS-mac (MacJapanese) filter. */
#ifndef MBFL_MBFILTER_SJIS_MAC_H
#define MBFL_MBFILTER_SJIS_MAC_H
#include "mbfilter.h"
/* Encoding descriptor and the identify / convert tables built on it. */
extern const mbfl_encoding mbfl_encoding_sjis_mac;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_mac;
extern const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac;
/*
 * Per-character filter functions: each takes one input unit `c` and the
 * filter it belongs to.  The flush function emits any character the
 * wchar => SJIS-mac encoder is still holding back.
 */
int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_MAC_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_sjis_open.c
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
/*
 * Public declarations for the mobile-carrier Shift_JIS filters.
 * NOTE(review): judging by the names, "docomo", "kddi" and "sb" stand for
 * the DoCoMo, KDDI and SoftBank variants and "_pua" for variants using
 * private-use code points; the definitions are not visible here -- confirm.
 */
#ifndef MBFL_MBFILTER_SJIS_MOBILE_H
#define MBFL_MBFILTER_SJIS_MOBILE_H
#include "mbfilter.h"
/* Encoding descriptors, one per variant. */
extern const mbfl_encoding mbfl_encoding_sjis_docomo;
extern const mbfl_encoding mbfl_encoding_sjis_kddi;
extern const mbfl_encoding mbfl_encoding_sjis_sb;
extern const mbfl_encoding mbfl_encoding_sjis_docomo_pua;
extern const mbfl_encoding mbfl_encoding_sjis_kddi_pua;
extern const mbfl_encoding mbfl_encoding_sjis_kddi_pua_b;
extern const mbfl_encoding mbfl_encoding_sjis_sb_pua;
/* A single identify table is declared for all variants. */
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_mobile;
/* Conversion tables: one "to wchar" and one "from wchar" per variant. */
extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo;
extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi;
extern const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb;
extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_pua_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo_pua;
extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_pua_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi_pua;
extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_pua_b_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi_pua_b;
extern const struct mbfl_convert_vtbl vtbl_sjis_sb_pua_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb_pua;
/* Only one pair of conversion functions is declared for all variants. */
int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_MOBILE_H */

View File

@ -0,0 +1,425 @@
#!/usr/bin/perl
# script to generate Shift_JIS encoded Emoji to/from Unicode conversion table.
# Rui Hirokawa <hirokawa@php.net>
#
# usage: mk_emoji_tbl.pl EmojiSources.txt
#
# Input fields per line: Unicode;DoCoMo;KDDI;SoftBank
# Mapping containers.  NOTE: these statements initialise arrays, but the
# parsing loop stores into the hashes of the same names (%docomo, %kddi,
# %softbank, %to_docomo, %to_kddi, %to_sb), which are separate variables.
@docomo = ();
@kddi = ();
@softbank = ();
@to_docomo = ();
@to_kddi = ();
@to_sb = ();
# All generated tables are written to this header; it is truncated on open.
$fname = "emoji2uni.h";
open(OUT,">$fname") or die $!;
# Convert a Shift_JIS code given as four hex digits (e.g. "F89F") into the
# linear index (row * 94 + cell) used by the generated tables.
# Side effect: the result is also left in the global $s.
sub sjis2code {
# split the hex string into its two byte values
my @c = unpack("C*", pack("H4", $_[0]));
# Shift_JIS -> JIS
$c[0] = (($c[0]-($c[0]<160?112:176))<<1)-($c[1]<159?1:0);
$c[1] -= ($c[1]<159?($c[1]>127?32:31):126);
# JIS byte pair -> linear index
$s = ($c[0] - 0x21)*94 + $c[1]-0x21;
return $s;
}
# Format a list of hex strings as the body of a C array initialiser, four
# entries per line.  Empty, undefined or zero entries are written as 0x0000.
# When an entry holds several space-separated values only the first is
# written, and the entry is reported on STDOUT.
# Side effects: overwrites the globals $s, $i and @v.
sub show_code {
my @c = @_;
$s = "\t";
for ($i=0; $i<=$#c; $i++) {#
if ($c[$i]) {
@v = split(' ',$c[$i]);
$s .= "0x$v[0], \t";
if ($#v > 0) {
# multi-value entry: log index, first and second value
print "$i $v[0] $v[1]\n";
}
} else {
$s .= "0x0000, \t";
}
# line break after every fourth entry
if ($i % 4 == 3) {
$s .= "\n\t";
}
}
return $s;
}
# Read the input line by line.  Only lines that start with a digit are data
# lines; fields are separated by ';' in the order
# Unicode;DoCoMo;KDDI;SoftBank.  For each carrier field that contains hex
# digits, record both directions: carrier index => Unicode field and
# Unicode field => carrier index.
while(<>) {
if ($_ =~ /^\d+/) {
@v = split(/;/,$_);
if ($v[1] =~ /[\dA-F]+/) {
$code = &sjis2code($v[1]);
$docomo{$code} = $v[0];
$to_docomo{$v[0]} = $code;
}
if ($v[2] =~ /[\dA-F]+/) {
$code = &sjis2code($v[2]);
$kddi{$code} = $v[0];
$to_kddi{$v[0]} = $code;
}
if ($v[3] =~ /[\dA-F]+/) {
$code = &sjis2code($v[3]);
$softbank{$code} = $v[0];
$to_sb{$v[0]} = $code;
}
}
}
print "DoCoMo\n";
# Index window of the DoCoMo code => Unicode table.
$docomo_min = 10434;
$docomo_max = 10434+281;
@docomo_v = ();
# Place every Unicode value at (index - $docomo_min) in @docomo_v.
# $pos, $ku and $v are only used by the commented-out debug print.
foreach $key (sort {hex($a) <=> hex($b)} keys(%docomo)) {
$s = $key;
$pos = $s % 94;
$ku = ($s - $pos)/94;
$v = $key - $docomo_min;
#print "$ku:$pos - ". $v ."=> $docomo{$key}\n";
$docomo_v[$key-$docomo_min] = $docomo{$key};
}
# Unicode => DoCoMo direction.  The keys are split into three code point
# windows (min1..max1, min2..max2, min3..max3); each window gets a pair of
# parallel key/value lists.
$to_docomo_min = 10434;
$to_docomo_min1 = 0x0023;
$to_docomo_max1 = 0x00AE;
$to_docomo_min2 = 0x203C;
$to_docomo_max2 = 0x3299;
$to_docomo_min3 = 0x1F17F;
$to_docomo_max3 = 0x1F6BB;
@r_docomo1_key = ();
@r_docomo1_val = ();
@r_docomo2_key = ();
@r_docomo2_val = ();
@r_docomo3_key = ();
@r_docomo3_val = ();
# Distribute the Unicode => DoCoMo pairs over the three windows, in
# ascending code point order.  Keys go to window 1 up to $to_docomo_max1,
# to window 2 up to $to_docomo_max2 and to window 3 up to $to_docomo_max3.
# (The third test used to be ">=", which kept only keys at or above the
# upper bound and dropped the rest of window 3.)
# $pos, $ku and $v are only used by the commented-out debug print.
foreach $key (sort {hex($a) <=> hex($b)} keys(%to_docomo)) {
    $s = $to_docomo{$key};
    $pos = $s % 94;
    $ku = ($s - $pos)/94;
    $v = $to_docomo{$key} - $to_docomo_min;
    $h = sprintf("%x",$s);
    #print "$ku:$pos = $h ($v) <= $key\n";
    if (hex($key) <= $to_docomo_max1) {
	push(@r_docomo1_key, $key);
	push(@r_docomo1_val, sprintf("%x", $to_docomo{$key}));
    } elsif (hex($key) <= $to_docomo_max2) {
	push(@r_docomo2_key, $key);
	push(@r_docomo2_val, $h);
    } elsif (hex($key) <= $to_docomo_max3) {
	push(@r_docomo3_key, $key);
	push(@r_docomo3_val, $h);
    }
}
# Append a zero entry to every list; show_code writes it as 0x0000.
push(@r_docomo1_key, 0x00);
push(@r_docomo1_val, 0x00);
push(@r_docomo2_key, 0x00);
push(@r_docomo2_val, 0x00);
push(@r_docomo3_key, 0x00);
push(@r_docomo3_val, 0x00);
# Emit the DoCoMo code => Unicode table and its index window.
print OUT "int mb_tbl_code2uni_docomo_min = $docomo_min;\n";
print OUT "int mb_tbl_code2uni_docomo_max = $docomo_max;\n\n";
print OUT "int mb_tbl_code2uni_docomo[] = {\n";
print OUT &show_code(@docomo_v);
print OUT "};\n\n";
# Emit the three Unicode => DoCoMo windows and their key/value tables.
print OUT "int mb_tbl_uni_docomo2code_min1 = $to_docomo_min1;\n";
print OUT "int mb_tbl_uni_docomo2code_max1 = $to_docomo_max1;\n";
print OUT "int mb_tbl_uni_docomo2code_min2 = $to_docomo_min2;\n";
print OUT "int mb_tbl_uni_docomo2code_max2 = $to_docomo_max2;\n";
print OUT "int mb_tbl_uni_docomo2code_min3 = $to_docomo_min3;\n";
print OUT "int mb_tbl_uni_docomo2code_max3 = $to_docomo_max3;\n\n";
#print "DOCOMO reverse 1\n";
print OUT "int mb_tbl_uni_docomo2code_key1[] = {\n";
print OUT &show_code(@r_docomo1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val1[] = {\n";
print OUT &show_code(@r_docomo1_val),"\n";
print OUT "};\n\n";
#print "DOCOMO reverse 2\n";
print OUT "int mb_tbl_uni_docomo2code_key2[] = {\n";
print OUT &show_code(@r_docomo2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val2[] = {\n";
print OUT &show_code(@r_docomo2_val),"\n";
print OUT "};\n\n";
print "DOCOMO reverse 3\n";
print OUT "int mb_tbl_uni_docomo2code_key3[] = {\n";
print OUT &show_code(@r_docomo3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val3[] = {\n";
print OUT &show_code(@r_docomo3_val),"\n";
print OUT "};\n\n";
#print "DOCOMO reverse end \n";
# The KDDI code => Unicode direction uses two index windows.
$kddi_min1 = 9400;
$kddi_max1 = 9400+264;
$kddi_min2 = 9400+564;
$kddi_max2 = 9400+939;
@kddi_v1 = ();
@kddi_v2 = ();
#print "KDDI\n";
# Place each Unicode value in the table of the window its index falls in.
# Indexes above $kddi_max2 are not stored.
# $pos, $ku, $v and $h are only used by the commented-out debug print.
foreach $key (sort {hex($a) <=> hex($b)} keys(%kddi)) {
$s = $key;
$pos = $s % 94;
$ku = ($s - $pos)/94;
$v = $key - $kddi_min1;
$h = sprintf("%x",$key);
#print "$ku:$pos :: $v ($h) => $kddi{$key}\n";
if ($key <= $kddi_max1) {
$kddi_v1[$key-$kddi_min1] = $kddi{$key};
} elsif ($key <= $kddi_max2) {
$kddi_v2[$key-$kddi_min2] = $kddi{$key};
}
}
# Unicode => KDDI direction: three code point windows, each with a pair of
# parallel key/value lists.
$to_kddi_min = 9660;
$to_kddi_min1 = 0x0030;
$to_kddi_max1 = 0x00AE;
$to_kddi_min2 = 0x2002;
$to_kddi_max2 = 0x3299;
$to_kddi_min3 = 0x1F004;
$to_kddi_max3 = 0x1F6C0;
@r_kddi1_key = (); @r_kddi1_val = ();
@r_kddi2_key = (); @r_kddi2_val = ();
@r_kddi3_key = (); @r_kddi3_val = ();
# Distribute the pairs in ascending code point order; everything above
# $to_kddi_max2 goes to window 3.
# $pos, $ku and $v are only used by the commented-out debug print.
foreach $key (sort {hex($a) <=> hex($b)} keys(%to_kddi)) {
$s = $to_kddi{$key};
$pos = $s % 94;
$ku = ($s - $pos)/94;
$v = $to_kddi{$key} - $to_kddi_min;
$h = sprintf("%x",$s);
#print "$ku:$pos = $h ($v) <= $key\n";
if (hex($key) <= $to_kddi_max1) {
push(@r_kddi1_key, $key);
push(@r_kddi1_val, $h);
} elsif (hex($key) <= $to_kddi_max2) {
push(@r_kddi2_key, $key);
push(@r_kddi2_val, $h);
} else {
push(@r_kddi3_key, $key);
push(@r_kddi3_val, $h);
}
}
push(@r_kddi1_key, 0x00);
push(@r_kddi1_val, 0x00);
push(@r_kddi2_key, 0x00);
push(@r_kddi2_val, 0x00);
push(@r_kddi3_key, 0x00);
push(@r_kddi3_val, 0x00);
# Emit the KDDI section of the generated C source to OUT: first the two
# code -> Unicode window bounds, then the two forward tables, then the three
# Unicode -> code range bounds and their key/value reverse tables.
# Table bodies are produced by show_code (defined elsewhere in this script).
print OUT "int mb_tbl_code2uni_kddi1_min = $kddi_min1;\n";
print OUT "int mb_tbl_code2uni_kddi1_max = $kddi_max1;\n";
print OUT "int mb_tbl_code2uni_kddi2_min = $kddi_min2;\n";
print OUT "int mb_tbl_code2uni_kddi2_max = $kddi_max2;\n\n";
#print "KDDI 1\n";
# Forward tables: unlike the reverse tables below, no "\n" is appended after
# the show_code output here.
print OUT "int mb_tbl_code2uni_kddi1[] = {\n";
print OUT &show_code(@kddi_v1);
print OUT "};\n\n";
#print "KDDI 2\n";
print OUT "int mb_tbl_code2uni_kddi2[] = {\n";
print OUT &show_code(@kddi_v2);
print OUT "};\n\n";
# Bounds of the three Unicode ranges used by the reverse lookup.
print OUT "int mb_tbl_uni_kddi2code_min1 = $to_kddi_min1;\n";
print OUT "int mb_tbl_uni_kddi2code_max1 = $to_kddi_max1;\n";
print OUT "int mb_tbl_uni_kddi2code_min2 = $to_kddi_min2;\n";
print OUT "int mb_tbl_uni_kddi2code_max2 = $to_kddi_max2;\n";
print OUT "int mb_tbl_uni_kddi2code_min3 = $to_kddi_min3;\n";
print OUT "int mb_tbl_uni_kddi2code_max3 = $to_kddi_max3;\n\n";
#print "KDDI reverse 1\n";
print OUT "int mb_tbl_uni_kddi2code_key1[] = {\n";
print OUT &show_code(@r_kddi1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val1[] = {\n";
print OUT &show_code(@r_kddi1_val),"\n";
print OUT "};\n\n";
#print "KDDI reverse 2\n";
print OUT "int mb_tbl_uni_kddi2code_key2[] = {\n";
print OUT &show_code(@r_kddi2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val2[] = {\n";
print OUT &show_code(@r_kddi2_val),"\n";
print OUT "};\n\n";
#print "KDDI reverse 3\n";
print OUT "int mb_tbl_uni_kddi2code_key3[] = {\n";
print OUT &show_code(@r_kddi3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val3[] = {\n";
print OUT &show_code(@r_kddi3_val),"\n";
print OUT "};\n\n";
# SoftBank code -> Unicode: the codes are split into three contiguous windows,
# each stored in its own array indexed by (code - window start).
$sb_min1 = 10153;
$sb_max1 = 10153+177;
$sb_min2 = 10153+376;
$sb_max2 = 10153+547;
$sb_min3 = 10153+752;
$sb_max3 = 10153+901;

@sb_v1 = ();
@sb_v2 = ();
@sb_v3 = ();

#print "SoftBank\n";
# Keys of %softbank are used as plain numbers below, so a numeric sort is enough.
foreach my $code (sort { $a <=> $b } keys(%softbank)) {
    #printf "%d:%d :: %d (%x) => %s\n", int($code/94), $code % 94, $code - $sb_min1, $code, $softbank{$code};
    # Both bounds are checked: a code below a window start would otherwise
    # produce a negative array subscript instead of being skipped.
    if ($code >= $sb_min1 && $code <= $sb_max1) {
        $sb_v1[$code - $sb_min1] = $softbank{$code};
    } elsif ($code >= $sb_min2 && $code <= $sb_max2) {
        $sb_v2[$code - $sb_min2] = $softbank{$code};
    } elsif ($code >= $sb_min3 && $code <= $sb_max3) {
        $sb_v3[$code - $sb_min3] = $softbank{$code};
    }
    # Codes outside all three windows are ignored.
}
# Unicode -> SoftBank: build three parallel key/value lists, bucketed by the
# numeric value of the Unicode key. Keys of %to_sb are hex strings (they are
# passed through hex()); values are stored as lowercase hex text.
$to_sb_min = 10263;
($to_sb_min1, $to_sb_max1) = (0x0023, 0x00AE);
($to_sb_min2, $to_sb_max2) = (0x2122, 0x3299);
($to_sb_min3, $to_sb_max3) = (0x1F004, 0x1F6C0);

@r_sb1_key = (); @r_sb1_val = ();
@r_sb2_key = (); @r_sb2_val = ();
@r_sb3_key = (); @r_sb3_val = ();

foreach my $ucs (sort {hex($a) <=> hex($b)} keys(%to_sb)) {
    my $cp       = hex($ucs);
    my $code_hex = sprintf("%x", $to_sb{$ucs});
    #print "$code_hex <= $ucs\n";

    # Pick the destination lists first, then append once.
    my ($keys, $vals);
    if ($cp <= $to_sb_max1) {
        ($keys, $vals) = (\@r_sb1_key, \@r_sb1_val);
    } elsif ($cp >= $to_sb_min2 && $cp <= $to_sb_max2) {
        ($keys, $vals) = (\@r_sb2_key, \@r_sb2_val);
    } else {
        # Everything else, including values between max1 and min2, goes to table 3.
        ($keys, $vals) = (\@r_sb3_key, \@r_sb3_val);
    }
    push(@$keys, $ucs);
    push(@$vals, $code_hex);
}

# Every list gets a trailing 0 entry.
foreach my $list (\@r_sb1_key, \@r_sb1_val,
                  \@r_sb2_key, \@r_sb2_val,
                  \@r_sb3_key, \@r_sb3_val) {
    push(@$list, 0x00);
}
# Emit the SoftBank section of the generated C source to OUT: the three
# code -> Unicode window bounds and forward tables, then the three
# Unicode -> code range bounds and their key/value reverse tables.
# Table bodies are produced by show_code (defined elsewhere in this script).
print OUT "int mb_tbl_code2uni_sb1_min = $sb_min1;\n";
print OUT "int mb_tbl_code2uni_sb1_max = $sb_max1;\n";
print OUT "int mb_tbl_code2uni_sb2_min = $sb_min2;\n";
print OUT "int mb_tbl_code2uni_sb2_max = $sb_max2;\n";
print OUT "int mb_tbl_code2uni_sb3_min = $sb_min3;\n";
print OUT "int mb_tbl_code2uni_sb3_max = $sb_max3;\n\n";
#print "SoftBank 1\n";
# Forward tables: unlike the reverse tables below, no "\n" is appended after
# the show_code output here.
print OUT "int mb_tbl_code2uni_sb1[] = {\n";
print OUT &show_code(@sb_v1);
print OUT "};\n\n";
#print "SoftBank 2\n";
print OUT "int mb_tbl_code2uni_sb2[] = {\n";
print OUT &show_code(@sb_v2);
print OUT "};\n\n";
#print "SoftBank 3\n";
print OUT "int mb_tbl_code2uni_sb3[] = {\n";
print OUT &show_code(@sb_v3);
print OUT "};\n\n";
# Bounds of the three Unicode ranges used by the reverse lookup.
print OUT "int mb_tbl_uni_sb2code_min1 = $to_sb_min1;\n";
print OUT "int mb_tbl_uni_sb2code_max1 = $to_sb_max1;\n";
print OUT "int mb_tbl_uni_sb2code_min2 = $to_sb_min2;\n";
print OUT "int mb_tbl_uni_sb2code_max2 = $to_sb_max2;\n";
print OUT "int mb_tbl_uni_sb2code_min3 = $to_sb_min3;\n";
print OUT "int mb_tbl_uni_sb2code_max3 = $to_sb_max3;\n\n";
#print "SB reverse 1\n";
print OUT "int mb_tbl_uni_sb2code_key1[] = {\n";
print OUT &show_code(@r_sb1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val1[] = {\n";
print OUT &show_code(@r_sb1_val),"\n";
print OUT "};\n\n";
#print "SB reverse 2\n";
print OUT "int mb_tbl_uni_sb2code_key2[] = {\n";
print OUT &show_code(@r_sb2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val2[] = {\n";
print OUT &show_code(@r_sb2_val),"\n";
print OUT "};\n\n";
#print "SB reverse 3\n";
print OUT "int mb_tbl_uni_sb2code_key3[] = {\n";
print OUT &show_code(@r_sb3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val3[] = {\n";
print OUT &show_code(@r_sb3_val),"\n";
print OUT "};\n\n";
# All tables written; finish the generated file.
close(OUT);

View File

@ -0,0 +1,214 @@
/*
 * sjis_mac2wchar1: 30 entries, one per index in 0x03ac - 0x03c9
 * (entry i belongs to index 0x03ac + i).
 * NOTE(review): presumably maps a linear SJIS-mac code index to a Unicode
 * value; entries that are not plain code points (e.g. 0x4ff4d, 0xff860)
 * look like flagged/composite values for the converter - confirm against
 * mbfilter_sjis_mac.c.
 */
const int sjis_mac2wchar1[] = { /* 0x03ac - 0x03c9 */
0x0339c, 0x0339f, 0x0339d, 0x033a0,
0x033a4, 0x4ff4d, 0x033a1, 0x033a5,
0x0339e, 0x033a2, 0x0338e, 0x4ff47,
0x0338f, 0x033c4, 0x03396, 0x03397,
0x02113, 0x03398, 0x033b3, 0x033b2,
0x033b1, 0x033b0, 0x02109, 0x033d4,
0x033cb, 0x03390, 0x03385, 0x03386,
0x03387, 0xff860, };
/*
 * sjis_mac2wchar2: 27 entries, one per index in 0x0406 - 0x0420
 * (entry i belongs to index 0x0406 + i).
 * NOTE(review): 0x00000 entries presumably mean "no mapping", and 0xff861
 * looks like a flagged/composite value - confirm against the converter.
 */
const int sjis_mac2wchar2[] = { /* 0x0406 - 0x0420 */
0x02116, 0x033cd,
0x02121, 0xff861, 0x02664, 0x02667,
0x02661, 0x02662, 0x02660, 0x02663,
0x02665, 0x02666, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x03020, 0x0260e,
0x03004, };
/*
 * sjis_mac2wchar3: 16 entries, one per index in 0x0432 - 0x0441
 * (entry i belongs to index 0x0432 + i).
 * NOTE(review): values such as 0x121e8 and 0xff860 are presumably
 * flagged/composite values rather than plain code points - confirm against
 * the converter.
 */
const int sjis_mac2wchar3[] = { /* 0x0432 - 0x0441 */
0x0261e, 0x0261c,
0x0261d, 0x0261f, 0x021c6, 0x021c4,
0x021c5, 0xff860, 0x021e8, 0x021e6,
0x021e7, 0x021e9, 0x121e8, 0x121e6,
0x121e7, 0x121e9, };
/*
 * sjis_mac2wchar4: 25 entries, one per index in 0x0468 - 0x0480
 * (entry i belongs to index 0x0468 + i). Every entry lies in
 * 0x322a - 0x3243, the range covered by wchar2sjis_mac7.
 */
const int sjis_mac2wchar4[] = { /* 0x0468 - 0x0480 */
0x03230, 0x0322a, 0x0322b, 0x0322c,
0x0322d, 0x0322e, 0x0322f, 0x03240,
0x03237, 0x03242, 0x03243, 0x03239,
0x0323a, 0x03231, 0x0323e, 0x03234,
0x03232, 0x0323b, 0x03236, 0x03233,
0x03235, 0x0323c, 0x0323d, 0x0323f,
0x03238, };
/*
 * sjis_mac2wchar5: 49 entries, one per index in 0x04b8 - 0x04e8
 * (entry i belongs to index 0x04b8 + i).
 * NOTE(review): 0x00000 entries presumably mean "no mapping"; values such
 * as 0x85927, 0x85c0f and 0x863a7 look like flagged/composite values -
 * confirm against the converter.
 */
const int sjis_mac2wchar5[] = { /* 0x04b8 - 0x04e8 */
0x85927, 0x85c0f, 0x032a4, 0x032a5,
0x032a6, 0x032a7, 0x032a8, 0x032a9,
0x03296, 0x0329d, 0x03298, 0x0329e,
0x863a7, 0x03299, 0x03349, 0x03322,
0x0334d, 0x03314, 0x03316, 0x03305,
0x03333, 0x0334e, 0x03303, 0x03336,
0x03318, 0x03315, 0x03327, 0x03351,
0x0334a, 0x03339, 0x03357, 0x0330d,
0x03342, 0x03323, 0x03326, 0x0333b,
0x0332b, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x03300, 0x0331e, 0x0332a, 0x03331,
0x03347, };
/*
 * sjis_mac2wchar6: 70 entries, one per index in 0x050c - 0x0551
 * (entry i belongs to index 0x050c + i). The table is sparse.
 * NOTE(review): 0x00000 entries presumably mean "no mapping"; 0xff862
 * looks like a flagged/composite value - confirm against the converter.
 */
const int sjis_mac2wchar6[] = { /* 0x050c - 0x0551 */
0x0337e, 0x0337d, 0x0337c, 0x0337b,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x0337f, 0xff862, 0xff862,
0x0222e, 0x0221f, 0x022bf, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x0301d, 0x0301f, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x03094, 0x00000, 0x030f7, 0x030f8,
0x030f9, 0x030fa, };
/*
 * sjis_mac2wchar7: 64 entries, one per index in 0x1ed9 - 0x1f18
 * (entry i belongs to index 0x1ed9 + i). The table is sparse.
 * NOTE(review): 0x00000 entries presumably mean "no mapping"; values of
 * the form 0x2xxxx (e.g. 0x23001, 0x2ffe3) look like flagged/composite
 * values rather than plain code points - confirm against the converter.
 */
const int sjis_mac2wchar7[] = { /* 0x1ed9 - 0x1f18 */
0x23001, 0x23002, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x2ffe3, 0x0fe33, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x230fc,
0x0fe31, 0x22010, 0x00000, 0x00000,
0x2301c, 0x22016, 0x2ff5c, 0x22026,
0x0fe30, 0x00000, 0x00000, 0x00000,
0x00000, 0x0fe35, 0x0fe36, 0x0fe39,
0x0fe3a, 0x2ff3b, 0x2ff3d, 0x0fe37,
0x0fe38, 0x0fe3f, 0x0fe40, 0x0fe3d,
0x0fe3e, 0x0fe41, 0x0fe42, 0x0fe43,
0x0fe44, 0x0fe3b, 0x0fe3c, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x2ff1d, };
/*
 * sjis_mac2wchar8: 180 entries, one per index in 0x1ff2 - 0x20a5
 * (entry i belongs to index 0x1ff2 + i). The table is very sparse.
 * NOTE(review): 0x00000 entries presumably mean "no mapping"; every
 * non-zero entry has the form 0x23xxx, which looks like a flagged value
 * rather than a plain code point - confirm against the converter.
 */
const int sjis_mac2wchar8[] = { /* 0x1ff2 - 0x20a5 */
0x23041, 0x00000,
0x23043, 0x00000, 0x23045, 0x00000,
0x23047, 0x00000, 0x23049, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x23063, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x23083, 0x00000, 0x23085, 0x00000,
0x23087, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x2308e,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x230a1, 0x00000, 0x230a3, 0x00000,
0x230a5, 0x00000, 0x230a7, 0x00000,
0x230a9, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x230c3, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x00000, 0x230e3, 0x00000,
0x230e5, 0x00000, 0x230e7, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x00000, 0x230ee, 0x00000, 0x00000,
0x00000, 0x00000, 0x00000, 0x00000,
0x230f5, 0x230f6, };
/*
 * wchar2sjis_mac4: 8 entries, one per value in 0x2660 - 0x2667
 * (entry i belongs to 0x2660 + i). Each entry is the sjis_mac2wchar2
 * index holding that value, e.g. entry 0 is 0x040e and
 * sjis_mac2wchar2[0x040e - 0x0406] is 0x02660.
 */
const int wchar2sjis_mac4[] = { /* 0x2660 - 0x2667 */
0x040e, 0x040c, 0x040d, 0x040f,
0x040a, 0x0410, 0x0411, 0x040b,
};
/*
 * wchar2sjis_mac7: 26 entries, one per value in 0x322a - 0x3243
 * (entry i belongs to 0x322a + i). Each non-zero entry is the
 * sjis_mac2wchar4 index holding that value, e.g. entry 0 is 0x0469 and
 * sjis_mac2wchar4[0x0469 - 0x0468] is 0x0322a.
 * NOTE(review): 0x0000 presumably means "no mapping" - confirm.
 */
const int wchar2sjis_mac7[] = { /* 0x322a - 0x3243 */
0x0469, 0x046a,
0x046b, 0x046c, 0x046d, 0x046e,
0x0468, 0x0475, 0x0478, 0x047b,
0x0477, 0x047c, 0x047a, 0x0470,
0x0480, 0x0473, 0x0474, 0x0479,
0x047d, 0x047e, 0x0476, 0x047f,
0x046f, 0x0000, 0x0471, 0x0472,
};
/*
 * wchar2sjis_mac8: 9 entries, one per value in 0x3296 - 0x329e
 * (entry i belongs to 0x3296 + i). Each non-zero entry is the
 * sjis_mac2wchar5 index holding that value, e.g. entry 0 is 0x04c0 and
 * sjis_mac2wchar5[0x04c0 - 0x04b8] is 0x03296.
 * NOTE(review): 0x0000 presumably means "no mapping" - confirm.
 */
const int wchar2sjis_mac8[] = { /* 0x3296 - 0x329e */
0x04c0, 0x0000,
0x04c2, 0x04c5, 0x0000, 0x0000,
0x0000, 0x04c1, 0x04c3, };
/*
 * wchar2sjis_mac9: 213 entries, one per value in 0x3300 - 0x33d4
 * (entry i belongs to 0x3300 + i). The table is sparse. Non-zero entries
 * are indices into the sjis_mac2wchar tables, e.g. entry 0 is 0x04e4 and
 * sjis_mac2wchar5[0x04e4 - 0x04b8] is 0x03300.
 * NOTE(review): 0x0000 presumably means "no mapping" - confirm.
 */
const int wchar2sjis_mac9[] = { /* 0x3300 - 0x33d4 */
0x04e4, 0x0000, 0x0000, 0x04ce,
0x0000, 0x04cb, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x04d7, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x04c9, 0x04d1, 0x04ca, 0x0000,
0x04d0, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x04e5, 0x0000,
0x0000, 0x0000, 0x04c7, 0x04d9,
0x0000, 0x0000, 0x04da, 0x04d2,
0x0000, 0x0000, 0x04e6, 0x04dc,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x04e7, 0x0000, 0x04cc,
0x0000, 0x0000, 0x04cf, 0x0000,
0x0000, 0x04d5, 0x0000, 0x04db,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x04d8, 0x0000,
0x0000, 0x0000, 0x0000, 0x04e8,
0x0000, 0x04c6, 0x04d4, 0x0000,
0x0000, 0x04c8, 0x04cd, 0x0000,
0x0000, 0x04d3, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x04d6,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x050f,
0x050e, 0x050d, 0x050c, 0x0521,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x03c6, 0x03c7, 0x03c8,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x03b6, 0x03b8,
0x03c5, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x03ba, 0x03bb,
0x03bd, 0x0000, 0x0000, 0x0000,
0x03ac, 0x03ae, 0x03b4, 0x03ad,
0x03af, 0x03b2, 0x03b5, 0x0000,
0x03b0, 0x03b3, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x03c1, 0x03c0, 0x03bf, 0x03be,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x03b9, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x03c4,
0x0000, 0x0407, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000,
0x03c3, };
/*
 * wchar2sjis_mac10: 21 entries, one per value in 0xfe30 - 0xfe44
 * (entry i belongs to 0xfe30 + i). Each non-zero entry is the
 * sjis_mac2wchar7 index holding that value, e.g. entry 0 is 0x1efc and
 * sjis_mac2wchar7[0x1efc - 0x1ed9] is 0x0fe30.
 * NOTE(review): 0x0000 presumably means "no mapping" - confirm.
 */
const int wchar2sjis_mac10[] = { /* 0xfe30 - 0xfe44 */
0x1efc, 0x1ef4, 0x0000, 0x1ee9,
0x0000, 0x1f01, 0x1f02, 0x1f07,
0x1f08, 0x1f03, 0x1f04, 0x1f11,
0x1f12, 0x1f0b, 0x1f0c, 0x1f09,
0x1f0a, 0x1f0d, 0x1f0e, 0x1f0f,
0x1f10, };

View File

@ -2,8 +2,9 @@ static const struct {
int begin;
int end;
} mbfl_eaw_table[] = {
{ 0x1100, 0x1159 },
{ 0x115f, 0x115f },
{ 0x1100, 0x115f },
{ 0x11a3, 0x11a7 },
{ 0x11fa, 0x11ff },
{ 0x2329, 0x232a },
{ 0x2e80, 0x2e99 },
{ 0x2e9b, 0x2ef3 },
@ -12,25 +13,32 @@ static const struct {
{ 0x3000, 0x303e },
{ 0x3041, 0x3096 },
{ 0x3099, 0x30ff },
{ 0x3105, 0x312c },
{ 0x3105, 0x312d },
{ 0x3131, 0x318e },
{ 0x3190, 0x31b7 },
{ 0x3190, 0x31ba },
{ 0x31c0, 0x31e3 },
{ 0x31f0, 0x321e },
{ 0x3220, 0x3243 },
{ 0x3250, 0x327d },
{ 0x327f, 0x32fe },
{ 0x3300, 0x4db5 },
{ 0x4e00, 0x9fa5 },
{ 0xa000, 0xa48c },
{ 0x3220, 0x3247 },
{ 0x3250, 0x32fe },
{ 0x3300, 0x4dbf },
{ 0x4e00, 0xa48c },
{ 0xa490, 0xa4c6 },
{ 0xa960, 0xa97c },
{ 0xac00, 0xd7a3 },
{ 0xf900, 0xfa2d },
{ 0xfa30, 0xfa6a },
{ 0xd7b0, 0xd7c6 },
{ 0xd7cb, 0xd7fb },
{ 0xf900, 0xfaff },
{ 0xfe10, 0xfe19 },
{ 0xfe30, 0xfe52 },
{ 0xfe54, 0xfe66 },
{ 0xfe68, 0xfe6b },
{ 0xff01, 0xff60 },
{ 0xffe0, 0xffe6 },
{ 0x1b000, 0x1b001 },
{ 0x1f200, 0x1f202 },
{ 0x1f210, 0x1f23a },
{ 0x1f240, 0x1f248 },
{ 0x1f250, 0x1f251 },
{ 0x20000, 0x2fffd },
{ 0x30000, 0x3fffd }
};

View File

@ -102,7 +102,7 @@
* version information
*/
#define MBFL_VERSION_MAJOR 1
#define MBFL_VERSION_MINOR 1
#define MBFL_VERSION_MINOR 3
#define MBFL_VERSION_TEENY 0
/*

View File

@ -52,6 +52,8 @@
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_sjis_open.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_sjis_mac.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
@ -125,6 +127,22 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_eucjpwin,
&vtbl_cp932_wchar,
&vtbl_wchar_cp932,
&vtbl_sjis_docomo_wchar,
&vtbl_wchar_sjis_docomo,
&vtbl_sjis_kddi_wchar,
&vtbl_wchar_sjis_kddi,
&vtbl_sjis_sb_wchar,
&vtbl_wchar_sjis_sb,
&vtbl_sjis_docomo_pua_wchar,
&vtbl_wchar_sjis_docomo_pua,
&vtbl_sjis_kddi_pua_wchar,
&vtbl_wchar_sjis_kddi_pua,
&vtbl_sjis_kddi_pua_b_wchar,
&vtbl_wchar_sjis_kddi_pua_b,
&vtbl_sjis_sb_pua_wchar,
&vtbl_wchar_sjis_sb_pua,
&vtbl_sjis_mac_wchar,
&vtbl_wchar_sjis_mac,
&vtbl_euccn_wchar,
&vtbl_wchar_euccn,
&vtbl_cp936_wchar,

View File

@ -58,6 +58,8 @@
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_sjis_open.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_sjis_mac.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
@ -156,6 +158,14 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_sjis,
&mbfl_encoding_eucjp_win,
&mbfl_encoding_sjis_open,
&mbfl_encoding_sjis_docomo,
&mbfl_encoding_sjis_kddi,
&mbfl_encoding_sjis_sb,
&mbfl_encoding_sjis_docomo_pua,
&mbfl_encoding_sjis_kddi_pua,
&mbfl_encoding_sjis_kddi_pua_b,
&mbfl_encoding_sjis_sb_pua,
&mbfl_encoding_sjis_mac,
&mbfl_encoding_cp932,
&mbfl_encoding_cp51932,
&mbfl_encoding_jis,

View File

@ -69,6 +69,14 @@ enum mbfl_no_encoding {
mbfl_no_encoding_sjis,
mbfl_no_encoding_eucjp_win,
mbfl_no_encoding_sjis_open,
mbfl_no_encoding_sjis_docomo,
mbfl_no_encoding_sjis_kddi,
mbfl_no_encoding_sjis_sb,
mbfl_no_encoding_sjis_docomo_pua,
mbfl_no_encoding_sjis_kddi_pua,
mbfl_no_encoding_sjis_kddi_pua_b,
mbfl_no_encoding_sjis_sb_pua,
mbfl_no_encoding_sjis_mac,
mbfl_no_encoding_cp932,
mbfl_no_encoding_cp51932,
mbfl_no_encoding_jis,

View File

@ -51,6 +51,7 @@
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_sjis_open.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_euc_jp.h"

View File

@ -18,7 +18,8 @@ BEGIN {
/^[0-9a-fA-F]+;/ {
if ($2 == "W" || $2 == "F") {
v = ( "0x" $1 ) + 0
v = strtonum( "0x" $1 )
if (prev < 0) {
first = v
} else if (v - prev > 1) {
@ -44,8 +45,8 @@ BEGIN {
/^[0-9a-fA-F]+\.\./ {
if ($4 == "W" || $4 == "F") {
vs = ( "0x" $1 ) + 0
ve = ( "0x" $3 ) + 0
vs = strtonum( "0x" $1 )
ve = strtonum( "0x" $3 )
if (prev < 0) {
first = vs
} else if (vs - prev > 1) {

View File

@ -0,0 +1,119 @@
/**
* this is a small sample script to use libmbfl.
* Rui Hirokawa <hirokawa@php.net>
*
* this file is encoded in EUC-JP.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mbfl/mbfilter.h"
/*
 * Write each byte of ptr->val to stdout as "%xx" (two lowercase hex digits
 * after a literal percent sign), then the byte count in parentheses and a
 * newline.
 */
static void hexdump(const mbfl_string *ptr)
{
	unsigned int idx = 0;

	while (idx < ptr->len) {
		printf("%%%02x", ptr->val[idx]);
		idx++;
	}

	printf(" (%u)\n", ptr->len);
}
//#define TEST_DOCOMO
//#define TEST_KDDI
#define TEST_SOFTBANK
/*
 * Round-trip demo: convert one carrier-specific Shift_JIS emoji sequence
 * (selected by TEST_DOCOMO / TEST_KDDI / TEST_SOFTBANK) to UTF-8, convert
 * the UTF-8 result back to the carrier encoding, and hexdump the final
 * bytes.
 *
 * Returns EXIT_SUCCESS when every conversion step succeeded, EXIT_FAILURE
 * otherwise. All libmbfl resources are released on every path.
 */
int main(int argc, char **argv)
{
	enum mbfl_no_encoding from_encoding, to_encoding;
	enum mbfl_no_language no_language;
	mbfl_buffer_converter *convd = NULL, *convd2 = NULL;
	mbfl_memory_device dev, dev2;
	mbfl_string string, result, *ret;
	/* Bytes above 0x7F do not fit a signed char, so keep the input unsigned. */
#ifdef TEST_DOCOMO
	//unsigned char str[] = {0xF9,0xD7,0x00}; // U+2122
	//unsigned char str[] = {0xF9,0x82,0x00}; // U+1F195
	unsigned char str[] = {0xF9,0xD6,0x00}; // U+00A9
#endif
#ifdef TEST_KDDI
	//unsigned char str[] = {0xF7,0x6A,0x00};// U+2122
	//unsigned char str[] = {0xF7,0xE5,0x00}; // U+1F195
	//unsigned char str[] = {0xF3,0xD2,0x00}; // U+1F1E8 U+1F1F3
	unsigned char str[] = {0xF7,0x74,0x00}; // U+00A9
#endif
#ifdef TEST_SOFTBANK
	//unsigned char str[] = {0xFB,0xD7,0x00};// U+2122
	//unsigned char str[] = {0xF7,0xB2,0x00}; // U+1F195
	//unsigned char str[] = {0xFB,0xB3,0x00}; // U+1F1E8 U+1F1F3
	unsigned char str[] = {0xF7,0xEE,0x00}; // U+00A9
#endif
	int status = EXIT_FAILURE;
	int i; /* only used by the disabled UTF-16 dump below */

	(void)argc;
	(void)argv;

	no_language = mbfl_name2no_language("Japanese");
#ifdef TEST_DOCOMO
	from_encoding = mbfl_name2no_encoding("SJIS-win#DOCOMO");
#endif
#ifdef TEST_KDDI
	from_encoding = mbfl_name2no_encoding("SJIS-win#KDDI");
#endif
#ifdef TEST_SOFTBANK
	from_encoding = mbfl_name2no_encoding("SJIS-win#SOFTBANK");
#endif
	to_encoding = mbfl_name2no_encoding("UTF-8");

	/* Put every resource into an empty state first so that the cleanup
	 * code below is safe no matter where a failure occurs. */
	mbfl_memory_device_init(&dev, 0, 4096);
	mbfl_memory_device_init(&dev2, 0, 4096);
	mbfl_string_init_set(&string, no_language, from_encoding);
	mbfl_string_init_set(&result, no_language, to_encoding);

	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
	if (convd == NULL) {
		fprintf(stderr, "cannot create converter (unknown encoding?)\n");
		goto cleanup;
	}

	/* Stage the input bytes in a memory device and hand them to `string`. */
	mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz, dev.allocsz);
	if (dev.buffer == NULL || (size_t)dev.length < sizeof(str)) {
		fprintf(stderr, "cannot allocate input buffer\n");
		goto cleanup;
	}
	memcpy(dev.buffer, str, sizeof(str)); /* includes the terminating 0x00 */
	dev.pos += (int)(sizeof(str) - 1);
	mbfl_memory_device_result(&dev, &string);

	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
	if (ret == NULL) {
		fprintf(stderr, "forward conversion failed\n");
		goto cleanup;
	}
#if 0
	for (i = 0; i < result.len; i+= 2) {
		if (result.val[i] >= 0xD8 && result.val[i] < 0xE0) { // Surrogate pair
			int h = (result.val[i] & 0x07)<<8 | result.val[i+1];
			int l = (result.val[i+2] & 0x03)<<8 | result.val[i+3];
			int c = (h<<(2+8)) | l;
			printf("U+%x\n",c+0x10000);
			i+=2;
		} else {
			printf("U+%x\n",(result.val[i] << 8) | result.val[i+1]);
		}
	}
	hexdump(&result);
#endif
#if 1
	convd2 = mbfl_buffer_converter_new(to_encoding, from_encoding, 0);
	if (convd2 == NULL) {
		fprintf(stderr, "cannot create reverse converter\n");
		goto cleanup;
	}

	/* Free the first-stage input before `string` is reused. */
	mbfl_string_clear(&string);
	mbfl_string_init_set(&string, no_language, to_encoding);

	mbfl_memory_device_realloc(&dev2, dev2.length + dev2.allocsz, dev2.allocsz);
	if (dev2.buffer == NULL || result.len >= (unsigned int)dev2.length) {
		fprintf(stderr, "cannot allocate intermediate buffer\n");
		goto cleanup;
	}
	memcpy(dev2.buffer, result.val, result.len + 1);
	/* Use the converter's byte count, not strlen(): converted data is not
	 * guaranteed to be free of embedded zero bytes. */
	dev2.pos += (int)result.len;
	mbfl_memory_device_result(&dev2, &string);

	/* Free the first-stage output before `result` is reused. */
	mbfl_string_clear(&result);
	mbfl_string_init_set(&result, no_language, from_encoding);

	ret = mbfl_buffer_converter_feed_result(convd2, &string, &result);
	if (ret == NULL) {
		fprintf(stderr, "reverse conversion failed\n");
		goto cleanup;
	}
	hexdump(&result);
#endif
	status = EXIT_SUCCESS;

cleanup:
	mbfl_memory_device_clear(&dev);
	mbfl_memory_device_clear(&dev2);
	mbfl_string_clear(&result);
	mbfl_string_clear(&string);
	if (convd2 != NULL) {
		mbfl_buffer_converter_delete(convd2);
	}
	if (convd != NULL) {
		mbfl_buffer_converter_delete(convd);
	}
	return status;
}

View File

@ -1681,7 +1681,12 @@ PHP_MINFO_FUNCTION(mbstring)
php_info_print_table_start();
php_info_print_table_row(2, "Multibyte Support", "enabled");
php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
{
char tmp[256];
snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
php_info_print_table_row(2, "libmbfl version", tmp);
}
php_info_print_table_end();
php_info_print_table_start();