php-src/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c
2010-03-12 04:55:37 +00:00

498 lines
11 KiB
C

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
/*
 * One Unicode code point together with the CP932 codes recorded for it.
 */
struct mappings_entry {
	int cp_uni;      /* Unicode code point this entry describes */
	int n;           /* number of slots of cp_932[] currently in use */
	int cp_932[16];  /* CP932 codes recorded for cp_uni, in the order added */
};
/*
 * Growable array of mappings_entry records.
 */
struct mappings {
	size_t n;                        /* entries currently in use */
	size_t nalloc;                   /* entries the allocation has room for */
	struct mappings_entry *entries;  /* storage, or a null pointer while empty */
};
/* Put a mappings table into the empty state: no storage and no entries. */
static void mappings_init(struct mappings *map)
{
	map->entries = NULL;
	map->nalloc = 0;
	map->n = 0;
}
/*
 * Release the storage owned by a mappings table.
 *
 * free() accepts a null pointer, so no guard is needed.  The table is put
 * back into the empty state afterwards so that destroying it a second time,
 * or using it again, never touches the freed block.
 */
static void mappings_destroy(struct mappings *map)
{
	free(map->entries);
	map->entries = NULL;
	map->nalloc = 0;
	map->n = 0;
}
/*
 * Make sure the table has room for at least one more entry.
 *
 * Nothing happens while spare capacity remains.  Otherwise the capacity is
 * doubled (an empty table starts at 1).  Returns 0 on success and 2 when the
 * new size cannot be represented or realloc() fails; the table is left
 * unchanged in that case.
 */
static int mappings_grow(struct mappings *map)
{
	struct mappings_entry *grown;
	size_t capacity, bytes;

	if (map->n < map->nalloc)
		return 0;

	capacity = map->nalloc << 1;
	if (capacity == 0) {
		capacity = 1;
	} else if (capacity <= map->n) {
		/* the doubling wrapped around */
		return 2;
	}

	/* refuse a byte count that does not fit in size_t */
	bytes = sizeof(*map->entries) * capacity;
	if (bytes / capacity != sizeof(*map->entries))
		return 2;

	grown = realloc(map->entries, bytes);
	if (!grown)
		return 2;

	map->nalloc = capacity;
	map->entries = grown;
	return 0;
}
/*
 * Record that Unicode code point cp_uni maps to the CP932 code cp_932.
 *
 * map->entries is kept sorted by cp_uni with one entry per code point.  If
 * an entry for cp_uni exists, cp_932 is appended to it; otherwise a new entry
 * is inserted at the position that keeps the array sorted.
 *
 * Returns 0 on success, 1 if the entry already holds as many codes as
 * cp_932[] has room for, or the non-zero result of mappings_grow() if the
 * table could not be enlarged.
 */
static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
{
	const size_t max_codes =
		sizeof(map->entries->cp_932) / sizeof(*map->entries->cp_932);
	size_t lo = 0, hi = map->n;
	struct mappings_entry *entry;

	/*
	 * Lower-bound search: on exit lo is the first index whose key is not
	 * less than cp_uni (or map->n).  The array is only indexed while
	 * lo < hi, so an empty table with a null entries pointer is never
	 * touched.
	 */
	while (lo < hi) {
		const size_t mid = lo + (hi - lo) / 2;

		if (map->entries[mid].cp_uni < cp_uni)
			lo = mid + 1;
		else
			hi = mid;
	}

	if (lo == map->n || map->entries[lo].cp_uni != cp_uni) {
		const int r = mappings_grow(map);

		if (r)
			return r;
		/*
		 * Open a gap at lo.  The byte count cannot overflow: it is no
		 * larger than the array that already exists in memory.
		 */
		if (lo < map->n)
			memmove(&map->entries[lo + 1], &map->entries[lo],
					(map->n - lo) * sizeof(*map->entries));
		++map->n;
		entry = &map->entries[lo];
		entry->cp_uni = cp_uni;
		entry->n = 0;
	} else {
		entry = &map->entries[lo];
	}

	if (entry->n < 0 || (size_t)entry->n >= max_codes)
		return 1;
	entry->cp_932[entry->n++] = cp_932;
	return 0;
}
/*
 * Describes one generator: a name, the text printed before and after the
 * per-entry output, and the callback invoked for each mapping entry.
 */
struct generator_entry {
	const char *name;      /* generator name; a null name marks the end of a list */
	const char *prologue;  /* text printed before any entry is visited */
	const char *epilogue;  /* text printed after the last entry */
	void (*visitor)(const struct mappings_entry *);  /* per-entry callback */
};
/*
 * Encode the code point k as a UTF-8 style byte sequence in buf.
 *
 * Values below 0x80 are stored as a single byte.  Larger values use 2 to 6
 * bytes (the 5- and 6-byte forms of the original UTF-8 scheme are produced
 * for values from 0x200000 upwards).  A terminating '\0' is written after
 * the sequence, so buf needs room for up to 7 bytes.
 *
 * Returns the number of bytes written, not counting the terminator.
 */
static int utf32_utf8(char *buf, int k)
{
	/* lead-byte marker for each sequence length; lengths 0 and 1 are unused */
	static const unsigned char lead_marks[] = {
		0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
	};
	int len, pos, rest;

	if (k < 0x80)
		len = 1;
	else if (k < 0x800)
		len = 2;
	else if (k < 0x10000)
		len = 3;
	else if (k < 0x200000)
		len = 4;
	else if (k < 0x4000000)
		len = 5;
	else
		len = 6;

	if (len == 1) {
		buf[0] = k;
	} else {
		/* fill the continuation bytes from the end, six bits at a time */
		rest = k;
		for (pos = len - 1; pos > 0; --pos) {
			buf[pos] = 0x80 | (rest & 0x3f);
			rest >>= 6;
		}
		/* whatever is left of the value goes into the lead byte */
		buf[0] = lead_marks[len] | rest;
	}

	buf[len] = '\0';
	return len;
}
/* Closing line printed at the end of every generated expect script. */
static const char epilogue[] = "close\n";

/*
 * The four prologues differ only in the pair of encoding names passed to
 * tests/conv_encoding, so they are all built from this one template.
 */
#define CONV_ENCODING_PROLOGUE(to, from) \
	"#!/usr/bin/expect -f\n" \
	"spawn tests/conv_encoding Japanese " to " " from "\n" \
	"set timeout 1\n" \
	"\n" \
	"expect_after {\n" \
	" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" \
	"}\n"

static const char prologue_to_cp932[] = CONV_ENCODING_PROLOGUE("CP932", "UTF-8");
static const char prologue_to_cp50220[] = CONV_ENCODING_PROLOGUE("CP50220", "UTF-8");
static const char prologue_to_cp50222[] = CONV_ENCODING_PROLOGUE("CP50222", "UTF-8");
static const char prologue_from_cp932[] = CONV_ENCODING_PROLOGUE("UTF-8", "CP932");

#undef CONV_ENCODING_PROLOGUE
/*
 * Print the expect stanza that tests one Unicode code point in the
 * "to_cp932" script.
 *
 * The stanza sends the UTF-8 bytes of the code point (written as \xNN
 * escapes) and lists one accepted reply per recorded CP932 code: the code's
 * bytes as %NN followed by the byte count in parentheses.
 *
 * Code points below 32 and code point 127 produce no output.
 */
static void to_cp932_visitor(const struct mappings_entry *entry)
{
	static const char hex[] = "0123456789abcdef";
	char utf8[8], escaped[32], expected[8];
	int i, nbytes;

	if (entry->cp_uni < 32 || entry->cp_uni == 127)
		return;

	/* at most 6 bytes, each expanded to the four characters "\xNN" */
	nbytes = utf32_utf8(utf8, entry->cp_uni);
	for (i = 0; i < nbytes; ++i) {
		const unsigned char byte = (unsigned char)utf8[i];

		escaped[i * 4] = '\\';
		escaped[i * 4 + 1] = 'x';
		escaped[i * 4 + 2] = hex[byte >> 4];
		escaped[i * 4 + 3] = hex[byte & 15];
	}
	escaped[nbytes * 4] = '\0';

	printf("set test \"U+%06X\"\n"
	       "send -- \"%s\r\"\n"
	       "sleep 0.001\n"
	       "expect {\n", (unsigned int)entry->cp_uni, escaped);

	for (i = 0; i < entry->n; ++i) {
		const int c = entry->cp_932[i];
		int len;

		/*
		 * snprintf() bounds the write: a code outside 0..0xffff (only
		 * possible with a malformed table) is truncated instead of
		 * running past the end of expected[].
		 */
		if (c >= 0x100) {
			len = 2;
			snprintf(expected, sizeof(expected), "%%%02x%%%02x",
					(unsigned int)((c >> 8) & 0xff), (unsigned int)(c & 0xff));
		} else {
			len = 1;
			snprintf(expected, sizeof(expected), "%%%02x", (unsigned int)c);
		}
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n", expected, len);
	}
	printf("}\n");
}
/*
 * Print the expect stanzas that test one Unicode code point in the
 * "from_cp932" script.
 *
 * One stanza is printed per recorded CP932 code.  It sends the code's bytes
 * (written as \xNN escapes) and expects the UTF-8 bytes of the code point as
 * %NN followed by the UTF-8 byte count in parentheses.
 *
 * Code points below 32 and code point 127 produce no output.
 */
static void from_cp932_visitor(const struct mappings_entry *entry)
{
	static const char hex[] = "0123456789abcdef";
	char utf8[8], expected[32];
	/*
	 * A two-byte code is written as "\xNN\xNN": eight characters plus the
	 * terminator, which does not fit in eight bytes.
	 */
	char send_text[16];
	int i, len;

	if (entry->cp_uni < 32 || entry->cp_uni == 127)
		return;

	/* at most 6 bytes, each expanded to the three characters "%NN" */
	len = utf32_utf8(utf8, entry->cp_uni);
	for (i = 0; i < len; ++i) {
		const unsigned char byte = (unsigned char)utf8[i];

		expected[i * 3] = '%';
		expected[i * 3 + 1] = hex[byte >> 4];
		expected[i * 3 + 2] = hex[byte & 15];
	}
	expected[len * 3] = '\0';

	for (i = 0; i < entry->n; ++i) {
		const int c = entry->cp_932[i];

		if (c >= 0x100)
			snprintf(send_text, sizeof(send_text), "\\x%02x\\x%02x",
					(unsigned int)((c >> 8) & 0xff), (unsigned int)(c & 0xff));
		else
			snprintf(send_text, sizeof(send_text), "\\x%02x", (unsigned int)c);

		printf("set test \"U+%06X\"\n"
		       "send -- \"%s\r\"\n"
		       "sleep 0.001\n"
		       "expect {\n"
		       " \"%s (%d)\\r\\n\" { pass $test }\n"
		       "}\n",
		       (unsigned int)entry->cp_uni, send_text, expected, len);
	}
}
/*
 * Print the expect stanza that tests one Unicode code point in the
 * "to_cp50220" script.
 *
 * The stanza sends the UTF-8 bytes of the code point as \xNN escapes and
 * lists one accepted reply per recorded CP932 code:
 *   - single-byte codes 0xa1..0xdf are looked up in a JIS X 0208 table;
 *   - codes of 0x100 and above are converted arithmetically;
 *   both are expected as two bytes between the escape sequences
 *   1b 24 42 and 1b 28 42 (8 bytes in total);
 *   - any other code is expected unchanged as one byte.
 *
 * Code points below 32 and code point 127 produce no output.
 */
static void to_cp50220_visitor(const struct mappings_entry *entry)
{
	/* JIS X 0208 code for single-byte code c, indexed by c - 0xa0 */
	static const int kana_to_jisx0208[] = {
		0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
		0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
		0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
		0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
		0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
		0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
		0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
		0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
	};
	static const char hex_digits[] = "0123456789abcdef";
	char raw[8], send_text[32], expect_text[32];
	int nbytes, idx;

	if (entry->cp_uni == 127 || entry->cp_uni < 32)
		return;

	/* expand every UTF-8 byte to the four characters "\xNN" */
	nbytes = utf32_utf8(raw, entry->cp_uni);
	for (idx = 0; idx < nbytes; ++idx) {
		const unsigned char byte = (unsigned char)raw[idx];
		char *out = &send_text[idx * 4];

		out[0] = '\\';
		out[1] = 'x';
		out[2] = hex_digits[byte >> 4];
		out[3] = hex_digits[byte & 15];
	}
	send_text[nbytes * 4] = '\0';

	printf("set test \"U+%06X\"\n"
	       "send -- \"%s\r\"\n"
	       "sleep 0.001\n"
	       "expect {\n", entry->cp_uni, send_text);

	for (idx = 0; idx < entry->n; ++idx) {
		const int code = entry->cp_932[idx];
		int is_jisx0208 = 1;
		int jis = 0;
		int len;

		if (code >= 0xa1 && code < 0xe0) {
			jis = kana_to_jisx0208[code - 0xa0];
		} else if (code >= 0x100) {
			/* arithmetic conversion of the two-byte code */
			const int lead = (code & 0xff00) - (code >= 0xe000 ? 0xb000 : 0x7000);
			const int trail = (code & 0xff) - ((code & 0x80) ? 32 : 31);

			jis = ((lead << 1) | trail) - ((code & 0xff) >= 159 ? 94 : 0x100);
		} else {
			is_jisx0208 = 0;
		}

		if (is_jisx0208) {
			len = 8;
			sprintf(expect_text, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", jis >> 8, jis & 0xff);
		} else {
			len = 1;
			sprintf(expect_text, "%%%02x", code);
		}
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n", expect_text, len);
	}
	printf("}\n");
}
/*
 * Print the expect stanza that tests one Unicode code point in the
 * "to_cp50222" script.
 *
 * The stanza sends the UTF-8 bytes of the code point as \xNN escapes and
 * lists one accepted reply per recorded CP932 code:
 *   - single-byte codes 0xa1..0xdf are expected as the byte (code - 0x80)
 *     between the bytes 0e and 0f (3 bytes in total);
 *   - codes of 0x100 and above are converted arithmetically and expected as
 *     two bytes between the escape sequences 1b 24 42 and 1b 28 42
 *     (8 bytes in total);
 *   - any other code is expected unchanged as one byte.
 *
 * Code points below 32 and code point 127 produce no output.
 */
static void to_cp50222_visitor(const struct mappings_entry *entry)
{
	static const char hex_digits[] = "0123456789abcdef";
	char raw[8], send_text[32], expect_text[32];
	int nbytes, idx;

	if (entry->cp_uni == 127 || entry->cp_uni < 32)
		return;

	/* expand every UTF-8 byte to the four characters "\xNN" */
	nbytes = utf32_utf8(raw, entry->cp_uni);
	for (idx = 0; idx < nbytes; ++idx) {
		const unsigned char byte = (unsigned char)raw[idx];
		char *out = &send_text[idx * 4];

		out[0] = '\\';
		out[1] = 'x';
		out[2] = hex_digits[byte >> 4];
		out[3] = hex_digits[byte & 15];
	}
	send_text[nbytes * 4] = '\0';

	printf("set test \"U+%06X\"\n"
	       "send -- \"%s\r\"\n"
	       "sleep 0.001\n"
	       "expect {\n", entry->cp_uni, send_text);

	for (idx = 0; idx < entry->n; ++idx) {
		const int code = entry->cp_932[idx];
		int len;

		if (code >= 0x100) {
			/* arithmetic conversion of the two-byte code */
			const int lead = (code & 0xff00) - (code >= 0xe000 ? 0xb000 : 0x7000);
			const int trail = (code & 0xff) - ((code & 0x80) ? 32 : 31);
			const int jis = ((lead << 1) | trail) - ((code & 0xff) >= 159 ? 94 : 0x100);

			len = 8;
			sprintf(expect_text, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", jis >> 8, jis & 0xff);
		} else if (code >= 0xa1 && code < 0xe0) {
			len = 3;
			sprintf(expect_text, "%%0e%%%02x%%0f", code - 0x80);
		} else {
			len = 1;
			sprintf(expect_text, "%%%02x", code);
		}
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n", expect_text, len);
	}
	printf("}\n");
}
/*
 * Generators that can be selected by name.  The list is terminated by an
 * entry whose name is a null pointer.
 */
static struct generator_entry entries[] = {
	{ "to_cp932",   prologue_to_cp932,   epilogue, to_cp932_visitor },
	{ "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
	{ "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
	{ "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
	{ NULL, NULL, NULL, NULL }
};

/* Name of the mapping table file, opened relative to the working directory. */
static const char cp932_txt[] = "CP932.TXT";
/*
 * Split one line of the mapping file into fields, in place.
 *
 * Fields are separated by spaces or tabs; the line ends at a newline, a '#'
 * or the end of the string.  Each field is terminated by overwriting the
 * character that follows it, and its start is stored in fields[].
 *
 * Returns the number of fields found, or -1 if max_fields have been stored
 * and the line has not ended yet.
 */
static int split_fields(char *line, const char **fields, int max_fields)
{
	char *p = line;
	int count = 0;

	for (;;) {
		char *start;
		int eol;

		if (count >= max_fields)
			return -1;

		while (*p == ' ' || *p == '\t')
			++p;
		if (*p == '\0' || *p == '#' || *p == '\n')
			return count;

		start = p;
		while (*p != '\0' && *p != '#' && *p != '\n' && *p != ' ' && *p != '\t')
			++p;
		/* anything other than a blank here means the line is finished */
		eol = (*p != ' ' && *p != '\t');
		*p = '\0';
		fields[count++] = start;
		if (eol)
			return count;
		++p;
	}
}

/*
 * Parse a numeric field (base chosen by its prefix, as strtol() base 0 does)
 * into *value.
 *
 * The whole field must be a number in the range 0..0x7fffffff; a single
 * trailing carriage return is tolerated so that CRLF files can be read.
 * Returns 0 on success and -1 if the field is malformed or out of range.
 */
static int parse_code(const char *text, int *value)
{
	char *end;
	long v;

	/* strtol() only sets errno on failure, so it has to be cleared first */
	errno = 0;
	v = strtol(text, &end, 0);
	if (errno != 0 || end == text)
		return -1;
	if (*end == '\r')
		++end;
	if (*end != '\0')
		return -1;
	if (v < 0 || v > 0x7fffffffL)
		return -1;
	*value = (int)v;
	return 0;
}

/*
 * Read CP932.TXT, collect the CP932 codes recorded for every Unicode code
 * point, and print the expect script of the generator named by argv[1] on
 * standard output.
 *
 * Exit status: 0 on success, 255 when no generator was named, 1 for an
 * unknown generator, 2 when the mapping file cannot be opened or read,
 * 3 for a line with an unexpected number of fields, 4 for a malformed value,
 * too many codes for one code point, or memory exhaustion.
 */
int main(int argc, char **argv)
{
	int retval = 0;
	FILE *fp;
	char buf[1024];
	struct generator_entry *gen;
	struct mappings map;

	if (argc <= 1) {
		fprintf(stderr, "usage: %s generator\n", argc > 0 ? argv[0] : "gen_exp");
		return 255;
	}

	for (gen = entries;; ++gen) {
		if (!gen->name) {
			fprintf(stderr, "Unknown generator: %s\n", argv[1]);
			return 1;
		}
		if (strcmp(gen->name, argv[1]) == 0)
			break;
	}

	fp = fopen(cp932_txt, "r");
	if (!fp) {
		fprintf(stderr, "Failed to open %s\n", cp932_txt);
		return 2;
	}

	mappings_init(&map);

	while (fgets(buf, sizeof(buf), fp)) {
		const char *fields[16];
		int cp_932, cp_uni, field, r;

		field = split_fields(buf, fields, (int)(sizeof(fields) / sizeof(*fields)));
		if (field < 0) {
			fprintf(stderr, "Too many fields (incorrect file?)\n");
			retval = 3;
			goto out;
		}

		/* blank lines, comments and codes without a mapping are skipped */
		if (field == 0 || field == 1)
			continue;
		if (field != 2) {
			fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
			retval = 3;
			goto out;
		}

		if (parse_code(fields[0], &cp_932)) {
			fprintf(stderr, "Malformed field value: %s\n", fields[0]);
			retval = 4;
			goto out;
		}
		if (parse_code(fields[1], &cp_uni)) {
			fprintf(stderr, "Malformed field value: %s\n", fields[1]);
			retval = 4;
			goto out;
		}

		r = mappings_add(&map, cp_uni, cp_932);
		if (r == 1) {
			fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n",
					(unsigned int)cp_uni);
			retval = 4;
			goto out;
		} else if (r) {
			fprintf(stderr, "Out of memory\n");
			retval = 4;
			goto out;
		}
	}

	/* fgets() returns NULL for both end-of-file and a read error */
	if (ferror(fp)) {
		fprintf(stderr, "Failed to read %s\n", cp932_txt);
		retval = 2;
		goto out;
	}

	{
		size_t i;

		printf("%s", gen->prologue);
		for (i = 0; i < map.n; ++i)
			gen->visitor(&map.entries[i]);
		printf("%s", gen->epilogue);
	}

out:
	fclose(fp);
	mappings_destroy(&map);
	return retval;
}
/*
* vim: sts=4 sw=4 ts=4 noet
*/