/* * "streamable kanji code filter and converter" * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. * * LICENSE NOTICES * * This file is part of "streamable kanji code filter and converter", * which is distributed under the terms of GNU Lesser General Public * License (version 2) as published by the Free Software Foundation. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with "streamable kanji code filter and converter"; * if not, write to the Free Software Foundation, Inc., 59 Temple Place, * Suite 330, Boston, MA 02111-1307 USA * * The author of this file: * */ /* * The source code included in this files was separated from mbfilter.c * by Moriyoshi Koizumi on 20 Dec 2002. The file * mbfilter.c is included in this package . * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #ifdef HAVE_STDDEF_H #include #endif #include "mbfl_encoding.h" #include "mbfl_allocators.h" #include "mbfl_filter_output.h" #include "mbfilter_pass.h" #include "mbfilter_8bit.h" #include "mbfilter_wchar.h" #include "filters/mbfilter_euc_cn.h" #include "filters/mbfilter_hz.h" #include "filters/mbfilter_euc_tw.h" #include "filters/mbfilter_big5.h" #include "filters/mbfilter_uhc.h" #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" #include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" #include "filters/mbfilter_iso8859_4.h" #include "filters/mbfilter_iso8859_5.h" #include "filters/mbfilter_iso8859_6.h" #include "filters/mbfilter_iso8859_7.h" #include "filters/mbfilter_iso8859_8.h" #include "filters/mbfilter_iso8859_9.h" #include "filters/mbfilter_iso8859_10.h" #include "filters/mbfilter_iso8859_13.h" #include "filters/mbfilter_iso8859_14.h" #include "filters/mbfilter_iso8859_15.h" #include "filters/mbfilter_base64.h" #include "filters/mbfilter_qprint.h" #include "filters/mbfilter_uuencode.h" #include "filters/mbfilter_7bit.h" #include "filters/mbfilter_utf7.h" #include "filters/mbfilter_utf7imap.h" #include "filters/mbfilter_utf8.h" #include "filters/mbfilter_utf16.h" #include "filters/mbfilter_utf32.h" #include "filters/mbfilter_byte2.h" #include "filters/mbfilter_byte4.h" #include "filters/mbfilter_ucs4.h" #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter); /* hex character table "0123456789ABCDEF" */ static char mbfl_hexchar_table[] = { 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46 }; const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { &vtbl_utf8_wchar, &vtbl_wchar_utf8, &vtbl_eucjp_wchar, &vtbl_wchar_eucjp, &vtbl_sjis_wchar, &vtbl_wchar_sjis, &vtbl_cp51932_wchar, &vtbl_wchar_cp51932, &vtbl_jis_wchar, &vtbl_wchar_jis, &vtbl_2022jp_wchar, &vtbl_wchar_2022jp, &vtbl_2022jpms_wchar, &vtbl_wchar_2022jpms, &vtbl_eucjpwin_wchar, &vtbl_wchar_eucjpwin, &vtbl_sjiswin_wchar, &vtbl_wchar_sjiswin, &vtbl_euccn_wchar, &vtbl_wchar_euccn, &vtbl_cp936_wchar, &vtbl_wchar_cp936, &vtbl_hz_wchar, &vtbl_wchar_hz, &vtbl_euctw_wchar, &vtbl_wchar_euctw, &vtbl_big5_wchar, &vtbl_wchar_big5, &vtbl_euckr_wchar, &vtbl_wchar_euckr, &vtbl_uhc_wchar, &vtbl_wchar_uhc, &vtbl_2022kr_wchar, &vtbl_wchar_2022kr, &vtbl_cp1251_wchar, &vtbl_wchar_cp1251, &vtbl_cp866_wchar, &vtbl_wchar_cp866, &vtbl_koi8r_wchar, &vtbl_wchar_koi8r, &vtbl_cp1252_wchar, &vtbl_wchar_cp1252, &vtbl_ascii_wchar, &vtbl_wchar_ascii, &vtbl_8859_1_wchar, &vtbl_wchar_8859_1, &vtbl_8859_2_wchar, &vtbl_wchar_8859_2, &vtbl_8859_3_wchar, &vtbl_wchar_8859_3, &vtbl_8859_4_wchar, &vtbl_wchar_8859_4, &vtbl_8859_5_wchar, &vtbl_wchar_8859_5, &vtbl_8859_6_wchar, &vtbl_wchar_8859_6, &vtbl_8859_7_wchar, &vtbl_wchar_8859_7, &vtbl_8859_8_wchar, &vtbl_wchar_8859_8, &vtbl_8859_9_wchar, &vtbl_wchar_8859_9, &vtbl_8859_10_wchar, &vtbl_wchar_8859_10, &vtbl_8859_13_wchar, &vtbl_wchar_8859_13, &vtbl_8859_14_wchar, &vtbl_wchar_8859_14, &vtbl_8859_15_wchar, &vtbl_wchar_8859_15, &vtbl_8bit_b64, &vtbl_b64_8bit, &vtbl_uuencode_8bit, &vtbl_wchar_html, &vtbl_html_wchar, &vtbl_8bit_qprint, &vtbl_qprint_8bit, &vtbl_8bit_7bit, &vtbl_7bit_8bit, &vtbl_utf7_wchar, &vtbl_wchar_utf7, &vtbl_utf7imap_wchar, &vtbl_wchar_utf7imap, &vtbl_utf16_wchar, &vtbl_wchar_utf16, &vtbl_utf16be_wchar, &vtbl_wchar_utf16be, &vtbl_utf16le_wchar, &vtbl_wchar_utf16le, &vtbl_utf32_wchar, &vtbl_wchar_utf32, &vtbl_utf32be_wchar, &vtbl_wchar_utf32be, &vtbl_utf32le_wchar, &vtbl_wchar_utf32le, &vtbl_ucs4_wchar, &vtbl_wchar_ucs4, &vtbl_ucs4be_wchar, &vtbl_wchar_ucs4be, &vtbl_ucs4le_wchar, &vtbl_wchar_ucs4le, &vtbl_ucs2_wchar, &vtbl_wchar_ucs2, &vtbl_ucs2be_wchar, &vtbl_wchar_ucs2be, &vtbl_ucs2le_wchar, &vtbl_wchar_ucs2le, &vtbl_byte4be_wchar, &vtbl_wchar_byte4be, &vtbl_byte4le_wchar, &vtbl_wchar_byte4le, &vtbl_byte2be_wchar, &vtbl_wchar_byte2be, &vtbl_byte2le_wchar, &vtbl_wchar_byte2le, &vtbl_armscii8_wchar, &vtbl_wchar_armscii8, &vtbl_pass, NULL }; mbfl_convert_filter * mbfl_convert_filter_new( enum mbfl_no_encoding from, enum mbfl_no_encoding to, int (*output_function)(int, void* ), int (*flush_function)(void*), void* data) { mbfl_convert_filter * filter; /* allocate */ filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); if (filter == NULL) { return NULL; } /* encoding structure */ filter->from = mbfl_no2encoding(from); filter->to = mbfl_no2encoding(to); if (filter->from == NULL) { filter->from = &mbfl_encoding_pass; } if (filter->to == NULL) { filter->to = &mbfl_encoding_pass; } if (output_function != NULL) { filter->output_function = output_function; } else { filter->output_function = mbfl_filter_output_null; } filter->flush_function = flush_function; filter->data = data; filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; filter->illegal_substchar = 0x3f; /* '?' */ filter->num_illegalchar = 0; /* setup the function table */ mbfl_convert_filter_reset_vtbl(filter); /* constructor */ (*filter->filter_ctor)(filter); return filter; } void mbfl_convert_filter_delete(mbfl_convert_filter *filter) { if (filter) { (*filter->filter_dtor)(filter); mbfl_free((void*)filter); } } int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter) { return (*filter->filter_function)(c, filter); } int mbfl_convert_filter_flush(mbfl_convert_filter *filter) { (*filter->filter_flush)(filter); return (filter->flush_function ? (*filter->flush_function)(filter->data) : 0); } void mbfl_convert_filter_reset(mbfl_convert_filter *filter, enum mbfl_no_encoding from, enum mbfl_no_encoding to) { /* destruct old filter */ (*filter->filter_dtor)(filter); /* resset filter member */ filter->from = mbfl_no2encoding(from); filter->to = mbfl_no2encoding(to); /* set the vtbl */ mbfl_convert_filter_reset_vtbl(filter); /* construct new filter */ (*filter->filter_ctor)(filter); } void mbfl_convert_filter_copy( mbfl_convert_filter *src, mbfl_convert_filter *dist) { dist->filter_ctor = src->filter_ctor; dist->filter_dtor = src->filter_dtor; dist->filter_function = src->filter_function; dist->filter_flush = src->filter_flush; dist->output_function = src->output_function; dist->flush_function = src->flush_function; dist->data = src->data; dist->status = src->status; dist->cache = src->cache; dist->from = src->from; dist->to = src->to; dist->illegal_mode = src->illegal_mode; dist->illegal_substchar = src->illegal_substchar; dist->num_illegalchar = src->num_illegalchar; } int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src) { int n; unsigned char *p; p = src->buffer; n = src->pos; while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { return -1; } n--; } return n; } int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p) { int c; while ((c = *p++) != '\0') { if ((*filter->filter_function)(c, filter) < 0) { return -1; } } return 0; } #if 0 static int mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, int n) { while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { return -1; } n--; } return n; } #endif /* illegal character output function for conv-filter */ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) { int mode_backup, ret, n, m, r; ret = 0; mode_backup = filter->illegal_mode; filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; switch (mode_backup) { case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR: ret = (*filter->filter_function)(filter->illegal_substchar, filter); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: if (c >= 0) { if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); } else { if (c < MBFL_WCSGROUP_WCHARMAX) { m = c & ~MBFL_WCSPLANE_MASK; switch (m) { case MBFL_WCSPLANE_JIS0208: ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS+"); break; case MBFL_WCSPLANE_JIS0212: ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS2+"); break; case MBFL_WCSPLANE_WINCP932: ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"W932+"); break; case MBFL_WCSPLANE_8859_1: ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"I8859_1+"); break; default: ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"?+"); break; } c &= MBFL_WCSPLANE_MASK; } else { ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"BAD+"); c &= MBFL_WCSGROUP_MASK; } } if (ret >= 0) { m = 0; r = 28; while (r >= 0) { n = (c >> r) & 0xf; if (n || m) { m = 1; ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); if (ret < 0) { break; } } r -= 4; } if (m == 0 && ret >= 0) { ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); } } } break; default: break; } filter->illegal_mode = mode_backup; filter->num_illegalchar++; return ret; } const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encoding from, enum mbfl_no_encoding to) { const struct mbfl_convert_vtbl *vtbl; int i; if (to == mbfl_no_encoding_base64 || to == mbfl_no_encoding_qprint || to == mbfl_no_encoding_7bit) { from = mbfl_no_encoding_8bit; } else if (from == mbfl_no_encoding_base64 || from == mbfl_no_encoding_qprint || from == mbfl_no_encoding_uuencode) { to = mbfl_no_encoding_8bit; } i = 0; while ((vtbl = mbfl_convert_filter_list[i++]) != NULL){ if (vtbl->from == from && vtbl->to == to) { return vtbl; } } return NULL; } static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter) { const struct mbfl_convert_vtbl *vtbl; vtbl = mbfl_convert_filter_get_vtbl(filter->from->no_encoding, filter->to->no_encoding); if (vtbl == NULL) { vtbl = &vtbl_pass; } filter->filter_ctor = vtbl->filter_ctor; filter->filter_dtor = vtbl->filter_dtor; filter->filter_function = vtbl->filter_function; filter->filter_flush = vtbl->filter_flush; } /* * commonly used constructor and destructor */ void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; } int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; return 0; } void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; }