Moriyoshi Koizumi
moriy****@users*****
2002年 12月 21日 (土) 01:46:36 JST
moriyoshi 02/12/21 01:46:36 Modified: filters mbfilter_ascii.c mbfilter_ascii.h mbfilter_big5.c mbfilter_big5.h mbfilter_cp1251.c mbfilter_cp1251.h mbfilter_cp1252.c mbfilter_cp1252.h mbfilter_cp866.c mbfilter_cp866.h mbfilter_cp932.c mbfilter_cp932.h mbfilter_cp936.c mbfilter_cp936.h mbfilter_euc_cn.c mbfilter_euc_cn.h mbfilter_euc_jp.c mbfilter_euc_jp.h mbfilter_euc_jp_win.c mbfilter_euc_jp_win.h mbfilter_euc_kr.c mbfilter_euc_kr.h mbfilter_euc_tw.c mbfilter_euc_tw.h mbfilter_hz.c mbfilter_hz.h mbfilter_iso2022_kr.c mbfilter_iso2022_kr.h mbfilter_iso8859_1.c mbfilter_iso8859_1.h mbfilter_iso8859_10.c mbfilter_iso8859_10.h mbfilter_iso8859_13.c mbfilter_iso8859_13.h mbfilter_iso8859_14.c mbfilter_iso8859_14.h mbfilter_iso8859_15.c mbfilter_iso8859_15.h mbfilter_iso8859_2.c mbfilter_iso8859_2.h mbfilter_iso8859_3.c mbfilter_iso8859_3.h mbfilter_iso8859_4.c mbfilter_iso8859_4.h mbfilter_iso8859_5.c mbfilter_iso8859_5.h mbfilter_iso8859_6.c mbfilter_iso8859_6.h mbfilter_iso8859_7.c mbfilter_iso8859_7.h mbfilter_iso8859_8.c mbfilter_iso8859_8.h mbfilter_iso8859_9.c mbfilter_iso8859_9.h mbfilter_jis.c mbfilter_jis.h mbfilter_koi8r.c mbfilter_koi8r.h mbfilter_sjis.c mbfilter_sjis.h mbfilter_uhc.c mbfilter_uhc.h mbfilter_utf7.c mbfilter_utf7.h mbfilter_utf8.c mbfilter_utf8.h mbfl mbfilter.c mbfilter.h Log: Externalised identification filter tables Revision Changes Path 1.3 +21 -0 libmbfl/filters/mbfilter_ascii.c Index: mbfilter_ascii.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_ascii.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_ascii.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_ascii.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -35,6 +35,8 @@ #include "mbfilter.h" #include "mbfilter_ascii.h" +static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter); + static const char *mbfl_encoding_ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", 
"ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "cp367", "csASCII", NULL}; const mbfl_encoding mbfl_encoding_ascii = { @@ -46,6 +48,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_ascii = { + mbfl_no_encoding_ascii, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_ascii +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -64,4 +74,15 @@ return c; } +static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x20 && c < 0x80) { + ; + } else if (c == 0x0d || c == 0x0a || c == 0x09 || c == 0) { /* CR or LF or HTAB or null */ + ; + } else { + filter->flag = 1; + } + return c; +} 1.2 +2 -1 libmbfl/filters/mbfilter_ascii.h Index: mbfilter_ascii.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_ascii.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_ascii.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_ascii.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -34,7 +34,8 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_ascii; - +extern const struct mbfl_identify_vtbl vtbl_identify_ascii; + int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter); #endif /* MBFL_MBFILTER_ASCII_H */ 1.2 +30 -0 libmbfl/filters/mbfilter_big5.c Index: mbfilter_big5.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_big5.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_big5.c 7 Dec 2002 19:20:47 -0000 1.1 +++ mbfilter_big5.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -36,6 +36,8 @@ #include "unicode_table_big5.h" +static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -66,6 
+68,14 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_big5 = { + mbfl_no_encoding_big5, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_big5 +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -178,3 +188,23 @@ return c; } + +static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter) +{ + if (filter->status) { /* kanji second char */ + if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + } else if (c >= 0 && c < 0x80) { /* latin ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + + return c; +} + + 1.2 +1 -0 libmbfl/filters/mbfilter_big5.h Index: mbfilter_big5.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_big5.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_big5.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_big5.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_big5; +extern const struct mbfl_identify_vtbl vtbl_identify_big5; int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter); 1.3 +22 -0 libmbfl/filters/mbfilter_cp1251.c Index: mbfilter_cp1251.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp1251.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_cp1251.c 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_cp1251.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -35,6 +35,8 @@ #include "mbfilter_cp1251.h" #include "unicode_table_cp1251.h" +static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter); + static const char *mbfl_encoding_cp1251_aliases[] = {"CP1251", "CP-1251", 
"WINDOWS-1251", NULL}; const mbfl_encoding mbfl_encoding_cp1251 = { @@ -46,6 +48,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_cp1251 = { + mbfl_no_encoding_cp1251, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp1251 +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -111,3 +121,15 @@ return c; } + +/* all of this is so ugly now! */ +static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + 1.3 +1 -0 libmbfl/filters/mbfilter_cp1251.h Index: mbfilter_cp1251.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp1251.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_cp1251.h 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_cp1251.h 20 Dec 2002 16:46:34 -0000 1.3 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp1251; +extern const struct mbfl_identify_vtbl vtbl_identify_cp1251; int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter); 1.4 +24 -0 libmbfl/filters/mbfilter_cp1252.c Index: mbfilter_cp1252.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp1252.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- mbfilter_cp1252.c 7 Dec 2002 23:05:40 -0000 1.3 +++ mbfilter_cp1252.c 20 Dec 2002 16:46:34 -0000 1.4 @@ -35,6 +35,8 @@ #include "mbfilter_cp1252.h" #include "unicode_table_cp1252.h" +static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter); + static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL}; const mbfl_encoding mbfl_encoding_cp1252 = { @@ -46,6 +48,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl 
vtbl_identify_cp1252 = { + mbfl_no_encoding_cp1252, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp1252 +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -100,6 +109,21 @@ CK((*filter->output_function)(s, filter->data)); return c; +} + +/* We only distinguish the MS extensions to ISO-8859-1. + * Actually, this is pretty much a NO-OP, since the identification + * system doesn't allow us to discriminate between a positive match, + * a possible match and a definite non-match. + * The problem here is that cp1252 looks like SJIS for certain chars. + * */ +static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xa0) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; } 1.3 +1 -0 libmbfl/filters/mbfilter_cp1252.h Index: mbfilter_cp1252.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp1252.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_cp1252.h 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_cp1252.h 20 Dec 2002 16:46:34 -0000 1.3 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp1252; +extern const struct mbfl_identify_vtbl vtbl_identify_cp1252; int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter); 1.3 +21 -0 libmbfl/filters/mbfilter_cp866.c Index: mbfilter_cp866.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp866.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_cp866.c 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_cp866.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -34,6 +34,8 @@ #include "mbfilter.h" #include "unicode_table_cp866.h" +static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter); + static const 
char *mbfl_encoding_cp866_aliases[] = {"CP866", "CP-866", "IBM-866", NULL}; const mbfl_encoding mbfl_encoding_cp866 = { @@ -45,6 +47,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_cp866 = { + mbfl_no_encoding_cp866, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp866 +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -110,3 +120,14 @@ return c; } + +static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + 1.3 +1 -0 libmbfl/filters/mbfilter_cp866.h Index: mbfilter_cp866.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp866.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_cp866.h 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_cp866.h 20 Dec 2002 16:46:34 -0000 1.3 @@ -31,6 +31,7 @@ #define MBFL_MBFILTER_CP866_H extern const mbfl_encoding mbfl_encoding_cp866; +extern const struct mbfl_identify_vtbl vtbl_identify_cp866; int mbfl_filt_conv_cp866_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_cp866(int c, mbfl_convert_filter *filter); 1.2 +27 -0 libmbfl/filters/mbfilter_cp932.c Index: mbfilter_cp932.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp932.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_cp932.c 7 Dec 2002 19:20:47 -0000 1.1 +++ mbfilter_cp932.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -37,6 +37,8 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" +static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ 
-67,6 +69,12 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_sjiswin = { + mbfl_no_encoding_sjis_win, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_sjiswin +}; #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) @@ -301,5 +309,24 @@ return c; } +static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter) +{ + if (filter->status) { /* kanji second char */ + if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + } else if (c >= 0 && c < 0x80) { /* latin ok */ + ; + } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ + ; + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + + return c; +} 1.2 +1 -0 libmbfl/filters/mbfilter_cp932.h Index: mbfilter_cp932.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp932.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_cp932.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_cp932.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_sjis_win; +extern const struct mbfl_identify_vtbl vtbl_identify_sjiswin; int mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter); 1.2 +26 -0 libmbfl/filters/mbfilter_cp936.c Index: mbfilter_cp936.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp936.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_cp936.c 7 Dec 2002 19:20:44 -0000 1.1 +++ mbfilter_cp936.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -36,6 +36,8 @@ #include "unicode_table_cp936.h" +static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter); + static const unsigned char 
mblen_table_cp936[] = { /* 0x81-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -66,6 +68,12 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_cp936 = { + mbfl_no_encoding_cp936, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp936 +}; #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) @@ -175,6 +183,24 @@ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { CK(mbfl_filt_conv_illegal_output(c, filter)); } + } + + return c; +} + +static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter) +{ + if (filter->status) { /* kanji second char */ + if (c < 0x40 || c > 0xfe || c == 0x7f) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + } else if (c >= 0 && c < 0x80) { /* latin ok */ + ; + } else if (c > 0x80 && c < 0xff) { /* DBCS lead byte */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; } return c; 1.2 +2 -1 libmbfl/filters/mbfilter_cp936.h Index: mbfilter_cp936.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_cp936.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_cp936.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_cp936.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,8 +33,9 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp936; +extern const struct mbfl_identify_vtbl vtbl_identify_cp936; int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter); - + #endif /* MBFL_MBFILTER_CP936_H */ 1.2 +37 -0 libmbfl/filters/mbfilter_euc_cn.c Index: mbfilter_euc_cn.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_cn.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_cn.c 7 Dec 2002 19:20:50 -0000 
1.1 +++ mbfilter_euc_cn.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -36,6 +36,8 @@ #include "unicode_table_cp936.h" +static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -66,6 +68,13 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_euccn = { + mbfl_no_encoding_euc_cn, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_euccn +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -173,6 +182,34 @@ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { CK(mbfl_filt_conv_illegal_output(c, filter)); } + } + + return c; +} + +static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got lead byte */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; } return c; 1.2 +1 -0 libmbfl/filters/mbfilter_euc_cn.h Index: mbfilter_euc_cn.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_cn.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_cn.h 7 Dec 2002 19:20:45 -0000 1.1 +++ mbfilter_euc_cn.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -31,6 +31,7 @@ #define MBFL_MBFILTER_EUC_CN_H extern const mbfl_encoding mbfl_encoding_euc_cn; +extern const struct mbfl_identify_vtbl vtbl_identify_euccn; int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter); 1.2 +61 -0 libmbfl/filters/mbfilter_euc_jp.c Index: 
mbfilter_euc_jp.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_jp.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_jp.c 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_euc_jp.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -37,6 +37,8 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" +static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -67,6 +69,13 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_eucjp = { + mbfl_no_encoding_euc_jp, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_eucjp +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -243,6 +252,58 @@ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { CK(mbfl_filt_conv_illegal_output(c, filter)); } + } + + return c; +} + +static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ + filter->status = 1; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0212 first char */ + filter->status = 3; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got first half */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 2: /* got 0x8e */ + if (c < 0xa1 || c > 0xdf) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 3: /* got 0x8f */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status++; + break; + case 4: /* got 0x8f */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } 
+ filter->status = 0; + break; + + default: + filter->status = 0; + break; } return c; 1.2 +1 -0 libmbfl/filters/mbfilter_euc_jp.h Index: mbfilter_euc_jp.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_jp.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_jp.h 7 Dec 2002 19:20:49 -0000 1.1 +++ mbfilter_euc_jp.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_jp; +extern const struct mbfl_identify_vtbl vtbl_identify_eucjp; int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter); 1.2 +60 -0 libmbfl/filters/mbfilter_euc_jp_win.c Index: mbfilter_euc_jp_win.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_jp_win.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_jp_win.c 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_euc_jp_win.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -38,6 +38,8 @@ #include "unicode_table_jis.h" #include "cp932_table.h" +static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -60,6 +62,13 @@ static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", NULL}; +const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = { + mbfl_no_encoding_eucjp_win, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_eucjp_win +}; + const mbfl_encoding mbfl_encoding_eucjp_win = { mbfl_no_encoding_eucjp_win, "eucJP-win", @@ -338,5 +347,56 @@ return c; } +static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 
0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ + filter->status = 1; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0212 first char */ + filter->status = 3; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got first half */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 2: /* got 0x8e */ + if (c < 0xa1 || c > 0xdf) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 3: /* got 0x8f */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status++; + break; + case 4: /* got 0x8f */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} 1.2 +1 -0 libmbfl/filters/mbfilter_euc_jp_win.h Index: mbfilter_euc_jp_win.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_jp_win.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_jp_win.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_euc_jp_win.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_eucjp_win; +extern const struct mbfl_identify_vtbl vtbl_identify_eucjpwin; int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter); 1.2 +37 -0 libmbfl/filters/mbfilter_euc_kr.c Index: mbfilter_euc_kr.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_kr.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_kr.c 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_euc_kr.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -35,6 +35,8 @@ #include "mbfilter_euc_kr.h" #include 
"unicode_table_uhc.h" +static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -65,6 +67,14 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_euckr = { + mbfl_no_encoding_euc_kr, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_euckr +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -199,3 +209,30 @@ return c; } +static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got lead byte */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} 1.2 +1 -0 libmbfl/filters/mbfilter_euc_kr.h Index: mbfilter_euc_kr.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_kr.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_kr.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_euc_kr.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_kr; +extern const struct mbfl_identify_vtbl vtbl_identify_euckr; int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter); 1.2 +63 -0 libmbfl/filters/mbfilter_euc_tw.c Index: mbfilter_euc_tw.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_tw.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u 
-r1.1 -r1.2 --- mbfilter_euc_tw.c 7 Dec 2002 19:20:47 -0000 1.1 +++ mbfilter_euc_tw.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -36,6 +36,8 @@ #include "unicode_table_cns11643.h" +static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -67,6 +69,14 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_euctw = { + mbfl_no_encoding_euc_tw, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_euctw +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -247,3 +257,56 @@ } return c; } + +static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ + filter->status = 1; + } else if (c == 0x8e) { /* DBCS lead byte */ + filter->status = 2; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got lead byte */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 2: /* got lead byte */ + if (c >= 0xa1 && c < 0xaf) { /* ok */ + filter->status = 3; + } else { + filter->flag = 1; /* bad */ + } + break; + + case 3: /* got lead byte */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 4; + break; + + case 4: /* got lead byte */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} + 1.2 +1 -0 libmbfl/filters/mbfilter_euc_tw.h Index: mbfilter_euc_tw.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_euc_tw.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_euc_tw.h 7 Dec 
2002 19:20:50 -0000 1.1 +++ mbfilter_euc_tw.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_tw; +extern const struct mbfl_identify_vtbl vtbl_identify_euctw; int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter); 1.2 +57 -0 libmbfl/filters/mbfilter_hz.c Index: mbfilter_hz.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_hz.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_hz.c 7 Dec 2002 19:20:45 -0000 1.1 +++ mbfilter_hz.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -36,6 +36,8 @@ #include "unicode_table_cp936.h" +static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter); + const mbfl_encoding mbfl_encoding_hz = { mbfl_no_encoding_hz, "HZ", @@ -45,6 +47,14 @@ MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE }; +const struct mbfl_identify_vtbl vtbl_identify_hz = { + mbfl_no_encoding_hz, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_hz +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -193,6 +203,53 @@ filter->status &= 0xff; return 0; } + +static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter) +{ + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: GB2312 */ + case 0: + if (c == 0x7e) { + filter->status += 2; + } else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) { /* DBCS first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x11: GB2312 second char */ + case 1: + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + case 2: + if (c == 0x7d) { /* '}' */ + filter->status = 0; + } else if (c == 0x7b) { /* '{' */ + filter->status = 0x10; + } else if (c == 0x7e) { 
/* '~' */ + filter->status = 0; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + /* * Local variables: 1.2 +1 -0 libmbfl/filters/mbfilter_hz.h Index: mbfilter_hz.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_hz.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_hz.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_hz.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_hz; +extern const struct mbfl_identify_vtbl vtbl_identify_hz; int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter); 1.5 +81 -0 libmbfl/filters/mbfilter_iso2022_kr.c Index: mbfilter_iso2022_kr.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso2022_kr.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- mbfilter_iso2022_kr.c 7 Dec 2002 21:41:14 -0000 1.4 +++ mbfilter_iso2022_kr.c 20 Dec 2002 16:46:34 -0000 1.5 @@ -35,6 +35,8 @@ #include "mbfilter_euc_kr.h" #include "unicode_table_uhc.h" +static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter); + const mbfl_encoding mbfl_encoding_2022kr = { mbfl_no_encoding_2022kr, "ISO-2022-KR", @@ -44,6 +46,13 @@ MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE }; +const struct mbfl_identify_vtbl vtbl_identify_2022kr = { + mbfl_no_encoding_2022kr, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_2022kr +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -252,3 +261,75 @@ filter->status &= 0xff; return 0; } + +static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: 
KSC5601 mode */ +/* case 0x20: KSC5601 DBCS */ +/* case 0x40: KSC5601 SBCS */ + case 0: + if (!(filter->status & 0x10)) { + if (c == 0x1b) + filter->status += 2; + } else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x21: KSC5601 second char */ + case 1: + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x29) { /* ')' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $) */ + case 4: /* entered from case 3 via status++ (3 -> 4) */ + if (c == 0x43) { /* 'C' */ + filter->status = 0x10; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + 1.2 +1 -0 libmbfl/filters/mbfilter_iso2022_kr.h Index: mbfilter_iso2022_kr.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso2022_kr.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso2022_kr.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso2022_kr.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_2022kr; +extern const struct mbfl_identify_vtbl vtbl_identify_2022kr; int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_1.c Index: mbfilter_iso8859_1.c =================================================================== RCS file: 
/cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_1.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_1.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_1.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -45,6 +45,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_1 = { + mbfl_no_encoding_8859_1, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_1.h Index: mbfilter_iso8859_1.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_1.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_1.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_1.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_1; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_1; int mbfl_filt_conv_wchar_8859_1(int c, mbfl_convert_filter *filter); 1.3 +8 -0 libmbfl/filters/mbfilter_iso8859_10.c Index: mbfilter_iso8859_10.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_10.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_10.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_10.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_10 = { + mbfl_no_encoding_8859_10, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_10.h Index: mbfilter_iso8859_10.h =================================================================== RCS file: 
/cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_10.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_10.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_10.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_10; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_10; int mbfl_filt_conv_8859_10_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_10(int c, mbfl_convert_filter *filter); 1.3 +8 -0 libmbfl/filters/mbfilter_iso8859_13.c Index: mbfilter_iso8859_13.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_13.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_13.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_13.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_13 = { + mbfl_no_encoding_8859_13, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_13.h Index: mbfilter_iso8859_13.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_13.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_13.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_13.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_13; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_13; int mbfl_filt_conv_8859_13_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_13(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_14.c Index: mbfilter_iso8859_14.c 
=================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_14.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_14.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_14.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_14 = { + mbfl_no_encoding_8859_14, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +2 -0 libmbfl/filters/mbfilter_iso8859_14.h Index: mbfilter_iso8859_14.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_14.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_14.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_14.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -16,6 +16,8 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_14; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_14; + int mbfl_filt_conv_8859_14_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_14(int c, mbfl_convert_filter *filter); 1.3 +8 -0 libmbfl/filters/mbfilter_iso8859_15.c Index: mbfilter_iso8859_15.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_15.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_15.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_15.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_15 = { + mbfl_no_encoding_8859_15, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 
libmbfl/filters/mbfilter_iso8859_15.h Index: mbfilter_iso8859_15.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_15.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_15.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_15.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -16,6 +16,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_15; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_15; int mbfl_filt_conv_8859_15_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_15(int c, mbfl_convert_filter *filter); 1.3 +6 -0 libmbfl/filters/mbfilter_iso8859_2.c Index: mbfilter_iso8859_2.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_2.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_2.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_2.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,12 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_2 = { + mbfl_no_encoding_8859_2, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true }; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_2.h Index: mbfilter_iso8859_2.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_2.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_2.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_2.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_2; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_2; int mbfl_filt_conv_8859_2_wchar(int c, mbfl_convert_filter *filter); int 
mbfl_filt_conv_wchar_8859_2(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_3.c Index: mbfilter_iso8859_3.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_3.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_3.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_3.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_3 = { + mbfl_no_encoding_8859_3, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true }; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_3.h Index: mbfilter_iso8859_3.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_3.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_3.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_3.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_3; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_3; int mbfl_filt_conv_8859_3_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_4.c Index: mbfilter_iso8859_4.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_4.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_4.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_4.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_4 = { + mbfl_no_encoding_8859_4, + mbfl_filt_ident_common_ctor, + 
mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true }; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_4.h Index: mbfilter_iso8859_4.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_4.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_4.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_4.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -31,6 +31,7 @@ #define MBFL_MBFILTER_ISO8859_4_H extern const mbfl_encoding mbfl_encoding_8859_4; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_4; int mbfl_filt_conv_8859_4_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_4(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_5.c Index: mbfilter_iso8859_5.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_5.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_5.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_5.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_5 = { + mbfl_no_encoding_8859_5, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true }; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_5.h Index: mbfilter_iso8859_5.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_5.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_5.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_5.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_5; +extern const 
struct mbfl_identify_vtbl vtbl_identify_8859_5; int mbfl_filt_conv_8859_5_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_5(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_6.c Index: mbfilter_iso8859_6.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_6.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_6.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_6.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_6 = { + mbfl_no_encoding_8859_6, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_6.h Index: mbfilter_iso8859_6.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_6.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_6.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_6.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_6; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_6; int mbfl_filt_conv_8859_6_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_7.c Index: mbfilter_iso8859_7.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_7.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_7.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_7.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct 
mbfl_identify_vtbl vtbl_identify_8859_7 = { + mbfl_no_encoding_8859_7, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_7.h Index: mbfilter_iso8859_7.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_7.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_7.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_7.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_7; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_7; int mbfl_filt_conv_8859_7_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_7(int c, mbfl_convert_filter *filter); 1.3 +8 -0 libmbfl/filters/mbfilter_iso8859_8.c Index: mbfilter_iso8859_8.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_8.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_8.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_8.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,14 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_8 = { + mbfl_no_encoding_8859_8, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_8.h Index: mbfilter_iso8859_8.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_8.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_8.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_8.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 
+33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_8; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_8; int mbfl_filt_conv_8859_8_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_8(int c, mbfl_convert_filter *filter); 1.3 +7 -0 libmbfl/filters/mbfilter_iso8859_9.c Index: mbfilter_iso8859_9.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_9.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_iso8859_9.c 7 Dec 2002 21:41:14 -0000 1.2 +++ mbfilter_iso8859_9.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -46,6 +46,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_8859_9 = { + mbfl_no_encoding_8859_9, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_true +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* 1.2 +1 -0 libmbfl/filters/mbfilter_iso8859_9.h Index: mbfilter_iso8859_9.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_iso8859_9.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_iso8859_9.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_iso8859_9.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_9; +extern const struct mbfl_identify_vtbl vtbl_identify_8859_9; int mbfl_filt_conv_8859_9_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_8859_9(int c, mbfl_convert_filter *filter); 1.2 +192 -0 libmbfl/filters/mbfilter_jis.c Index: mbfilter_jis.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_jis.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_jis.c 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_jis.c 20 Dec 2002 16:46:34 -0000 1.2 
@@ -37,6 +37,9 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" +static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter); + const mbfl_encoding mbfl_encoding_jis = { mbfl_no_encoding_jis, "JIS", @@ -55,6 +58,21 @@ MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE }; +const struct mbfl_identify_vtbl vtbl_identify_jis = { + mbfl_no_encoding_jis, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_jis +}; + +const struct mbfl_identify_vtbl vtbl_identify_2022jp = { + mbfl_no_encoding_2022jp, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_2022jp +}; + + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -426,6 +444,180 @@ } filter->status &= 0xff; return 0; +} + +static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ 
*/ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c 
== 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; } 1.2 +3 -1 libmbfl/filters/mbfilter_jis.h Index: mbfilter_jis.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_jis.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_jis.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_jis.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -34,7 +34,9 @@ extern const mbfl_encoding mbfl_encoding_jis; extern const mbfl_encoding mbfl_encoding_2022jp; - +extern const struct mbfl_identify_vtbl vtbl_identify_2022jp; +extern const struct mbfl_identify_vtbl vtbl_identify_jis; + int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter); 1.3 +18 -0 libmbfl/filters/mbfilter_koi8r.c Index: mbfilter_koi8r.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_koi8r.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_koi8r.c 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_koi8r.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -34,6 +34,8 @@ #include "mbfilter.h" #include "unicode_table_koi8r.h" +static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter); + static const char *mbfl_encoding_koi8r_aliases[] = {"KOI8-R", "KOI8R", NULL}; const mbfl_encoding mbfl_encoding_koi8r = { @@ -45,6 +47,13 @@ MBFL_ENCTYPE_SBCS }; +const struct mbfl_identify_vtbl vtbl_identify_koi8r = { + mbfl_no_encoding_koi8r, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_koi8r +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -109,4 
+118,13 @@ } return c; +} + +static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; } 1.3 +1 -0 libmbfl/filters/mbfilter_koi8r.h Index: mbfilter_koi8r.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_koi8r.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_koi8r.h 7 Dec 2002 23:05:40 -0000 1.2 +++ mbfilter_koi8r.h 20 Dec 2002 16:46:34 -0000 1.3 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_koi8r; +extern const struct mbfl_identify_vtbl vtbl_identify_koi8r; int mbfl_filt_conv_koi8r_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_koi8r(int c, mbfl_convert_filter *filter); 1.2 +30 -0 libmbfl/filters/mbfilter_sjis.c Index: mbfilter_sjis.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_sjis.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_sjis.c 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_sjis.c 20 Dec 2002 16:46:34 -0000 1.2 @@ -37,6 +37,8 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" +static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -67,6 +69,13 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_sjis = { + mbfl_no_encoding_sjis, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_sjis +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) #define SJIS_ENCODE(c1,c2,s1,s2) \ @@ -245,3 +254,24 @@ return c; } + +static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter) +{ + if 
(filter->status) { /* kanji second char */ + if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + } else if (c >= 0 && c < 0x80) { /* latin ok */ + ; + } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ + ; + } else if (c > 0x80 && c < 0xf0 && c != 0xa0) { /* kanji first char */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + + return c; +} + 1.2 +1 -0 libmbfl/filters/mbfilter_sjis.h Index: mbfilter_sjis.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_sjis.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_sjis.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_sjis.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_sjis; +extern const struct mbfl_identify_vtbl vtbl_identify_sjis; int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter); 1.6 +51 -0 libmbfl/filters/mbfilter_uhc.c Index: mbfilter_uhc.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_uhc.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- mbfilter_uhc.c 11 Dec 2002 18:22:59 -0000 1.5 +++ mbfilter_uhc.c 20 Dec 2002 16:46:34 -0000 1.6 @@ -35,6 +35,8 @@ #include "mbfilter_euc_kr.h" #include "unicode_table_uhc.h" +static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -65,6 +67,13 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_uhc = { + mbfl_no_encoding_uhc, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_uhc +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } 
while (0) /* @@ -195,3 +204,45 @@ return c; } + +static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c >= 0x81 && c <= 0xa0) { /* dbcs first char */ + filter->status= 1; + } else if (c >= 0xa1 && c <= 0xc6) { /* dbcs first char */ + filter->status= 2; + } else if (c >= 0xc7 && c <= 0xfe) { /* dbcs first char */ + filter->status= 3; + } else { /* bad */ + filter->flag = 1; + } + break; /* this byte is done; the second-byte checks below apply to the next byte only */ + case 1: + case 2: + if (c < 0x41 || (c > 0x5a && c < 0x61) + || (c > 0x7a && c < 0x81) || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 3: + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + 1.2 +1 -0 libmbfl/filters/mbfilter_uhc.h Index: mbfilter_uhc.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_uhc.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_uhc.h 7 Dec 2002 19:20:44 -0000 1.1 +++ mbfilter_uhc.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_uhc; +extern const struct mbfl_identify_vtbl vtbl_identify_uhc; int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter); 1.3 +59 -0 libmbfl/filters/mbfilter_utf7.c Index: mbfilter_utf7.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_utf7.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_utf7.c 7 Dec 2002 23:01:24 -0000 1.2 +++ mbfilter_utf7.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -34,6 +34,8 @@ #include "mbfilter.h" #include "mbfilter_utf7.h" +static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter); + static 
const unsigned char mbfl_base64_table[] = { /* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */ 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d, @@ -58,6 +60,13 @@ MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE }; +const struct mbfl_identify_vtbl vtbl_identify_utf7 = { + mbfl_no_encoding_utf7, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_utf7 +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -379,4 +388,54 @@ } return 0; } + +static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter) +{ + int n; + + switch (filter->status) { + /* status 0: directly encoded characters; filter->flag is set whenever c is not acceptable in the current state */ + case 0: + if (c == 0x2b) { /* '+' shift character: move to status 1 */ + filter->status++; + } else if (c == 0x5c || c == 0x7e || c < 0 || c > 0x7f) { /* illegal when directly encoded: 0x5c, 0x7e, negative, or above 0x7f */ + filter->flag = 1; /* bad */ + } + break; + + /* Modified Base64: status 1 = immediately after '+', status 2 = at least one base64 character seen */ + case 1: + case 2: + n = 0; + if (c >= 0x41 && c <= 0x5a) { /* A - Z */ + n = 1; + } else if (c >= 0x61 && c <= 0x7a) { /* a - z */ + n = 1; + } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */ + n = 1; + } else if (c == 0x2b) { /* '+' */ + n = 1; + } else if (c == 0x2f) { /* '/' */ + n = 1; + } + if (n <= 0) { + if (filter->status == 1 && c != 0x2d) { + filter->flag = 1; /* bad: right after '+' only a base64 character or 0x2d ('-') is accepted */ + } else if (c < 0 || c > 0x7f) { + filter->flag = 1; /* bad: value outside 0x00-0x7f */ + } + filter->status = 0; /* any non-base64 character ends the shifted run */ + } else { + filter->status = 2; + } + break; + + default: + filter->status = 0; /* unknown state: reset */ + break; + } + + return c; +} + 1.2 +1 -0 libmbfl/filters/mbfilter_utf7.h Index: mbfilter_utf7.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_utf7.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_utf7.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_utf7.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -33,6 +33,7 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_utf7; +extern const struct mbfl_identify_vtbl
vtbl_identify_utf7; int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_utf7(int c, mbfl_convert_filter *filter); 1.3 +67 -0 libmbfl/filters/mbfilter_utf8.c Index: mbfilter_utf8.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_utf8.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfilter_utf8.c 7 Dec 2002 23:01:24 -0000 1.2 +++ mbfilter_utf8.c 20 Dec 2002 16:46:34 -0000 1.3 @@ -34,6 +34,8 @@ #include "mbfilter.h" #include "mbfilter_utf8.h" +static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter); + static const unsigned char mblen_table_utf8[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -64,6 +66,13 @@ MBFL_ENCTYPE_MBCS }; +const struct mbfl_identify_vtbl vtbl_identify_utf8 = { + mbfl_no_encoding_utf8, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_utf8 +}; + #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -177,6 +186,64 @@ } else { if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter) +{ + if (c < 0x80) { + if (c < 0) { + filter->flag = 1; /* bad: negative value */ + } else if (c != 0 && filter->status) { + filter->flag = 1; /* bad: non-NUL single byte while a multibyte sequence was pending (a NUL only resets status) */ + } + filter->status = 0; + } else if (c < 0xc0) { /* trailing byte range 0x80-0xbf */ + switch (filter->status) { + case 0x20: /* 3 byte code 2nd char */ + case 0x30: /* 4 byte code 2nd char */ + case 0x31: /* 4 byte code 3rd char */ + case 0x40: /* 5 byte code 2nd char */ + case 0x41: /* 5 byte code 3rd char */ + case 0x42: /* 5 byte code 4th char */ + case 0x50: /* 6 byte code 2nd char */ + case 0x51: /* 6 byte code 3rd char */ + case 0x52: /* 6 byte code 4th char */ + case 0x53: /* 6 byte code 5th char */ + filter->status++; /* more trailing bytes expected: advance within the sequence */ + break; + case 0x10: /* 2 
byte code 2nd char */ + case 0x21: /* 3 byte code 3rd char */ + case 0x32: /* 4 byte code 4th char */ + case 0x43: /* 5 byte code 5th char */ + case 0x54: /* 6 byte code 6th char */ + filter->status = 0; /* last byte of the sequence: back to the initial state */ + break; + default: + filter->flag = 1; /* bad: trailing byte not expected in this state */ + filter->status = 0; + break; + } + } else { + if (filter->status) { + filter->flag = 1; /* bad: lead byte arrived before the previous sequence finished */ + } + filter->status = 0; + if (c < 0xe0) { /* 2 byte code first char */ + filter->status = 0x10; + } else if (c < 0xf0) { /* 3 byte code 1st char */ + filter->status = 0x20; + } else if (c < 0xf8) { /* 4 byte code 1st char */ + filter->status = 0x30; + } else if (c < 0xfc) { /* 5 byte code 1st char */ + filter->status = 0x40; + } else if (c < 0xfe) { /* 6 byte code 1st char */ + filter->status = 0x50; + } else { + filter->flag = 1; /* bad: 0xfe and above cannot start a sequence */ } } 1.2 +1 -0 libmbfl/filters/mbfilter_utf8.h Index: mbfilter_utf8.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_utf8.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- mbfilter_utf8.h 7 Dec 2002 19:20:50 -0000 1.1 +++ mbfilter_utf8.h 20 Dec 2002 16:46:34 -0000 1.2 @@ -31,6 +31,7 @@ #define MBFL_MBFILTER_UTF8_H extern const mbfl_encoding mbfl_encoding_utf8; +extern const struct mbfl_identify_vtbl vtbl_identify_utf8; int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_utf8(int c, mbfl_convert_filter *filter); 1.5 +11 -1013 libmbfl/mbfl/mbfilter.c Index: mbfilter.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/mbfl/mbfilter.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- mbfilter.c 9 Dec 2002 01:07:39 -0000 1.4 +++ mbfilter.c 20 Dec 2002 16:46:36 -0000 1.5 @@ -329,43 +329,6 @@ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46 }; -/* forward */ -static void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter); -static int 
mbfl_filt_conv_common_flush(mbfl_convert_filter *filter); -static void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter); - -static void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter); -static void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter); -static void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter); -static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter); - -static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter); - -static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter); - -static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter); - -static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter); - -static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter); - -static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter); - /* convert filter function table */ static const struct 
mbfl_convert_vtbl vtbl_pass = { mbfl_no_encoding_pass, @@ -1383,215 +1346,13 @@ /* identify filter function table */ -static const struct mbfl_identify_vtbl vtbl_identify_ascii = { - mbfl_no_encoding_ascii, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_ascii }; - -static const struct mbfl_identify_vtbl vtbl_identify_utf8 = { - mbfl_no_encoding_utf8, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_utf8 }; - -static const struct mbfl_identify_vtbl vtbl_identify_utf7 = { - mbfl_no_encoding_utf7, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_utf7 }; - -static const struct mbfl_identify_vtbl vtbl_identify_eucjp = { - mbfl_no_encoding_euc_jp, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_eucjp }; - -static const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = { - mbfl_no_encoding_eucjp_win, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_eucjp }; - -static const struct mbfl_identify_vtbl vtbl_identify_sjis = { - mbfl_no_encoding_sjis, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_sjis }; - -static const struct mbfl_identify_vtbl vtbl_identify_sjiswin = { - mbfl_no_encoding_sjis_win, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_sjiswin }; - -static const struct mbfl_identify_vtbl vtbl_identify_jis = { - mbfl_no_encoding_jis, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_jis }; - -static const struct mbfl_identify_vtbl vtbl_identify_2022jp = { - mbfl_no_encoding_2022jp, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_2022jp }; - -static struct mbfl_identify_vtbl vtbl_identify_euccn = { - mbfl_no_encoding_euc_cn, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_euccn }; - -static struct mbfl_identify_vtbl vtbl_identify_cp936 = { - 
mbfl_no_encoding_cp936, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_cp936 }; - -static struct mbfl_identify_vtbl vtbl_identify_hz = { - mbfl_no_encoding_hz, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_hz }; - -static struct mbfl_identify_vtbl vtbl_identify_euctw = { - mbfl_no_encoding_euc_tw, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_euctw }; - -static struct mbfl_identify_vtbl vtbl_identify_big5 = { - mbfl_no_encoding_big5, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_big5 }; - -static struct mbfl_identify_vtbl vtbl_identify_euckr = { - mbfl_no_encoding_euc_kr, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_euckr }; - -static struct mbfl_identify_vtbl vtbl_identify_uhc = { - mbfl_no_encoding_uhc, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_uhc }; - -static struct mbfl_identify_vtbl vtbl_identify_2022kr = { - mbfl_no_encoding_2022kr, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_2022kr }; - -static struct mbfl_identify_vtbl vtbl_identify_cp1251 = { - mbfl_no_encoding_cp1251, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_cp1251 }; - -static struct mbfl_identify_vtbl vtbl_identify_cp866 = { - mbfl_no_encoding_cp866, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_cp866 }; - -static struct mbfl_identify_vtbl vtbl_identify_koi8r = { - mbfl_no_encoding_koi8r, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_koi8r }; - -static const struct mbfl_identify_vtbl vtbl_identify_cp1252 = { - mbfl_no_encoding_cp1252, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_cp1252 }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_1 = { - mbfl_no_encoding_8859_1, - mbfl_filt_ident_common_ctor, - 
mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_2 = { - mbfl_no_encoding_8859_2, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_3 = { - mbfl_no_encoding_8859_3, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_4 = { - mbfl_no_encoding_8859_4, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_5 = { - mbfl_no_encoding_8859_5, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_6 = { - mbfl_no_encoding_8859_6, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_7 = { - mbfl_no_encoding_8859_7, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_8 = { - mbfl_no_encoding_8859_8, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_9 = { - mbfl_no_encoding_8859_9, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_10 = { - mbfl_no_encoding_8859_10, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_13 = { - mbfl_no_encoding_8859_13, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_14 = { - mbfl_no_encoding_8859_14, - 
mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; - -static const struct mbfl_identify_vtbl vtbl_identify_8859_15 = { - mbfl_no_encoding_8859_15, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_true }; static const struct mbfl_identify_vtbl vtbl_identify_false = { mbfl_no_encoding_pass, mbfl_filt_ident_false_ctor, mbfl_filt_ident_common_dtor, - mbfl_filt_ident_false }; + mbfl_filt_ident_false +}; static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_utf8, @@ -2191,812 +1952,49 @@ /* * commonly used constructor and destructor */ -static void -mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter) +void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; } -static int -mbfl_filt_conv_common_flush(mbfl_convert_filter *filter) +int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; return 0; } -static void -mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter) +void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; } -/* - * - * identify filter functions - * - */ - -static void -mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter) +void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter) { filter->status = 0; filter->flag = 0; } -static void -mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter) +void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter) { filter->status = 0; } -static int -mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter) +int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter) { - if (c >= 0x20 && c < 0x80) { - ; - } else if (c == 0x0d || c == 0x0a || c == 0x09 || c == 0) { /* CR or LF or HTAB or null */ - ; - } else { - filter->flag = 1; - } - - return c; -} - -static int -mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter) -{ - if (c < 0x80) { - if (c < 0) { - 
filter->flag = 1; /* bad */ - } else if (c != 0 && filter->status) { - filter->flag = 1; /* bad */ - } - filter->status = 0; - } else if (c < 0xc0) { - switch (filter->status) { - case 0x20: /* 3 byte code 2nd char */ - case 0x30: /* 4 byte code 2nd char */ - case 0x31: /* 4 byte code 3rd char */ - case 0x40: /* 5 byte code 2nd char */ - case 0x41: /* 5 byte code 3rd char */ - case 0x42: /* 5 byte code 4th char */ - case 0x50: /* 6 byte code 2nd char */ - case 0x51: /* 6 byte code 3rd char */ - case 0x52: /* 6 byte code 4th char */ - case 0x53: /* 6 byte code 5th char */ - filter->status++; - break; - case 0x10: /* 2 byte code 2nd char */ - case 0x21: /* 3 byte code 3rd char */ - case 0x32: /* 4 byte code 4th char */ - case 0x43: /* 5 byte code 5th char */ - case 0x54: /* 6 byte code 6th char */ - filter->status = 0; - break; - default: - filter->flag = 1; /* bad */ - filter->status = 0; - break; - } - } else { - if (filter->status) { - filter->flag = 1; /* bad */ - } - filter->status = 0; - if (c < 0xe0) { /* 2 byte code first char */ - filter->status = 0x10; - } else if (c < 0xf0) { /* 3 byte code 1st char */ - filter->status = 0x20; - } else if (c < 0xf8) { /* 4 byte code 1st char */ - filter->status = 0x30; - } else if (c < 0xfc) { /* 5 byte code 1st char */ - filter->status = 0x40; - } else if (c < 0xfe) { /* 6 byte code 1st char */ - filter->status = 0x50; - } else { - filter->flag = 1; /* bad */ - } - } - - return c; -} - -static int -mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter) -{ - int n; - - switch (filter->status) { - /* directly encoded characters */ - case 0: - if (c == 0x2b) { /* '+' shift character */ - filter->status++; - } else if (c == 0x5c || c == 0x7e || c < 0 || c > 0x7f) { /* illegal character */ - filter->flag = 1; /* bad */ - } - break; - - /* Modified Base64 */ - case 1: - case 2: - n = 0; - if (c >= 0x41 && c <= 0x5a) { /* A - Z */ - n = 1; - } else if (c >= 0x61 && c <= 0x7a) { /* a - z */ - n = 1; - } else if (c >= 0x30 && c 
<= 0x39) { /* 0 - 9 */ - n = 1; - } else if (c == 0x2b) { /* '+' */ - n = 1; - } else if (c == 0x2f) { /* '/' */ - n = 1; - } - if (n <= 0) { - if (filter->status == 1 && c != 0x2d) { - filter->flag = 1; /* bad */ - } else if (c < 0 || c > 0x7f) { - filter->flag = 1; /* bad */ - } - filter->status = 0; - } else { - filter->status = 2; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ - filter->status = 1; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got first half */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* got 0x8e */ - if (c < 0xa1 || c > 0xdf) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 3: /* got 0x8f */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status++; - break; - case 4: /* got 0x8f */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* kanji second char */ - if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ - ; - } else if (c > 0x80 && c < 0xf0 && c != 0xa0) { /* kanji first char */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} - -static int -mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter) -{ - if 
(filter->status) { /* kanji second char */ - if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ - ; - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} - -static int -mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->flag = 1; /* bad */ - 
filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* kanji second char */ - if (c < 0x40 || c > 0xfe || c == 0x7f) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0x80 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} - -static int -mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter) -{ - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: GB2312 */ - case 0: - if (c == 0x7e) { - filter->status += 2; - } else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) { /* DBCS first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x11: GB2312 second char */ - case 1: - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - case 2: - if (c == 0x7d) { /* '}' */ - filter->status = 0; - } else if (c == 
0x7b) { /* '{' */ - filter->status = 0x10; - } else if (c == 0x7e) { /* '~' */ - filter->status = 0; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else if (c == 0x8e) { /* DBCS lead byte */ - filter->status = 2; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* got lead byte */ - if (c >= 0xa1 && c < 0xaf) { /* ok */ - filter->status = 3; - } else { - filter->flag = 1; /* bad */ - } - break; - - case 3: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 4; - break; - - case 4: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* kanji second char */ - if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} - -static int -mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got lead byte */ - if (c < 0xa1 || c > 
0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c >= 0x81 && c <= 0xa0) { /* dbcs first char */ - filter->status= 1; - } else if (c >= 0xa1 && c <= 0xc6) { /* dbcs first char */ - filter->status= 2; - } else if (c >= 0xc7 && c <= 0xfe) { /* dbcs first char */ - filter->status= 3; - } else { /* bad */ - filter->flag = 1; - } - - case 1: - case 2: - if (c < 0x41 || (c > 0x5a && c < 0x61) - || (c > 0x7a && c < 0x81) || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 3: - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int -mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: KSC5601 mode */ -/* case 0x20: KSC5601 DBCS */ -/* case 0x40: KSC5601 SBCS */ - case 0: - if (!(filter->status & 0x10)) { - if (c == 0x1b) - filter->status += 2; - } else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x21: KSC5601 second char */ - case 1: - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x29) { /* ')' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $) */ - case 5: - if (c 
== 0x43) { /* 'C' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -/* We only distinguish the MS extensions to ISO-8859-1. - * Actually, this is pretty much a NO-OP, since the identification - * system doesn't allow us to discriminate between a positive match, - * a possible match and a definite non-match. - * The problem here is that cp1252 looks like SJIS for certain chars. - * */ -static int -mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x80 && c < 0xa0) - filter->flag = 0; - else - filter->flag = 1; /* not it */ - return c; -} - -/* all of this is so ugly now! */ -static int -mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x80 && c < 0xff) - filter->flag = 0; - else - filter->flag = 1; /* not it */ - return c; -} - -static int -mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x80 && c < 0xff) - filter->flag = 0; - else - filter->flag = 1; /* not it */ - return c; -} - -static int -mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x80 && c < 0xff) - filter->flag = 0; - else - filter->flag = 1; /* not it */ - return c; -} - -static int -mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 
0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - + filter->flag = 1; /* bad */ return c; } -static void -mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter) +void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter) { filter->status = 0; filter->flag = 1; } -static int -mbfl_filt_ident_false(int c, mbfl_identify_filter *filter) -{ - filter->flag = 1; /* bad */ - return c; -} - -static int -mbfl_filt_ident_true(int c, mbfl_identify_filter *filter) +int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter) { return c; } 1.4 +13 -0 libmbfl/mbfl/mbfilter.h Index: mbfilter.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/mbfl/mbfilter.h,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- mbfilter.h 8 Dec 2002 11:00:02 -0000 1.3 +++ mbfilter.h 20 Dec 2002 16:46:36 -0000 1.4 @@ -612,4 +612,17 @@ #define mbfl_prealloc __mbfl_allocators->preallloc #define mbfl_pfree __mbfl_allocators->pfree +/* common filters */ +void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter); +int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter); +void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter); + +void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter); +void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter); +void 
mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter); + +int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter); +int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter); + + #endif /* MBFL_MBFILTER_H */