From 8984fa674290c17b62c7cf5cf0dd266d297fc14b Mon Sep 17 00:00:00 2001 From: naruse Date: Sun, 17 Feb 2008 03:21:20 +0000 Subject: [PATCH] * enc/{euc_jp.c,gbk.c,iso_8859_1.c,iso_8859_11.c,iso_8859_13.c, iso_8859_2.c,iso_8859_6.c,iso_8859_7.c,iso_8859_8.c,iso_8859_9.c, shift_jis.c,windows_1251.c}: add document about encodings. * enc/cp949.c: divided into new file. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15516 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 8 ++ enc/cp949.c | 219 +++++++++++++++++++++++++++++++++++++++++++++ enc/euc_jp.c | 22 ++++- enc/gbk.c | 8 +- enc/iso_8859_1.c | 8 ++ enc/iso_8859_11.c | 11 +++ enc/iso_8859_13.c | 5 +- enc/iso_8859_2.c | 8 ++ enc/iso_8859_6.c | 8 ++ enc/iso_8859_7.c | 8 ++ enc/iso_8859_8.c | 8 ++ enc/iso_8859_9.c | 8 ++ enc/shift_jis.c | 21 +++++ enc/windows_1251.c | 7 ++ 14 files changed, 345 insertions(+), 4 deletions(-) create mode 100644 enc/cp949.c diff --git a/ChangeLog b/ChangeLog index dc4e7a9bfa1433..5bcd0ab78dbd5e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Sun Feb 17 12:17:52 2008 NARUSE, Yui + + * enc/{euc_jp.c,gbk.c,iso_8859_1.c,iso_8859_11.c,iso_8859_13.c, + iso_8859_2.c,iso_8859_6.c,iso_8859_7.c,iso_8859_8.c,iso_8859_9.c, + shift_jis.c,windows_1251.c}: add document about encodings. + + * enc/cp949.c: divided into new file. + Sun Feb 17 10:59:04 2008 Tanaka Akira * re.c (rb_reg_quote): return US-ASCII string consistently. diff --git a/enc/cp949.c b/enc/cp949.c new file mode 100644 index 00000000000000..00464606253428 --- /dev/null +++ b/enc/cp949.c @@ -0,0 +1,219 @@ +/********************************************************************** + cp949.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static const int EncLen_CP949[] = { /* character length implied by the first byte: 2 for 0x81-0xFE, 1 otherwise */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char CP949_CAN_BE_TRAIL_TABLE[256] = { /* 1 if the byte can be the second byte of a double-byte character: 0x41-0x5A, 0x61-0x7A, 0x81-0xFE (the same set that state S1 of trans[] accepts) */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 +}; + +#define CP949_ISMB_FIRST(byte) (EncLen_CP949[byte] > 1) +#define CP949_ISMB_TRAIL(byte) CP949_CAN_BE_TRAIL_TABLE[(byte)] + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; /* negative values are final results; S0/S1 select a row of trans[] */ +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = 
{ + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* first byte: A = complete single-byte character, 1 = lead byte (continue in row S1), F = invalid */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ A, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* second byte: A = accepted (0x41-0x5A, 0x61-0x7A, 0x81-0xFE), F = invalid */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, + /* 6 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, + /* 8 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + } +}; +#undef A +#undef F + +static int 
+cp949_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ /* Classify the character starting at p using trans[]: CHARFOUND(1) for a single-byte character, CHARFOUND(2) for a lead byte followed by an accepted second byte, INVALID when the state machine fails, NEEDMORE when a lead byte is the last byte before e. */ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ + ONIGENC_CONSTRUCT_MBCLEN_INVALID() + if (s < 0) RETURN(1); /* first byte already decided the result (ACCEPT or FAILURE) */ + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_CP949[firstbyte]-1); /* lead byte with no second byte available */ + s = trans[s][*p++]; + RETURN(2); +#undef RETURN +} + +static OnigCodePoint +cp949_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_mbc_to_code(enc, p, end); +} + +static int +cp949_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ + return onigenc_mb2_code_to_mbc(enc, code, buf); +} + +static int +cp949_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower, OnigEncoding enc) +{ + return onigenc_mbn_mbc_case_fold(enc, flag, + pp, end, lower); +} + +#if 0 /* compiled out */ +static int +cp949_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); +} +#endif + +static int +cp949_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ + return onigenc_mb2_is_code_ctype(enc, code, ctype); +} + +static UChar* +cp949_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc) +{ /* Returns a position at or before s (s itself when s <= start) to be used as a character head: backs up over bytes that can be lead bytes, then moves forward again in whole characters. */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (CP949_ISMB_TRAIL(*p)) { /* *s might be a second byte, so inspect the bytes before it */ + while (p > start) { + if (! CP949_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(enc, p, s); + if (p + len > s) return (UChar* )p; /* the character starting at p reaches s */ + p += len; + return (UChar* )(p + ((s - p) & ~1)); /* bytes left in [p, s) all passed CP949_ISMB_FIRST in the loop above; they are skipped in pairs */ +} + +static int +cp949_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ /* FALSE if *s can be a trail byte, TRUE otherwise */ + const UChar c = *s; + return (CP949_ISMB_TRAIL(c) ? 
FALSE : TRUE); +} + +OnigEncodingDefine(cp949, CP949) = { /* CP949 encoding descriptor: cp949_* functions from this file plus shared onigenc_* helpers */ + cp949_mbc_enc_len, + "CP949", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + cp949_mbc_to_code, + onigenc_mb2_code_to_mbclen, + cp949_code_to_mbc, + cp949_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + cp949_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + cp949_left_adjust_char_head, + cp949_is_allowed_reverse_match +}; +/* + * Name: CP949 + * Link: http://www.microsoft.com/globaldev/reference/dbcs/949.mspx + * Link: http://en.wikipedia.org/wiki/EUC-KR#EUC-KR + */ diff --git a/enc/euc_jp.c b/enc/euc_jp.c index 1828d03a3d913d..703e0e4f38ae7b 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -362,7 +362,27 @@ OnigEncodingDefine(euc_jp, EUC_JP) = { is_allowed_reverse_match, 0 }; +/* + * Name: EUC-JP + * MIBenum: 18 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://home.m05.itscom.net/numa/cde/sjis-euc/sjis-euc.html + */ ENC_ALIAS("eucJP", "EUC-JP"); /* UI-OSF Application Platform Profile for Japanese Environment Version 1.1 */ + +/* + * Name: eucJP-ms + * Link: http://home.m05.itscom.net/numa/cde/ucs-conv/ucs-conv.html + * Link: http://www2d.biglobe.ne.jp/~msyk/charcode/cp932/eucJP-ms.html + * Link: http://ja.wikipedia.org/wiki/EUC-JP + */ ENC_REPLICATE("eucJP-ms", "EUC-JP"); /* TOG/JVC CDE/Motif Technical WG */ ENC_ALIAS("euc-jp-ms", "EUC-JP"); -ENC_REPLICATE("CP51932", "EUC-JP"); /* Windows CodePage 51932 */ + +/* + * Name: CP51932 + * Link: http://search.cpan.org/src/NARUSE/Encode-EUCJPMS-0.07/ucm/cp51932.ucm + * Link: http://legacy-encoding.sourceforge.jp/wiki/index.php?cp51932 + * Link: http://msyk.at.webry.info/200511/article_2.html + */ +ENC_REPLICATE("CP51932", "EUC-JP"); diff --git a/enc/gbk.c b/enc/gbk.c index 787b1815e037d2..25f42092a963f7 100644 --- a/enc/gbk.c +++ b/enc/gbk.c @@ -212,5 +212,11 @@ OnigEncodingDefine(gbk, 
GBK) = { gbk_left_adjust_char_head, gbk_is_allowed_reverse_match }; +/* + * Name: GBK + * MIBenum: 113 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.iana.org/assignments/charset-reg/GBK + * Link: http://www.microsoft.com/globaldev/reference/dbcs/936.mspx + */ ENC_ALIAS("CP936", "GBK"); -ENC_REPLICATE("CP949", "GBK"); diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c index e2779753e2a00d..efe053131a4c9f 100644 --- a/enc/iso_8859_1.c +++ b/enc/iso_8859_1.c @@ -273,5 +273,13 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-1", "ISO-8859-1"); + +/* + * Name: windows-1252 + * MIBenum: 2252 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1252.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1252 + */ ENC_REPLICATE("Windows-1252", "ISO-8859-1"); ENC_ALIAS("CP1252", "Windows-1252"); diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c index facb28437d3a02..eca0b69a14b09f 100644 --- a/enc/iso_8859_11.c +++ b/enc/iso_8859_11.c @@ -95,6 +95,17 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-11", "ISO-8859-11"); + +/* + * Name: TIS-620 + * MIBenum: 2259 + * Link: http://en.wikipedia.org/wiki/Thai_Industrial_Standard_620-2533 + */ ENC_REPLICATE("TIS-620", "ISO-8859-11"); + +/* + * Name: Windows-874 + * Link: http://www.microsoft.com/globaldev/reference/sbcs/874.mspx + */ ENC_REPLICATE("Windows-874", "ISO-8859-11"); ENC_ALIAS("CP874", "Windows-874"); diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index b821dadb539cbd..9c9d684b67216d 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -235,8 +235,9 @@ ENC_ALIAS("ISO8859-13", "ISO-8859-13"); /* * Name: windows-1257 * MIBenum: 2257 - * http://www.microsoft.com/globaldev/reference/sbcs/1257.mspx - * http://en.wikipedia.org/wiki/Windows-1257 + * Link: 
http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1257.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1257 */ ENC_REPLICATE("Windows-1257", "ISO-8859-13"); ENC_ALIAS("CP1257", "Windows-1257"); diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c index 8f4e1ad2f89a19..1feb44bdaf8853 100644 --- a/enc/iso_8859_2.c +++ b/enc/iso_8859_2.c @@ -238,5 +238,13 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-2", "ISO-8859-2"); + +/* + * Name: windows-1250 + * MIBenum: 2250 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1250.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1250 + */ ENC_REPLICATE("Windows-1250", "ISO-8859-2"); ENC_ALIAS("CP1250", "Windows-1250"); diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c index 0f9a1b7d9549f5..2b04321f2724b9 100644 --- a/enc/iso_8859_6.c +++ b/enc/iso_8859_6.c @@ -95,5 +95,13 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-6", "ISO-8859-6"); + +/* + * Name: windows-1256 + * MIBenum: 2256 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1256.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1256 + */ ENC_REPLICATE("Windows-1256", "ISO-8859-6"); ENC_ALIAS("CP1256", "Windows-1256"); diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c index 4b1f6167c248a4..cd1e77a74a2004 100644 --- a/enc/iso_8859_7.c +++ b/enc/iso_8859_7.c @@ -225,5 +225,13 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-7", "ISO-8859-7"); + +/* + * Name: windows-1253 + * MIBenum: 2253 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1253.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1253 + */ 
ENC_REPLICATE("Windows-1253", "ISO-8859-7"); ENC_ALIAS("CP1253", "Windows-1253"); diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c index ab580844610419..52873e761869c6 100644 --- a/enc/iso_8859_8.c +++ b/enc/iso_8859_8.c @@ -95,5 +95,13 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-8", "ISO-8859-8"); + +/* + * Name: windows-1255 + * MIBenum: 2255 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1255.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1255 + */ ENC_REPLICATE("Windows-1255", "ISO-8859-8"); ENC_ALIAS("CP1255", "Windows-1255"); diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 9c228ad93e964d..17b7f644c33d03 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -231,5 +231,13 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { onigenc_always_true_is_allowed_reverse_match }; ENC_ALIAS("ISO8859-9", "ISO-8859-9"); + +/* + * Name: windows-1254 + * MIBenum: 2254 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1254.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1254 + */ ENC_REPLICATE("Windows-1254", "ISO-8859-9"); ENC_ALIAS("CP1254", "Windows-1254"); diff --git a/enc/shift_jis.c b/enc/shift_jis.c index 1b4c9c52667f2a..ba1fde32e85551 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -370,9 +370,30 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = { is_allowed_reverse_match, 0 }; +/* + * Name: Shift_JIS + * MIBenum: 17 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://ja.wikipedia.org/wiki/Shift_JIS + */ ENC_ALIAS("SJIS", "Shift_JIS"); + +/* + * Name: Windows-31J + * MIBenum: 2024 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/dbcs/932.mspx + * Link: http://ja.wikipedia.org/wiki/Windows-31J + * Link: 
http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-932-2000.ucm + */ ENC_REPLICATE("Windows-31J", "Shift_JIS"); ENC_ALIAS("CP932", "Windows-31J"); ENC_ALIAS("csWindows31J", "Windows-31J"); /* IANA. IE6 don't accept Windows-31J but csWindows31J. */ + +/* + * Name: MacJapanese + * Link: http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/JAPANESE.TXT + * Link: http://ja.wikipedia.org/wiki/MacJapanese + */ ENC_REPLICATE("MacJapanese", "Shift_JIS"); ENC_ALIAS("MacJapan", "MacJapanese"); diff --git a/enc/windows_1251.c b/enc/windows_1251.c index aeab6abefa5d8c..1aa48c6305f475 100644 --- a/enc/windows_1251.c +++ b/enc/windows_1251.c @@ -198,4 +198,11 @@ OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +/* + * Name: windows-1251 + * MIBenum: 2251 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1251.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1251 + */ ENC_ALIAS("CP1251", "Windows-1251");