Skip to content

Commit

Permalink
Bug 912470 part 1 - Implement Encoding Standard-compliant big5 decoder. r=emk.
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Jun 16, 2015
1 parent 1ea93fa commit a5644ff
Show file tree
Hide file tree
Showing 24 changed files with 19,392 additions and 37,782 deletions.
4 changes: 2 additions & 2 deletions dom/encoding/domainsfallbacks.properties
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ xn--wgbh1c=windows-1256

gr=ISO-8859-7

hk=Big5-HKSCS
xn--j6w193g=Big5-HKSCS
hk=Big5
xn--j6w193g=Big5

hr=windows-1250

Expand Down
2 changes: 1 addition & 1 deletion dom/encoding/labelsencodings.properties
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ x-gbk=gbk
gb18030=gb18030
hz-gb-2312=replacement
big5=Big5
big5-hkscs=Big5-HKSCS
big5-hkscs=Big5
cn-big5=Big5
csbig5=Big5
x-x-big5=Big5
Expand Down
79 changes: 77 additions & 2 deletions dom/encoding/test/test_TextDecoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function runTextDecoderOptions()
}, "testDecodeABVOption");
test(testDecoderForThaiEncoding, "testDecoderForThaiEncoding");
test(testInvalid2022JP, "testInvalid2022JP");
test(testDecoderForBig5, "testDecoderForBig5");
}

/*
Expand Down Expand Up @@ -355,8 +356,7 @@ function testDecoderGetEncoding()
{encoding: "x-mac-cyrillic", labels: ["x-mac-cyrillic", "x-mac-ukrainian"]},
{encoding: "gbk", labels: ["chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"]},
{encoding: "gb18030", labels: ["gb18030"]},
{encoding: "big5", labels: ["big5", "cn-big5", "csbig5", "x-x-big5"]},
{encoding: "big5-hkscs", labels: ["big5-hkscs"]},
{encoding: "big5", labels: ["big5", "cn-big5", "csbig5", "x-x-big5", "big5-hkscs"]},
{encoding: "euc-jp", labels: ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"]},
{encoding: "iso-2022-jp", labels: ["csiso2022jp", "iso-2022-jp"]},
{encoding: "shift_jis", labels: ["csshiftjis", "ms_kanji", "shift-jis", "shift_jis", "sjis", "windows-31j", "x-sjis"]},
Expand Down Expand Up @@ -463,3 +463,78 @@ function testInvalid2022JP()
});
assert_equals(failureCount, 0, failureCount + " of " + inputs.length + " tests failed");
}

/*
 * Feeds a fixed table of byte sequences to the "big5" decoder through
 * testCharset and checks each decoded string against its expectation.
 *
 * The table contains, in order:
 *   - an ASCII-only input,
 *   - two-byte sequences on their own,
 *   - the same two-byte sequences wrapped between 0x61 and 0x62,
 *   - sequences whose expected output contains U+FFFD.
 */
function testDecoderForBig5()
{
  // Each entry pairs the raw bytes handed to the decoder with the string the
  // decoder is expected to produce for them.
  const cases = [
    { bytes: [ 0x61, 0x62 ],             text: "\u0061\u0062" },
    { bytes: [ 0x87, 0x40 ],             text: "\u43F0" },
    { bytes: [ 0xFE, 0xFE ],             text: "\u79D4" },
    { bytes: [ 0xFE, 0xFD ],             text: "\uD864\uDD0D" },
    { bytes: [ 0x88, 0x62 ],             text: "\u00CA\u0304" },
    { bytes: [ 0x88, 0x64 ],             text: "\u00CA\u030C" },
    { bytes: [ 0x88, 0x66 ],             text: "\u00CA" },
    { bytes: [ 0x88, 0xA3 ],             text: "\u00EA\u0304" },
    { bytes: [ 0x88, 0xA5 ],             text: "\u00EA\u030C" },
    { bytes: [ 0x88, 0xA7 ],             text: "\u00EA" },
    { bytes: [ 0x99, 0xD4 ],             text: "\u8991" },
    { bytes: [ 0x99, 0xD5 ],             text: "\uD85E\uDD67" },
    { bytes: [ 0x99, 0xD6 ],             text: "\u8A29" },
    { bytes: [ 0x61, 0x87, 0x40, 0x62 ], text: "\u0061\u43F0\u0062" },
    { bytes: [ 0x61, 0xFE, 0xFE, 0x62 ], text: "\u0061\u79D4\u0062" },
    { bytes: [ 0x61, 0xFE, 0xFD, 0x62 ], text: "\u0061\uD864\uDD0D\u0062" },
    { bytes: [ 0x61, 0x88, 0x62, 0x62 ], text: "\u0061\u00CA\u0304\u0062" },
    { bytes: [ 0x61, 0x88, 0x64, 0x62 ], text: "\u0061\u00CA\u030C\u0062" },
    { bytes: [ 0x61, 0x88, 0x66, 0x62 ], text: "\u0061\u00CA\u0062" },
    { bytes: [ 0x61, 0x88, 0xA3, 0x62 ], text: "\u0061\u00EA\u0304\u0062" },
    { bytes: [ 0x61, 0x88, 0xA5, 0x62 ], text: "\u0061\u00EA\u030C\u0062" },
    { bytes: [ 0x61, 0x88, 0xA7, 0x62 ], text: "\u0061\u00EA\u0062" },
    { bytes: [ 0x61, 0x99, 0xD4, 0x62 ], text: "\u0061\u8991\u0062" },
    { bytes: [ 0x61, 0x99, 0xD5, 0x62 ], text: "\u0061\uD85E\uDD67\u0062" },
    { bytes: [ 0x61, 0x99, 0xD6, 0x62 ], text: "\u0061\u8A29\u0062" },
    { bytes: [ 0x80, 0x61 ],             text: "\uFFFD\u0061" },
    { bytes: [ 0xFF, 0x61 ],             text: "\uFFFD\u0061" },
    { bytes: [ 0xFE, 0x39 ],             text: "\uFFFD\u0039" },
    { bytes: [ 0x87, 0x66 ],             text: "\uFFFD\u0066" },
    { bytes: [ 0x81, 0x40 ],             text: "\uFFFD\u0040" },
    { bytes: [ 0x61, 0x81 ],             text: "\u0061\uFFFD" },
  ];

  // The zero-based position in the table is reported in the failure message.
  cases.forEach(function (entry, index) {
    testCharset({
      encoding: "big5",
      input: entry.bytes,
      expected: entry.text,
      msg: "decoder test #" + index + " for big5."
    });
  });
}
2 changes: 1 addition & 1 deletion intl/locale/windows/wincharset.properties
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ acp.932=Shift_JIS
acp.936=gb18030
acp.949=EUC-KR
acp.950=Big5
acp.951=Big5-HKSCS
acp.951=Big5
acp.1250=windows-1250
acp.1251=windows-1251
acp.1252=windows-1252
Expand Down
2 changes: 0 additions & 2 deletions intl/uconv/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,8 @@ UNIFIED_SOURCES += [
]

UNIFIED_SOURCES += [
'ucvtw/nsBIG5HKSCSToUnicode.cpp',
'ucvtw/nsBIG5ToUnicode.cpp',
'ucvtw/nsUnicodeToBIG5.cpp',
'ucvtw/nsUnicodeToBIG5HKSCS.cpp',
]

UNIFIED_SOURCES += [
Expand Down
14 changes: 9 additions & 5 deletions intl/uconv/nsIUnicodeDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,18 @@ class nsIUnicodeDecoder : public nsISupports
* @param aDestLength [IN/OUT] the length of the destination data buffer;
* after conversion will contain the number of Unicode
* characters written
* @return NS_PARTIAL_MORE_INPUT if only a partial conversion was
* done; more input is needed to continue
* NS_PARTIAL_MORE_OUTPUT if only a partial conversion
* was done; more output space is needed to continue
* NS_ERROR_ILLEGAL_INPUT if an illegal input sequence
* @return NS_ERROR_UDEC_ILLEGALINPUT if an illegal input sequence
* was encountered and the behavior was set to "signal";
* the caller must skip over one byte, reset the decoder
* and retry.
* NS_OK_UDEC_MOREOUTPUT if only a partial conversion
* was done; more output space is needed to continue
* NS_OK_UDEC_MOREINPUT if the input ended in the middle
* of an input code unit sequence. If this is the last
* result the caller has at the end of the stream, the
* caller must append one U+FFFD to the output.
* NS_OK if the input ended after a complete input code
* unit sequence.
*/
NS_IMETHOD Convert(const char * aSrc, int32_t * aSrcLength,
char16_t * aDest, int32_t * aDestLength) = 0;
Expand Down
22 changes: 1 addition & 21 deletions intl/uconv/nsUConvModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@
#include "nsUCvTWDll.h"
#include "nsBIG5ToUnicode.h"
#include "nsUnicodeToBIG5.h"
#include "nsBIG5HKSCSToUnicode.h"
#include "nsUnicodeToBIG5HKSCS.h"

// ucvko
#include "nsUCvKOCID.h"
Expand Down Expand Up @@ -184,7 +182,6 @@ NS_UCONV_REG_UNREG("EUC-JP", NS_EUCJPTOUNICODE_CID, NS_UNICODETOEUCJP_CID)

// ucvtw
NS_UCONV_REG_UNREG("Big5", NS_BIG5TOUNICODE_CID, NS_UNICODETOBIG5_CID)
NS_UCONV_REG_UNREG("Big5-HKSCS", NS_BIG5HKSCSTOUNICODE_CID, NS_UNICODETOBIG5HKSCS_CID)

// ucvko
NS_UCONV_REG_UNREG("EUC-KR", NS_EUCKRTOUNICODE_CID, NS_UNICODETOEUCKR_CID)
Expand Down Expand Up @@ -214,6 +211,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsISO2022JPToUnicodeV2)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToISO2022JP)

// ucvtw
NS_GENERIC_FACTORY_CONSTRUCTOR(nsBIG5ToUnicode)

// ucvko

Expand Down Expand Up @@ -252,18 +250,6 @@ const uint16_t g_ufBig5Mapping[] = {
#include "big5.uf"
};

const uint16_t g_utBIG5Mapping[] = {
#include "big5.ut"
};

const uint16_t g_ufBig5HKSCSMapping[] = {
#include "hkscs.uf"
};

const uint16_t g_utBig5HKSCSMapping[] = {
#include "hkscs.ut"
};

// ucvko
const uint16_t g_utKSC5601Mapping[] = {
#include "u20kscgl.ut"
Expand Down Expand Up @@ -377,8 +363,6 @@ NS_DEFINE_NAMED_CID(NS_UNICODETOEUCJP_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOISO2022JP_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOBIG5_CID);
NS_DEFINE_NAMED_CID(NS_BIG5TOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOBIG5HKSCS_CID);
NS_DEFINE_NAMED_CID(NS_BIG5HKSCSTOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_EUCKRTOUNICODE_CID);
NS_DEFINE_NAMED_CID(NS_UNICODETOEUCKR_CID);
NS_DEFINE_NAMED_CID(NS_GBKTOUNICODE_CID);
Expand Down Expand Up @@ -481,8 +465,6 @@ static const mozilla::Module::CIDEntry kUConvCIDs[] = {
{ &kNS_UNICODETOISO2022JP_CID, false, nullptr, nsUnicodeToISO2022JPConstructor },
{ &kNS_UNICODETOBIG5_CID, false, nullptr, nsUnicodeToBIG5Constructor },
{ &kNS_BIG5TOUNICODE_CID, false, nullptr, nsBIG5ToUnicodeConstructor },
{ &kNS_UNICODETOBIG5HKSCS_CID, false, nullptr, nsUnicodeToBIG5HKSCSConstructor },
{ &kNS_BIG5HKSCSTOUNICODE_CID, false, nullptr, nsBIG5HKSCSToUnicodeConstructor },
{ &kNS_EUCKRTOUNICODE_CID, false, nullptr, nsCP949ToUnicodeConstructor },
{ &kNS_UNICODETOEUCKR_CID, false, nullptr, nsUnicodeToCP949Constructor },
{ &kNS_GBKTOUNICODE_CID, false, nullptr, nsGB18030ToUnicodeConstructor },
Expand Down Expand Up @@ -587,8 +569,6 @@ static const mozilla::Module::ContractIDEntry kUConvContracts[] = {
{ NS_UNICODEENCODER_CONTRACTID_BASE "ISO-2022-JP", &kNS_UNICODETOISO2022JP_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "Big5", &kNS_UNICODETOBIG5_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "Big5", &kNS_BIG5TOUNICODE_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "Big5-HKSCS", &kNS_UNICODETOBIG5HKSCS_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "Big5-HKSCS", &kNS_BIG5HKSCSTOUNICODE_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "EUC-KR", &kNS_EUCKRTOUNICODE_CID },
{ NS_UNICODEENCODER_CONTRACTID_BASE "EUC-KR", &kNS_UNICODETOEUCKR_CID },
{ NS_UNICODEDECODER_CONTRACTID_BASE "gbk", &kNS_GBKTOUNICODE_CID },
Expand Down
Loading

0 comments on commit a5644ff

Please sign in to comment.