forked from beeware/voc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_codecs.py
78 lines (65 loc) · 2.92 KB
/
test_codecs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import codecs
from test.test_codecs import ReadTest
import unittest
from voc.java import mutf_8
# Register voc's MUTF-8 codec search function with the codecs machinery so
# that encoding-name lookups (such as the 'mutf-8' name used by MUTF8Test)
# can be resolved by voc.java.mutf_8.
codecs.register(mutf_8.search_function)
class MUTF8Test(ReadTest, unittest.TestCase):
    """Exercise the 'mutf-8' codec via the shared ``ReadTest`` helpers.

    The expectations below differ from plain UTF-8 in two visible ways:
    U+0000 is encoded as the two bytes ``C0 80`` (see ``test_null_byte``),
    and characters above U+FFFF are encoded as a surrogate pair, each half
    taking three bytes (see ``test_surrogates``).
    """

    encoding = 'mutf-8'

    def test_decoder_state(self):
        """Check decoder state handling across 1-, 2-, 3- and 6-byte forms."""
        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
        self.check_state_handling_decode(
            self.encoding, u, u.encode(self.encoding))

    def test_lone_surrogates(self):
        """Lone surrogates are rejected unless an error handler says otherwise."""
        # Strict mode: neither direction accepts an unpaired surrogate.
        self.assertRaises(
            UnicodeEncodeError, "\ud800".encode, self.encoding)
        self.assertRaises(
            UnicodeDecodeError, b"\xed\xa0\x80".decode, self.encoding)

        # Each standard error handler yields its usual substitution.
        self.assertEqual(
            "[\uDC80]".encode(self.encoding, "backslashreplace"),
            b'[\\udc80]')
        # 0xDC80 == 56448, so the XML character reference is "&#56448;".
        self.assertEqual(
            "[\uDC80]".encode(self.encoding, "xmlcharrefreplace"),
            b'[&#56448;]')
        self.assertEqual(
            "[\uDC80]".encode(self.encoding, "surrogateescape"),
            b'[\x80]')
        self.assertEqual(
            "[\uDC80]".encode(self.encoding, "ignore"),
            b'[]')
        self.assertEqual(
            "[\uDC80]".encode(self.encoding, "replace"),
            b'[?]')

    def test_surrogatepass_handler(self):
        """The 'surrogatepass' handler round-trips a lone surrogate."""
        self.assertEqual(
            "abc\ud800def".encode(self.encoding, "surrogatepass"),
            b"abc\xed\xa0\x80def")
        self.assertEqual(
            b"abc\xed\xa0\x80def".decode(self.encoding, "surrogatepass"),
            "abc\ud800def")
        self.assertTrue(codecs.lookup_error("surrogatepass"))

    def test_invalid(self):
        """Two-byte sequences led by C0/C1 (other than C0 80) fail to decode."""
        invalid_sequences = (
            b'\xC0\x81',
            b'\xC0\xFF',
            b'\xC1\x10',
            b'\xC1\x80',
        )
        for invalid in invalid_sequences:
            # subTest makes a failure report name the offending bytes.
            with self.subTest(invalid=invalid):
                with self.assertRaises(UnicodeDecodeError):
                    invalid.decode(self.encoding)

    def test_partial(self):
        """Feed the encoded input byte by byte and check incremental output.

        One expected string per input byte: a character appears only once
        its final byte has been supplied (2 + 2 + 2 + 3 + 3 = 12 bytes).
        """
        self.check_partial(
            "\x00\xff\u07ff\u0800\uffff",
            [
                # "\x00" is two bytes in this encoding.
                "",
                "\x00",
                # "\xff" is two bytes.
                "\x00",
                "\x00\xff",
                # "\u07ff" is two bytes.
                "\x00\xff",
                "\x00\xff\u07ff",
                # "\u0800" is three bytes.
                "\x00\xff\u07ff",
                "\x00\xff\u07ff",
                "\x00\xff\u07ff\u0800",
                # "\uffff" is three bytes.
                "\x00\xff\u07ff\u0800",
                "\x00\xff\u07ff\u0800",
                "\x00\xff\u07ff\u0800\uffff",
            ]
        )

    def test_null_byte(self):
        """U+0000 maps to the byte pair C0 80 in both directions."""
        self.assertEqual(
            b'a\xc0\x80b'.decode(self.encoding), 'a\x00b')
        self.assertEqual(
            'a\x00b'.encode(self.encoding), b'a\xc0\x80b')

    def test_surrogates(self):
        """Supplementary characters use a six-byte surrogate-pair encoding."""
        # Encoding: U+10400 becomes two three-byte surrogate halves, while a
        # BMP character such as U+0205 keeps its ordinary two-byte form.
        self.assertEqual(
            'Hot \U00010400iggity'.encode(self.encoding),
            b'Hot \xed\xa0\x81\xed\xb0\x80iggity',
            'no match')
        self.assertEqual(
            'How \u0205ccentric'.encode(self.encoding),
            b'How \xc8\x85ccentric')

        # Decoding reverses both of the above.
        self.assertEqual(
            b'Hot \xed\xa0\x81\xed\xb0\x80iggity'.decode(self.encoding),
            'Hot \U00010400iggity',
            'no match')
        self.assertEqual(
            b'How \xc8\x85ccentric'.decode(self.encoding),
            'How \u0205ccentric')