Skip to content

Commit

Permalink
Merge pull request #98 from cjwatson/py2-fix-utf8
Browse files (browse the repository at this point in the history)
Fix UTF-8 encoding/decoding on Python 2
  • Loading branch information
Parkayun authored Feb 11, 2020
2 parents e6e654d + f831eab commit c5dd886
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
24 changes: 9 additions & 15 deletions bson/codec.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -127,7 +127,7 @@ def encode_string(value):

def encode_cstring(value):
if not isinstance(value, bytes):
- value = str(value).encode("utf-8")
+ value = text_type(value).encode("utf-8")
if b"\x00" in value:
raise ValueError("Element names may not include NUL bytes.")
# A NUL byte is used to delimit our string, accepting one would cause
Expand Down Expand Up @@ -286,31 +286,25 @@ def decode_document(data, base, as_array=False):

if PY3:
ll = data.index(0, base + 1) + 1
- try:
- base, name = ll, data[base + 1:ll - 1].decode("utf-8") \
- if decode_name else None
- except UnicodeDecodeError:
- base, name = ll, data[base + 1:ll - 1] \
- if decode_name else None
  else:
  ll = data.index("\x00", base + 1) + 1
+ if decode_name:
+ name = data[base + 1:ll - 1]
  try:
- base, name = ll, unicode(data[base + 1:ll - 1])\
- if decode_name else None
+ name = name.decode("utf-8")
  except UnicodeDecodeError:
- base, name = ll, data[base + 1:ll - 1]\
- if decode_name else None
+ pass
+ else:
+ name = None
+ base = ll

if element_type == 0x01: # double
value = double_struct.unpack(data[base: base + 8])[0]
base += 8
elif element_type == 0x02: # string
length = int_struct.unpack(data[base:base + 4])[0]
value = data[base + 4: base + 4 + length - 1]
- if PY3:
- value = value.decode("utf-8")
- else:
- value = unicode(value)
+ value = value.decode("utf-8")
base += 4 + length
elif element_type == 0x03: # document
base, value = decode_document(data, base)
Expand Down
4 changes: 4 additions & 0 deletions bson/tests/test_binary.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,3 +52,7 @@ def test_binary(self):
dump = dumps(self.doc)
decoded = loads(dump)
self.assertEqual(decoded, self.doc)

def test_utf8_binary(self):
self.doc[u"\N{SNOWMAN}"] = u"\N{SNOWMAN WITHOUT SNOW}"
self.test_binary()

0 comments on commit c5dd886

Please sign in to comment.