Skip to content

Commit

Permalink
BSERv2: handle deserializing UTF-8 strings in Python clients
Browse files: browse the repository at this point in the history
Summary:
This is the other side of D4318629. Based on dhruvsinghal's work:

facebook@f749346

Reviewed By: wez

Differential Revision: D4318697

fbshipit-source-id: 16cfdf7a76e35d4376c54b7f2e206a02ebc3ee3d
  • Loading branch information
sunshowers authored and Facebook Github Bot committed Dec 13, 2016
1 parent 1aa41a0 commit 8fbd9f0
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
17 changes: 17 additions & 0 deletions python/pywatchman/bser.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ typedef unsigned __int64 uint64_t;
#define BSER_NULL 0x0a
#define BSER_TEMPLATE 0x0b
#define BSER_SKIP 0x0c
#define BSER_UTF8STRING 0x0d
// clang-format on

// An immutable object representation of BSER_OBJECT.
Expand Down Expand Up @@ -936,6 +937,22 @@ static PyObject* bser_loads_recursive(
}
}

case BSER_UTF8STRING: {
const char* start;
int64_t len;

if (!bunser_bytestring(ptr, end, &start, &len)) {
return NULL;
}

if (len > LONG_MAX) {
PyErr_Format(PyExc_ValueError, "string too long for python");
return NULL;
}

return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
}

case BSER_ARRAY:
return bunser_array(ptr, end, ctx);

Expand Down
7 changes: 5 additions & 2 deletions python/pywatchman/pybser.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
BSER_NULL = b'\x0a'
BSER_TEMPLATE = b'\x0b'
BSER_SKIP = b'\x0c'
BSER_UTF8STRING = b'\x0d'

if compat.PYTHON3:
STRING_TYPES = (str, bytes)
Expand Down Expand Up @@ -313,7 +314,7 @@ def unser_utf8_string(self, buf, pos):
str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
return (str_val.decode('utf-8'), pos + str_len)

def unser_string(self, buf, pos):
def unser_bytestring(self, buf, pos):
str_len, pos = self.unser_int(buf, pos + 1)
str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
if self.value_encoding is not None:
Expand Down Expand Up @@ -404,7 +405,9 @@ def loads_recursive(self, buf, pos):
elif val_type == BSER_NULL:
return (None, pos + 1)
elif val_type == BSER_BYTESTRING:
return self.unser_string(buf, pos)
return self.unser_bytestring(buf, pos)
elif val_type == BSER_UTF8STRING:
return self.unser_utf8_string(buf, pos)
elif val_type == BSER_ARRAY:
return self.unser_array(buf, pos)
elif val_type == BSER_OBJECT:
Expand Down

0 comments on commit 8fbd9f0

Please sign in to comment.