Skip to content

Commit

Permalink
New method to_basestring replaces some use of native_str.
Browse files Browse the repository at this point in the history
native_str would force the argument to (utf8) bytes, while in python2
it is often more appropriate to preserve the type of the input data.

Closes tornadoweb#280
  • Loading branch information
bdarnell committed Jun 16, 2011
1 parent 3d64c89 commit db579da
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
20 changes: 17 additions & 3 deletions tornado/escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _json_decode(s):

def xhtml_escape(value):
"""Escapes a string so it is valid within XML or XHTML."""
return xml.sax.saxutils.escape(native_str(value), {'"': "&quot;"})
return xml.sax.saxutils.escape(to_basestring(value), {'"': "&quot;"})


def xhtml_unescape(value):
Expand All @@ -80,7 +80,7 @@ def json_encode(value):

def json_decode(value):
"""Returns Python objects for the given JSON string."""
return _json_decode(native_str(value))
return _json_decode(to_basestring(value))


def squeeze(value):
Expand Down Expand Up @@ -122,7 +122,7 @@ def url_unescape(value, encoding='utf-8'):
if encoding is None:
return urllib.parse.unquote_to_bytes(value)
else:
return urllib.unquote_plus(native_str(value), encoding=encoding)
return urllib.unquote_plus(to_basestring(value), encoding=encoding)

def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
"""Parses a query string like urlparse.parse_qs, but returns the
Expand Down Expand Up @@ -178,6 +178,20 @@ def to_unicode(value):
else:
native_str = utf8

# Types that to_basestring returns unchanged: anything that is already a
# basestring, plus None.
_BASESTRING_TYPES = (basestring, type(None))


def to_basestring(value):
    """Converts a string argument to a subclass of basestring.

    In python2, byte and unicode strings are mostly interchangeable,
    so functions that deal with a user-supplied argument in combination
    with ascii string constants can use either and should return the type
    the user supplied.  In python3, the two types are not interchangeable,
    so this method is needed to convert byte strings to unicode.

    Values that are already a basestring (or None) are returned as-is.
    Byte strings are decoded as utf8.  Any other type raises TypeError.
    """
    if isinstance(value, _BASESTRING_TYPES):
        return value
    # Raise explicitly rather than assert: assertions are stripped under
    # "python -O", which would let a non-string value fall through to
    # .decode() and fail with an unrelated AttributeError.
    if not isinstance(value, bytes):
        raise TypeError(
            "Expected bytes, unicode, or None; got %r" % type(value))
    return value.decode("utf-8")

def recursive_unicode(obj):
"""Walks a simple data structure, converting byte strings to unicode.
Expand Down
17 changes: 16 additions & 1 deletion tornado/test/escape_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import tornado.escape
import unittest

from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode
from tornado.util import b

linkify_tests = [
Expand Down Expand Up @@ -165,3 +165,18 @@ def test_url_unescape(self):
# and unicode strings.
self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)

def test_escape_return_types(self):
    # Under python2 an escape function is expected to hand back the
    # same string type it was given.
    cases = (
        ("foo", str),
        (u"foo", unicode),
    )
    for sample, expected_type in cases:
        escaped = xhtml_escape(sample)
        self.assertEqual(type(escaped), expected_type)

def test_json_decode(self):
    # Both byte strings and unicode strings are valid input for
    # json_decode; any string in the decoded result is unicode.
    expected = u"foo"
    from_bytes = json_decode(b('"foo"'))
    self.assertEqual(from_bytes, expected)
    from_text = json_decode(u'"foo"')
    self.assertEqual(from_text, expected)

    # Bytes outside the ascii range are treated as utf8.
    encoded = utf8(u'"\u00e9"')
    self.assertEqual(json_decode(encoded), u"\u00e9")

0 comments on commit db579da

Please sign in to comment.