Skip to content

Commit

Permalink
Merge pull request boto#3861 from catleeball/replace-get-utf8
Browse files Browse the repository at this point in the history
Ensure strings can be utf-8 encoded
  • Loading branch information
mfschwartz authored Mar 29, 2019
2 parents 9e1cd3b + 98f6bcd commit 03b2268
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 29 deletions.
17 changes: 9 additions & 8 deletions boto/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from boto.compat import urllib, encodebytes, parse_qs_safe, urlparse, six
from boto.auth_handler import AuthHandler
from boto.exception import BotoClientError
from boto.utils import get_utf8able_str

try:
from hashlib import sha1 as sha
Expand Down Expand Up @@ -383,7 +384,7 @@ def query_string(self, http_request):
parameter_names = sorted(http_request.params.keys())
pairs = []
for pname in parameter_names:
pval = boto.utils.get_utf8_value(http_request.params[pname])
pval = get_utf8able_str(http_request.params[pname])
pairs.append(urllib.parse.quote(pname, safe=''.encode('ascii')) +
'=' +
urllib.parse.quote(pval, safe='-_~'.encode('ascii')))
Expand All @@ -396,7 +397,7 @@ def canonical_query_string(self, http_request):
return ""
l = []
for param in sorted(http_request.params):
value = boto.utils.get_utf8_value(http_request.params[param])
value = get_utf8able_str(http_request.params[param])
l.append('%s=%s' % (urllib.parse.quote(param, safe='-_.~'),
urllib.parse.quote(value, safe='-_.~')))
return '&'.join(l)
Expand Down Expand Up @@ -623,7 +624,7 @@ def canonical_query_string(self, http_request):
# query string.
l = []
for param in sorted(http_request.params):
value = boto.utils.get_utf8_value(http_request.params[param])
value = get_utf8able_str(http_request.params[param])
l.append('%s=%s' % (urllib.parse.quote(param, safe='-_.~'),
urllib.parse.quote(value, safe='-_.~')))
return '&'.join(l)
Expand Down Expand Up @@ -836,8 +837,8 @@ def _build_query_string(self, params):
keys.sort(key=lambda x: x.lower())
pairs = []
for key in keys:
val = boto.utils.get_utf8_value(params[key])
pairs.append(key + '=' + self._escape_value(six.ensure_str(val)))
val = get_utf8able_str(params[key])
pairs.append(key + '=' + self._escape_value(get_utf8able_str(val)))
return '&'.join(pairs)

def add_auth(self, http_request, **kwargs):
Expand Down Expand Up @@ -897,7 +898,7 @@ def _calc_signature(self, params, *args):
keys.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
pairs = []
for key in keys:
val = boto.utils.get_utf8_value(params[key])
val = get_utf8able_str(params[key])
pairs.append(key + '=' + urllib.parse.quote(val))
qs = '&'.join(pairs)
return (qs, base64.b64encode(hmac.digest()))
Expand All @@ -924,7 +925,7 @@ def _calc_signature(self, params, *args):
pairs = []
for key in keys:
hmac.update(key.encode('utf-8'))
val = boto.utils.get_utf8_value(params[key])
val = get_utf8able_str(params[key]).encode('utf-8')
hmac.update(val)
pairs.append(key + '=' + urllib.parse.quote(val))
qs = '&'.join(pairs)
Expand All @@ -948,7 +949,7 @@ def _calc_signature(self, params, verb, path, server_name):
keys = sorted(params.keys())
pairs = []
for key in keys:
val = boto.utils.get_utf8_value(params[key])
val = get_utf8able_str(params[key]).encode('utf-8')
pairs.append(urllib.parse.quote(key, safe='') + '=' +
urllib.parse.quote(val, safe='-_~'))
qs = '&'.join(pairs)
Expand Down
2 changes: 2 additions & 0 deletions boto/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,9 +1105,11 @@ def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
def _required_auth_capability(self):
return []


def get_utf8_value(self, value):
    """Return ``boto.utils.get_utf8_value(value)``.

    Thin wrapper exposing the module-level helper as a method, so code
    that only holds a connection object can reach it.
    """
    return boto.utils.get_utf8_value(value)

def get_utf8able_str(self, value):
    """Return ``boto.utils.get_utf8able_str(value)``.

    Method counterpart of ``get_utf8_value`` for the newer helper. Code
    that is handed a connection calls ``connection.get_utf8able_str(...)``,
    which raises AttributeError unless this wrapper exists.
    NOTE(review): confirm those callers receive an instance of this class.
    """
    return boto.utils.get_utf8able_str(value)


def make_request(self, action, params=None, path='/', verb='GET'):
http_request = self.build_base_http_request(verb, path, None,
params, {}, '',
Expand Down
6 changes: 3 additions & 3 deletions boto/gs/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from boto.gs.key import Key as GSKey
from boto.s3.acl import Policy
from boto.s3.bucket import Bucket as S3Bucket
from boto.utils import get_utf8_value
from boto.utils import get_utf8able_str
from boto.compat import quote
from boto.compat import six

Expand Down Expand Up @@ -644,7 +644,7 @@ def set_storage_class(self, storage_class, headers=None):
:param str storage_class: A string containing the storage class.
:param dict headers: Additional headers to send with the request.
"""
req_body = self.StorageClassBody % (get_utf8_value(storage_class))
req_body = self.StorageClassBody % (get_utf8able_str(storage_class))
self.set_subresource(STORAGE_CLASS_ARG, req_body, headers=headers)

# Method with same signature as boto.s3.bucket.Bucket.add_email_grant(),
Expand Down Expand Up @@ -883,7 +883,7 @@ def configure_website(self, main_page_suffix=None, error_key=None,

body = self.WebsiteBody % (main_page_frag, error_frag)
response = self.connection.make_request(
'PUT', get_utf8_value(self.name), data=get_utf8_value(body),
'PUT', get_utf8able_str(self.name), data=get_utf8able_str(body),
query_args='websiteConfig', headers=headers)
body = response.read()
if response.status == 200:
Expand Down
7 changes: 4 additions & 3 deletions boto/gs/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from boto.s3.connection import S3Connection
from boto.s3.connection import SubdomainCallingFormat
from boto.s3.connection import check_lowercase_bucketname
from boto.utils import get_utf8_value
from boto.compat import six
from boto.utils import get_utf8able_str

class Location(object):
DEFAULT = 'US'
Expand Down Expand Up @@ -91,8 +92,8 @@ def create_bucket(self, bucket_name, headers=None,
data = ('<CreateBucketConfiguration>%s%s</CreateBucketConfiguration>'
% (location_elem, storage_class_elem))
response = self.make_request(
'PUT', get_utf8_value(bucket_name), headers=headers,
data=get_utf8_value(data))
'PUT', get_utf8able_str(bucket_name), headers=headers,
data=get_utf8able_str(data))
body = response.read()
if response.status == 409:
raise self.provider.storage_create_error(
Expand Down
7 changes: 3 additions & 4 deletions boto/gs/key.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@
import os
import re

from boto.compat import StringIO
from boto.compat import StringIO, six
from boto.exception import BotoClientError
from boto.s3.key import Key as S3Key
from boto.s3.keyfile import KeyFile
from boto.utils import compute_hash
from boto.utils import get_utf8_value
from boto.utils import compute_hash, get_utf8able_str

class Key(S3Key):
"""
Expand Down Expand Up @@ -707,7 +706,7 @@ def set_contents_from_string(self, s, headers=None, replace=True,
self.md5 = None
self.base64md5 = None

fp = StringIO(get_utf8_value(s))
fp = StringIO(get_utf8able_str(s))
r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
policy, md5,
if_generation=if_generation)
Expand Down
4 changes: 2 additions & 2 deletions boto/mturk/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,7 @@ def endElement(self, name, value, connection):
if name == 'Answer':
answer_rs = ResultSet([('Answer', QuestionFormAnswer)])
h = handler.XmlHandler(answer_rs, connection)
value = connection.get_utf8_value(value)
value = connection.get_utf8able_str(value)
xml.sax.parseString(value, h)
self.answers.append(answer_rs)
else:
Expand All @@ -1013,7 +1013,7 @@ def endElement(self, name, value, connection):
if name == 'Answer':
answer_rs = ResultSet([('Answer', QuestionFormAnswer)])
h = handler.XmlHandler(answer_rs, connection)
value = connection.get_utf8_value(value)
value = connection.get_utf8able_str(value)
xml.sax.parseString(value, h)
self.answers.append(answer_rs)
else:
Expand Down
8 changes: 3 additions & 5 deletions boto/s3/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import base64
from collections import defaultdict
from boto.compat import BytesIO, six, StringIO, urllib
from boto.utils import get_utf8able_str

# as per http://goo.gl/BDuud (02/19/2011)

Expand Down Expand Up @@ -848,11 +849,8 @@ def copy_key(self, new_key_name, src_bucket_name,
"""
headers = headers or {}
provider = self.connection.provider
if six.PY3:
if isinstance(src_key_name, bytes):
src_key_name = src_key_name.decode('utf-8')
else:
src_key_name = boto.utils.get_utf8_value(src_key_name)
src_key_name = get_utf8able_str(src_key_name)

if preserve_acl:
if self.name == src_bucket_name:
src_bucket = self
Expand Down
7 changes: 4 additions & 3 deletions boto/s3/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from boto.s3.key import Key
from boto.resultset import ResultSet
from boto.exception import BotoClientError, S3ResponseError
from boto.utils import get_utf8able_str


def check_lowercase_bucketname(n):
Expand Down Expand Up @@ -88,7 +89,7 @@ def build_host(self, server, bucket):
return self.get_bucket_server(server, bucket)

def build_auth_path(self, bucket, key=''):
key = boto.utils.get_utf8_value(key)
key = get_utf8able_str(key)
if isinstance(bucket, bytes):
bucket = bucket.decode('utf-8')
path = ''
Expand All @@ -97,7 +98,7 @@ def build_auth_path(self, bucket, key=''):
return path + '/%s' % urllib.parse.quote(key)

def build_path_base(self, bucket, key=''):
key = boto.utils.get_utf8_value(key)
key = get_utf8able_str(key)
return '/%s' % urllib.parse.quote(key)


Expand All @@ -121,7 +122,7 @@ def get_bucket_server(self, server, bucket):
return server

def build_path_base(self, bucket, key=''):
key = boto.utils.get_utf8_value(key)
key = get_utf8able_str(key)
path_base = '/'
if bucket:
path_base += "%s/" % bucket
Expand Down
46 changes: 45 additions & 1 deletion boto/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,51 @@ def parse_host(hostname):
else:
return hostname.split(':', 1)[0]


def get_utf8able_str(s, errors='strict'):
    """Returns a UTF8-encodable string in PY3, UTF8 bytes in PY2.

    This method is similar to six's `ensure_str()`, except that bytes
    input is always run through the utf-8 codec, so with the default
    ``errors='strict'`` bytes that are not valid utf-8 raise a
    UnicodeDecodeError. If the object isn't a string, this method first
    coerces it to a string with `str()`.

    :param s: Text, bytes, or any object that `str()` accepts.
    :param str errors: Codec error handler. It is applied when encoding
        text (PY2) and when decoding bytes (PY2 and PY3).
    :returns: utf-8 encoded bytes in PY2; a text (unicode) string in PY3.
    :raises UnicodeDecodeError: If bytes input cannot be decoded as utf-8
        under the given ``errors`` handler.
    :raises TypeError: If the value is neither text nor bytes after the
        `str()` coercion.
    """
    if not isinstance(s, (six.text_type, six.binary_type)):
        s = str(s)

    if isinstance(s, six.binary_type):
        # Decoding validates the bytes and honors ``errors``, which was
        # previously ignored for bytes input.
        text = s.decode('utf-8', errors)
        if not six.PY2:
            return text
        if errors == 'strict':
            # The bytes were just verified to be valid utf-8; return the
            # caller's object untouched.
            return s
        # A lenient handler may have dropped or replaced invalid
        # sequences, so re-encode to return well-formed utf-8 bytes.
        return text.encode('utf-8')

    if isinstance(s, six.text_type):
        if six.PY2:
            # PY2 callers get utf-8 encoded bytes.
            return s.encode('utf-8', errors)
        return s

    raise TypeError('not expecting type "%s"' % type(s))


def get_utf8_value(value):
    """Return ``value`` as utf-8 encoded bytes.

    Bytes input is checked by decoding it as utf-8 (a UnicodeDecodeError
    propagates if that fails) and is then returned unchanged. Anything
    that is not a string is converted with ``six.text_type`` first, and
    text is encoded to utf-8.
    """
    if isinstance(value, bytes):
        # The decode result is discarded; it is done only to validate.
        value.decode('utf-8')
        return value

    if isinstance(value, six.string_types):
        candidate = value
    else:
        candidate = six.text_type(value)

    if isinstance(candidate, six.text_type):
        return candidate.encode('utf-8')
    return candidate


def print_to_fd(*objects, **kwargs):
"""A Python 2/3 compatible analogue to the print function.
Expand Down Expand Up @@ -1179,4 +1224,3 @@ def write_to_fd(fd, data):
fd.write(six.ensure_binary(data))
else:
fd.write(data)

0 comments on commit 03b2268

Please sign in to comment.