Skip to content

Commit

Permalink
Better support for string formatting (python#7418)
Browse files Browse the repository at this point in the history
Fixes python#1444
Fixes python#2639
Fixes python#7114

This PR does three related things:
* Fixes some corner cases in `%` formatting
* Tightens `bytes` vs `str` interactions (including those that are technically not errors)
* Adds type checking for `str.format()` calls

This also fixes few issues discovered by this PR during testing (notably a bug in f-string transformation), and adds/extends a bunch of docstring and comments to existing code. The implementation is mostly straightforward, there are few hacky things, but IMO nothing unreasonable.

Here are few comments:
* It was hard to keep the approach to `str.format()` calls purely regexp-based (as for `%` formatting), mostly because the former can be both nested and repeated (And we must support nested formatting because this is how we support f-strings). CPython itself uses a custom parser but it is huge, so I decided to have a mixed approach. This way we can keep code simple while still maintaining practically one-to-one compatibility with runtime behavior (the error messages are sometimes different however).
* This causes few hundreds of errors internally, I am not sure what to do with these. Most popular ones are:
  - Unicode upcast (`'%s' % u'...'` results in `unicode`, not `str`, on Python 2)
  - Using `'%s' % some_bytes` and/or `'{:s}'.format(some_bytes)` on Python 3
  - Unused arguments in `str.format()` call (probably because at runtime they are silently ignored)
* I added new error code for string interpolation/formatting and used it everywhere in old and new code. Potentially we might split out the `str` vs `bytes` errors into a separate error code, because technically they are not type errors, just suspicious/dangerous code.
  • Loading branch information
ilevkivskyi authored Sep 24, 2019
1 parent e08889c commit 5b3346f
Show file tree
Hide file tree
Showing 13 changed files with 1,175 additions and 102 deletions.
21 changes: 18 additions & 3 deletions mypy/checkexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from mypy import erasetype
from mypy.checkmember import analyze_member_access, type_object_type
from mypy.argmap import ArgTypeExpander, map_actuals_to_formals, map_formals_to_actuals
from mypy.checkstrformat import StringFormatterChecker
from mypy.checkstrformat import StringFormatterChecker, custom_special_method
from mypy.expandtype import expand_type, expand_type_by_instance, freshen_function_type_vars
from mypy.util import split_module_names
from mypy.typevars import fill_typevars
Expand Down Expand Up @@ -345,6 +345,8 @@ def visit_call_expr_inner(self, e: CallExpr, allow_none_return: bool = False) ->
self.check_runtime_protocol_test(e)
if e.callee.fullname == 'builtins.issubclass':
self.check_protocol_issubclass(e)
if isinstance(e.callee, MemberExpr) and e.callee.name == 'format':
self.check_str_format_call(e)
ret_type = get_proper_type(ret_type)
if isinstance(ret_type, UninhabitedType) and not ret_type.ambiguous:
self.chk.binder.unreachable()
Expand All @@ -357,6 +359,19 @@ def visit_call_expr_inner(self, e: CallExpr, allow_none_return: bool = False) ->
return AnyType(TypeOfAny.from_error)
return ret_type

def check_str_format_call(self, e: CallExpr) -> None:
"""More precise type checking for str.format() calls on literals."""
assert isinstance(e.callee, MemberExpr)
format_value = None
if isinstance(e.callee.expr, (StrExpr, UnicodeExpr)):
format_value = e.callee.expr.value
elif e.callee.expr in self.chk.type_map:
base_typ = try_getting_literal(self.chk.type_map[e.callee.expr])
if isinstance(base_typ, LiteralType) and isinstance(base_typ.value, str):
format_value = base_typ.value
if format_value is not None:
self.strfrm_checker.check_str_format_call(e, format_value)

def method_fullname(self, object_type: Type, method_name: str) -> Optional[str]:
"""Convert a method name to a fully qualified name, based on the type of the object that
it is invoked on. Return `None` if the name of `object_type` cannot be determined.
Expand Down Expand Up @@ -2067,8 +2082,8 @@ def visit_comparison_expr(self, e: ComparisonExpr) -> Type:
# We suppress the error if there is a custom __eq__() method on either
# side. User defined (or even standard library) classes can define this
# to return True for comparisons between non-overlapping types.
if (not custom_equality_method(left_type) and
not custom_equality_method(right_type)):
if (not custom_special_method(left_type, '__eq__') and
not custom_special_method(right_type, '__eq__')):
# Also flag non-overlapping literals in situations like:
# x: Literal['a', 'b']
# if x == 'c':
Expand Down
700 changes: 665 additions & 35 deletions mypy/checkstrformat.py

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions mypy/errorcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ def __str__(self) -> str:
'General') # type: Final
VALID_NEWTYPE = ErrorCode(
'valid-newtype', "Check that argument 2 to NewType is valid", 'General') # type: Final
STRING_FORMATTING = ErrorCode(
'str-format', "Check that string formatting/interpolation is type-safe",
'General') # type: Final
STR_BYTES_PY3 = ErrorCode(
'str-bytes-safe', "Warn about dangerous coercions related to bytes and string types",
'General') # type: Final

# These error codes aren't enable by default.
NO_UNTYPED_DEF = ErrorCode(
Expand Down
4 changes: 2 additions & 2 deletions mypy/fastparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,8 +1134,8 @@ def visit_FormattedValue(self, n: ast3.FormattedValue) -> Expression:
format_method.set_line(format_string)
result_expression = CallExpr(format_method,
[val_exp, format_spec_exp],
[ARG_POS],
[None])
[ARG_POS, ARG_POS],
[None, None])
return self.set_line(result_expression, n)

# Bytes(bytes s)
Expand Down
24 changes: 16 additions & 8 deletions mypy/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,25 +779,33 @@ def first_argument_for_super_must_be_type(self, actual: Type, context: Context)
self.fail('Argument 1 for "super" must be a type object; got {}'.format(type_str), context)

def too_few_string_formatting_arguments(self, context: Context) -> None:
self.fail('Not enough arguments for format string', context)
self.fail('Not enough arguments for format string', context,
code=codes.STRING_FORMATTING)

def too_many_string_formatting_arguments(self, context: Context) -> None:
self.fail('Not all arguments converted during string formatting', context)
self.fail('Not all arguments converted during string formatting', context,
code=codes.STRING_FORMATTING)

def unsupported_placeholder(self, placeholder: str, context: Context) -> None:
self.fail('Unsupported format character \'%s\'' % placeholder, context)
self.fail('Unsupported format character \'%s\'' % placeholder, context,
code=codes.STRING_FORMATTING)

def string_interpolation_with_star_and_key(self, context: Context) -> None:
self.fail('String interpolation contains both stars and mapping keys', context)
self.fail('String interpolation contains both stars and mapping keys', context,
code=codes.STRING_FORMATTING)

def requires_int_or_char(self, context: Context) -> None:
self.fail('%c requires int or char', context)
def requires_int_or_char(self, context: Context,
format_call: bool = False) -> None:
self.fail('"{}c" requires int or char'.format(':' if format_call else '%'),
context, code=codes.STRING_FORMATTING)

def key_not_in_mapping(self, key: str, context: Context) -> None:
self.fail('Key \'%s\' not found in mapping' % key, context)
self.fail('Key \'%s\' not found in mapping' % key, context,
code=codes.STRING_FORMATTING)

def string_interpolation_mixing_key_and_non_keys(self, context: Context) -> None:
self.fail('String interpolation mixes specifier with and without mapping keys', context)
self.fail('String interpolation mixes specifier with and without mapping keys', context,
code=codes.STRING_FORMATTING)

def cannot_determine_type(self, name: str, context: Context) -> None:
self.fail("Cannot determine type of '%s'" % name, context, code=codes.HAS_TYPE)
Expand Down
6 changes: 4 additions & 2 deletions mypy/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1658,10 +1658,12 @@ def __eq__(self, other: object) -> bool:

@overload
@staticmethod
def make_union(items: List[ProperType], line: int = -1, column: int = -1) -> ProperType: ...
def make_union(items: Sequence[ProperType],
line: int = -1, column: int = -1) -> ProperType: ...

@overload
@staticmethod
def make_union(items: List[Type], line: int = -1, column: int = -1) -> Type: ...
def make_union(items: Sequence[Type], line: int = -1, column: int = -1) -> Type: ...

@staticmethod
def make_union(items: Sequence[Type], line: int = -1, column: int = -1) -> Type:
Expand Down
2 changes: 1 addition & 1 deletion mypyc/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ def emit_box(self, src: str, dest: str, typ: RType, declare_dest: bool = False,
inner_name = self.temp_name()
self.emit_box('{}.f{}'.format(src, i), inner_name, typ.types[i],
declare_dest=True)
self.emit_line('PyTuple_SET_ITEM({}, {}, {});'.format(dest, i, inner_name, i))
self.emit_line('PyTuple_SET_ITEM({}, {}, {});'.format(dest, i, inner_name))
else:
assert not typ.is_unboxed
# Type is boxed -- trivially just assign.
Expand Down
12 changes: 12 additions & 0 deletions test-data/unit/check-errorcodes.test
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,18 @@ def f(): pass
def g() -> int:
return f() # E: Returning Any from function declared to return "int" [no-any-return]

[case testErrorCodeFormatCall]
'{:d}'.format('no') # E: Incompatible types in string interpolation (expression has type "str", placeholder has type "int") [str-format]
'{!x}'.format('Hm...') # E: Invalid conversion type "x", must be one of "r", "s" or "a" [str-format]
'}{'.format() # E: Invalid conversion specifier in format string: unexpected } [str-format]

'%d' % 'no' # E: Incompatible types in string interpolation (expression has type "str", placeholder has type "Union[int, float, SupportsInt]") [str-format]
'%d + %d' % (1, 2, 3) # E: Not all arguments converted during string formatting [str-format]

'{}'.format(b'abc') # E: On Python 3 '{}'.format(b'abc') produces "b'abc'"; use !r if this is a desired behavior [str-bytes-safe]
'%s' % b'abc' # E: On Python 3 '%s' % b'abc' produces "b'abc'"; use %r if this is a desired behavior [str-bytes-safe]
[builtins fixtures/primitives.pyi]

[case testErrorCodeIgnoreNamedDefinedNote]
x: List[int] # type: ignore[name-defined]

Expand Down
Loading

0 comments on commit 5b3346f

Please sign in to comment.