diff --git a/python/common/org/python/types/Str.java b/python/common/org/python/types/Str.java index 198dcf6f6b..f15eb58ac7 100644 --- a/python/common/org/python/types/Str.java +++ b/python/common/org/python/types/Str.java @@ -66,14 +66,76 @@ public Str(org.python.Object[] args, java.util.Map= 0x7f && (int) c <= 0xa0 || (int) c == 0xad) { + return false; + } + if ((int) c == 0x2029) { + return false; + } + if (Character.isISOControl(c)) { + return false; + } + return true; + } + @org.python.Method( __doc__ = "" ) public org.python.Object __repr__() { - String repr = this.value.replaceAll("\\n", "\\\\n").replaceAll("\\r", "\\\\r"); - return new org.python.types.Str("'" + repr + "'"); + /* + * Reference: https://www.python.org/dev/peps/pep-3138/#id7 + * TODO: Need to treat the leading surrogate pair characters + */ + StringBuilder sb = new StringBuilder(); + boolean has_double_quote = false; + boolean has_single_quote = false; + + for (char c : this.value.toCharArray()) { + if (c == '\'') { + has_single_quote = true; + } else if (c == '"') { + has_double_quote = true; + } + + if (c == '\n') { + sb.append("\\n"); + } else if (c == '\t') { + sb.append("\\t"); + } else if (c == '\r') { + sb.append("\\r"); + } else if (c == '\\') { + sb.append("\\\\"); + // ASCII Non-Printable + } else if (c <= 0x1f || c >= 0x7f && c <= 0xa0 || c == 0xad) { + sb.append(String.format("\\x%02x", (int) c)); + } else if (!this.isCharPrintable(c)) { + sb.append(String.format("\\u%04x", (int) c)); + } else { + sb.append((char) c); + } + } + + // Decide if we wanna wrap the result with single or double quotes + String quote; + String repr = sb.toString(); + + if (has_single_quote) { + if (has_double_quote) { + quote = new String("'"); + repr = repr.replaceAll("'", "\\\\'"); + } else { + quote = new String("\""); + } + } else { + quote = new String("'"); + } + + return new org.python.types.Str(quote + repr + quote); } + @org.python.Method( __doc__ = "" ) @@ -900,7 +962,7 @@ public org.python.Object isnumeric() { ) public org.python.Object isprintable() { for (char ch : this.value.toCharArray()) { - if (Character.isISOControl(ch)) { + if (!this.isCharPrintable(ch)) { return new org.python.types.Bool(false); } } @@ -1386,6 +1448,25 @@ public org.python.Object split(org.python.Object sep, org.python.Object maxsplit return result_list; } + private static boolean isLineBreak(char character) { + // List of line boundaries from https://docs.python.org/3.4/library/stdtypes.html#str.splitlines + switch (character) { + case '\n': + case '\r': + case '\u000B': + case '\u000C': + case '\u001C': + case '\u001D': + case '\u001E': + case '\u0085': + case '\u2028': + case '\u2029': + return true; + default: + return false; + } + } + @org.python.Method( __doc__ = "S.splitlines([keepends]) -> list of strings\n" + "\n" + @@ -1415,7 +1496,7 @@ public org.python.Object splitlines(org.python.Object keepends) { next = this.value.charAt(i + 1); } - if (current == '\n' || current == '\r') { + if (this.isLineBreak(current)) { end = i; if (current == '\r' && next == '\n') { skip = true; diff --git a/tests/datatypes/test_str.py b/tests/datatypes/test_str.py index a404b7a8ad..1692bfdb1c 100644 --- a/tests/datatypes/test_str.py +++ b/tests/datatypes/test_str.py @@ -1,3 +1,6 @@ + +from unittest import expectedFailure + from .. utils import TranspileTestCase, UnaryOperationTestCase, BinaryOperationTestCase, InplaceOperationTestCase @@ -697,25 +700,38 @@ def test_isidentifier(self): def test_isprintable(self): self.assertCodeExecution(""" - for str_ in [chr(i) for i in range(33)] + ['AAA', 'bcd', '1234', 'eÃⱣỉ', 'ÃⱣỉ', '', '\x07' + 'foo']: + for str_ in [chr(i) for i in range(33)] + ['AAA', 'bcd', '1234', 'eÃⱣỉ', 'ÃⱣỉ', '', '\x07' + 'foo', '\u2029']: print(str_.isprintable()) """) + @expectedFailure + def test_isprintable_missing_cases(self): + self.assertCodeExecution(r""" + tests = ['\u2028']: + for test in tests: + print(test.isprintable()) + """) + def test_repr(self): - self.assertCodeExecution(""" - str_ = "\\r\\n" - print(repr(str_)) + self.assertCodeExecution(r""" + tests = ["\r\n", "áéíóú", "\u000B", "\u2029", "\\", "'", "\"", "\"'"] + for test in tests: + print(repr(test)) """) def test_splitlines(self): - self.assertCodeExecution(""" - str_ = "aaa\\nbbb\\rccc\\r\\nddd\\n\\reee" + self.assertCodeExecution(r""" + str_ = "aaa\nbbb\rccc\r\nddd\n\reee" print(str_.splitlines()) print(str_.splitlines(True)) - print('Dont Panic\\n'.splitlines()) - print('\\n'.splitlines()) + print("Don't Panic\n".splitlines()) + print('\n'.splitlines()) print(''.splitlines()) + + s1 = '\r\n\r\n\v\f\x0b\x0c\u2029\x1c\x1d\x1e\x85' + print(s1.splitlines()) + print(s1.splitlines(True)) """)