Skip to content

Commit

Permalink
A bunch of stuff on Str
Browse files Browse the repository at this point in the history
* Fixed __repr__ (some cases were missing)
* Added more line breaking chars for splitlines method
* Added a new method to help check if a char is printable or not

Signed-off-by: Sergio Oliveira <[email protected]>
  • Loading branch information
marianamioto authored and seocam committed May 25, 2017
1 parent c0ea61b commit aada1f1
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 12 deletions.
89 changes: 85 additions & 4 deletions python/common/org/python/types/Str.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,76 @@ public Str(org.python.Object[] args, java.util.Map<java.lang.String, org.python.
// throw new org.python.exceptions.NotImplementedError("__init__() has not been implemented.");
// }

/**
 * Decide whether a single UTF-16 code unit is treated as printable.
 *
 * A character is rejected when it lies in 0x00..0x1f, in 0x7f..0xa0, is
 * 0xad, is 0x2029, or is reported as an ISO control character by
 * {@link Character#isISOControl(char)}. Everything else is accepted.
 *
 * NOTE(review): U+2028 is not rejected here; only U+2029 is special-cased.
 *
 * @param c the character to classify
 * @return true when none of the non-printable rules above match
 */
private static boolean isCharPrintable(char c) {
    final int code = c;

    final boolean lowControl = code <= 0x1f;
    final boolean highControlRange = code >= 0x7f && code <= 0xa0;
    final boolean softHyphen = code == 0xad;
    final boolean paragraphSeparator = code == 0x2029;

    return !(lowControl
            || highControlRange
            || softHyphen
            || paragraphSeparator
            || Character.isISOControl(c));
}

/**
 * Build the quoted, escaped representation of this string.
 *
 * Newline, tab, carriage return and backslash get their two-character
 * backslash escapes. Characters in 0x00..0x1f, 0x7f..0xa0 and 0xad are
 * written as a backslash-x escape with two hex digits. Any other character
 * rejected by isCharPrintable is written as a backslash-u escape with four
 * hex digits. All remaining characters are copied through unchanged.
 *
 * Quoting: single quotes are used unless the text contains a single quote
 * and no double quote, in which case double quotes are used. When both kinds
 * of quote are present the result is single-quoted and every embedded single
 * quote is backslash-escaped.
 *
 * @return a new Str holding the quoted representation
 */
@org.python.Method(
    __doc__ = ""
)
public org.python.Object __repr__() {
    /*
     * Reference: https://www.python.org/dev/peps/pep-3138/#id7
     * TODO: Need to treat the leading surrogate pair characters
     */
    StringBuilder escaped = new StringBuilder();
    boolean hasDoubleQuote = false;
    boolean hasSingleQuote = false;

    for (char c : this.value.toCharArray()) {
        // Track which quote characters occur so the wrapper can be chosen below.
        if (c == '\'') {
            hasSingleQuote = true;
        } else if (c == '"') {
            hasDoubleQuote = true;
        }

        if (c == '\n') {
            escaped.append("\\n");
        } else if (c == '\t') {
            escaped.append("\\t");
        } else if (c == '\r') {
            escaped.append("\\r");
        } else if (c == '\\') {
            escaped.append("\\\\");
        } else if (c <= 0x1f || c >= 0x7f && c <= 0xa0 || c == 0xad) {
            // ASCII / Latin-1 non-printable: two-digit hex escape
            escaped.append(String.format("\\x%02x", (int) c));
        } else if (!isCharPrintable(c)) {
            // Remaining non-printable characters: four-digit hex escape
            escaped.append(String.format("\\u%04x", (int) c));
        } else {
            escaped.append(c);
        }
    }

    // Decide whether to wrap the result in single or double quotes.
    String quote = "'";
    String repr = escaped.toString();

    if (hasSingleQuote) {
        if (hasDoubleQuote) {
            // Both kinds present: keep single quotes and escape the embedded ones.
            repr = repr.replace("'", "\\'");
        } else {
            quote = "\"";
        }
    }

    return new org.python.types.Str(quote + repr + quote);
}


@org.python.Method(
__doc__ = ""
)
Expand Down Expand Up @@ -900,7 +962,7 @@ public org.python.Object isnumeric() {
)
public org.python.Object isprintable() {
for (char ch : this.value.toCharArray()) {
if (Character.isISOControl(ch)) {
if (!this.isCharPrintable(ch)) {
return new org.python.types.Bool(false);
}
}
Expand Down Expand Up @@ -1386,6 +1448,25 @@ public org.python.Object split(org.python.Object sep, org.python.Object maxsplit
return result_list;
}

/**
 * Report whether a character is one of the line boundaries recognised by
 * splitlines.
 *
 * The set is taken from
 * https://docs.python.org/3.4/library/stdtypes.html#str.splitlines :
 * line feed, carriage return, U+000B, U+000C, U+001C, U+001D, U+001E,
 * U+0085, U+2028 and U+2029.
 *
 * @param character the character to test
 * @return true when the character is in the boundary set, false otherwise
 */
private static boolean isLineBreak(char character) {
    final String boundaries = "\n\r\u000B\u000C\u001C\u001D\u001E\u0085\u2028\u2029";
    return boundaries.indexOf(character) >= 0;
}

@org.python.Method(
__doc__ = "S.splitlines([keepends]) -> list of strings\n" +
"\n" +
Expand Down Expand Up @@ -1415,7 +1496,7 @@ public org.python.Object splitlines(org.python.Object keepends) {
next = this.value.charAt(i + 1);
}

if (current == '\n' || current == '\r') {
if (this.isLineBreak(current)) {
end = i;
if (current == '\r' && next == '\n') {
skip = true;
Expand Down
32 changes: 24 additions & 8 deletions tests/datatypes/test_str.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@

from unittest import expectedFailure

from .. utils import TranspileTestCase, UnaryOperationTestCase, BinaryOperationTestCase, InplaceOperationTestCase


Expand Down Expand Up @@ -697,25 +700,38 @@ def test_isidentifier(self):

def test_isprintable(self):
    # Prints isprintable() for every code point below 33, a handful of
    # ordinary ASCII and non-ASCII strings, the empty string, a string
    # containing BEL, and U+2029. The snippet is handed to
    # assertCodeExecution, which is defined outside this view.
    self.assertCodeExecution("""
        for str_ in [chr(i) for i in range(33)] + ['AAA', 'bcd', '1234', 'eÃⱣỉ', 'ÃⱣỉ', '', '\x07' + 'foo', '\u2029']:
            print(str_.isprintable())
        """)

@expectedFailure
def test_isprintable_missing_cases(self):
    # Known gap: isprintable() for U+2028. Marked as an expected failure
    # until that case is handled. The snippet must itself be valid Python
    # so that the failure comes from the behaviour under test rather than
    # from a syntax error in the test input.
    self.assertCodeExecution(r"""
        tests = ['\u2028']
        for test in tests:
            print(test.isprintable())
        """)

def test_repr(self):
    # Prints repr() for strings covering CR/LF, accented letters, U+000B,
    # U+2029, a backslash, and each combination of single/double quotes.
    # A raw string keeps the escapes intact so they are interpreted by the
    # snippet itself rather than by this test module.
    self.assertCodeExecution(r"""
        tests = ["\r\n", "áéíóú", "\u000B", "\u2029", "\\", "'", "\"", "\"'"]
        for test in tests:
            print(repr(test))
        """)

def test_splitlines(self):
    # Prints splitlines() with and without keepends for mixed LF / CR / CRLF
    # input, trailing-newline and empty inputs, and a string made of the
    # additional line-boundary characters. A raw string keeps the escapes
    # intact so they are interpreted by the snippet itself.
    self.assertCodeExecution(r"""
        str_ = "aaa\nbbb\rccc\r\nddd\n\reee"
        print(str_.splitlines())
        print(str_.splitlines(True))
        print("Don't Panic\n".splitlines())
        print('\n'.splitlines())
        print(''.splitlines())
        s1 = '\r\n\r\n\v\f\x0b\x0c\u2029\x1c\x1d\x1e\x85'
        print(s1.splitlines())
        print(s1.splitlines(True))
        """)


Expand Down

0 comments on commit aada1f1

Please sign in to comment.