forked from go-python/gpython
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
312 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
package parser | ||
|
||
import ( | ||
"bytes" | ||
"strconv" | ||
|
||
"github.com/ncw/gpython/py" | ||
) | ||
|
||
// DecodeEscape unescapes a backslash-escaped buffer | ||
// | ||
// byteMode indicates whether we are creating a unicode string or a bytes output | ||
func DecodeEscape(in *bytes.Buffer, byteMode bool) (out *bytes.Buffer, err error) { | ||
// Early exit if no escape sequences | ||
// NB in.Bytes() is cheap | ||
inBytes := in.Bytes() | ||
if bytes.IndexRune(inBytes, '\\') < 0 { | ||
return in, nil | ||
} | ||
out = new(bytes.Buffer) | ||
runes := bytes.Runes(inBytes) | ||
decodeHex := func(what byte, i, size int) error { | ||
i++ | ||
if i+size <= len(runes) { | ||
cout, err := strconv.ParseInt(string(runes[i:i+size]), 16, 32) | ||
if err != nil { | ||
return py.ExceptionNewf(py.ValueError, "invalid \\%c escape at position %d", what, i-2) | ||
} | ||
if byteMode { | ||
out.WriteByte(byte(cout)) | ||
} else { | ||
out.WriteRune(rune(cout)) | ||
} | ||
} else { | ||
return py.ExceptionNewf(py.ValueError, "truncated \\%c escape at position %d", what, i-2) | ||
} | ||
return nil | ||
} | ||
ignoreEscape := false | ||
for i := 0; i < len(runes); i++ { | ||
c := runes[i] | ||
if c != '\\' { | ||
out.WriteRune(c) | ||
continue | ||
} | ||
i++ | ||
if i >= len(runes) { | ||
return nil, py.ExceptionNewf(py.ValueError, "Trailing \\ in string") | ||
} | ||
c = runes[i] | ||
switch c { | ||
case '\n': | ||
case '\\': | ||
out.WriteRune('\\') | ||
case '\'': | ||
out.WriteRune('\'') | ||
case '"': | ||
out.WriteRune('"') | ||
case 'b': | ||
out.WriteRune('\b') | ||
case 'f': | ||
out.WriteRune('\014') // FF | ||
case 't': | ||
out.WriteRune('\t') | ||
case 'n': | ||
out.WriteRune('\n') | ||
case 'r': | ||
out.WriteRune('\r') | ||
case 'v': | ||
out.WriteRune('\013') // VT | ||
case 'a': | ||
out.WriteRune('\007') // BEL, not classic C | ||
case '0', '1', '2', '3', '4', '5', '6', '7': | ||
// 1 to 3 characters of octal escape | ||
cout := c - '0' | ||
if i+1 < len(runes) && '0' <= runes[i+1] && runes[i+1] <= '7' { | ||
i++ | ||
cout = (cout << 3) + runes[i] - '0' | ||
if i+1 < len(runes) && '0' <= runes[i+1] && runes[i+1] <= '7' { | ||
i++ | ||
cout = (cout << 3) + runes[i] - '0' | ||
} | ||
} | ||
if byteMode { | ||
out.WriteByte(byte(cout)) | ||
} else { | ||
out.WriteRune(cout) | ||
} | ||
case 'x': | ||
// \xhh exactly 2 characters of hex | ||
err = decodeHex('x', i, 2) | ||
if err != nil { | ||
return nil, err | ||
} | ||
i += 2 | ||
// FIXME In a bytes literal, hexadecimal and | ||
// octal escapes denote the byte with the | ||
// given value. In a string literal, these | ||
// escapes denote a Unicode character with the | ||
// given value. | ||
case 'u': | ||
// \uxxxx Character with 16-bit hex value xxxx - 4 characters required | ||
if byteMode { | ||
ignoreEscape = true | ||
break | ||
} | ||
err = decodeHex('u', i, 4) | ||
if err != nil { | ||
return nil, err | ||
} | ||
i += 4 | ||
case 'U': | ||
// \Uxxxxxxxx Character with 32-bit hex value xxxxxxxx - 8 characters required | ||
if byteMode { | ||
ignoreEscape = true | ||
break | ||
} | ||
|
||
err = decodeHex('U', i, 8) | ||
if err != nil { | ||
return nil, err | ||
} | ||
i += 8 | ||
case 'N': | ||
// \N{name} Character named name in the Unicode database | ||
if byteMode { | ||
ignoreEscape = true | ||
break | ||
} | ||
// FIXME go can't do this as builtin so ignore for the moment | ||
ignoreEscape = true | ||
default: | ||
ignoreEscape = true | ||
break | ||
} | ||
// ignore unrecognised escape | ||
if ignoreEscape { | ||
i-- | ||
out.WriteRune('\\') | ||
ignoreEscape = false | ||
} | ||
} | ||
return out, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
package parser | ||
|
||
import ( | ||
"bytes" | ||
"testing" | ||
|
||
"github.com/ncw/gpython/py" | ||
) | ||
|
||
func TestDecodeEscape(t *testing.T) { | ||
for _, test := range []struct { | ||
in string | ||
want string | ||
errString string | ||
byteMode bool | ||
}{ | ||
// Stringmode tests | ||
{``, ``, "", false}, | ||
{`Potato`, `Potato`, "", false}, | ||
{`Potato\`, ``, `Trailing \ in string`, false}, | ||
{`\Potato`, `\Potato`, "", false}, | ||
{`n\\`, `n\`, "", false}, | ||
{`\'x`, `'x`, "", false}, | ||
{`\"`, `"`, "", false}, | ||
{"\\\n", ``, "", false}, | ||
{`\b`, "\010", "", false}, | ||
{`\f`, "\014", "", false}, | ||
{`\t`, "\011", "", false}, | ||
{`\n`, "\012", "", false}, | ||
{`\r`, "\015", "", false}, | ||
{`\v`, "\013", "", false}, | ||
{`\a`, "\007", "", false}, | ||
{`\1`, "\001", "", false}, | ||
{`\12`, "\012", "", false}, | ||
{`\123`, "\123", "", false}, | ||
{`\777`, "\u01ff", "", false}, | ||
{`\1\12\123\1234`, "\001\012\123\123" + "4", "", false}, | ||
{`a\1a\12a\123a`, "a\001a\012a\123a", "", false}, | ||
{`\x`, "", `truncated \x escape at position 0`, false}, | ||
{`\x1`, "", `truncated \x escape at position 0`, false}, | ||
{`\x11`, "\x11", "", false}, | ||
{`\xzz`, "", `invalid \x escape at position 0`, false}, | ||
{`{\x11}`, "{\x11}", "", false}, | ||
{`\x01\x8a\xff`, "\x01\u008a\u00ff", "", false}, | ||
{`\x01\x8A\xFF`, "\x01\u008a\u00ff", "", false}, | ||
{`\u`, "", `truncated \u escape at position 0`, false}, | ||
{`\u1`, "", `truncated \u escape at position 0`, false}, | ||
{`\u12`, "", `truncated \u escape at position 0`, false}, | ||
{`z\u134`, "", `truncated \u escape at position 1`, false}, | ||
{`\u1234`, "\u1234", "", false}, | ||
{`z\uzzzz`, "", `invalid \u escape at position 1`, false}, | ||
{`{\u1234}`, "{\u1234}", "", false}, | ||
{`\U00000001\U0000018a\U000012ff`, "\U00000001\U0000018a\U000012ff", "", false}, | ||
{`\U00000001\U0000018A\U000012FF`, "\U00000001\U0000018a\U000012ff", "", false}, | ||
{`\U0000`, "", `truncated \U escape at position 0`, false}, | ||
{`\U00001`, "", `truncated \U escape at position 0`, false}, | ||
{`\U000012`, "", `truncated \U escape at position 0`, false}, | ||
{`z\U0000134`, "", `truncated \U escape at position 1`, false}, | ||
{`\U00001234`, "\U00001234", "", false}, | ||
{`z\Uzzzzzzzz`, "", `invalid \U escape at position 1`, false}, | ||
{`{\U00001234}`, "{\U00001234}", "", false}, | ||
{`\U00000001\U0000018a\U000012ff`, "\U00000001\U0000018a\U000012ff", "", false}, | ||
{`\U00000001\U0000018A\U000012FF`, "\U00000001\U0000018a\U000012ff", "", false}, | ||
{`\N{potato}`, `\N{potato}`, "", false}, | ||
|
||
// Bytemode tests | ||
{``, ``, "", true}, | ||
{`Potato`, `Potato`, "", true}, | ||
{`Potato\`, ``, `Trailing \ in string`, true}, | ||
{`\Potato`, `\Potato`, "", true}, | ||
{`n\\`, `n\`, "", true}, | ||
{`\'x`, `'x`, "", true}, | ||
{`\"`, `"`, "", true}, | ||
{"\\\n", ``, "", true}, | ||
{`\b`, "\010", "", true}, | ||
{`\f`, "\014", "", true}, | ||
{`\t`, "\011", "", true}, | ||
{`\n`, "\012", "", true}, | ||
{`\r`, "\015", "", true}, | ||
{`\v`, "\013", "", true}, | ||
{`\a`, "\007", "", true}, | ||
{`\1`, "\001", "", true}, | ||
{`\12`, "\012", "", true}, | ||
{`\123`, "\123", "", true}, | ||
{`\777`, "\xff", "", true}, | ||
{`\1\12\123\1234`, "\001\012\123\123" + "4", "", true}, | ||
{`a\1a\12a\123a`, "a\001a\012a\123a", "", true}, | ||
{`\x`, "", `truncated \x escape at position 0`, true}, | ||
{`\x1`, "", `truncated \x escape at position 0`, true}, | ||
{`\x11`, "\x11", "", true}, | ||
{`\xzz`, "", `invalid \x escape at position 0`, true}, | ||
{`{\x11}`, "{\x11}", "", true}, | ||
{`\x01\x8a\xff`, "\x01\x8a\xff", "", true}, | ||
{`\x01\x8A\xFF`, "\x01\x8a\xff", "", true}, | ||
{`\u`, `\u`, "", true}, | ||
{`\u1`, `\u1`, "", true}, | ||
{`\u12`, `\u12`, "", true}, | ||
{`z\u134`, `z\u134`, "", true}, | ||
{`\u1234`, `\u1234`, "", true}, | ||
{`z\uzzzz`, `z\uzzzz`, "", true}, | ||
{`{\u1234}`, `{\u1234}`, "", true}, | ||
{`\U00000001\U0000018a\U000012ff`, `\U00000001\U0000018a\U000012ff`, "", true}, | ||
{`\U00000001\U0000018A\U000012FF`, `\U00000001\U0000018A\U000012FF`, "", true}, | ||
{`\U0000`, `\U0000`, "", true}, | ||
{`\U00001`, `\U00001`, "", true}, | ||
{`\U000012`, `\U000012`, "", true}, | ||
{`z\U0000134`, `z\U0000134`, "", true}, | ||
{`\U00001234`, `\U00001234`, "", true}, | ||
{`z\Uzzzzzzzz`, `z\Uzzzzzzzz`, "", true}, | ||
{`{\U00001234}`, `{\U00001234}`, "", true}, | ||
{`\U00000001\U0000018a\U000012ff`, `\U00000001\U0000018a\U000012ff`, "", true}, | ||
{`\U00000001\U0000018A\U000012FF`, `\U00000001\U0000018A\U000012FF`, "", true}, | ||
{`\N{potato}`, `\N{potato}`, "", true}, | ||
} { | ||
in := bytes.NewBufferString(test.in) | ||
out, err := DecodeEscape(in, test.byteMode) | ||
if err != nil { | ||
if test.errString == "" { | ||
t.Errorf("%q: not expecting error but got: %v", test.in, err) | ||
} else { | ||
exc := err.(*py.Exception) | ||
args := exc.Args.(py.Tuple) | ||
if string(args[0].(py.String)) != test.errString { | ||
t.Errorf("%q: want error %q but got %q", test.in, test.errString, args[0]) | ||
} | ||
} | ||
continue | ||
} | ||
if test.errString != "" { | ||
t.Errorf("%q: expecting error but didn't get one", test.in) | ||
continue | ||
} | ||
got := out.String() | ||
if test.want != got { | ||
t.Errorf("%q: want %q but got %q", test.in, test.want, got) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters