Skip to content

Commit

Permalink
All: fix read/print of \\, and \n
Browse files Browse the repository at this point in the history
  • Loading branch information
kanaka committed Oct 31, 2015

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent f3c3903 commit 8d78bc2
Showing 46 changed files with 162 additions and 97 deletions.
18 changes: 4 additions & 14 deletions awk/reader.awk
Original file line number Diff line number Diff line change
@@ -1,20 +1,10 @@
function reader_read_string(token, v, r)
{
token = substr(token, 1, length(token) - 1)
while (match(token, /\\["n\\]?/, r)) {
switch (r[0]) {
case "\\":
return "!\"Invalid escape character '" substr(token, RSTART, 2) "'."
case "\\n":
v = v substr(token, 1, RSTART - 1) "\n"
break
default:
v = v substr(token, 1, RSTART - 1) substr(r[0], 2, 1)
break
}
token = substr(token, RSTART + RLENGTH)
}
return v token
gsub(/\\\\/, "\\", token)
gsub(/\\"/, "\"", token)
gsub(/\\n/, "\n", token)
return token
}

function reader_read_atom(token)
3 changes: 2 additions & 1 deletion bash/printer.sh
Original file line number Diff line number Diff line change
@@ -42,7 +42,8 @@ _raw_string_pr_str () {
r=":${s:2}"
elif [ "${print_readably}" == "yes" ]; then
s="${s//\\/\\\\}"
r="\"${s//\"/\\\"}\""
s="${s//\"/\\\"}"
r="\"${s//$'\n'/\\n}\""
else
r="${s}"
fi
2 changes: 2 additions & 0 deletions bash/reader.sh
Original file line number Diff line number Diff line change
@@ -13,7 +13,9 @@ READ_ATOM () {
case "${token}" in
[0-9]*) _number "${token}" ;;
\"*) token="${token:1:-1}"
token="${token//\\\\/\\}"
token="${token//\\\"/\"}"
token="${token//\\n/$'\n'}"
_string "${token}" ;;
:*) _keyword "${token:1}" ;;
nil) r="${__nil}" ;;
40 changes: 10 additions & 30 deletions c/reader.c
Original file line number Diff line number Diff line change
@@ -78,36 +78,12 @@ Reader *tokenize(char *line) {
}


// From http://creativeandcritical.net/str-replace-c/ - Laird Shaw
char *replace_str(const char *str, const char *old, const char *new)
{
char *ret, *r;
const char *p, *q;
size_t oldlen = strlen(old);
size_t count, retlen, newlen = strlen(new);

if (oldlen != newlen) {
for (count = 0, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen)
count++;
/* this is undefined if p - str > PTRDIFF_MAX */
retlen = p - str + strlen(p) + count * (newlen - oldlen);
} else
retlen = strlen(str);

if ((ret = malloc(retlen + 1)) == NULL)
return NULL;

for (r = ret, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
/* this is undefined if q - p > PTRDIFF_MAX */
ptrdiff_t l = q - p;
memcpy(r, p, l);
r += l;
memcpy(r, new, newlen);
r += newlen;
}
strcpy(r, p);

return ret;
GRegex *reg = g_regex_new (old, 0, 0, NULL);
char *str_tmp = g_regex_replace_literal(reg, str, -1, 0, new, 0, NULL);
free(reg);
return str_tmp;
}


@@ -142,8 +118,12 @@ MalVal *read_atom(Reader *reader) {
atom = &mal_false;
} else if (g_match_info_fetch_pos(matchInfo, 6, &pos, NULL) && pos != -1) {
//g_print("read_atom string: %s\n", token);
char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\"", "\"");
atom = malval_new_string(str_tmp);
char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\\\"", "\"");
char *str_tmp2 = replace_str(str_tmp, "\\\\n", "\n");
free(str_tmp);
char *str_tmp3 = replace_str(str_tmp2, "\\\\\\\\", "\\");
free(str_tmp2);
atom = malval_new_string(str_tmp3);
} else if (g_match_info_fetch_pos(matchInfo, 7, &pos, NULL) && pos != -1) {
//g_print("read_atom keyword\n");
atom = malval_new_keyword(g_match_info_fetch(matchInfo, 7));
1 change: 1 addition & 0 deletions coffee/reader.coffee
Original file line number Diff line number Diff line change
@@ -26,6 +26,7 @@ read_atom = (rdr) ->
token.slice(1, token.length-1)
.replace(/\\"/g, '"')
.replace(/\\n/g, "\n")
.replace(/\\\\/g, "\\")
else if token[0] == ':' then types._keyword(token[1..])
else if token == "nil" then null
else if token == "true" then true
2 changes: 2 additions & 0 deletions crystal/reader.cr
Original file line number Diff line number Diff line change
@@ -82,6 +82,8 @@ class Reader
when token == "false" then false
when token == "nil" then nil
when token[0] == '"' then token[1..-2].gsub(/\\"/, "\"")
.gsub(/\\n/, "\n")
.gsub(/\\\\/, "\\")
when token[0] == ':' then "\u029e#{token[1..-1]}"
else Mal::Symbol.new token
end
3 changes: 2 additions & 1 deletion cs/reader.cs
Original file line number Diff line number Diff line change
@@ -72,7 +72,8 @@ public static MalVal read_atom(Reader rdr) {
string str = match.Groups[6].Value;
str = str.Substring(1, str.Length-2)
.Replace("\\\"", "\"")
.Replace("\\n", "\n");
.Replace("\\n", "\n")
.Replace("\\\\", "\\");
return new Mal.types.MalString(str);
} else if (match.Groups[7].Value != String.Empty) {
return new Mal.types.MalString("\u029e" + match.Groups[7].Value);
2 changes: 2 additions & 0 deletions elixir/lib/mal/reader.ex
Original file line number Diff line number Diff line change
@@ -87,6 +87,8 @@ defmodule Mal.Reader do
token
|> String.slice(1..-2)
|> String.replace("\\\"", "\"")
|> String.replace("\\n", "\n")
|> String.replace("\\\\", "\\")

integer?(token) ->
Integer.parse(token)
3 changes: 2 additions & 1 deletion es6/reader.js
Original file line number Diff line number Diff line change
@@ -32,7 +32,8 @@ function read_atom (reader) {
} else if (token[0] === "\"") {
return token.slice(1,token.length-1)
.replace(/\\"/g, '"')
.replace(/\\n/g, "\n"); // string
.replace(/\\n/g, "\n")
.replace(/\\\\/g, "\\"); // string
} else if (token[0] === ":") {
return _keyword(token.slice(1));
} else if (token === "nil") {
1 change: 1 addition & 0 deletions factor/mal/printer/printer.factor
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ M: string (pr-str)
[
"\\" "\\\\" replace
"\"" "\\\"" replace
"\n" "\\n" replace
"\"" dup surround
] when ;
M: array (pr-str) '[ _ (pr-str) ] map " " join "(" ")" surround ;
4 changes: 3 additions & 1 deletion factor/mal/reader/reader.factor
Original file line number Diff line number Diff line change
@@ -10,7 +10,9 @@ DEFER: read-form

: (read-atom) ( str -- maltype )
{
{ [ dup first CHAR: " = ] [ rest but-last "\\\"" "\"" replace ] }
{ [ dup first CHAR: " = ] [ rest but-last "\\\"" "\"" replace
"\\n" "\n" replace
"\\\\" "\\" replace ] }
{ [ dup first CHAR: : = ] [ rest <malkeyword> ] }
{ [ dup "false" = ] [ drop f ] }
{ [ dup "true" = ] [ drop t ] }
6 changes: 4 additions & 2 deletions go/src/reader/reader.go
Original file line number Diff line number Diff line change
@@ -68,8 +68,10 @@ func read_atom(rdr Reader) (MalType, error) {
} else if (*token)[0] == '"' {
str := (*token)[1 : len(*token)-1]
return strings.Replace(
strings.Replace(str, `\"`, `"`, -1),
`\n`, "\n", -1), nil
strings.Replace(
strings.Replace(str, `\"`, `"`, -1),
`\n`, "\n", -1),
`\\`, "\\", -1), nil
} else if (*token)[0] == ':' {
return NewKeyword((*token)[1:len(*token)])
} else if *token == "nil" {
2 changes: 1 addition & 1 deletion guile/printer.scm
Original file line number Diff line number Diff line change
@@ -37,7 +37,7 @@
(string-sub
(string-sub s "\\\\" "\\\\")
"\"" "\\\"")
"\n" "\\\n"))
"\n" "\\n"))
(define (%pr_str o) (pr_str o readable?))
(match obj
((? box?) (%pr_str (unbox obj)))
6 changes: 4 additions & 2 deletions guile/reader.scm
Original file line number Diff line number Diff line change
@@ -80,8 +80,10 @@
(define (read_atom reader)
(define (->str s)
(string-sub
(string-sub s "\\\\\"" "\"")
"\\\\\n" "\n"))
(string-sub
(string-sub s "\\\\\"" "\"")
"\\\\n" "\n")
"\\\\\\\\" "\\"))
(let ((token (reader 'next)))
(cond
((string-match "^-?[0-9][0-9.]*$" token)
3 changes: 2 additions & 1 deletion js/reader.js
Original file line number Diff line number Diff line change
@@ -34,7 +34,8 @@ function read_atom (reader) {
} else if (token[0] === "\"") {
return token.slice(1,token.length-1)
.replace(/\\"/g, '"')
.replace(/\\n/g, "\n"); // string
.replace(/\\n/g, "\n")
.replace(/\\\\/g, "\\"); // string
} else if (token[0] === ":") {
return types._keyword(token.slice(1));
} else if (token === "nil") {
8 changes: 5 additions & 3 deletions julia/reader.jl
Original file line number Diff line number Diff line change
@@ -39,9 +39,11 @@ function read_atom(rdr)
float(token)
elseif ismatch(r"^\".*\"$", token)
replace(
replace(token[2:end-1],
"\\\"", "\""),
"\\n", "\n")
replace(
replace(token[2:end-1],
"\\\"", "\""),
"\\n", "\n"),
"\\\\", "\\")
elseif token[1] == ':'
"\u029e$(token[2:end])"
elseif token == "nil"
2 changes: 1 addition & 1 deletion kotlin/src/mal/printer.kt
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@ fun pr_str(malType: MalType, print_readably: Boolean = false): String =
":" + malType.value.substring(1)
} else if (malType is MalString) {
if (print_readably) {
"\"" + malType.value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""
"\"" + malType.value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") + "\""
} else malType.value
} else if (malType is MalConstant) {
malType.value
2 changes: 1 addition & 1 deletion kotlin/src/mal/reader.kt
Original file line number Diff line number Diff line change
@@ -139,7 +139,7 @@ fun read_atom(reader: Reader): MalType {
} else if (groups[4]?.value != null) {
FALSE
} else if (groups[5]?.value != null) {
MalString((groups[5]?.value as String).replace("\\n", "\n").replace("\\\"", "\""))
MalString((groups[5]?.value as String).replace("\\n", "\n").replace("\\\"", "\"").replace("\\\\", "\\"))
} else if (groups[6]?.value != null) {
MalKeyword(groups[6]?.value as String)
} else if (groups[7]?.value != null) {
1 change: 1 addition & 0 deletions lua/reader.lua
Original file line number Diff line number Diff line change
@@ -47,6 +47,7 @@ function M.read_atom(rdr)
local sval = string.sub(token,2,string.len(token)-1)
sval = string.gsub(sval, '\\"', '"')
sval = string.gsub(sval, '\\n', '\n')
sval = string.gsub(sval, '\\\\', '\\')
return sval
elseif string.sub(token,1,1) == ':' then
return "\177" .. string.sub(token,2)
2 changes: 1 addition & 1 deletion make/printer.mk
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ symbol_pr_str = $($(1)_value)

keyword_pr_str = $(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value)))

string_pr_str = $(if $(filter $(__keyword)%,$(call str_decode,$($(1)_value))),$(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value))),$(if $(2),"$(subst $(DQUOTE),$(ESC_DQUOTE),$(subst $(SLASH),$(SLASH)$(SLASH),$(call str_decode,$($(1)_value))))",$(call str_decode,$($(1)_value))))
string_pr_str = $(if $(filter $(__keyword)%,$(call str_decode,$($(1)_value))),$(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value))),$(if $(2),"$(subst $(NEWLINE),$(ESC_N),$(subst $(DQUOTE),$(ESC_DQUOTE),$(subst $(SLASH),$(SLASH)$(SLASH),$(call str_decode,$($(1)_value)))))",$(call str_decode,$($(1)_value))))

function_pr_str = <$(if $(word 6,$(value $(1)_value)),$(wordlist 1,5,$(value $(1)_value))...,$(value $(1)_value))>

13 changes: 12 additions & 1 deletion make/reader.mk
Original file line number Diff line number Diff line change
@@ -27,18 +27,29 @@ $(foreach ch,$(word 1,$($(1))),\
))
endef

# $(_NL) is used here instead of $(NEWLINE) because $(strip) removes
# $(NEWLINE). str_encode passes $(_NL) through unchanged, so a later
# str_decode restores a correct newline.
define READ_STRING
$(foreach ch,$(word 1,$($(1))),\
$(if $(ch),\
$(if $(and $(filter \,$(ch)),$(filter $(DQUOTE),$(word 2,$($(1))))),\
$(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
$(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
$(DQUOTE) $(strip $(call READ_STRING,$(1))),\
$(if $(and $(filter \,$(ch)),$(filter n,$(word 2,$($(1))))),\
$(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
$(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
$(_NL) $(strip $(call READ_STRING,$(1))),\
$(if $(and $(filter \,$(ch)),$(filter \,$(word 2,$($(1))))),\
$(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
$(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
\ $(strip $(call READ_STRING,$(1))),\
$(if $(filter $(DQUOTE),$(ch)),\
,\
$(eval $(1) := $(wordlist 2,$(words $($(1))),$($(1))))\
$(and $(READER_DEBUG),$(info READ_STRING ch: $(ch) | $($(1))))\
$(ch) $(strip $(call READ_STRING,$(1))))),))
$(ch) $(strip $(call READ_STRING,$(1))))))),))
endef

define READ_SYMBOL
1 change: 1 addition & 0 deletions make/util.mk
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ RBRACKET := ]
DQUOTE := "# "
SLASH := $(strip \ )
ESC_DQUOTE := $(SLASH)$(DQUOTE)
ESC_N := $(SLASH)n
SQUOTE := '# '
QQUOTE := `# `
SPACE :=
8 changes: 5 additions & 3 deletions miniMAL/reader.json
Original file line number Diff line number Diff line change
@@ -39,9 +39,11 @@
["if", ["=", ["`", "\""], ["get", "token", 0]],
[".",
[".",
["slice", "token", 1, ["-", ["count", "token"], 1]],
["`", "replace"], ["RegExp", ["`", "\\\\\""], ["`", "g"]], ["`", "\""]],
["`", "replace"], ["RegExp", ["`", "\\\\n"], ["`", "g"]], ["`", "\n"]],
[".",
["slice", "token", 1, ["-", ["count", "token"], 1]],
["`", "replace"], ["RegExp", ["`", "\\\\\""], ["`", "g"]], ["`", "\""]],
["`", "replace"], ["RegExp", ["`", "\\\\n"], ["`", "g"]], ["`", "\n"]],
["`", "replace"], ["RegExp", ["`", "\\\\\\\\"], ["`", "g"]], ["`", "\\"]],
["if", ["=", ["`", ":"], ["get", "token", 0]],
["keyword", ["slice", "token", 1]],
["if", ["=", ["`", "nil"], "token"],
2 changes: 1 addition & 1 deletion nim/printer.nim
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ import strutils, sequtils, tables, types
proc str_handle(x: string, pr = true): string =
if x.len > 0 and x[0] == '\xff':
result = ":" & x[1 .. x.high]
elif pr: result = "\"" & x.replace("\\", "\\\\").replace("\"", "\\\"") & "\""
elif pr: result = "\"" & x.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") & "\""
else: result = x

proc pr_str*(m: MalType, pr = true): string =
2 changes: 1 addition & 1 deletion nim/reader.nim
Original file line number Diff line number Diff line change
@@ -61,7 +61,7 @@ proc read_hash_map(r: var Reader): MalType =
proc read_atom(r: var Reader): MalType =
let t = r.next
if t.match(intRE): number t.parseInt
elif t[0] == '"': str t[1 .. <t.high].replace("\\\"", "\"")
elif t[0] == '"': str t[1 .. <t.high].replace("\\\"", "\"").replace("\\n", "\n").replace("\\\\", "\\")
elif t[0] == ':': keyword t[1 .. t.high]
elif t == "nil": nilObj
elif t == "true": trueObj
1 change: 1 addition & 0 deletions perl/reader.pm
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@ sub read_atom {
my $str = substr $token, 1, -1;
$str =~ s/\\"/"/g;
$str =~ s/\\n/\n/g;
$str =~ s/\\\\/\\/g;
return String->new($str)
}
when(/^:/) { return _keyword(substr($token,1)) }
2 changes: 1 addition & 1 deletion php/printer.php
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ function _pr_str($obj, $print_readably=True) {
if (strpos($obj, chr(0x7f)) === 0) {
return ":".substr($obj,1);
} elseif ($print_readably) {
$obj = preg_replace('/"/', '\\"', preg_replace('/\\\\/', '\\\\\\\\', $obj));
$obj = preg_replace('/\n/', '\\n', preg_replace('/"/', '\\"', preg_replace('/\\\\/', '\\\\\\\\', $obj)));
return '"' . $obj . '"';
} else {
return $obj;
2 changes: 2 additions & 0 deletions php/reader.php
Original file line number Diff line number Diff line change
@@ -39,6 +39,8 @@ function read_atom($reader) {
} elseif ($token[0] === "\"") {
$str = substr($token, 1, -1);
$str = preg_replace('/\\\\"/', '"', $str);
$str = preg_replace('/\\\\n/', "\n", $str);
$str = preg_replace('/\\\\\\\\/', "\\", $str);
return $str;
} elseif ($token[0] === ":") {
return _keyword(substr($token,1));
20 changes: 11 additions & 9 deletions process/guide.md
Original file line number Diff line number Diff line change
@@ -365,15 +365,17 @@ and each step will give progressively more bang for the buck.

* Add support for the other basic data types to your reader and printer
functions: string, nil, true, and false. These become mandatory at
step 4. When a string is read, a slash followed by a doublequote is
translated into a plain doublequote character and a slash followed by
"n" is translated into a newline. To properly print a string (for
step 4 string functions), the `pr_str` function needs another
parameter called `print_readably`. When `print_readably` is true,
doublequotes and newlines are translated into their printed
representations (the reverse of the reader). The `PRINT` function in
the main program should call `pr_str` with print_readably set to
true.
step 4. When a string is read, the following transformations are
applied: a backslash followed by a doublequote is translated into
a plain doublequote character, a backslash followed by "n" is
translated into a newline, and a backslash followed by another
backslash is translated into a single backslash. To properly print
a string (for step 4 string functions), the `pr_str` function needs
another parameter called `print_readably`. When `print_readably` is
true, doublequotes, newlines, and backslashes are translated into
their printed representations (the reverse of the reader). The
`PRINT` function in the main program should call `pr_str` with
print_readably set to true.

* Add support for the other mal types: keyword, vector, hash-map, and
atom.
Binary file modified ps/printer.ps
Binary file not shown.
Binary file modified ps/reader.ps
Binary file not shown.
1 change: 1 addition & 0 deletions python/Dockerfile
Original file line number Diff line number Diff line change
@@ -22,3 +22,4 @@ WORKDIR /mal
##########################################################

# Nothing additional needed for python
RUN apt-get -y install python3
Loading

0 comments on commit 8d78bc2

Please sign in to comment.