Skip to content

Commit

Permalink
haskell: clarify reader
Browse files Browse the repository at this point in the history
The grammar is not fully equivalent to the one in the process, but it
passes all tests.

I suggest that it becomes a formal/testable reference (are tabs
allowed as spaces? is "a~" valid? is "' a" valid?...).

As it is formuled here, it can directly be translated either to
another language with high-level parsers, or to a low-level language
as each production only switches after a look at the next input
character.

Another suggestion to simplify step1 is to make '{' a reader macro,
parsing "{a1 a2..}" into (hash-map a1 a2..).

The step1 tests should then accept both "(hash-map ..)" and "{a1..}",
but the process should in my opinion suggest the former as it moves
the complexity related to maps (language-dependent constructor,
argument count and type checking, reporting of such errors) form
reader.qx/step1 to core.qx/hash-map, where it has to be done later
anyway.
  • Loading branch information
asarhaddon authored and kanaka committed Jul 11, 2021
1 parent cede778 commit 003947b
Showing 1 changed file with 80 additions and 92 deletions.
172 changes: 80 additions & 92 deletions impls/haskell/Reader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,109 +3,97 @@ module Reader
where

import Text.ParserCombinators.Parsec (
Parser, parse, char, digit, letter, try,
(<|>), oneOf, noneOf, many, many1, skipMany, skipMany1, sepEndBy, string)
Parser, (<|>), anyChar, char, digit, many, many1, noneOf, oneOf, parse)
import qualified Data.Map as Map

import Types

spaces :: Parser ()
spaces = skipMany1 (oneOf ", \n")
-- Helpers

comment :: Parser ()
comment = char ';' *> skipMany (noneOf "\r\n")
symbolFalseTrueNil :: String -> MalVal
symbolFalseTrueNil "true" = MalBoolean True
symbolFalseTrueNil "false" = MalBoolean False
symbolFalseTrueNil "nil" = Nil
symbolFalseTrueNil s = MalSymbol s

ignored :: Parser ()
ignored = skipMany (spaces <|> comment)

symbol :: Parser Char
symbol = oneOf "!#$%&|*+-/:<=>?@^_~"

escaped :: Parser Char
escaped = f <$> (char '\\' *> oneOf "\\\"n")
where f 'n' = '\n'
f x = x

read_number :: Parser MalVal
read_number = MalNumber . read <$> many1 digit

read_negative_number :: Parser MalVal
read_negative_number = f <$> char '-' <*> many1 digit
where f sign rest = MalNumber $ read $ sign : rest

read_string :: Parser MalVal
read_string = MalString <$> (char '"' *> many (escaped <|> noneOf "\\\"") <* char '"')

read_symbol :: Parser MalVal
read_symbol = f <$> (letter <|> symbol) <*> many (letter <|> digit <|> symbol)
where f first rest = g (first : rest)
g "true" = MalBoolean True
g "false" = MalBoolean False
g "nil" = Nil
g s = MalSymbol s

read_keyword :: Parser MalVal
read_keyword = MalString . (:) keywordMagic <$> (char ':' *> many (letter <|> digit <|> symbol))

read_atom :: Parser MalVal
read_atom = read_number
<|> try read_negative_number
<|> read_string
<|> read_keyword
<|> read_symbol

read_list :: Parser MalVal
read_list = toList <$> (char '(' *> ignored *> sepEndBy read_form ignored <* char ')')

read_vector :: Parser MalVal
read_vector = MalSeq (MetaData Nil) (Vect True) <$> (char '[' *> ignored *> sepEndBy read_form ignored <* char ']')

read_hash_map :: Parser MalVal
read_hash_map = g . keyValuePairs =<< (char '{' *> ignored *> sepEndBy read_form ignored <* char '}')
where g (Just pairs) = return $ MalHashMap (MetaData Nil) (Map.fromList pairs)
g Nothing = fail "invalid contents inside map braces"

-- reader macros
addPrefix :: String -> MalVal -> MalVal
addPrefix s x = toList [MalSymbol s, x]

read_quote :: Parser MalVal
read_quote = addPrefix "quote" <$> (char '\'' *> read_form)

read_quasiquote :: Parser MalVal
read_quasiquote = addPrefix "quasiquote" <$> (char '`' *> read_form)
addPrefix s m = toList [MalSymbol s, m]

read_splice_unquote :: Parser MalVal
read_splice_unquote = addPrefix "splice-unquote" <$> (string "~@" *> read_form)
with_meta :: MalVal -> MalVal -> MalVal
with_meta m x = toList [MalSymbol "with-meta", x, m]

read_unquote :: Parser MalVal
read_unquote = addPrefix "unquote" <$> (char '~' *> read_form)

read_deref :: Parser MalVal
read_deref = addPrefix "deref" <$> (char '@' *> read_form)

read_with_meta :: Parser MalVal
read_with_meta = f <$> (char '^' *> read_form) <*> read_form
where f m x = toList [MalSymbol "with-meta", x, m]

read_macro :: Parser MalVal
read_macro = read_quote
<|> read_quasiquote
<|> try read_splice_unquote <|> read_unquote
<|> read_deref
<|> read_with_meta

--
hash_map :: [MalVal] -> Parser MalVal
hash_map = g . keyValuePairs
where
g (Just pairs) = return $ MalHashMap (MetaData Nil) $ Map.fromList pairs
g Nothing = fail "invalid contents inside map braces"

read_form :: Parser MalVal
read_form = ignored *> (
read_macro
<|> read_list
<|> read_vector
<|> read_hash_map
<|> read_atom)
toKeyword :: String -> MalVal
toKeyword = MalString . (:) keywordMagic

toVector :: [MalVal] -> MalVal
toVector = MalSeq (MetaData Nil) (Vect True)

-- Parsing

-- For efficiency, <|> expects each choice in an alternative to
-- * either succeed,
-- * or fall after looking only at the next character
-- * or consume some input and fail for incorrect input.

-- The grammar should be human-readable in the first and third column
-- without former knowledge of Haskell, except these two regex-style
-- combinators:
-- many p = (many1 p) | empty AKA p*, zero or more p
-- many1 p = p (many p) AKA p+, one or more p

allowedChar :: Parser Char
allowedChar = noneOf "\n\r \"(),;[\\]{}"

separChar :: Parser ()
separChar = () <$ oneOf "\n ,"
<|> () <$ char ';' <* many (noneOf "\n")

sep :: Parser ()
sep = () <$ many separChar
-- A comment may also reach the end of the input. The terminator, if
-- present, will be consumed by the first option later anyway.

escapedChar :: Parser Char
escapedChar = '\n' <$ char 'n'
<|> anyChar

stringChar :: Parser Char
stringChar = char '\\' *> escapedChar
<|> noneOf "\""

afterMinus :: Parser MalVal
afterMinus = MalNumber . negate . read <$> many1 digit
<|> MalSymbol . (:) '-' <$> many allowedChar

afterTilde :: Parser MalVal
afterTilde = addPrefix "splice-unquote" <$> (char '@' *> sep *> form)
<|> addPrefix "unquote" <$> (sep *> form)

form :: Parser MalVal
form = MalString <$> (char '"' *> many stringChar <* char '"')
<|> addPrefix "quote" <$> (char '\'' *> sep *> form)
<|> toList <$> (char '(' *> sep *> many (form <* sep) <* char ')')
<|> char '-' *> afterMinus
<|> MalNumber . read <$> many1 digit
<|> toKeyword <$> (char ':' *> many1 allowedChar)
<|> addPrefix "deref" <$> (char '@' *> sep *> form)
<|> toVector <$> (char '[' *> sep *> many (form <* sep) <* char ']')
<|> with_meta <$> (char '^' *> sep *> form <* sep) <*> form
<|> addPrefix "quasiquote" <$> (char '`' *> sep *> form)
<|> (hash_map =<< char '{' *> sep *> many (form <* sep) <* char '}')
<|> char '~' *> afterTilde
<|> symbolFalseTrueNil <$> many1 allowedChar

top :: Parser MalVal
top = sep *> form

read_str :: String -> IOThrows MalVal
read_str str = case parse read_form "Mal" str of
read_str str = case parse top "Mal" str of
Left err -> throwStr $ show err
Right val -> return val

0 comments on commit 003947b

Please sign in to comment.