-
Notifications
You must be signed in to change notification settings - Fork 0
/
sql_parser.ex
211 lines (187 loc) · 5.55 KB
/
sql_parser.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# https://www.youtube.com/watch?v=xNzoerDljjo
# combinator returns a parser
# gives us a combinational expressivity
# todo
# - two-pass: add lexer that extracts tokens and feeds further
# - improve errors
# - add look-ahead to avoide choice() problem.
defmodule SqlParser do
def run(input), do: parse(input)
def run do
"select col1 from (
select col2, col3 from (
select col4, col5, col6 from some_table
)
)
"
|> parse()
end
defp parse(input) do
# compile time - assembling the lambda parser
parser = select_statement()
# runtime - invoking the parsers on input
parser.(input)
end
defp select_statement() do
sequence([
keyword(:select),
columns(),
keyword(:from),
choice([token(identifier()), subquery()])
])
|> map(fn [_select, columns, _from, table] ->
%{
statement: :select,
columns: columns,
from: table
}
end)
end
defp subquery() do
sequence([
# wow
token(char(?()),
# deferring creation of another level
lazy(fn -> select_statement() end),
# to runtime when we actually need it
token(char(?)))
])
|> map(fn [_, stmt, _] -> stmt end)
end
# lazy combinator
defp lazy(combinator) do
fn input ->
combinator.().(input)
end
end
defp keyword(expected) do
identifier()
|> token()
|> satisfy(fn token -> String.upcase(token) == String.upcase(to_string(expected)) end)
|> map(fn _ -> expected end)
|> error(fn _error -> "expected #{inspect(expected)}" end)
end
defp error(parser, fun) do
fn input ->
with {:error, reason} <- parser.(input),
do: fun.(reason)
end
end
defp columns(), do: separated_list(token(identifier()), token(char(?,)))
defp token(parser) do
# what is a token?
# it is a sequence of identifiers (letters numbers and _) separated by
# leading/trailiang whitespace (and) newlines
sequence([
# _lw leading whitespace
many(choice([char(?\s), char(?\n)])),
parser,
# _tw trailing whitespace
many(choice([char(?\s), char(?\n)]))
])
|> map(fn [_lw, term, _tw] -> term end)
end
defp separated_list(element_parser, separator_parser) do
# what is a separated_list?
# it is a sequence that starts with the first element
# and followrd by zero or more tokens separated by a , and 0 or more ws
sequence([
# many(choice())
element_parser,
# ["col1", [[",", "col2"], [",", "col3"]]]
many(sequence([separator_parser, element_parser]))
# input = " col1,
# col2,col3 col4"
# {:ok, ["col1", [[44, "col2"], [44, "col3"]]], "col4"}
])
# |> map(fn [head|rest] ->
# [head| Enum.map(Enum.at(rest,0), fn [_comma, token] -> token end)]
# end)
|> map(fn [head, rest] ->
other_elements = Enum.map(rest, fn [_, token] -> token end)
[head | other_elements]
end)
end
# AND combinator
# matches exact sequence
defp sequence(parsers) do
fn input ->
case parsers do
[] ->
{:ok, [], input}
[first | others] ->
# AND implementation via WITH
with {:ok, term, rest} <- first.(input),
{:ok, other_terms, rest} <- sequence(others).(rest),
do: {:ok, [term | other_terms], rest}
end
end
end
defp map(parser, mapper) do
fn input ->
with {:ok, term, rest} <- parser.(input),
do: {:ok, mapper.(term), rest}
end
end
# pretty expressive and intention revealing
defp identifier() do
many(identifier_char())
|> satisfy(fn chars -> chars !== [] end)
|> map(fn chars -> to_string(chars) end)
end
defp many(parser) do
fn input ->
case parser.(input) do
{:error, _reason} ->
{:ok, [], input}
{:ok, first_term, rest} ->
{:ok, other_terms, rest} = many(parser).(rest)
{:ok, [first_term | other_terms], rest}
end
end
end
defp identifier_char(), do: choice([ascii_letter(), char(?_), digit()])
defp ascii_letter(), do: satisfy(char(), fn char -> char in ?A..?Z or char in ?a..?z end)
defp char(expected), do: satisfy(char(), fn char -> char == expected end)
defp digit(), do: satisfy(char(), fn char -> char in ?0..?9 end)
# general purpose combinator
# OR combinator - need at least one parser to succeed <- implented via with block
defp choice(parsers) do
fn input ->
case parsers do
[] ->
{:error, "no parser succeeded"}
[first | others] ->
# iff (if and only if) it fails
# with returns anything that does NOT match its condition
# when condition is matched, we enter do end block
with {:error, _reason} <- first.(input),
# we will try other parsers ON THAT SAME INPUT
do: choice(others).(input)
end
end
end
# general purpose combinator
# refines acceptanse of the parse result
defp satisfy(parser, acceptor) do
fn input ->
with {:ok, term, rest} <- parser.(input) do
if acceptor.(term),
do: {:ok, term, rest},
else: {:error, "term rejected"}
end
end
end
# this is a combinator, becase it returns a parser
# we could write it in char(input) blabla for, in
# which case that would not be a combinator
# because it wouldn't return a parser 😁
defp char() do
fn input ->
case input do
"" -> {:error, "cannot parse empty string"}
<<char::utf8, rest::binary>> -> {:ok, char, rest}
end
end
end
end