Skip to content

Commit

Permalink
Merge branch 'lookup-table' into lexer
Browse files: browse the repository at this point in the history
  • Loading branch information
nixpulvis committed Feb 2, 2016
2 parents d4ef9bb + 2466126 commit d9f99e5
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 91 deletions.
1 change: 1 addition & 0 deletions sources.cm
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ $/basis.cm
$/smlnj-lib.cm
$/ml-yacc-lib.cm

src/token.sig
src/token.sml
src/errormsg.sml

Expand Down
15 changes: 2 additions & 13 deletions src/lexer/lexer.sml
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,19 @@ struct
fun run lexer =
let val t = lexer ()
in
case t of
Token.EOF => [t]
| t => t::(run lexer)
if Token.isEof(t) then [t] else t::(run lexer)
end

(* Lex the file at [filename] and return every token it contains, ending
 * with the end-of-file token produced by the generated lexer.
 *
 * All lexer-side state (keyword table, error messages, newline positions,
 * comment and string builders) is reset first so that consecutive calls do
 * not observe each other's state.
 *
 * The input stream is closed before returning, and also when lexing raises,
 * so repeated calls do not leak file descriptors.  Any exception raised
 * while lexing is re-raised unchanged after the stream has been closed.
 * An exception from TextIO.openIn itself (e.g. a missing file) propagates
 * as before. *)
fun lexFile filename =
  let val file = TextIO.openIn filename
      (* ml-lex asks for "up to n" characters; the count is ignored and
       * whatever chunk TextIO has available is handed over. *)
      fun get _ = TextIO.input file
      val lexer = Mlex.makeLexer get
  in
    (Token.reset();
     ErrorMsg.reset();
     Newline.reset();
     SrcComment.reset();
     SrcString.reset();
     (* `run` consumes the whole stream eagerly, so it is safe to close the
      * file as soon as it returns. *)
     run lexer before TextIO.closeIn file)
    handle e => (TextIO.closeIn file; raise e)
  end

fun lexString string =
(* TODO: This is not correct at all. *)
let val lexer = Mlex.makeLexer (fn n => string)
in
ErrorMsg.reset();
Newline.reset();
run lexer
end

end
2 changes: 1 addition & 1 deletion src/lexer/src_string.sml
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ structure SrcString :> SRC_STRING = struct

fun emit (yypos) =
(buildingString := false;
Token.STRING(!innerString, !startPos, yypos))
Token.string (!innerString) (!startPos, yypos))
end
22 changes: 2 additions & 20 deletions src/lexer/tiger.lex
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,6 @@ alpha = [a-zA-Z];
whitespace = [\t\r ];

%%
<INITIAL>type => (Token.TYPE(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>var => (Token.VAR(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>function => (Token.FUNCTION(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>break => (Token.BREAK(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>of => (Token.OF(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>end => (Token.END(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>in => (Token.IN(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>nil => (Token.NIL(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>let => (Token.LET(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>do => (Token.DO(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>to => (Token.TO(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>for => (Token.FOR(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>while => (Token.WHILE(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>else => (Token.ELSE(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>then => (Token.THEN(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>if => (Token.IF(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>array => (Token.ARRAY(sub1 yypos, sub1 yypos + size yytext));

<INITIAL>:\= => (Token.ASSIGN(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>\| => (Token.OR(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>& => (Token.AND(sub1 yypos, sub1 yypos + size yytext));
Expand All @@ -64,8 +46,8 @@ whitespace = [\t\r ];
<INITIAL>: => (Token.COLON(sub1 yypos, sub1 yypos + size yytext));
<INITIAL>, => (Token.COMMA(sub1 yypos, sub1 yypos + size yytext));

<INITIAL>{digit}+ => (Token.INT(atoi yytext, sub1 yypos, sub1 yypos + size yytext));
<INITIAL>{id} => (Token.ID(yytext, sub1 yypos, sub1 yypos + size yytext));
<INITIAL>{digit}+ => (Token.int (atoi yytext) (sub1 yypos, sub1 yypos + size yytext));
<INITIAL>{id} => (Token.find(yytext, sub1 yypos, sub1 yypos + size yytext));
<INITIAL>[ \t]* => (continue());

<INITIAL>\" => (YYBEGIN STRING; SrcString.new(sub1 yypos); continue());
Expand Down
55 changes: 55 additions & 0 deletions src/token.sig
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
(* Abstract interface to lexer tokens.
 *
 * The token type is exposed only as an equality type, so clients build
 * tokens through the functions below and compare them with `=`; they cannot
 * pattern-match on the representation (use isEof to detect end of input). *)
signature TOKEN =
sig
(* The token type; admits equality so tests can compare expected/actual. *)
eqtype token

(* Tokens that carry a payload.  The first argument is the payload (string
 * contents, integer value, identifier text); the int * int pair is the
 * (start, end) position of the lexeme as supplied by the lexer. *)
val string : string -> int * int -> token
val int : int -> int * int -> token
val id : string -> int * int -> token
(* Keyword tokens; the argument is the (start, end) position. *)
val TYPE : int * int -> token
val VAR : int * int -> token
val FUNCTION : int * int -> token
val BREAK : int * int -> token
val OF : int * int -> token
val END : int * int -> token
val IN : int * int -> token
val NIL : int * int -> token
val LET : int * int -> token
val DO : int * int -> token
val TO : int * int -> token
val FOR : int * int -> token
val WHILE : int * int -> token
val ELSE : int * int -> token
val THEN : int * int -> token
val IF : int * int -> token
val ARRAY : int * int -> token
(* Operator and punctuation tokens; the argument is the (start, end)
 * position. *)
val ASSIGN : int * int -> token
val OR : int * int -> token
val AND : int * int -> token
val GE : int * int -> token
val GT : int * int -> token
val LE : int * int -> token
val LT : int * int -> token
val NEQ : int * int -> token
val EQ : int * int -> token
val DIVIDE : int * int -> token
val TIMES : int * int -> token
val MINUS : int * int -> token
val PLUS : int * int -> token
val DOT : int * int -> token
val RBRACE : int * int -> token
val LBRACE : int * int -> token
val RBRACK : int * int -> token
val LBRACK : int * int -> token
val RPAREN : int * int -> token
val LPAREN : int * int -> token
val SEMICOLON : int * int -> token
val COLON : int * int -> token
val COMMA : int * int -> token
(* End-of-input marker; unlike every other token it carries no position.
 * TODO: Should EOF be a token at all, and if so should it carry a yypos? *)
val EOF : token

(* True exactly for the EOF token. *)
val isEof : token -> bool
(* Render a token as a string (used as the printer in test assertions). *)
val toString : token -> string
(* find (text, start, end): classify an identifier-shaped lexeme.  Returns
 * the keyword token for [text] if it is in the keyword table, otherwise an
 * identifier token carrying [text]. *)
val find : string * int * int -> token
(* Clear and repopulate the keyword table consulted by find.  The table
 * starts out empty, so this must run before find can recognise keywords. *)
val reset : unit -> unit
end
131 changes: 87 additions & 44 deletions src/token.sml
Original file line number Diff line number Diff line change
@@ -1,52 +1,95 @@
structure Token =
structure Token :> TOKEN =
struct
type linenum = int
datatype token =
TYPE of int * int |
VAR of int * int |
FUNCTION of int * int |
BREAK of int * int |
OF of int * int |
END of int * int |
IN of int * int |
NIL of int * int |
LET of int * int |
DO of int * int |
TO of int * int |
FOR of int * int |
WHILE of int * int |
ELSE of int * int |
THEN of int * int |
IF of int * int |
ARRAY of int * int |
ASSIGN of int * int |
OR of int * int |
AND of int * int |
GE of int * int |
GT of int * int |
LE of int * int |
LT of int * int |
NEQ of int * int |
EQ of int * int |
DIVIDE of int * int |
TIMES of int * int |
MINUS of int * int |
PLUS of int * int |
DOT of int * int |
RBRACE of int * int |
LBRACE of int * int |
RBRACK of int * int |
LBRACK of int * int |
RPAREN of int * int |
LPAREN of int * int |
STRING of string * int * int |
INT of int * int * int |
ID of string * int * int |
TYPE of int * int |
VAR of int * int |
FUNCTION of int * int |
BREAK of int * int |
OF of int * int |
END of int * int |
IN of int * int |
NIL of int * int |
LET of int * int |
DO of int * int |
TO of int * int |
FOR of int * int |
WHILE of int * int |
ELSE of int * int |
THEN of int * int |
IF of int * int |
ARRAY of int * int |
ASSIGN of int * int |
OR of int * int |
AND of int * int |
GE of int * int |
GT of int * int |
LE of int * int |
LT of int * int |
NEQ of int * int |
EQ of int * int |
DIVIDE of int * int |
TIMES of int * int |
MINUS of int * int |
PLUS of int * int |
DOT of int * int |
RBRACE of int * int |
LBRACE of int * int |
RBRACK of int * int |
LBRACK of int * int |
RPAREN of int * int |
LPAREN of int * int |
SEMICOLON of int * int |
COLON of int * int |
COMMA of int * int |
STRING of string * int * int |
INT of int * int * int |
ID of string * int * int |
COLON of int * int |
COMMA of int * int |
EOF;

(* Curried builders for the payload-carrying tokens: the payload comes
 * first, then the (start, end) position pair of the lexeme. *)
fun string text = fn (startPos, endPos) => STRING (text, startPos, endPos)
fun int num = fn (startPos, endPos) => INT (num, startPos, endPos)
fun id text = fn (startPos, endPos) => ID (text, startPos, endPos)

(* Table from lexeme text to a function that builds the corresponding token
 * from a (start, end) position pair.  It is created empty here; `reset`
 * fills in the keywords and `find` consults (and extends) it.
 * Per the SML/NJ library's HashTable.mkTable, 100 is the initial size hint
 * and the Fail exception is what the library raises from lookups that
 * require the key to be present. *)
val lookupTable: (string, (int * int -> token)) HashTable.hash_table =
HashTable.mkTable (HashString.hashString, op=) (100, Fail "ident not found");

(* Empty the lookup table and register every keyword again, pairing each
 * keyword's text with the constructor that builds its token.  Entries are
 * inserted in the order listed. *)
fun reset () =
  let
    val keywords =
      [ ("type", TYPE)
      , ("var", VAR)
      , ("function", FUNCTION)
      , ("break", BREAK)
      , ("of", OF)
      , ("end", END)
      , ("in", IN)
      , ("nil", NIL)
      , ("let", LET)
      , ("do", DO)
      , ("to", TO)
      , ("for", FOR)
      , ("while", WHILE)
      , ("else", ELSE)
      , ("then", THEN)
      , ("if", IF)
      , ("array", ARRAY)
      ]
  in
    HashTable.clear lookupTable;
    List.app (HashTable.insert lookupTable) keywords
  end

(* Turn an identifier-shaped lexeme into a token.  If [yytext] is already in
 * the lookup table, the stored builder is applied to the position pair.
 * Otherwise an identifier builder for [yytext] is created, stored in the
 * table under [yytext], and then applied. *)
fun find (yytext, yypos, yyend) =
  case HashTable.find lookupTable yytext of
    SOME mkToken => mkToken (yypos, yyend)
  | NONE =>
      let val mkIdent = id yytext
      in
        HashTable.insert lookupTable (yytext, mkIdent);
        mkIdent (yypos, yyend)
      end

(* True exactly when the argument is the end-of-input token. *)
fun isEof EOF = true
  | isEof _ = false

fun toString token =
case token of
TYPE(i,j) => "TYPE(" ^ Int.toString(i) ^ "," ^ Int.toString(j) ^ ")"
Expand Down
1 change: 1 addition & 0 deletions test.sml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Test.reset();
use "test/smoke.sml";
use "test/newline.sml";
use "test/src_string.sml";
use "test/token.sml";
use "test/lexer/comments.sml";
use "test/lexer/strings.sml";

Expand Down
6 changes: 3 additions & 3 deletions test/lexer/comments.sml
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,23 @@ Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/comments/one.tig"
val s = "hello world"
in
Test.assertEq(Token.STRING(s, 25, 37), List.hd(tokens), Token.toString)
Test.assertEq(Token.string s (25, 37), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/comments/two.tig"
val s = "hello world"
in
Test.assertEq(Token.STRING(s, 43, 55), List.hd(tokens), Token.toString)
Test.assertEq(Token.string s (43, 55), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/comments/three.tig"
val s = "hello world"
in
Test.assertEq(Token.STRING(s, 36, 48), List.hd(tokens), Token.toString);
Test.assertEq(Token.string s (36, 48), List.hd(tokens), Token.toString);
Test.assertEq(4, Newline.getLine(36), Int.toString);
Test.assertEq(1, Newline.getPos(36), Int.toString)
end
Expand Down
14 changes: 7 additions & 7 deletions test/lexer/strings.sml
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,55 @@ Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/one.tig"
val string = "hello world"
in
Test.assertEq(Token.STRING(string,1,13), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 13), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/two.tig"
val string = "hello newline\n"
in
Test.assertEq(Token.STRING(string,1,17), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 17), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/three.tig"
val string = "this one has \120 ascii"
in
Test.assertEq(Token.STRING(string,1,25), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 25), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/four.tig"
val string = "hello plus @#^! weird chars!"
in
Test.assertEq(Token.STRING(string,1,30), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 30), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/five.tig"
val string = "this has \\ and \n newline and !@$^ curse words!"
in
Test.assertEq(Token.STRING(string,1,50), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 50), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/six.tig"
val string = "this has control sequence \^C for end of text"
in
Test.assertEq(Token.STRING(string,1,47), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 47), List.hd(tokens), Token.toString)
end
);

Test.test(fn () =>
let val tokens = Lexer.lexFile "fixtures/lexer/strings/seven.tig"
val string = "this is a multiline\ \ string!"
in
Test.assertEq(Token.STRING(string,1,35), List.hd(tokens), Token.toString)
Test.assertEq(Token.string string (1, 35), List.hd(tokens), Token.toString)
end
);

Expand Down
Loading

0 comments on commit d9f99e5

Please sign in to comment.