forked from AbsInt/CompCert
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Tokenize.mll
47 lines (41 loc) · 2.39 KB
/
Tokenize.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU Lesser General Public License as *)
(* published by the Free Software Foundation, either version 2.1 of *)
(* the License, or (at your option) any later version. *)
(* This file is also distributed under the terms of the *)
(* INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(* Parse a string as a list of tokens *)
let identstart = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' ]
let identcont = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' '-' '.' ]
rule tokenize acc = parse
| eof { List.rev acc }
| [' ' '\t' '\n' '\r'] + { tokenize acc lexbuf }
| "\"" { tok_dquote acc (Buffer.create 16) lexbuf }
| "'" { tok_squote acc (Buffer.create 16) lexbuf }
| (identstart identcont*) as s
{ tokenize (s :: acc) lexbuf }
| _ as c { tokenize (String.make 1 c :: acc) lexbuf }
and tok_dquote acc buf = parse
| "\"" | eof { tokenize (Buffer.contents buf :: acc) lexbuf }
| "\\t" { Buffer.add_char buf '\t'; tok_dquote acc buf lexbuf }
| "\\n" { Buffer.add_char buf '\n'; tok_dquote acc buf lexbuf }
| "\\r" { Buffer.add_char buf '\r'; tok_dquote acc buf lexbuf }
| "\\" ([ '\\' '\"' ] as c)
{ Buffer.add_char buf c; tok_dquote acc buf lexbuf }
| _ as c { Buffer.add_char buf c; tok_dquote acc buf lexbuf }
and tok_squote acc buf = parse
| "\'" | eof { tokenize (Buffer.contents buf :: acc) lexbuf }
| _ as c { Buffer.add_char buf c; tok_squote acc buf lexbuf }
{
let string s =
tokenize [] (Lexing.from_string s)
}