-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.h
100 lines (74 loc) · 2.84 KB
/
lexer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#ifndef LEXER_H
#define LEXER_H
#include "platform.h"
#include "string.h"
#include "core.h"
// Reports a parse error through LOG_ERROR, prefixed with
// "parse error: <debug_name>:<line>:<column>: ".
//
// lexer: pointer to the Lexer; its debug_name and current token `t` are read.
//        t.l0 and t.c0 are printed with 1 added to each.
// fmt:   must be a string literal, it is pasted onto the prefix at compile time.
// ...:   optional arguments consumed by `fmt`, forwarded after the position.
//
// NOTE(review): STRFMT presumably expands to the length/pointer pair that
// "%.*s" consumes - confirm against its definition.
#define PARSE_ERROR(lexer, fmt, ...) \
    LOG_ERROR( \
        "parse error: %.*s:%d:%d: " fmt, \
        STRFMT((lexer)->debug_name), \
        (lexer)->t.l0+1, \
        (lexer)->t.c0+1, ##__VA_ARGS__)
// Kinds of token. Single-character tokens use the character's own value as
// their type; the named kinds are numbered consecutively after TOKEN_START.
enum TokenType : u16 {
    // Operator tokens, identified by their character value.
    TOKEN_ADD = '+',
    TOKEN_SUB = '-',
    TOKEN_MUL = '*',
    TOKEN_DIV = '/',

    // NOTE(jesper): 0-255 reserved for ascii token values
    TOKEN_START = 0xFF,

    TOKEN_IDENTIFIER,
    TOKEN_INTEGER,
    TOKEN_NUMBER,

    TOKEN_WHITESPACE, // automatically eaten unless LEXER_WHITESPACE
    TOKEN_NEWLINE,    // automatically eaten unless LEXER_NEWLINE

    // Converting a Token to bool yields false for this kind.
    TOKEN_EOF,
};
struct Token {
TokenType type;
String str;
i32 l0, c0;
bool operator==(Token t) const { return str.data == t.str.data; }
bool operator==(String str) const { return this->str == str; }
bool operator==(char c) const { return type == (TokenType)c; }
bool operator==(TokenType type) const { return this->type == type; }
operator bool() const { return type != TOKEN_EOF; }
};
// Bit flags controlling which token kinds are handed back instead of being
// eaten automatically.
enum LexerFlags : u8 {
    LEXER_NEWLINE    = 0x01, // TOKEN_NEWLINE is automatically eaten unless this is set
    LEXER_WHITESPACE = 0x02, // TOKEN_WHITESPACE is automatically eaten unless this is set
    LEXER_ALL        = 0xFF, // every bit of the u8 set
};
// Tokenizer state over a character range [ptr, end).
struct Lexer {
    char *ptr;          // start of the input not yet past `end`
    char *end;          // one past the last input character
    String debug_name;  // name printed by PARSE_ERROR in front of the position
    Token t = {};       // token whose l0/c0 PARSE_ERROR reports
    i32 line = 0;
    i32 col = 0;
    u32 flags;          // forwarded by the helper overloads that take no flags argument

    // Lexes `size` bytes starting at `data`.
    Lexer(u8 *data, i32 size, String debug_name, u32 flags = 0)
        : ptr((char*)data)
        , end((char*)data + size)
        , debug_name(debug_name)
        , flags(flags)
    {
    }

    // Lexes the characters of `str` (str.data up to str.data + str.length).
    Lexer(String str, String debug_name, u32 flags = 0)
        : ptr(str.data)
        , end(str.data + str.length)
        , debug_name(debug_name)
        , flags(flags)
    {
    }

    // True while `ptr` has not reached `end`.
    explicit operator bool() const { return end > ptr; }
};
// Returns a NUL-terminated, human readable name for a token type.
//
// Named kinds map to fixed string literals. Every other value is treated as a
// single-character token and yields a one-character string holding the low
// 8 bits of `type` (so value 0 yields an empty string).
//
// The returned pointer refers to static storage and must not be freed or
// written to. Each character has its own storage, so results of several calls
// stay valid and distinct, e.g. when two of them are passed to one format call.
inline const char* sz_from_enum(TokenType type)
{
    // One immutable "<c>\0" string per byte value. A single shared scratch
    // buffer would be overwritten by every later call; this table is built
    // once (function-local static initialization) and never modified after.
    struct CharNames {
        char str[256][2];

        CharNames()
        {
            for (int i = 0; i < 256; i++) {
                str[i][0] = (char)i;
                str[i][1] = '\0';
            }
        }
    };
    static const CharNames char_names;

    switch (type) {
    case TOKEN_IDENTIFIER: return "IDENTIFIER";
    case TOKEN_INTEGER:    return "INTEGER";
    case TOKEN_NUMBER:     return "NUMBER";
    case TOKEN_WHITESPACE: return "WHITESPACE";
    case TOKEN_NEWLINE:    return "NEWLINE";
    case TOKEN_EOF:        return "EOF";
    default:
        // Truncate to the low byte, then look up that character's string.
        return char_names.str[(unsigned char)type];
    }
}
#include "generated/lexer.h"
// Convenience overload: calls next_token(lexer, flags) with the flags stored
// on the lexer itself.
inline Token next_token(Lexer *lexer)
{
    return next_token(lexer, lexer->flags);
}
// Convenience overload: calls peek_token(lexer, flags) with the flags stored
// on the lexer itself.
inline Token peek_token(Lexer *lexer)
{
    return peek_token(lexer, lexer->flags);
}
// Convenience overload: calls next_nth_token(lexer, n, flags) with the flags
// stored on the lexer itself.
inline Token next_nth_token(Lexer *lexer, i32 n)
{
    return next_nth_token(lexer, n, lexer->flags);
}
// Convenience overload: calls peek_nth_token(lexer, n, flags) with the flags
// stored on the lexer itself.
inline Token peek_nth_token(Lexer *lexer, i32 n)
{
    return peek_nth_token(lexer, n, lexer->flags);
}
// Convenience overloads of eat_until(lexer, TokenType, flags).

// Character terminator with explicit flags: the character is cast to
// TokenType and forwarded unchanged otherwise.
inline Token eat_until(Lexer *lexer, char terminator, u32 flags)
{
    return eat_until(lexer, (TokenType)terminator, flags);
}

// TokenType terminator, using the flags stored on the lexer.
inline Token eat_until(Lexer *lexer, TokenType terminator)
{
    return eat_until(lexer, terminator, lexer->flags);
}

// Character terminator, using the flags stored on the lexer.
inline Token eat_until(Lexer *lexer, char terminator)
{
    return eat_until(lexer, (TokenType)terminator, lexer->flags);
}
#endif // LEXER_H