Skip to content

Commit

Permalink
Output byte positions from the lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
Marwes committed Aug 20, 2016
1 parent 3347a48 commit 10b8c58
Showing 1 changed file with 38 additions and 14 deletions.
52 changes: 38 additions & 14 deletions parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@ use combine::combinator::EnvParser;
use combine::*;
use combine_language::{LanguageEnv, LanguageDef, Identifier};

/// Stream adaptor that tracks the byte offset of the current position.
///
/// Each consumed character advances `position` by its UTF-8 encoded
/// length, so `position` is always a valid byte index into the original
/// source text — this is what lets the lexer report byte positions
/// instead of line/column pairs.
#[derive(Clone)]
pub struct BytePositioned<I> {
    // Byte offset of the next unconsumed character (0 at stream start).
    position: usize,
    // The wrapped character stream.
    input: I
}

/// `StreamOnce` passthrough that keeps `position` in sync with the
/// number of UTF-8 bytes consumed from the underlying stream.
impl<I> StreamOnce for BytePositioned<I>
    where I: StreamOnce<Item = char>
{
    type Item = I::Item;
    type Range = I::Range;
    // Positions are plain byte offsets rather than line/column pairs.
    type Position = usize;

    fn uncons(&mut self) -> Result<Self::Item, CombineError<Self::Item, Self::Range>> {
        match self.input.uncons() {
            Ok(c) => {
                // Advance by the encoded width of the character (1-4 bytes)
                // so the offset stays a valid index into the source bytes.
                self.position += c.len_utf8();
                Ok(c)
            }
            Err(err) => Err(err),
        }
    }

    fn position(&self) -> Self::Position {
        // Offset of the next character that `uncons` would return.
        self.position
    }
}

#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Delimiter {
Brace,
Expand Down Expand Up @@ -195,7 +219,7 @@ pub enum Context {
}

/// Parser passes the environment to each parser function
type LanguageParser<'a: 'b, 'b, I: 'b, F: 'b, T> = EnvParser<&'b Lexer<'a, I, F>, State<I>, T>;
type LanguageParser<'a: 'b, 'b, I: 'b, F: 'b, T> = EnvParser<&'b Lexer<'a, I, F>, BytePositioned<I>, T>;

pub struct Contexts {
stack: Vec<Offside>,
Expand Down Expand Up @@ -258,9 +282,9 @@ pub struct Lexer<'a, I, F>
where I: Stream<Item = char>,
F: IdentEnv
{
pub env: LanguageEnv<'a, State<I>>,
pub env: LanguageEnv<'a, BytePositioned<I>>,
pub make_ident: Rc<RefCell<F>>,
pub input: Option<State<I>>,
pub input: Option<BytePositioned<I>>,
pub unprocessed_tokens: Vec<PToken<F::Ident>>,
pub indent_levels: Contexts,
end_position: SourcePosition,
Expand Down Expand Up @@ -296,7 +320,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
Lexer {
env: env,
make_ident: make_ident,
input: Some(State::new(input)),
input: Some(BytePositioned { position: 0, input : input }),
unprocessed_tokens: Vec::new(),
indent_levels: Contexts { stack: Vec::new() },
end_position: SourcePosition {
Expand All @@ -311,7 +335,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
}

fn parser<T>(&'s self,
parser: fn(&Lexer<'a, I, F>, State<I>) -> ParseResult<T, State<I>>)
parser: fn(&Lexer<'a, I, F>, BytePositioned<I>) -> ParseResult<T, BytePositioned<I>>)
-> LanguageParser<'a, 's, I, F, T> {
env_parser(self, parser)
}
Expand All @@ -321,7 +345,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
self.parser(Lexer::parse_op)
}

fn parse_op(&self, input: State<I>) -> ParseResult<Id, State<I>> {
fn parse_op(&self, input: BytePositioned<I>) -> ParseResult<Id, BytePositioned<I>> {
(optional(char('#').with(many(letter()))), try(self.env.op()))
.map(|(builtin, op): (Option<String>, String)| {
match builtin {
Expand All @@ -339,7 +363,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
fn ident(&'s self) -> LanguageParser<'a, 's, I, F, Token<Id>> {
self.parser(Lexer::parse_ident)
}
fn parse_ident(&self, input: State<I>) -> ParseResult<Token<Id>, State<I>> {
fn parse_ident(&self, input: BytePositioned<I>) -> ParseResult<Token<Id>, BytePositioned<I>> {
self.parser(Lexer::parse_ident2)
.map(|x| Token::Identifier(x.0, x.1))
.parse_state(input)
Expand All @@ -349,7 +373,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
self.parser(Self::integer_)
}

fn integer_<'b>(&'b self, input: State<I>) -> ParseResult<i64, State<I>> {
fn integer_<'b>(&'b self, input: BytePositioned<I>) -> ParseResult<i64, BytePositioned<I>> {
let (s, input) = try!(many1::<String, _>(digit()).parse_lazy(input));
let mut n = 0;
for c in s.chars() {
Expand All @@ -359,7 +383,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
}

/// Identifier parser which returns the identifier as well as the type of the identifier
fn parse_ident2(&self, input: State<I>) -> ParseResult<(Id, IdentType), State<I>> {
fn parse_ident2(&self, input: BytePositioned<I>) -> ParseResult<(Id, IdentType), BytePositioned<I>> {
let id = self.env.identifier().map(|id| {
let typ = if id.chars().next().unwrap().is_uppercase() {
IdentType::Constructor
Expand Down Expand Up @@ -442,9 +466,9 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
}

fn next_token_(&mut self,
location: &mut SourcePosition,
mut input: State<I>)
-> ParseResult<Token<Id>, State<I>> {
location: &mut usize,
mut input: BytePositioned<I>)
-> ParseResult<Token<Id>, BytePositioned<I>> {
loop {
// Skip all whitespace before the token
let (_, new_input) = try!(spaces().parse_lazy(input));
Expand Down Expand Up @@ -544,7 +568,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
}
}

fn skip_block_comment(&self, input: State<I>) -> ParseResult<(), State<I>> {
fn skip_block_comment(&self, input: BytePositioned<I>) -> ParseResult<(), BytePositioned<I>> {
let mut block_doc_comment = parser(|input| {
let mut input = Consumed::Empty(input);
loop {
Expand All @@ -564,7 +588,7 @@ impl<'a, 's, I, Id, F> Lexer<'a, I, F>
});
block_doc_comment.parse_state(input)
}
fn block_doc_comment(&self, input: State<I>) -> ParseResult<Token<Id>, State<I>> {
fn block_doc_comment(&self, input: BytePositioned<I>) -> ParseResult<Token<Id>, BytePositioned<I>> {
let mut block_doc_comment = parser(|input| {
let mut input = Consumed::Empty(input);
let mut out = String::new();
Expand Down

0 comments on commit 10b8c58

Please sign in to comment.