From 7c45838bc23fde6d60cbcc9d7642666452176b2c Mon Sep 17 00:00:00 2001 From: mitchmindtree Date: Thu, 30 Jun 2022 17:09:18 +1000 Subject: [PATCH] Add `lex_commented` and `CommentedTokenStream` to `sway_parse` (#2123) * Add `CommentedTokenStream` to `sway_parse` This doesn't yet collect any comments, but adds the necessary structure and attempts to preserve the original API and behaviour where possible. Collecting of comments to be added in a follow-up commit. * Collect multi-line comments in CommentedTokenStream * Collect single-line comments in CommentedTokenStream * Add token_trees and spanned impls for CommentedTokenStream * Add Spanned impl for CommentedTokenTree. Add comment lexing test. * Expose `lex_commented` function from root * Add CommentedTree and CommentedGroup aliases * Move CommentedTokenTree impl to better location * Clean up by using CommentedTree type alias where applicable Co-authored-by: Alex Hansen Co-authored-by: Chris O'Brien <57543709+eureka-cpu@users.noreply.github.com> --- sway-parse/src/lib.rs | 2 +- sway-parse/src/token.rs | 302 ++++++++++++++++++++++++++++++---------- 2 files changed, 233 insertions(+), 71 deletions(-) diff --git a/sway-parse/src/lib.rs b/sway-parse/src/lib.rs index 92f3cc561a0..a5b71a2bef7 100644 --- a/sway-parse/src/lib.rs +++ b/sway-parse/src/lib.rs @@ -55,8 +55,8 @@ pub use crate::{ path::{PathExpr, PathExprSegment, PathType, PathTypeSegment, QualifiedPathRoot}, pattern::{Pattern, PatternStructField}, statement::{Statement, StatementLet}, - token::lex, token::LexError, + token::{lex, lex_commented}, ty::Ty, where_clause::{WhereBound, WhereClause}, }; diff --git a/sway-parse/src/token.rs b/sway-parse/src/token.rs index 964a8e7cc87..3b63a750126 100644 --- a/sway-parse/src/token.rs +++ b/sway-parse/src/token.rs @@ -69,13 +69,16 @@ impl PunctKind { } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] -pub struct Group { +pub struct GenericGroup { pub delimiter: Delimiter, - pub token_stream: TokenStream, + pub token_stream: T, pub span: Span, } -impl Spanned for Group { +pub type Group = GenericGroup; +pub type CommentedGroup = GenericGroup; + +impl Spanned for GenericGroup { fn span(&self) -> Span { self.span.clone() } @@ -106,30 +109,112 @@ impl Delimiter { } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] -pub enum TokenTree { +pub struct Comment { + pub span: Span, +} + +impl Spanned for Comment { + fn span(&self) -> Span { + self.span.clone() + } +} + +/// Allows for generalizing over commented and uncommented token streams. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +pub enum GenericTokenTree { Punct(Punct), Ident(Ident), - Group(Group), + Group(GenericGroup), Literal(Literal), } -impl Spanned for TokenTree { +pub type TokenTree = GenericTokenTree; +pub type CommentedTree = GenericTokenTree; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +pub enum CommentedTokenTree { + Comment(Comment), + Tree(CommentedTree), +} + +impl CommentedGroup { + pub fn strip_comments(self) -> Group { + Group { + delimiter: self.delimiter, + token_stream: self.token_stream.strip_comments(), + span: self.span, + } + } +} + +impl Spanned for GenericTokenTree { fn span(&self) -> Span { match self { - TokenTree::Punct(punct) => punct.span(), - TokenTree::Ident(ident) => ident.span(), - TokenTree::Group(group) => group.span(), - TokenTree::Literal(literal) => literal.span(), + Self::Punct(punct) => punct.span(), + Self::Ident(ident) => ident.span(), + Self::Group(group) => group.span(), + Self::Literal(literal) => literal.span(), } } } +impl Spanned for CommentedTokenTree { + fn span(&self) -> Span { + match self { + Self::Comment(cmt) => cmt.span(), + Self::Tree(tt) => tt.span(), + } + } +} + +impl From for GenericTokenTree { + fn from(punct: Punct) -> Self { + Self::Punct(punct) + } +} + +impl From for GenericTokenTree { + fn from(ident: Ident) -> Self { + Self::Ident(ident) + } +} + +impl From> for GenericTokenTree { + fn from(group: GenericGroup) -> Self { + Self::Group(group) + } +} + +impl From for GenericTokenTree { + fn from(lit: Literal) -> Self { + Self::Literal(lit) + } +} + +impl From for CommentedTokenTree { + fn from(comment: Comment) -> Self { + Self::Comment(comment) + } +} + +impl From for CommentedTokenTree { + fn from(tree: CommentedTree) -> Self { + Self::Tree(tree) + } +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] pub struct TokenStream { token_trees: Vec, full_span: Span, } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +pub struct CommentedTokenStream { + token_trees: Vec, + full_span: Span, +} + #[derive(Error, Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] #[error("{}", kind)] pub struct LexError { @@ -277,13 +362,22 @@ pub fn lex( end: usize, path: Option>, ) -> Result { + lex_commented(src, start, end, path).map(|stream| stream.strip_comments()) +} + +pub fn lex_commented( + src: &Arc, + start: usize, + end: usize, + path: Option>, +) -> Result { let mut char_indices = CharIndicesInner { src: &src[..end], position: start, } .peekable(); let mut parent_token_trees = Vec::new(); - let mut token_trees = Vec::new(); + let mut token_trees: Vec = Vec::new(); while let Some((mut index, mut character)) = char_indices.next() { if character.is_whitespace() { continue; @@ -292,8 +386,11 @@ pub fn lex( match char_indices.peek() { Some((_, '/')) => { let _ = char_indices.next(); - for (_, character) in char_indices.by_ref() { + for (end, character) in char_indices.by_ref() { if character == '\n' { + let span = Span::new(src.clone(), index, end, path.clone()).unwrap(); + let comment = Comment { span }; + token_trees.push(comment.into()); break; } } @@ -301,42 +398,33 @@ pub fn lex( Some((_, '*')) => { let _ = char_indices.next(); let mut unclosed_indices = vec![index]; + + let unclosed_multiline_comment = |unclosed_indices: Vec<_>| { + let span = Span::new( + src.clone(), + *unclosed_indices.last().unwrap(), + src.len(), + path.clone(), + ) + .unwrap(); + LexError { + kind: LexErrorKind::UnclosedMultilineComment { unclosed_indices }, + span, + } + }; + loop { match char_indices.next() { - None => { - let span = Span::new( - src.clone(), - *unclosed_indices.last().unwrap(), - src.len(), - path.clone(), - ) - .unwrap(); - return Err(LexError { - kind: LexErrorKind::UnclosedMultilineComment { - unclosed_indices, - }, - span, - }); - } + None => return Err(unclosed_multiline_comment(unclosed_indices)), Some((_, '*')) => match char_indices.next() { - None => { - let span = Span::new( - src.clone(), - *unclosed_indices.last().unwrap(), - src.len(), - path.clone(), - ) - .unwrap(); - return Err(LexError { - kind: LexErrorKind::UnclosedMultilineComment { - unclosed_indices, - }, - span, - }); - } - Some((_, '/')) => { + None => return Err(unclosed_multiline_comment(unclosed_indices)), + Some((end, '/')) => { let _ = char_indices.next(); - unclosed_indices.pop(); + let start = unclosed_indices.pop().unwrap(); + let span = + Span::new(src.clone(), start, end, path.clone()).unwrap(); + let comment = Comment { span }; + token_trees.push(comment.into()); if unclosed_indices.is_empty() { break; } @@ -344,21 +432,7 @@ pub fn lex( Some((_, _)) => (), }, Some((next_index, '/')) => match char_indices.next() { - None => { - let span = Span::new( - src.clone(), - *unclosed_indices.last().unwrap(), - src.len(), - path.clone(), - ) - .unwrap(); - return Err(LexError { - kind: LexErrorKind::UnclosedMultilineComment { - unclosed_indices, - }, - span, - }); - } + None => return Err(unclosed_multiline_comment(unclosed_indices)), Some((_, '*')) => { unclosed_indices.push(next_index); } @@ -380,7 +454,7 @@ pub fn lex( spacing, span, }; - token_trees.push(TokenTree::Punct(punct)); + token_trees.push(CommentedTokenTree::Tree(punct.into())); } None => { let span = Span::new(src.clone(), start, end, path.clone()).unwrap(); @@ -389,7 +463,7 @@ pub fn lex( spacing: Spacing::Alone, span, }; - token_trees.push(TokenTree::Punct(punct)); + token_trees.push(CommentedTokenTree::Tree(punct.into())); } } continue; @@ -419,7 +493,7 @@ pub fn lex( } let span = span_until(src, index, &mut char_indices, &path); let ident = Ident::new_with_raw(span, is_raw_ident); - token_trees.push(TokenTree::Ident(ident)); + token_trees.push(CommentedTokenTree::Tree(ident.into())); continue; } } @@ -467,15 +541,15 @@ pub fn lex( let start_index = open_index + open_delimiter.as_open_char().len_utf8(); let full_span = Span::new(src.clone(), start_index, index, path.clone()).unwrap(); - let group = Group { - token_stream: TokenStream { + let group = CommentedGroup { + token_stream: CommentedTokenStream { token_trees: parent, full_span, }, delimiter: close_delimiter, span: span_until(src, open_index, &mut char_indices, &path), }; - token_trees.push(TokenTree::Group(group)); + token_trees.push(CommentedTokenTree::Tree(group.into())); } } continue; @@ -523,7 +597,7 @@ pub fn lex( } let span = span_until(src, index, &mut char_indices, &path); let literal = Literal::String(LitString { span, parsed }); - token_trees.push(TokenTree::Literal(literal)); + token_trees.push(CommentedTokenTree::Tree(literal.into())); continue; } if character == '\'' { @@ -575,7 +649,7 @@ pub fn lex( } let span = span_until(src, index, &mut char_indices, &path); let literal = Literal::Char(LitChar { span, parsed }); - token_trees.push(TokenTree::Literal(literal)); + token_trees.push(CommentedTokenTree::Tree(literal.into())); continue; } if let Some(digit) = character.to_digit(10) { @@ -755,7 +829,7 @@ pub fn lex( parsed: big_uint, ty_opt, }); - token_trees.push(TokenTree::Literal(literal)); + token_trees.push(CommentedTokenTree::Tree(literal.into())); continue; } if let Some(kind) = character.as_punct_kind() { @@ -771,7 +845,7 @@ pub fn lex( spacing, span, }; - token_trees.push(TokenTree::Punct(punct)); + token_trees.push(CommentedTokenTree::Tree(punct.into())); continue; } return Err(LexError { @@ -804,7 +878,7 @@ pub fn lex( }); } let full_span = Span::new(src.clone(), start, end, path).unwrap(); - let token_stream = TokenStream { + let token_stream = CommentedTokenStream { token_trees, full_span, }; @@ -983,3 +1057,91 @@ impl Spanned for TokenStream { self.full_span.clone() } } + +impl CommentedTokenTree { + pub fn strip_comments(self) -> Option { + let commented_tt = match self { + Self::Comment(_) => return None, + Self::Tree(commented_tt) => commented_tt, + }; + let tt = match commented_tt { + CommentedTree::Punct(punct) => punct.into(), + CommentedTree::Ident(ident) => ident.into(), + CommentedTree::Group(group) => group.strip_comments().into(), + CommentedTree::Literal(lit) => lit.into(), + }; + Some(tt) + } +} + +impl CommentedTokenStream { + pub fn token_trees(&self) -> &[CommentedTokenTree] { + &self.token_trees + } + + pub fn strip_comments(self) -> TokenStream { + let token_trees = self + .token_trees + .into_iter() + .filter_map(|tree| tree.strip_comments()) + .collect(); + TokenStream { + token_trees, + full_span: self.full_span, + } + } +} + +impl Spanned for CommentedTokenStream { + fn span(&self) -> Span { + self.full_span.clone() + } +} + +#[cfg(test)] +mod tests { + use super::{lex_commented, CommentedTokenTree, CommentedTree}; + use crate::priv_prelude::*; + use std::sync::Arc; + + #[test] + fn lex_commented_token_stream() { + let input = r#" + // Single-line comment. + struct Foo { + /* multi- + * line- + * comment */ + bar: i32, + } + "#; + let start = 0; + let end = input.len(); + let path = None; + let stream = lex_commented(&Arc::from(input), start, end, path).unwrap(); + let mut tts = stream.token_trees().iter(); + assert_eq!( + tts.next().unwrap().span().as_str(), + "// Single-line comment." + ); + assert_eq!(tts.next().unwrap().span().as_str(), "struct"); + assert_eq!(tts.next().unwrap().span().as_str(), "Foo"); + { + let group = match tts.next() { + Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group, + _ => panic!("expected group"), + }; + let mut tts = group.token_stream.token_trees().iter(); + assert_eq!( + tts.next().unwrap().span().as_str(), + "/* multi-\n * line-\n * comment *", + ); + assert_eq!(tts.next().unwrap().span().as_str(), "bar"); + assert_eq!(tts.next().unwrap().span().as_str(), ":"); + assert_eq!(tts.next().unwrap().span().as_str(), "i32"); + assert_eq!(tts.next().unwrap().span().as_str(), ","); + assert!(tts.next().is_none()); + } + assert!(tts.next().is_none()); + } +}