Reworked byte position <-> LSP position conversions (#33)
AndrewF001 authored Jun 5, 2023
1 parent eff1daf commit 5be8774
Showing 11 changed files with 666 additions and 85 deletions.
4 changes: 2 additions & 2 deletions crates/analyzer-core/benches/lexer.rs
@@ -1,6 +1,6 @@
extern crate analyzer_core;

use analyzer_core::*;
use analyzer_core::{lsp_file::LspFile, *};
use base_abstractions::*;
use lexer::*;

@@ -11,7 +11,7 @@ fn baseline(input: String) -> Vec<char> { input.chars().into_iter().collect() }

fn basic(input: String) -> Vec<(Token, Span)> {
let db = Database::new(|base, _| Ok(base.into()));
let buf = Buffer::new(&db, input);
let buf = Buffer::from_string(&db, &input);
let file_id = FileId::new(&db, "foo".to_string());
let lexed = lex(&db, file_id, buf);
lexed.lexemes(&db).clone()
11 changes: 10 additions & 1 deletion crates/analyzer-core/src/base_abstractions.rs
@@ -1,10 +1,19 @@
pub use logos::Span;

use crate::{lsp_file::LspFile, Database};

/// The input buffer.
#[salsa::input]
pub struct Buffer {
#[return_ref]
pub contents: String,
pub file: LspFile,
}

impl Buffer {
pub fn from_string(db: &Database, file: &String) -> Buffer {
let lsp_file = LspFile::new(file);
Buffer::new(db, lsp_file)
}
}

#[salsa::interned]
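Aside (not part of this commit): a minimal sketch of how the new constructor is meant to be called, mirroring the updated call sites in the benches and tests below. The buffer's text now lives behind the stored LspFile rather than a plain String field.

use analyzer_core::*;
use base_abstractions::*;

fn buffer_sketch() {
	let db = Database::new(|base, _| Ok(base.into()));
	let buf = Buffer::from_string(&db, &"hello\nworld\n".to_string());
	// The contents are now reached through the LspFile accessor.
	assert_eq!(buf.file(&db).get_file_content(), "hello\nworld\n");
}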
25 changes: 21 additions & 4 deletions crates/analyzer-core/src/lib.rs
@@ -1,5 +1,6 @@
pub mod base_abstractions;
pub mod lexer;
pub mod lsp_file;
pub mod parser;
pub mod preprocessor;

@@ -9,6 +10,7 @@ use logos::Logos;

use base_abstractions::*;
use lexer::*;
use lsp_file::{ChangeEvent, LspFile};
use preprocessor::*;

// #[derive(Default)]
@@ -77,15 +79,28 @@ impl Analyzer

fn filesystem(&self) -> HashMap<FileId, Buffer> { self.fs.map(|fs| fs.fs(&self.db)).unwrap_or_default() }

pub fn update(&mut self, file_id: FileId, input: String) {
pub fn file_change_event(&mut self, file_id: FileId, event_vec: &Vec<ChangeEvent>) {
let mut filesystem = self.filesystem();
filesystem.insert(file_id, Buffer::new(&self.db, input));

// TODO: avoid cloning
let mut lsp_file = self.get_file(file_id).clone();
for event in event_vec {
lsp_file.lazy_add(event);
}

filesystem.insert(file_id, Buffer::new(&self.db, lsp_file));
self.fs = Fs::new(&self.db, filesystem).into();
}

pub fn update(&mut self, file_id: FileId, input: &String) {
let mut filesystem = self.filesystem();
filesystem.insert(file_id, Buffer::from_string(&self.db, input));
self.fs = Fs::new(&self.db, filesystem).into();
}

pub fn input(&self, file_id: FileId) -> Option<&str> {
let buffer = self.buffer(file_id)?;
Some(buffer.contents(&self.db))
Some(buffer.file(&self.db).get_file_content())
}

pub fn buffer(&self, file_id: FileId) -> Option<Buffer> { self.filesystem().get(&file_id).copied() }
@@ -136,6 +151,8 @@ impl Analyzer
pub fn path(&self, id: FileId) -> String { id.path(&self.db) }

pub fn files(&self) -> Vec<String> { self.filesystem().keys().map(|k| k.path(&self.db)).collect() }

pub fn get_file(&self, id: FileId) -> &LspFile { self.buffer(id).unwrap().file(&self.db) }
}

// TODO: trait for workspace logic?
@@ -210,7 +227,7 @@ where

#[salsa::tracked(return_ref)]
pub fn lex(db: &dyn crate::Db, file_id: FileId, buf: Buffer) -> LexedBuffer {
let contents = buf.contents(db);
let contents = buf.file(db).get_file_content();
let lexer = {
let db = unsafe { std::mem::transmute(db) };
Token::lexer_with_extras(contents, Lextras { db: Some(db), file_id })
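Aside (not part of this commit): a hedged sketch of driving the new incremental entry point. It assumes an already-constructed Analyzer and a registered FileId; neither constructor appears in this diff.

use analyzer_core::lsp_file::{ChangeEvent, Position, Range};
use analyzer_core::{base_abstractions::FileId, Analyzer};

// Replace characters 0..5 of line 0 with "hi"; the analyzer patches the
// stored LspFile via lazy_add rather than reparsing the whole buffer.
fn apply_small_edit(analyzer: &mut Analyzer, file_id: FileId) {
	let events = vec![ChangeEvent {
		range: Some(Range::new(Position::new(0, 0), Position::new(0, 5))),
		text: "hi".to_string(),
	}];
	analyzer.file_change_event(file_id, &events);
}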
244 changes: 244 additions & 0 deletions crates/analyzer-core/src/lsp_file.rs
@@ -0,0 +1,244 @@
use logos::Source; // for slice

#[derive(Debug, Eq, PartialEq, PartialOrd, Copy, Clone, Default)]
pub struct Position {
pub line: usize, // lsp_types uses u32
pub character: usize,
}

impl Position {
pub fn new(line: usize, character: usize) -> Position { Position { line, character } }
}

#[derive(Debug, Eq, PartialEq, Copy, Clone, Default)]
pub struct Range {
pub start: Position,
pub end: Position,
}

impl Range {
pub fn new(start: Position, end: Position) -> Range { Range { start, end } }
}

pub struct ChangeEvent {
pub range: Option<Range>,
pub text: String,
}

#[derive(Clone, Debug)]
pub struct LspFile {
file: String, // File content, use OsString?
ranges: Vec<usize>, // one entry per line: the byte position of the line's last byte (see test_parse_file())
}

impl LspFile {
pub fn new(file: &String) -> Self {
let ranges = LspFile::parse_string(&file);
LspFile { file: file.clone(), ranges }
}

pub fn get_file_content(&self) -> &String { &self.file }

pub fn get_ranges(&self) -> &Vec<usize> { &self.ranges }

// helper: compute the last-byte offset of every line
fn parse_string(string: &String) -> Vec<usize> {
let mut result: Vec<usize> = Vec::new();
if string.is_empty() {
return result;
}

let chars = string.chars();
let mut byte_count = 0;
for (_, c) in chars.enumerate() {
byte_count += c.len_utf8();
if c == '\n' {
result.push(byte_count - 1);
}
}

// If there are trailing bytes after the last '\n', add a final entry
if *result.last().unwrap_or(&(usize::MAX - 1)) != byte_count - 1 {
result.push(byte_count - 1);
}

result
}

// clamps an LSP position to one that is valid in the current file
fn lsp_to_lsp(&self, lsp_pos: &Position) -> Position { self.byte_to_lsp(self.lsp_to_byte(lsp_pos)) }

pub fn line_char(&self, line: usize) -> usize {
// lines past the end of the file are a useful case to handle
if line >= self.ranges.len() {
return 0; // no characters exist on that line
}

let start_pos = if line == 0 { 0 } else { self.ranges.get(line - 1).unwrap_or(&0) + 1 };

let slice = self.file.slice(start_pos..self.ranges[line] + 1).unwrap_or("");
slice.chars().count()
}

pub fn lsp_to_byte(&self, lsp_pos: &Position) -> usize {
// O(1) in the number of lines (the scan below is linear in the target line's length)
// file is empty
if self.ranges.is_empty() {
return 0;
}

// line is beyond the last line, return last byte + 1
if lsp_pos.line >= self.ranges.len() {
return *self.ranges.last().unwrap() + 1;
}

let start_byte = if lsp_pos.line == 0 { 0 } else { self.ranges.get(lsp_pos.line - 1).unwrap_or(&0) + 1 };

// get byte offset for character position in line
let slice = self.file.slice(start_byte..self.ranges[lsp_pos.line]).unwrap_or("").chars();
let mut byte_count = 0;
for (i, c) in slice.enumerate() {
if i == lsp_pos.character {
break;
}
byte_count += c.len_utf8();
}

start_byte + byte_count
}

pub fn lsp_range_to_byte_range(&self, lsp_range: &Range) -> std::ops::Range<usize> {
let start = self.lsp_to_byte(&lsp_range.start);
let end = self.lsp_to_byte(&lsp_range.end);
start..end
}

// O(log n) in the number of lines, plus a scan of the target line
pub fn byte_to_lsp(&self, byte_pos: usize) -> Position {
// file is empty
if self.ranges.is_empty() {
return Position { line: 0, character: 0 };
}

if byte_pos > *self.ranges.last().unwrap_or(&0) {
return Position { line: self.ranges.len(), character: 0 }; // return next position of last line
}

let line = self.ranges.binary_search(&byte_pos).unwrap_or_else(|x| x);

// convert the byte offset within the line to a character position
let mut byte_count = if line == 0 { 0 } else { self.ranges[line - 1] + 1 };
let slice = self.file.slice(byte_count..self.ranges[line]).unwrap_or("").chars();
let mut char = slice.clone().count();

for (i, c) in slice.enumerate() {
byte_count += c.len_utf8();
if byte_count > byte_pos {
char = i;
break;
}
}

Position { line, character: char }
}

pub fn byte_range_to_lsp_range(&self, byte_range: &std::ops::Range<usize>) -> Range {
let start = self.byte_to_lsp(byte_range.start);
let end = self.byte_to_lsp(byte_range.end);
Range { start, end }
}

// updates content and line ranges from a TextDocumentContentChangeEvent
// lazily: only the newly inserted text is parsed,
// which is optimal for large files with small changes
pub fn lazy_add(&mut self, changes: &ChangeEvent) {
// The whole file changed, or the file was empty, so reparse as a new file
if changes.range.is_none() || self.ranges.is_empty() {
*self = LspFile::new(&changes.text);
return;
}

// calculate position in current file
let start_pos = self.lsp_to_lsp(&changes.range.unwrap().start); // inclusive
let end_pos_exc = self.lsp_to_lsp(&changes.range.unwrap().end); // exclusive

// a reversed range is invalid input; fail loudly
if start_pos > end_pos_exc {
panic!(
"range.start: {:?} is greater than range.end: {:?} in TextDocumentContentChangeEvent!",
start_pos, end_pos_exc
)
}

// parse input
let mut additional_ranges = LspFile::parse_string(&changes.text);
let addition_byte: i64 = additional_ranges.last().map_or(-1, |value| *value as i64);

// align additions to their placement in current file
let start_byte = self.lsp_to_byte(&start_pos);
let end_byte = self.lsp_to_byte(&end_pos_exc);
for elm in &mut additional_ranges {
*elm += start_byte;
}

// cache frequently used positions and sizes
let mut start_line = start_pos.line;
let end_line = end_pos_exc.line;
let range_size = self.ranges.len();

// extra adjustments are needed at the head and tail of the new additions
let tailing_end_bytes = self.lsp_to_byte(&Position { line: end_line + 1, character: 0 }) - end_byte;

// special cases when the change text is empty (a pure deletion)
if additional_ranges.is_empty() {
let end_line_byte = *self.ranges.get(end_line).unwrap_or(self.ranges.last().unwrap());
let val = end_line_byte.wrapping_sub(end_byte).wrapping_add(start_byte) as i64;
// we're deleting the whole file
if val < 0 {
self.file.clear();
self.ranges.clear();
return;
}

// deleting at or past the end of the file changes nothing
if start_line == range_size {
return;
}

// The change is just a deletion
if tailing_end_bytes != 0 || start_pos.character != 0 {
additional_ranges.push(val as usize);
}
} else {
// '\n' is our line break; when adding at the end of the file, avoid a duplicate range entry
if changes.text.chars().last() == Some('\n') && end_line != range_size {
additional_ranges.push(*additional_ranges.last().unwrap());
}
*additional_ranges.last_mut().unwrap() += tailing_end_bytes;
}

// we're appending at the end of the file:
// if the file doesn't end with '\n', merge the addition onto the last line;
// if it does, the addition becomes a new range entry
if start_line == range_size && self.file.chars().last() != Some('\n') {
start_line -= 1;
}

// update file
let range = start_byte..end_byte;
//info!("replacing range {:?} of {:?} with {:?}", range, &self.file[range.clone()], &changes.text);
self.file.replace_range(range, &changes.text);

// remove old ranges and add new ranges
let len = additional_ranges.len();
let s = (start_line).min(range_size);
let e = (end_line + 1).min(range_size);
self.ranges.splice(s..e, additional_ranges); // used for performance benefits

// realign the tail of the old ranges after the edit
let diff = (addition_byte + 1) - (end_byte as i64 - start_byte as i64);
for elm in self.ranges.iter_mut().skip(start_line + len) {
*elm = (*elm as i64 + diff) as usize;
}
}
}
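Aside (not part of this commit): an illustrative walk-through of the conversion rules above on a two-line file. The expected values follow from parse_string (one entry per line, holding the line's last byte offset) and from lazy_add re-parsing only the inserted text.

use analyzer_core::lsp_file::{ChangeEvent, LspFile, Position, Range};

fn conversion_sketch() {
	let mut file = LspFile::new(&"hello\nworld\n".to_string());

	// "hello\n" ends at byte 5, "world\n" at byte 11.
	assert_eq!(file.get_ranges(), &vec![5, 11]);

	// Line 1, character 0 is the 'w' at byte 6; the mapping round-trips.
	assert_eq!(file.lsp_to_byte(&Position::new(1, 0)), 6);
	assert_eq!(file.byte_to_lsp(6), Position::new(1, 0));

	// Replace "hello" with "hi": content and ranges are patched in place.
	file.lazy_add(&ChangeEvent {
		range: Some(Range::new(Position::new(0, 0), Position::new(0, 5))),
		text: "hi".to_string(),
	});
	assert_eq!(file.get_file_content(), "hi\nworld\n");
	assert_eq!(file.get_ranges(), &vec![2, 8]);
}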
2 changes: 1 addition & 1 deletion crates/analyzer-core/src/preprocessor.rs
@@ -572,7 +572,7 @@ mod test {
let mut pp = PreprocessorState::new(|path| FileId::new(&db, path.into()), |_| unreachable!());

let test_id = FileId::new(&db, "<test-code>.p4".into());
let input = Buffer::new(&db, s.into());
let input = Buffer::from_string(&db, &s.to_string());
let lexed = lex(&db, test_id, input);
let mut lexemes = lexed.lexemes(&db).iter().cloned().map(|(tk, span)| (test_id, tk, span)).collect();

2 changes: 1 addition & 1 deletion crates/analyzer-core/tests/lexer.rs
@@ -7,7 +7,7 @@ use pretty_assertions::assert_eq;

fn lex_str(s: &str) -> Vec<Token> {
let db = Database::new(|base, _| Ok(base.into()));
let buf = Buffer::new(&db, s.to_string());
let buf = Buffer::from_string(&db, &s.to_string());
let file_id = FileId::new(&db, "foo.p4".to_string());
let lexed = lex(&db, file_id, buf);
lexed.lexemes(&db).iter().map(|(tk, _)| tk).cloned().collect()