From cb855dd5f9f36795c735a004ff6ac6aaca8d7828 Mon Sep 17 00:00:00 2001 From: Dhruv Rajvanshi Date: Sat, 25 Mar 2023 13:02:42 +0100 Subject: [PATCH] Add offset field to lexer --- hadesboot/build.gradle.kts | 6 ++ .../src/main/kotlin/hadesc/ast/SourceFile.kt | 3 +- .../src/main/kotlin/hadesc/parser/Lexer.kt | 5 +- .../src/main/kotlin/hadesc/parser/Parser.kt | 21 ++++-- .../src/test/kotlin/hadesc/LexerTests.kt | 28 +++++++ .../src/test/kotlin/hadesc/ParserTests.kt | 75 +++++++++++++++++++ 6 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 hadesboot/src/test/kotlin/hadesc/LexerTests.kt create mode 100644 hadesboot/src/test/kotlin/hadesc/ParserTests.kt diff --git a/hadesboot/build.gradle.kts b/hadesboot/build.gradle.kts index 4f5143c4..756a3007 100644 --- a/hadesboot/build.gradle.kts +++ b/hadesboot/build.gradle.kts @@ -40,6 +40,7 @@ dependencies { testImplementation("org.junit.jupiter:junit-jupiter-engine:$junitVersion") testImplementation("org.junit.jupiter:junit-jupiter-api:$junitVersion") + testImplementation("io.mockk:mockk:1.13.4") } java { @@ -48,6 +49,11 @@ java { } } +//tasks.withType { +// duplicatesStrategy = DuplicatesStrategy.EXCLUDE // NOCOMMIT +//} + + tasks.test { workingDir = File("..") environment["HADES_HOME"] = hadesHome diff --git a/hadesboot/src/main/kotlin/hadesc/ast/SourceFile.kt b/hadesboot/src/main/kotlin/hadesc/ast/SourceFile.kt index ea0c76e3..59960a6b 100644 --- a/hadesboot/src/main/kotlin/hadesc/ast/SourceFile.kt +++ b/hadesboot/src/main/kotlin/hadesc/ast/SourceFile.kt @@ -6,5 +6,6 @@ import hadesc.qualifiedname.QualifiedName data class SourceFile( override val location: SourceLocation, val moduleName: QualifiedName, - val declarations: List + val declarations: List, + val length: Int, ) : ScopeTree diff --git a/hadesboot/src/main/kotlin/hadesc/parser/Lexer.kt b/hadesboot/src/main/kotlin/hadesc/parser/Lexer.kt index 4ce1c494..37de4cde 100644 --- a/hadesboot/src/main/kotlin/hadesc/parser/Lexer.kt +++ b/hadesboot/src/main/kotlin/hadesc/parser/Lexer.kt @@ -1,7 +1,6 @@ package hadesc.parser import hadesc.ast.Token -import hadesc.context.FileTextProvider import hadesc.location.Position import hadesc.location.SourceLocation import hadesc.location.SourcePath @@ -81,6 +80,8 @@ class Lexer(private val file: SourcePath, text: Text) { private var currentLine: Int = 1 private var currentColumn: Int = 1 + var offset: Int = 0 + fun startPosition(): Position = Position(startLine, startColumn) fun stopPosition(): Position = Position(currentLine, currentColumn) @@ -94,6 +95,7 @@ class Lexer(private val file: SourcePath, text: Text) { val result = currentChar currentChar = nextChar nextChar = iter.nextOrEOFChar() + offset++ if (result == '\n') { currentLine++ currentColumn = 1 @@ -105,6 +107,7 @@ class Lexer(private val file: SourcePath, text: Text) { } } + val offset get() = state.offset fun nextToken(): Token { skipWhitespace() if (currentChar == '/' && state.nextChar == '/') { diff --git a/hadesboot/src/main/kotlin/hadesc/parser/Parser.kt b/hadesboot/src/main/kotlin/hadesc/parser/Parser.kt index 8c0a469c..af6c1aa3 100644 --- a/hadesboot/src/main/kotlin/hadesc/parser/Parser.kt +++ b/hadesboot/src/main/kotlin/hadesc/parser/Parser.kt @@ -2,7 +2,6 @@ package hadesc.parser import hadesc.ast.* import hadesc.context.Context -import hadesc.context.FileTextProvider import hadesc.diagnostics.Diagnostic import hadesc.hir.BinaryOperator import hadesc.location.HasLocation @@ -90,14 +89,20 @@ class Parser( private val file: SourcePath, text: Text ) { - private val tokenBuffer = TokenBuffer(maxLookahead = 4, lexer = Lexer(file, text)) + private val lexer = Lexer(file, text) + + private val tokenBuffer = TokenBuffer(maxLookahead = 4, lexer = lexer) private val currentToken get() = tokenBuffer.currentToken fun parseSourceFile(): SourceFile { + val startOffset = tokenBuffer.offset val declarations = parseDeclarations() val start = Position(1, 1) val location = SourceLocation(file, start, currentToken.location.stop) - val sourceFile = SourceFile(location, moduleName, declarations) + // consume remaining whitespace/comments by asking for the next token + tokenBuffer.advance() + val stopOffset = tokenBuffer.offset + val sourceFile = SourceFile(location, moduleName, declarations, length = stopOffset - startOffset) ctx.resolver.onParseSourceFile(sourceFile) return sourceFile } @@ -1498,7 +1503,7 @@ class Parser( } class TokenBuffer(private val maxLookahead: Int, private val lexer: Lexer) { - private val buffer: Array = Array(maxLookahead) { lexer.nextToken() } + private val buffer: Array> = Array(maxLookahead) { lexer.offset to lexer.nextToken() } private var current = 0 @@ -1507,12 +1512,14 @@ class TokenBuffer(private val maxLookahead: Int, private val lexer: Lexer) { val lastToken get() = _lastToken val currentToken: Token get() { - return buffer[current] + return buffer[current].second } + val offset get() = buffer[current].first + fun advance(): Token { val result = currentToken - buffer[current] = lexer.nextToken() + buffer[current] = lexer.offset to lexer.nextToken() current = (current + 1) % maxLookahead _lastToken = result return result @@ -1520,6 +1527,6 @@ class TokenBuffer(private val maxLookahead: Int, private val lexer: Lexer) { fun peek(offset: Int): Token { require(offset < maxLookahead) { "Tried to peek past max lookahead $maxLookahead" } - return buffer[(current + offset) % maxLookahead] + return buffer[(current + offset) % maxLookahead].second } } diff --git a/hadesboot/src/test/kotlin/hadesc/LexerTests.kt b/hadesboot/src/test/kotlin/hadesc/LexerTests.kt new file mode 100644 index 00000000..d8b09353 --- /dev/null +++ b/hadesboot/src/test/kotlin/hadesc/LexerTests.kt @@ -0,0 +1,28 @@ +package hadesc + +import hadesc.location.SourcePath +import hadesc.parser.Lexer +import hadesc.text.Text +import kotlin.io.path.Path +import kotlin.test.Test +import kotlin.test.assertEquals + +class LexerTests { + @Test + fun `lexer offset should be correct`() { + val text = "def foo bar (" + val lexer = makeLexer(text) + assertEquals(0, lexer.offset) + + lexer.nextToken() + assertEquals(3, lexer.offset) + + lexer.nextToken() + assertEquals(7, lexer.offset) + + } + + +} +private fun makeLexer(text: String) = + Lexer(SourcePath(Path("test.hds")), Text.from(text)) \ No newline at end of file diff --git a/hadesboot/src/test/kotlin/hadesc/ParserTests.kt b/hadesboot/src/test/kotlin/hadesc/ParserTests.kt new file mode 100644 index 00000000..55b47e99 --- /dev/null +++ b/hadesboot/src/test/kotlin/hadesc/ParserTests.kt @@ -0,0 +1,75 @@ +package hadesc + +import hadesc.context.Context +import hadesc.location.SourcePath +import hadesc.parser.Parser +import hadesc.qualifiedname.QualifiedName +import hadesc.text.Text +import io.mockk.every +import io.mockk.mockk +import org.junit.jupiter.api.Test +import kotlin.io.path.Path +import kotlin.test.assertEquals + +class ParserTests { + private val ctx = mockk { + every { resolver } returns mockk { + every { onParseBlock(any()) } returns unit + every { onParseSourceFile(any()) } returns unit + every { onParseDeclaration(any()) } returns unit + every { onParseClosure(any()) } returns unit + every { onParseMatchArm(any()) } returns unit + every { onParseMatchExpression(any()) } returns unit + every { onParseScopeNode(any()) } returns unit + } + every { makeName(any()) } answers { Name(invocation.args[0] as String) } + } + + @Test + fun `sourceFiles have correct length`() { + val text = """ + def foo(): Void {} + """.trimIndent() + val parser = makeParser(text) + val sourceFile = parser.parseSourceFile() + + assertEquals( + text.length, + sourceFile.length, + ) + } + + @Test + fun `Source files with leading comments have correct length`() { + val text = """ + // this is a comment + def foo(): Void {} + """.trimIndent() + val parser = makeParser(text) + val sourceFile = parser.parseSourceFile() + + assertEquals( + text.length, + sourceFile.length + ) + } + + @Test + fun `Source files with trailing comments have the correct length`() { + val text = """ + // this is a comment + def foo(): Void {} + // a trailing comment + """.trimIndent() + val parser = makeParser(text) + val sourceFile = parser.parseSourceFile() + + assertEquals( + text.length + 1, // 1 for EOF character + sourceFile.length + ) + } + + private fun makeParser(source: String) = + Parser(ctx, QualifiedName(emptyList()), SourcePath(Path("test.hds")), Text.from(source)) +} \ No newline at end of file