Skip to content

Commit

Permalink
Optimised softKeyword (j-mie6#154)
Browse files Browse the repository at this point in the history
* Added a generalised form of SoftKeyword to handle unicode, basic, and not required

* Ruled out forward source compatibility issue

* Fixed some style issues

* Improved test coverage

* Removed unused code

* Coverage off for NotRequired
  • Loading branch information
j-mie6 authored Jan 26, 2023
1 parent f3e0427 commit 5b53048
Show file tree
Hide file tree
Showing 12 changed files with 204 additions and 73 deletions.
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ inThisBuild(List(
ProblemFilters.exclude[DirectMissingMethodProblem]("parsley.errors.combinator#ErrorMethods.unexpected"),
ProblemFilters.exclude[MissingClassProblem]("parsley.token.errors.FilterOps"),
ProblemFilters.exclude[MissingClassProblem]("parsley.token.errors.FilterOps$"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("parsley.token.predicate#CharPredicate.asInternalPredicate")
),
tlVersionIntroduced := Map(
"2.13" -> "1.5.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ private [backend] object Choice {
//case op@MaxOp(o) => Some((o.head, Some(Desc(o)), o.size, backtracks))
//case _: StringLiteral | RawStringLiteral => Some(('"', Some(Desc("string")), 1, backtracks))
// TODO: This can be done for case insensitive things too, but with duplicated branching
case t@Specific(s) if t.caseSensitive => Some((s.head, Some(ExpectDesc(s)), s.codePointCount(0, s.length), backtracks))
case t@token.SoftKeyword(s) if t.caseSensitive => Some((s.head, Some(ExpectDesc(s)), s.codePointCount(0, s.length), backtracks))
case Attempt(t) => tablable(t, backtracks = true)
case (_: Pure[_]) <*> t => tablable(t, backtracks)
case Lift2(_, t, _) => tablable(t, backtracks)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ package parsley.internal.deepembedding.singletons

import parsley.token.descriptions.SpaceDesc
import parsley.token.descriptions.numeric.PlusSignPresence
import parsley.token.errors.{ErrorConfig, LabelConfig}
import parsley.token.errors.ErrorConfig

import parsley.internal.deepembedding.Sign.SignType
import parsley.internal.machine.instructions
Expand Down Expand Up @@ -43,14 +43,6 @@ private [parsley] class NonSpecific(name: String, unexpectedIllegal: String => S
// Builds the machine instruction for this node: a `TokenNonSpecific` given `name` and `unexpectedIllegal` in its
// first argument list and `start`, `letter` and `illegal` in its second.
override def instr: instructions.Instr = new instructions.TokenNonSpecific(name, unexpectedIllegal)(start, letter, illegal)
}

// Deep-embedding `Singleton[Unit]` node for the specific token `specific`.
// It holds no logic of its own: `instr` hands every constructor argument, unchanged and in the same order,
// to the `TokenSpecific` machine instruction. `caseSensitive` is exposed as a public `val`.
private [parsley] final class Specific(private [Specific] val specific: String, expected: LabelConfig,
expectedEnd: String, letter: Char => Boolean, val caseSensitive: Boolean) extends Singleton[Unit] {
// $COVERAGE-OFF$
// Debug rendering of this node, showing only the token text.
override def pretty: String = s"specific($specific)"
// $COVERAGE-ON$
// The machine instruction corresponding to this node.
override def instr: instructions.Instr = new instructions.TokenSpecific(specific, expected, expectedEnd, letter, caseSensitive)
}

/*
private [parsley] final class MaxOp(private [MaxOp] val operator: String, ops: Set[String]) extends Singleton[Unit] {
// $COVERAGE-OFF$
Expand All @@ -59,9 +51,3 @@ private [parsley] final class MaxOp(private [MaxOp] val operator: String, ops: S
override def instr: instructions.Instr = new instructions.TokenMaxOp(operator, ops)
}
*/

// $COVERAGE-OFF$
// Extractor for `Specific` nodes, visible within `deepembedding` only.
private [deepembedding] object Specific {
// Always succeeds (hence `Some` rather than `Option`), exposing just the token text of the node.
def unapply(self: Specific): Some[String] = Some(self.specific)
}
// $COVERAGE-ON$
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/* SPDX-FileCopyrightText: © 2023 Parsley Contributors <https://github.com/j-mie6/Parsley/graphs/contributors>
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.internal.deepembedding.singletons.token

import parsley.token.errors.LabelConfig
import parsley.token.predicate.CharPredicate

import parsley.internal.deepembedding.singletons.Singleton
import parsley.internal.machine.instructions

// Deep-embedding `Singleton[Unit]` node for the soft keyword `specific`.
// It holds no logic of its own: `instr` hands every constructor argument, unchanged and in the same order,
// to the `instructions.token.SoftKeyword` machine instruction. `caseSensitive` is exposed as a public `val`.
private [parsley] final class SoftKeyword(private [SoftKeyword] val specific: String, letter: CharPredicate, val caseSensitive: Boolean,
expected: LabelConfig, expectedEnd: String) extends Singleton[Unit] {
// $COVERAGE-OFF$
// Debug rendering of this node, showing only the keyword text.
override def pretty: String = s"softKeyword($specific)"
// $COVERAGE-ON$
// The machine instruction corresponding to this node.
override def instr: instructions.Instr = new instructions.token.SoftKeyword(specific, letter, caseSensitive, expected, expectedEnd)
}

/*
private [parsley] final class MaxOp(private [MaxOp] val operator: String, ops: Set[String]) extends Singleton[Unit] {
// $COVERAGE-OFF$
override def pretty: String = s"maxOp($operator)"
// $COVERAGE-ON$
override def instr: instructions.Instr = new instructions.TokenMaxOp(operator, ops)
}
*/

// $COVERAGE-OFF$
// Extractor for `SoftKeyword` nodes, visible within `deepembedding` only.
private [deepembedding] object SoftKeyword {
// Always succeeds (hence `Some` rather than `Option`), exposing just the keyword text of the node.
def unapply(self: SoftKeyword): Some[String] = Some(self.specific)
}
// $COVERAGE-ON$
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,13 @@ private [parsley] final class Context(private [machine] var instrs: Array[Instr]
offset += 1
c
}
private [machine] def fastUncheckedConsumeChars(n: Int) = {
private [machine] def fastConsumeSupplementaryChar(): Unit = {
    assert(this.peekChar.isHighSurrogate, "must have a high surrogate to consume supplementary")
    // a supplementary character is one column wide but spans two chars of input;
    // it can be neither a tab nor a newline, so no further position tracking is done
    col += 1
    offset += 2
}
private [machine] def fastUncheckedConsumeChars(n: Int): Unit = {
    // advances the column and the offset by the same amount `n`, performing no checks on what is skipped
    col += n
    offset += n
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ private [internal] class SupplementaryCharTok(codepoint: Int, x: Any, errorItem:
override def apply(ctx: Context): Unit = {
ensureRegularInstruction(ctx)
if (ctx.moreInput(2) && ctx.peekChar(0) == h && ctx.peekChar(1) == l) {
// not going to be a tab or newline
ctx.offset += 2
ctx.col += 1
ctx.fastConsumeSupplementaryChar()
ctx.pushAndContinue(x)
}
else ctx.expectedFail(errorItem, unexpectedWidth = 1)
Expand Down Expand Up @@ -146,9 +144,7 @@ private [internal] final class UniSat(f: Int => Boolean, expected: Option[Expect
lazy val l = ctx.peekChar(1)
lazy val c = Character.toCodePoint(hc, l)
if (ctx.moreInput(2) && hc.isHighSurrogate && Character.isSurrogatePair(hc, l) && f(c)) {
// not going to be a tab or newline
ctx.offset += 2
ctx.col += 1
ctx.fastConsumeSupplementaryChar()
ctx.pushAndContinue(c)
}
else if (ctx.moreInput && f(h)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import scala.annotation.tailrec

import parsley.XAssert._
import parsley.token.descriptions.SpaceDesc
import parsley.token.errors.{ErrorConfig, LabelConfig}
import parsley.token.errors.ErrorConfig

import parsley.internal.errors.{ExpectDesc, ExpectItem, UnexpectDesc}
import parsley.internal.machine.Context
Expand Down Expand Up @@ -242,6 +242,7 @@ private [internal] final class TokenNonSpecific(name: String, unexpectedIllegal:
// $COVERAGE-ON$
}

/*
private [instructions] abstract class TokenSpecificAllowTrailing(
specific: String, expected: Option[ExpectDesc], protected final val expectedEnd: Option[ExpectDesc], caseSensitive: Boolean) extends Instr {
def this(specific: String, expected: LabelConfig, expectedEnd: String, caseSensitive: Boolean) = {
Expand Down Expand Up @@ -272,25 +273,6 @@ private [instructions] abstract class TokenSpecificAllowTrailing(
}
}
// Instruction for the specific token `specific` that must not be directly followed by a character satisfying
// `letter`. Only the check performed after the token is defined here; everything else comes from
// `TokenSpecificAllowTrailing` (presumably the reading of `specific` itself -- confirm against that class).
private [internal] final class TokenSpecific(specific: String, expected: LabelConfig, _expectedEnd: String, letter: Char => Boolean, caseSensitive: Boolean)
extends TokenSpecificAllowTrailing(specific, expected, _expectedEnd, caseSensitive) {
// `i` is an index into `ctx.input`; the char at that index (if in bounds) is tested against `letter`.
override def postprocess(ctx: Context, i: Int): Unit = {
// a `letter` at index `i` means the token is only a prefix of something longer: fail with `expectedEnd`
if (i < ctx.inputsz && letter(ctx.input.charAt(i))) {
ctx.expectedFail(expectedEnd, unexpectedWidth = 1) //This should only report a single token
// note the order: the failure is raised first, and only then is the saved state restored
ctx.restoreState()
}
else {
// success: drop the head of the `states` stack without restoring it, then push the unit result
ctx.states = ctx.states.tail
ctx.pushAndContinue(())
}
}

// $COVERAGE-OFF$
override def toString: String = s"TokenSpecific($specific)"
// $COVERAGE-ON$
}

/*
private [internal] final class TokenMaxOp(operator: String, _ops: Set[String]) extends TokenSpecificAllowTrailing(operator, true) {
private val ops = Radix.makeSet(_ops.collect {
case op if op.length > operator.length && op.startsWith(operator) => op.substring(operator.length)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/* SPDX-FileCopyrightText: © 2023 Parsley Contributors <https://github.com/j-mie6/Parsley/graphs/contributors>
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.internal.machine.instructions.token

import parsley.internal.machine.Context

// Machine-level character predicate evaluated directly against a `Context`.
// The class is sealed, so the implementations are exactly those defined alongside it in this file.
private [parsley] sealed abstract class CharPredicate {
//def pop(ctx: Context): Boolean
// Tests the predicate against the input of `ctx`; the implementations in this file only look at the input
// (via `moreInput`/`peekChar`) and do not consume it.
def peek(ctx: Context): Boolean
}
// Predicate over single 16-bit `Char`s.
private [parsley] class Basic(f: Char => Boolean) extends CharPredicate {
    // Holds only when there is input left and the next char satisfies `f`.
    def peek(ctx: Context): Boolean = {
        if (ctx.moreInput) f(ctx.peekChar)
        else false
    }
}
// Predicate over full unicode code points, where `f` receives the code point as an `Int`.
private [parsley] class Unicode(f: Int => Boolean) extends CharPredicate {
    // Tests `f` against the next code point of the input without consuming anything.
    //
    // If the next two chars form a valid surrogate pair, the answer is `f` applied to the combined code point and
    // nothing else: the high surrogate is never tested on its own in that case, as it is only half of a character
    // and could wrongly satisfy `f` after the real code point has been rejected. Otherwise, the next char (if
    // there is one) is tested as a code point by itself.
    def peek(ctx: Context): Boolean = {
        if (ctx.moreInput(2)) {
            val hc = ctx.peekChar(0)
            val l = ctx.peekChar(1)
            // `isSurrogatePair` already implies that `hc` is a high surrogate
            if (Character.isSurrogatePair(hc, l)) f(Character.toCodePoint(hc, l))
            else f(hc.toInt)
        }
        else ctx.moreInput && f(ctx.peekChar(0).toInt)
    }
}
// Predicate that never holds, whatever the input.
private [parsley] object NotRequired extends CharPredicate {
// $COVERAGE-OFF$
// Always false: `ctx` is not inspected at all.
def peek(ctx: Context): Boolean = false
// $COVERAGE-ON$
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/* SPDX-FileCopyrightText: © 2023 Parsley Contributors <https://github.com/j-mie6/Parsley/graphs/contributors>
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.internal.machine.instructions.token

import parsley.token.errors.LabelConfig
import parsley.token.predicate

import parsley.internal.errors.ExpectDesc
import parsley.internal.machine.Context
import parsley.internal.machine.XAssert._
import parsley.internal.machine.instructions.Instr

/** Instruction that reads the keyword `specific` and then requires that it is not directly followed by a
  * character accepted by `letter`. On success `()` is pushed; on any failure the state saved at the start of
  * the instruction is restored.
  *
  * The primary constructor expects `specific` to already be lower-cased when `caseSensitive` is false; the
  * secondary constructor takes care of this.
  *
  * @param specific the keyword to read
  * @param letter predicate describing the characters that may not directly follow the keyword
  * @param caseSensitive when false, input characters are lower-cased before being compared
  * @param expected the expected item reported when the keyword itself cannot be read
  * @param expectedEnd the expected item reported when the keyword is followed by a `letter`
  */
private [internal] final class SoftKeyword(
    specific: String, letter: CharPredicate, caseSensitive: Boolean, expected: Option[ExpectDesc], expectedEnd: Option[ExpectDesc]) extends Instr {
    def this(specific: String, letter: predicate.CharPredicate, caseSensitive: Boolean, expected: LabelConfig, expectedEnd: String) = {
        this(if (caseSensitive) specific else specific.toLowerCase,
             letter.asInternalPredicate,
             caseSensitive,
             expected.asExpectDesc, Some(new ExpectDesc(expectedEnd)))
    }

    // length of the keyword in chars (UTF-16 code units)
    private [this] final val strsz = specific.length
    // length of the keyword in code points, used as the width of the "unexpected" part of errors
    private [this] final val numCodePoints = specific.codePointCount(0, strsz)

    final override def apply(ctx: Context): Unit = {
        ensureRegularInstruction(ctx)
        // the keyword cannot possibly be present if fewer than `strsz` chars remain
        if (ctx.moreInput(strsz)) {
            ctx.saveState()
            readSpecific(ctx, 0)
        }
        else ctx.expectedFail(expected, numCodePoints)
    }

    // Runs once the whole keyword has been consumed: checks what comes directly after it.
    private def postprocess(ctx: Context): Unit = {
        if (letter.peek(ctx)) {
            // note the order: the failure is raised first, and only then is the saved state restored
            ctx.expectedFail(expectedEnd, unexpectedWidth = 1) //This should only report a single token
            ctx.restoreState()
        }
        else {
            // success: drop the state saved by `apply` without restoring it
            ctx.states = ctx.states.tail
            ctx.pushAndContinue(())
        }
    }

    // Reads the next char of input, lower-casing it when matching case-insensitively.
    val readCharCaseHandledBMP: Context => Char = {
        if (caseSensitive) (ctx: Context) => ctx.peekChar
        else (ctx: Context) => ctx.peekChar.toLower
    }

    // Reads the next two chars of input as one code point, lower-casing it when matching case-insensitively.
    // This does NOT validate that the two chars form a surrogate pair: callers must check that first.
    val readCharCaseHandledSupplementary: Context => Int = {
        if (caseSensitive) (ctx: Context) => Character.toCodePoint(ctx.peekChar(0), ctx.peekChar(1))
        else (ctx: Context) => Character.toLowerCase(Character.toCodePoint(ctx.peekChar(0), ctx.peekChar(1)))
    }

    // The keyword was not found: rewind to the state saved by `apply` and report what was expected.
    private def keywordMismatch(ctx: Context): Unit = {
        ctx.restoreState()
        ctx.expectedFail(expected, numCodePoints)
    }

    // Matches the keyword from char index `j` onwards against the input, one code point at a time.
    @scala.annotation.tailrec
    final private def readSpecific(ctx: Context, j: Int): Unit = {
        if (j < strsz) {
            val c = specific.codePointAt(j)
            if (Character.isSupplementaryCodePoint(c)) {
                // A supplementary code point can only be matched by a genuine surrogate pair in the input:
                // `toCodePoint` performs no validation itself, and falling back to a comparison with `c.toChar`
                // would truncate `c` to 16 bits and could match an unrelated BMP character.
                if (ctx.moreInput(2) && Character.isSurrogatePair(ctx.peekChar(0), ctx.peekChar(1))
                                     && readCharCaseHandledSupplementary(ctx) == c) {
                    ctx.fastConsumeSupplementaryChar()
                    readSpecific(ctx, j + 2)
                }
                else keywordMismatch(ctx)
            }
            else if (ctx.moreInput && readCharCaseHandledBMP(ctx) == c.toChar) {
                ctx.consumeChar()
                readSpecific(ctx, j + 1)
            }
            else keywordMismatch(ctx)
        }
        else postprocess(ctx)
    }

    // $COVERAGE-OFF$
    override def toString: String = s"SoftKeyword($specific)"
    // $COVERAGE-ON$
}
4 changes: 4 additions & 0 deletions parsley/shared/src/main/scala/parsley/token/predicate.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ object predicate {
// This predicate in the form of a `Parsley[Unit]` parser.
private [token] def toNative: Parsley[Unit]
// Test of this predicate against the string `s`; judging by the name, applied at the start of `s`.
private [token] def startsWith(s: String): Boolean
// Test of this predicate against the string `s`; judging by the name, applied at the end of `s`.
private [token] def endsWith(s: String): Boolean
// This predicate as the machine-level `CharPredicate` of the `instructions.token` package.
private [parsley] def asInternalPredicate: parsley.internal.machine.instructions.token.CharPredicate
}

/** More generic character predicate, which reads any unicode codepoint.
Expand All @@ -45,6 +46,7 @@ object predicate {
// Parser form: `toUnicode` with its result discarded by `void`.
private [token] override def toNative = toUnicode.void
// True when `s` is non-empty and its first code point satisfies `predicate`.
private [token] def startsWith(s: String) = s.nonEmpty && predicate(s.codePointAt(0))
// True when `s` is non-empty and its last code point (the one ending at index `s.length`) satisfies `predicate`.
private [token] def endsWith(s: String) = s.nonEmpty && predicate(s.codePointBefore(s.length))
// Wraps `predicate` in the machine-level `Unicode` predicate.
private [parsley] def asInternalPredicate = new parsley.internal.machine.instructions.token.Unicode(predicate)
}

/** Basic character predicate, which reads regular Scala 16-bit characters.
Expand All @@ -63,6 +65,7 @@ object predicate {
// Parser form: `toBmp` with its result discarded by `void`.
private [token] override def toNative = toBmp.void
// True when `s` has a first char and that char satisfies `predicate`; false for the empty string.
private [token] def startsWith(s: String) = s.headOption.exists(predicate)
// True when `s` has a last char and that char satisfies `predicate`; false for the empty string.
private [token] def endsWith(s: String) = s.lastOption.exists(predicate)
// Wraps `predicate` in the machine-level `Basic` predicate.
private [parsley] def asInternalPredicate = new parsley.internal.machine.instructions.token.Basic(predicate)
}
// this ruins the ability to pass functions in as it creates an overloading ambiguity
/*object Basic {
Expand All @@ -80,6 +83,7 @@ object predicate {
// Parser form: simply `empty`.
private [token] override def toNative = empty
// Always true, whatever `s` is (including the empty string).
private [token] def startsWith(s: String) = true
// Always true, whatever `s` is (including the empty string).
private [token] def endsWith(s: String) = true
// Maps to the machine-level `NotRequired` object; no new instance is created.
private [parsley] def asInternalPredicate = parsley.internal.machine.instructions.token.NotRequired
}

/** This object provides implicit functionality for constructing `CharPredicate` values.
Expand Down
Loading

0 comments on commit 5b53048

Please sign in to comment.