Skip to content

Commit

Permalink
simple opening name search engine
Browse files Browse the repository at this point in the history
  • Loading branch information
ornicar committed Oct 2, 2022
1 parent b905eab commit 18e0405
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 11 deletions.
64 changes: 53 additions & 11 deletions modules/opening/src/main/OpeningSearch.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package lila.opening

import chess.opening.{ FullOpening, FullOpeningDB }
import java.text.Normalizer

import lila.common.base.StringUtils.levenshtein
import lila.common.Heapsort.implicits._
import lila.memo.CacheApi
import lila.common.Heapsort

/** A single hit produced by the opening name search.
  *
  * Declared `final`: case classes are value types and are not meant to be extended.
  *
  * @param opening the opening that matched the query
  */
final case class OpeningSearchResult(opening: FullOpening)

Expand All @@ -13,18 +14,59 @@ final class OpeningSearch(cacheApi: CacheApi, explorer: OpeningExplorer) {
// Presumably the maximum number of results to return; NOTE(review): not referenced by the
// visible new code (the older lines below hard-code the literal 32) — confirm it is still used.
val max = 32

/** Searches openings by name and wraps every hit in an [[OpeningSearchResult]].
  *
  * The result is passed through `fuccess`, which presumably lifts an already-computed
  * value into a completed `Fu` — confirm against its definition.
  *
  * NOTE(review): this span comes from a rendered diff and appears to contain BOTH sides of
  * the change: the Levenshtein-based `Heapsort.topN` expression (old) followed by the
  * delegation to the companion object (new). Confirm which lines belong to the final file.
  *
  * @param q the free-text query typed by the user
  * @return the matching openings, each wrapped in an [[OpeningSearchResult]]
  */
def apply(q: String): Fu[List[OpeningSearchResult]] = fuccess {
// NOTE(review): looks like the removed side of the diff. It pairs every opening with its
// Levenshtein distance to the query and takes the top 32 under levenshteinOrdering; as
// written here its value is discarded because another expression follows it.
Heapsort
.topN[(FullOpening, Int), Iterable[(FullOpening, Int)]](
Opening.shortestLines.values map { op =>
(op, levenshtein(q, op.name))
},
32,
levenshteinOrdering
)
// Delegates to the companion object's search and wraps each opening it returns.
OpeningSearch(q).map(OpeningSearchResult)
}
}

object OpeningSearch {

// Every opening from Opening.shortestLines, held in a Vector so that an opening can be
// addressed by its integer index (see Position).
private val openings: Vector[FullOpening] = Opening.shortestLines.values.toVector

// Type aliases that name the role each plain String/Int plays in the search code.
// Token: one lowercase word of a query or of an opening key.
private type Token = String
// Position: index of an opening inside `openings`.
private type Position = Int
// Freq: how many times an opening position was hit by the query tokens.
private type Freq = Int
// NameSize: length of an opening's name, used as a ranking tie-breaker.
private type NameSize = Int

// Matches any single character that is neither a word character (\w) nor a dash;
// the query tokenizer deletes these characters.
private val tokenR = """[^\w-]""".r
// Matches a run of two or more dashes, collapsed to a single dash by the query tokenizer.
private val tokenMultiDashRegex = """-{2,}""".r
// Tokens dropped from both queries and opening keys; presumably because they are too
// common in opening names to discriminate between them.
private val excludeTokens = Set("opening", "variation")

/** Splits a free-text query into lowercase search tokens.
  *
  * Steps, in order: trim and turn spaces into dashes, collapse runs of dashes into one,
  * NFD-normalize, delete every character matched by `tokenR`, lowercase, split on '-',
  * and drop the tokens listed in `excludeTokens`.
  *
  * @param str the raw query text
  * @return the remaining tokens, in the order they appear in the query
  */
private def tokenize(str: String): List[Token] = {
// Spaces become dashes so that '-' is the only separator; repeated dashes collapse to one.
val singleDashes = tokenMultiDashRegex.replaceAllIn(str.trim.replace(' ', '-'), "-")
// NFD decomposes accented letters into base letter + combining mark; presumably so the
// tokenR removal below strips the marks and keeps the base letters — verify.
val normalized = Normalizer.normalize(singleDashes, Normalizer.Form.NFD)
tokenR
.replaceAllIn(normalized, "")
.toLowerCase
.split('-')
.view
// NOTE(review): the next line maps tuples to OpeningSearchResult, which does not fit a
// pipeline of String tokens; it looks like a removed diff line that leaked into this
// span — confirm it is not part of the method.
.map { case (op, _) => OpeningSearchResult(op) }
.filterNot(excludeTokens.contains)
.toList
}
/** Derives the search tokens of an opening from its key.
  *
  * The key is lowercased and split on '_'; tokens listed in `excludeTokens` are dropped.
  *
  * @param opening the opening to tokenize
  * @return the remaining key parts, in key order
  */
private def tokenize(opening: FullOpening): List[Token] = {
  val keyParts = opening.key.toLowerCase.split('_')
  keyParts.iterator.filterNot(part => excludeTokens.contains(part)).toList
}

// Orders (opening, distance) pairs by the negated distance, so a smaller distance
// compares as greater.
private val levenshteinOrdering =
  Ordering.by[(FullOpening, Int), Int] { case (_, distance) => -distance }
// Inverted index: for every token, the set of positions (indices into `openings`) of the
// openings whose key contains that token.
private val index: Map[Token, Set[Position]] =
  openings.zipWithIndex
    .flatMap { case (opening, position) =>
      // one (token, position) pair per token of the opening
      tokenize(opening).map(token => token -> position)
    }
    .groupMap(_._1)(_._2)
    .view
    .mapValues(_.toSet)
    .toMap
// Ranking key for search candidates: a higher match frequency compares as greater and,
// for equal frequency, a shorter name compares as greater (the size is negated).
private val searchOrdering =
  Ordering.by[(FullOpening, Freq, NameSize), (Freq, NameSize)](hit => (hit._2, -hit._3))

/** Finds the openings whose key tokens best match the query.
  *
  * The query is tokenized, every token is looked up in `index`, and each opening is scored
  * by the number of distinct query tokens that hit it. Candidates are then ranked with
  * `searchOrdering` (more hits first, shorter name as tie-breaker) and at most 10 are kept.
  *
  * @param q the free-text query
  * @return up to 10 openings selected by `topN` under `searchOrdering`; empty when no
  *         token of the query is present in the index
  */
def apply(q: String): List[FullOpening] = {
  // Count each query token once: a word repeated in the query must not inflate the score.
  val tokens = tokenize(q).distinct
  // position -> number of distinct query tokens that hit the opening at that position
  val freqByPosition: Map[Position, Freq] =
    tokens.flatMap(index.get).flatten.groupMapReduce(identity)(_ => 1)(_ + _)
  val candidates: List[(FullOpening, Freq, NameSize)] =
    freqByPosition.toList.flatMap { case (position, freq) =>
      // lift guards against an out-of-range position instead of throwing
      openings.lift(position).map(op => (op, freq, op.name.size))
    }
  candidates.topN(10)(searchOrdering).map(_._1)
}
}
17 changes: 17 additions & 0 deletions modules/opening/src/test/OpeningSearchTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package lila.opening

import org.specs2.mutable.Specification

/** Checks that the opening name search ranks the expected opening first. */
class OpeningSearchTest extends Specification {

  // Name of the first opening returned for `query`, if the search returned anything.
  private def bestName(query: String) =
    OpeningSearch(query).headOption.map(_.name)

  "search opening name" in {

    "literal" in {
      // the full name is typed exactly
      bestName("Sicilian Defense") must beSome("Sicilian Defense")
    }
    "partial" in {
      // only the first word of the name is typed
      bestName("Sicilian") must beSome("Sicilian Defense")
    }
  }
}

0 comments on commit 18e0405

Please sign in to comment.