Skip to content
This repository was archived by the owner on Dec 23, 2022. It is now read-only.

Commit ad92265

Browse files
committed
src/model/model_index: use BKTree for similarity matches
Signed-off-by: Alexandre Terrasa <[email protected]>
1 parent 5a8f4c2 commit ad92265

File tree

1 file changed

+37
-35
lines changed

1 file changed

+37
-35
lines changed

src/model/model_index.nit

+37-35
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,9 @@
126126
module model_index
127127

128128
import model::model_collect
129+
129130
import trees::trie
131+
import trees::bktree
130132

131133
redef class Model
132134

@@ -257,6 +259,9 @@ class ModelIndex
257259
# TODO add an option.
258260
var name_prefixes = new Trie[Array[MEntity]]
259261

262+
# Distance tree for mentities `name`
263+
var name_distances = new BKTree
264+
260265
# Map of all mentities indexed by their `full_name`
261266
var full_names = new HashMap[String, MEntity]
262267

@@ -266,6 +271,9 @@ class ModelIndex
266271
# arrays of mentities to be consistent with `name_prefixes`.
267272
var full_name_prefixes = new Trie[Array[MEntity]]
268273

274+
# Distance tree for mentities `full_name`
275+
var full_name_distances = new BKTree
276+
269277
# Index `mentity` by its `MEntity::name`
270278
#
271279
# See `name_prefixes`.
@@ -283,6 +291,9 @@ class ModelIndex
283291
name_prefixes[name] = new Array[MEntity]
284292
end
285293
name_prefixes[name].add mentity
294+
295+
# Index distance
296+
name_distances.add(name)
286297
end
287298

288299
# Index `mentity` by its `MEntity::full_name`
@@ -297,6 +308,9 @@ class ModelIndex
297308
full_name_prefixes[name] = new Array[MEntity]
298309
end
299310
full_name_prefixes[name].add mentity
311+
312+
# Index distance
313+
full_name_distances.add(name)
300314
end
301315

302316
# Index `mentity` so it can be retrieved by a find query
@@ -345,10 +359,15 @@ class ModelIndex
345359
# Warning: may not scale to large indexes.
346360
fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
347361
var results = new IndexMatches
348-
for mentity in mentities do
349-
if filter != null and not filter.accept_mentity(mentity) then continue
350-
if mentity isa MClassDef or mentity isa MPropDef then continue
351-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
362+
for match in name_distances.search(name) do
363+
var dist = match.distance
364+
var mname = match.key
365+
if not names.has_key(mname) then continue
366+
for mentity in names[mname] do
367+
if mentity isa MClassDef or mentity isa MPropDef then continue
368+
if filter != null and not filter.accept_mentity(mentity) then continue
369+
results.add new IndexMatch(mentity, dist)
370+
end
352371
end
353372
return results
354373
end
@@ -359,45 +378,37 @@ class ModelIndex
359378
# Warning: may not scale to large indexes.
360379
fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
361380
var results = new IndexMatches
362-
for mentity in mentities do
363-
if filter != null and not filter.accept_mentity(mentity) then continue
381+
for match in full_name_distances.search(name) do
382+
var dist = match.distance
383+
var mname = match.key
384+
if not full_names.has_key(mname) then continue
385+
var mentity = full_names[mname]
364386
if mentity isa MClassDef or mentity isa MPropDef then continue
365-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
387+
if filter != null and not filter.accept_mentity(mentity) then continue
388+
results.add new IndexMatch(mentity, dist)
366389
end
367390
return results
368391
end
369392

370393
# Rank all mentities by the distance between `name` and both the mentity name and full name
371394
fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
372395
var results = new IndexMatches
373-
for mentity in mentities do
374-
if filter != null and not filter.accept_mentity(mentity) then continue
375-
if mentity isa MClassDef or mentity isa MPropDef then continue
376-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
377-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
378-
end
396+
results.add_all find_by_full_name_similarity(name, filter)
397+
results.add_all find_by_name_similarity(name, filter)
379398
return results
380399
end
381400

382401
# Find mentities by name trying first by prefix then by similarity
383402
fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
384-
var results = find_by_name_prefix(name)
385-
for mentity in mentities do
386-
if filter != null and not filter.accept_mentity(mentity) then continue
387-
if mentity isa MClassDef or mentity isa MPropDef then continue
388-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
389-
end
403+
var results = find_by_name_prefix(name, filter)
404+
results.add_all find_by_name_similarity(name, filter)
390405
return results
391406
end
392407

393408
# Find mentities by full name trying first by prefix then by similarity
394409
fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
	# Prefix matches come first. `filter` is forwarded so that entities the
	# caller excluded cannot slip in through the prefix lookup; `find_by_name`
	# and `find` pass it to their prefix lookups the same way.
	var results = find_by_full_name_prefix(name, filter)
	# Then append the similarity matches on `full_name`, filtered the same way.
	results.add_all find_by_full_name_similarity(name, filter)
	return results
end
403414

@@ -409,17 +420,8 @@ class ModelIndex
409420
# 4. try similarity by full_name
410421
fun find(name: String, filter: nullable ModelFilter): IndexMatches do
411422
var results = find_by_name_prefix(name, filter)
412-
413-
for result in find_by_full_name_prefix(name, filter) do
414-
results.add result
415-
end
416-
417-
for mentity in mentities do
418-
if filter != null and not filter.accept_mentity(mentity) then continue
419-
if mentity isa MClassDef or mentity isa MPropDef then continue
420-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
421-
results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
422-
end
423+
results.add_all find_by_full_name_prefix(name, filter)
424+
results.add_all find_by_similarity(name, filter)
423425
return results
424426
end
425427
end

0 commit comments

Comments
 (0)