Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid throwing exceptions during lookup #9

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.idea
target
7 changes: 2 additions & 5 deletions src/main/java/io/gitlab/rxp90/jsymspell/SymSpell.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,24 @@ public interface SymSpell {
* @param verbosity see {@link Verbosity}
* @param includeUnknown controls whether non-lexicon words should be considered
* @return sorted {@code List} of {@code SuggestItem} for a given {@code input}
* @throws NotInitializedException if no unigram lexicon has been provided, i.e. {@link SymSpell#getUnigramLexicon} is empty
*/
List<SuggestItem> lookup(String input, Verbosity verbosity, boolean includeUnknown) throws NotInitializedException;
List<SuggestItem> lookup(String input, Verbosity verbosity, boolean includeUnknown);

/**
* Same as {@link SymSpell#lookup(String, Verbosity, boolean)} where {@code includeUnknown} is false
* @see SymSpell#lookup(String, Verbosity, boolean)
* @param input string to apply spelling correction to
* @param verbosity see {@link Verbosity}
* @return sorted {@code List} of {@code SuggestItem} for a given {@code input}
* @throws NotInitializedException if no unigram lexicon has been provided, i.e. {@link SymSpell#getUnigramLexicon} is empty
*/
List<SuggestItem> lookup(String input, Verbosity verbosity) throws NotInitializedException;
List<SuggestItem> lookup(String input, Verbosity verbosity);

/**
* Performs spelling correction of multiple space separated words.
* @param input string to apply spelling correction to, where words are separated by spaces
* @param editDistanceMax limit up to which lexicon words can be considered suggestions, must be less than or equal to {@link SymSpell#getMaxDictionaryEditDistance()}
* @param includeUnknown controls whether non-lexicon words should be considered
* @return sorted {@code List} of {@code SuggestItem} for a given {@code input}
* @throws NotInitializedException if no unigram, and/or bigram lexicon has been provided, i.e. {@link SymSpell#getUnigramLexicon} is empty, and/or {@link SymSpell#getBigramLexicon()} is empty
*/
List<SuggestItem> lookupCompound(String input, int editDistanceMax, boolean includeUnknown) throws NotInitializedException;

Expand Down
52 changes: 47 additions & 5 deletions src/main/java/io/gitlab/rxp90/jsymspell/SymSpellBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,23 @@
import io.gitlab.rxp90.jsymspell.api.Bigram;
import io.gitlab.rxp90.jsymspell.api.DamerauLevenshteinOSA;
import io.gitlab.rxp90.jsymspell.api.StringDistance;
import io.gitlab.rxp90.jsymspell.exceptions.NotInitializedException;

import java.util.HashMap;
import java.util.Map;

/**
* Responsible for constructing an instance of {@link SymSpell}. By default,
* the unigram lexicon is empty and must be filled with one or more entries
* added using {@link #setUnigramLexicon(Map)}.
*/
public class SymSpellBuilder {

private int maxDictionaryEditDistance = 2;
private int prefixLength = 7;
private StringDistance stringDistanceAlgorithm = new DamerauLevenshteinOSA();
private Map<String, Long> unigramLexicon = new HashMap<>();
private Map<Bigram, Long> bigramLexicon = new HashMap<>();
private final Map<String, Long> unigramLexicon = new HashMap<>();
private final Map<Bigram, Long> bigramLexicon = new HashMap<>();

public SymSpellBuilder setMaxDictionaryEditDistance(int maxDictionaryEditDistance) {
this.maxDictionaryEditDistance = maxDictionaryEditDistance;
Expand All @@ -25,17 +31,32 @@ public SymSpellBuilder setPrefixLength(int prefixLength) {
return this;
}

/**
* Appends the given set of unigrams to the lexicon. This must be called
* at least once with one or more values in the {@code unigramLexicon}.
*
* @param unigramLexicon The map of words (keys) and their associated {@code Long} values to add to the lexicon.
* @return {@code this}
*/
public SymSpellBuilder setUnigramLexicon(Map<String, Long> unigramLexicon) {
this.unigramLexicon = unigramLexicon;
assert unigramLexicon != null;
assert !unigramLexicon.isEmpty();

this.unigramLexicon.putAll(unigramLexicon);
return this;
}

public SymSpellBuilder setBigramLexicon(Map<Bigram, Long> bigramLexicon) {
this.bigramLexicon = bigramLexicon;
assert bigramLexicon != null;
assert !bigramLexicon.isEmpty();

this.bigramLexicon.putAll(bigramLexicon);
return this;
}

/**
* Sets the {@link StringDistance} implementation stored by this builder.
*
* @param distanceAlgorithm the string distance implementation to use; must not be {@code null}
* @return {@code this}
* @throws NullPointerException if {@code distanceAlgorithm} is {@code null}
*/
public SymSpellBuilder setStringDistanceAlgorithm(StringDistance distanceAlgorithm){
// Validate with an exception rather than an assert: assertions are disabled
// unless the JVM runs with -ea, so an assert would let null through silently.
if (distanceAlgorithm == null) {
throw new NullPointerException("distanceAlgorithm");
}

this.stringDistanceAlgorithm = distanceAlgorithm;
return this;
}
Expand All @@ -60,7 +81,28 @@ public StringDistance getStringDistanceAlgorithm() {
return stringDistanceAlgorithm;
}

public SymSpellImpl createSymSpell() {
/**
* Builds the {@link SymSpell} implementation from this builder's current
* configuration.
*
* @return a new {@link SymSpellImpl} constructed from this builder
* @throws NotInitializedException if the unigram lexicon has no entries
*/
public SymSpellImpl createSymSpell() throws NotInitializedException {
// Only a builder with a non-empty unigram lexicon may produce an instance.
final boolean hasUnigrams = !unigramLexicon.isEmpty();
if( hasUnigrams ) {
return new SymSpellImpl(this);
}

throw new NotInitializedException( "Missing unigram lexicon" );
}

/**
* Alias for {@link #createSymSpell()}; delegates to it and returns its result.
*
* @return the instance produced by {@link #createSymSpell()}
* @throws NotInitializedException if {@link #createSymSpell()} throws it
* @see #createSymSpell()
*/
public SymSpellImpl build() throws NotInitializedException {
return createSymSpell();
}
}
19 changes: 6 additions & 13 deletions src/main/java/io/gitlab/rxp90/jsymspell/SymSpellImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@
import io.gitlab.rxp90.jsymspell.api.Bigram;
import io.gitlab.rxp90.jsymspell.api.StringDistance;
import io.gitlab.rxp90.jsymspell.api.SuggestItem;
import io.gitlab.rxp90.jsymspell.exceptions.NotInitializedException;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Logger;

import static io.gitlab.rxp90.jsymspell.Verbosity.*;

public class SymSpellImpl implements SymSpell {

private static final Logger logger = Logger.getLogger(SymSpellImpl.class.getName());
private static final long BIGRAM_COUNT_MIN = Long.MAX_VALUE;

private final int maxDictionaryEditDistance;
Expand Down Expand Up @@ -98,24 +95,20 @@ private Set<String> editsPrefix(String key) {
}

@Override
public List<SuggestItem> lookup(String input, Verbosity verbosity, boolean includeUnknown) throws NotInitializedException {
public List<SuggestItem> lookup(String input, Verbosity verbosity, boolean includeUnknown) {
return lookup(input, verbosity, this.maxDictionaryEditDistance, includeUnknown);
}

@Override
public List<SuggestItem> lookup(String input, Verbosity verbosity) throws NotInitializedException {
public List<SuggestItem> lookup(String input, Verbosity verbosity) {
return lookup(input, verbosity, false);
}

private List<SuggestItem> lookup(String input, Verbosity verbosity, int maxEditDistance, boolean includeUnknown) throws NotInitializedException {
private List<SuggestItem> lookup(String input, Verbosity verbosity, int maxEditDistance, boolean includeUnknown) {
if (maxEditDistance > maxDictionaryEditDistance) {
throw new IllegalArgumentException("maxEditDistance > maxDictionaryEditDistance");
}

if (unigramLexicon.isEmpty()) {
throw new NotInitializedException("There are no words in the lexicon.");
}

List<SuggestItem> suggestions = new ArrayList<>();
int inputLen = input.length();
boolean wordIsTooLong = inputLen - maxEditDistance > maxDictionaryWordLength;
Expand Down Expand Up @@ -266,7 +259,7 @@ private Set<String> generateNewCandidates(String candidate, Set<String> deletesA
}

@Override
public List<SuggestItem> lookupCompound(String input, int editDistanceMax, boolean includeUnknown) throws NotInitializedException {
public List<SuggestItem> lookupCompound(String input, int editDistanceMax, boolean includeUnknown) {
String[] termList = input.split(" ");
List<SuggestItem> suggestionParts = new ArrayList<>();

Expand Down Expand Up @@ -314,7 +307,7 @@ public List<SuggestItem> lookupCompound(String input, int editDistanceMax, boole
return suggestionsLine;
}

private void splitWords(int editDistanceMax, String[] termList, List<SuggestItem> suggestions, List<SuggestItem> suggestionParts, int i) throws NotInitializedException {
private void splitWords(int editDistanceMax, String[] termList, List<SuggestItem> suggestions, List<SuggestItem> suggestionParts, int i) {
SuggestItem suggestionSplitBest = null;
if (!suggestions.isEmpty()) suggestionSplitBest = suggestions.get(0);

Expand Down Expand Up @@ -393,7 +386,7 @@ private long estimatedWordOccurrenceProbability(String word) {
return (long) ((double) 10 / Math.pow(10, word.length()));
}

Optional<SuggestItem> combineWords(int editDistanceMax, boolean includeUnknown, String token, String previousToken, SuggestItem suggestItem, SuggestItem secondBestSuggestion) throws NotInitializedException {
Optional<SuggestItem> combineWords(int editDistanceMax, boolean includeUnknown, String token, String previousToken, SuggestItem suggestItem, SuggestItem secondBestSuggestion) {
List<SuggestItem> suggestionsCombination = lookup(previousToken + token, TOP, editDistanceMax, includeUnknown);
if (!suggestionsCombination.isEmpty()) {
SuggestItem best2;
Expand Down
29 changes: 6 additions & 23 deletions src/main/java/io/gitlab/rxp90/jsymspell/api/Bigram.java
Original file line number Diff line number Diff line change
@@ -1,40 +1,23 @@
package io.gitlab.rxp90.jsymspell.api;

import java.util.Objects;
import java.util.AbstractMap;

/**
* Holds a pair of words.
*/
public class Bigram {
private final String word1;
private final String word2;

public class Bigram extends AbstractMap.SimpleImmutableEntry<String, String> {
/**
* Constructs a bigram with the specified words.
*
* @param word1 first word
* @param word2 second word
*/
public Bigram(String word1, String word2) {
this.word1 = word1;
this.word2 = word2;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Bigram)) return false;
Bigram bigram = (Bigram) o;
return Objects.equals(word1, bigram.word1) &&
Objects.equals(word2, bigram.word2);
}

@Override
public int hashCode() {
return Objects.hash(word1, word2);
public Bigram(final String word1, final String word2) {
super(word1, word2);
}

@Override
public String toString() {
return word1 + ' ' + word2;
return getKey() + ' ' + getValue();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,4 @@ public class JSymSpellException extends Exception {
public JSymSpellException(String message) {
super(message);
}

public JSymSpellException(String message, Throwable cause) {
super(message, cause);
}

}
24 changes: 15 additions & 9 deletions src/test/java/io/gitlab/rxp90/jsymspell/SymSpellTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class SymSpellTest {
}

@Test
void loadDictionary() throws Exception {
void loadDictionary() throws NotInitializedException {
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(2)
.setUnigramLexicon(mapOf("abcde", 100L, "abcdef", 90L))
.createSymSpell();
Expand All @@ -43,7 +43,7 @@ void loadDictionary() throws Exception {
}

@Test
void lookupCompound() throws Exception {
void lookupCompound() throws NotInitializedException {
SymSpell symSpell = new SymSpellBuilder().setUnigramLexicon(unigrams)
.setBigramLexicon(bigrams)
.setMaxDictionaryEditDistance(2)
Expand Down Expand Up @@ -135,9 +135,8 @@ void combineWords() throws Exception {
}

@Test
void lookupWithoutLoadingDictThrowsException() throws Exception {
SymSpell symSpell = new SymSpellBuilder().createSymSpell();
assertThrows(NotInitializedException.class, () -> symSpell.lookup("boom", Verbosity.CLOSEST));
void lookupWithoutLoadingDictThrowsException() {
assertThrows(NotInitializedException.class, () -> new SymSpellBuilder().createSymSpell());
}

@Test
Expand All @@ -155,23 +154,30 @@ void lookupAll() throws Exception {
@Test
void editsDistance0() throws Exception {
int maxEditDistance = 0;
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(maxEditDistance).createSymSpell();
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(maxEditDistance)
.setUnigramLexicon(unigrams)
.createSymSpell();

Set<String> edits = symSpell.edits("example", 0, new HashSet<>());
assertEquals(Collections.emptySet(), edits);
}

@Test
void editsDistance1() throws Exception {
int maxEditDistance = 1;
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(maxEditDistance).createSymSpell();
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(maxEditDistance)
.setUnigramLexicon(unigrams)
.createSymSpell();
Set<String> edits = symSpell.edits("example", 0, new HashSet<>());
assertEquals(setOf("xample", "eample", "exmple", "exaple", "examle", "exampe", "exampl"), edits);
}

@Test
void editsDistance2() throws Exception {
int maxEditDistance = 2;
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(maxEditDistance).createSymSpell();
SymSpellImpl symSpell = new SymSpellBuilder().setMaxDictionaryEditDistance(maxEditDistance)
.setUnigramLexicon(unigrams)
.createSymSpell();
Set<String> edits = symSpell.edits("example", 0, new HashSet<>());
Set<String> expected = setOf("xample", "eample", "exmple", "exaple", "examle", "exampe", "exampl", "exale", "emple",
"exape", "exmpe", "exapl", "xampe", "exple", "exmpl", "exmle", "xamle", "xmple",
Expand Down Expand Up @@ -210,7 +216,7 @@ void customStringDistanceAlgorithm() throws NotInitializedException {


public static <T> Map<String, T> mapOf(Object... objects){
Map<String, T> map = new HashMap<>();
final Map<String, T> map = new HashMap<>();
for (int i = 0; i < objects.length; i+=2){
map.put((String) objects[i], (T) objects[i+1]);
}
Expand Down