Skip to content

Commit

Permalink
SOLR-3737: fix Stempel factory resource loading, use singleton instance
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/branch_4x@1374116 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
rmuir committed Aug 17, 2012
1 parent f003a25 commit 2a898e8
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 17 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ Bug Fixes
encoders / stemmers via the ResourceLoader now instead of Class.forName().
Solr users should now no longer have to embed these in its war. (David Smiley)

* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)

Build

* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ public static CharArraySet getDefaultStopSet(){
return DefaultsHolder.DEFAULT_STOP_SET;
}

/**
* Returns an unmodifiable instance of the default stemmer table.
* <p>
* The table is read from the static {@code DefaultsHolder.DEFAULT_TABLE} field, so
* every caller receives the same shared instance rather than a private copy.
*
* @return the default stemmer table held by {@code DefaultsHolder}
*/
public static Trie getDefaultTable() {
return DefaultsHolder.DEFAULT_TABLE;
}

/**
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
* accesses the static final set the first time.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,17 @@
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;

/**
* Factory for {@link StempelFilter} using a Polish stemming table.
*/
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private Trie stemmer = null;
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";

public class StempelPolishStemFilterFactory extends TokenFilterFactory {
public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(stemmer));
}

public void inform(ResourceLoader loader) throws IOException {
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ public void store(DataOutput os) throws IOException {
* @param key the key
* @param cmd the patch command
*/
public void add(CharSequence key, CharSequence cmd) {
void add(CharSequence key, CharSequence cmd) {
if (key == null || cmd == null) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;

/**
* Tests for {@link StempelPolishStemFilterFactory}
Expand All @@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts,
new String[] { "student", "student" });
Expand Down

0 comments on commit 2a898e8

Please sign in to comment.