Skip to content

Commit

Permalink
LUCENE-4300: BooleanQuery's rewrite was unsafe if coord(1,1) != 1
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/branch_4x@1371645 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
rmuir committed Aug 10, 2012
1 parent dc0b164 commit f489780
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 35 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Bug Fixes
did not work at all, it would infinitely recurse.
(Alberto Paro via Robert Muir)

* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)

======================= Lucene 4.0.0-BETA =======================

New features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,11 @@ public float getValueForNormalization() throws IOException {
}

public float coord(int overlap, int maxOverlap) {
return similarity.coord(overlap, maxOverlap);
// LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
// so coord() is not applied. But when BQ cannot optimize itself away
// for a single clause (minNrShouldMatch, prohibited clauses, etc), its
// important not to apply coord(1,1) for consistency, it might not be 1.0F
return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -297,8 +299,8 @@ public void testNoOptionalButMin2() throws Exception {
}

public void testRandomQueries() throws Exception {
String field="data";
String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
final String field="data";
final String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
int maxLev=4;

// callback object to set a random setMinimumNumberShouldMatch
Expand All @@ -310,13 +312,18 @@ public void postCreate(BooleanQuery q) {
if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++;
}
q.setMinimumNumberShouldMatch(random().nextInt(opt+2));
if (random().nextBoolean()) {
// also add a random negation
Term randomTerm = new Term(field, vals[random().nextInt(vals.length)]);
q.add(new TermQuery(randomTerm), BooleanClause.Occur.MUST_NOT);
}
}
};



// increase number of iterations for more complete testing
int num = atLeast(10);
int num = atLeast(20);
for (int i=0; i<num; i++) {
int lev = random().nextInt(maxLev);
final long seed = random().nextLong();
Expand All @@ -336,44 +343,90 @@ public void postCreate(BooleanQuery q) {
QueryUtils.check(random(), q1,s);
QueryUtils.check(random(), q2,s);
}
// The constrained query
// should be a superset to the unconstrained query.
if (top2.totalHits > top1.totalHits) {
fail("Constrained results not a subset:\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q2.toString());
}

for (int hit=0; hit<top2.totalHits; hit++) {
int id = top2.scoreDocs[hit].doc;
float score = top2.scoreDocs[hit].score;
boolean found=false;
// find this doc in other hits
for (int other=0; other<top1.totalHits; other++) {
if (top1.scoreDocs[other].doc == id) {
found=true;
float otherScore = top1.scoreDocs[other].score;
// check if scores match
assertEquals("Doc " + id + " scores don't match\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q2.toString(),
score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
}
}
assertSubsetOfSameScores(q2, top1, top2);
}
// System.out.println("Total hits:"+tot);
}

private void assertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2) {
// The constrained query
// should be a subset to the unconstrained query.
if (top2.totalHits > top1.totalHits) {
fail("Constrained results not a subset:\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q.toString());
}

// check if subset
if (!found) fail("Doc " + id + " not found\n"
for (int hit=0; hit<top2.totalHits; hit++) {
int id = top2.scoreDocs[hit].doc;
float score = top2.scoreDocs[hit].score;
boolean found=false;
// find this doc in other hits
for (int other=0; other<top1.totalHits; other++) {
if (top1.scoreDocs[other].doc == id) {
found=true;
float otherScore = top1.scoreDocs[other].score;
// check if scores match
assertEquals("Doc " + id + " scores don't match\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q2.toString());
+ "for query:" + q.toString(),
score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
}
}

// check if subset
if (!found) fail("Doc " + id + " not found\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
+ "for query:" + q.toString());
}
// System.out.println("Total hits:"+tot);
}


public void testRewriteCoord1() throws Exception {
final Similarity oldSimilarity = s.getSimilarity();
try {
s.setSimilarity(new DefaultSimilarity() {
@Override
public float coord(int overlap, int maxOverlap) {
return overlap / ((float)maxOverlap + 1);
}
});
BooleanQuery q1 = new BooleanQuery();
q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
BooleanQuery q2 = new BooleanQuery();
q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
q2.setMinimumNumberShouldMatch(1);
TopDocs top1 = s.search(q1,null,100);
TopDocs top2 = s.search(q2,null,100);
assertSubsetOfSameScores(q2, top1, top2);
} finally {
s.setSimilarity(oldSimilarity);
}
}

public void testRewriteNegate() throws Exception {
final Similarity oldSimilarity = s.getSimilarity();
try {
s.setSimilarity(new DefaultSimilarity() {
@Override
public float coord(int overlap, int maxOverlap) {
return overlap / ((float)maxOverlap + 1);
}
});
BooleanQuery q1 = new BooleanQuery();
q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
BooleanQuery q2 = new BooleanQuery();
q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
TopDocs top1 = s.search(q1,null,100);
TopDocs top2 = s.search(q2,null,100);
assertSubsetOfSameScores(q2, top1, top2);
} finally {
s.setSimilarity(oldSimilarity);
}
}

protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {

Expand Down

0 comments on commit f489780

Please sign in to comment.