Skip to content

Commit

Permalink
Finished the AttributeFacetHandler Implementation, plus some minor
Browse files Browse the repository at this point in the history
fixes, plus refactored and enhanced integration tests
  • Loading branch information
vzhabiuk committed Jan 12, 2012
1 parent f9829e3 commit 8d4080e
Show file tree
Hide file tree
Showing 16 changed files with 339 additions and 15,245 deletions.
8 changes: 1 addition & 7 deletions example/cars/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
default delimiter is ","
-->
<column name="tags" type="string" multi="true" delimiter=","/>
<column name="object_properties" type="string" multi="true" delimiter=","/>
<!-- attributes: indexed,store,termvector are only used when type is text -->
<column name="contents" type="text" index="ANALYZED" store="YES" termvector="YES" />
</table>
Expand Down Expand Up @@ -81,12 +80,7 @@
</params>
</facet>
<facet name="tags" type="multi" />
<facet name="object_properties" type="attribute" column="object_properties">
<params>
<param name="numFacetsPerKey" value="5" />
<param name="separator" value="=" />
</params>
</facet>

<!-- example of a custom facet, defined in custom-facets.xml -->
<!--
a bean with name "foobar" must be defined
Expand Down
2 changes: 1 addition & 1 deletion example/cars/conf/sensei.properties
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ sensei.index.freshness = 5
# gateway parameters

sensei.gateway.class=com.sensei.indexing.api.gateway.file.LinedFileDataProviderBuilder
sensei.gateway.file.path = example/cars/data/test_data.json
sensei.gateway.file.path = example/cars/data/cars.json
# index manager parameters

sensei.index.manager.default.maxpartition.id = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.util.OpenBitSet;

import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.FacetIterator;
Expand All @@ -17,43 +19,38 @@
public final class AttributesFacetCountCollector extends DefaultFacetCountCollector {
private final AttributesFacetHandler attributesFacetHandler;
public final BigNestedIntArray _array;
private RangePredicate rangePredicate;
private int[] buffer;
private List<BrowseFacet> cachedFacets;
private final int numFacetsPerKey;
private final char separator;
private OpenBitSet excludes;
private OpenBitSet includes;
private final MultiValueFacetDataCache dataCache;

@SuppressWarnings("rawtypes")
public AttributesFacetCountCollector(AttributesFacetHandler attributesFacetHandler, String name, MultiValueFacetDataCache dataCache, int docBase, BrowseSelection browseSelection, FacetSpec ospec, int numFacetsPerKey, char separator){
super(name,dataCache,docBase,browseSelection,ospec);
this.attributesFacetHandler = attributesFacetHandler;
this.dataCache = dataCache;
this.numFacetsPerKey = numFacetsPerKey;
this.separator = separator;
_array = dataCache._nestedArray;
buffer = new int[10];
if (browseSelection != null) {
rangePredicate = new RangePredicate(browseSelection.getValues(), browseSelection.getNotValues(), browseSelection.getSelectionOperation(), separator);
if (browseSelection.getValues().length > 0) includes = attributesFacetHandler.buildBitSet(dataCache, browseSelection.getValues());
if (browseSelection.getNotValues().length > 0) excludes = attributesFacetHandler.buildBitSet(dataCache, browseSelection.getNotValues());
}
}

@Override
public final void collect(int docid) {
if (rangePredicate != null) {
if (buffer.length < _array.getNumItems(docid)) {
buffer = new int[_array.getNumItems(docid) + 10];
}
int count = _array.getData(docid, buffer);
if (count == 1) {
if (rangePredicate.evaluateValue(_dataCache, buffer[0])) {
_count[buffer[0]]++;
}
}
for (int i = 0; i < count; i++) {
if (rangePredicate.evaluateValue(_dataCache, buffer[i])) {
_count[buffer[i]]++;
}
}
if (excludes != null && dataCache._nestedArray.contains(docid, excludes)) {
return;
}
if (includes != null) {
dataCache._nestedArray.countNoReturnWithFilter(docid, _count, includes);
} else {
_array.countNoReturn(docid, _count);
dataCache._nestedArray.countNoReturn(docid, _count);
}
}

Expand All @@ -66,7 +63,7 @@ public final void collectAll()
public List<BrowseFacet> getFacets() {
if (cachedFacets == null) {
int max = _ospec.getMaxCount();
_ospec.setMaxCount(-1);
_ospec.setMaxCount(max * 10);
List<BrowseFacet> facets = super.getFacets();
_ospec.setMaxCount(max);
filterByKeys(facets, separator, numFacetsPerKey);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@

import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.lucene.index.Term;
import org.apache.lucene.util.OpenBitSet;

import com.browseengine.bobo.api.BoboIndexReader;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.BrowseSelection.ValueOperation;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetCountCollectorSource;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.MultiValueFacetDataCache;
import com.browseengine.bobo.facets.data.TermListFactory;
import com.browseengine.bobo.facets.filter.EmptyFilter;
Expand All @@ -24,7 +26,7 @@ public class AttributesFacetHandler extends MultiValueFacetHandler {
private char separator;
private int numFacetsPerKey = 7;
public static final String SEPARATOR_PROP_NAME = "separator";
public static final String NUM_FACETS_PER_KEY_PROP_NAME = "numFacetsPerKey";
public static final String MAX_FACETS_PER_KEY_PROP_NAME = "maxFacetsPerKey";

public AttributesFacetHandler(String name, String indexFieldName, TermListFactory termListFactory, Term sizePayloadTerm, Set<String> depends, Map<String, String> facetProps) {
super(name, indexFieldName, termListFactory, sizePayloadTerm, depends);
Expand All @@ -33,8 +35,8 @@ public AttributesFacetHandler(String name, String indexFieldName, TermListFactor
} else {
this.separator = DEFAULT_SEPARATOR;
}
if (facetProps.containsKey(NUM_FACETS_PER_KEY_PROP_NAME)) {
this.numFacetsPerKey = Integer.parseInt(narrow(facetProps.get(NUM_FACETS_PER_KEY_PROP_NAME)));
if (facetProps.containsKey(MAX_FACETS_PER_KEY_PROP_NAME)) {
this.numFacetsPerKey = Integer.parseInt(narrow(facetProps.get(MAX_FACETS_PER_KEY_PROP_NAME)));
}
}
private String narrow(String string) {
Expand All @@ -46,34 +48,71 @@ public char getSeparator(BrowseSelection browseSelection) {
}
return browseSelection.getSelectionProperties().get(SEPARATOR_PROP_NAME).toString().charAt(0);
}

/**
 * Builds the filter used when a single value is selected on this facet.
 *
 * <p>The value is wrapped in a {@link RangePredicate} together with the
 * handler-level {@code separator} field, and evaluated against the facet data
 * cache obtained through a {@link SimpleDataCacheBuilder} for this handler's name.
 *
 * @param value the selected facet value
 * @param prop  selection properties; not consulted by this implementation
 * @return a predicate-backed filter for {@code value}
 * @throws IOException declared by the overridden method
 */
@Override
public RandomAccessFilter buildRandomAccessFilter(String value, Properties prop) throws IOException {
  final SimpleDataCacheBuilder cacheBuilder = new SimpleDataCacheBuilder(getName());
  final RangePredicate valuePredicate = new RangePredicate(value, separator);
  return new PredicateFacetFilter(cacheBuilder, valuePredicate);
}

/**
 * Builds the filter for an OR selection over several values, optionally negated.
 *
 * <ul>
 *   <li>No values: returns {@link EmptyFilter}, regardless of {@code isNot}.</li>
 *   <li>One value: delegates to {@link #buildRandomAccessFilter(String, Properties)}.</li>
 *   <li>Several values: returns a {@link BitSetFilter} whose bit set is produced
 *       on demand by {@code buildBitSet(dataCache, vals)}.</li>
 * </ul>
 *
 * @param vals  the selected values
 * @param prop  selection properties, forwarded only in the single-value case
 * @param isNot when {@code true}, the resulting filter is wrapped in a
 *              {@link RandomAccessNotFilter}
 * @return the (possibly negated) filter
 * @throws IOException if building the single-value filter fails
 */
@Override
public RandomAccessFilter buildRandomAccessOrFilter(final String[] vals, Properties prop, boolean isNot) throws IOException {
  // Nothing selected: short-circuit before any filter is constructed.
  if (vals.length == 0) {
    return EmptyFilter.getInstance();
  }

  final RandomAccessFilter positive;
  if (vals.length > 1) {
    // Defer bit set construction until a concrete data cache is available.
    final BitSetBuilder unionOfRanges = new BitSetBuilder() {
      @Override
      public OpenBitSet bitSet(FacetDataCache dataCache) {
        return buildBitSet(dataCache, vals);
      }
    };
    positive = new BitSetFilter(unionOfRanges, new SimpleDataCacheBuilder(getName()));
  } else {
    positive = buildRandomAccessFilter(vals[0], prop);
  }

  return isNot ? new RandomAccessNotFilter(positive) : positive;
}



public int getFacetsPerKey(BrowseSelection browseSelection) {
if (browseSelection == null || !browseSelection.getSelectionProperties().containsKey(NUM_FACETS_PER_KEY_PROP_NAME)) {
if (browseSelection == null || !browseSelection.getSelectionProperties().containsKey(MAX_FACETS_PER_KEY_PROP_NAME)) {
return numFacetsPerKey;
}
return Integer.valueOf(browseSelection.getSelectionProperties().get(NUM_FACETS_PER_KEY_PROP_NAME).toString());
return Integer.valueOf(browseSelection.getSelectionProperties().get(MAX_FACETS_PER_KEY_PROP_NAME).toString());
}
@Override
public RandomAccessFilter buildFilter(final BrowseSelection browseSelection) throws IOException {
final String[] values = browseSelection.getValues();
final String[] notValues = browseSelection.getNotValues();
final ValueOperation operation = browseSelection.getSelectionOperation();

if (values.length ==0 && notValues.length == 0) {
return EmptyFilter.getInstance();
} else if (values.length ==0 && notValues.length > 0) {
return new RandomAccessNotFilter(new PredicateFacetFilter(new SimpleDataCacheBuilder(getName()), new RangePredicate(notValues, values, operation, getSeparator(browseSelection))));
} else return new PredicateFacetFilter(new SimpleDataCacheBuilder(getName()), new RangePredicate(values, notValues, operation, getSeparator(browseSelection)));
}

/**
 * Builds a bit set marking every value index covered by the given selection values.
 *
 * <p>Each entry of {@code values} is resolved to a {@link Range} by
 * {@code Range.getRanges} using this handler's {@code separator}; every index
 * from {@code range.start} (inclusive) up to {@code range.end} (exclusive) is
 * then set. The bit set is sized to the number of entries in the cache's
 * {@code valArray}.
 *
 * @param facetDataCache the facet data cache; must be a
 *                       {@link MultiValueFacetDataCache}, otherwise the cast
 *                       below throws {@link ClassCastException}
 * @param values         the selection values to translate into value indexes
 * @return a new bit set with one bit per matched value index
 */
public OpenBitSet buildBitSet(FacetDataCache facetDataCache, String[] values) {
  MultiValueFacetDataCache multiCache = (MultiValueFacetDataCache) facetDataCache;
  Range[] ranges = Range.getRanges(multiCache, values, separator);

  OpenBitSet openBitSet = new OpenBitSet(facetDataCache.valArray.size());
  for (Range range : ranges) {
    // fastSet performs no capacity growth; indexes are assumed to lie within
    // valArray's size -- NOTE(review): confirm Range.getRanges guarantees this.
    for (int i = range.start; i < range.end; i++) {
      openBitSet.fastSet(i);
    }
  }
  return openBitSet;
}
@Override
public FacetCountCollectorSource getFacetCountCollectorSource(final BrowseSelection browseSelection, final FacetSpec ospec){
return new FacetCountCollectorSource(){


return new FacetCountCollectorSource(){

@Override
public FacetCountCollector getFacetCountCollector(
BoboIndexReader reader, int docBase) {
int facetsPerKey = getFacetsPerKey(browseSelection);
if (ospec.getProperties() != null && ospec.getProperties().containsKey(MAX_FACETS_PER_KEY_PROP_NAME)) {
facetsPerKey = Integer.parseInt(ospec.getProperties().get(MAX_FACETS_PER_KEY_PROP_NAME));
}
MultiValueFacetDataCache dataCache = (MultiValueFacetDataCache) reader.getFacetData(_name);
return new AttributesFacetCountCollector(AttributesFacetHandler.this, _name,dataCache,docBase,browseSelection, ospec, getFacetsPerKey(browseSelection), getSeparator(browseSelection));
return new AttributesFacetCountCollector(AttributesFacetHandler.this, _name,dataCache,docBase,browseSelection, ospec, facetsPerKey, getSeparator(browseSelection));
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public Comparable next() {
if (next == null) {
return null;
}
count = next.getFacetValueHitCount();
facet = next.getValue();
return next.getValue();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.sensei.facet.attribute;

import org.apache.lucene.util.OpenBitSet;

import com.browseengine.bobo.facets.data.FacetDataCache;

/**
 * Callback that produces an {@link OpenBitSet} for a given facet data cache.
 *
 * <p>Implementations are invoked with the {@link FacetDataCache} of the reader
 * currently being filtered, so that the bit set can be computed against that
 * cache rather than ahead of time.
 */
public interface BitSetBuilder {
/**
 * Builds the bit set for the supplied cache.
 *
 * @param dataCache the facet data cache to build the bit set against
 * @return the bit set for {@code dataCache}
 */
OpenBitSet bitSet(FacetDataCache dataCache);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package com.sensei.facet.attribute;

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;

import com.browseengine.bobo.api.BoboIndexReader;
import com.browseengine.bobo.docidset.EmptyDocIdSet;
import com.browseengine.bobo.docidset.RandomAccessDocIdSet;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.MultiValueFacetDataCache;
import com.browseengine.bobo.facets.filter.AdaptiveFacetFilter.FacetDataCacheBuilder;
import com.browseengine.bobo.facets.filter.FacetOrFilter;
import com.browseengine.bobo.facets.filter.MultiValueORFacetFilter;
import com.browseengine.bobo.facets.filter.RandomAccessFilter;

/**
 * A {@link RandomAccessFilter} that accepts a document when at least one of its
 * facet value indexes is set in a bit set supplied by a {@link BitSetBuilder}.
 *
 * <p>The bit set is built lazily against the {@link FacetDataCache} of the reader
 * being filtered, and the most recently built bit set is remembered so that
 * repeated calls for the same cache do not rebuild it.
 */
public class BitSetFilter extends RandomAccessFilter {
  private static final long serialVersionUID = 1L;

  protected final FacetDataCacheBuilder facetDataCacheBuilder;
  protected final BitSetBuilder bitSetBuilder;
  /** Bit set built for {@link #lastCache}; only meaningful together with it. */
  private volatile OpenBitSet bitSet;
  /** The cache instance {@link #bitSet} was built for. */
  private volatile FacetDataCache lastCache;

  /**
   * @param bitSetBuilder         produces the bit set for a given facet data cache
   * @param facetDataCacheBuilder obtains the facet data cache for a reader
   */
  public BitSetFilter(BitSetBuilder bitSetBuilder, FacetDataCacheBuilder facetDataCacheBuilder) {
    this.bitSetBuilder = bitSetBuilder;
    this.facetDataCacheBuilder = facetDataCacheBuilder;
  }

  /**
   * Returns the bit set for {@code dataCache}, reusing the previously built one
   * when it was built for the very same cache instance.
   *
   * <p>The method is synchronized because the cached pair ({@code lastCache},
   * {@code bitSet}) is two separate fields: without mutual exclusion a caller
   * could pass the identity check and then read a bit set that another thread
   * had just replaced for a different cache.
   *
   * @param dataCache the cache to obtain the bit set for
   * @return the bit set produced by the builder for {@code dataCache}
   */
  public synchronized OpenBitSet getBitSet(FacetDataCache dataCache) {
    if (lastCache == dataCache) {
      return bitSet;
    }
    bitSet = bitSetBuilder.bitSet(dataCache);
    lastCache = dataCache;
    return bitSet;
  }

  /**
   * Creates the doc id set for {@code reader}.
   *
   * <p>Returns the empty doc id set when no bit is set. Otherwise membership is
   * decided per document: for a {@link MultiValueFacetDataCache} through the
   * nested array's {@code contains}, and for a single-valued cache by testing
   * the bit at the document's entry in {@code orderArray}.
   *
   * @param reader the index reader to filter
   * @return the matching doc id set
   * @throws IOException declared by the overridden method
   */
  @Override
  public RandomAccessDocIdSet getRandomAccessDocIdSet(final BoboIndexReader reader) throws IOException {
    final FacetDataCache dataCache = facetDataCacheBuilder.build(reader);
    final OpenBitSet openBitSet = getBitSet(dataCache);
    long count = openBitSet.cardinality();
    if (count == 0) {
      return EmptyDocIdSet.getInstance();
    } else {
      final boolean multi = dataCache instanceof MultiValueFacetDataCache;
      final MultiValueFacetDataCache multiCache = multi ? (MultiValueFacetDataCache) dataCache : null;

      return new RandomAccessDocIdSet() {
        public DocIdSetIterator iterator() {
          if (multi) {
            return new MultiValueORFacetFilter.MultiValueOrFacetDocIdSetIterator(multiCache, openBitSet);
          } else {
            return new FacetOrFilter.FacetOrDocIdSetIterator(dataCache, openBitSet);
          }
        }

        public boolean get(int docId) {
          if (multi) {
            return multiCache._nestedArray.contains(docId, openBitSet);
          } else {
            return openBitSet.fastGet(dataCache.orderArray.get(docId));
          }
        }
      };
    }
  }

  /**
   * Estimates the fraction of documents accepted by this filter as the sum of
   * the frequencies of all selected value indexes divided by {@code maxDoc()}.
   * Results above 0.999 are rounded up to 1.0.
   *
   * @param reader the index reader to estimate against
   * @return the estimated selectivity, or 0.0 when the reader reports no documents
   */
  @Override
  public double getFacetSelectivity(BoboIndexReader reader) {
    FacetDataCache dataCache = facetDataCacheBuilder.build(reader);
    final OpenBitSet openBitSet = getBitSet(dataCache);
    int[] frequencies = dataCache.freqs;
    int accumFreq = 0;
    // nextSetBit(i) searches from i inclusive, so the scan starts at 0 and must
    // step past each hit; re-querying the hit itself would never terminate.
    int index = openBitSet.nextSetBit(0);
    while (index >= 0) {
      accumFreq += frequencies[index];
      index = openBitSet.nextSetBit(index + 1);
    }
    int total = reader.maxDoc();
    if (total <= 0) {
      // Avoid a NaN/Infinity result from dividing by zero on an empty reader.
      return 0.0;
    }
    double selectivity = (double) accumFreq / (double) total;
    if (selectivity > 0.999) {
      selectivity = 1.0;
    }
    return selectivity;
  }
}
Loading

0 comments on commit 8d4080e

Please sign in to comment.