diff --git a/aggregation_test.go b/aggregation_test.go index 64d67a57..ea4a8f60 100644 --- a/aggregation_test.go +++ b/aggregation_test.go @@ -7,9 +7,9 @@ import ( ) func testAggregations(t *testing.T, - and func(bitmaps ... *Bitmap) *Bitmap, - or func(bitmaps ... *Bitmap) *Bitmap, - xor func(bitmaps ... *Bitmap) *Bitmap) { + and func(bitmaps ...*Bitmap) *Bitmap, + or func(bitmaps ...*Bitmap) *Bitmap, + xor func(bitmaps ...*Bitmap) *Bitmap) { t.Run("simple case", func(t *testing.T) { rb1 := NewBitmap() @@ -271,10 +271,10 @@ func testAggregations(t *testing.T, } func TestParAggregations(t *testing.T) { - andFunc := func(bitmaps ... *Bitmap) *Bitmap { + andFunc := func(bitmaps ...*Bitmap) *Bitmap { return ParAnd(0, bitmaps...) } - orFunc := func(bitmaps ... *Bitmap) *Bitmap { + orFunc := func(bitmaps ...*Bitmap) *Bitmap { return ParOr(0, bitmaps...) } @@ -287,4 +287,4 @@ func TestFastAggregations(t *testing.T) { func TestHeapAggregations(t *testing.T) { testAggregations(t, nil, HeapOr, HeapXor) -} \ No newline at end of file +} diff --git a/arraycontainer.go b/arraycontainer.go index 82eaec84..98a5a55e 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -28,6 +28,10 @@ func (ac *arrayContainer) getShortIterator() shortIterable { return &shortIterator{ac.content, 0} } +func (ac *arrayContainer) getManyIterator() manyIterable { + return &manyIterator{ac.content, 0} +} + func (ac *arrayContainer) minimum() uint16 { return ac.content[0] // assume not empty } diff --git a/benchmark_test.go b/benchmark_test.go index 06be39e0..0e4d7449 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -497,15 +497,102 @@ func BenchmarkEqualsClone(b *testing.B) { } } -func BenchmarkSequentialAdd(b *testing.B) { - for j := 0; j < b.N; j++ { - s := NewBitmap() - for i := 0; i < 10000000; i += 16 { - s.Add(uint32(i)) +// go test -bench BenchmarkNexts -benchmem -run - +func BenchmarkNexts(b *testing.B) { + + for _, gap := range []uint32{1, 2, 4, 8, 16, 32, 64, 256, 1024, 8096} { + 
+ rrs := make([]uint32, 500000) + v := uint32(0) + for i := range rrs { + rrs[i] = v + v += gap + } + + bm := NewBitmap() + bm.AddMany(rrs) + + var totnext uint64 + var totnextmany uint64 + + density := float32(100) / float32(gap) + + density_str := fmt.Sprintf("__%f%%", density) + + b.Run("next"+density_str, func(b *testing.B) { + for n := 0; n < b.N; n++ { + totnext = 0 + iter := bm.Iterator() + for iter.HasNext() { + v := iter.Next() + totnext += uint64(v) + } + } + b.StopTimer() + }) + + b.Run("nextmany"+density_str, func(b *testing.B) { + for n := 0; n < b.N; n++ { + totnextmany = 0 + iter := bm.ManyIterator() + // worst case, in practice will reuse buffers across many roars + buf := make([]uint32, 4096) + for j := iter.NextMany(buf); j != 0; j = iter.NextMany(buf) { + for i := 0; i < j; i++ { + totnextmany += uint64(buf[i]) + } + } + } + b.StopTimer() + }) + + if totnext != totnextmany { + b.Fatalf("Cardinalities don't match: %d, %d", totnext, totnextmany) } } } +// go test -bench BenchmarkNextsRLE -benchmem -run - +func BenchmarkNextsRLE(b *testing.B) { + + var totadd uint64 + var totaddmany uint64 + + bm := NewBitmap() + bm.AddRange(0, 1000000) + + b.Run("next", func(b *testing.B) { + for n := 0; n < b.N; n++ { + totadd = 0 + iter := bm.Iterator() + for iter.HasNext() { + v := iter.Next() + totadd += uint64(v) + } + } + b.StopTimer() + }) + + b.Run("nextmany", func(b *testing.B) { + for n := 0; n < b.N; n++ { + totaddmany = 0 + iter := bm.ManyIterator() + // worst case, in practice will reuse buffers across many roars + buf := make([]uint32, 2048) + for j := iter.NextMany(buf); j != 0; j = iter.NextMany(buf) { + for i := 0; i < j; i++ { + totaddmany += uint64(buf[i]) + } + } + } + b.StopTimer() + }) + if totadd != totaddmany { + b.Fatalf("Cardinalities don't match: %d, %d", totadd, totaddmany) + } + +} + func BenchmarkXor(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) diff --git a/bitmapcontainer.go b/bitmapcontainer.go index 
6fa83bb1..ef0f7ab9 100644 --- a/bitmapcontainer.go +++ b/bitmapcontainer.go @@ -109,13 +109,56 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 { func (bcsi *bitmapContainerShortIterator) hasNext() bool { return bcsi.i >= 0 } + func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { return &bitmapContainerShortIterator{a, a.NextSetBit(0)} } + func (bc *bitmapContainer) getShortIterator() shortIterable { return newBitmapContainerShortIterator(bc) } +type bitmapContainerManyIterator struct { + ptr *bitmapContainer + base int + bitset uint64 +} + +func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int { + n := 0 + base := bcmi.base + bitset := bcmi.bitset + + for n < len(buf) { + if bitset == 0 { + base += 1 + if base >= len(bcmi.ptr.bitmap) { + bcmi.base = base + bcmi.bitset = bitset + return n + } + bitset = bcmi.ptr.bitmap[base] + continue + } + t := bitset & -bitset + buf[n] = uint32(((base * 64) + int(popcount(t-1)))) | hs + n = n + 1 + bitset ^= t + } + + bcmi.base = base + bcmi.bitset = bitset + return n +} + +func newBitmapContainerManyIterator(a *bitmapContainer) *bitmapContainerManyIterator { + return &bitmapContainerManyIterator{a, -1, 0} +} + +func (bc *bitmapContainer) getManyIterator() manyIterable { + return newBitmapContainerManyIterator(bc) +} + func (bc *bitmapContainer) getSizeInBytes() int { return len(bc.bitmap) * 8 // + bcBaseBytes } diff --git a/manyiterator.go b/manyiterator.go new file mode 100644 index 00000000..b4f630a7 --- /dev/null +++ b/manyiterator.go @@ -0,0 +1,23 @@ +package roaring + +type manyIterable interface { + nextMany(hs uint32, buf []uint32) int +} + +type manyIterator struct { + slice []uint16 + loc int +} + +func (si *manyIterator) nextMany(hs uint32, buf []uint32) int { + n := 0 + l := si.loc + s := si.slice + for n < len(buf) && l < len(s) { + buf[n] = uint32(s[l]) | hs + l++ + n++ + } + si.loc = l + return n +} diff --git a/real_data_benchmark_test.go 
b/real_data_benchmark_test.go index e1b8bb46..5bad7a76 100644 --- a/real_data_benchmark_test.go +++ b/real_data_benchmark_test.go @@ -137,6 +137,35 @@ func benchmarkRealDataAggregate(b *testing.B, aggregator func(b []*Bitmap) uint6 } } +func BenchmarkRealDataNext(b *testing.B) { + benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { + tot := uint64(0) + for _, b := range bitmaps { + it := b.Iterator() + for it.HasNext() { + tot += uint64(it.Next()) + } + } + return tot + }) +} + +func BenchmarkRealDataNextMany(b *testing.B) { + benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { + tot := uint64(0) + buf := make([]uint32, 4096) + for _, b := range bitmaps { + it := b.ManyIterator() + for n := it.NextMany(buf); n != 0; n = it.NextMany(buf) { + for _, v := range buf[:n] { + tot += uint64(v) + } + } + } + return tot + }) +} + func BenchmarkRealDataParOr(b *testing.B) { benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { return ParOr(0, bitmaps...).GetCardinality() diff --git a/rle16.go b/rle16.go index e5afa6d9..8c4f6342 100644 --- a/rle16.go +++ b/rle16.go @@ -1204,6 +1204,68 @@ func (ri *runIterator16) remove() uint16 { return cur } +type manyRunIterator16 struct { + rc *runContainer16 + curIndex int64 + curPosInIndex uint16 + curSeq int64 +} + +func (rc *runContainer16) newManyRunIterator16() *manyRunIterator16 { + return &manyRunIterator16{rc: rc, curIndex: -1} +} + +func (ri *manyRunIterator16) hasNext() bool { + if len(ri.rc.iv) == 0 { + return false + } + if ri.curIndex == -1 { + return true + } + return ri.curSeq+1 < ri.rc.cardinality() +} + +// hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany +func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int { + n := 0 + if !ri.hasNext() { + return n + } + // start and end are inclusive + for n < len(buf) { + if ri.curIndex == -1 || int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex) <= 0 { + ri.curPosInIndex = 0 + ri.curIndex++ + if 
ri.curIndex == int64(len(ri.rc.iv)) { + break + } + buf[n] = uint32(ri.rc.iv[ri.curIndex].start) | hs + if ri.curIndex != 0 { + ri.curSeq += 1 + } + n += 1 + // not strictly necessary due to len(buf)-n min check, but saves some work + continue + } + // add as many as you can from this seq + moreVals := min(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex), len(buf)-n) + + base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex+1) | hs + + // allows BCE + buf2 := buf[n : n+moreVals] + for i := range buf2 { + buf2[i] = base + uint32(i) + } + + // update values + ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 + ri.curSeq += int64(moreVals) + n += moreVals + } + return n +} + // remove removes key from the container. func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) { diff --git a/rle16_test.go b/rle16_test.go index 8ea5ff42..8a606b5b 100644 --- a/rle16_test.go +++ b/rle16_test.go @@ -99,6 +99,107 @@ func TestRleRunIterator16(t *testing.T) { So(it.hasNext(), ShouldBeFalse) } + { + // basic nextMany test + rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 9)}) + So(rc.cardinality(), ShouldEqual, 6) + it := rc.newManyRunIterator16() + + buf := make([]uint32, 10) + n := it.nextMany(0, buf) + So(n, ShouldEqual, 6) + expected := []uint32{4, 5, 6, 7, 8, 9, 0, 0, 0, 0} + for i, e := range expected { + So(buf[i], ShouldEqual, e) + } + } + + { + // nextMany with len(buf) == 0 + rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 9)}) + So(rc.cardinality(), ShouldEqual, 6) + it := rc.newManyRunIterator16() + + buf := make([]uint32, 0) + n := it.nextMany(0, buf) + So(n, ShouldEqual, 0) + } + + { + // basic nextMany test across ranges + rc := newRunContainer16CopyIv([]interval16{ + newInterval16Range(4, 7), + newInterval16Range(11, 13), + newInterval16Range(18, 21)}) + So(rc.cardinality(), ShouldEqual, 11) + it := rc.newManyRunIterator16() + + buf := make([]uint32, 15) + n := it.nextMany(0, buf) + So(n, 
ShouldEqual, 11) + expected := []uint32{4, 5, 6, 7, 11, 12, 13, 18, 19, 20, 21, 0, 0, 0, 0} + for i, e := range expected { + So(buf[i], ShouldEqual, e) + } + } + { + // basic nextMany test across ranges with different buffer sizes + rc := newRunContainer16CopyIv([]interval16{ + newInterval16Range(4, 7), + newInterval16Range(11, 13), + newInterval16Range(18, 21)}) + expectedCard := 11 + expectedVals := []uint32{4, 5, 6, 7, 11, 12, 13, 18, 19, 20, 21} + hs := uint32(1 << 16) + + So(rc.cardinality(), ShouldEqual, expectedCard) + + for bufSize := 2; bufSize < 15; bufSize++ { + buf := make([]uint32, bufSize) + seen := 0 + it := rc.newManyRunIterator16() + for n := it.nextMany(hs, buf); n != 0; n = it.nextMany(hs, buf) { + // catch runaway iteration + So(seen+n, ShouldBeLessThanOrEqualTo, expectedCard) + + for i, e := range expectedVals[seen : seen+n] { + So(buf[i], ShouldEqual, e+hs) + } + seen += n + // if we have more values to return then we shouldn't leave empty slots in the buffer + if seen < expectedCard { + So(n, ShouldEqual, bufSize) + } + } + So(seen, ShouldEqual, expectedCard) + } + } + + { + // basic nextMany interaction with hasNext + rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 4)}) + So(rc.cardinality(), ShouldEqual, 1) + it := rc.newManyRunIterator16() + So(it.hasNext(), ShouldBeTrue) + + buf := make([]uint32, 4) + + n := it.nextMany(0, buf) + So(n, ShouldEqual, 1) + expected := []uint32{4, 0, 0, 0} + for i, e := range expected { + So(buf[i], ShouldEqual, e) + } + So(it.hasNext(), ShouldBeFalse) + + buf = make([]uint32, 4) + n = it.nextMany(0, buf) + So(n, ShouldEqual, 0) + expected = []uint32{0, 0, 0, 0} + for i, e := range expected { + So(buf[i], ShouldEqual, e) + } + } { rc := newRunContainer16TakeOwnership([]interval16{newInterval16Range(4, 9)}) card := rc.cardinality() diff --git a/rlei.go b/rlei.go index 747c978e..2a31685c 100644 --- a/rlei.go +++ b/rlei.go @@ -180,6 +180,10 @@ func (rc *runContainer16) getShortIterator() 
shortIterable { return rc.newRunIterator16() } +func (rc *runContainer16) getManyIterator() manyIterable { + return rc.newManyRunIterator16() +} + // add the values in the range [firstOfRange, endx). endx // is still abe to express 2^16 because it is an int not an uint16. func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { diff --git a/roaring.go b/roaring.go index 857765f6..f9fbc3f7 100644 --- a/roaring.go +++ b/roaring.go @@ -251,6 +251,53 @@ func newIntIterator(a *Bitmap) *intIterator { return p } +// ManyIntIterable allows you to iterate over the values in a Bitmap +type ManyIntIterable interface { + // pass in a buffer to fill up with values, returns how many values were returned + NextMany([]uint32) int +} + +type manyIntIterator struct { + pos int + hs uint32 + iter manyIterable + highlowcontainer *roaringArray +} + +func (ii *manyIntIterator) init() { + if ii.highlowcontainer.size() > ii.pos { + ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getManyIterator() + ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + } else { + ii.iter = nil + } +} + +func (ii *manyIntIterator) NextMany(buf []uint32) int { + n := 0 + for n < len(buf) { + if ii.iter == nil { + break + } + moreN := ii.iter.nextMany(ii.hs, buf[n:]) + n += moreN + if moreN == 0 { + ii.pos = ii.pos + 1 + ii.init() + } + } + + return n +} + +func newManyIntIterator(a *Bitmap) *manyIntIterator { + p := new(manyIntIterator) + p.pos = 0 + p.highlowcontainer = &a.highlowcontainer + p.init() + return p +} + // String creates a string representation of the Bitmap func (rb *Bitmap) String() string { // inspired by https://github.com/fzandona/goroar/ @@ -282,6 +329,11 @@ func (rb *Bitmap) Iterator() IntIterable { return newIntIterator(rb) } +// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order +func (rb *Bitmap) ManyIterator() ManyIntIterable { + return newManyIntIterator(rb) +} + // Clone creates a copy of 
the Bitmap func (rb *Bitmap) Clone() *Bitmap { ptr := new(Bitmap) diff --git a/roaring_test.go b/roaring_test.go index a174fa5f..33ebc94e 100644 --- a/roaring_test.go +++ b/roaring_test.go @@ -1,14 +1,15 @@ package roaring import ( - . "github.com/smartystreets/goconvey/convey" - "github.com/willf/bitset" "log" "math" "math/rand" "strconv" "testing" "unsafe" + + . "github.com/smartystreets/goconvey/convey" + "github.com/willf/bitset" ) func TestRoaringRangeEnd(t *testing.T) { @@ -1453,6 +1454,42 @@ func TestXORtest4(t *testing.T) { //need to add the massives } +func TestNextMany(t *testing.T) { + Convey("NextMany test", t, func() { + count := 70000 + for _, gap := range []uint32{1, 8, 32, 128} { + expected := make([]uint32, count) + { + v := uint32(0) + for i, _ := range expected { + expected[i] = v + v += gap + } + } + bm := BitmapOf(expected...) + for _, bufSize := range []int{1, 64, 4096, count} { + buf := make([]uint32, bufSize) + it := bm.ManyIterator() + cur := 0 + for n := it.NextMany(buf); n != 0; n = it.NextMany(buf) { + // much faster tests... (10s -> 5ms) + if cur+n > count { + So(cur+n, ShouldBeLessThanOrEqualTo, count) + } + for i, v := range buf[:n] { + // much faster tests... 
+ if v != expected[cur+i] { + So(v, ShouldEqual, expected[cur+i]) + } + } + cur += n + } + So(cur, ShouldEqual, count) + } + } + }) +} + func TestBigRandom(t *testing.T) { Convey("randomTest", t, func() { rTest(15) @@ -1698,6 +1735,36 @@ func TestFlipBigA(t *testing.T) { }) } +func TestNextManyOfAddRangeAcrossContainers(t *testing.T) { + Convey("NextManyOfAddRangeAcrossContainers ", t, func() { + rb := NewBitmap() + rb.AddRange(65530, 65540) + expectedCard := 10 + expected := []uint32{65530, 65531, 65532, 65533, 65534, 65535, 65536, 65537, 65538, 65539, 0} + + // test where all values can be returned in a single buffer + it := rb.ManyIterator() + buf := make([]uint32, 11) + n := it.NextMany(buf) + So(n, ShouldEqual, expectedCard) + for i, e := range expected { + So(buf[i], ShouldEqual, e) + } + + // test where buf is size 1, so many iterations + it = rb.ManyIterator() + n = 0 + buf = make([]uint32, 1) + for i := 0; i < expectedCard; i++ { + n = it.NextMany(buf) + So(n, ShouldEqual, 1) + So(buf[0], ShouldEqual, expected[i]) + } + n = it.NextMany(buf) + So(n, ShouldEqual, 0) + }) +} + func TestDoubleAdd(t *testing.T) { Convey("doubleadd ", t, func() { rb := NewBitmap() diff --git a/roaringarray.go b/roaringarray.go index ee2db7c0..80a799fb 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -38,6 +38,7 @@ type container interface { inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) xor(r container) container getShortIterator() shortIterable + getManyIterator() manyIterable contains(i uint16) bool maximum() uint16 minimum() uint16