Skip to content

Commit

Permalink
Merge pull request RoaringBitmap#277 from jacksonrnewhouse/minimize_c…
Browse files Browse the repository at this point in the history
…lones

Minimize clones (WIP)
  • Loading branch information
lemire authored Oct 19, 2020
2 parents c39a223 + d5f9722 commit 7521df4
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 1 deletion.
70 changes: 70 additions & 0 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package roaring
import (
"bytes"
"fmt"
"github.com/stretchr/testify/require"
"math/rand"
"runtime"
"testing"
Expand Down Expand Up @@ -235,6 +236,75 @@ func BenchmarkUnionRoaring(b *testing.B) {
}
}

// BenchmarkUnionInPlaceCopyOnWrite measures bitmap.Or() when the receiving
// (left) bitmap was loaded through FromBuffer, in which case all left
// containers need to be copied in order to be updated.
// It sweeps container-count powers and items-per-container powers for both
// operands; each sub-benchmark is named
// "<leftContainerPower>-<rightContainerPower>-<leftItemsPower>-<rightItemsPower>".
func BenchmarkUnionInPlaceCopyOnWrite(b *testing.B) {
	// Number of random bitmaps pre-generated for each side of the union.
	const poolSize = 10

	// Everything used as a shift count is a uint32 to stay compatible with
	// Go 1.12, which requires unsigned shift counts.
	var containerPowLo, containerPowHi, containerPowStep uint32 = 4, 10, 3
	var itemsPowLo, itemsPowHi, itemsPowStep uint32 = 3, 10, 7

	for lc := containerPowLo; lc <= containerPowHi; lc += containerPowStep {
		for rc := containerPowLo; rc <= containerPowHi; rc += containerPowStep {
			for li := itemsPowLo; li <= itemsPowHi; li += itemsPowStep {
				for ri := itemsPowLo; ri <= itemsPowHi; ri += itemsPowStep {
					name := fmt.Sprintf("%d-%d-%d-%d", lc, rc, li, ri)
					b.Run(name, func(b *testing.B) {
						// Exclusive upper bound of the generated values: 2^(16+containerPower).
						leftRange := (1 << 16) << lc
						rightRange := (1 << 16) << rc
						// Number of random insertions: 2^(containerPower+itemsPower).
						leftCount := 1 << (lc + li)
						rightCount := 1 << (rc + ri)

						// Left operands stay serialized; right operands are loaded once and reused.
						serializedLefts := make([][]byte, poolSize)
						rights := make([]*Bitmap, poolSize)
						for k := range rights {
							rights[k] = NewBitmap()
							serializedLefts[k] = generateRandomBitmap(b, leftRange, leftCount)
							_, err := rights[k].FromBuffer(generateRandomBitmap(b, rightRange, rightCount))
							require.NoError(b, err)
						}

						// Or() is destructive, so every iteration gets its own
						// bitmap freshly loaded from one of the serialized lefts.
						freshLefts := make([]*Bitmap, b.N)
						for k := range freshLefts {
							bm := NewBitmap()
							_, err := bm.FromBuffer(serializedLefts[k%poolSize])
							require.NoError(b, err)
							freshLefts[k] = bm
						}

						runActualBenchmark(b, freshLefts, rights)
					})
				}
			}
		}
	}
}

// runActualBenchmark executes the timed portion of the union benchmark.
// It is broken out primarily so you can profile the tests, as otherwise the
// input generation overwhelms the actual test.
//
// The timer is reset on entry, so setup work done by the caller is excluded
// from the measurement, and allocation reporting is enabled. For each of the
// b.N iterations it Or()s one right bitmap into targetLefts[i] (mutating it)
// and adds the resulting cardinality to the returned total.
//
// targetLefts must hold at least b.N bitmaps. The right bitmaps are reused
// round-robin, so right may have any non-zero length.
func runActualBenchmark(b *testing.B, targetLefts []*Bitmap, right []*Bitmap) uint64 {
	b.ResetTimer()
	b.ReportAllocs()
	total := uint64(0)
	for i := 0; i < b.N; i++ {
		// Cycle through however many right operands were supplied instead of
		// assuming the caller prepared exactly 10 of them.
		targetLefts[i].Or(right[i%len(right)])
		total += targetLefts[i].GetCardinality()
	}
	return total
}

// generateRandomBitmap builds a bitmap from terms calls to Add, each with a
// value drawn by rand.Intn(max), and returns the bitmap's ToBytes output.
// Draws may repeat, so the bitmap can end up with fewer than terms values.
// A serialization error is reported through require.NoError on b.
func generateRandomBitmap(b *testing.B, max, terms int) []byte {
	bm := NewBitmap()
	for remaining := terms; remaining > 0; remaining-- {
		bm.Add(uint32(rand.Intn(max)))
	}
	serialized, err := bm.ToBytes()
	require.NoError(b, err)
	return serialized
}

// go test -bench BenchmarkSize -run -
func BenchmarkSizeBitset(b *testing.B) {
b.StopTimer()
Expand Down
2 changes: 1 addition & 1 deletion roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ main:
}
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
} else {
rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getWritableContainerAtIndex(pos1).ior(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getUnionedWritableContainer(pos1, x2.highlowcontainer.getContainerAtIndex(pos2)), false)
pos1++
pos2++
if (pos1 == length1) || (pos2 == length2) {
Expand Down
11 changes: 11 additions & 0 deletions roaringarray.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,17 @@ func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) cont
return c
}

// getUnionedWritableContainer returns the union of the container at pos with
// other, for use by the in-place Or.
// When the container at pos is flagged as needing copy-on-write, the
// non-in-place or() is used, which leads to fewer allocations than cloning
// first and then updating in place; otherwise the container is updated in
// place with ior().
func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) container {
	mustCopy := ra.needCopyOnWrite[pos]
	current := ra.getContainerAtIndex(pos)
	if !mustCopy {
		return current.ior(other)
	}
	return current.or(other)
}

func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
if ra.needCopyOnWrite[i] {
ra.containers[i] = ra.containers[i].clone()
Expand Down
35 changes: 35 additions & 0 deletions roaringcow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1939,3 +1939,38 @@ func TestCloneCOWContainers(t *testing.T) {

assert.EqualValues(t, rb.ToArray(), newRb1.ToArray())
}

// TestInPlaceCOWContainers checks the in-place Or() on bitmaps that were
// loaded with FromBuffer: the result must equal the non-mutating Or() of the
// original bitmaps, the right-hand operand must be unchanged, and the buffer
// the left bitmap was loaded from must still decode to the original bitmap.
func TestInPlaceCOWContainers(t *testing.T) {
	// Build two overlapping bitmaps and serialize them.
	wb1 := NewBitmap()
	wb2 := NewBitmap()

	wb1.AddRange(0, 3000)
	wb2.AddRange(2000, 5000)

	buf1 := &bytes.Buffer{}
	buf2 := &bytes.Buffer{}

	// Serialization errors are fatal: every later assertion depends on the buffers.
	if _, err := wb1.WriteTo(buf1); err != nil {
		t.Fatalf("serializing wb1: %v", err)
	}
	if _, err := wb2.WriteTo(buf2); err != nil {
		t.Fatalf("serializing wb2: %v", err)
	}

	// Load the serialized bitmaps back through FromBuffer.
	rb1 := NewBitmap()
	rb2 := NewBitmap()

	if _, err := rb1.FromBuffer(buf1.Bytes()); err != nil {
		t.Fatalf("loading rb1 from buffer: %v", err)
	}
	if _, err := rb2.FromBuffer(buf2.Bytes()); err != nil {
		t.Fatalf("loading rb2 from buffer: %v", err)
	}

	assert.True(t, wb1.Equals(rb1))
	assert.True(t, wb2.Equals(rb2))

	// In-place union on the buffer-loaded bitmap.
	rb1.Or(rb2)

	assert.True(t, Or(wb1, wb2).Equals(rb1))
	assert.True(t, wb2.Equals(rb2))

	// Re-read the left buffer: it must still decode to the pre-union bitmap.
	rb3 := NewBitmap()
	if _, err := rb3.FromBuffer(buf1.Bytes()); err != nil {
		t.Fatalf("reloading rb3 from buffer: %v", err)
	}

	assert.True(t, rb3.Equals(wb1))
}

0 comments on commit 7521df4

Please sign in to comment.