Skip to content

Commit

Permalink
Trimming the run containers so that they use less memory and are safer.
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Apr 14, 2021
1 parent d626fca commit bb268bd
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 131 deletions.
2 changes: 1 addition & 1 deletion roaring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2091,7 +2091,7 @@ func TestStats(t *testing.T) {
if intSize == 4 {
runContainerBytes = 40
} else {
runContainerBytes = 52
runContainerBytes = 36
}

expectedStats := Statistics{
Expand Down
2 changes: 1 addition & 1 deletion roaringcow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1871,7 +1871,7 @@ func TestStatsCOW(t *testing.T) {
if intSize == 4 {
runContainerBytes = 40
} else {
runContainerBytes = 52
runContainerBytes = 36
}

expectedStats := Statistics{
Expand Down
98 changes: 44 additions & 54 deletions runcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ import (
type runContainer16 struct {
iv []interval16
card int64

// avoid allocation during search
myOpts searchOptions `msg:"-"`
}

// interval16 is the internal to runContainer16
Expand Down Expand Up @@ -613,10 +610,7 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint64 {

// indexOfIntervalAtOrAfter is a helper for union.
func (rc *runContainer16) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 {
rc.myOpts.startIndex = startIndex
rc.myOpts.endxIndex = 0

w, already, _ := rc.search(key, &rc.myOpts)
w, already, _ := rc.searchRange(key, startIndex, 0)
if already {
return w
}
Expand Down Expand Up @@ -840,7 +834,7 @@ toploop:

// get returns true iff key is in the container.
func (rc *runContainer16) contains(key uint16) bool {
_, in, _ := rc.search(int64(key), nil)
_, in, _ := rc.search(int64(key))
return in
}

Expand All @@ -849,22 +843,7 @@ func (rc *runContainer16) numIntervals() int {
return len(rc.iv)
}

// searchOptions allows us to accelerate search with
// prior knowledge of (mostly lower) bounds. This is used by Union
// and Intersect.
type searchOptions struct {
// start here instead of at 0
startIndex int64

// upper bound instead of len(rc.iv);
// endxIndex == 0 means ignore the bound and use
// endxIndex == n ==len(rc.iv) which is also
// naturally the default for search()
// when opt = nil.
endxIndex int64
}

// search returns alreadyPresent to indicate if the
// searchRange returns alreadyPresent to indicate if the
// key is already in one of our interval16s.
//
// If key is alreadyPresent, then whichInterval16 tells
Expand All @@ -888,24 +867,16 @@ type searchOptions struct {
//
// runContainer16.search always returns whichInterval16 < len(rc.iv).
//
// If not nil, opts can be used to further restrict
// the search space.
// The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there
// no upper bound.
//
func (rc *runContainer16) search(key int64, opts *searchOptions) (whichInterval16 int64, alreadyPresent bool, numCompares int) {
func (rc *runContainer16) searchRange(key int64, startIndex int64, endxIndex int64) (whichInterval16 int64, alreadyPresent bool, numCompares int) {
n := int64(len(rc.iv))
if n == 0 {
return -1, false, 0
}

startIndex := int64(0)
endxIndex := n
if opts != nil {
startIndex = opts.startIndex

// let endxIndex == 0 mean no effect
if opts.endxIndex > 0 {
endxIndex = opts.endxIndex
}
if endxIndex == 0 {
endxIndex = n
}

// sort.Search returns the smallest index i
Expand Down Expand Up @@ -975,6 +946,34 @@ func (rc *runContainer16) search(key int64, opts *searchOptions) (whichInterval1
return
}

// search returns alreadyPresent to indicate if the
// key is already in one of our interval16s.
//
// If key is alreadyPresent, then whichInterval16 tells
// you where.
//
// If key is not already present, then whichInterval16 is
// set as follows:
//
// a) whichInterval16 == len(rc.iv)-1 if key is beyond our
// last interval16 in rc.iv;
//
// b) whichInterval16 == -1 if key is before our first
// interval16 in rc.iv;
//
// c) whichInterval16 is set to the minimum index of rc.iv
// which comes strictly before the key;
// so rc.iv[whichInterval16].last < key,
// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
// (Note that whichInterval16+1 won't exist when
// whichInterval16 is the last interval.)
//
// runContainer16.search always returns whichInterval16 < len(rc.iv).
//
func (rc *runContainer16) search(key int64) (whichInterval16 int64, alreadyPresent bool, numCompares int) {
return rc.searchRange(key, 0, 0)
}

// cardinality returns the count of the integers stored in the
// runContainer16.
func (rc *runContainer16) cardinality() int64 {
Expand Down Expand Up @@ -1068,7 +1067,7 @@ func (rc *runContainer16) Add(k uint16) (wasNew bool) {

k64 := int64(k)

index, present, _ := rc.search(k64, nil)
index, present, _ := rc.search(k64)
if present {
return // already there
}
Expand Down Expand Up @@ -1201,13 +1200,8 @@ func (ri *runIterator16) advanceIfNeeded(minval uint16) {
return
}

opt := &searchOptions{
startIndex: ri.curIndex,
endxIndex: int64(len(ri.rc.iv)),
}

// interval cannot be -1 because of minval > peekNext
interval, isPresent, _ := ri.rc.search(int64(minval), opt)
interval, isPresent, _ := ri.rc.searchRange(int64(minval), ri.curIndex, int64(len(ri.rc.iv)))

// if the minval is present, set the curPosIndex at the right position
if isPresent {
Expand Down Expand Up @@ -1360,7 +1354,7 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {

var index int64
index, wasPresent, _ = rc.search(int64(key), nil)
index, wasPresent, _ = rc.search(int64(key))
if !wasPresent {
return // already removed, nothing to do.
}
Expand Down Expand Up @@ -1451,12 +1445,8 @@ func intersectWithLeftover16(astart, alast, bstart, blast int64) (isOverlap, isL
return
}

func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) {

rc.myOpts.startIndex = startIndex
rc.myOpts.endxIndex = 0

w, _, _ := rc.search(key, &rc.myOpts)
func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) {
w, _, _ := rc.searchRange(key, startIndex, 0)
// rc.search always returns w < len(rc.iv)
if w < startIndex {
// not found and comes before lower bound startIndex,
Expand Down Expand Up @@ -1597,8 +1587,8 @@ func (rc *runContainer16) isubtract(del interval16) {
}

// INVAR there is some intersection between rc and del
istart, startAlready, _ := rc.search(int64(del.start), nil)
ilast, lastAlready, _ := rc.search(int64(del.last()), nil)
istart, startAlready, _ := rc.search(int64(del.start))
ilast, lastAlready, _ := rc.search(int64(del.last()))
rc.card = -1
if istart == -1 {
if ilast == n-1 && !lastAlready {
Expand Down Expand Up @@ -2350,7 +2340,7 @@ func (rc *runContainer16) getCardinality() int {
func (rc *runContainer16) rank(x uint16) int {
n := int64(len(rc.iv))
xx := int64(x)
w, already, _ := rc.search(xx, nil)
w, already, _ := rc.search(xx)
if w < 0 {
return 0
}
Expand Down
75 changes: 0 additions & 75 deletions runcontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,81 +366,6 @@ func TestRleRunReverseIterator16(t *testing.T) {
})
}

func TestRleRunSearch16(t *testing.T) {

t.Run("RunContainer16.search should respect the prior bounds we provide for efficiency of searching through a subset of the intervals", func(t *testing.T) {
{
vals := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, MaxUint16 - 3, MaxUint16}
exAt := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11} // expected at
absent := []uint16{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, MaxUint16 - 2}

rc := newRunContainer16FromVals(true, vals...)

assert.EqualValues(t, 12, rc.cardinality())

var where int64
var present bool

for i, v := range vals {
where, present, _ = rc.search(int64(v), nil)
assert.True(t, present)
assert.EqualValues(t, exAt[i], where)
}

for i, v := range absent {
where, present, _ = rc.search(int64(v), nil)
assert.False(t, present)
assert.EqualValues(t, i, where)
}

// delete the MaxUint16 so we can test
// the behavior when searching near upper limit.

assert.EqualValues(t, 12, rc.cardinality())
assert.Equal(t, 12, rc.numIntervals())

rc.removeKey(MaxUint16)

assert.EqualValues(t, 11, rc.cardinality())
assert.Equal(t, 11, rc.numIntervals())

where, present, _ = rc.search(MaxUint16, nil)

assert.False(t, present)
assert.EqualValues(t, 10, where)

var numCompares int
where, present, numCompares = rc.search(MaxUint16, nil)

assert.False(t, present)
assert.EqualValues(t, 10, where)
assert.EqualValues(t, 3, numCompares)

opts := &searchOptions{
startIndex: 5,
}

where, present, numCompares = rc.search(MaxUint16, opts)

assert.False(t, present)
assert.EqualValues(t, 10, where)
assert.EqualValues(t, 2, numCompares)

where, present, _ = rc.search(MaxUint16-3, opts)

assert.True(t, present)
assert.EqualValues(t, 10, where)

// with the bound in place, MaxUint16-3 should not be found
opts.endxIndex = 10
where, present, _ = rc.search(MaxUint16-3, opts)

assert.False(t, present)
assert.EqualValues(t, 9, where)
}
})
}

func TestRleIntersection16(t *testing.T) {
t.Run("RunContainer16.intersect of two RunContainer16(s) should return their intersection", func(t *testing.T) {
{
Expand Down

0 comments on commit bb268bd

Please sign in to comment.