Skip to content

Commit

Permalink
Merge pull request RoaringBitmap#312 from jacksonrnewhouse/faster_run…
Browse files Browse the repository at this point in the history
…_unions

faster run container unions.
  • Loading branch information
lemire authored Jun 7, 2021
2 parents 55ff8cc + 77da837 commit 2feadb1
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 19 deletions.
18 changes: 13 additions & 5 deletions arraycontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,11 +395,19 @@ func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container {
}

// iorRun16 computes the union of ac and rc and returns the container that
// holds the result.
//
// When rc holds fewer values than ac and their combined cardinality is below
// arrayDefaultMaxSize, each run of rc is added to ac through iaddRange and the
// container returned by the last iaddRange call is the result. In every other
// case the union is delegated to rc.orArray(ac).
func (ac *arrayContainer) iorRun16(rc *runContainer16) container {
	runCardinality := rc.getCardinality()
	// heuristic for if the container should maybe be an
	// array container.
	if runCardinality < ac.getCardinality() &&
		runCardinality+ac.getCardinality() < arrayDefaultMaxSize {
		var result container = ac
		for _, run := range rc.iv {
			// A run covers start..start+length inclusive (length is the
			// number of values minus one).
			// NOTE(review): iaddRange is taken to use an exclusive upper
			// bound, [start, endx), hence the +1 — confirm against the
			// container interface.
			result = result.iaddRange(int(run.start), int(run.start)+int(run.length)+1)
		}
		return result
	}
	return rc.orArray(ac)
}

func (ac *arrayContainer) lazyIOR(a container) container {
Expand Down
111 changes: 97 additions & 14 deletions runcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import (
// runContainer16 does run-length encoding of sets of
// uint16 integers.
type runContainer16 struct {
	// iv holds the intervals (runs) that make up the set.
	iv []interval16
}

// interval16 is the internal to runContainer16
Expand Down Expand Up @@ -849,7 +849,7 @@ func (rc *runContainer16) numIntervals() int {
//
// runContainer16.search always returns whichInterval16 < len(rc.iv).
//
// The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there is
// no upper bound.
//
func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (whichInterval16 int, alreadyPresent bool, numCompares int) {
Expand Down Expand Up @@ -968,14 +968,12 @@ func (rc *runContainer16) getCardinality() int {
return n
}


// isEmpty reports whether the container holds no intervals.
// Only the length of the interval slice is inspected, so the
// check takes constant time.
func (rc *runContainer16) isEmpty() bool {
	intervalCount := len(rc.iv)
	return intervalCount == 0
}


// AsSlice decompresses the contents into a []uint16 slice.
func (rc *runContainer16) AsSlice() []uint16 {
s := make([]uint16, rc.getCardinality())
Expand Down Expand Up @@ -1198,7 +1196,7 @@ func (ri *runIterator16) advanceIfNeeded(minval uint16) {
// before calling next() to insure there are contents.
type runReverseIterator16 struct {
	rc            *runContainer16 // container whose intervals curIndex refers to
	curIndex      int             // index into rc.iv
	curPosInIndex uint16          // offset in rc.iv[curIndex]
}

Expand Down Expand Up @@ -1288,7 +1286,6 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
return n
}


func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
n := 0

Expand Down Expand Up @@ -1424,7 +1421,7 @@ func intersectWithLeftover16(astart, alast, bstart, blast int) (isOverlap, isLef
return
}

func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int, key int) (index int, done bool) {
func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int, key int) (index int, done bool) {
w, _, _ := rc.searchRange(key, startIndex, 0)
// rc.search always returns w < len(rc.iv)
if w < startIndex {
Expand All @@ -1448,7 +1445,6 @@ func sliceToString16(m []interval16) string {
return s
}


// helper for invert
func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 {
cur := rc.iv[lastIdx]
Expand Down Expand Up @@ -2152,9 +2148,21 @@ func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int

// orArray finds the union of rc and ac.
//
// If either operand is empty, a clone of the other one is returned.
// Otherwise the runs produced by runArrayUnionToRuns are wrapped in a run
// container, which is then returned in the most compact of three forms:
//   - a bitmap container when there are at least 2048 runs and
//     cardMinusOne >= arrayDefaultMaxSize,
//   - an array container when twice the number of runs exceeds the
//     cardinality (1+cardMinusOne),
//   - the run container itself otherwise.
func (rc *runContainer16) orArray(ac *arrayContainer) container {
	if ac.isEmpty() {
		return rc.clone()
	}
	if rc.isEmpty() {
		return ac.clone()
	}
	intervals, cardMinusOne := runArrayUnionToRuns(rc, ac)
	result := newRunContainer16TakeOwnership(intervals)
	if len(intervals) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
		return newBitmapContainerFromRun(result)
	}
	if len(intervals)*2 > 1+int(cardMinusOne) {
		return result.toArrayContainer()
	}
	return result
}

// orArray finds the union of rc and ac.
Expand Down Expand Up @@ -2197,13 +2205,88 @@ func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
}

func (rc *runContainer16) iorArray(ac *arrayContainer) container {
it := ac.getShortIterator()
for it.hasNext() {
rc.Add(it.next())
if rc.isEmpty() {
return ac.clone()
}
if ac.isEmpty() {
return rc
}
var cardMinusOne uint16
//TODO: perform the union algorithm in-place using rc.iv
// this can be done with methods like the in-place array container union
// but maybe lazily moving the remaining elements back.
rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
if len(rc.iv) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
return newBitmapContainerFromRun(rc)
}
if len(rc.iv)*2 > 1+int(cardMinusOne) {
return rc.toArrayContainer()
}
return rc
}

// runArrayUnionToRuns merges the runs of rc with the values of ac into a
// freshly allocated list of intervals in which overlapping or adjacent
// pieces have been coalesced. Neither rc nor ac is modified.
//
// It returns the merged intervals and the cardinality of the union minus one.
// Reporting cardinality-1 keeps the count within uint16 even when the union
// holds all 65536 values. If both inputs are empty the result is (nil, 0).
//
// Assumes ac.content is sorted ascending and rc.iv is sorted by start with
// no overlapping runs — TODO confirm against the container invariants.
func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16, uint16) {
	pos1 := 0
	pos2 := 0
	length1 := len(ac.content)
	length2 := len(rc.iv)
	if length1 == 0 && length2 == 0 {
		return nil, 0
	}
	target := make([]interval16, 0, length2)

	// Seed the interval under construction with whichever input starts
	// first; on a tie (or when the array is empty) the run wins.
	var previousInterval interval16
	var cardMinusOne uint16
	if length2 == 0 || length1 > 0 && ac.content[0] < rc.iv[0].start {
		previousInterval.start = ac.content[0]
		previousInterval.length = 0
		pos1++
	} else {
		previousInterval = rc.iv[0]
		pos2++
	}

	for pos1 < length1 || pos2 < length2 {
		if pos1 < length1 {
			s1 := ac.content[pos1]
			// Value already covered by the current interval.
			if s1 <= previousInterval.last() {
				pos1++
				continue
			}
			// s1 > last() here, so s1-1 cannot underflow; a match means
			// the value directly extends the current interval.
			if s1-1 == previousInterval.last() {
				previousInterval.length++
				pos1++
				continue
			}
		}
		if pos2 < length2 {
			range2 := rc.iv[pos2]
			// Run overlaps or is directly adjacent: absorb it.
			if range2.start <= previousInterval.last() || range2.start > 0 && range2.start-1 == previousInterval.last() {
				pos2++
				if previousInterval.last() < range2.last() {
					previousInterval.length = range2.last() - previousInterval.start
				}
				continue
			}
		}
		// Nothing can extend the current interval any more: emit it and
		// count all of its length+1 values.
		cardMinusOne += previousInterval.length + 1
		target = append(target, previousInterval)
		if pos2 == length2 || pos1 < length1 && ac.content[pos1] < rc.iv[pos2].start {
			previousInterval.start = ac.content[pos1]
			previousInterval.length = 0
			pos1++
		} else {
			previousInterval = rc.iv[pos2]
			pos2++
		}
	}
	// The last interval contributes only its length (values minus one); this
	// is where the "minus one" of the result comes from, and it prevents the
	// uint16 counter from wrapping when the union is the full 0..65535 range.
	cardMinusOne += previousInterval.length
	target = append(target, previousInterval)

	return target, cardMinusOne
}

// lazyIOR is described (not yet implemented) in
// this nice note from @lemire on
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
Expand Down

0 comments on commit 2feadb1

Please sign in to comment.