Skip to content

Commit

Permalink
Implemented logicalorcount and logicalandcount
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Mar 17, 2017
1 parent c22d781 commit 075b1b8
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 1 deletion.
99 changes: 99 additions & 0 deletions headers/ewah.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,22 @@ template <class uword = uint32_t> class EWAHBoolArray {
*/
void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const;


/**
* Computes the number of set bits in the logical OR of this bitmap with
* another compressed bitmap. Only the count is returned; no result bitmap
* is produced.
* Running time complexity is proportional to the sum of the compressed
* bitmap sizes.
*
* @param a the other compressed bitmap
* @return the number of set bits in the union
*/
size_t logicalorcount(const EWAHBoolArray &a) const;


/**
* Computes the number of set bits in the logical AND of this bitmap with
* another compressed bitmap. Only the count is returned; no result bitmap
* is produced.
* Running time complexity is proportional to the sum of the compressed
* bitmap sizes.
*
* @param a the other compressed bitmap
* @return the number of set bits in the intersection
*/
size_t logicalandcount(const EWAHBoolArray &a) const;

/**
* computes the logical or with another compressed bitmap
* Return the answer
Expand Down Expand Up @@ -1670,6 +1686,51 @@ void EWAHBoolArray<uword>::logicalor(const EWAHBoolArray &a,
remaining.discharge(container);
}


/**
 * Counts the set bits of (this OR a) without building the resulting bitmap.
 *
 * Both bitmaps are walked in lockstep through their raw iterators. At each
 * step the word with the longer run (the "predator") decides how the other
 * word (the "prey") is consumed; overlapping literal words are OR-ed and
 * popcounted; whatever is left in the longer bitmap is counted as-is.
 *
 * @param a the other compressed bitmap
 * @return the number of set bits in the union; 0 if either raw iterator
 *         yields no word at all
 */
template <class uword>
size_t EWAHBoolArray<uword>::logicalorcount(const EWAHBoolArray &a) const {
  size_t answer = 0;
  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
    return 0;
  }
  // at this point, this should be safe:
  BufferedRunningLengthWord<uword> &rlwi = i.next();
  BufferedRunningLengthWord<uword> &rlwj = j.next();

  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
      BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
      BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
      if (predator.getRunningBit()) {
        // A run of ones fully determines the OR over its whole length:
        // every bit in those words is set regardless of the prey.
        answer += predator.getRunningLength() * wordinbits;
        // The prey must still be advanced past the same words; otherwise
        // they would be visited again later and the two streams would fall
        // out of alignment.
        prey.discardFirstWordsWithReload(predator.getRunningLength());
      } else {
        // A run of zeros contributes nothing: the OR equals the prey over
        // that span, so count (and consume) the prey's words directly.
        prey.dischargeCount(predator.getRunningLength(), &answer);
      }
      predator.discardRunningWordsWithReload();
    }

    // Neither side has a run left: OR the literal words both sides share.
    const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
                                         rlwj.getNumberOfLiteralWords());
    if (nbre_literal > 0) {
      for (size_t k = 0; k < nbre_literal; ++k) {
        answer += countOnes(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k));
      }
      rlwi.discardLiteralWordsWithReload(nbre_literal);
      rlwj.discardLiteralWordsWithReload(nbre_literal);
    }
  }
  // One side is exhausted; everything left in the other belongs to the union.
  const bool i_remains = rlwi.size() > 0;
  BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
  answer += remaining.dischargeCount();
  return answer;
}



template <class uword>
void EWAHBoolArray<uword>::logicalxor(const EWAHBoolArray &a,
EWAHBoolArray &container) const {
Expand Down Expand Up @@ -1759,6 +1820,44 @@ void EWAHBoolArray<uword>::logicaland(const EWAHBoolArray &a,
container.setSizeInBits(sizeInBits());
}

/**
 * Counts the set bits of (this AND a) without building the resulting bitmap.
 *
 * The two bitmaps are traversed together through their raw iterators. Runs
 * are resolved first (a run of zeros wipes out the matching words of the
 * other side, a run of ones lets the other side's words through), then the
 * literal words both sides share are AND-ed and popcounted. Once either
 * side is exhausted nothing more can contribute to the intersection.
 *
 * @param a the other compressed bitmap
 * @return the number of set bits in the intersection; 0 if either raw
 *         iterator yields no word at all
 */
template <class uword>
size_t EWAHBoolArray<uword>::logicalandcount(const EWAHBoolArray &a) const {
  EWAHBoolArrayRawIterator<uword> other_it = a.raw_iterator();
  EWAHBoolArrayRawIterator<uword> self_it = raw_iterator();
  if (!other_it.hasNext() || !self_it.hasNext()) {
    // Not expected in practice, but there is nothing to intersect.
    return 0;
  }

  // Both iterators have at least one word, so next() is safe here.
  BufferedRunningLengthWord<uword> &rlwi = other_it.next();
  BufferedRunningLengthWord<uword> &rlwj = self_it.next();
  size_t total = 0;

  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
    // Resolve runs until neither side has one pending.
    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
      const bool j_run_is_longer =
          rlwi.getRunningLength() < rlwj.getRunningLength();
      BufferedRunningLengthWord<uword> &longer = j_run_is_longer ? rlwj : rlwi;
      BufferedRunningLengthWord<uword> &shorter = j_run_is_longer ? rlwi : rlwj;
      if (longer.getRunningBit()) {
        // Run of ones: the AND equals the other side over that span.
        shorter.dischargeCount(longer.getRunningLength(), &total);
      } else {
        // Run of zeros: the AND is zero, just skip the other side's words.
        shorter.discardFirstWordsWithReload(longer.getRunningLength());
      }
      longer.discardRunningWordsWithReload();
    }

    const size_t shared_literals = std::min(rlwi.getNumberOfLiteralWords(),
                                            rlwj.getNumberOfLiteralWords());
    if (shared_literals == 0) {
      continue;
    }
    for (size_t w = 0; w < shared_literals; ++w) {
      total += countOnes(rlwi.getLiteralWordAt(w) & rlwj.getLiteralWordAt(w));
    }
    rlwi.discardLiteralWordsWithReload(shared_literals);
    rlwj.discardLiteralWordsWithReload(shared_literals);
  }
  return total;
}

template <class uword>
bool EWAHBoolArray<uword>::intersects(const EWAHBoolArray &a) const {
EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
Expand Down
41 changes: 41 additions & 0 deletions headers/runninglengthword.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ template <class uword> class EWAHBoolArrayRawIterator;
*/
template <class uword = uint32_t> class BufferedRunningLengthWord {
public:
enum { wordinbits = sizeof(uword) * 8 };

BufferedRunningLengthWord(const uword &data,
EWAHBoolArrayRawIterator<uword> *p)
Expand Down Expand Up @@ -234,6 +235,46 @@ template <class uword = uint32_t> class BufferedRunningLengthWord {
break;
}
}
// Consumes everything that is left (this word and all words reachable
// through next()) and returns the number of set bits encountered.
size_t dischargeCount() {
  size_t bits = 0;
  while (size() > 0) {
    // A run only contributes when its bit is set.
    if (getRunningBit()) {
      bits += wordinbits * getRunningLength();
    }
    // Then popcount each literal word of the current word.
    const size_t literals = getNumberOfLiteralWords();
    for (size_t w = 0; w < literals; ++w) {
      bits += countOnes(getLiteralWordAt(w));
    }
    if (!next()) {
      break;
    }
  }
  return bits;
}
// Symbolically consumes up to `max` words and returns how many words were
// consumed. The number of set bits found in those words is ADDED to *count;
// the caller's existing value is kept, so it may hold a running total.
size_t dischargeCount(size_t max, size_t *count) {
  size_t consumed = 0;
  for (;;) {
    // The pending run alone would exceed the budget: take only part of it.
    if (consumed + RunningLength > max) {
      const size_t room = max - consumed;
      if (getRunningBit()) {
        *count += room * wordinbits;
      }
      RunningLength -= room;
      return max;
    }
    // The whole run fits.
    if (getRunningBit()) {
      *count += RunningLength * wordinbits;
    }
    consumed += RunningLength;
    // The literal words would exceed the budget: take only the first few.
    if (NumberOfLiteralWords + consumed > max) {
      const size_t room = max - consumed;
      for (size_t w = 0; w < room; ++w) {
        *count += countOnes(getLiteralWordAt(w));
      }
      RunningLength = 0;
      // NOTE(review): only NumberOfLiteralWords is reduced here; presumably
      // getLiteralWordAt(0) then addresses the first unconsumed literal.
      // Confirm against its definition.
      NumberOfLiteralWords -= room;
      return max;
    }
    // All literal words fit; count them and move on to the next word.
    for (size_t w = 0; w < NumberOfLiteralWords; ++w) {
      *count += countOnes(getLiteralWordAt(w));
    }
    consumed += NumberOfLiteralWords;
    if (!next()) {
      return consumed;
    }
  }
}

bool nonzero_discharge() {
while (size() > 0) {
Expand Down
11 changes: 10 additions & 1 deletion src/unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,12 @@ bool testRealData() {
cerr << "bad size at vec 2" << endl;
return false;
}
size_t predictedintersection = ewahs[0]->logicalandcount(*ewahs[1]);
ewahs[0]->logicaland(*ewahs[1], container);
if(container.numberOfOnes() != predictedintersection) {
cerr << "bad logicalandcount" << endl;
return false;
}
container.appendSetBits(va);
if (container.numberOfOnes() != va.size()) {
cout << "Loading bitmaps from file " << filename << endl;
Expand All @@ -1143,8 +1148,12 @@ bool testRealData() {
}

container.reset();

size_t predictedunion = ewahs[0]->logicalorcount(*ewahs[1]);
ewahs[0]->logicalor(*ewahs[1], container);
if(container.numberOfOnes() != predictedunion) {
cerr << "bad logicalorcount" << endl;
return false;
}
container.appendSetBits(vor);
if (container.numberOfOnes() != vor.size()) {
cout << "Loading bitmaps from file " << filename << endl;
Expand Down

0 comments on commit 075b1b8

Please sign in to comment.