Skip to content

Commit

Permalink
bloom: use Span instead of std::vector for insert and contains
Browse files Browse the repository at this point in the history
We can avoid many unnecessary std::vector allocations by changing
CBloomFilter to take Spans instead of std::vectors for the `insert`
and `contains` operations.

CBloomFilter currently converts types such as CDataStream and uint256
to std::vector on `insert` and `contains`. This is unnecessary because
CDataStream and uint256 already hold their bytes in contiguous storage
internally. We just need a way to point to the right data within those
types. Span gives us this ability.

Signed-off-by: William Casarin <[email protected]>
  • Loading branch information
jb55 authored and fanquake committed Sep 29, 2021
1 parent 3c776fd commit 2ba4ddf
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 45 deletions.
43 changes: 9 additions & 34 deletions src/bloom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ CBloomFilter::CBloomFilter(const unsigned int nElements, const double nFPRate, c
{
}

inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, Span<const unsigned char> vDataToHash) const
{
// 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (vData.size() * 8);
}

void CBloomFilter::insert(const std::vector<unsigned char>& vKey)
void CBloomFilter::insert(Span<const unsigned char> vKey)
{
if (vData.empty()) // Avoid divide-by-zero (CVE-2013-5700)
return;
Expand All @@ -59,17 +59,10 @@ void CBloomFilter::insert(const COutPoint& outpoint)
{
CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
stream << outpoint;
std::vector<unsigned char> data(stream.begin(), stream.end());
insert(data);
insert(stream);
}

void CBloomFilter::insert(const uint256& hash)
{
std::vector<unsigned char> data(hash.begin(), hash.end());
insert(data);
}

bool CBloomFilter::contains(const std::vector<unsigned char>& vKey) const
bool CBloomFilter::contains(Span<const unsigned char> vKey) const
{
if (vData.empty()) // Avoid divide-by-zero (CVE-2013-5700)
return true;
Expand All @@ -87,14 +80,7 @@ bool CBloomFilter::contains(const COutPoint& outpoint) const
{
CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
stream << outpoint;
std::vector<unsigned char> data(stream.begin(), stream.end());
return contains(data);
}

bool CBloomFilter::contains(const uint256& hash) const
{
std::vector<unsigned char> data(hash.begin(), hash.end());
return contains(data);
return contains(MakeUCharSpan(stream));
}

bool CBloomFilter::IsWithinSizeConstraints() const
Expand Down Expand Up @@ -198,7 +184,8 @@ CRollingBloomFilter::CRollingBloomFilter(const unsigned int nElements, const dou
}

/* Similar to CBloomFilter::Hash */
static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, const std::vector<unsigned char>& vDataToHash) {
static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, Span<const unsigned char> vDataToHash)
{
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
}

Expand All @@ -210,7 +197,7 @@ static inline uint32_t FastMod(uint32_t x, size_t n) {
return ((uint64_t)x * (uint64_t)n) >> 32;
}

void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
{
if (nEntriesThisGeneration == nEntriesPerGeneration) {
nEntriesThisGeneration = 0;
Expand Down Expand Up @@ -241,13 +228,7 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
}
}

void CRollingBloomFilter::insert(const uint256& hash)
{
std::vector<unsigned char> vData(hash.begin(), hash.end());
insert(vData);
}

bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
bool CRollingBloomFilter::contains(Span<const unsigned char> vKey) const
{
for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = RollingBloomHash(n, nTweak, vKey);
Expand All @@ -261,12 +242,6 @@ bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
return true;
}

bool CRollingBloomFilter::contains(const uint256& hash) const
{
std::vector<unsigned char> vData(hash.begin(), hash.end());
return contains(vData);
}

void CRollingBloomFilter::reset()
{
nTweak = GetRand(std::numeric_limits<unsigned int>::max());
Expand Down
15 changes: 5 additions & 10 deletions src/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

class COutPoint;
class CTransaction;
class uint256;

//! 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
static const unsigned int MAX_BLOOM_FILTER_SIZE = 36000; // bytes
Expand Down Expand Up @@ -49,7 +48,7 @@ class CBloomFilter
unsigned int nTweak;
unsigned char nFlags;

unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
unsigned int Hash(unsigned int nHashNum, Span<const unsigned char> vDataToHash) const;

public:
/**
Expand All @@ -66,13 +65,11 @@ class CBloomFilter

SERIALIZE_METHODS(CBloomFilter, obj) { READWRITE(obj.vData, obj.nHashFuncs, obj.nTweak, obj.nFlags); }

void insert(const std::vector<unsigned char>& vKey);
void insert(Span<const unsigned char> vKey);
void insert(const COutPoint& outpoint);
void insert(const uint256& hash);

bool contains(const std::vector<unsigned char>& vKey) const;
bool contains(Span<const unsigned char> vKey) const;
bool contains(const COutPoint& outpoint) const;
bool contains(const uint256& hash) const;

//! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
//! (catch a filter which was just deserialized which was too big)
Expand Down Expand Up @@ -112,10 +109,8 @@ class CRollingBloomFilter
public:
CRollingBloomFilter(const unsigned int nElements, const double nFPRate);

void insert(const std::vector<unsigned char>& vKey);
void insert(const uint256& hash);
bool contains(const std::vector<unsigned char>& vKey) const;
bool contains(const uint256& hash) const;
void insert(Span<const unsigned char> vKey);
bool contains(Span<const unsigned char> vKey) const;

void reset();

Expand Down
1 change: 1 addition & 0 deletions src/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <hash.h>
#include <span.h>
#include <crypto/common.h>
#include <crypto/hmac_sha512.h>

Expand Down
2 changes: 1 addition & 1 deletion src/test/bloom_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ BOOST_AUTO_TEST_CASE(bloom_create_insert_key)
CBloomFilter filter(2, 0.001, 0, BLOOM_UPDATE_ALL);
filter.insert(vchPubKey);
uint160 hash = pubkey.GetID();
filter.insert(std::vector<unsigned char>(hash.begin(), hash.end()));
filter.insert(hash);

CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
stream << filter;
Expand Down

0 comments on commit 2ba4ddf

Please sign in to comment.