Skip to content

Commit

Permalink
BloomFileTestBase: refactor test fixture
Browse files Browse the repository at this point in the history
clang-tidy flags the gflag definitions in bloomfile-test-base.h for
potential ODR violations. To avoid that, let's move the bulk of the test
fixture into a new .cc file.

Change-Id: I2213ec39f08af8c36fa730598f2b415c9786c6f0
Reviewed-on: http://gerrit.cloudera.org:8080/15446
Reviewed-by: Andrew Wong <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
Tested-by: Kudu Jenkins
  • Loading branch information
adembo committed Mar 17, 2020
1 parent 1c60956 commit 2c01b80
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 97 deletions.
9 changes: 8 additions & 1 deletion src/kudu/cfile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,18 @@ set(CFILE_LIBS

target_link_libraries(cfile ${CFILE_LIBS})

add_library(cfile_test_util
bloomfile-test-base.cc)

target_link_libraries(cfile_test_util
cfile)

# Tests
SET_KUDU_TEST_LINK_LIBS(cfile)
ADD_KUDU_TEST(index-test)
ADD_KUDU_TEST(cfile-test NUM_SHARDS 4)
ADD_KUDU_TEST(encoding-test LABELS no_tsan)
ADD_KUDU_TEST(block_cache-test)
SET_KUDU_TEST_LINK_LIBS(cfile cfile_test_util)
ADD_KUDU_TEST(bloomfile-test)
ADD_KUDU_TEST(mt-bloomfile-test RUN_SERIAL true)
ADD_KUDU_TEST(block_cache-test)
127 changes: 127 additions & 0 deletions src/kudu/cfile/bloomfile-test-base.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "kudu/cfile/bloomfile-test-base.h"

#include <utility>

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include "kudu/cfile/bloomfile.h"
#include "kudu/cfile/cfile_util.h"
#include "kudu/fs/block_manager.h"
#include "kudu/fs/fs_manager.h"
#include "kudu/gutil/endian.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/bloom_filter.h"
#include "kudu/util/random.h"
#include "kudu/util/random_util.h"
#include "kudu/util/slice.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"

DEFINE_int32(bloom_size_bytes, 4*1024, "Size of each bloom filter");
DEFINE_int32(n_keys, 10*1000, "Number of keys to insert into the file");
DEFINE_double(fp_rate, 0.01F, "False positive rate to aim for");

DEFINE_int64(benchmark_queries, 1000000, "Number of probes to benchmark");
DEFINE_bool(benchmark_should_hit, false, "Set to true for the benchmark to query rows which match");

using std::unique_ptr;

namespace kudu {
namespace cfile {

void BloomFileTestBase::SetUp() {
KuduTest::SetUp();

fs_manager_.reset(new FsManager(env_, FsManagerOpts(GetTestPath("fs_root"))));
ASSERT_OK(fs_manager_->CreateInitialFileSystemLayout());
ASSERT_OK(fs_manager_->Open());
}

void BloomFileTestBase::AppendBlooms(BloomFileWriter* bfw) {
uint64_t key_buf;
Slice key_slice(reinterpret_cast<const uint8_t*>(&key_buf),
sizeof(key_buf));

for (uint64_t i = 0; i < FLAGS_n_keys; i++) {
// Shift the key left a bit so that while querying, we can
// get a good mix of hits and misses while still staying within
// the real key range.
key_buf = BigEndian::FromHost64(i << kKeyShift);
ASSERT_OK_FAST(bfw->AppendKeys(&key_slice, 1));
}
}

void BloomFileTestBase::WriteTestBloomFile() {
unique_ptr<fs::WritableBlock> sink;
ASSERT_OK(fs_manager_->CreateNewBlock({}, &sink));
block_id_ = sink->id();

// Set sizing based on flags
BloomFilterSizing sizing = BloomFilterSizing::BySizeAndFPRate(
FLAGS_bloom_size_bytes, FLAGS_fp_rate);
ASSERT_NEAR(sizing.n_bytes(), FLAGS_bloom_size_bytes, FLAGS_bloom_size_bytes * 0.05);
ASSERT_GT(FLAGS_n_keys, sizing.expected_count())
<< "Invalid parameters: --n_keys isn't set large enough to fill even "
<< "one bloom filter of the requested --bloom_size_bytes";

BloomFileWriter bfw(std::move(sink), sizing);

ASSERT_OK(bfw.Start());
AppendBlooms(&bfw);
ASSERT_OK(bfw.Finish());
}

Status BloomFileTestBase::OpenBloomFile() {
unique_ptr<fs::ReadableBlock> source;
RETURN_NOT_OK(fs_manager_->OpenBlock(block_id_, &source));

return BloomFileReader::Open(std::move(source), ReaderOptions(), &bfr_);
}

uint64_t BloomFileTestBase::ReadBenchmark() {
Random rng(GetRandomSeed32());
uint64_t count_present = 0;
LOG_TIMING(INFO, strings::Substitute("Running $0 queries", FLAGS_benchmark_queries)) {
for (uint64_t i = 0; i < FLAGS_benchmark_queries; i++) {
uint64_t key = rng.Uniform(FLAGS_n_keys);
key <<= kKeyShift;
if (!FLAGS_benchmark_should_hit) {
// Since the keys are bitshifted, setting the last bit
// ensures that none of the queries will match.
key |= 1;
}

key = BigEndian::FromHost64(key);

Slice s(reinterpret_cast<uint8_t *>(&key), sizeof(key));
bool present;
CHECK_OK(bfr_->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
if (present) count_present++;
}
}
return count_present;
}

} // namespace cfile
} // namespace kudu

108 changes: 21 additions & 87 deletions src/kudu/cfile/bloomfile-test-base.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,112 +14,46 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cstdint>
#include <memory>

#include "kudu/cfile/bloomfile.h"
#include "kudu/fs/block_id.h"
#include "kudu/fs/fs_manager.h"
#include "kudu/gutil/endian.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/random.h"
#include "kudu/util/random_util.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/status.h"
#include "kudu/util/test_util.h"
#include "kudu/util/thread.h"

DEFINE_int32(bloom_size_bytes, 4*1024, "Size of each bloom filter");
DEFINE_int32(n_keys, 10*1000, "Number of keys to insert into the file");
DEFINE_double(fp_rate, 0.01f, "False positive rate to aim for");

DEFINE_int64(benchmark_queries, 1000000, "Number of probes to benchmark");
DEFINE_bool(benchmark_should_hit, false, "Set to true for the benchmark to query rows which match");

namespace kudu {
namespace cfile {

static const int kKeyShift = 2;

class BloomFileTestBase : public KuduTest {
public:
void SetUp() OVERRIDE {
KuduTest::SetUp();

fs_manager_.reset(new FsManager(env_, FsManagerOpts(GetTestPath("fs_root"))));
ASSERT_OK(fs_manager_->CreateInitialFileSystemLayout());
ASSERT_OK(fs_manager_->Open());
}

void AppendBlooms(BloomFileWriter *bfw) {
uint64_t key_buf;
Slice key_slice(reinterpret_cast<const uint8_t *>(&key_buf),
sizeof(key_buf));

for (uint64_t i = 0; i < FLAGS_n_keys; i++) {
// Shift the key left a bit so that while querying, we can
// get a good mix of hits and misses while still staying within
// the real key range.
key_buf = BigEndian::FromHost64(i << kKeyShift);
ASSERT_OK_FAST(bfw->AppendKeys(&key_slice, 1));
}
}

void WriteTestBloomFile() {
std::unique_ptr<fs::WritableBlock> sink;
ASSERT_OK(fs_manager_->CreateNewBlock({}, &sink));
block_id_ = sink->id();

// Set sizing based on flags
BloomFilterSizing sizing = BloomFilterSizing::BySizeAndFPRate(
FLAGS_bloom_size_bytes, FLAGS_fp_rate);
ASSERT_NEAR(sizing.n_bytes(), FLAGS_bloom_size_bytes, FLAGS_bloom_size_bytes * 0.05);
ASSERT_GT(FLAGS_n_keys, sizing.expected_count())
<< "Invalid parameters: --n_keys isn't set large enough to fill even "
<< "one bloom filter of the requested --bloom_size_bytes";

BloomFileWriter bfw(std::move(sink), sizing);

ASSERT_OK(bfw.Start());
AppendBlooms(&bfw);
ASSERT_OK(bfw.Finish());
}
static const int kKeyShift = 2;

Status OpenBloomFile() {
std::unique_ptr<fs::ReadableBlock> source;
RETURN_NOT_OK(fs_manager_->OpenBlock(block_id_, &source));
void SetUp() override;

return BloomFileReader::Open(std::move(source), ReaderOptions(), &bfr_);
}
// Creates a test bloomfile on disk. The block ID is written to block_id_.
void WriteTestBloomFile();

uint64_t ReadBenchmark() {
Random rng(GetRandomSeed32());
uint64_t count_present = 0;
LOG_TIMING(INFO, strings::Substitute("Running $0 queries", FLAGS_benchmark_queries)) {
// Opens the bloomfile with block id block_id_ for reading.
// WriteTestBloomFile() must have been called. The reader is written to bfr_.
Status OpenBloomFile();

for (uint64_t i = 0; i < FLAGS_benchmark_queries; i++) {
uint64_t key = rng.Uniform(FLAGS_n_keys);
key <<= kKeyShift;
if (!FLAGS_benchmark_should_hit) {
// Since the keys are bitshifted, setting the last bit
// ensures that none of the queries will match.
key |= 1;
}
// Uses the bloomfile reader in bfr_ to run a simple query benchmark.
//
// Returns the number of queries that were a hit in the bloom filter.
uint64_t ReadBenchmark();

key = BigEndian::FromHost64(key);
FsManager* fs_manager() const { return fs_manager_.get(); }
BloomFileReader* bfr() const { return bfr_.get(); }
BlockId block_id() const { return block_id_; }
private:

Slice s(reinterpret_cast<uint8_t *>(&key), sizeof(key));
bool present;
CHECK_OK(bfr_->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
if (present) count_present++;
}
}
return count_present;
}
// Appends FLAG_n_keys keys to 'bfw'.
static void AppendBlooms(BloomFileWriter* bfw);

protected:
std::unique_ptr<FsManager> fs_manager_;
std::unique_ptr<BloomFileReader> bfr_;
BlockId block_id_;
Expand Down
26 changes: 17 additions & 9 deletions src/kudu/cfile/bloomfile-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@
// specific language governing permissions and limitations
// under the License.

#include "kudu/cfile/bloomfile.h"

#include <cstdint>
#include <cstdlib>
#include <memory>
#include <ostream>
#include <utility>

#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include "kudu/cfile/bloomfile-test-base.h"
#include "kudu/cfile/bloomfile.h"
#include "kudu/cfile/cfile_util.h"
#include "kudu/fs/block_manager.h"
#include "kudu/fs/fs-test-util.h"
Expand All @@ -36,15 +38,21 @@
#include "kudu/util/slice.h"
#include "kudu/util/test_macros.h"

DECLARE_int32(bloom_size_bytes);
DECLARE_int32(n_keys);
DECLARE_double(fp_rate);

DECLARE_int64(benchmark_queries);
DECLARE_bool(benchmark_should_hit);

using kudu::fs::CountingReadableBlock;
using kudu::fs::ReadableBlock;
using std::shared_ptr;
using std::unique_ptr;

namespace kudu {
namespace cfile {

using fs::CountingReadableBlock;
using fs::ReadableBlock;
using std::unique_ptr;

class BloomFileTest : public BloomFileTestBase {

protected:
Expand All @@ -55,7 +63,7 @@ class BloomFileTest : public BloomFileTestBase {
Slice s(reinterpret_cast<char *>(&i_byteswapped), sizeof(i));

bool present = false;
ASSERT_OK_FAST(bfr_->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
ASSERT_OK_FAST(bfr()->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
ASSERT_TRUE(present);
}

Expand All @@ -66,7 +74,7 @@ class BloomFileTest : public BloomFileTestBase {
Slice s(reinterpret_cast<char *>(&key), sizeof(key));

bool present = false;
ASSERT_OK_FAST(bfr_->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
ASSERT_OK_FAST(bfr()->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
if (present) {
positive_count++;
}
Expand Down Expand Up @@ -115,7 +123,7 @@ TEST_F(BloomFileTest, TestLazyInit) {

// Open the bloom file using a "counting" readable block.
unique_ptr<ReadableBlock> block;
ASSERT_OK(fs_manager_->OpenBlock(block_id_, &block));
ASSERT_OK(fs_manager()->OpenBlock(block_id(), &block));
size_t bytes_read = 0;
unique_ptr<ReadableBlock> count_block(
new CountingReadableBlock(std::move(block), &bytes_read));
Expand All @@ -142,7 +150,7 @@ TEST_F(BloomFileTest, TestLazyInit) {

// And let's test non-lazy open for good measure; it should yield the
// same number of bytes read.
ASSERT_OK(fs_manager_->OpenBlock(block_id_, &block));
ASSERT_OK(fs_manager()->OpenBlock(block_id(), &block));
bytes_read = 0;
count_block.reset(new CountingReadableBlock(std::move(block), &bytes_read));
ASSERT_OK(BloomFileReader::Open(std::move(count_block), ReaderOptions(), &reader));
Expand Down

0 comments on commit 2c01b80

Please sign in to comment.