Skip to content

Commit

Permalink
fix performance issue in BufferedDisk and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
arvidn authored and hoffmang9 committed Jan 27, 2021
1 parent 2b363e9 commit 72dae59
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 4 deletions.
23 changes: 19 additions & 4 deletions src/disk.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,23 @@ struct BufferedDisk : Disk
NeedReadCache();
// all allocations need 7 bytes head-room, since
// SliceInt64FromBytes() may overrun by 7 bytes
if (read_buffer_start_ <= begin && read_buffer_start_ + read_buffer_size_ > begin + length + 7) {
if (read_buffer_start_ <= begin
&& read_buffer_start_ + read_buffer_size_ >= begin + length
&& read_buffer_start_ + read_ahead >= begin + length + 7)
{
// if the read is entirely inside the buffer, just return it
return read_buffer_.get() + (begin - read_buffer_start_);
}
else if (begin >= read_buffer_start_ || begin == 0) {
else if (begin >= read_buffer_start_ || begin == 0 || read_buffer_start_ == std::uint64_t(-1)) {

// if the read is beyond the current buffer (i.e.
// forward-sequential) move the buffer forward and read the next
// buffer-capacity number of bytes
// buffer-capacity number of bytes.
// this is also the case we enter the first time we perform a read,
// where we haven't read anything into the buffer yet. Note that
// begin == 0 won't reliably detect that case, sinec we may have
// discarded the first entry and start at some low offset but still
// greater than 0
read_buffer_start_ = begin;
uint64_t const amount_to_read = std::min(file_size_ - read_buffer_start_, read_ahead);
disk_->Read(begin, read_buffer_.get(), amount_to_read);
Expand All @@ -248,7 +257,13 @@ struct BufferedDisk : Disk
}
else {
// ideally this won't happen
assert(false);
std::cout << "Disk read position regressed. It's optimized for forward scans. Performance may suffer\n"
<< " read-offset: " << begin
<< " read-length: " << length
<< " file-size: " << file_size_
<< " read-buffer: [" << read_buffer_start_ << ", " << read_buffer_size_ << "]"
<< " file: " << disk_->GetFileName()
<< '\n';
static uint8_t temp[128];
// all allocations need 7 bytes head-room, since
// SliceInt64FromBytes() may overrun by 7 bytes
Expand Down
117 changes: 117 additions & 0 deletions tests/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -904,3 +904,120 @@ TEST_CASE("bitfield_index edge-sizes")
test_bitfield_size(bitfield_index::kIndexBucket);
test_bitfield_size(bitfield_index::kIndexBucket + 1);
}

namespace {

constexpr int num_test_entries = 2000000;

void write_disk_file(FileDisk& df)
{
std::uint32_t val = 0;
for (int i = 0; i < num_test_entries; ++i) {
df.Write(i * 4, reinterpret_cast<std::uint8_t const*>(&val), 4);
++val;
}
}

}

TEST_CASE("FileDisk")
{
FileDisk d = FileDisk("test_file.bin");
write_disk_file(d);

std::uint32_t val = 0;
for (int i = 0; i < num_test_entries; ++i) {
d.Read(i * 4, reinterpret_cast<std::uint8_t*>(&val), 4);
CHECK(i == val);
}

for (int i = num_test_entries - 1; i > 0; --i) {
d.Read(i * 4, reinterpret_cast<std::uint8_t*>(&val), 4);
CHECK(i == val);
}

remove("test_file.bin");
}

TEST_CASE("BufferedDisk")
{
FileDisk d = FileDisk("test_file.bin");
write_disk_file(d);

BufferedDisk bd(&d, num_test_entries * 4);

for (int i = 0; i < num_test_entries; ++i) {
auto const val = *reinterpret_cast<std::uint32_t const*>(bd.Read(i * 4, 4));
CHECK(i == val);
}

// don't go all the way down to 0, every backwards read cursor movement will
// print a warning
for (int i = num_test_entries - 1; i > num_test_entries / 2 + 200; --i) {
auto const val = *reinterpret_cast<std::uint32_t const*>(bd.Read(i * 4, 4));
CHECK(i == val);
}

remove("test_file.bin");
}

TEST_CASE("FilteredDisk")
{
FileDisk d = FileDisk("test_file.bin");
write_disk_file(d);

SECTION("filter even")
{
BufferedDisk bd(&d, num_test_entries * 4);
// filter every other entry (starting with 0)
bitfield filter(num_test_entries);
for (int i = 0; i < num_test_entries; ++i) {
if ((i & 1) == 1) filter.set(i);
}
FilteredDisk fd(std::move(bd), std::move(filter), 4);

for (int i = 0; i < num_test_entries / 2 - 1; ++i) {
auto const val = *reinterpret_cast<std::uint32_t const*>(fd.Read(i * 4, 4));
CHECK((i * 2) + 1 == val);
}

// don't go all the way down to 0, every backwards read cursor movement will
// print a warning
for (int i = num_test_entries / 2 - 1; i > num_test_entries / 2 + 200; --i) {
auto const val = *reinterpret_cast<std::uint32_t const*>(fd.Read(i * 4, 4));
CHECK((i * 2) + 1 == val);
}
}

SECTION("filter odd")
{
BufferedDisk bd(&d, num_test_entries * 4);
// filter every other entry (starting with 0)
bitfield filter(num_test_entries);
for (int i = 0; i < num_test_entries; ++i) {
if ((i & 1) == 0) filter.set(i);
}
FilteredDisk fd(std::move(bd), std::move(filter), 4);

for (int i = 0; i < num_test_entries / 2 - 1; ++i) {
auto const val = *reinterpret_cast<std::uint32_t const*>(fd.Read(i * 4, 4));
CHECK((i * 2) == val);
}

// don't go all the way down to 0, every backwards read cursor movement will
// print a warning
for (int i = num_test_entries / 2 - 1; i > num_test_entries / 2 + 200; --i) {
auto const val = *reinterpret_cast<std::uint32_t const*>(fd.Read(i * 4, 4));
CHECK((i * 2) == val);
}
}
/*
SECTION("empty bitfield")
{
BufferedDisk bd(&d, num_test_entries * 4);
bitfield filter(num_test_entries);
FilteredDisk fd(std::move(bd), std::move(filter), 4);
}
*/
remove("test_file.bin");
}

0 comments on commit 72dae59

Please sign in to comment.