Skip to content

Commit

Permalink
heap allocate bitfield
Browse files Browse the repository at this point in the history
  • Loading branch information
arvidn authored and hoffmang9 committed Nov 9, 2020
1 parent 2581208 commit 95e0398
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 56 deletions.
21 changes: 9 additions & 12 deletions src/bitfield.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@

#pragma once

#include <memory>

struct bitfield
{
bitfield(uint8_t* buffer, int64_t num_bytes)
: buffer_(reinterpret_cast<uint64_t*>(buffer))
, size_(num_bytes / 8)
explicit bitfield(int64_t size)
: buffer_(new uint64_t[(size + 63) / 64])
, size_((size + 63) / 64)
{
// we want this buffer to be 8-byte aligned
// both the pointer and size
assert((uintptr_t(buffer) & 7) == 0);
assert((num_bytes % 8) == 0);

clear();
}

Expand All @@ -42,7 +39,7 @@ struct bitfield

void clear()
{
std::memset(buffer_, 0, size_ * 8);
std::memset(buffer_.get(), 0, size_ * 8);
}

int64_t size() const { return size_ * 64; }
Expand All @@ -59,8 +56,8 @@ struct bitfield
assert((start_bit % 64) == 0);
assert(start_bit <= end_bit);

uint64_t const* start = buffer_ + start_bit / 64;
uint64_t const* end = buffer_ + end_bit / 64;
uint64_t const* start = buffer_.get() + start_bit / 64;
uint64_t const* end = buffer_.get() + end_bit / 64;
int64_t ret = 0;
while (start != end) {
#ifdef _MSC_VER
Expand All @@ -82,7 +79,7 @@ struct bitfield
return ret;
}
private:
uint64_t* buffer_;
std::unique_ptr<uint64_t[]> buffer_;

// number of 64-bit words
int64_t size_;
Expand Down
30 changes: 4 additions & 26 deletions src/phase2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ struct Phase2Results
// to final values in f7, to minimize disk usage. A sort on disk is applied to each table,
// so that they are sorted by position.
Phase2Results RunPhase2(
uint8_t *memory,
std::vector<FileDisk> &tmp_1_disks,
std::vector<uint64_t> table_sizes,
uint8_t const k,
Expand All @@ -51,18 +50,6 @@ Phase2Results RunPhase2(
uint32_t const num_buckets,
uint32_t const log_num_buckets)
{
// memory is split in two halves.

// The first half is used for read cache of the table we're reading.

// The second half is used for the sort_manager bucket cache, except for
// table 7, where we don't use the sort_manager; then it's used as a write
// cache for the table, as we update it.

// As the last step, we compact table 1. At that point the halves are also
// used as the read and write caches for the table, since we read from and
// write back to the same file.

// An extra bit is used, since we may have more than 2^k entries in a table. (After pruning,
// each table will have 0.8*2^k or fewer entries).
uint8_t const pos_size = k;
Expand All @@ -84,20 +71,11 @@ Phase2Results RunPhase2(
// At the end of the iteration, we transfer the next_bitfield to the current bitfield
// to use it to prune the next table to scan.

int64_t const max_table_size_bytes = *std::max_element(table_sizes.begin()
, table_sizes.end()) / 8;

int64_t const bitfield_memory_size = (max_table_size_bytes + 7) & ~uint64_t(7);
assert((bitfield_memory_size % 8) == 0);
assert((uintptr_t(memory) % 8) == 0);
int64_t const max_table_size = *std::max_element(table_sizes.begin()
, table_sizes.end());

// TODO: memory should be wrapped up in a stack allocator to simplify this
bitfield next_bitfield(memory, bitfield_memory_size);
memory += bitfield_memory_size;
memory_size -= bitfield_memory_size;
bitfield current_bitfield(memory, bitfield_memory_size);
memory += bitfield_memory_size;
memory_size -= bitfield_memory_size;
bitfield next_bitfield(max_table_size);
bitfield current_bitfield(max_table_size);

std::vector<std::unique_ptr<SortManager>> output_files;

Expand Down
4 changes: 2 additions & 2 deletions src/phase3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ Phase3Results RunPhase3(
}

R_sort_manager = std::make_unique<SortManager>(
memory_size / 2,
(table_index == 1 || table_index == 6) ? memory_size : (memory_size / 2),
num_buckets,
log_num_buckets,
right_entry_size_bytes,
Expand Down Expand Up @@ -365,7 +365,7 @@ Phase3Results RunPhase3(
// reader
R_sort_manager->FreeMemory();
L_sort_manager = std::make_unique<SortManager>(
memory_size / 2,
(table_index == 6) ? memory_size : (memory_size / 2),
num_buckets,
log_num_buckets,
right_entry_size_bytes,
Expand Down
6 changes: 0 additions & 6 deletions src/plotter_disk.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,8 @@ class DiskPlotter {
<< "Starting phase 2/4: Backpropagation into tmp files... "
<< Timer::GetNow();

// Memory to be used for sorting and buffers
std::unique_ptr<uint8_t[]> memory(new uint8_t[memory_size + 7]);

Timer p2;
Phase2Results res2 = RunPhase2(
memory.get(),
tmp_1_disks,
table_sizes,
k,
Expand All @@ -240,8 +236,6 @@ class DiskPlotter {
log_num_buckets);
p2.PrintElapsed("Time for phase 2 =");

memory.reset();

// Now we open a new file, where the final contents of the plot will be stored.
uint32_t header_size = WriteHeader(tmp2_disk, k, id, memo, memo_len);

Expand Down
15 changes: 5 additions & 10 deletions tests/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,8 +782,7 @@ TEST_CASE("Sort on disk")

TEST_CASE("bitfield-simple")
{
uint64_t buffer[1];
bitfield b(reinterpret_cast<uint8_t*>(buffer), sizeof(buffer));
bitfield b(4);
CHECK(!b.get(0));
CHECK(!b.get(1));
CHECK(!b.get(2));
Expand All @@ -810,8 +809,7 @@ TEST_CASE("bitfield-simple")

TEST_CASE("bitfield-count")
{
uint64_t buffer[8];
bitfield b(reinterpret_cast<uint8_t*>(buffer), sizeof(buffer));
bitfield b(512);

for (int i = 0; i < 512; ++i) {
CHECK(b.count(0, 512) == i);
Expand All @@ -824,8 +822,7 @@ TEST_CASE("bitfield-count")

TEST_CASE("bitfield-count-unaligned")
{
uint64_t buffer[8];
bitfield b(reinterpret_cast<uint8_t*>(buffer), sizeof(buffer));
bitfield b(512);

for (int i = 0; i < 512; ++i) {
b.set(i);
Expand All @@ -838,8 +835,7 @@ TEST_CASE("bitfield-count-unaligned")

TEST_CASE("bitfield_index-simple")
{
uint64_t buffer[1];
bitfield b(reinterpret_cast<uint8_t*>(buffer), sizeof(buffer));
bitfield b(64);
b.set(0);
b.set(1);
b.set(3);
Expand All @@ -856,8 +852,7 @@ TEST_CASE("bitfield_index-simple")

TEST_CASE("bitfield_index-use index")
{
uint64_t buffer[1048576 / 64];
bitfield b(reinterpret_cast<uint8_t*>(buffer), sizeof(buffer));
bitfield b(1048576);
CHECK(b.size() == 1048576);
b.set(1048576 - 3);
b.set(1048576 - 2);
Expand Down

0 comments on commit 95e0398

Please sign in to comment.