partial, try to get f1 cut
AWice committed Sep 11, 2020
1 parent b70c4c9 commit 825b22b
Showing 2 changed files with 168 additions and 9 deletions.
41 changes: 37 additions & 4 deletions src/plotter_disk.hpp
@@ -300,7 +300,8 @@ class DiskPlotter {
case 1:
// Represents f1, x
if (phase_1_size) {
return Util::ByteAlign(k + kExtraBits + k) / 8;
return Util::ByteAlign(k) / 8;
// return Util::ByteAlign(k + kExtraBits + k) / 8;
} else {
// After computing matches, table 1 is rewritten without the f1, which
// is useless after phase1.
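
A quick size check of the change above, as a standalone sketch: the ByteAlign helper below is only a stand-in for Util::ByteAlign (round a bit count up to a whole number of bytes), and kExtraBits = 6 is assumed, as elsewhere in chiapos. For k = 32 the table-1 entry drops from 9 bytes (y and x) to 4 bytes (x only).

#include <cstdint>
#include <iostream>

// Stand-in for Util::ByteAlign: round a bit count up to the next multiple of 8.
static uint32_t ByteAlign(uint32_t num_bits) { return (num_bits + 7) / 8 * 8; }

int main() {
    const uint32_t k = 32;
    const uint32_t kExtraBits = 6;                           // assumed, as in the rest of chiapos
    uint32_t old_bytes = ByteAlign(k + kExtraBits + k) / 8;  // y (k + kExtraBits bits) plus x (k bits)
    uint32_t new_bytes = ByteAlign(k) / 8;                   // x only; f1(x) is recomputed when needed
    std::cout << old_bytes << " -> " << new_bytes << " bytes per table-1 entry\n";  // 9 -> 4 for k = 32
    return 0;
}
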
@@ -445,7 +446,9 @@ class DiskPlotter {
std::cout << "Computing table 1" << std::endl;
Timer f1_start_time;
F1Calculator f1(k, id);
SortOnDiskUtils::F1Calc = f1;
uint64_t x = 0;
x++; // Start from x = 1 if we use [1..2**k - 1] instead of [0..2**k - 1]

uint32_t entry_size_bytes = GetMaxEntrySize(k, 1, true);

@@ -474,9 +477,10 @@ class DiskPlotter {

for (uint64_t lp = 0; lp < (((uint64_t)1) << (k - kBatchSizes)); lp++) {
uint64_t count0 = (x * (uint128_t)k) / kF1BlockSizeBits;
uint64_t count1 = (x + (uint128_t)num_eval + 1) * k / kF1BlockSizeBits;
uint64_t count1 =
(x + (uint128_t)num_eval + (x == 1 ? 0 : 1)) * k / kF1BlockSizeBits;
uint16_t start_bit = (x * (uint128_t)k) % kF1BlockSizeBits;
uint64_t x2 = x + num_eval;
uint64_t x2 = x + num_eval + (x == 1 ? -1 : 0);
t = 0;
while (count0 <= count1) {
chacha8_get_keystream(&enc_ctx, count0++, 1, ciphertext_bytes);
@@ -486,6 +490,32 @@

t = 0;
for (; x < x2; x++) {
// New way: only write L to disk, and bucket_sizes[y.Slice(0, klog)]++
Bits(x, k).ToBytes(buf);
tmp_1_disks[1].Write(plot_file, (buf), entry_size_bytes);
plot_file += entry_size_bytes;

if (start_bit + k < kF1BlockSizeBits) {
bucket_sizes[blocks[t]
.Slice(start_bit, start_bit + kLogNumSortBuckets)
.GetValue()]++;
} else {
if (start_bit + kLogNumSortBuckets < kF1BlockSizeBits) {
bucket_sizes[blocks[t]
.Slice(start_bit, start_bit + kLogNumSortBuckets)
.GetValue()]++;
} else {
bucket_sizes[(blocks[t].Slice(start_bit) +
blocks[t + 1].Slice(
0,
kLogNumSortBuckets - (kF1BlockSizeBits - start_bit)))
.GetValue()]++;
}
t++;
}

/* Below: old way
Bits L_bits = Bits(x, k);
// Takes the first kExtraBits bits from the input, and adds zeroes if it's not
// enough
@@ -514,6 +544,7 @@ class DiskPlotter {
buf, entry_size_bytes, 0, kLogNumSortBuckets)] += 1;
t++;
}
*/

// Start bit of the output slice in the current block
start_bit += k;
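
The bucket counting in the new-way block above amounts to taking the first kLogNumSortBuckets bits of the (k + kExtraBits)-bit y = f1(x), read straight out of the keystream block instead of out of a stored entry. A minimal sketch of that rule; SortBucket and the toy constants are illustrative, not chiapos code.

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the rule used above and in BucketStore::Put: the sort
// bucket is the top bucket_log bits of the (k + kExtraBits)-bit f1 value.
static uint64_t SortBucket(uint64_t y, uint32_t k, uint32_t extra_bits, uint32_t bucket_log) {
    return y >> (k + extra_bits - bucket_log);
}

int main() {
    const uint32_t k = 8, kExtraBits = 6, kLogNumSortBuckets = 4;      // toy sizes
    std::vector<uint64_t> bucket_sizes(1u << kLogNumSortBuckets, 0);
    uint64_t y = 0x2abc;                                               // pretend f1(x), 14 bits wide
    bucket_sizes[SortBucket(y, k, kExtraBits, kLogNumSortBuckets)]++;  // lands in bucket 10
    return 0;
}
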
@@ -568,7 +599,9 @@ class DiskPlotter {
0,
bucket_sizes,
memory,
memorySize);
memorySize,
0,
table_index == 1);
if (spare_written > max_spare_written) {
max_spare_written = spare_written;
}
136 changes: 131 additions & 5 deletions src/sort_on_disk.hpp
@@ -73,7 +73,7 @@ class SortOnDiskUtils {

// This is used to virtually prepend F1Calc.CalculateF(entry)
// when comparing two entries.
static F1Calculator F1Calc;
inline static F1Calculator F1Calc;
inline static int MemCmpBitsF1(uint8_t* left_arr, uint8_t* right_arr, uint32_t len)
{
Bits left_prepend = F1Calc.CalculateF(left_arr, 0);
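
MemCmpBitsF1 makes the sort order behave as if y = f1(x) were still stored in front of each entry, by recomputing y on both sides of every comparison. A self-contained sketch of that idea, under the assumption that ties on y fall back to the entry bytes; FakeF1 here is only a stand-in for F1Calculator::CalculateF.

#include <cstdint>
#include <cstring>

// Stand-in for F1Calculator::CalculateF(...).GetValue(); any deterministic function of the
// entry bytes works for illustration.
static uint64_t FakeF1(const uint8_t* entry, uint32_t len) {
    uint64_t y = 0;
    for (uint32_t i = 0; i < len; i++) y = y * 131 + entry[i];
    return y;
}

// Order two x-only entries as if y = f1(x) were stored in front of each of them,
// breaking ties on the entry bytes themselves.
static int CompareWithVirtualF1(const uint8_t* left, const uint8_t* right, uint32_t len) {
    uint64_t yl = FakeF1(left, len);
    uint64_t yr = FakeF1(right, len);
    if (yl != yr) return yl < yr ? -1 : 1;
    return std::memcmp(left, right, len);
}

int main() {
    uint8_t a[4] = {0, 0, 0, 1}, b[4] = {0, 0, 0, 2};
    return CompareWithVirtualF1(a, b, 4) < 0 ? 0 : 1;
}
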
@@ -244,14 +244,16 @@ class BucketStore {
uint32_t entry_len,
uint32_t bits_begin,
uint32_t bucket_log,
uint64_t entries_per_seg)
uint64_t entries_per_seg,
bool virtual_f1 = false)
{
mem_ = mem;
mem_len_ = mem_len;
entry_len_ = entry_len;
bits_begin_ = bits_begin;
bucket_log_ = bucket_log;
entries_per_seg_ = entries_per_seg;
virtual_f1_ = virtual_f1;

for (uint64_t i = 0; i < (1UL << bucket_log); i++) {
bucket_sizes_.push_back(0);
@@ -332,7 +334,13 @@ class BucketStore {
{
assert(new_val_len == entry_len_);
assert(first_empty_seg_id_ != length_);
uint64_t b = SortOnDiskUtils::ExtractNum(new_val, new_val_len, bits_begin_, bucket_log_);
uint64_t b;
if (virtual_f1_) {
b = (SortOnDiskUtils::F1Calc).CalculateF(new_val, 0).GetValue();
b >>= (SortOnDiskUtils::F1Calc).k_ + kExtraBits - bucket_log_;
} else {
b = SortOnDiskUtils::ExtractNum(new_val, new_val_len, bits_begin_, bucket_log_);
}
bucket_sizes_[b] += 1;

// If bucket b contains no segments, or the head segment of bucket b is full, append a new
@@ -500,6 +508,7 @@ class BucketStore {
uint32_t entry_len_;
uint32_t bucket_log_;
uint64_t entries_per_seg_;
bool virtual_f1_;
std::vector<uint64_t> bucket_sizes_;
uint64_t seg_size_;
uint64_t length_;
@@ -554,7 +563,8 @@ class Sorting {
std::vector<uint64_t> bucket_sizes,
uint8_t* mem,
uint64_t mem_len,
int quicksort = 0)
int quicksort = 0,
bool virtual_f1 = false)
{
spare.Truncate(0);
uint64_t length = mem_len / entry_len;
@@ -631,7 +641,8 @@ class Sorting {
}

uint64_t spare_consumed = 0;
BucketStore bstore = BucketStore(mem, mem_len, entry_len, bits_begin, bucket_log, 100);
BucketStore bstore =
BucketStore(mem, mem_len, entry_len, bits_begin, bucket_log, 100, virtual_f1);
uint64_t read_pos = 0;

uint8_t* buf = new uint8_t[entry_len];
@@ -885,6 +896,121 @@ class Sorting {
delete[] common_prefix;
}

inline static void SortInMemoryF1(
Disk& disk,
uint64_t disk_begin,
uint8_t* memory,
uint32_t entry_len,
uint64_t num_entries,
uint32_t bits_begin)
{
uint32_t entry_len_memory = entry_len;
uint64_t memory_len = SortOnDiskUtils::RoundSize(num_entries) * entry_len_memory;

uint8_t* swap_space = new uint8_t[entry_len];
uint8_t* buffer = new uint8_t[BUF_SIZE];
uint64_t bucket_length = 0;
while ((1ULL << bucket_length) < 2 * num_entries) bucket_length++;
memset(memory, 0, sizeof(memory[0]) * memory_len);

uint64_t read_pos = disk_begin;
uint64_t buf_size = 0;
uint64_t buf_ptr = 0;
// std::vector<bool> mem_mask(SortOnDiskUtils::RoundSize(num_entries) + 1);
// bitset<BUF_SIZE> buf_mask;
// uint64_t buf_mask_ptr = 0;
// bool temp_bit;

for (uint64_t i = 0; i < num_entries; i++) {
if (buf_size == 0) {
// If read buffer is empty, read from disk and refill it.
buf_size = std::min((uint64_t)BUF_SIZE / entry_len, num_entries - i);
buf_ptr = 0;
disk.Read(read_pos, buffer, buf_size * entry_len);
read_pos += buf_size * entry_len;

// buf_mask_ptr = 0;
// buf_mask.set();
}
buf_size--;
// First unique bits in the entry give the expected position of it in the
// sorted array. We take 'bucket_length' bits starting with the first unique
// one.
// uint64_t base_pos = (*F1Calc).CalculateF(buffer + buf_ptr, 0).GetValue();
// base_pos >>= ((*F1Calc).k_ + kExtraBits - bucket_length);
// uint64_t pos = base_pos * entry_len_memory;
uint64_t pos = ((SortOnDiskUtils::F1Calc).CalculateF(buffer + buf_ptr, 0).GetValue() >>
((SortOnDiskUtils::F1Calc).k_ + kExtraBits - bucket_length)) *
entry_len_memory;

// As long as position is occupied by a previous entry...
// while (mem_mask[base_pos] == 1 && pos < memory_len) {
while (SortOnDiskUtils::IsPositionEmpty(memory + pos, entry_len_memory) == false &&
pos < memory_len) {
// ...store there the minimum between the two and continue to push the
// higher one.

if (SortOnDiskUtils::MemCmpBitsF1(
memory + pos, buffer + buf_ptr, entry_len_memory) > 0) {
// The entry already in memory is larger (in virtual-f1 order): swap it out and keep pushing it.
memcpy(swap_space, memory + pos, entry_len_memory);
memcpy(memory + pos, buffer + buf_ptr, entry_len_memory);
memcpy(buffer + buf_ptr, swap_space, entry_len_memory);

// temp_bit = mem_mask[base_pos];
// mem_mask[base_pos] = buf_mask[buf_mask_ptr];
// buf_mask[buf_mask_ptr] = temp_bit;
}

pos += entry_len_memory;
// base_pos++;
}
// Push the entry in the first free spot.
memcpy(memory + pos, buffer + buf_ptr, entry_len_memory);
buf_ptr += entry_len;

// mem_mask[base_pos] = 1;
// buf_mask[buf_mask_ptr++] = 0;
}

uint64_t entries_written = 0;
buf_size = 0;
memset(buffer, 0, BUF_SIZE);
uint64_t write_pos = disk_begin;
// Search the memory buffer for occupied entries.

// for (uint64_t pos = 0, mem_mask_ptr = 0; entries_written < num_entries && pos <
// memory_len;
// pos += entry_len_memory, mem_mask_ptr++) {
// if (mem_mask[mem_mask_ptr] == 1) {
for (uint64_t pos = 0; entries_written < num_entries && pos < memory_len;
pos += entry_len_memory) {
if (SortOnDiskUtils::IsPositionEmpty(memory + pos, entry_len_memory) == false) {
// We've found an entry.
if (buf_size + entry_len >= BUF_SIZE) {
// Write buffer is full, write it and clean it.
disk.Write(write_pos, buffer, buf_size);
write_pos += buf_size;
entries_written += buf_size / entry_len;
buf_size = 0;
}
// Then the stored entry itself.
memcpy(buffer + buf_size, memory + pos, entry_len_memory);
buf_size += entry_len;
}
}

if (buf_size > 0) {
disk.Write(write_pos, buffer, buf_size);
write_pos += buf_size;
entries_written += buf_size / entry_len;
}

assert(entries_written == num_entries);
delete[] swap_space;
delete[] buffer;
}

inline static void QuickSort(
uint8_t* memory,
uint32_t entry_len,
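SortInMemoryF1 above reuses the SortInMemory placement scheme, except that an entry's target slot comes from its recomputed f1 value rather than from bits stored in the entry. Below is a toy, self-contained version of that insertion step; ToyF1 and the constants are illustrative, and the real code works on byte buffers, using IsPositionEmpty and MemCmpBitsF1 instead of the integer checks shown here.

#include <cstdint>
#include <utility>
#include <vector>

// 14-bit stand-in for f1; the real code calls SortOnDiskUtils::F1Calc.CalculateF(entry, 0).
static uint64_t ToyF1(uint64_t x) { return (x * 2654435761ULL) & 0x3FFF; }

int main() {
    const uint32_t bucket_length = 4;                      // chosen so the table holds >= 2x the entries
    std::vector<uint64_t> table(1u << bucket_length, 0);   // 0 marks an empty slot
    const uint64_t xs[] = {3, 7, 11, 19, 23};              // toy entries, all nonzero

    for (uint64_t x : xs) {
        uint64_t pos = ToyF1(x) >> (14 - bucket_length);   // expected position from the top f1 bits
        uint64_t cur = x;
        while (table[pos] != 0) {                          // slot taken: keep the smaller-f1 entry here
            if (ToyF1(table[pos]) > ToyF1(cur)) std::swap(table[pos], cur);
            pos++;                                         // and push the larger one to the next slot
        }
        table[pos] = cur;                                  // first free slot: done with this entry
    }
    return 0;
}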
