diff --git a/src/entry_sizes.hpp b/src/entry_sizes.hpp index f2286194d..41140a111 100644 --- a/src/entry_sizes.hpp +++ b/src/entry_sizes.hpp @@ -70,6 +70,13 @@ class EntrySizes { } } + // Get size of entries containing (sort_key, pos, offset). Such entries are + // written to table 7 in phase 1 and to tables 2-7 in phase 2. + static uint32_t GetKeyPosOffsetSize(uint8_t k) + { + return cdiv(2 * k + kOffsetSize, 8); + } + // Calculates the size of one C3 park. This will store bits for each f7 between // two C1 checkpoints, depending on how many times that f7 is present. For low // values of k, we need extra space to account for the additional variability. diff --git a/src/phase1.hpp b/src/phase1.hpp index c88fc7aae..edace6239 100644 --- a/src/phase1.hpp +++ b/src/phase1.hpp @@ -597,6 +597,7 @@ std::vector RunPhase1( uint32_t const log_num_buckets, uint32_t const stripe_size, uint8_t const num_threads, + bool const enable_bitfield, bool const show_progress) { std::cout << "Computing table 1" << std::endl; @@ -652,9 +653,19 @@ std::vector RunPhase1( // Determines how many bytes the entries in our left and right tables will take up. uint32_t const entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index, true); - uint32_t const compressed_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index, false); + uint32_t compressed_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index, false); right_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index + 1, true); + if (enable_bitfield && table_index != 1) { + // We only write pos and offset to tables 2-6 after removing + // metadata + compressed_entry_size_bytes = cdiv(k + kOffsetSize, 8); + if (table_index == 6) { + // Table 7 will contain f7, pos and offset + right_entry_size_bytes = EntrySizes::GetKeyPosOffsetSize(k); + } + } + std::cout << "Computing table " << int{table_index + 1} << std::endl; // Start of parallel execution diff --git a/src/phase2.hpp b/src/phase2.hpp index 038efc153..b6e28b207 100644 --- a/src/phase2.hpp +++ b/src/phase2.hpp @@ -60,6 +60,7 @@ Phase2Results RunPhase2( uint8_t const pos_offset_shift = write_counter_shift - pos_offset_size; uint8_t const f7_shift = 128 - k; uint8_t const t7_pos_offset_shift = f7_shift - pos_offset_size; + uint8_t const new_entry_size = EntrySizes::GetKeyPosOffsetSize(k); std::vector new_table_sizes(8, 0); new_table_sizes[7] = table_sizes[7]; @@ -102,7 +103,7 @@ Phase2Results RunPhase2( next_bitfield.clear(); int64_t const table_size = table_sizes[table_index]; - int16_t const entry_size = EntrySizes::GetMaxEntrySize(k, table_index, false); + int16_t const entry_size = cdiv(k + kOffsetSize + (table_index == 7 ? k : 0), 8); BufferedDisk disk(&tmp_1_disks[table_index], table_size * entry_size); @@ -157,7 +158,7 @@ Phase2Results RunPhase2( table_index == 2 ? memory_size : memory_size / 2, num_buckets, log_num_buckets, - uint16_t(entry_size), + new_entry_size, tmp_dirname, filename + ".p2.t" + std::to_string(table_index), uint32_t(k), @@ -265,7 +266,7 @@ Phase2Results RunPhase2( return { FilteredDisk(std::move(disk), std::move(current_bitfield), entry_size) - , BufferedDisk(&tmp_1_disks[7], new_table_sizes[7] * EntrySizes::GetMaxEntrySize(k, 7, false)) + , BufferedDisk(&tmp_1_disks[7], new_table_sizes[7] * new_entry_size) , std::move(output_files) , std::move(new_table_sizes) }; diff --git a/src/phase3.hpp b/src/phase3.hpp index 528529153..acddc6381 100644 --- a/src/phase3.hpp +++ b/src/phase3.hpp @@ -181,6 +181,7 @@ Phase3Results RunPhase3( uint32_t right_sort_key_size = k; uint32_t left_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index, false); + uint32_t p2_entry_size_bytes = EntrySizes::GetKeyPosOffsetSize(k); right_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index + 1, false); uint64_t left_reader = 0; @@ -242,8 +243,8 @@ Phase3Results RunPhase3( } // The right entries are in the format from backprop, (sort_key, pos, // offset) - uint8_t const* right_entry_buf = right_disk.Read(right_reader, right_entry_size_bytes); - right_reader += right_entry_size_bytes; + uint8_t const* right_entry_buf = right_disk.Read(right_reader, p2_entry_size_bytes); + right_reader += p2_entry_size_bytes; right_reader_count++; entry_sort_key = diff --git a/src/plotter_disk.hpp b/src/plotter_disk.hpp index 2b6c409e0..d2038b7be 100644 --- a/src/plotter_disk.hpp +++ b/src/plotter_disk.hpp @@ -227,6 +227,7 @@ class DiskPlotter { log_num_buckets, stripe_size, num_threads, + !nobitfield, show_progress); p1.PrintElapsed("Time for phase 1 =");