diff --git a/src/entry_sizes.hpp b/src/entry_sizes.hpp index 49198ac4b..d28eefa1b 100644 --- a/src/entry_sizes.hpp +++ b/src/entry_sizes.hpp @@ -60,8 +60,8 @@ class EntrySizes { // a: sort_key, pos, offset or // b: line_point, sort_key return Util::ByteAlign( - std::max(static_cast(k + 1 + (k) + kOffsetSize), - static_cast(2 * k + k + 1))) / + std::max(static_cast(2 * k + kOffsetSize), + static_cast(3 * k))) / 8; case 7: default: diff --git a/src/phase2.hpp b/src/phase2.hpp index 7ec110e87..038efc153 100644 --- a/src/phase2.hpp +++ b/src/phase2.hpp @@ -52,11 +52,11 @@ Phase2Results RunPhase2( uint32_t const log_num_buckets, bool const show_progress) { - // An extra bit is used, since we may have more than 2^k entries in a table. (After pruning, - // each table will have 0.8*2^k or fewer entries). + // After pruning each table will have 0.865 * 2^k or fewer entries on + // average uint8_t const pos_size = k; uint8_t const pos_offset_size = pos_size + kOffsetSize; - uint8_t const write_counter_shift = 128 - (k + 1); + uint8_t const write_counter_shift = 128 - k; uint8_t const pos_offset_shift = write_counter_shift - pos_offset_size; uint8_t const f7_shift = 128 - k; uint8_t const t7_pos_offset_shift = f7_shift - pos_offset_size; @@ -160,7 +160,7 @@ Phase2Results RunPhase2( uint16_t(entry_size), tmp_dirname, filename + ".p2.t" + std::to_string(table_index), - uint32_t(k) + 1, + uint32_t(k), 0, strategy_t::quicksort_last); diff --git a/src/phase3.hpp b/src/phase3.hpp index e15b28cfa..b2150c482 100644 --- a/src/phase3.hpp +++ b/src/phase3.hpp @@ -173,9 +173,10 @@ Phase3Results RunPhase3( Disk& right_disk = res2.disk_for_table(table_index + 1); Disk& left_disk = res2.disk_for_table(table_index); - // Sort key for table 7 is just y, which is k bits. For all other tables it can - // be higher than 2^k and therefore k+1 bits are used. - uint32_t right_sort_key_size = table_index == 6 ? k : k + 1; + // Sort key is k bits for all tables. For table 7 it is just y, which + // is k bits, and for all other tables the number of entries does not + // exceed 0.865 * 2^k on average. + uint32_t right_sort_key_size = k; uint32_t left_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index, false); right_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index + 1, false); @@ -302,7 +303,7 @@ Phase3Results RunPhase3( } else { // k+1 bits in case it overflows left_new_pos[current_pos % kCachedPositionsSize] = - Util::SliceInt64FromBytes(left_entry_disk_buf, k + 1, k + 1); + Util::SliceInt64FromBytes(left_entry_disk_buf, right_sort_key_size, k + 1); } }