Skip to content

Commit

Permalink
Reduce size of sort_key from k + 1 to k bits in phases 2 and 3
Browse files Browse the repository at this point in the history
k bits are sufficient to encode a table entry's position once unused
entries have been removed in phase 2.
  • Loading branch information
rostislav authored and hoffmang9 committed Apr 2, 2021
1 parent a3c390f commit 7bacb3a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/entry_sizes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ class EntrySizes {
// a: sort_key, pos, offset or
// b: line_point, sort_key
return Util::ByteAlign(
std::max(static_cast<uint32_t>(k + 1 + (k) + kOffsetSize),
static_cast<uint32_t>(2 * k + k + 1))) /
std::max(static_cast<uint32_t>(2 * k + kOffsetSize),
static_cast<uint32_t>(3 * k))) /
8;
case 7:
default:
Expand Down
8 changes: 4 additions & 4 deletions src/phase2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ Phase2Results RunPhase2(
uint32_t const log_num_buckets,
bool const show_progress)
{
// An extra bit is used, since we may have more than 2^k entries in a table. (After pruning,
// each table will have 0.8*2^k or fewer entries).
// After pruning, each table will have 0.865 * 2^k or fewer entries on
// average, so k bits are sufficient to encode an entry's position.
uint8_t const pos_size = k;
uint8_t const pos_offset_size = pos_size + kOffsetSize;
uint8_t const write_counter_shift = 128 - (k + 1);
uint8_t const write_counter_shift = 128 - k;
uint8_t const pos_offset_shift = write_counter_shift - pos_offset_size;
uint8_t const f7_shift = 128 - k;
uint8_t const t7_pos_offset_shift = f7_shift - pos_offset_size;
Expand Down Expand Up @@ -160,7 +160,7 @@ Phase2Results RunPhase2(
uint16_t(entry_size),
tmp_dirname,
filename + ".p2.t" + std::to_string(table_index),
uint32_t(k) + 1,
uint32_t(k),
0,
strategy_t::quicksort_last);

Expand Down
9 changes: 5 additions & 4 deletions src/phase3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,10 @@ Phase3Results RunPhase3(
Disk& right_disk = res2.disk_for_table(table_index + 1);
Disk& left_disk = res2.disk_for_table(table_index);

// Sort key for table 7 is just y, which is k bits. For all other tables it can
// be higher than 2^k and therefore k+1 bits are used.
uint32_t right_sort_key_size = table_index == 6 ? k : k + 1;
// Sort key is k bits for all tables. For table 7 it is just y, which
// is k bits, and for all other tables the number of entries does not
// exceed 0.865 * 2^k on average.
uint32_t right_sort_key_size = k;

uint32_t left_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index, false);
right_entry_size_bytes = EntrySizes::GetMaxEntrySize(k, table_index + 1, false);
Expand Down Expand Up @@ -302,7 +303,7 @@ Phase3Results RunPhase3(
} else {
// k+1 bits in case it overflows
left_new_pos[current_pos % kCachedPositionsSize] =
Util::SliceInt64FromBytes(left_entry_disk_buf, k + 1, k + 1);
Util::SliceInt64FromBytes(left_entry_disk_buf, right_sort_key_size, k + 1);
}
}

Expand Down

0 comments on commit 7bacb3a

Please sign in to comment.