Skip to content

Commit

Permalink
Support for non-trivial value types in DynamicPGMIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
gvinciguerra committed Sep 13, 2021
1 parent 3780dc9 commit aac4678
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 27 deletions.
34 changes: 19 additions & 15 deletions include/pgm/pgm_index_dynamic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,22 @@ class DynamicPGMIndex {
auto it = std::move(level(min_level).begin(), insertion_point, tmp_a.begin());
*it++ = new_item;
it = std::move(insertion_point, level(min_level).end(), it);
tmp_a.resize(std::distance(tmp_a.begin(), it));
auto tmp_size = std::distance(tmp_a.begin(), it);

// Merge subsequent levels
uint8_t merge_limit = level(target).empty() ? target - 1 : target;
for (uint8_t i = 1 + min_level; i <= merge_limit; ++i, alternate = !alternate) {
auto tmp_begin = (alternate ? tmp_a : tmp_b).begin();
auto tmp_end = (alternate ? tmp_a : tmp_b).end();
auto tmp_end = tmp_begin + tmp_size;
auto out_begin = (alternate ? tmp_b : tmp_a).begin();
decltype(out_begin) out_end;

auto can_delete_permanently = i == used_levels - 1;
if (can_delete_permanently)
out_end = merge<true>(tmp_begin, tmp_end, level(i).begin(), level(i).end(), out_begin);
out_end = merge<true, true>(tmp_begin, tmp_end, level(i).begin(), level(i).end(), out_begin);
else
out_end = merge<false>(tmp_begin, tmp_end, level(i).begin(), level(i).end(), out_begin);

(alternate ? tmp_b : tmp_a).resize(std::distance(out_begin, out_end));
(alternate ? tmp_a : tmp_b).clear();
out_end = merge<false, true>(tmp_begin, tmp_end, level(i).begin(), level(i).end(), out_begin);
tmp_size = std::distance(out_begin, out_end);

// Empty this level and the corresponding index
level(i).clear();
Expand All @@ -106,6 +104,7 @@ class DynamicPGMIndex {

level(min_level).clear();
level(target) = std::move(alternate ? tmp_a : tmp_b);
level(target).resize(tmp_size);

// Rebuild index, if needed
if (has_pgm(target))
Expand Down Expand Up @@ -262,7 +261,7 @@ class DynamicPGMIndex {
}

/**
* Returns all the elements with key between and including @p lo and @p hi.
* Returns a copy of the elements with key between and including @p lo and @p hi.
* @param lo lower endpoint of the range query
* @param hi upper endpoint of the range query, must be greater than or equal to @p lo
* @return a vector of key-value pairs satisfying the range query
Expand Down Expand Up @@ -299,10 +298,10 @@ class DynamicPGMIndex {
continue;

auto tmp_size = (alternate ? tmp_a : tmp_b).size();
(alternate ? tmp_b : tmp_a).reserve(tmp_size + range_size);
(alternate ? tmp_b : tmp_a).resize(tmp_size + range_size);
auto tmp_it = (alternate ? tmp_a : tmp_b).begin();
auto out_it = (alternate ? tmp_b : tmp_a).begin();
tmp_size = std::distance(out_it, merge<false>(tmp_it, tmp_it + tmp_size, it_lo, it_hi, out_it));
tmp_size = std::distance(out_it, merge<false, false>(tmp_it, tmp_it + tmp_size, it_lo, it_hi, out_it));
(alternate ? tmp_b : tmp_a).resize(tmp_size);
alternate = !alternate;
}
Expand Down Expand Up @@ -419,27 +418,32 @@ class DynamicPGMIndex {

private:

/**
 * Merges the ranges [@p first1, @p last1) and [@p first2, @p last2) into the output starting at @p result.
 *
 * Elements are compared with operator<. When neither element is less than the other, both inputs are advanced:
 * the element from the first range is written, unless SkipDeleted is set and that element reports deleted(),
 * in which case neither element is written.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so some statements
 * appear twice in a row (an earlier form immediately followed by its replacement, e.g. the two template headers
 * and each plain assignment followed by its `if constexpr (Move)` form). Confirm against the real header before
 * treating this text as compilable code.
 *
 * Presumably both inputs are sorted by key, as in a classic merge; nothing here checks it -- TODO confirm.
 *
 * @tparam SkipDeleted if true, a pair of equivalent elements is dropped when the one from the first range is deleted
 * @tparam Move if true, elements are move-assigned to the output instead of copy-assigned
 * @param first1 iterator to the beginning of the first range
 * @param last1 iterator to the end of the first range
 * @param first2 iterator to the beginning of the second range
 * @param last2 iterator to the end of the second range
 * @param result iterator to the beginning of the output
 * @return iterator past the last element written to the output
 */
template<bool SkipDeleted, typename In1, typename In2, typename OutIterator>
template<bool SkipDeleted, bool Move, typename In1, typename In2, typename OutIterator>
static OutIterator merge(In1 first1, In1 last1, In2 first2, In2 last2, OutIterator result) {
// Main loop: runs while both inputs still have elements
while (first1 != last1 && first2 != last2) {
if (*first2 < *first1) {
// Element of the second range is smaller: emit it
*result = *first2;
if constexpr (Move) *result = std::move(*first2);
else *result = *first2;
++first2;
++result;
} else if (*first1 < *first2) {
// Element of the first range is smaller: emit it
*result = *first1;
if constexpr (Move) *result = std::move(*first1);
else *result = *first1;
++first1;
++result;
} else if (SkipDeleted && first1->deleted()) {
// Equivalent elements and the first-range one is deleted: consume both, emit nothing
++first1;
++first2;
} else {
// Equivalent elements: the first range takes precedence, the second-range element is discarded
*result = *first1;
if constexpr (Move) *result = std::move(*first1);
else *result = *first1;
++first1;
++first2;
++result;
}
}
// At most one input still has elements: append the rest of the first range, then the rest of the second
if constexpr (Move)
return std::move(first2, last2, std::move(first1, last1, result));
return std::copy(first2, last2, std::copy(first1, last1, result));
}

Expand Down Expand Up @@ -700,7 +704,7 @@ class DynamicPGMIndex<K, V, PGMType>::ItemB {
K first;
V second;

ItemB() { /* do not (default-)initialize for a more efficient std::vector<ItemB>::resize */ }
ItemB() = default;
explicit ItemB(const K &key) : flag(true), first(key), second() {}
explicit ItemB(const K &key, const V &value) : flag(false), first(key), second(value) {}

Expand Down
33 changes: 21 additions & 12 deletions test/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,19 @@ TEMPLATE_TEST_CASE_SIG("Mapped PGM-index", "", ((size_t E), E), 8, 32, 128) {
std::remove(tmp_filename.c_str());
}

TEMPLATE_TEST_CASE("Dynamic PGM-index", "", uint32_t*, uint32_t) {
TestType time = 0;
auto rand = std::bind(std::uniform_int_distribution<uint32_t>(0, 1000000000), std::mt19937{42});
auto gen = [&] { return std::pair<uint32_t, TestType>{rand(), ++time}; };
TEMPLATE_TEST_CASE("Dynamic PGM-index", "", uint32_t*, uint32_t, std::string) {
using time_type = uint32_t;
auto make_key = std::bind(std::uniform_int_distribution<uint32_t>(0, 1000000000), std::mt19937{42});
auto make_value = [&] {
static time_type time = 0;
if constexpr (std::is_same_v<TestType, std::string>) return std::to_string(++time);
else return reinterpret_cast<TestType>(++time);
};
auto get_value = [](auto x) {
if constexpr (std::is_same_v<TestType, std::string>) return (time_type) std::stoll(x);
else return x;
};
auto gen = [&] { return std::pair<uint32_t, TestType>{make_key(), make_value()}; };

std::vector<std::pair<uint32_t, TestType>> bulk(GENERATE(0, 10, 1000, 100000));
std::generate(bulk.begin(), bulk.end(), gen);
Expand All @@ -170,18 +179,18 @@ TEMPLATE_TEST_CASE("Dynamic PGM-index", "", uint32_t*, uint32_t) {

// Test lower bound
for (size_t i = 0; i < std::min<size_t>(1000, bulk.size()); ++i) {
auto q = bulk[rand() % bulk.size()];
auto q = bulk[make_key() % bulk.size()];
auto c = pgm.count(q.first);
auto it = pgm.lower_bound(q.first);
REQUIRE(c == 1);
REQUIRE(it->first == q.first);
}

// Overwrite some elements
++time;
for (size_t i = 0; i < std::min<size_t>(10000, bulk.size()); ++i, ++time) {
pgm.insert_or_assign(bulk[i].first, time);
map.insert_or_assign(bulk[i].first, time);
for (size_t i = 0; i < std::min<size_t>(10000, bulk.size()); ++i) {
auto v = make_value();
pgm.insert_or_assign(bulk[i].first, v);
map.insert_or_assign(bulk[i].first, v);
}

// Insert new elements
Expand All @@ -197,7 +206,7 @@ TEMPLATE_TEST_CASE("Dynamic PGM-index", "", uint32_t*, uint32_t) {
auto q = bulk[i];
auto it = pgm.lower_bound(q.first);
REQUIRE(it->first == q.first);
REQUIRE(it->second > q.second);
REQUIRE(get_value(it->second) > get_value(q.second));
REQUIRE(it->second == map.lower_bound(q.first)->second);
}

Expand Down Expand Up @@ -225,8 +234,8 @@ TEMPLATE_TEST_CASE("Dynamic PGM-index", "", uint32_t*, uint32_t) {

// Test range
for (int i = 0; i < 10; ++i) {
auto lo = rand();
auto hi = lo + rand() / 2;
auto lo = make_key();
auto hi = lo + make_key() / 2;
auto range_result = pgm.range(lo, hi);
auto map_it = map.lower_bound(lo);
for (auto[k, v] : range_result) {
Expand Down

0 comments on commit aac4678

Please sign in to comment.