Skip to content

Commit

Permalink
Better move semantics for IdTables (ad-freiburg#1110)
Browse files Browse the repository at this point in the history
When an `IdTable` is moved from, it now becomes a valid `IdTable` with the same number of columns as before the move and zero rows. So far, it became an invalid `IdTable` with no columns. This simplifies the code in various places. In particular, we can now simple call `clear()` on an `IdTable` in order to empty it.

This comes with a small overhead, namely for allocating space for `k` new empty column vectors after the move, where `k` is the number of columns before the move. However, this should not matter in practice because we handle only few `IdTable`s, which are either small (in which case everything is very fast anyway) or they have a large number of rows (in which case the cost for creating `k` empty colums is negligible). Should be performance overhead really matter at some point, we could implement a `small_vector` type, that allocates storage for the columns on the stack when the number of columns is below a certain threshold.
  • Loading branch information
joka921 authored Oct 2, 2023
1 parent cc40054 commit fa3e620
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 70 deletions.
7 changes: 3 additions & 4 deletions src/engine/idTable/CompressedExternalIdTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,10 @@ class CompressedExternalIdTableBase {
}

protected:
// Note: This method is currently needed, because the move semantics of the
// IdTable are not correct.
// Clear the current block. If `reserve` is `true`, we subsequently also
// reserve the `blocksize_`.
void resetCurrentBlock(bool reserve) {
currentBlock_ =
IdTableStatic<NumStaticCols>(numColumns_, writer_.allocator());
currentBlock_.clear();
if (reserve) {
currentBlock_.reserve(blocksize_);
}
Expand Down
82 changes: 21 additions & 61 deletions src/engine/idTable/IdTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
#include <vector>

#include "engine/idTable/IdTableRow.h"
#include "engine/idTable/VectorWithElementwiseMove.h"
#include "global/Id.h"
#include "util/Algorithm.h"
#include "util/AllocatorWithLimit.h"
#include "util/Iterators.h"
#include "util/LambdaHelpers.h"
#include "util/ResetWhenMoved.h"
#include "util/UninitializedAllocator.h"
#include "util/Views.h"

namespace columnBasedIdTable {
// The `IdTable` class is QLever's central data structure. It is used to store
Expand Down Expand Up @@ -114,7 +116,7 @@ class IdTable {
static constexpr int numStaticColumns = NumColumns;
// The actual storage is a plain 1D vector with the logical columns
// concatenated.
using Storage = std::vector<ColumnStorage>;
using Storage = detail::VectorWithElementwiseMove<ColumnStorage>;
using ViewSpans = std::vector<std::span<const T>>;
using Data = std::conditional_t<isView, ViewSpans, Storage>;
using Allocator = decltype(std::declval<ColumnStorage&>().get_allocator());
Expand Down Expand Up @@ -392,63 +394,21 @@ class IdTable {

// Add the `newRow` at the end. Requires that `newRow.size() == numColumns()`,
// otherwise the behavior is undefined (in Release mode) or an assertion will
// fail (in Debug mode).
//
// Note: This behavior is the same for all the overloads of `push_back`. The
// correct size of `newRow` will be checked at compile time if possible (when
// both the size of `newRow` and `numColumns()` are known at compile time. If
// this check cannot be performed, a wrong size of `newRow` will lead to
// undefined behavior which is caught by an `assert` in Debug builds.
void push_back(const std::initializer_list<T>& newRow) requires(!isView) {
// fail (in Debug mode). The `newRow` can be any random access range that
// stores the right type and has the right size.
template <std::ranges::random_access_range RowLike>
requires std::same_as<std::ranges::range_value_t<RowLike>, T>
void push_back(const RowLike& newRow) requires(!isView) {
AD_EXPENSIVE_CHECK(newRow.size() == numColumns());
++numRows_;
for (size_t i = 0; i < numColumns(); ++i) {
data()[i].push_back(*(newRow.begin() + i));
}
}

void push_back(std::span<const T> newRow) requires(!isView) {
AD_CONTRACT_CHECK(newRow.size() == numColumns());
++numRows_;
for (size_t i = 0; i < numColumns(); ++i) {
data()[i].push_back(*(newRow.begin() + i));
}
}

// Overload of `push_back` for `std:array`. If this IdTable is static
// (`NumColumns != 0`), then this is a safe interface, as the correct size of
// `newRow` can be statically checked.
template <size_t N>
void push_back(const std::array<T, N>& newRow)
requires(!isView && (isDynamic || NumColumns == N)) {
if constexpr (isDynamic) {
AD_EXPENSIVE_CHECK(newRow.size() == numColumns());
}
++numRows_;
for (size_t i = 0; i < numColumns(); ++i) {
data()[i].push_back(*(newRow.begin() + i));
}
std::ranges::for_each(ad_utility::integerRange(numColumns()),
[this, &newRow](auto i) {
data()[i].push_back(*(std::begin(newRow) + i));
});
}

// Overload of `push_back` for all kinds of `(const_)row_reference(_proxy)`
// types that are compatible with this `IdTable`.
// Note: This currently excludes rows from `IdTables` with the correct number
// of columns, but with a different allocator. If this should ever be needed,
// we would have to reformulate the `requires()` clause here a little more
// complicated.
template <typename RowT>
requires ad_utility::isTypeContainedIn<
RowT, std::tuple<row_type, row_reference, const_row_reference,
row_reference_restricted, const_row_reference_restricted,
const_row_reference_view_restricted>>
void push_back(const RowT& newRow) requires(!isView) {
if constexpr (isDynamic) {
AD_EXPENSIVE_CHECK(newRow.numColumns() == numColumns());
}
++numRows_;
for (size_t i = 0; i < numColumns(); ++i) {
data()[i].push_back(newRow[i]);
}
void push_back(const std::initializer_list<T>& newRow) requires(!isView) {
push_back(std::ranges::ref_view{newRow});
}

// Create a deep copy of this `IdTable` that owns its memory. In most cases
Expand All @@ -475,13 +435,13 @@ class IdTable {
std::vector<ColumnStorage> newColumns, Allocator allocator = {}) const
requires(!std::is_copy_constructible_v<ColumnStorage>) {
AD_CONTRACT_CHECK(newColumns.size() >= numColumns());
Storage newStorage(
std::make_move_iterator(newColumns.begin()),
std::make_move_iterator(newColumns.begin() + numColumns()));
for (size_t i = 0; i < numColumns(); ++i) {
newStorage[i].insert(newStorage[i].end(), data()[i].begin(),
data()[i].end());
}
Data newStorage(std::make_move_iterator(newColumns.begin()),
std::make_move_iterator(newColumns.begin() + numColumns()));
std::ranges::for_each(
ad_utility::integerRange(numColumns()), [this, &newStorage](auto i) {
newStorage[i].insert(newStorage[i].end(), data()[i].begin(),
data()[i].end());
});
return IdTable<T, NumColumns, ColumnStorage, IsView::False>{
std::move(newStorage), numColumns_, numRows_, allocator};
}
Expand Down
1 change: 1 addition & 0 deletions src/engine/idTable/IdTableRow.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class Row {
const_iterator end() const { return {this, numColumns()}; };

size_t numColumns() const { return data_.size(); }
size_t size() const { return numColumns(); }

friend void swap(Row& a, Row& b) { std::swap(a.data_, b.data_); }

Expand Down
62 changes: 62 additions & 0 deletions src/engine/idTable/VectorWithElementwiseMove.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2023. University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#ifndef QLEVER_VECTORWITHELEMENTWISEMOVE_H
#define QLEVER_VECTORWITHELEMENTWISEMOVE_H

#include <vector>

#include "util/ExceptionHandling.h"

namespace columnBasedIdTable::detail {
// A class that inherits from a `vector<T>`. This class changes the move
// operators of the underlying `vector` as follows: Instead of moving the vector
// as a whole, only the individual elements are moved. This is used for the
// column-based `IdTables` where we move the individual columns, but still want
// a table that was moved from to have the same number of columns as before, but
// with the columns now being empty.

// Note: This class is an implementation detail of the column-based ID
// tables. Its semantics are very particular, so we don't expect it to have a
// use case outside the `IdTable` module.
template <typename T>
struct VectorWithElementwiseMove : public std::vector<T> {
using Base = std::vector<T>;
using Base::Base;
// Defaulted copy operations, inherited from the base class.
VectorWithElementwiseMove(const VectorWithElementwiseMove&) = default;
VectorWithElementwiseMove& operator=(const VectorWithElementwiseMove&) =
default;

// Move operations with the specified semantics.
VectorWithElementwiseMove(VectorWithElementwiseMove&& other) noexcept {
moveImpl(std::move(other));
}
VectorWithElementwiseMove& operator=(
VectorWithElementwiseMove&& other) noexcept {
this->clear();
moveImpl(std::move(other));
return *this;
}
// Nothing changes for the destructors, but we obey the rule of 5.
~VectorWithElementwiseMove() = default;

private:
// The common implementation of the move semantics, move-insert the elements
// of `other` into `this`. Terminate with a readable error message in the
// unlikely case of `std::bad_alloc`.
void moveImpl(VectorWithElementwiseMove&& other) noexcept {
ad_utility::terminateIfThrows(
[&other, self = this] {
AD_CORRECTNESS_CHECK(self->empty());
self->insert(self->end(), std::make_move_iterator(other.begin()),
std::make_move_iterator(other.end()));
},
"Error happened during the move construction or move assignment of an "
"IdTable");
}
};
} // namespace columnBasedIdTable::detail

#endif // QLEVER_VECTORWITHELEMENTWISEMOVE_H
39 changes: 39 additions & 0 deletions src/util/AllocatorWithLimit.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,48 @@ class AllocatorWithLimit {
AllocatorWithLimit() = delete;

template <typename U>
requires(!std::same_as<U, T>)
AllocatorWithLimit(const AllocatorWithLimit<U>& other)
: memoryLeft_(other.getMemoryLeft()){};

// Defaulted copy operations.
AllocatorWithLimit(const AllocatorWithLimit&) = default;
AllocatorWithLimit& operator=(const AllocatorWithLimit&) = default;

// We deliberately let the move assignment call the copy assignment, because
// when a `vector<..., AllocatorWithLimit>` is moved from, we still want the
// vector that was moved from to have a valid allocator that uses the same
// memory limit. Note: We could also implicitly leave out the move operators,
// then the copy operators would be called implicitly, but
// 1. It is hard to reason about things that are implicitly not there.
// 2. This would inhibit move semantics from `std::vector` unless the copy
// operations are also `noexcept`, so we need
// some extra code anyway.
AllocatorWithLimit(AllocatorWithLimit&& other) noexcept try
: AllocatorWithLimit(static_cast<const AllocatorWithLimit&>(other)) {
// Empty body, because all the logic is done by the delegated constructor
// and the catch block.
} catch (const std::exception& e) {
auto log = [&e](auto& stream) {
stream << "The move constructor of `AllocatorWithLimit` threw an "
"exception with message "
<< e.what() << " .This should never happen, terminating"
<< std::endl;
};
log(ad_utility::Log::getLog<ERROR>());
log(std::cerr);
std::terminate();
}
AllocatorWithLimit& operator=(AllocatorWithLimit&& other) noexcept {
ad_utility::terminateIfThrows(
[self = this, &other] {
*self = static_cast<const AllocatorWithLimit&>(other);
},
"The move assignment operator of `AllocatorWithLimit` should never "
"throw in practice.");
return *this;
}

// An allocator must have a function "allocate" with exactly this signature.
// TODO<C++20> : the exact signature of allocate changes
T* allocate(std::size_t n) {
Expand Down
11 changes: 11 additions & 0 deletions src/util/Views.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ struct OwningView
auto end() { return value_.end(); }
};

// Returns a view that contains all the values in `[0, upperBound)`, similar to
// Python's `range` function. Avoids the common pitfall in `std::views::iota`
// that the count variable is only derived from the first argument. For example,
// `std::views::iota(0, size_t(INT_MAX) + 1)` leads to undefined behavior
// because of an integer overflow, but `ad_utility::integerRange(size_t(INT_MAX)
// + 1)` is perfectly safe and behaves as expected.
template <std::unsigned_integral Int>
auto integerRange(Int upperBound) {
return std::views::iota(Int{0}, upperBound);
}

} // namespace ad_utility

#endif // QLEVER_VIEWS_H
37 changes: 37 additions & 0 deletions test/AllocatorWithLimitTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,40 @@ TEST(AllocatorWithLimit, equality) {
ASSERT_EQ(a2, a2);
ASSERT_NE(a1, a2);
}

TEST(AllocatorWithLimit, unlikelyExceptionsDuringCopyingAndMoving) {
struct ThrowOnCopy {
ThrowOnCopy() = default;
ThrowOnCopy& operator=(const ThrowOnCopy&) {
throw std::runtime_error("unexpected copy assign");
}
ThrowOnCopy(const ThrowOnCopy&) {
throw std::runtime_error("unexpected copy construct");
}
ThrowOnCopy& operator=(ThrowOnCopy&&) noexcept = default;
ThrowOnCopy(ThrowOnCopy&&) noexcept = default;
void operator()(ad_utility::MemorySize) const {}
};
AllocatorWithLimit<int> a1{
ad_utility::makeAllocationMemoryLeftThreadsafeObject(20_B),
ThrowOnCopy{}};
auto copy = [&a1]() { [[maybe_unused]] auto a2 = a1; };
auto copyAssign = [&a1]() {
AllocatorWithLimit<int> a2{
ad_utility::makeAllocationMemoryLeftThreadsafeObject(20_B)};
a2 = a1;
};
ASSERT_THROW(copy(), std::runtime_error);
ASSERT_THROW(copyAssign(), std::runtime_error);
auto move = [&a1]() { auto a2 = std::move(a1); };
auto moveAssign = [&a1]() {
AllocatorWithLimit<int> a2{
ad_utility::makeAllocationMemoryLeftThreadsafeObject(20_B)};
a2 = std::move(a1);
};
// The move operations call the copy operations which throw, but are declared
// `noexcept`, so the program dies when they are called.
ASSERT_DEATH(move(), "The move constructor of `AllocatorWithLimit`");
ASSERT_DEATH(moveAssign(),
"The move assignment operator of `AllocatorWithLimit`");
}
46 changes: 46 additions & 0 deletions test/IdTableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <vector>

#include "./util/AllocatorTestHelpers.h"
#include "./util/GTestHelpers.h"
#include "./util/IdTestHelpers.h"
#include "engine/idTable/IdTable.h"
#include "global/Id.h"
Expand Down Expand Up @@ -742,6 +743,51 @@ TEST(IdTableStaticTest, copyAndMove) {
}
}

TEST(IdTableTest, statusAfterMove) {
{
IdTableStatic<3> t1{makeAllocator()};
t1.push_back(std::array{V(1), V(42), V(2304)});

auto t2 = std::move(t1);
// `t1` is valid and still has the same number of columns, but they now are
// empty.
ASSERT_EQ(3, t1.numColumns());
ASSERT_EQ(0, t1.numRows());
ASSERT_NO_THROW(t1.push_back(std::array{V(4), V(16), V(23)}));
ASSERT_EQ(1, t1.numRows());
ASSERT_EQ((static_cast<std::array<Id, 3>>(t1[0])),
(std::array{V(4), V(16), V(23)}));
}
{
using Buffer = ad_utility::BufferedVector<Id>;
Buffer buffer(0, "IdTableTest.statusAfterMove.dat");
using BufferedTable = columnBasedIdTable::IdTable<Id, 1, Buffer>;
BufferedTable table{1, std::array{std::move(buffer)}};
table.push_back(std::array{V(19)});
auto t2 = std::move(table);
// The `table` has been moved from and is invalid, because we don't have a
// file anymore where we could write the contents. This means that all
// operations that would have to change the size of the IdTable throw until
// we have reinstated the column vector by explicitly assigning a newly
// constructed table. The exceptions that are thrown are from the
// `BufferedVector` class which throws when it is being accessed after being
// moved from. In other words, the `IdTable` class needs no special code to
// handle the case of the columns being stored in a `BufferedVector`.
AD_EXPECT_THROW_WITH_MESSAGE(
table.push_back(std::array{V(4)}),
::testing::ContainsRegex("Tried to access a DiskBasedArray"));
AD_EXPECT_THROW_WITH_MESSAGE(
table.resize(42),
::testing::ContainsRegex("Tried to access a DiskBasedArray"));
table = BufferedTable{
1, std::array{Buffer{0, "IdTableTest.statusAfterMove2.dat"}}};
ASSERT_NO_THROW(table.push_back(std::array{V(4)}));
ASSERT_NO_THROW(table.resize(42));
ASSERT_EQ(table.size(), 42u);
ASSERT_EQ(table(0, 0), V(4));
}
}

TEST(IdTableStaticTest, erase) {
constexpr size_t NUM_ROWS = 12;
constexpr size_t NUM_COLS = 4;
Expand Down
11 changes: 11 additions & 0 deletions test/ViewsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,14 @@ TEST(Views, owningView) {
::testing::ElementsAre(
"4", "fourhundredseventythousandBlimbambum", "3", "1"));
}

TEST(Views, integerRange) {
std::vector<size_t> expected;
for (size_t i = 0; i < 42; ++i) {
expected.push_back(i);
}

std::vector<size_t> actual;
std::ranges::copy(ad_utility::integerRange(42u), std::back_inserter(actual));
ASSERT_THAT(actual, ::testing::ElementsAreArray(expected));
}
Loading

0 comments on commit fa3e620

Please sign in to comment.