Skip to content

Commit

Permalink
Consistently use the ColumnIndex typedef (ad-freiburg#990)
Browse files Browse the repository at this point in the history
QLever currently contains a type `ColumnIndex` that is just a typedef for `uint64_t`. So far, `size_t` was sometimes used where `ColumnIndex` would have been semantically more appropriate. This commit consistently uses `ColumnIndex` in those places, which helps in two ways: it improves the readability of the code, and it brings us closer to a successful compilation on macOS, where `uint64_t` and `size_t` are different types. Note that in the future we might use a stronger type for `ColumnIndex`, but that would require changes in many additional places.
  • Loading branch information
bradenmacdonald authored May 25, 2023
1 parent 3911eb6 commit 82feec9
Show file tree
Hide file tree
Showing 54 changed files with 166 additions and 155 deletions.
2 changes: 1 addition & 1 deletion misc/format-check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ for source in "${SOURCE_FILES[@]}" ;do
printf "Use clang-format with the .clang-format provided in the QLever\n"
printf "repository's root to ensure all code files are formatted "
printf "properly. We currently use clang-format 16\n"
printf "(See `.github/workflows/format-check.yml` for instructions on how to install it.\n"
printf "(See '.github/workflows/format-check.yml' for instructions on how to install it.\n"
printf "\x1b[m"
ERROR=1
fi
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ float Bind::getMultiplicity(size_t col) {
string Bind::getDescriptor() const { return _bind.getDescriptor(); }

// _____________________________________________________________________________
[[nodiscard]] vector<size_t> Bind::resultSortedOn() const {
[[nodiscard]] vector<ColumnIndex> Bind::resultSortedOn() const {
// We always append the result column of the BIND at the end and this column
// is not sorted, so the sequence of indices of the sorted columns do not
// change.
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Bind.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Bind : public Operation {
}

protected:
[[nodiscard]] vector<size_t> resultSortedOn() const override;
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

private:
ResultTable computeResult() override;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ string CountAvailablePredicates::getDescriptor() const {
size_t CountAvailablePredicates::getResultWidth() const { return 2; }

// _____________________________________________________________________________
vector<size_t> CountAvailablePredicates::resultSortedOn() const {
vector<ColumnIndex> CountAvailablePredicates::resultSortedOn() const {
// The result is not sorted on any column.
return {};
}
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CountAvailablePredicates.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class CountAvailablePredicates : public Operation {

[[nodiscard]] size_t getResultWidth() const override;

[[nodiscard]] vector<size_t> resultSortedOn() const override;
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

vector<QueryExecutionTree*> getChildren() override {
using R = vector<QueryExecutionTree*>;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ size_t Distinct::getResultWidth() const { return _subtree->getResultWidth(); }
// _____________________________________________________________________________
Distinct::Distinct(QueryExecutionContext* qec,
std::shared_ptr<QueryExecutionTree> subtree,
const vector<size_t>& keepIndices)
const vector<ColumnIndex>& keepIndices)
: Operation(qec), _subtree(subtree), _keepIndices(keepIndices) {}

// _____________________________________________________________________________
Expand Down
6 changes: 3 additions & 3 deletions src/engine/Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,19 @@ using std::vector;
class Distinct : public Operation {
private:
std::shared_ptr<QueryExecutionTree> _subtree;
vector<size_t> _keepIndices;
vector<ColumnIndex> _keepIndices;

public:
Distinct(QueryExecutionContext* qec,
std::shared_ptr<QueryExecutionTree> subtree,
const vector<size_t>& keepIndices);
const vector<ColumnIndex>& keepIndices);

[[nodiscard]] size_t getResultWidth() const override;

public:
[[nodiscard]] string getDescriptor() const override;

[[nodiscard]] vector<size_t> resultSortedOn() const override {
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override {
return _subtree->resultSortedOn();
}

Expand Down
4 changes: 2 additions & 2 deletions src/engine/Engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class Engine {
**/
template <size_t WIDTH>
static void distinct(const IdTable& dynInput,
const std::vector<size_t>& keepIndices,
const std::vector<ColumnIndex>& keepIndices,
IdTable* dynResult) {
LOG(DEBUG) << "Distinct on " << dynInput.size() << " elements.\n";
const IdTableView<WIDTH> input = dynInput.asStaticView<WIDTH>();
Expand All @@ -158,7 +158,7 @@ class Engine {

auto last = std::unique(result.begin(), result.end(),
[&keepIndices](const auto& a, const auto& b) {
for (size_t i : keepIndices) {
for (ColumnIndex i : keepIndices) {
if (a[i] != b[i]) {
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Filter : public Operation {
public:
string getDescriptor() const override;

std::vector<size_t> resultSortedOn() const override {
std::vector<ColumnIndex> resultSortedOn() const override {
return _subtree->resultSortedOn();
}

Expand Down
13 changes: 7 additions & 6 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,29 +83,30 @@ size_t GroupBy::getResultWidth() const {
return getInternallyVisibleVariableColumns().size();
}

vector<size_t> GroupBy::resultSortedOn() const {
vector<ColumnIndex> GroupBy::resultSortedOn() const {
auto varCols = getInternallyVisibleVariableColumns();
vector<size_t> sortedOn;
vector<ColumnIndex> sortedOn;
sortedOn.reserve(_groupByVariables.size());
for (const auto& var : _groupByVariables) {
sortedOn.push_back(varCols[var].columnIndex_);
}
return sortedOn;
}

vector<size_t> GroupBy::computeSortColumns(const QueryExecutionTree* subtree) {
vector<size_t> cols;
vector<ColumnIndex> GroupBy::computeSortColumns(
const QueryExecutionTree* subtree) {
vector<ColumnIndex> cols;
if (_groupByVariables.empty()) {
// the entire input is a single group, no sorting needs to be done
return cols;
}

const auto& inVarColMap = subtree->getVariableColumns();

std::unordered_set<size_t> sortColSet;
std::unordered_set<ColumnIndex> sortColSet;

for (const auto& var : _groupByVariables) {
size_t col = inVarColMap.at(var).columnIndex_;
ColumnIndex col = inVarColMap.at(var).columnIndex_;
// avoid sorting by a column twice
if (sortColSet.find(col) == sortColSet.end()) {
sortColSet.insert(col);
Expand Down
4 changes: 2 additions & 2 deletions src/engine/GroupBy.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class GroupBy : public Operation {

virtual size_t getResultWidth() const override;

virtual vector<size_t> resultSortedOn() const override;
virtual vector<ColumnIndex> resultSortedOn() const override;

virtual void setTextLimit(size_t limit) override {
_subtree->setTextLimit(limit);
Expand All @@ -77,7 +77,7 @@ class GroupBy : public Operation {
* @param subtree The QueryExecutionTree that contains the operations
* creating the sorting operation inputs.
*/
vector<size_t> computeSortColumns(const QueryExecutionTree* subtree);
vector<ColumnIndex> computeSortColumns(const QueryExecutionTree* subtree);

vector<QueryExecutionTree*> getChildren() override {
return {_subtree.get()};
Expand Down
2 changes: 1 addition & 1 deletion src/engine/HasPredicateScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ size_t HasPredicateScan::getResultWidth() const {
return -1;
}

vector<size_t> HasPredicateScan::resultSortedOn() const {
vector<ColumnIndex> HasPredicateScan::resultSortedOn() const {
switch (_type) {
case ScanType::FREE_S:
// is the lack of sorting here a problem?
Expand Down
2 changes: 1 addition & 1 deletion src/engine/HasPredicateScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class HasPredicateScan : public Operation {

[[nodiscard]] size_t getResultWidth() const override;

[[nodiscard]] vector<size_t> resultSortedOn() const override;
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

void setTextLimit(size_t limit) override;

Expand Down
10 changes: 5 additions & 5 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,26 +115,26 @@ size_t IndexScan::getResultWidth() const {
}

// _____________________________________________________________________________
vector<size_t> IndexScan::resultSortedOn() const {
vector<ColumnIndex> IndexScan::resultSortedOn() const {
switch (_type) {
case PSO_BOUND_S:
case POS_BOUND_O:
case SOP_BOUND_O:
return {0};
return {ColumnIndex{0}};
case PSO_FREE_S:
case POS_FREE_O:
case SPO_FREE_P:
case SOP_FREE_O:
case OSP_FREE_S:
case OPS_FREE_P:
return {0, 1};
return {ColumnIndex{0}, ColumnIndex{1}};
case FULL_INDEX_SCAN_SPO:
case FULL_INDEX_SCAN_SOP:
case FULL_INDEX_SCAN_PSO:
case FULL_INDEX_SCAN_POS:
case FULL_INDEX_SCAN_OSP:
case FULL_INDEX_SCAN_OPS:
return {0, 1, 2};
return {ColumnIndex{0}, ColumnIndex{1}, ColumnIndex{2}};
default:
AD_FAIL();
}
Expand All @@ -145,7 +145,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const {
VariableToColumnMap res;
// All the columns of an index scan only contain defined values.
auto makeCol = makeAlwaysDefinedColumn;
size_t col = 0;
auto col = ColumnIndex{0};

// Helper lambdas that add the respective triple component as the next column.
auto addSubject = [&]() {
Expand Down
2 changes: 1 addition & 1 deletion src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class IndexScan : public Operation {

size_t getResultWidth() const override;

vector<size_t> resultSortedOn() const override;
vector<ColumnIndex> resultSortedOn() const override;

void setTextLimit(size_t) override {
// Do nothing.
Expand Down
42 changes: 21 additions & 21 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ using std::string;

// _____________________________________________________________________________
Join::Join(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> t1,
std::shared_ptr<QueryExecutionTree> t2, size_t t1JoinCol,
size_t t2JoinCol, bool keepJoinColumn)
std::shared_ptr<QueryExecutionTree> t2, ColumnIndex t1JoinCol,
ColumnIndex t2JoinCol, bool keepJoinColumn)
: Operation(qec) {
AD_CONTRACT_CHECK(t1 && t2);
// Currently all join algorithms require both inputs to be sorted, so we
Expand Down Expand Up @@ -134,7 +134,7 @@ ResultTable Join::computeResult() {
VariableToColumnMap Join::computeVariableToColumnMap() const {
AD_CORRECTNESS_CHECK(!isFullScanDummy(_left));
if (isFullScanDummy(_right)) {
AD_CORRECTNESS_CHECK(_rightJoinCol == 0u);
AD_CORRECTNESS_CHECK(_rightJoinCol == ColumnIndex{0});
}
return makeVarToColMapForJoinOperation(
_left->getVariableColumns(), _right->getVariableColumns(),
Expand All @@ -151,11 +151,11 @@ size_t Join::getResultWidth() const {
}

// _____________________________________________________________________________
vector<size_t> Join::resultSortedOn() const {
vector<ColumnIndex> Join::resultSortedOn() const {
if (!isFullScanDummy(_left)) {
return {_leftJoinCol};
} else {
return {2 + _rightJoinCol};
return {ColumnIndex{2 + _rightJoinCol}};
}
}

Expand Down Expand Up @@ -347,8 +347,8 @@ void Join::computeSizeEstimateAndMultiplicities() {
<< " * " << jcMultiplicityInResult << " * " << nofDistinctInResult
<< std::endl;

for (size_t i = isFullScanDummy(_left) ? 1 : 0; i < _left->getResultWidth();
++i) {
for (auto i = isFullScanDummy(_left) ? ColumnIndex{1} : ColumnIndex{0};
i < _left->getResultWidth(); ++i) {
double oldMult = _left->getMultiplicity(i);
double m = std::max(
1.0, oldMult * _right->getMultiplicity(_rightJoinCol) * corrFactor);
Expand All @@ -359,7 +359,7 @@ void Join::computeSizeEstimateAndMultiplicities() {
}
_multiplicities.emplace_back(m);
}
for (size_t i = 0; i < _right->getResultWidth(); ++i) {
for (auto i = ColumnIndex{0}; i < _right->getResultWidth(); ++i) {
if (i == _rightJoinCol && !isFullScanDummy(_left)) {
continue;
}
Expand Down Expand Up @@ -401,8 +401,8 @@ void Join::appendCrossProduct(const IdTable::const_iterator& leftBegin,

// ______________________________________________________________________________

void Join::join(const IdTable& a, size_t jc1, const IdTable& b, size_t jc2,
IdTable* result) const {
void Join::join(const IdTable& a, ColumnIndex jc1, const IdTable& b,
ColumnIndex jc2, IdTable* result) const {
LOG(DEBUG) << "Performing join between two tables.\n";
LOG(DEBUG) << "A: width = " << a.numColumns() << ", size = " << a.size()
<< "\n";
Expand Down Expand Up @@ -501,8 +501,8 @@ void Join::join(const IdTable& a, size_t jc1, const IdTable& b, size_t jc2,

// ______________________________________________________________________________
template <int L_WIDTH, int R_WIDTH, int OUT_WIDTH>
void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
size_t jc2, IdTable* dynRes) {
void Join::hashJoinImpl(const IdTable& dynA, ColumnIndex jc1,
const IdTable& dynB, ColumnIndex jc2, IdTable* dynRes) {
const IdTableView<L_WIDTH> a = dynA.asStaticView<L_WIDTH>();
const IdTableView<R_WIDTH> b = dynB.asStaticView<R_WIDTH>();

Expand All @@ -522,7 +522,7 @@ void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
// Puts the rows of the given table into a hash map, with the value of
// the join column of a row as the key, and returns the hash map.
auto idTableToHashMap = []<typename Table>(const Table& table,
const size_t jc) {
const ColumnIndex jc) {
// This declaration works, because generic lambdas are just syntactic sugar
// for templates.
ad_utility::HashMap<Id, std::vector<typename Table::row_type>> map;
Expand Down Expand Up @@ -550,9 +550,9 @@ void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
&result]<bool leftIsLarger, typename LargerTableType,
typename SmallerTableType>(
const LargerTableType& largerTable,
const size_t largerTableJoinColumn,
const ColumnIndex largerTableJoinColumn,
const SmallerTableType& smallerTable,
const size_t smallerTableJoinColumn) {
const ColumnIndex smallerTableJoinColumn) {
// Put the smaller table into the hash table.
auto map = idTableToHashMap(smallerTable, smallerTableJoinColumn);

Expand Down Expand Up @@ -598,8 +598,8 @@ void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
}

// ______________________________________________________________________________
void Join::hashJoin(const IdTable& dynA, size_t jc1, const IdTable& dynB,
size_t jc2, IdTable* dynRes) {
void Join::hashJoin(const IdTable& dynA, ColumnIndex jc1, const IdTable& dynB,
ColumnIndex jc2, IdTable* dynRes) {
CALL_FIXED_SIZE(
(std::array{dynA.numColumns(), dynB.numColumns(), dynRes->numColumns()}),
&Join::hashJoinImpl, this, dynA, jc1, dynB, jc2, dynRes);
Expand All @@ -608,24 +608,24 @@ void Join::hashJoin(const IdTable& dynA, size_t jc1, const IdTable& dynB,
// ___________________________________________________________________________
template <typename ROW_A, typename ROW_B, int TABLE_WIDTH>
void Join::addCombinedRowToIdTable(const ROW_A& rowA, const ROW_B& rowB,
const size_t jcRowB,
const ColumnIndex jcRowB,
IdTableStatic<TABLE_WIDTH>* table) {
// Add a new, empty row.
const size_t backIndex = table->size();
table->emplace_back();

// Copy the entire rowA in the table.
for (size_t h = 0; h < rowA.numColumns(); h++) {
for (auto h = ColumnIndex{0}; h < rowA.numColumns(); h++) {
(*table)(backIndex, h) = rowA[h];
}

// Copy rowB columns before the join column.
for (size_t h = 0; h < jcRowB; h++) {
for (auto h = ColumnIndex{0}; h < jcRowB; h++) {
(*table)(backIndex, h + rowA.numColumns()) = rowB[h];
}

// Copy rowB columns after the join column.
for (size_t h = jcRowB + 1; h < rowB.numColumns(); h++) {
for (auto h = jcRowB + 1; h < rowB.numColumns(); h++) {
(*table)(backIndex, h + rowA.numColumns() - 1) = rowB[h];
}
}
Loading

0 comments on commit 82feec9

Please sign in to comment.