Consistently use the ColumnIndex typedef (ad-freiburg#990)

QLever currently contains a type `ColumnIndex` that is just a typedef for `uint64_t`. So far, `size_t` was sometimes used where `ColumnIndex` would have been semantically more appropriate. This commit replaces those uses with `ColumnIndex`, which helps in two ways: it improves the readability of the code, and it brings us closer to a successful compilation on macOS, where `uint64_t` and `size_t` are different types. Note that in the future we might use a stronger type for `ColumnIndex`, but that would require changes in many additional places.
schlegan · May 25, 2023 · 82feec9 · 82feec9
1 parent 3911eb6
commit 82feec9
Show file tree

Hide file tree

Showing 54 changed files with 166 additions and 155 deletions.
diff --git a/misc/format-check.sh b/misc/format-check.sh
@@ -18,7 +18,7 @@ for source in "${SOURCE_FILES[@]}" ;do
 		printf "Use clang-format with the .clang-format provided in the QLever\n"
 		printf "repository's root to ensure all code files are formatted "
 		printf "properly. We currently use clang-format 16\n"
-		printf "(See `.github/workflows/format-check.yml` for instructions on how to install it.\n"
+		printf "(See '.github/workflows/format-check.yml' for instructions on how to install it.\n"
 		printf "\x1b[m"
 		ERROR=1
 	fi

diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp
@@ -42,7 +42,7 @@ float Bind::getMultiplicity(size_t col) {
 string Bind::getDescriptor() const { return _bind.getDescriptor(); }
 
 // _____________________________________________________________________________
-[[nodiscard]] vector<size_t> Bind::resultSortedOn() const {
+[[nodiscard]] vector<ColumnIndex> Bind::resultSortedOn() const {
   // We always append the result column of the BIND at the end and this column
   // is not sorted, so the sequence of indices of the sorted columns do not
   // change.

diff --git a/src/engine/Bind.h b/src/engine/Bind.h
@@ -43,7 +43,7 @@ class Bind : public Operation {
   }
 
  protected:
-  [[nodiscard]] vector<size_t> resultSortedOn() const override;
+  [[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;
 
  private:
   ResultTable computeResult() override;

diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp
@@ -55,7 +55,7 @@ string CountAvailablePredicates::getDescriptor() const {
 size_t CountAvailablePredicates::getResultWidth() const { return 2; }
 
 // _____________________________________________________________________________
-vector<size_t> CountAvailablePredicates::resultSortedOn() const {
+vector<ColumnIndex> CountAvailablePredicates::resultSortedOn() const {
   // The result is not sorted on any column.
   return {};
 }

diff --git a/src/engine/CountAvailablePredicates.h b/src/engine/CountAvailablePredicates.h
@@ -58,7 +58,7 @@ class CountAvailablePredicates : public Operation {
 
   [[nodiscard]] size_t getResultWidth() const override;
 
-  [[nodiscard]] vector<size_t> resultSortedOn() const override;
+  [[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;
 
   vector<QueryExecutionTree*> getChildren() override {
     using R = vector<QueryExecutionTree*>;

diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp
@@ -17,7 +17,7 @@ size_t Distinct::getResultWidth() const { return _subtree->getResultWidth(); }
 // _____________________________________________________________________________
 Distinct::Distinct(QueryExecutionContext* qec,
                    std::shared_ptr<QueryExecutionTree> subtree,
-                   const vector<size_t>& keepIndices)
+                   const vector<ColumnIndex>& keepIndices)
     : Operation(qec), _subtree(subtree), _keepIndices(keepIndices) {}
 
 // _____________________________________________________________________________

diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h
@@ -19,19 +19,19 @@ using std::vector;
 class Distinct : public Operation {
  private:
   std::shared_ptr<QueryExecutionTree> _subtree;
-  vector<size_t> _keepIndices;
+  vector<ColumnIndex> _keepIndices;
 
  public:
   Distinct(QueryExecutionContext* qec,
            std::shared_ptr<QueryExecutionTree> subtree,
-           const vector<size_t>& keepIndices);
+           const vector<ColumnIndex>& keepIndices);
 
   [[nodiscard]] size_t getResultWidth() const override;
 
  public:
   [[nodiscard]] string getDescriptor() const override;
 
-  [[nodiscard]] vector<size_t> resultSortedOn() const override {
+  [[nodiscard]] vector<ColumnIndex> resultSortedOn() const override {
     return _subtree->resultSortedOn();
   }
 

diff --git a/src/engine/Engine.h b/src/engine/Engine.h
@@ -147,7 +147,7 @@ class Engine {
    **/
   template <size_t WIDTH>
   static void distinct(const IdTable& dynInput,
-                       const std::vector<size_t>& keepIndices,
+                       const std::vector<ColumnIndex>& keepIndices,
                        IdTable* dynResult) {
     LOG(DEBUG) << "Distinct on " << dynInput.size() << " elements.\n";
     const IdTableView<WIDTH> input = dynInput.asStaticView<WIDTH>();
@@ -158,7 +158,7 @@ class Engine {
 
       auto last = std::unique(result.begin(), result.end(),
                               [&keepIndices](const auto& a, const auto& b) {
-                                for (size_t i : keepIndices) {
+                                for (ColumnIndex i : keepIndices) {
                                   if (a[i] != b[i]) {
                                     return false;
                                   }

diff --git a/src/engine/Filter.h b/src/engine/Filter.h
@@ -32,7 +32,7 @@ class Filter : public Operation {
  public:
   string getDescriptor() const override;
 
-  std::vector<size_t> resultSortedOn() const override {
+  std::vector<ColumnIndex> resultSortedOn() const override {
     return _subtree->resultSortedOn();
   }
 

diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
@@ -83,29 +83,30 @@ size_t GroupBy::getResultWidth() const {
   return getInternallyVisibleVariableColumns().size();
 }
 
-vector<size_t> GroupBy::resultSortedOn() const {
+vector<ColumnIndex> GroupBy::resultSortedOn() const {
   auto varCols = getInternallyVisibleVariableColumns();
-  vector<size_t> sortedOn;
+  vector<ColumnIndex> sortedOn;
   sortedOn.reserve(_groupByVariables.size());
   for (const auto& var : _groupByVariables) {
     sortedOn.push_back(varCols[var].columnIndex_);
   }
   return sortedOn;
 }
 
-vector<size_t> GroupBy::computeSortColumns(const QueryExecutionTree* subtree) {
-  vector<size_t> cols;
+vector<ColumnIndex> GroupBy::computeSortColumns(
+    const QueryExecutionTree* subtree) {
+  vector<ColumnIndex> cols;
   if (_groupByVariables.empty()) {
     // the entire input is a single group, no sorting needs to be done
     return cols;
   }
 
   const auto& inVarColMap = subtree->getVariableColumns();
 
-  std::unordered_set<size_t> sortColSet;
+  std::unordered_set<ColumnIndex> sortColSet;
 
   for (const auto& var : _groupByVariables) {
-    size_t col = inVarColMap.at(var).columnIndex_;
+    ColumnIndex col = inVarColMap.at(var).columnIndex_;
     // avoid sorting by a column twice
     if (sortColSet.find(col) == sortColSet.end()) {
       sortColSet.insert(col);

diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h
@@ -53,7 +53,7 @@ class GroupBy : public Operation {
 
   virtual size_t getResultWidth() const override;
 
-  virtual vector<size_t> resultSortedOn() const override;
+  virtual vector<ColumnIndex> resultSortedOn() const override;
 
   virtual void setTextLimit(size_t limit) override {
     _subtree->setTextLimit(limit);
@@ -77,7 +77,7 @@ class GroupBy : public Operation {
    * @param subtree The QueryExecutionTree that contains the operations
    *                  creating the sorting operation inputs.
    */
-  vector<size_t> computeSortColumns(const QueryExecutionTree* subtree);
+  vector<ColumnIndex> computeSortColumns(const QueryExecutionTree* subtree);
 
   vector<QueryExecutionTree*> getChildren() override {
     return {_subtree.get()};

diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp
@@ -93,7 +93,7 @@ size_t HasPredicateScan::getResultWidth() const {
   return -1;
 }
 
-vector<size_t> HasPredicateScan::resultSortedOn() const {
+vector<ColumnIndex> HasPredicateScan::resultSortedOn() const {
   switch (_type) {
     case ScanType::FREE_S:
       // is the lack of sorting here a problem?

diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h
@@ -56,7 +56,7 @@ class HasPredicateScan : public Operation {
 
   [[nodiscard]] size_t getResultWidth() const override;
 
-  [[nodiscard]] vector<size_t> resultSortedOn() const override;
+  [[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;
 
   void setTextLimit(size_t limit) override;
 

diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
@@ -115,26 +115,26 @@ size_t IndexScan::getResultWidth() const {
 }
 
 // _____________________________________________________________________________
-vector<size_t> IndexScan::resultSortedOn() const {
+vector<ColumnIndex> IndexScan::resultSortedOn() const {
   switch (_type) {
     case PSO_BOUND_S:
     case POS_BOUND_O:
     case SOP_BOUND_O:
-      return {0};
+      return {ColumnIndex{0}};
     case PSO_FREE_S:
     case POS_FREE_O:
     case SPO_FREE_P:
     case SOP_FREE_O:
     case OSP_FREE_S:
     case OPS_FREE_P:
-      return {0, 1};
+      return {ColumnIndex{0}, ColumnIndex{1}};
     case FULL_INDEX_SCAN_SPO:
     case FULL_INDEX_SCAN_SOP:
     case FULL_INDEX_SCAN_PSO:
     case FULL_INDEX_SCAN_POS:
     case FULL_INDEX_SCAN_OSP:
     case FULL_INDEX_SCAN_OPS:
-      return {0, 1, 2};
+      return {ColumnIndex{0}, ColumnIndex{1}, ColumnIndex{2}};
     default:
       AD_FAIL();
   }
@@ -145,7 +145,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const {
   VariableToColumnMap res;
   // All the columns of an index scan only contain defined values.
   auto makeCol = makeAlwaysDefinedColumn;
-  size_t col = 0;
+  auto col = ColumnIndex{0};
 
   // Helper lambdas that add the respective triple component as the next column.
   auto addSubject = [&]() {

diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h
@@ -56,7 +56,7 @@ class IndexScan : public Operation {
 
   size_t getResultWidth() const override;
 
-  vector<size_t> resultSortedOn() const override;
+  vector<ColumnIndex> resultSortedOn() const override;
 
   void setTextLimit(size_t) override {
     // Do nothing.

diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp
@@ -23,8 +23,8 @@ using std::string;
 
 // _____________________________________________________________________________
 Join::Join(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> t1,
-           std::shared_ptr<QueryExecutionTree> t2, size_t t1JoinCol,
-           size_t t2JoinCol, bool keepJoinColumn)
+           std::shared_ptr<QueryExecutionTree> t2, ColumnIndex t1JoinCol,
+           ColumnIndex t2JoinCol, bool keepJoinColumn)
     : Operation(qec) {
   AD_CONTRACT_CHECK(t1 && t2);
   // Currently all join algorithms require both inputs to be sorted, so we
@@ -134,7 +134,7 @@ ResultTable Join::computeResult() {
 VariableToColumnMap Join::computeVariableToColumnMap() const {
   AD_CORRECTNESS_CHECK(!isFullScanDummy(_left));
   if (isFullScanDummy(_right)) {
-    AD_CORRECTNESS_CHECK(_rightJoinCol == 0u);
+    AD_CORRECTNESS_CHECK(_rightJoinCol == ColumnIndex{0});
   }
   return makeVarToColMapForJoinOperation(
       _left->getVariableColumns(), _right->getVariableColumns(),
@@ -151,11 +151,11 @@ size_t Join::getResultWidth() const {
 }
 
 // _____________________________________________________________________________
-vector<size_t> Join::resultSortedOn() const {
+vector<ColumnIndex> Join::resultSortedOn() const {
   if (!isFullScanDummy(_left)) {
     return {_leftJoinCol};
   } else {
-    return {2 + _rightJoinCol};
+    return {ColumnIndex{2 + _rightJoinCol}};
   }
 }
 
@@ -347,8 +347,8 @@ void Join::computeSizeEstimateAndMultiplicities() {
              << " * " << jcMultiplicityInResult << " * " << nofDistinctInResult
              << std::endl;
 
-  for (size_t i = isFullScanDummy(_left) ? 1 : 0; i < _left->getResultWidth();
-       ++i) {
+  for (auto i = isFullScanDummy(_left) ? ColumnIndex{1} : ColumnIndex{0};
+       i < _left->getResultWidth(); ++i) {
     double oldMult = _left->getMultiplicity(i);
     double m = std::max(
         1.0, oldMult * _right->getMultiplicity(_rightJoinCol) * corrFactor);
@@ -359,7 +359,7 @@ void Join::computeSizeEstimateAndMultiplicities() {
     }
     _multiplicities.emplace_back(m);
   }
-  for (size_t i = 0; i < _right->getResultWidth(); ++i) {
+  for (auto i = ColumnIndex{0}; i < _right->getResultWidth(); ++i) {
     if (i == _rightJoinCol && !isFullScanDummy(_left)) {
       continue;
     }
@@ -401,8 +401,8 @@ void Join::appendCrossProduct(const IdTable::const_iterator& leftBegin,
 
 // ______________________________________________________________________________
 
-void Join::join(const IdTable& a, size_t jc1, const IdTable& b, size_t jc2,
-                IdTable* result) const {
+void Join::join(const IdTable& a, ColumnIndex jc1, const IdTable& b,
+                ColumnIndex jc2, IdTable* result) const {
   LOG(DEBUG) << "Performing join between two tables.\n";
   LOG(DEBUG) << "A: width = " << a.numColumns() << ", size = " << a.size()
              << "\n";
@@ -501,8 +501,8 @@ void Join::join(const IdTable& a, size_t jc1, const IdTable& b, size_t jc2,
 
 // ______________________________________________________________________________
 template <int L_WIDTH, int R_WIDTH, int OUT_WIDTH>
-void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
-                        size_t jc2, IdTable* dynRes) {
+void Join::hashJoinImpl(const IdTable& dynA, ColumnIndex jc1,
+                        const IdTable& dynB, ColumnIndex jc2, IdTable* dynRes) {
   const IdTableView<L_WIDTH> a = dynA.asStaticView<L_WIDTH>();
   const IdTableView<R_WIDTH> b = dynB.asStaticView<R_WIDTH>();
 
@@ -522,7 +522,7 @@ void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
   // Puts the rows of the given table into a hash map, with the value of
   // the join column of a row as the key, and returns the hash map.
   auto idTableToHashMap = []<typename Table>(const Table& table,
-                                             const size_t jc) {
+                                             const ColumnIndex jc) {
     // This declaration works, because generic lambdas are just syntactic sugar
     // for templates.
     ad_utility::HashMap<Id, std::vector<typename Table::row_type>> map;
@@ -550,9 +550,9 @@ void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
                           &result]<bool leftIsLarger, typename LargerTableType,
                                    typename SmallerTableType>(
                              const LargerTableType& largerTable,
-                             const size_t largerTableJoinColumn,
+                             const ColumnIndex largerTableJoinColumn,
                              const SmallerTableType& smallerTable,
-                             const size_t smallerTableJoinColumn) {
+                             const ColumnIndex smallerTableJoinColumn) {
     // Put the smaller table into the hash table.
     auto map = idTableToHashMap(smallerTable, smallerTableJoinColumn);
 
@@ -598,8 +598,8 @@ void Join::hashJoinImpl(const IdTable& dynA, size_t jc1, const IdTable& dynB,
 }
 
 // ______________________________________________________________________________
-void Join::hashJoin(const IdTable& dynA, size_t jc1, const IdTable& dynB,
-                    size_t jc2, IdTable* dynRes) {
+void Join::hashJoin(const IdTable& dynA, ColumnIndex jc1, const IdTable& dynB,
+                    ColumnIndex jc2, IdTable* dynRes) {
   CALL_FIXED_SIZE(
       (std::array{dynA.numColumns(), dynB.numColumns(), dynRes->numColumns()}),
       &Join::hashJoinImpl, this, dynA, jc1, dynB, jc2, dynRes);
@@ -608,24 +608,24 @@ void Join::hashJoin(const IdTable& dynA, size_t jc1, const IdTable& dynB,
 // ___________________________________________________________________________
 template <typename ROW_A, typename ROW_B, int TABLE_WIDTH>
 void Join::addCombinedRowToIdTable(const ROW_A& rowA, const ROW_B& rowB,
-                                   const size_t jcRowB,
+                                   const ColumnIndex jcRowB,
                                    IdTableStatic<TABLE_WIDTH>* table) {
   // Add a new, empty row.
   const size_t backIndex = table->size();
   table->emplace_back();
 
   // Copy the entire rowA in the table.
-  for (size_t h = 0; h < rowA.numColumns(); h++) {
+  for (auto h = ColumnIndex{0}; h < rowA.numColumns(); h++) {
     (*table)(backIndex, h) = rowA[h];
   }
 
   // Copy rowB columns before the join column.
-  for (size_t h = 0; h < jcRowB; h++) {
+  for (auto h = ColumnIndex{0}; h < jcRowB; h++) {
     (*table)(backIndex, h + rowA.numColumns()) = rowB[h];
   }
 
   // Copy rowB columns after the join column.
-  for (size_t h = jcRowB + 1; h < rowB.numColumns(); h++) {
+  for (auto h = jcRowB + 1; h < rowB.numColumns(); h++) {
     (*table)(backIndex, h + rowA.numColumns() - 1) = rowB[h];
   }
 }