Skip to content

Commit

Permalink
For small index scans, avoid spurious copy of whole block (ad-freibur…
Browse files Browse the repository at this point in the history
…g#1260)

For small index scans, which read only a part of a single block, QLever so far made a copy of the whole block, even when the block was already in the block cache. This spurious copy is now avoided, so that, when the block is already in the cache, only the requested part is copied. In particular, this makes full index scans much faster and also helps other queries with ?s ?p ?o where ?s is bound by the rest of the query.

Note: There is potential for more performance gains for such scans, but we have other issues for now.
  • Loading branch information
joka921 authored Feb 6, 2024
1 parent ac9db29 commit 60bf149
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 28 deletions.
2 changes: 1 addition & 1 deletion e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ fi
# here because then we can't easily get the SERVER_PID out of that subshell
pushd "$BINARY_DIR"
echo "Launching server from path $(pwd)"
./ServerMain -i "$INDEX" -p 9099 -m 1GB -t --default-query-timeout 500s &> server_log.txt &
./ServerMain -i "$INDEX" -p 9099 -m 1GB -t --default-query-timeout 30s &> server_log.txt &
SERVER_PID=$!
popd

Expand Down
35 changes: 21 additions & 14 deletions src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,15 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock(
std::back_inserter(allColumns));
// A block is uniquely identified by its start position in the file.
auto cacheKey = blockMetadata.offsetsAndCompressedSize_.at(0).offsetInFile_;
DecompressedBlock block = blockCache_
.computeOnce(cacheKey,
[&]() {
return readAndDecompressBlock(
blockMetadata, allColumns);
})
._resultPointer->clone();
auto sharedResultFromCache =
blockCache_
.computeOnce(cacheKey,
[&]() {
return readAndDecompressBlock(blockMetadata,
allColumns);
})
._resultPointer;
const DecompressedBlock& block = *sharedResultFromCache;
const auto& col1Column = block.getColumn(0);

// Find the range in the blockMetadata, that belongs to the same relation
Expand All @@ -452,17 +454,22 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock(
}
}();
auto numResults = subBlock.size();
block.erase(block.begin(),
block.begin() + (subBlock.begin() - col1Column.begin()));
block.resize(numResults);

auto beginIndex = subBlock.begin() - col1Column.begin();
auto endIndex = subBlock.end() - col1Column.begin();

DecompressedBlock result{columnIndices.size(), allocator_};
result.resize(numResults);
for (auto i : ad_utility::integerRange(columnIndices.size())) {
const auto& inputCol = block.getColumn(columnIndices[i]);
std::ranges::copy(inputCol.begin() + beginIndex,
inputCol.begin() + endIndex, result.getColumn(i).begin());
}
if (scanMetadata.has_value()) {
auto& details = scanMetadata.value().get();
++details.numBlocksRead_;
details.numElementsRead_ += block.numRows();
details.numElementsRead_ += result.numRows();
}
block.setColumnSubset(columnIndices);
return block;
return result;
};

// _____________________________________________________________________________
Expand Down
5 changes: 4 additions & 1 deletion src/index/CompressedRelation.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,10 @@ class CompressedRelationReader {
// the `col1Id` doesn't match. For this to work, the block has to be one of
// the blocks that actually store triples from the given `relationMetadata`'s
// relation, else the behavior is undefined. Only return the columns specified
// by the `columnIndices`.
// by the `columnIndices`. Note: Do not call this function for blocks of which
// you know that you need them completely, as then this function wastes some
// time and space. It is only typically needed for the first and last block of
// certain scans.
DecompressedBlock readPossiblyIncompleteBlock(
const CompressedRelationMetadata& relationMetadata,
std::optional<Id> col1Id, const CompressedBlockMetadata& blockMetadata,
Expand Down
28 changes: 16 additions & 12 deletions test/engine/IndexScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "../IndexTestHelpers.h"
#include "../util/GTestHelpers.h"
#include "../util/IdTableHelpers.h"
#include "engine/IndexScan.h"
#include "parser/ParsedQuery.h"

Expand Down Expand Up @@ -323,24 +324,27 @@ TEST(IndexScan, additionalColumn) {
auto qec = getQec("<x> <y> <z>.");
using V = Variable;
SparqlTriple triple{V{"?x"}, "<y>", V{"?z"}};
triple._additionalScanColumns.emplace_back(1, V{"?blib"});
triple._additionalScanColumns.emplace_back(0, V{"?blub"});
triple._additionalScanColumns.emplace_back(
ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN, V{"?xpattern"});
triple._additionalScanColumns.emplace_back(
ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN, V{"?ypattern"});
auto scan = IndexScan{qec, Permutation::PSO, triple};
ASSERT_EQ(scan.getResultWidth(), 4);
auto col = makeAlwaysDefinedColumn;
VariableToColumnMap expected = {{V{"?x"}, col(0)},
{V{"?z"}, col(1)},
{V("?blib"), col(2)},
{V("?blub"), col(3)}};
{V("?xpattern"), col(2)},
{V("?ypattern"), col(3)}};
ASSERT_THAT(scan.getExternallyVisibleVariableColumns(),
::testing::UnorderedElementsAreArray(expected));
ASSERT_THAT(scan.getCacheKey(),
::testing::ContainsRegex("Additional Columns: 1 0"));
// Executing such a query that has the same column multiple times is currently
// not supported and fails with an exception inside the `IdTable.h` module
// TODO<joka921> Add proper tests as soon as we can properly add additional
// columns. Maybe we cann add additional columns generically during the index
// build by adding a generic transformation function etc.
AD_EXPECT_THROW_WITH_MESSAGE(scan.computeResultOnlyForTesting(),
::testing::ContainsRegex("IdTable.h"));
::testing::ContainsRegex("Additional Columns: 2 3"));
auto res = scan.computeResultOnlyForTesting();
auto getId = makeGetId(qec->getIndex());
auto I = IntId;
// <x> is the only subject, so it has pattern 0, <z> doesn't appear as a
// subject, so it has no pattern.
auto exp = makeIdTableFromVector(
{{getId("<x>"), getId("<z>"), I(0), I(NO_PATTERN)}});
EXPECT_THAT(res.idTable(), ::testing::ElementsAreArray(exp));
}

0 comments on commit 60bf149

Please sign in to comment.