Skip to content

Commit

Permalink
Fix subtle bug in variable to columns map for JOIN (ad-freiburg#938)
Browse files Browse the repository at this point in the history
There was a subtle bug in the map from variables to columns in the JOIN operation, namely when the operands contained "invisible" columns (that is, columns that were computed in previous operations but then selected out). In particular, this bug showed in one of the warmup AC queries from the QLever UI. That bug is now fixed and corresponding unit tests and a query in our end-to-end test were added.
  • Loading branch information
joka921 authored Apr 3, 2023
1 parent 04900c2 commit 8b65e4d
Show file tree
Hide file tree
Showing 14 changed files with 170 additions and 19 deletions.
16 changes: 16 additions & 0 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1493,6 +1493,22 @@ queries:
- selected: [ "?sum" ]
- contains_row: [ 7.0 ]

- query: join-columns-in-the-presence-of-subqueries
type: no-text
sparql: |
SELECT ?a ?b WHERE {
VALUES ?a { 1 }
{ SELECT ?a ?b WHERE {
{ VALUES ?c { 42} }
UNION
{ VALUES (?a ?b) { (1 3) } }
} }
}
checks:
- num_cols: 2
- num_rows : 1
- selected: [ "?a", "?b" ]
- contains_row: [ 1, 3 ]



3 changes: 2 additions & 1 deletion src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ VariableToColumnMap Join::computeVariableToColumnMap() const {
}
return makeVarToColMapForJoinOperation(
_left->getVariableColumns(), _right->getVariableColumns(),
{{_leftJoinCol, _rightJoinCol}}, BinOpType::Join);
{{_leftJoinCol, _rightJoinCol}}, BinOpType::Join,
_left->getResultWidth());
}

// _____________________________________________________________________________
Expand Down
6 changes: 3 additions & 3 deletions src/engine/MultiColumnJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ ResultTable MultiColumnJoin::computeResult() {

// _____________________________________________________________________________
VariableToColumnMap MultiColumnJoin::computeVariableToColumnMap() const {
return makeVarToColMapForJoinOperation(_left->getVariableColumns(),
_right->getVariableColumns(),
_joinColumns, BinOpType::Join);
return makeVarToColMapForJoinOperation(
_left->getVariableColumns(), _right->getVariableColumns(), _joinColumns,
BinOpType::Join, _left->getResultWidth());
}

// _____________________________________________________________________________
Expand Down
6 changes: 3 additions & 3 deletions src/engine/OptionalJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ ResultTable OptionalJoin::computeResult() {

// _____________________________________________________________________________
VariableToColumnMap OptionalJoin::computeVariableToColumnMap() const {
return makeVarToColMapForJoinOperation(_left->getVariableColumns(),
_right->getVariableColumns(),
_joinColumns, BinOpType::OptionalJoin);
return makeVarToColMapForJoinOperation(
_left->getVariableColumns(), _right->getVariableColumns(), _joinColumns,
BinOpType::OptionalJoin, _left->getResultWidth());
}

// _____________________________________________________________________________
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Union.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ VariableToColumnMap Union::computeVariableToColumnMap() const {
VariableToColumnMap variableColumns;

// A variable is only guaranteed to always be bound if it exists in all the
// subtrees and if it is guaranteed to be bound in all the subrees.
// subtrees and if it is guaranteed to be bound in all the subtrees.
auto mightContainUndef = [this](const Variable& var) {
return std::ranges::any_of(
_subtrees, [&](const shared_ptr<QueryExecutionTree>& subtree) {
Expand All @@ -93,7 +93,7 @@ VariableToColumnMap Union::computeVariableToColumnMap() const {
size_t nextColumnIndex = 0;
auto addVariableColumnIfNotExists =
[&mightContainUndef, &variableColumns,
&nextColumnIndex](const VarAndTypeInfo& varAndIndex) mutable {
&nextColumnIndex](const VarAndTypeInfo& varAndIndex) {
const auto& variable = varAndIndex.first;
if (!variableColumns.contains(variable)) {
using enum ColumnIndexAndTypeInfo::UndefStatus;
Expand Down
1 change: 1 addition & 0 deletions src/engine/ValuesForTesting.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "engine/Operation.h"
#include "engine/QueryExecutionContext.h"
#include "engine/ResultTable.h"
#include "util/Algorithm.h"
#include "util/Random.h"

// An operation that yields a given `IdTable` as its result. It is used for
Expand Down
10 changes: 6 additions & 4 deletions src/engine/VariableToColumnMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ copySortedByColumnIndex(VariableToColumnMap map) {
// ______________________________________________________________________________
VariableToColumnMap makeVarToColMapForJoinOperation(
const VariableToColumnMap& leftVars, const VariableToColumnMap& rightVars,
std::vector<std::array<ColumnIndex, 2>> joinColumns, BinOpType binOpType) {
std::vector<std::array<ColumnIndex, 2>> joinColumns, BinOpType binOpType,
size_t leftResultWidth) {
// First come all the variables from the left input. Variables that only
// appear in the left input always have the same definedness as in the input.
// For join columns we might override it below.
VariableToColumnMap result{leftVars};
bool isOptionalJoin = binOpType == BinOpType::OptionalJoin;
size_t nextColumnIndex = result.size();

// Add the variables from the right operand.
size_t numJoinColumnsBefore = 0;
for (const auto& [variable, columnIndexWithType] :
copySortedByColumnIndex(rightVars)) {
const auto& colIdxRight = columnIndexWithType.columnIndex_;
Expand All @@ -41,17 +42,18 @@ VariableToColumnMap makeVarToColMapForJoinOperation(
static_cast<bool>(undef) &&
(isOptionalJoin ||
static_cast<bool>(columnIndexWithType.mightContainUndef_)));
++numJoinColumnsBefore;
} else {
// The column is not a join column. For non-optional joins it keeps its
// definedness, but for optional joins, it is `PossiblyUndefined` if
// there is a row in the left operand that has no match in the right
// input.
result[variable] = ColumnIndexAndTypeInfo{
nextColumnIndex,
leftResultWidth + columnIndexWithType.columnIndex_ -
numJoinColumnsBefore,
static_cast<ColumnIndexAndTypeInfo::UndefStatus>(
static_cast<bool>(columnIndexWithType.mightContainUndef_) ||
isOptionalJoin)};
nextColumnIndex++;
}
}
return result;
Expand Down
8 changes: 6 additions & 2 deletions src/engine/VariableToColumnMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@ copySortedByColumnIndex(VariableToColumnMap map);
// operand (in the same order as in the input, including the join columns), then
// the columns of the right operand without the join columns. We additionally
// need the information whether the JOIN is optional, because then additional
// columns might contain undefined values.
// columns might contain undefined values. We also need the total width of the
// left input, because there might be columns that are not represented in the
// `VariableToColumnMap` (e.g. because they are not visible because of subquery
// scoping).
enum class BinOpType { Join, OptionalJoin };
VariableToColumnMap makeVarToColMapForJoinOperation(
const VariableToColumnMap& leftVars, const VariableToColumnMap& rightVars,
std::vector<std::array<ColumnIndex, 2>> joinColumns, BinOpType binOpType);
std::vector<std::array<ColumnIndex, 2>> joinColumns, BinOpType binOpType,
size_t leftResultWidth);
2 changes: 1 addition & 1 deletion src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ add_library(parser
data/OrderKey.h data/GroupKey.h ParseException.cpp SelectClause.cpp
SelectClause.h GraphPatternOperation.cpp GraphPatternOperation.h
# The `Variable.cpp` from the subdirectory is linked here because otherwise we get linking errors.
GraphPattern.cpp GraphPattern.h ConstructClause.h data/Variable.cpp)
GraphPattern.cpp GraphPattern.h ConstructClause.h data/VariableToColumnMapPrinters.cpp)
qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2 util engine)

Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include "parser/data/Variable.h"

#include "ctre/ctre.h"
#include "engine/ExportQueryExecutionTrees.h"
#include "index/Index.h"
#include "parser/data/ConstructQueryExportContext.h"
#include "parser/data/Variable.h"

// ___________________________________________________________________________
Variable::Variable(std::string name) : _name{std::move(name)} {
Expand Down
4 changes: 3 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -312,4 +312,6 @@ addLinkAndDiscoverTest(LimitOffsetClauseTest)

addLinkAndDiscoverTest(OperationTest engine)

addLinkAndDiscoverTest(RuntimeInformationTest engine)
addLinkAndDiscoverTest(RuntimeInformationTest engine)

addLinkAndDiscoverTest(VariableToColumnMapTest engine)
100 changes: 100 additions & 0 deletions test/VariableToColumnMapTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include "./printers/VariablePrinters.h"
#include "./printers/VariableToColumnMapPrinters.h"
#include "engine/VariableToColumnMap.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

// In the right input there are three columns (0, 3, 4) which are not
// represented by variables, but that will still be part of the result.
TEST(VariableToColumnMap, gapsInRightCols) {
using V = Variable;
VariableToColumnMap leftCols;
leftCols[V{"?x"}] = makeAlwaysDefinedColumn(0);
leftCols[V{"?y"}] = makeAlwaysDefinedColumn(1);

VariableToColumnMap rightCols;
rightCols[V{"?x"}] = makeAlwaysDefinedColumn(1);
rightCols[V{"?a"}] = makePossiblyUndefinedColumn(2);
rightCols[V{"?b"}] = makeAlwaysDefinedColumn(5);

auto joinCols = makeVarToColMapForJoinOperation(leftCols, rightCols, {{0, 1}},
BinOpType::Join, 2);
VariableToColumnMap expected;
expected[V{"?x"}] = makeAlwaysDefinedColumn(0);
expected[V{"?y"}] = makeAlwaysDefinedColumn(1);
expected[V{"?a"}] = makePossiblyUndefinedColumn(3);
expected[V{"?b"}] = makeAlwaysDefinedColumn(6);
EXPECT_THAT(joinCols, ::testing::UnorderedElementsAreArray(expected));
}

// In the left input there are three columns (1, 3) which are not represented by
// variables, but that will still be part of the result. The column `3` can only
// be inferred from the last argument to `makeVarToColMapForJoinOperation` which
// is the total number of columns in the left input.
TEST(VariableToColumnMap, gapsInLeftCols) {
using V = Variable;
VariableToColumnMap leftCols;
leftCols[V{"?x"}] = makeAlwaysDefinedColumn(2);
leftCols[V{"?y"}] = makeAlwaysDefinedColumn(0);

VariableToColumnMap rightCols;
rightCols[V{"?x"}] = makePossiblyUndefinedColumn(0);
rightCols[V{"?a"}] = makeAlwaysDefinedColumn(1);

auto joinCols = makeVarToColMapForJoinOperation(leftCols, rightCols, {{3, 0}},
BinOpType::Join, 4);
VariableToColumnMap expected;
expected[V{"?x"}] = makeAlwaysDefinedColumn(2);
expected[V{"?y"}] = makeAlwaysDefinedColumn(0);
expected[V{"?a"}] = makeAlwaysDefinedColumn(4);
EXPECT_THAT(joinCols, ::testing::UnorderedElementsAreArray(expected));
}

// Test the status of `mightBeUndefined` for join columns when the corresponding
// columns in one or both of the inputs might be undefined.
TEST(VariableToColumnMap, undefinedJoinColumn) {
using V = Variable;
VariableToColumnMap leftCols;
leftCols[V{"?x"}] = makeAlwaysDefinedColumn(0);
leftCols[V{"?y"}] = makePossiblyUndefinedColumn(1);
leftCols[V{"?z"}] = makePossiblyUndefinedColumn(2);

VariableToColumnMap rightCols;
rightCols[V{"?x"}] = makePossiblyUndefinedColumn(0);
rightCols[V{"?y"}] = makeAlwaysDefinedColumn(2);
rightCols[V{"?z"}] = makePossiblyUndefinedColumn(1);

auto joinCols = makeVarToColMapForJoinOperation(
leftCols, rightCols, {{0, 0}, {1, 2}, {2, 1}}, BinOpType::Join, 3);
VariableToColumnMap expected;
expected[V{"?x"}] = makeAlwaysDefinedColumn(0);
expected[V{"?y"}] = makeAlwaysDefinedColumn(1);
expected[V{"?z"}] = makePossiblyUndefinedColumn(2);
EXPECT_THAT(joinCols, ::testing::UnorderedElementsAreArray(expected));
}

// Test all the combinations for possibly undefined and always defined columns
// that might occur with OPTIONAL joins.
TEST(VariableToColumnMap, optionalJoin) {
using V = Variable;
VariableToColumnMap leftCols;
leftCols[V{"?x"}] = makeAlwaysDefinedColumn(0);
leftCols[V{"?y"}] = makePossiblyUndefinedColumn(1);

VariableToColumnMap rightCols;
rightCols[V{"?x"}] = makePossiblyUndefinedColumn(0);
rightCols[V{"?y"}] = makeAlwaysDefinedColumn(2);
rightCols[V{"?a"}] = makeAlwaysDefinedColumn(1);

auto joinCols = makeVarToColMapForJoinOperation(
leftCols, rightCols, {{0, 0}, {1, 2}}, BinOpType::OptionalJoin, 2);
VariableToColumnMap expected;
expected[V{"?x"}] = makeAlwaysDefinedColumn(0);
expected[V{"?y"}] = makePossiblyUndefinedColumn(1);
expected[V{"?a"}] = makePossiblyUndefinedColumn(2);
EXPECT_THAT(joinCols, ::testing::UnorderedElementsAreArray(expected));
}
12 changes: 12 additions & 0 deletions test/printers/VariablePrinters.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#pragma once
#include "parser/data/Variable.h"

// _____________________________________________________________
void PrintTo(const Variable& var, std::ostream* os) {
auto& s = *os;
s << var.name();
}
14 changes: 14 additions & 0 deletions test/printers/VariableToColumnMapPrinters.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#pragma once

#include "engine/VariableToColumnMap.h"

// __________________________________________________________________
void PrintTo(const ColumnIndexAndTypeInfo& colIdx, std::ostream* os) {
auto& s = *os;
s << "col: " << colIdx.columnIndex_
<< " might be undef: " << static_cast<bool>(colIdx.mightContainUndef_);
}

0 comments on commit 8b65e4d

Please sign in to comment.