Skip to content

Commit

Permalink
Fix unescaped literals in SERVICE VALUES-clause (ad-freiburg#1530)
Browse files Browse the repository at this point in the history
Fixes the bug where literals including unescaped quotes (or other characters non conform with the specification of Rdf-Literals, e.g. `"Abraham "Bram" Wiertz"@en`)  were passed unescaped to the `SERVICE` endpoint (using the `VALUES` clause optimization) causing the `SERVICE` clause to fail with a parsing error of the `SERVICE` query.
Also fixes the behavior of undefined values and blank nodes for this optimization.
  • Loading branch information
UNEXENU authored Oct 8, 2024
1 parent 77ea2c6 commit 14ea95b
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 16 deletions.
60 changes: 49 additions & 11 deletions src/engine/Service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,31 +322,35 @@ std::optional<std::string> Service::getSiblingValuesClause() const {

checkCancellation();

ad_utility::HashSet<std::string> rowSet;

std::string values = " { ";
for (size_t rowIndex = 0; rowIndex < siblingResult->idTable().size();
++rowIndex) {
// Creates a single row of the values clause or an empty string on error.
auto createValueRow = [&](size_t rowIndex) -> std::string {
std::string row = "(";
for (const auto& columnIdx : commonColumnIndices) {
const auto& optionalString = ExportQueryExecutionTrees::idToStringAndType(
const auto& optStr = idToValueForValuesClause(
siblingTree_->getRootOperation()->getIndex(),
siblingResult->idTable()(rowIndex, columnIdx),
siblingResult->localVocab());

if (optionalString.has_value()) {
absl::StrAppend(&row, optionalString.value().first, " ");
if (!optStr.has_value()) {
return "";
}
absl::StrAppend(&row, optStr.value(), " ");
}
row.back() = ')';
return row;
};

if (rowSet.contains(row)) {
ad_utility::HashSet<std::string> rowSet;
std::string values = " { ";
for (size_t rowIndex = 0; rowIndex < siblingResult->idTable().size();
++rowIndex) {
std::string row = createValueRow(rowIndex);
if (row.empty() || rowSet.contains(row)) {
continue;
}

rowSet.insert(row);
absl::StrAppend(&values, row, " ");

absl::StrAppend(&values, row, " ");
checkCancellation();
}

Expand Down Expand Up @@ -455,3 +459,37 @@ void Service::throwErrorWithContext(std::string_view msg,
(last100.empty() ? "'"
: absl::StrCat(", last 100 bytes: '", last100, "'"))));
}

// ____________________________________________________________________________
std::optional<std::string> Service::idToValueForValuesClause(
const Index& index, Id id, const LocalVocab& localVocab) {
using enum Datatype;
const auto& optionalStringAndXsdType =
ExportQueryExecutionTrees::idToStringAndType(index, id, localVocab);
if (!optionalStringAndXsdType.has_value()) {
AD_CORRECTNESS_CHECK(id.getDatatype() == Undefined);
return "UNDEF";
}
const auto& [value, xsdType] = optionalStringAndXsdType.value();

switch (id.getDatatype()) {
case BlankNodeIndex:
// Blank nodes are not allowed in a values clause. Additionally blank
// nodes across a SERVICE endpoint are disjoint anyway, so rows that
// contain blank nodes will never create matches and we can safely omit
// them.
return std::nullopt;
case Int:
case Double:
case Bool:
return value;
default:
if (xsdType) {
return absl::StrCat("\"", value, "\"^^<", xsdType, ">");
} else if (value.starts_with('<')) {
return value;
} else {
return RdfEscaping::validRDFLiteralFromNormalized(value);
}
}
}
5 changes: 5 additions & 0 deletions src/engine/Service.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ class Service : public Operation {
static TripleComponent bindingToTripleComponent(
const nlohmann::json& binding);

// Create a value for the VALUES-clause used in `getSiblingValuesClause` from
// id. If the id is of type blank node `std::nullopt` is returned.
static std::optional<std::string> idToValueForValuesClause(
const Index& index, Id id, const LocalVocab& localVocab);

private:
// The string returned by this function is used as cache key.
std::string getCacheKeyImpl() const override;
Expand Down
3 changes: 0 additions & 3 deletions src/parser/RdfEscaping.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ NormalizedRDFString normalizeRDFLiteral(std::string_view origLiteral);
* and "be"ta"@en becomes "be\"ta"@en.
*
* If the `normLiteral` is not a literal, an AD_CONTRACT_CHECK will fail.
*
* TODO: This function currently only handles the escaping of " inside the
* literal, no other characters. Is that enough?
*/
std::string validRDFLiteralFromNormalized(std::string_view normLiteral);

Expand Down
2 changes: 1 addition & 1 deletion test/ExportQueryExecutionTreesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ TEST(ExportQueryExecutionTrees, CornerCases) {
std::string queryNoVariablesVisible = "SELECT ?not ?known WHERE {<s> ?p ?o}";
auto resultNoColumns = runJSONQuery(kg, queryNoVariablesVisible,
ad_utility::MediaType::sparqlJson);
ASSERT_TRUE(resultNoColumns["result"]["bindings"].empty());
ASSERT_TRUE(resultNoColumns["results"]["bindings"].empty());

auto qec = ad_utility::testing::getQec(kg);
AD_EXPECT_THROW_WITH_MESSAGE(
Expand Down
43 changes: 42 additions & 1 deletion test/ServiceTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ TEST_F(ServiceTest, computeResult) {
// Check 5: When a siblingTree with variables common to the Service
// Clause is passed, the Service Operation shall use the siblings result
// to reduce its Query complexity by injecting them as Values Clause

auto iri = ad_utility::testing::iri;
using TC = TripleComponent;
auto siblingTree = std::make_shared<QueryExecutionTree>(
Expand All @@ -402,7 +403,11 @@ TEST_F(ServiceTest, computeResult) {
{Variable{"?x"}, Variable{"?y"}, Variable{"?z"}},
{{TC(iri("<x>")), TC(iri("<y>")), TC(iri("<z>"))},
{TC(iri("<x>")), TC(iri("<y>")), TC(iri("<z2>"))},
{TC(iri("<blu>")), TC(iri("<bla>")), TC(iri("<blo>"))}}}));
{TC(iri("<blu>")), TC(iri("<bla>")), TC(iri("<blo>"))},
// This row will be ignored in the created Values Clause as it
// contains a blank node.
{TC(Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0))),
TC(iri("<bl>")), TC(iri("<ank>"))}}}));

auto parsedServiceClause5 = parsedServiceClause;
parsedServiceClause5.graphPatternAsString_ =
Expand Down Expand Up @@ -592,6 +597,10 @@ TEST_F(ServiceTest, bindingToTripleComponent) {
TripleComponent::Literal::fromEscapedRdfLiteral("\"Hallo \\\\Welt\"",
"@de"));

EXPECT_EQ(Service::bindingToTripleComponent(
{{"type", "literal"}, {"value", "a\"b\"c"}}),
TripleComponent::Literal::fromEscapedRdfLiteral("\"a\\\"b\\\"c\""));

EXPECT_EQ(Service::bindingToTripleComponent(
{{"type", "uri"}, {"value", "http://doof.org"}}),
TripleComponent::Iri::fromIrirefWithoutBrackets("http://doof.org"));
Expand All @@ -605,3 +614,35 @@ TEST_F(ServiceTest, bindingToTripleComponent) {
{{"type", "INVALID_TYPE"}, {"value", "v"}}),
::testing::HasSubstr("Type INVALID_TYPE is undefined"));
}

// ____________________________________________________________________________
TEST_F(ServiceTest, idToValueForValuesClause) {
auto idToVc = Service::idToValueForValuesClause;
LocalVocab localVocab{};
auto index = ad_utility::testing::makeIndexWithTestSettings();

// blanknode -> nullopt
EXPECT_EQ(idToVc(index, Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0)),
localVocab),
std::nullopt);

EXPECT_EQ(idToVc(index, Id::makeUndefined(), localVocab), "UNDEF");

// simple datatypes -> implicit string representation
EXPECT_EQ(idToVc(index, Id::makeFromInt(42), localVocab), "42");
EXPECT_EQ(idToVc(index, Id::makeFromDouble(3.14), localVocab), "3.14");
EXPECT_EQ(idToVc(index, Id::makeFromBool(true), localVocab), "true");

// Escape Quotes within literals.
auto str = LocalVocabEntry(
ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(
"a\"b\"c"));
EXPECT_EQ(idToVc(index, Id::makeFromLocalVocabIndex(&str), localVocab),
"\"a\\\"b\\\"c\"");

// value with xsd-type
EXPECT_EQ(
idToVc(index, Id::makeFromGeoPoint(GeoPoint(70.5, 130.2)), localVocab)
.value(),
absl::StrCat("\"POINT(130.200000 70.500000)\"^^<", GEO_WKT_LITERAL, ">"));
}

0 comments on commit 14ea95b

Please sign in to comment.