Skip to content

Commit

Permalink
Fold points directly into the Id (ad-freiburg#1506)
Browse files Browse the repository at this point in the history
Points (specified by WKT literals with type point) are now stored directly in the `ValueId`, with 30 bits of precision for each of the two coordinates. This makes working on point geometries (e.g. spatial queries that only work on centroids) orders of magnitude faster, because the points no longer have to be retrieved from disk and parsed.
  • Loading branch information
ullingerc authored Sep 27, 2024
1 parent 6384041 commit 85793e3
Show file tree
Hide file tree
Showing 25 changed files with 664 additions and 110 deletions.
2 changes: 2 additions & 0 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) {
return std::pair{std::to_string(id.getInt()), XSD_INT_TYPE};
case Date:
return id.getDate().toStringAndType();
case GeoPoint:
return id.getGeoPoint().toStringAndType();
case BlankNodeIndex:
return std::pair{absl::StrCat("_:bn", id.getBlankNodeIndex().get()),
nullptr};
Expand Down
17 changes: 9 additions & 8 deletions src/engine/sparqlExpressions/NaryExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@
#include "engine/sparqlExpressions/NaryExpression.h"

#include "engine/sparqlExpressions/NaryExpressionImpl.h"
#include "engine/sparqlExpressions/SparqlExpressionValueGetters.h"
#include "util/GeoSparqlHelpers.h"

namespace sparqlExpression {
namespace detail {
NARY_EXPRESSION(LongitudeExpression, 1,
FV<NumericIdWrapper<decltype(ad_utility::wktLongitude), true>,
LiteralFromIdGetter>);
NARY_EXPRESSION(LatitudeExpression, 1,
FV<NumericIdWrapper<decltype(ad_utility::wktLatitude), true>,
LiteralFromIdGetter>);
NARY_EXPRESSION(
LongitudeExpression, 1,
FV<NumericIdWrapper<ad_utility::WktLongitude, true>, GeoPointValueGetter>);
NARY_EXPRESSION(
LatitudeExpression, 1,
FV<NumericIdWrapper<ad_utility::WktLatitude, true>, GeoPointValueGetter>);
NARY_EXPRESSION(DistExpression, 2,
FV<NumericIdWrapper<decltype(ad_utility::wktDist), true>,
LiteralFromIdGetter>);
FV<NumericIdWrapper<ad_utility::WktDistGeoPoints, true>,
GeoPointValueGetter>);

} // namespace detail

Expand Down
8 changes: 8 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "engine/ExportQueryExecutionTrees.h"
#include "global/Constants.h"
#include "global/ValueId.h"
#include "util/Conversions.h"

using namespace sparqlExpression::detail;
Expand All @@ -29,6 +30,7 @@ NumericValue NumericValueGetter::operator()(
case Datatype::TextRecordIndex:
case Datatype::WordVocabIndex:
case Datatype::Date:
case Datatype::GeoPoint:
case Datatype::BlankNodeIndex:
return NotNumeric{};
}
Expand Down Expand Up @@ -68,6 +70,7 @@ auto EffectiveBooleanValueGetter::operator()(
case Datatype::WordVocabIndex:
case Datatype::TextRecordIndex:
case Datatype::Date:
case Datatype::GeoPoint:
return True;
}
AD_FAIL();
Expand Down Expand Up @@ -144,6 +147,8 @@ IntDoubleStr ToNumericValueGetter::operator()(
return id.getDouble();
case Datatype::Bool:
return static_cast<int>(id.getBool());
case Datatype::GeoPoint:
return id.getGeoPoint().toStringRepresentation();
case Datatype::VocabIndex:
case Datatype::LocalVocabIndex:
case Datatype::TextRecordIndex:
Expand Down Expand Up @@ -180,6 +185,8 @@ OptIri DatatypeValueGetter::operator()(ValueId id,
return Iri::fromIrirefWithoutBrackets(XSD_DOUBLE_TYPE);
case Int:
return Iri::fromIrirefWithoutBrackets(XSD_INT_TYPE);
case GeoPoint:
return Iri::fromIrirefWithoutBrackets(GEO_WKT_LITERAL);
case Date: {
auto dateType = id.getDate().toStringAndType().second;
AD_CORRECTNESS_CHECK(dateType != nullptr);
Expand Down Expand Up @@ -250,6 +257,7 @@ T getValue(ValueId id, const sparqlExpression::EvaluationContext* context,
case Int:
case Double:
case Date:
case GeoPoint:
case Undefined:
if constexpr (std::is_same_v<T, sparqlExpression::IdOrLiteralOrIri>) {
return Id::makeUndefined();
Expand Down
20 changes: 20 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "engine/Result.h"
#include "engine/sparqlExpressions/SparqlExpressionTypes.h"
#include "global/Id.h"
#include "parser/GeoPoint.h"
#include "util/ConstexprSmallString.h"
#include "util/TypeTraits.h"

Expand Down Expand Up @@ -208,6 +209,25 @@ struct DateValueGetter : Mixin<DateValueGetter> {
}
};

/// `ValueGetter` for expression templates that operate on geographic points.
/// The result is a `std::optional<GeoPoint>`: it holds a point only when the
/// input is a `ValueId` whose datatype is `Datatype::GeoPoint`; every other
/// input (including all `LiteralOrIri` values) yields `std::nullopt`.
struct GeoPointValueGetter : Mixin<GeoPointValueGetter> {
  using Mixin<GeoPointValueGetter>::operator();
  using Opt = std::optional<GeoPoint>;

  // Decode the point stored in `id`, or return `std::nullopt` if `id` does
  // not have the `GeoPoint` datatype.
  Opt operator()(ValueId id, const EvaluationContext*) const {
    if (id.getDatatype() != Datatype::GeoPoint) {
      return std::nullopt;
    }
    return id.getGeoPoint();
  }

  // A `LiteralOrIri` is never treated as a point by this getter.
  Opt operator()(const LiteralOrIri&, const EvaluationContext*) const {
    return std::nullopt;
  }
};

// If the `id` points to a literal, return the contents of that literal (without
// the quotation marks). For all other types (IRIs, numbers, etc.) return
// `std::nullopt`. This is used for expressions that work on strings, but for
Expand Down
7 changes: 7 additions & 0 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ constexpr inline char RDF_PREFIX[] =
constexpr inline char RDF_LANGTAG_STRING[] =
"http://www.w3.org/1999/02/22-rdf-syntax-ns#langString";

// IRI (without angle brackets) of the GeoSPARQL `wktLiteral` datatype.
constexpr inline char GEO_WKT_LITERAL[] =
"http://www.opengis.net/ont/geosparql#wktLiteral";

constexpr inline std::string_view VOCAB_SUFFIX = ".vocabulary";
constexpr inline std::string_view MMAP_FILE_SUFFIX = ".meta";
constexpr inline std::string_view CONFIGURATION_FILE = ".meta-data.json";
Expand Down Expand Up @@ -245,3 +248,7 @@ constexpr inline size_t NUM_SORT_THREADS = 4;
constexpr inline std::string_view EMPH_ON = "\033[1m";
/// ANSI escape sequence to print "normal" text again in the console.
constexpr inline std::string_view EMPH_OFF = "\033[22m";

// Allowed range for geographical coordinates parsed from WKT text. The bounds
// are symmetric: a latitude must lie in
// [-COORDINATE_LAT_MAX, COORDINATE_LAT_MAX] and a longitude in
// [-COORDINATE_LNG_MAX, COORDINATE_LNG_MAX].
constexpr inline double COORDINATE_LAT_MAX = 90.0;
constexpr inline double COORDINATE_LNG_MAX = 180.0;
26 changes: 26 additions & 0 deletions src/global/ValueId.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#include <functional>
#include <limits>

#include "global/Constants.h"
#include "global/IndexTypes.h"
#include "parser/GeoPoint.h"
#include "util/BitUtils.h"
#include "util/DateYearDuration.h"
#include "util/NBitInteger.h"
Expand All @@ -28,6 +30,7 @@ enum struct Datatype {
LocalVocabIndex,
TextRecordIndex,
Date,
GeoPoint,
WordVocabIndex,
BlankNodeIndex,
MaxValue = BlankNodeIndex
Expand Down Expand Up @@ -58,6 +61,8 @@ constexpr std::string_view toString(Datatype type) {
return "WordVocabIndex";
case Datatype::Date:
return "Date";
case Datatype::GeoPoint:
return "GeoPoint";
case Datatype::BlankNodeIndex:
return "BlankNodeIndex";
}
Expand Down Expand Up @@ -102,6 +107,10 @@ class ValueId {
static_cast<size_t>(minStringType_) + 1 ==
stringTypes_.size());

// Assert that the size of an encoded GeoPoint equals the available bits in a
// ValueId.
static_assert(numDataBits == GeoPoint::numDataBits);

/// This exception is thrown if we try to store a value of an index type
/// (VocabIndex, LocalVocabIndex, TextRecordIndex) that is larger than
/// `maxIndex`.
Expand Down Expand Up @@ -310,6 +319,19 @@ class ValueId {

// TODO<joka921> implement dates

/// Build a `ValueId` with datatype `GeoPoint` whose data bits are the bit
/// representation of the point `p` (a POINT from WKT).
static ValueId makeFromGeoPoint(GeoPoint p) {
  const auto encodedPoint = p.toBitRepresentation();
  return addDatatypeBits(encodedPoint, Datatype::GeoPoint);
}

/// Decode the pair of coordinates stored in this `ValueId` into a new
/// `GeoPoint`. The result is unspecified if `getDatatype() != GeoPoint`.
GeoPoint getGeoPoint() const {
  // Strip the datatype tag so that only the encoded coordinates remain.
  const T encodedPoint = removeDatatypeBits(_bits);
  return GeoPoint::fromBitRepresentation(encodedPoint);
}

/// Return the smallest and largest possible `ValueId` wrt the underlying
/// representation
constexpr static ValueId min() noexcept {
Expand Down Expand Up @@ -360,6 +382,8 @@ class ValueId {
return std::invoke(visitor, getWordVocabIndex());
case Datatype::Date:
return std::invoke(visitor, getDate());
case Datatype::GeoPoint:
return std::invoke(visitor, getGeoPoint());
case Datatype::BlankNodeIndex:
return std::invoke(visitor, getBlankNodeIndex());
}
Expand All @@ -385,6 +409,8 @@ class ValueId {
ostr << (value ? "true" : "false");
} else if constexpr (ad_utility::isSimilar<T, DateYearOrDuration>) {
ostr << value.toStringAndType().first;
} else if constexpr (ad_utility::isSimilar<T, GeoPoint>) {
ostr << value.toStringRepresentation();
} else if constexpr (ad_utility::isSimilar<T, LocalVocabIndex>) {
AD_CORRECTNESS_CHECK(value != nullptr);
ostr << value->toStringRepresentation();
Expand Down
2 changes: 2 additions & 0 deletions src/global/ValueIdComparators.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ inline std::vector<std::pair<RandomIt, RandomIt>> getRangesForId(
case Datatype::TextRecordIndex:
case Datatype::Bool:
case Datatype::Date:
case Datatype::GeoPoint:
case Datatype::BlankNodeIndex:
// For `Date` the trivial comparison via bits is also correct.
return detail::simplifyRanges(
Expand Down Expand Up @@ -434,6 +435,7 @@ inline std::vector<std::pair<RandomIt, RandomIt>> getRangesForEqualIds(
case Datatype::Bool:
case Datatype::Undefined:
case Datatype::Date:
case Datatype::GeoPoint:
case Datatype::BlankNodeIndex:
AD_FAIL();
case Datatype::VocabIndex:
Expand Down
2 changes: 1 addition & 1 deletion src/index/IndexFormatVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ struct IndexFormatVersion {
// The actual index version. Change it once the binary format of the index
// changes.
inline const IndexFormatVersion& indexFormatVersion{
1482, DateYearOrDuration{Date{2024, 9, 25}}};
1506, DateYearOrDuration{Date{2024, 9, 27}}};
} // namespace qlever
1 change: 1 addition & 0 deletions src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ add_library(parser
ParallelBuffer.cpp
SparqlParserHelpers.cpp
TripleComponent.cpp
GeoPoint.cpp
GraphPatternOperation.cpp
PropertyPath.cpp
data/SparqlFilter.cpp
Expand Down
109 changes: 109 additions & 0 deletions src/parser/GeoPoint.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Christoph Ullinger <[email protected]>

#include "parser/GeoPoint.h"

#include <cmath>
#include <optional>

#include "parser/Literal.h"
#include "parser/NormalizedString.h"
#include "util/Exception.h"
#include "util/GeoSparqlHelpers.h"

// _____________________________________________________________________________
GeoPoint::GeoPoint(double lat, double lng) : lat_{lat}, lng_{lng} {
  // A coordinate is invalid if it is NaN or lies outside of the symmetric
  // interval `[-limit, limit]`.
  const auto isInvalid = [](double coordinate, double limit) {
    return std::isnan(coordinate) || coordinate < -limit || coordinate > limit;
  };

  // The latitude is validated first; the boolean passed to the exception is
  // `true` for the latitude and `false` for the longitude.
  if (isInvalid(lat, COORDINATE_LAT_MAX)) {
    throw CoordinateOutOfRangeException(lat, true);
  }
  if (isInvalid(lng, COORDINATE_LNG_MAX)) {
    throw CoordinateOutOfRangeException(lng, false);
  }
}

// _____________________________________________________________________________
// Encode this point as a single integer: the scaled latitude occupies the
// `numDataBitsCoordinate` bits directly above the scaled longitude, which
// occupies the lowest `numDataBitsCoordinate` bits. All bits covered by
// `coordinateMaskFreeBits` are zero in the result.
GeoPoint::T GeoPoint::toBitRepresentation() const {
  // Map a coordinate from `[-maxValue, maxValue]` to an unsigned integer in
  // `[0, maxCoordinateEncoded]`.
  constexpr auto scaleCoordinate = [](double value, double maxValue) {
    // Shift and normalize to the interval [0, 1].
    double downscaled = (value + maxValue) / (2 * maxValue);

    AD_CORRECTNESS_CHECK(0.0 <= downscaled && downscaled <= 1.0, [&]() {
      return absl::StrCat("downscaled coordinate value ", downscaled,
                          " does not satisfy [0,1] constraint");
    });

    // Stretch to the allowed range of values between 0 and
    // `maxCoordinateEncoded` and round to the nearest integer. `std::round` is
    // used explicitly because `<cmath>` is only guaranteed to declare the
    // function inside namespace `std`.
    auto newscaled =
        static_cast<size_t>(std::round(downscaled * maxCoordinateEncoded));

    // `newscaled` is unsigned, so only the upper bound can be violated.
    AD_CORRECTNESS_CHECK(newscaled <= maxCoordinateEncoded, [&]() {
      return absl::StrCat("scaled coordinate value ", newscaled,
                          " does not satisfy [0,", maxCoordinateEncoded,
                          "] constraint");
    });
    return newscaled;
  };

  T lat = scaleCoordinate(getLat(), COORDINATE_LAT_MAX);
  T lng = scaleCoordinate(getLng(), COORDINATE_LNG_MAX);

  // Use a shift to obtain the latitude bits followed by the longitude bits in
  // the lower part of the result.
  auto bits = (lat << numDataBitsCoordinate) | lng;

  // Ensure that the bits which must remain free are all 0.
  AD_CORRECTNESS_CHECK((bits & coordinateMaskFreeBits) == 0);
  return bits;
}

// _____________________________________________________________________________
// Try to interpret `value` as a WKT point. If `checkDatatype` is `true`, the
// literal must additionally carry the datatype `GEO_WKT_LITERAL`, otherwise
// `std::nullopt` is returned without looking at the content. `std::nullopt` is
// also returned if one of the parsed coordinates is NaN (presumably the way
// `parseWktPoint` signals that the content is not a point - TODO confirm).
// Note that the `GeoPoint` constructor throws if a coordinate is out of range.
std::optional<GeoPoint> GeoPoint::parseFromLiteral(
    const ad_utility::triple_component::Literal& value, bool checkDatatype) {
  if (checkDatatype) {
    const bool isWktLiteral =
        value.hasDatatype() &&
        value.getDatatype() == asNormalizedStringViewUnsafe(GEO_WKT_LITERAL);
    if (!isWktLiteral) {
      return std::nullopt;
    }
  }

  // The parser yields the longitude first and the latitude second, whereas
  // the `GeoPoint` constructor expects the latitude first.
  const auto [lng, lat] = ad_utility::detail::parseWktPoint(
      asStringViewUnsafe(value.getContent()));
  if (std::isnan(lng) || std::isnan(lat)) {
    return std::nullopt;
  }
  return GeoPoint{lat, lng};
}

// _____________________________________________________________________________
// Inverse of the bit encoding: read the latitude from the bits selected by
// `coordinateMaskLat` and the longitude from the bits selected by
// `coordinateMaskLng`, and scale both back to their usual value ranges.
GeoPoint GeoPoint::fromBitRepresentation(T bits) {
  // Decode a single coordinate: select it via `mask`, move it to the lowest
  // bits via `shift`, and map it from `[0, maxCoordinateEncoded]` back to
  // `[-maxValue, maxValue]`.
  constexpr auto decodeCoordinate = [](T encoded, T mask, T shift,
                                       double maxValue) {
    const auto raw = static_cast<double>((encoded & mask) >> shift);
    AD_CORRECTNESS_CHECK(0.0 <= raw && raw <= maxCoordinateEncoded);

    const double coordinate =
        ((raw / maxCoordinateEncoded) * 2 * maxValue) - maxValue;
    AD_CORRECTNESS_CHECK(-maxValue <= coordinate && coordinate <= maxValue);
    return coordinate;
  };

  const double lat = decodeCoordinate(bits, coordinateMaskLat,
                                      numDataBitsCoordinate, COORDINATE_LAT_MAX);
  const double lng =
      decodeCoordinate(bits, coordinateMaskLng, 0, COORDINATE_LNG_MAX);

  return {lat, lng};
}

// _____________________________________________________________________________
// Format this point as `POINT(<lng> <lat>)`, i.e. with the longitude first.
std::string GeoPoint::toStringRepresentation() const {
  // The coordinates are converted via `std::to_string` first (instead of being
  // passed to `absl::StrCat` as doubles) in order to get more decimals.
  // NOTE(review): `std::to_string(double)` prints six digits after the decimal
  // point; confirm that this is sufficient for the encoded precision.
  const std::string lngString = std::to_string(getLng());
  const std::string latString = std::to_string(getLat());
  return absl::StrCat("POINT(", lngString, " ", latString, ")");
}

// _____________________________________________________________________________
std::pair<std::string, const char*> GeoPoint::toStringAndType() const {
return std::pair(toStringRepresentation(), GEO_WKT_LITERAL);
};
Loading

0 comments on commit 85793e3

Please sign in to comment.