Skip to content

Commit

Permalink
fix: handling of Literal datatype (#2076)
Browse files Browse the repository at this point in the history
Check datatype against `None` instead of checking its truthiness (i.e.
`if datatype is not None:` instead of `if datatype:`).

Checking truthiness instead of `is not None` causes a blank string to
be treated the same as None. The consequence of this was that
`Literal.datatype` could be a `str`, a `URIRef` or `None`, instead of
just a `URIRef` or `None` as was seemingly intended.

Other changes:
- Changed the type of `Literal.datatype` to be `Optional[URIRef]`
  instead of `Optional[str]` now that `str` will always be converted to
  `URIRef` even if it is a blank string.
- Changed `rdflib.util._coalesce` to make it easier and safer to use
  with a non-`None` default value.
- Changed `rdflib.util` to avoid issues with circular imports.
  • Loading branch information
aucampia authored Aug 12, 2022
1 parent a4b9305 commit a39d143
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 76 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ RDFLib.sublime-project
/docs/_build/
RDFLib.sublime-workspace
coverage/
cov.xml
/.hgtags
/.hgignore
build/
Expand Down
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,25 @@ and will be removed for release.
<!-- -->
<!-- -->


<!-- -->
<!-- -->
<!-- CHANGE BARRIER: START PR #2076 -->
<!-- -->
<!-- -->

- Fixed handling of `Literal` `datatype` to correctly differentiate between
blank string values and undefined values. Also changed the type of
`rdflib.term.Literal.datatype` from `Optional[str]` to `Optional[URIRef]` now
that all non-`URIRef` `str` values will be converted to `URIRef`.
[PR #2076](https://github.com/RDFLib/rdflib/pull/2076).

<!-- -->
<!-- -->
<!-- CHANGE BARRIER: END PR #2076 -->
<!-- -->
<!-- -->

<!-- -->
<!-- -->
<!-- CHANGE BARRIER: START -->
Expand Down
2 changes: 1 addition & 1 deletion Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ tasks:
mypy:
desc: Run mypy
cmds:
- "{{._PYTHON | shellQuote}} -m mypy --show-error-context --show-error-codes"
- "{{._PYTHON | shellQuote}} -m mypy --show-error-context --show-error-codes {{.CLI_ARGS}}"

lint:fix:
desc: Fix auto-fixable linting errors
Expand Down
19 changes: 8 additions & 11 deletions rdflib/plugins/stores/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,12 @@ def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None:
self.__namespace[prefix] = namespace
else:
# type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef"
# type error: Incompatible types in assignment (expression has type "Optional[str]", target has type "str")
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index, assignment]
bound_prefix, prefix
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index]
bound_prefix, default=prefix
)
# type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str"
# type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef")
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index, assignment]
bound_namespace, namespace
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index]
bound_namespace, default=namespace
)

def namespace(self, prefix: str) -> Optional["URIRef"]:
Expand Down Expand Up @@ -538,14 +536,13 @@ def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None:
self.__namespace[prefix] = namespace
else:
# type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef"
# type error: Incompatible types in assignment (expression has type "Optional[str]", target has type "str")
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index, assignment]
bound_prefix, prefix
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index]
bound_prefix, default=prefix
)
# type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str"
# type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef")
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index, assignment]
bound_namespace, namespace
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index]
bound_namespace, default=namespace
)

def namespace(self, prefix: str) -> Optional["URIRef"]:
Expand Down
49 changes: 27 additions & 22 deletions rdflib/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
)

import rdflib
import rdflib.util
from rdflib.compat import long_type

if TYPE_CHECKING:
Expand Down Expand Up @@ -598,7 +599,7 @@ class Literal(Identifier):
_value: Any
_language: Optional[str]
# NOTE: _datatype should maybe be of type URIRef, and not optional.
_datatype: Optional[str]
_datatype: Optional[URIRef]
_ill_typed: Optional[bool]
__slots__ = ("_language", "_datatype", "_value", "_ill_typed")

Expand All @@ -624,7 +625,7 @@ def __new__(
if lang is not None and not _is_valid_langtag(lang):
raise ValueError(f"'{str(lang)}' is not a valid language tag!")

if datatype:
if datatype is not None:
datatype = URIRef(datatype)

value = None
Expand All @@ -633,7 +634,7 @@ def __new__(
# create from another Literal instance

lang = lang or lexical_or_value.language
if datatype:
if datatype is not None:
# override datatype
value = _castLexicalToPython(lexical_or_value, datatype)
else:
Expand All @@ -644,7 +645,7 @@ def __new__(
# passed a string
# try parsing lexical form of datatyped literal
value = _castLexicalToPython(lexical_or_value, datatype)
if datatype and datatype in _toPythonMapping:
if datatype is not None and datatype in _toPythonMapping:
# datatype is a recognized datatype IRI:
# https://www.w3.org/TR/rdf11-concepts/#dfn-recognized-datatype-iris
dt_uri: URIRef = URIRef(datatype)
Expand All @@ -661,10 +662,12 @@ def __new__(
value = lexical_or_value
_value, _datatype = _castPythonToLiteral(lexical_or_value, datatype)

datatype = datatype or _datatype
_datatype = None if _datatype is None else URIRef(_datatype)

datatype = rdflib.util._coalesce(datatype, _datatype)
if _value is not None:
lexical_or_value = _value
if datatype:
if datatype is not None:
lang = None

if isinstance(lexical_or_value, bytes):
Expand Down Expand Up @@ -729,7 +732,7 @@ def language(self) -> Optional[str]:
return self._language

@property
def datatype(self) -> Optional[str]:
def datatype(self) -> Optional[URIRef]:
return self._datatype

def __reduce__(
Expand All @@ -743,7 +746,7 @@ def __reduce__(
def __getstate__(self) -> Tuple[None, Dict[str, Union[str, None]]]:
return (None, dict(language=self.language, datatype=self.datatype))

def __setstate__(self, arg: Tuple[Any, Dict[str, str]]) -> None:
def __setstate__(self, arg: Tuple[Any, Dict[str, Any]]) -> None:
_, d = arg
self._language = d["language"]
self._datatype = d["datatype"]
Expand Down Expand Up @@ -1096,8 +1099,8 @@ def __gt__(self, other: Any) -> bool:

# plain-literals and xsd:string literals
# are "the same"
dtself = self.datatype or _XSD_STRING
dtother = other.datatype or _XSD_STRING
dtself = rdflib.util._coalesce(self.datatype, default=_XSD_STRING)
dtother = rdflib.util._coalesce(other.datatype, default=_XSD_STRING)

if dtself != dtother:
if rdflib.DAWG_LITERAL_COLLATION:
Expand Down Expand Up @@ -1129,9 +1132,9 @@ def __gt__(self, other: Any) -> bool:
# same language, same lexical form, check real dt
# plain-literals come before xsd:string!
if self.datatype != other.datatype:
if not self.datatype:
if self.datatype is None:
return False
elif not other.datatype:
elif other.datatype is None:
return True
else:
return self.datatype > other.datatype
Expand Down Expand Up @@ -1186,7 +1189,7 @@ def _comparable_to(self, other: Any) -> bool:
rich-compare with this literal
"""
if isinstance(other, Literal):
if self.datatype and other.datatype:
if self.datatype is not None and other.datatype is not None:
# two datatyped literals
if (
self.datatype not in XSDToPython
Expand Down Expand Up @@ -1247,7 +1250,7 @@ def __hash__(self) -> int: # type: ignore[override]
# Directly accessing the member is faster than the property.
if self._language:
res ^= hash(self._language.lower())
if self._datatype:
if self._datatype is not None:
res ^= hash(self._datatype)
return res

Expand Down Expand Up @@ -1342,8 +1345,8 @@ def eq(self, other: Any) -> bool:
if (self.language or "").lower() != (other.language or "").lower():
return False

dtself = self.datatype or _XSD_STRING
dtother = other.datatype or _XSD_STRING
dtself = rdflib.util._coalesce(self.datatype, default=_XSD_STRING)
dtother = rdflib.util._coalesce(other.datatype, default=_XSD_STRING)

if dtself == _XSD_STRING and dtother == _XSD_STRING:
# string/plain literals, compare on lexical form
Expand Down Expand Up @@ -1556,7 +1559,7 @@ def _literal_n3(

datatype = self.datatype
quoted_dt = None
if datatype:
if datatype is not None:
if qname_callback:
quoted_dt = qname_callback(datatype)
if not quoted_dt:
Expand Down Expand Up @@ -1906,16 +1909,18 @@ def _well_formed_negative_integer(lexical: Union[str, bytes], value: Any) -> boo
URIRef(_XSD_PFX + "token"),
)

_StrT = TypeVar("_StrT", bound=str)


def _py2literal(
obj: Any,
pType: Any, # noqa: N803
castFunc: Optional[Callable[[Any], Any]],
dType: Optional[str],
) -> Tuple[Any, Optional[str]]:
if castFunc:
dType: Optional[_StrT],
) -> Tuple[Any, Optional[_StrT]]:
if castFunc is not None:
return castFunc(obj), dType
elif dType:
elif dType is not None:
return obj, dType
else:
return obj, None
Expand Down Expand Up @@ -2062,7 +2067,7 @@ def _reset_bindings() -> None:


def _castLexicalToPython( # noqa: N802
lexical: Union[str, bytes], datatype: Optional[str]
lexical: Union[str, bytes], datatype: Optional[URIRef]
) -> Any:
"""
Map a lexical form to the value-space for the given datatype
Expand Down
Loading

0 comments on commit a39d143

Please sign in to comment.