Skip to content

Commit

Permalink
JSONLDParser now uses resolver directly
Browse files Browse the repository at this point in the history
We pass an explicit resolver around, defaulting to
`get_default_resolver()` if not specified.

`source_to_json()` has been removed because it uses `create_input_source()`,
which trusts all locations passed to it. Instead, we parse the JSON
explicitly in the two places where it was called.

Resolves #1369.
  • Loading branch information
alexdutton committed Aug 4, 2021
1 parent 4251056 commit 9455370
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 55 deletions.
30 changes: 22 additions & 8 deletions rdflib/plugins/parsers/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,16 @@

# NOTE: This code reads the entire JSON object into memory before parsing, but
# we should consider streaming the input to deal with arbitrarily large graphs.

import contextlib
import warnings
from rdflib.graph import ConjunctiveGraph
from rdflib.parser import Parser, URLInputSource
from rdflib.parser import Parser, URLInputSource, create_input_source
from rdflib.namespace import RDF, XSD
from rdflib.term import URIRef, BNode, Literal

from ..shared.jsonld.context import Context, Term, UNDEF
from ..shared.jsonld.util import (
json,
source_to_json,
VOCAB_DELIMS,
context_from_urlinputsource,
)
Expand All @@ -69,6 +68,8 @@


# Add jsonld suffix so RDFLib can guess format from file name
from ...resolver import get_default_resolver

try:
from rdflib.util import SUFFIX_FORMAT_MAP

Expand Down Expand Up @@ -108,7 +109,9 @@ def parse(self, source, sink, **kwargs):

generalized_rdf = kwargs.get("generalized_rdf", False)

data = source_to_json(source)
input_source = create_input_source(source)
with contextlib.closing(input_source.getByteStream()) as stream:
data = json.load(stream)

# NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
# context_aware. Keeping this check in case RDFLib is changed, or
Expand All @@ -118,7 +121,15 @@ def parse(self, source, sink, **kwargs):
else:
conj_sink = sink

to_rdf(data, conj_sink, base, context_data, version, generalized_rdf)
to_rdf(
data,
conj_sink,
base,
context_data,
version,
generalized_rdf,
resolver=self.resolver,
)


def to_rdf(
Expand All @@ -129,15 +140,18 @@ def to_rdf(
version=None,
generalized_rdf=False,
allow_lists_of_lists=None,
resolver=None,
):
resolver = resolver or get_default_resolver()

# TODO: docstring w. args and return value
context = Context(base=base, version=version)
context = Context(base=base, version=version, resolver=resolver)
if context_data:
context.load(context_data)
parser = Parser(
generalized_rdf=generalized_rdf, allow_lists_of_lists=allow_lists_of_lists
)
return parser.parse(data, context, dataset)
return parser.parse(data, context, dataset, resolver=resolver)


class Parser(object):
Expand All @@ -149,7 +163,7 @@ def __init__(self, generalized_rdf=False, allow_lists_of_lists=None):
else ALLOW_LISTS_OF_LISTS
)

def parse(self, data, context, dataset):
def parse(self, data, context, dataset, resolver):
topcontext = False

if isinstance(data, list):
Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/serializers/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def from_rdf(
context = context_data
context_data = context.to_dict()
else:
context = Context(context_data, base=base)
context = Context(context_data, base=base, resolver=None)

converter = Converter(context, use_native_types, use_rdf_type)
result = converter.convert(graph)
Expand Down
14 changes: 9 additions & 5 deletions rdflib/plugins/shared/jsonld/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"""
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/context.py

import contextlib
import json
from collections import namedtuple
from rdflib.namespace import RDF

Expand Down Expand Up @@ -35,7 +36,7 @@
VOCAB,
)
from .errors import INVALID_REMOTE_CONTEXT, RECURSIVE_CONTEXT_INCLUSION
from .util import source_to_json, urljoin, urlsplit, split_iri, norm_url
from .util import urljoin, urlsplit, split_iri, norm_url


NODE_KEYS = {GRAPH, ID, INCLUDED, JSON, LIST, NEST, NONE, REV, SET, TYPE, VALUE, LANG}
Expand All @@ -52,7 +53,7 @@ class Defined(int):


class Context(object):
def __init__(self, source=None, base=None, version=None):
def __init__(self, source=None, base=None, version=None, *, resolver=None):
self.version = version or 1.0
self.language = None
self.vocab = None
Expand All @@ -67,6 +68,7 @@ def __init__(self, source=None, base=None, version=None):
self.parent = None
self.propagate = True
self._context_cache = {}
self.resolver = resolver
if source:
self.load(source)

Expand All @@ -93,7 +95,7 @@ def subcontext(self, source, propagate=True):
return parent._subcontext(source, propagate)

def _subcontext(self, source, propagate):
ctx = Context(version=self.version)
ctx = Context(version=self.version, resolver=self.resolver)
ctx.propagate = propagate
ctx.parent = self
ctx.language = self.language
Expand Down Expand Up @@ -406,7 +408,9 @@ def _fetch_context(self, source, base, referenced_contexts):
if source_url in self._context_cache:
return self._context_cache[source_url]

source = source_to_json(source_url)
input_source = self.resolver.resolve(source_url, format="json-ld")
with contextlib.closing(input_source.getByteStream()) as stream:
source = json.load(stream)
if source and CONTEXT not in source:
raise INVALID_REMOTE_CONTEXT
self._context_cache[source_url] = source
Expand Down
11 changes: 0 additions & 11 deletions rdflib/plugins/shared/jsonld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,6 @@
from io import StringIO


def source_to_json(source):
# TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
source = create_input_source(source, format="json-ld")

stream = source.getByteStream()
try:
return json.load(StringIO(stream.read().decode("utf-8")))
finally:
stream.close()


VOCAB_DELIMS = ("#", "/", ":")


Expand Down
3 changes: 3 additions & 0 deletions test/jsonld/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

# monkey-patch N-Quads parser via its underlying W3CNTriplesParser to keep source bnode ids
from rdflib.plugins.parsers.ntriples import W3CNTriplesParser, r_nodeid, bNode
from rdflib.resolver import PermissiveResolver


def _preserving_nodeid(self, bnode_context=None):
Expand All @@ -29,6 +30,7 @@ def do_test_json(suite_base, cat, num, inputpath, expectedpath, context, options
base=input_uri,
context_data=context,
generalized_rdf=True,
resolver=PermissiveResolver(),
)
expected_json = _load_json(expectedpath)
use_native_types = True # CONTEXT in input_obj
Expand Down Expand Up @@ -79,6 +81,7 @@ def do_test_parser(suite_base, cat, num, inputpath, expectedpath, context, optio
base=options.get("base", input_uri),
version=version,
generalized_rdf=options.get("produceGeneralizedRdf", False),
resolver=PermissiveResolver(),
)
assert isomorphic(result_graph, expected_graph), "Expected:\n%s\nGot:\n%s" % (
expected_graph.serialize(),
Expand Down
58 changes: 28 additions & 30 deletions test/jsonld/test_context.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
"""
JSON-LD Context Spec
"""

import json
from functools import wraps

from rdflib.parser import StringInputSource
from rdflib.plugins.shared.jsonld.context import Context, Term
from rdflib.plugins.shared.jsonld import context
from rdflib.plugins.shared.jsonld import errors
from rdflib.resolver import Resolver, url_resolver


# exception utility (see also nose.tools.raises)

def _expect_exception(expected_error):
def _try_wrapper(f):
@wraps(f)
Expand Down Expand Up @@ -128,85 +132,79 @@ def test_prefix_like_vocab():
assert term.id == "ex:term"


# Mock external sources loading
SOURCES = {}
_source_to_json = context.source_to_json

class MockContextResolver(Resolver):
def __init__(self, contexts):
self.contexts = contexts

def _mock_source_loader(f):
@wraps(f)
def _wrapper():
context.source_to_json = SOURCES.get
f()
context.source_to_json = _source_to_json
def is_resolution_allowed(self, scheme: str, location: str) -> bool:
return str(location) in self.contexts

return _wrapper
@url_resolver(schemes={'http', 'https'})
def resolve_http(self, url, format, scheme, trust=False):
return StringInputSource(
json.dumps(self.contexts[str(url)]),
system_id=str(url),
)


@_mock_source_loader
def test_loading_contexts():
# Given context data:
source1 = "http://example.org/base.jsonld"
source2 = "http://example.org/context.jsonld"
SOURCES[source1] = {"@context": {"@vocab": "http://example.org/vocab/"}}
SOURCES[source2] = {"@context": [source1, {"n": "name"}]}
SOURCES = {
source1: {"@context": {"@vocab": "http://example.org/vocab/"}},
source2: {"@context": [source1, {"n": "name"}]},
}

# Create a context:
ctx = Context(source2)
ctx = Context(source2, resolver=MockContextResolver(SOURCES))
assert ctx.expand("n") == "http://example.org/vocab/name"

# Context can be a list:
ctx = Context([source2])
ctx = Context([source2], resolver=MockContextResolver(SOURCES))
assert ctx.expand("n") == "http://example.org/vocab/name"


@_mock_source_loader
def test_use_base_in_local_context():
ctx = Context({"@base": "/local"})
assert ctx.base == "/local"


@_mock_source_loader
def test_override_base():
ctx = Context(
base="http://example.org/app/data/item", source={"@base": "http://example.org/"}
)
assert ctx.base == "http://example.org/"


@_mock_source_loader
def test_resolve_relative_base():
ctx = Context(base="http://example.org/app/data/item", source={"@base": "../"})
assert ctx.base == "http://example.org/app/"
assert ctx.resolve_iri("../other") == "http://example.org/other"


@_mock_source_loader
def test_set_null_base():
ctx = Context(base="http://example.org/app/data/item", source={"@base": None})
assert ctx.base is None
assert ctx.resolve_iri("../other") == "../other"


@_mock_source_loader
def test_ignore_base_remote_context():
ctx_url = "http://example.org/remote-base.jsonld"
SOURCES[ctx_url] = {"@context": {"@base": "/remote"}}
ctx = Context(ctx_url)
SOURCES = {ctx_url: {"@context": {"@base": "/remote"}}}
ctx = Context(ctx_url, resolver=MockContextResolver(SOURCES))
assert ctx.base == None


@_expect_exception(errors.RECURSIVE_CONTEXT_INCLUSION)
@_mock_source_loader
def test_recursive_context_inclusion_error():
ctx_url = "http://example.org/recursive.jsonld"
SOURCES[ctx_url] = {"@context": ctx_url}
ctx = Context(ctx_url)
SOURCES = {ctx_url: {"@context": ctx_url}}
ctx = Context(ctx_url, resolver=MockContextResolver(SOURCES))


@_expect_exception(errors.INVALID_REMOTE_CONTEXT)
@_mock_source_loader
def test_invalid_remote_context():
ctx_url = "http://example.org/recursive.jsonld"
SOURCES[ctx_url] = {"key": "value"}
ctx = Context(ctx_url)
SOURCES = {ctx_url: {"key": "value"}}
ctx = Context(ctx_url, resolver=MockContextResolver(SOURCES))

0 comments on commit 9455370

Please sign in to comment.