-
Notifications
You must be signed in to change notification settings - Fork 13
/
conftest.py
357 lines (285 loc) · 10.8 KB
/
conftest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
import os
import csv
import json
import pytest
from warnings import warn
from pathlib import Path
from jsonschema import FormatChecker
from jsonschema.validators import Draft202012Validator
from jscc.schema import is_json_schema, is_codelist, is_missing_property
from jscc.exceptions import MetadataPresenceWarning
from jscc.testing.checks import _false, _traverse
from jscc.testing.filesystem import walk_json_data, walk_csv_data
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT202012
"""
The fixtures in this file are discovered automatically by pytest, so test
files do not need to import them.
The helper functions defined after the fixtures are used by the fixtures and,
in some cases, directly by the tests.
"""
@pytest.fixture
def schema_dir():
    """
    Fixture exposing the schema directory to tests.

    The value is whatever `get_schema_dir()` returns.
    """
    directory = get_schema_dir()
    return directory
@pytest.fixture
def codelists_dir():
    """
    Fixture exposing the codelists directory to tests.

    The codelists directory is the `codelists` sub-directory of the
    directory returned by `get_schema_dir()`.
    """
    schema_root = get_schema_dir()
    return os.path.join(schema_root, "codelists")
@pytest.fixture
def examples_dir():
    """
    Fixture exposing the examples directory to tests.

    The value is whatever `get_examples_dir()` returns.
    """
    directory = get_examples_dir()
    return directory
@pytest.fixture
def schema_from_registry(request):
    """
    Look up a schema in the registry by its identifier.

    `request.param` is handed to the registry's `contents()` lookup, so it
    should be the value of `$id` in the wanted schema.
    """
    schema_id = request.param
    return schema_registry().contents(schema_id)
@pytest.fixture
def schema_validator():
    """
    Build and return a Draft 2020-12 validator for checking the BODS schema.

    The validator's root schema is `meta-schema.json`, read from the `schema`
    directory next to this file. The BODS schema files are loaded into the
    registry passed to the validator so that $refs can be resolved.

    :returns: a `Draft202012Validator` with format checking enabled
    """
    registry = schema_registry()

    # Locate the meta schema relative to this file.
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    metaschema_path = os.path.join(tests_dir, "schema", "meta-schema.json")

    # Decode explicitly as UTF-8: without `encoding`, open() falls back to the
    # platform's locale encoding, which can mis-decode non-ASCII JSON text.
    with open(metaschema_path, encoding="utf-8") as fp:
        metaschema = json.load(fp)

    return Draft202012Validator(metaschema, registry=registry, format_checker=FormatChecker())
@pytest.fixture
def bods_validator():
    """
    Build and return the validator used to check BODS data.

    The schema registered under `urn:statement` is the primary schema
    against which data is validated; the registry is supplied so that
    references to the other schema files can be resolved.
    """
    registry = schema_registry()
    root_schema = registry.contents("urn:statement")
    validator = Draft202012Validator(
        root_schema,
        registry=registry,
        format_checker=FormatChecker(),
    )
    return validator
@pytest.fixture
def codelist_validator():
    """
    Build and return a validator for the codelists schema.

    The schema registered under `urn:codelists` is used as the root schema.
    """
    registry = schema_registry()
    root_schema = registry.contents("urn:codelists")
    validator = Draft202012Validator(
        root_schema,
        registry=registry,
        format_checker=FormatChecker(),
    )
    return validator
@pytest.fixture
def bods_json(request):
    """
    Return a `(filename, parsed JSON)` tuple for a file path.

    This is automatically called on all params for tests which take
    `bods_json` as a parameter with indirect=True, and the output is passed
    to the test.

    `request.param` must be a path object offering `read_text()` and `name`
    (for example a `pathlib.Path`).
    """
    path = request.param
    # Decode explicitly as UTF-8: read_text() without `encoding` uses the
    # platform's locale encoding, which can mis-decode non-ASCII JSON text.
    bods = json.loads(path.read_text(encoding="utf-8"))
    return (path.name, bods)
@pytest.fixture
def codelist_json(request):
    """
    Return the rows of a codelist.

    This is automatically called on all params for tests which take
    `codelist_json` as a parameter with indirect=True, and the output is
    passed to the test.

    `request.param` is a `(path, name, text, fieldnames, rows)` sequence;
    only the `rows` element (index 4) is returned.
    """
    rows_position = 4
    return request.param[rows_position]
@pytest.fixture
def codelist_values():
    """
    Collect every `codelist` value (eg. 'entityType.csv') used in the schema.

    All schema files are scanned, and the distinct codelist values seen
    across them are returned as a single list.
    """
    seen = {}
    for _, _, schema in get_schema_paths():
        seen.update(get_codelists_from_schema(schema))
    return list(seen)
@pytest.fixture
def codelist_enums(request):
    """
    Map each `codelist` value in one schema to its accompanying `enum`.

    `request.param` must be the `$id` of the schema to inspect; only that
    schema is looked up in the registry and scanned.
    """
    schema = schema_registry().contents(request.param)
    return get_codelists_from_schema(schema)
@pytest.fixture
def invalid_data_errors():
    """
    Load the expected validation errors for the invalid test data.

    The CSV file `invalid-statements/expected_errors.csv` (inside the test
    data directory) maps the invalid test data filenames to the expected
    validation errors. It is read into a dict with structure:

        { file_name: ( validation_error, json_path, property ) }

    for use in the invalid data tests. Blank lines in the CSV are ignored.
    Update the CSV file when new invalid data files are created.

    :returns: dict keyed by the first column, holding a tuple of the next three columns
    :raises IndexError: if a non-blank row has fewer than four columns
    """
    csv_path = os.path.join(get_test_data_dir(), "invalid-statements", "expected_errors.csv")
    errors = {}
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(csv_path, newline="", encoding="utf-8") as csvfile:
        for row in csv.reader(csvfile, delimiter=",", quotechar='"'):
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on row[0].
            if not row:
                continue
            errors[row[0]] = (row[1], row[2], row[3])
    return errors
def get_json_files(dir):
    """
    Recursively find the files with a .json extension under `dir`.

    Returns a list of `pathlib.Path` objects, one per matching file.
    """
    return list(Path(dir).rglob("*.json"))
def file_id(path):
    """
    Give the file name (final path component) of `path`.

    Used as a readable identifier, which helps with understanding test errors.
    """
    filename = path.name
    return filename
def codelist_id(codelist_data):
    """
    Give the file name of a codelist from its data tuple.

    `codelist_data` is `(path, name, text, fieldnames, rows)`; the `name`
    element (index 1) is returned. Used as a readable identifier, which
    helps with understanding test errors.
    """
    name = codelist_data[1]
    return name
def get_schema_dir():
    """
    Return the path of the schema directory.

    Assumes the schema directory is named `schema` and sits one level above
    the directory containing this file.
    """
    this_file = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(this_file), "..", "schema")
def get_examples_dir():
    """
    Return the path of the examples directory.

    Assumes the examples directory is named `examples` and sits one level
    above the directory containing this file.
    """
    this_file = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(this_file), "..", "examples")
def get_test_data_dir():
    """
    Return the path of the directory holding the data for the tests.

    Assumes the data lives in a `data` directory next to this file.
    """
    this_file = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(this_file), "data")
def get_schema_paths():
    """
    List the schema files found under the schema directory.

    Returns a list of `(path, name, data)` tuples, where `data` is the
    parsed JSON. Only files for which `is_json_schema(data)` is true are
    included.
    """
    collected = []
    for path, name, _, data in walk_json_data(top=get_schema_dir()):
        if is_json_schema(data):
            collected.append((path, name, data))
    return collected
def get_codelist_paths():
    """
    List the codelists found in the codelists directory.

    Returns a list of `(path, name, text, fieldnames, rows)` tuples. Only
    CSV files for which `is_codelist(fieldnames)` is true are included.
    """
    top = os.path.join(get_schema_dir(), "codelists")
    collected = []
    for path, name, text, fieldnames, rows in walk_csv_data(top=top):
        if is_codelist(fieldnames):
            collected.append((path, name, text, fieldnames, rows))
    return collected
def schema_registry():
    """
    Load the BODS schema files into a registry.

    Each schema is registered under the value of its `$id` as a Draft
    2020-12 resource, so the validator can resolve $refs across all of the
    schema files.
    """
    resources = [
        (schema.get("$id"), Resource(contents=schema, specification=DRAFT202012))
        for _, _, schema in get_schema_paths()
    ]
    return Registry().with_resources(resources)
def get_codelists_from_schema(schema_content, pointer=""):
    """
    Collect `codelist` values and their accompanying `enum`s from a schema.

    Walks nested dicts and lists. For every dict that has a `codelist` key,
    the result maps that value to the dict's `enum` (or None if it has no
    `enum`). Anything that is neither a dict nor a list yields an empty dict.

    `pointer` is the JSON Pointer of `schema_content`; it is only extended
    and passed down while recursing.

    Adapted from JSCC: https://github.com/open-contracting/jscc/blob/main/jscc/testing/checks.py#L696C5-L712C25
    """
    found = {}
    if isinstance(schema_content, list):
        children = enumerate(schema_content)
    elif isinstance(schema_content, dict):
        # Record this object's own codelist before descending, so nested
        # occurrences of the same codelist are applied afterwards.
        if "codelist" in schema_content:
            found[schema_content.get("codelist")] = schema_content.get("enum")
        children = schema_content.items()
    else:
        return found

    for key, child in children:
        found.update(get_codelists_from_schema(child, pointer=f"{pointer}/{key}"))
    return found
def validate_metadata_presence(*args, allow_missing=_false):
    """
    Warns and returns the number of errors relating to metadata in a JSON Schema.

    The root schema and each field must have `"type" <https://tools.ietf.org/html/draft-fge-json-schema-validation-00#section-5.5.2>`__,
    `"title" and "description" <https://tools.ietf.org/html/draft-fge-json-schema-validation-00#section-6.1>`__
    properties, unless it has a `"$ref" <https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03>`__ property.
    A missing "type" is also accepted when the object has a "oneOf" or "$defs" key.

    :param args: passed through unchanged to the function built by ``_traverse``
    :param function allow_missing: a method that accepts a JSON Pointer, and returns whether the field is allowed to
                                   not have a "title" or "description" property
    :returns: the number of errors
    :rtype: int

    This is copied from JSCC to patch oneOf/anyOf/allOf/if/then/else properties being flagged by this.
    TODO: see if this should be fixed in JSCC directly.
    """  # noqa: E501
    # Objects whose own key (last pointer segment) is one of these are not checked.
    schema_fields = {
        "definitions",
        "$defs",
        "deprecated",
        "items",
        "patternProperties",
        "properties",
        "oneOf",
        "anyOf",
        "allOf",
        "if",
        "then",
        "else",
    }
    # Objects whose second-to-last pointer segment is one of these are not checked.
    schema_sections = {"patternProperties", "properties", "anyOf", "allOf", "if", "then", "else", "oneOf"}
    # Each checked object must carry all of these unless it has "$ref".
    required_properties = {"title", "description"}

    def block(path, data, pointer):
        # Checks one object `data` located at JSON Pointer `pointer` and returns
        # the number of problems found, emitting one warning per problem.
        # NOTE(review): presumably `_traverse` calls this for every object in
        # the schema - confirm against jscc.
        errors = 0
        parts = pointer.rsplit("/")
        # `grandparent` is the second-to-last pointer segment; it is only set
        # when the pointer splits into at least three parts.
        if len(parts) >= 3:
            grandparent = parts[-2]
        else:
            grandparent = None
        parent = parts[-1]
        # Look for metadata fields on user-defined objects only. (Add exceptional condition for "items" field.)
        if parent not in schema_fields and grandparent not in schema_sections:
            for prop in required_properties:
                # If a field has `$ref`, then its `title` and `description` might defer to the reference.
                if is_missing_property(data, prop) and "$ref" not in data and not allow_missing(pointer):
                    errors += 1
                    warn(f'{path} is missing "{prop}" at {pointer}', MetadataPresenceWarning)
            # A "type" is not demanded when the object defers to "$ref", offers
            # "oneOf" alternatives, or has a "$defs" key.
            if (
                "type" not in data
                and "$ref" not in data
                and "oneOf" not in data
                and "$defs" not in data
                and not allow_missing(pointer)
            ):
                errors += 1
                warn(f'{path} is missing "type" or "$ref" or "oneOf" at {pointer}', MetadataPresenceWarning)
        return errors

    return _traverse(block)(*args)