forked from oppia/oppia
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix part of oppia#11475: Move datastoreio stub into JobBase (oppia#12947)
* Add JobRunResult type for Beam Jobs * Update job_run_result.py * Transition to ndb_io * Move datastoreio_stub into JobBase * Add unit tests * Tidy things up * Add test to shards * Tidy up
- Loading branch information
1 parent
f753ff5
commit 43a2032
Showing
14 changed files
with
549 additions
and
264 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
# coding: utf-8 | ||
# | ||
# Copyright 2021 The Oppia Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS-IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Provides an Apache Beam API for operating on NDB models.""" | ||
|
||
from __future__ import absolute_import # pylint: disable=import-only-modules | ||
from __future__ import unicode_literals # pylint: disable=import-only-modules | ||
|
||
import feconf | ||
from jobs import job_utils | ||
|
||
import apache_beam as beam | ||
|
||
|
||
class GetModels(beam.PTransform):
    """PTransform that queries the datastore and emits NDB model instances.

    TODO(#11475): Stop using datastoreio_stub after we're able to use Cloud NDB.
    """

    def __init__(self, query, datastoreio_stub, label=None):
        """Initializes the GetModels PTransform.

        Args:
            query: datastore_services.Query. The query used to fetch models.
            datastoreio_stub: stub_io.DatastoreioStub. The stub instance
                responsible for handling datastoreio operations.
            label: str|None. The label of the PTransform.
        """
        super(GetModels, self).__init__(label=label)
        self.query = query
        self.datastoreio = datastoreio_stub

    def expand(self, pbegin):
        """Returns a PCollection containing the queried models.

        Args:
            pbegin: PValue. The initial PValue of the pipeline, used to anchor
                the models to its underlying pipeline.

        Returns:
            PCollection. The PCollection of models.
        """
        # Translate the NDB query into the Beam-native query representation
        # expected by datastoreio before issuing the read.
        beam_query = job_utils.get_beam_query_from_ndb_query(self.query)
        entity_pcoll = pbegin | 'Reading %r from the datastore' % self.query >> (
            self.datastoreio.ReadFromDatastore(beam_query))
        # Convert each raw Beam entity back into its NDB model counterpart.
        return entity_pcoll | 'Transforming %r into NDB models' % self.query >> (
            beam.Map(job_utils.get_ndb_model_from_beam_entity))
|
||
|
||
class PutModels(beam.PTransform):
    """Writes NDB models to the datastore."""

    def __init__(self, datastoreio_stub, label=None):
        """Initializes the PutModels PTransform.

        Args:
            datastoreio_stub: stub_io.DatastoreioStub. The stub instance
                responsible for handling datastoreio operations.
            label: str|None. The label of the PTransform.
        """
        super(PutModels, self).__init__(label=label)
        self.datastoreio = datastoreio_stub

    def expand(self, model_pcoll):
        """Writes the given models to the datastore.

        Args:
            model_pcoll: PCollection. A PCollection of NDB models.

        Returns:
            PCollection. An empty PCollection.
        """
        # NDB models must first be converted into Beam entities, since that is
        # the representation the datastoreio writer understands.
        entity_pcoll = model_pcoll | (
            'Transforming the NDB models into Apache Beam entities' >> (
                beam.Map(job_utils.get_beam_entity_from_ndb_model)))
        return entity_pcoll | 'Writing the NDB models to the datastore' >> (
            self.datastoreio.WriteToDatastore(feconf.OPPIA_PROJECT_ID))
|
||
|
||
class DeleteModels(beam.PTransform):
    """Deletes NDB models from the datastore."""

    def __init__(self, datastoreio_stub, label=None):
        """Initializes the DeleteModels PTransform.

        Args:
            datastoreio_stub: stub_io.DatastoreioStub. The stub instance
                responsible for handling datastoreio operations.
            label: str|None. The label of the PTransform.
        """
        super(DeleteModels, self).__init__(label=label)
        self.datastoreio = datastoreio_stub

    def expand(self, model_key_pcoll):
        """Deletes the given models from the datastore.

        Args:
            model_key_pcoll: PCollection. The PCollection of NDB keys to delete.

        Returns:
            PCollection. An empty PCollection.
        """
        # The datastoreio deleter operates on Beam keys, so the NDB keys are
        # converted before the delete is issued.
        beam_key_pcoll = model_key_pcoll | (
            'Transforming the NDB keys into Apache Beam keys' >> (
                beam.Map(job_utils.get_beam_key_from_ndb_key)))
        return beam_key_pcoll | 'Deleting the NDB keys from the datastore' >> (
            self.datastoreio.DeleteFromDatastore(feconf.OPPIA_PROJECT_ID))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# coding: utf-8 | ||
# | ||
# Copyright 2021 The Oppia Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS-IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Provides an Apache Beam API for operating on NDB models.""" | ||
|
||
from __future__ import absolute_import # pylint: disable=import-only-modules | ||
from __future__ import unicode_literals # pylint: disable=import-only-modules | ||
|
||
from core.platform import models | ||
from jobs import job_test_utils | ||
from jobs.io import ndb_io | ||
from jobs.io import stub_io | ||
|
||
import apache_beam as beam | ||
|
||
(base_models,) = models.Registry.import_models([models.NAMES.base_model]) | ||
|
||
datastore_services = models.Registry.import_datastore_services() | ||
|
||
|
||
class NdbIoTests(job_test_utils.PipelinedTestBase):
    """Unit tests for the ndb_io PTransforms, using a datastoreio stub."""

    def setUp(self):
        super(NdbIoTests, self).setUp()
        self.datastoreio_stub = stub_io.DatastoreioStub()

    def tearDown(self):
        # Wipe every model so each test starts from an empty datastore.
        all_keys = datastore_services.query_everything().iter(keys_only=True)
        datastore_services.delete_multi(all_keys)
        super(NdbIoTests, self).tearDown()

    def get_everything(self):
        """Returns all models in the datastore.

        Returns:
            list(Model). All of the models in the datastore.
        """
        return list(datastore_services.query_everything().iter())

    def put_multi(self, model_list, update_last_updated_time=False):
        """Puts the given models into the datastore.

        Args:
            model_list: list(Model). The models to put into the datastore.
            update_last_updated_time: bool. Whether to update the last updated
                time before putting the model into storage.
        """
        datastore_services.update_timestamps_multi(
            model_list, update_last_updated_time=update_last_updated_time)
        datastore_services.put_multi(model_list)

    def _make_models(self):
        """Returns three fresh BaseModel instances with distinct IDs."""
        return [
            self.create_model(base_models.BaseModel, id=model_id)
            for model_id in ('a', 'b', 'c')
        ]

    def test_read_from_datastore(self):
        models_in_storage = self._make_models()
        self.put_multi(models_in_storage)

        self.assertItemsEqual(self.get_everything(), models_in_storage)

        with self.datastoreio_stub.context():
            read_pcoll = self.pipeline | ndb_io.GetModels(
                datastore_services.query_everything(), self.datastoreio_stub)
            self.assert_pcoll_equal(read_pcoll, models_in_storage)

    def test_write_to_datastore(self):
        models_to_write = self._make_models()

        self.assertItemsEqual(self.get_everything(), [])

        with self.datastoreio_stub.context():
            write_pcoll = (
                self.pipeline
                | beam.Create(models_to_write)
                | ndb_io.PutModels(self.datastoreio_stub)
            )
            self.assert_pcoll_empty(write_pcoll)

        self.assertItemsEqual(self.get_everything(), models_to_write)

    def test_delete_from_datastore(self):
        models_to_delete = self._make_models()
        self.put_multi(models_to_delete)

        self.assertItemsEqual(self.get_everything(), models_to_delete)

        with self.datastoreio_stub.context():
            keys_to_delete = [model.key for model in models_to_delete]
            delete_pcoll = (
                self.pipeline
                | beam.Create(keys_to_delete)
                | ndb_io.DeleteModels(self.datastoreio_stub)
            )
            self.assert_pcoll_empty(delete_pcoll)

        self.assertItemsEqual(self.get_everything(), [])
Oops, something went wrong.