Skip to content

Commit

Permalink
feat: cache course index queries (openedx#29107)
Browse files Browse the repository at this point in the history
* feat: store split modulestore's course indexes in Django/MySQL

Course outline generation is calling active_versions hundreds or even thousands of times on course_publish. In practice, the handling of a block requires a fetch of the course index. That requires a call to the active_versions table for each touching of a block. In larger courses, that can mean 1000s of calls to the db to retrieve them. A quick solution to this problem is to use a request cache, so we don't have to query the slow db every time and memoize the relevant data
[An example run can be seen here.](https://one.newrelic.com/launcher/nr1-core.explorer?platform[filters]=IihuYW1lIGxpa2UgJ3Byb2QtZWR4LWVkeGFwcCcgb3IgaWQgPSAncHJvZC1lZHgtZWR4YXBwJyBvciBkb21haW5JZCA9ICdwcm9kLWVkeC1lZHhhcHAnKSBhbmQgKG5hbWUgbGlrZSAnbG1zJyBvciBpZCA9ICdsbXMnIG9yIGRvbWFpbklkID0gJ2xtcycpIg==&platform[accountId]=88178&platform[timeRange][begin_time]=1626715880701&platform[timeRange][end_time]=1627320680701&pane=eyJuZXJkbGV0SWQiOiJhcG0tbmVyZGxldHMudHJhbnNhY3Rpb25zIiwiZW50aXR5R3VpZCI6Ik9EZ3hOemg4UVZCTmZFRlFVRXhKUTBGVVNVOU9mRFk1TVRNM05EUTROQSIsInNlbGVjdGVkU2VyaWVzIjoiZjAzYjNmNzY5OTQ0MjlmOTFhYWQ4MDBkNTEwZTU5MDM5OWNjMzNhMSIsImRyaWxsZG93biI6eyJ0cmFuc2FjdGlvbk5hbWUiOiJPdGhlclRyYW5zYWN0aW9uL0NlbGVyeS9jbXMuZGphbmdvYXBwcy5jb250ZW50c3RvcmUudGFza3MudXBkYXRlX291dGxpbmVfZnJvbV9tb2R1bGVzdG9yZV90YXNrIn19&cards[0]=eyJuZXJkbGV0SWQiOiJhcG0tbmVyZGxldHMudHJhbnNhY3Rpb24tdHJhY2UiLCJ0cmFjZUlkIjoiY2I2OGNkMDktZWI5Yi0xMWViLWJkY2QtMDI0MmFjMTEwMDBlXzI1MjU0MV8zMTgzMTMiLCJlbnRpdHlHdWlkIjoiT0RneE56aDhRVkJOZkVGUVVFeEpRMEZVU1U5T2ZEWTVNVE0zTkRRNE5BIn0=&sidebars[0]=eyJuZXJkbGV0SWQiOiJucjEtY29yZS5hY3Rpb25zIiwic2VsZWN0ZWROZXJkbGV0Ijp7Im5lcmRsZXRJZCI6ImFwbS1uZXJkbGV0cy50cmFuc2FjdGlvbnMifSwiZW50aXR5R3VpZCI6Ik9EZ3hOemg4UVZCTmZFRlFVRXhKUTBGVVNVOU9mRFk1TVRNM05EUTROQSJ9&state=d9946155-ea53-cb11-c1c0-cc873d6c7d39)

Useful information to include:
In theory, this should provide a minor performance boost to authors and learners, and be noticeable in the above function trace, once live on prod. Note, that in several places the cache must be invalidated, as to prevent a stale cache.


Co-authored-by: Braden MacDonald <[email protected]>
  • Loading branch information
connorhaugh and bradenmacdonald authored Oct 27, 2021
1 parent ec7205e commit b01e773
Show file tree
Hide file tree
Showing 12 changed files with 54 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
from xmodule.modulestore import BlockData
from xmodule.modulestore.split_mongo import BlockKey
from xmodule.mongo_utils import connect_to_mongodb, create_collection_index

from openedx.core.lib.cache_utils import request_cached
from edx_django_utils.cache import RequestCache

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -261,6 +262,9 @@ def __init__(
# only before returning. Also makes pymongo report write errors.
kwargs['w'] = 1

#make sure the course index cache is fresh.
RequestCache(namespace="course_index_cache").clear()

self.database = connect_to_mongodb(
db, host,
port=port, tz_aware=tz_aware, user=user, password=password,
Expand Down Expand Up @@ -532,6 +536,7 @@ def close_connections(self):
"""
Closes any open connections to the underlying databases
"""
RequestCache(namespace="course_index_cache").clear()
self.database.client.close()

def _drop_database(self, database=True, collections=True, connections=True):
Expand All @@ -546,6 +551,7 @@ def _drop_database(self, database=True, collections=True, connections=True):
If connections is True, then close the connection to the database as well.
"""
RequestCache(namespace="course_index_cache").clear()
connection = self.database.client

if database:
Expand All @@ -571,7 +577,13 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):

# Structures and definitions are only supported in MongoDB for now.
# Course indexes are read from MySQL and written to both MongoDB and MySQL

# Course indexes are cached within the process using their key and ignore_case atrributes as keys.
# This method is request cached. The keys to the cache are the arguements to the method.
# The `self` arguement is discarded as a key using an isinstance check.
# This is because the DjangoFlexPersistenceBackend could be different in reference to the same course key.
@request_cached(
"course_index_cache",
arg_map_function=lambda arg: str(arg) if not isinstance(arg, DjangoFlexPersistenceBackend) else "")
def get_course_index(self, key, ignore_case=False):
"""
Get the course_index from the persistence mechanism whose id is the given key
Expand Down Expand Up @@ -650,6 +662,10 @@ def insert_course_index(self, course_index, course_context=None): # pylint: dis
"""
Create the course_index in the db
"""
# clear the whole course_index request cache, required for sucessfully cloning a course.
# This is a relatively large hammer for the problem, but we mostly only use one course at a time.
RequestCache(namespace="course_index_cache").clear()

course_index['last_update'] = datetime.datetime.now(pytz.utc)
new_index = SplitModulestoreCourseIndex(**SplitModulestoreCourseIndex.fields_from_v1_schema(course_index))
new_index.save()
Expand All @@ -669,6 +685,7 @@ def update_course_index(self, course_index, from_index=None, course_context=None
# "last_update not only tells us when this course was last updated but also helps prevent collisions"
# This code is just copying the behavior of the existing MongoPersistenceBackend
# See https://github.com/edx/edx-platform/pull/5200 for context
RequestCache(namespace="course_index_cache").clear()
course_index['last_update'] = datetime.datetime.now(pytz.utc)
# Find the SplitModulestoreCourseIndex entry that we'll be updating:
try:
Expand Down Expand Up @@ -730,6 +747,7 @@ def delete_course_index(self, course_key):
"""
Delete the course_index from the persistence mechanism whose id is the given course_index
"""
RequestCache(namespace="course_index_cache").clear()
SplitModulestoreCourseIndex.objects.filter(course_id=course_key).delete()
# TEMP: Also write to MongoDB, so we can switch back to using it if this new MySQL version doesn't work well:
super().delete_course_index(course_key)
Expand All @@ -738,6 +756,7 @@ def _drop_database(self, database=True, collections=True, connections=True):
"""
Reset data for testing.
"""
RequestCache(namespace="course_index_cache").clear()
try:
SplitModulestoreCourseIndex.objects.all().delete()
except TransactionManagementError as err:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def my_test(self):
"""
MODULESTORE = functools.partial(mixed_store_config, mkdtemp_clean(), {})
CONTENTSTORE = functools.partial(contentstore_config)
ENABLED_CACHES = ['default', 'mongo_metadata_inheritance', 'loc_cache']
ENABLED_CACHES = ['default', 'mongo_metadata_inheritance', 'loc_cache', 'course_index_cache']

# List of modulestore signals enabled for this test. Defaults to an empty
# list. The list of signals available is found on the SignalHandler class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def test_duplicate_course_error_with_different_case_ids(self, default_store):
# fake: one w/ wildcard version
# split: has one lookup for the course and then one for the course items
# but the active_versions check is done in MySQL
@ddt.data((ModuleStoreEnum.Type.mongo, [1, 1], 0), (ModuleStoreEnum.Type.split, [2, 2], 0))
@ddt.data((ModuleStoreEnum.Type.mongo, [1, 1], 0), (ModuleStoreEnum.Type.split, [2, 1], 0))
@ddt.unpack
def test_has_item(self, default_ms, max_find, max_send):
self.initdb(default_ms)
Expand All @@ -391,7 +391,7 @@ def test_has_item(self, default_ms, max_find, max_send):
# split:
# problem: active_versions, structure
# non-existent problem: ditto
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [3, 2], 0), (ModuleStoreEnum.Type.split, 0, [2, 2], 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [3, 2], 0), (ModuleStoreEnum.Type.split, 0, [2, 1], 0))
@ddt.unpack
def test_get_item(self, default_ms, num_mysql, max_find, max_send):
self.initdb(default_ms)
Expand All @@ -414,7 +414,7 @@ def test_get_item(self, default_ms, num_mysql, max_find, max_send):
# Split:
# mysql: fetch course's active version from SplitModulestoreCourseIndex, spurious refetch x2
# find: get structure
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 14, 0), (ModuleStoreEnum.Type.split, 0, 4, 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 14, 0), (ModuleStoreEnum.Type.split, 0, 3, 0))
@ddt.unpack
def test_get_items(self, default_ms, num_mysql, max_find, max_send):
self.initdb(default_ms)
Expand Down Expand Up @@ -522,7 +522,7 @@ def test_get_items_include_orphans(self, default_ms, expected_items_in_tree, orp
# mysql: SplitModulestoreCourseIndex - select 2x (by course_id, by objectid), update, update historical record
# find: definitions (calculator field), structures
# sends: 2 sends to update index & structure (note, it would also be definition if a content field changed)
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 7, 5), (ModuleStoreEnum.Type.split, 3, 3, 2))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 7, 5), (ModuleStoreEnum.Type.split, 3, 2, 2))
@ddt.unpack
def test_update_item(self, default_ms, num_mysql, max_find, max_send):
"""
Expand Down Expand Up @@ -1043,7 +1043,7 @@ def test_delete_draft_vertical(self, default_ms, num_mysql, max_find, max_send):
# executed twice, possibly unnecessarily)
# find: 2 reads of structure, definition (s/b lazy; so, unnecessary),
# plus 1 wildcard find in draft mongo which has none
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 3, 0), (ModuleStoreEnum.Type.split, 0, 6, 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 3, 0), (ModuleStoreEnum.Type.split, 0, 5, 0))
@ddt.unpack
def test_get_courses(self, default_ms, num_mysql, max_find, max_send):
self.initdb(default_ms)
Expand Down Expand Up @@ -1638,7 +1638,7 @@ def test_get_parent_location_draft(self, default_ms):
# 8-9. get vertical, compute inheritance
# 10-11. get other vertical_x1b (why?) and compute inheritance
# Split: loading structure from mongo (also loads active version from MySQL, not tracked here)
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [12, 3], 0), (ModuleStoreEnum.Type.split, 0, [3, 2], 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [12, 3], 0), (ModuleStoreEnum.Type.split, 0, [3, 1], 0))
@ddt.unpack
def test_path_to_location(self, default_ms, num_mysql, num_finds, num_sends):
"""
Expand Down Expand Up @@ -1802,6 +1802,7 @@ def test_reset_course_to_version(self):
# Find the version_guid of our course by diving into Split Mongo.
split = self._get_split_modulestore()
course_index = split.get_course_index(self.course.location.course_key)
log.warning(f"Banana course index: {course_index}")
original_version_guid = course_index["versions"]["published-branch"]

# Reset course to currently-published version.
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/course_api/blocks/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ def test_query_counts_cached(self, store_type, with_storage_backing):
@ddt.data(
(ModuleStoreEnum.Type.mongo, 5, True, 24),
(ModuleStoreEnum.Type.mongo, 5, False, 14),
(ModuleStoreEnum.Type.split, 3, True, 24),
(ModuleStoreEnum.Type.split, 3, False, 14),
(ModuleStoreEnum.Type.split, 2, True, 24),
(ModuleStoreEnum.Type.split, 2, False, 14),
)
@ddt.unpack
def test_query_counts_uncached(self, store_type, expected_mongo_queries, with_storage_backing, num_sql_queries):
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/courseware/tests/test_module_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,7 @@ def setup_request_and_course(self, num_finds, num_sends):
# - 1 for 5 definitions
# Split makes 1 MySQL query to render the toc:
# - 1 MySQL for the active version at the start of the bulk operation (no mongo calls)
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 1))
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 0))
@ddt.unpack
def test_toc_toy_from_chapter(self, default_ms, setup_finds, setup_sends, toc_finds):
with self.store.default_store(default_ms):
Expand Down Expand Up @@ -1056,7 +1056,7 @@ def test_toc_toy_from_chapter(self, default_ms, setup_finds, setup_sends, toc_fi
# - 1 for 5 definitions
# Split makes 1 MySQL query to render the toc:
# - 1 MySQL for the active version at the start of the bulk operation (no mongo calls)
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 1))
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 0))
@ddt.unpack
def test_toc_toy_from_section(self, default_ms, setup_finds, setup_sends, toc_finds):
with self.store.default_store(default_ms):
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/courseware/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,9 @@ def verify_response(self, expected_response_code=200, url_params=None):

@ddt.data(
('vertical_block', ModuleStoreEnum.Type.mongo, 13),
('vertical_block', ModuleStoreEnum.Type.split, 6),
('vertical_block', ModuleStoreEnum.Type.split, 5),
('html_block', ModuleStoreEnum.Type.mongo, 14),
('html_block', ModuleStoreEnum.Type.split, 6),
('html_block', ModuleStoreEnum.Type.split, 5),
)
@ddt.unpack
def test_courseware_html(self, block_name, default_store, mongo_calls):
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/discussion/django_comment_client/base/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def inner(self, default_store, module_count, mongo_calls, sql_queries, *args, **

@ddt.data(
(ModuleStoreEnum.Type.mongo, 3, 4, 39),
(ModuleStoreEnum.Type.split, 3, 13, 39),
(ModuleStoreEnum.Type.split, 3, 11, 39),
)
@ddt.unpack
@count_queries
Expand All @@ -411,7 +411,7 @@ def test_create_thread(self, mock_request):

@ddt.data(
(ModuleStoreEnum.Type.mongo, 3, 3, 35),
(ModuleStoreEnum.Type.split, 3, 10, 35),
(ModuleStoreEnum.Type.split, 3, 9, 35),
)
@ddt.unpack
@count_queries
Expand Down
8 changes: 4 additions & 4 deletions lms/djangoapps/discussion/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,15 +483,15 @@ class SingleThreadQueryCountTestCase(ForumsEnableMixin, ModuleStoreTestCase):
(ModuleStoreEnum.Type.mongo, False, 1, 5, 2, 21, 7),
(ModuleStoreEnum.Type.mongo, False, 50, 5, 2, 21, 7),
# split mongo: 3 queries, regardless of thread response size.
(ModuleStoreEnum.Type.split, False, 1, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, False, 50, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, False, 1, 2, 2, 21, 8),
(ModuleStoreEnum.Type.split, False, 50, 2, 2, 21, 8),
# Enabling Enterprise integration should have no effect on the number of mongo queries made.
(ModuleStoreEnum.Type.mongo, True, 1, 5, 2, 21, 7),
(ModuleStoreEnum.Type.mongo, True, 50, 5, 2, 21, 7),
# split mongo: 3 queries, regardless of thread response size.
(ModuleStoreEnum.Type.split, True, 1, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, True, 50, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, True, 1, 2, 2, 21, 8),
(ModuleStoreEnum.Type.split, True, 50, 2, 2, 21, 8),
)
@ddt.unpack
def test_number_of_mongo_queries(
Expand Down
11 changes: 6 additions & 5 deletions lms/djangoapps/grades/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,9 @@ def test_block_structure_created_only_once(self):
@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 40, True),
(ModuleStoreEnum.Type.mongo, 1, 40, False),
(ModuleStoreEnum.Type.split, 3, 40, True),
(ModuleStoreEnum.Type.split, 3, 40, False),
(ModuleStoreEnum.Type.split, 2, 40, True),
(ModuleStoreEnum.Type.split, 2, 40, False),
)
@ddt.unpack
def test_query_counts(self, default_store, num_mongo_calls, num_sql_calls, create_multiple_subsections):
Expand All @@ -178,7 +179,7 @@ def test_query_counts(self, default_store, num_mongo_calls, num_sql_calls, creat

@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 40),
(ModuleStoreEnum.Type.split, 3, 40),
(ModuleStoreEnum.Type.split, 2, 40),
)
@ddt.unpack
def test_query_counts_dont_change_with_more_content(self, default_store, num_mongo_calls, num_sql_calls):
Expand Down Expand Up @@ -224,7 +225,7 @@ def test_other_inaccessible_subsection(self, mock_subsection_signal):

@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 23),
(ModuleStoreEnum.Type.split, 3, 23),
(ModuleStoreEnum.Type.split, 2, 23),
)
@ddt.unpack
def test_persistent_grades_not_enabled_on_course(self, default_store, num_mongo_queries, num_sql_queries):
Expand All @@ -239,7 +240,7 @@ def test_persistent_grades_not_enabled_on_course(self, default_store, num_mongo_

@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 41),
(ModuleStoreEnum.Type.split, 3, 41),
(ModuleStoreEnum.Type.split, 2, 41),
)
@ddt.unpack
def test_persistent_grades_enabled_on_course(self, default_store, num_mongo_queries, num_sql_queries):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,6 @@ def test_spoc_gradebook_mongo_calls(self):

# check MongoDB calls count
url = reverse('spoc_gradebook', kwargs={'course_id': self.course.id})
with check_mongo_calls(9):
with check_mongo_calls(7):
response = self.client.get(url)
assert response.status_code == 200
9 changes: 5 additions & 4 deletions openedx/core/djangoapps/bookmarks/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,11 +376,12 @@ def test_path(self, seconds_delta, paths, get_path_call_count, mock_get_path):
# (ModuleStoreEnum.Type.mongo, 6, 3, 3), Too slow.
(ModuleStoreEnum.Type.mongo, 2, 4, 4),
# (ModuleStoreEnum.Type.mongo, 4, 4, 4),
(ModuleStoreEnum.Type.split, 2, 2, 2),
(ModuleStoreEnum.Type.split, 4, 2, 2),
(ModuleStoreEnum.Type.split, 2, 3, 2),
(ModuleStoreEnum.Type.split, 2, 2, 1),
(ModuleStoreEnum.Type.split, 4, 2, 1),
(ModuleStoreEnum.Type.split, 2, 3, 1),
# (ModuleStoreEnum.Type.split, 4, 3, 1),
(ModuleStoreEnum.Type.split, 2, 4, 2),
(ModuleStoreEnum.Type.split, 2, 4, 1),
)
@ddt.unpack
def test_get_path_queries(self, store_type, children_per_block, depth, expected_mongo_calls):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def test_malformed_grading_policy(self):
course_overview = CourseOverview._create_or_update(course) # pylint: disable=protected-access
assert course_overview.lowest_passing_grade is None

@ddt.data((ModuleStoreEnum.Type.mongo, 4, 4), (ModuleStoreEnum.Type.split, 3, 3))
@ddt.data((ModuleStoreEnum.Type.mongo, 4, 4), (ModuleStoreEnum.Type.split, 2, 2))
@ddt.unpack
def test_versioning(self, modulestore_type, min_mongo_calls, max_mongo_calls):
"""
Expand Down

0 comments on commit b01e773

Please sign in to comment.