Skip to content

Commit

Permalink
Fix incorrect unknown user_type values in ES forms
Browse files Browse the repository at this point in the history
using a periodic task and background queue for updating identified forms in ES
  • Loading branch information
dannyroberts committed Dec 10, 2019
1 parent a536259 commit f8f382e
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
32 changes: 32 additions & 0 deletions corehq/pillows/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from celery.schedules import crontab
from celery.task import periodic_task

from corehq.apps.es import FormES
from corehq.form_processor.interfaces.dbaccessors import FormAccessors
from corehq.form_processor.utils.xform import resave_form
from corehq.pillows.utils import get_user_type_deep_cache_for_unknown_users
from corehq.util.datadog.gauges import datadog_gauge
from corehq.util.decorators import serial_task


@periodic_task(run_every=crontab(minute=0, hour=0))
def fix_user_types():
    """
    Queue a re-save of ES forms for 'unknown' users that can now be classified.

    Scheduled via crontab at minute 0 of hour 0. Collects the user ids that
    have forms in ES with ``user_type == 'unknown'``, reports how many there
    are to datadog, and for each one whose (deep-cached) user type is no
    longer unknown, dispatches ``resave_es_forms_with_unknown_user_type``.
    """
    # Function-scope import keeps this block self-contained; this module
    # already imports from corehq.pillows.utils at the top of the file.
    from corehq.pillows.utils import UNKNOWN_USER_TYPE

    unknown_user_ids = (
        FormES().user_type('unknown').user_aggregation().run().aggregations.user.keys
    )
    datadog_gauge('commcare.fix_user_types.unknown_user_count', len(unknown_user_ids))
    for user_id in unknown_user_ids:
        user_type = get_user_type_deep_cache_for_unknown_users(user_id)
        # Compare against the unknown-type constant. The previous code compared
        # the type string to the `unknown_user_ids` collection, which is never
        # equal, so a resave was queued for every user regardless of its type.
        # NOTE(review): get_user_type() appears to seed this cache with `True`
        # for unknown users; if so, such entries will still be treated as
        # "not unknown" here -- confirm and align the cached value.
        if user_type != UNKNOWN_USER_TYPE:
            resave_es_forms_with_unknown_user_type.delay(user_id)


@serial_task('{user_id}', queue='background_queue')
def resave_es_forms_with_unknown_user_type(user_id):
    """
    Re-save every form of ``user_id`` that ES still lists as user_type 'unknown'.

    Runs as a serial task keyed on the user id, on the 'background_queue'
    queue. Each matching (domain, form id) pair is scrolled out of ES, the
    form is loaded through ``FormAccessors`` and passed to ``resave_form``.
    """
    unknown_forms_query = FormES().user_type('unknown').user_id(user_id)
    hits = unknown_forms_query.values_list('domain', '_id', scroll=True)
    for domain, form_id in hits:
        resave_form(domain, FormAccessors(domain).get_form(form_id))
13 changes: 13 additions & 0 deletions corehq/pillows/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,22 @@ def get_user_type(user_id):
return MOBILE_USER_TYPE
except (ResourceNotFound, WrappingAttributeError):
pass

get_user_type_deep_cache_for_unknown_users.set_cached_value(user_id).to(True)
return UNKNOWN_USER_TYPE


@quickcache(['user_id'], timeout=30 * ONE_DAY)
def get_user_type_deep_cache_for_unknown_users(user_id):
    """
    Cached wrapper around ``get_user_type``, keyed on ``user_id`` with a
    ``30 * ONE_DAY`` timeout.

    Intended only for user_ids that have already been classified as
    'unknown': it lets callers periodically re-check whether such users are
    really unknown without pummeling the user db.
    """
    user_type = get_user_type(user_id)
    return user_type


def get_all_expected_es_indices():
yield CASE_INDEX_INFO
yield XFORM_INDEX_INFO
Expand Down

0 comments on commit f8f382e

Please sign in to comment.