fixes bug 1283214 - Check SuperSearch in healthcheck monitoring (mozilla-services#3381) r=adngdb
mbrukman · Jun 30, 2016 · c905c07 · c905c07
1 parent e37ed41
commit c905c07
Show file tree

Hide file tree

Showing 2 changed files with 135 additions and 1 deletion.
diff --git a/webapp-django/crashstats/monitoring/tests/test_views.py b/webapp-django/crashstats/monitoring/tests/test_views.py
@@ -1,7 +1,7 @@
 import datetime
 import json
 
-from nose.tools import eq_, ok_
+from nose.tools import eq_, ok_, assert_raises
 import mock
 
 from django.core.urlresolvers import reverse
@@ -10,6 +10,8 @@
 
 from crashstats.crashstats.tests.test_views import BaseTestViews, Response
 from crashstats.crashstats.models import CrontabberState
+from crashstats.supersearch.models import SuperSearch
+from crashstats.monitoring.views import assert_supersearch_counts
 
 
 class TestViews(BaseTestViews):
@@ -278,3 +280,82 @@ def fake_es_instance(**config):
         )
 
         eq_(es_instance.info.call_count, 2)
+
+    def test_assert_supersearch_counts(self):
+        """A paged count within the tolerance must not raise."""
+        calls = []
+        first_page = {
+            'hits': [{'uuid': '12345'}],
+            'facets': [],
+            'total': 30002,
+        }
+        last_page = {
+            'hits': [
+                {'uuid': '0128940'},
+                {'uuid': '9156826'},
+                {'uuid': '3969175'},
+            ],
+            'facets': [],
+            'total': 30004,
+        }
+
+        def fake_supersearch_get(**kwargs):
+            calls.append(kwargs)
+            eq_(kwargs['product'], [settings.DEFAULT_PRODUCT])
+            if len(calls) > 1:
+                # every call after the first is the last-page query
+                eq_(kwargs['_results_number'], 100)
+                eq_(kwargs['_results_offset'], 30000)
+                return last_page
+            # the very first call only asks for the total
+            eq_(kwargs['_results_number'], 0)
+            eq_(kwargs['_columns'], ['uuid'])
+            return first_page
+
+        mocked_get = SuperSearch.implementation().get
+        mocked_get.side_effect = fake_supersearch_get
+        # 30000 + 3 hits against a total of 30004 is roughly 0.003%
+        # off, which is inside the tolerated epsilon.
+        assert_supersearch_counts()
+        assert len(calls) == 2
+
+    def test_assert_supersearch_counts_failing(self):
+        """A paged count too far from the total must raise."""
+        calls = []
+        first_page = {
+            'hits': [{'uuid': '12345'}],
+            'facets': [],
+            'total': 320,
+        }
+        last_page = {
+            'hits': [
+                {'uuid': '0128940'},
+                {'uuid': '9156826'},
+                {'uuid': '3969175'},
+            ],
+            'facets': [],
+            'total': 320,
+        }
+
+        def fake_supersearch_get(**kwargs):
+            calls.append(kwargs)
+            eq_(kwargs['product'], [settings.DEFAULT_PRODUCT])
+            if len(calls) > 1:
+                # every call after the first is the last-page query
+                eq_(kwargs['_results_number'], 100)
+                eq_(kwargs['_results_offset'], 300)
+                return last_page
+            # the very first call only asks for the total
+            eq_(kwargs['_results_number'], 0)
+            eq_(kwargs['_columns'], ['uuid'])
+            return first_page
+
+        mocked_get = SuperSearch.implementation().get
+        mocked_get.side_effect = fake_supersearch_get
+        # 300 + 3 hits against a total of 320 is roughly 5.3% off,
+        # which is far outside the tolerated epsilon.
+        assert_raises(
+            AssertionError,
+            assert_supersearch_counts
+        )
+        assert len(calls) == 2
diff --git a/webapp-django/crashstats/monitoring/views.py b/webapp-django/crashstats/monitoring/views.py
@@ -12,6 +12,7 @@
 
 from crashstats.crashstats import utils
 from crashstats.crashstats.models import CrontabberState
+from crashstats.supersearch.models import SuperSearch
 
 
 def index(request):
@@ -167,4 +168,56 @@ def healthcheck(request):
         )
         es.info()  # will raise an error if there's a problem with the cluster
 
+        # Check SuperSearch paginated results
+        assert_supersearch_counts()
+
     return {'ok': True}
+
+
+def assert_supersearch_counts():
+    """Make sure that all shards in ElasticSearch return all results.
+    If some nodes in the cluster are unhealthy, the whole
+    SuperSearch query might succeed but the number of records
+    might not match the "total" count provided in every result page.
+
+    Raises AssertionError if the paginated count and the reported
+    total differ by 0.01% or more. In production (/api/SuperSearch)
+    the epsilon hovers around 0.005, *with* webapp caching at play.
+    """
+    supersearch = SuperSearch()
+    # We don't want any caching this time
+    supersearch.cache_seconds = 0
+    results = supersearch.get(
+        product=settings.DEFAULT_PRODUCT,
+        _results_number=0,
+        _columns=['uuid'],
+    )
+    # Use this total only to work out the offset of the last page.
+    total = results['total']
+    # Floor division: if total is 350 and the limit is 100 then
+    # 350 // 100 * 100 is 300, the offset of the last page.
+    limit = 100
+    offset = total // limit * limit
+    results = supersearch.get(
+        product=settings.DEFAULT_PRODUCT,
+        _results_number=limit,
+        _results_offset=offset,
+        _columns=['uuid'],
+    )
+    counted = len(results['hits']) + offset
+    # Use the total reported by the second query.
+    total = results['total']
+    # The difference is a percentage; its sign doesn't matter. An empty
+    # index (total 0) is healthy only if no hits were counted either.
+    if total:
+        epsilon = abs(100 - 100.0 * counted / total)
+    else:
+        epsilon = 100.0 if counted else 0.0
+    # ElasticSearch's own pagination caching makes up to 0.01% drift normal.
+    # Raise explicitly: a plain `assert` is stripped under `python -O`.
+    if epsilon >= 0.01:
+        raise AssertionError({
+            'epsilon': epsilon,
+            'counted': counted,
+            'total': total,
+        })