From b0b7d7a5f68531a0215088b130e55866e6d794cf Mon Sep 17 00:00:00 2001
From: Laszlo Szekeres <lszekeres@google.com>
Date: Mon, 30 Mar 2020 17:51:51 -0400
Subject: [PATCH] Add warning about fuzzers with low number of trials in the
 report. (#184)

* Add warning about fuzzers with low number of trials in the report.
* Move warning below plots. Consider `in_progress`.
---
 analysis/benchmark_results.py          |  6 ++++++
 analysis/data_utils.py                 | 17 +++++++++++++++++
 analysis/report_templates/default.html | 18 ++++++++++++++++++
 3 files changed, 41 insertions(+)
diff --git a/analysis/benchmark_results.py b/analysis/benchmark_results.py
index c4781fa84..b536e1658 100644
--- a/analysis/benchmark_results.py
+++ b/analysis/benchmark_results.py
@@ -58,6 +58,12 @@ def _benchmark_df(self):
     def _benchmark_snapshot_df(self):
         return data_utils.get_benchmark_snapshot(self._benchmark_df)
 
+    @property
+    def fuzzers_with_not_enough_samples(self):
+        """Fuzzers with not enough samples."""
+        return data_utils.get_fuzzers_with_not_enough_samples(
+            self._benchmark_snapshot_df)
+
     @property
     def summary_table(self):
         """Statistical summary table."""
diff --git a/analysis/data_utils.py b/analysis/data_utils.py
index 742106973..bde740067 100644
--- a/analysis/data_utils.py
+++ b/analysis/data_utils.py
@@ -75,6 +75,23 @@ def get_benchmark_snapshot(benchmark_df,
     return benchmark_snapshot_df
 
 
+_DEFAULT_FUZZER_SAMPLE_NUM_THRESHOLD = 0.8
+
+
+def get_fuzzers_with_not_enough_samples(
+        benchmark_snapshot_df, threshold=_DEFAULT_FUZZER_SAMPLE_NUM_THRESHOLD):
+    """Returns fuzzers that didn't have enough trials running at snapshot time.
+    It takes a benchmark snapshot and finds the fuzzers that have a sample size
+    smaller than 80% of the largest sample size. Default threshold can be
+    overridden.
+    """
+    samples_per_fuzzer = benchmark_snapshot_df.fuzzer.value_counts()
+    max_samples = samples_per_fuzzer.max()
+    few_sample_criteria = samples_per_fuzzer < threshold * max_samples
+    few_sample_fuzzers = samples_per_fuzzer[few_sample_criteria].index
+    return few_sample_fuzzers.tolist()
+
+
 def get_experiment_snapshots(experiment_df):
     """Finds a good snapshot time for each benchmark in the experiment data.
 
diff --git a/analysis/report_templates/default.html b/analysis/report_templates/default.html
index 852c84312..f8e5a2844 100644
--- a/analysis/report_templates/default.html
+++ b/analysis/report_templates/default.html
@@ -124,6 +124,24 @@ <h5 class="center-align">Mean coverage growth over time</h5>
                 </div>
             </div>
 
+            {% if benchmark.fuzzers_with_not_enough_samples and not in_progress %}
+            <div class="card-panel deep-orange lighten-3">
+                <div class="row valign-wrapper">
+                    <div class="col s4 m2">
+                        <i class="medium material-icons">error</i>
+                    </div>
+                    <div class="col s8 m10">
+                        <span class="black-text">
+                            The following fuzzers do not have enough samples:
+                            <strong>
+                                {{ ', '.join(benchmark.fuzzers_with_not_enough_samples) }}.
+                            </strong>
+                        </span>
+                    </div>
+                </div>
+            </div>
+            {% endif %}
+
             <ul class="collapsible">
                 <li>
                     <div class="collapsible-header">