From ff99a92180c3f0731182ef8b346a4dbb8c612933 Mon Sep 17 00:00:00 2001
From: Marc Bevand <m@zorinaq.com>
Date: Mon, 7 Mar 2022 15:55:11 -0800
Subject: [PATCH] do not chart states with insufficient data

---
 by_age_group.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/by_age_group.py b/by_age_group.py
index f996708..73982c3 100755
--- a/by_age_group.py
+++ b/by_age_group.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import os, json, time
+import os, json, time, math
 import pandas as pd
 import numpy as np
 from sklearn.linear_model import LinearRegression
@@ -23,13 +23,19 @@
 # of these rows, we assume 5 deaths (mean of 0-10)
 suppressed_mean = 5
 
+# If we calculate the absolute excess deaths for a particular age group for a
+# particular state to be less than threshold, we ignore it and assume zero
+# excess. This typically occurs when there are so few deaths that many
+# rows in Weekly_Counts_of_Deaths_by_Jurisdiction_and_Age.csv are suppressed.
+threshold = 10
+
 # all_weeks is an array of the MMWR weeks (yyyy, mm): [(2015, 1), (2015, 2), ...]
 # all_weeks_info maps an MMWR week (yyyy, mm) to its index in all_weeks[] and the saturday ending it:
 #   { (2015, 1): {'idx': 0, 'end': '01/10/2015'}, ... }
 all_weeks = []
 all_weeks_info = {}
 # my_excess contains our estimates of excess deaths; it maps an age group
-# like "75-84 years" to a 4-tuple:
+# like "75-84 years" to an array of 4-tuple:
 # (excess_per_1M, observed_deaths, expected_deaths, jurisdiction)
 my_excess = None
 # cdc_excess maps state names to the CDC's estimate of total number of excess deaths
@@ -160,6 +166,9 @@ def analyze_group(res, df, jurisdiction, group):
         debug(f' obs {obs} exp {exp} excess {obs-exp}')
         total_obs += obs
         total_exp += exp
+    if abs(total_obs - total_exp) < threshold:
+        print(f'Ignoring {jurisdiction}/{group}: {total_obs - total_exp} excess deaths')
+        return 0, 0
     add_my(res, group, total_obs, total_exp, jurisdiction)
     print(f'{(total_obs / total_exp - 1) * 100:.2f}% {jurisdiction} {group} {total_obs} {total_exp}')
     return total_obs, total_exp
@@ -263,9 +272,17 @@ def colname(st):
     ys = [_[0] for _ in l]
     states = [f'{len(ys) - i}. ' + _[3] for (i, _) in enumerate(l)]
     colors = list(map(colname, [_[3] for _ in l]))
+    missing = set(pop.keys()) - set([_[3] for _ in l])
+    if missing:
+        ys = [math.nan] * len(missing) + ys
+        states = list(missing) + states
+        colors = ['black'] * len(missing) + colors
     ax.barh(states, ys, color=colors)
     for (i, y) in enumerate(ys):
-        ax.text(max(y, 0), i - .07, f' {y:,.0f}', va='center')
+        if math.isnan(y):
+            ax.text(0, i - .07, f'N/A (insufficient data)', va='center')
+        else:
+            ax.text(max(y, 0), i - .07, f' {y:,.0f}', va='center')
     ax.set_ylim(bottom=-1, top=len(ys))
     ax.set_xlim(left=min([0] + ys))
     ax.tick_params(axis='y', which='both', left=False)