Skip to content

Commit

Permalink
IDPH-zipcode ETL: fewer guppy queries (#256)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Feb 15, 2022
1 parent 0d903c5 commit 85ecef9
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 10 deletions.
2 changes: 1 addition & 1 deletion covid19-etl/etl/ctp.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def parse_row(self, row):
if value and value.lower() not in ["nan", "n/a"]:
try:
summary_clinical[k] = int(value.replace(",", ""))
except:
except Exception:
pass

dataQualityGrade = row[self.header_to_column["dataQualityGrade"]]
Expand Down
11 changes: 6 additions & 5 deletions covid19-etl/etl/idph_zipcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,22 @@ def files_to_submissions(self):
print(
f"Latest submitted date: {latest_submitted_date}. Getting data until date: {today}"
)
existing_summary_locations = (
self.metadata_helper.get_existing_summary_locations()
)
for i in range(int((today - latest_submitted_date).days)):
date = latest_submitted_date + datetime.timedelta(i + 1)
self.parse_data(date.strftime("%Y-%m-%d"))
self.parse_data(date.strftime("%Y-%m-%d"), existing_summary_locations)

def parse_data(self, date_str):
def parse_data(self, date_str, existing_summary_locations):
"""
Converts a JSON file to data we can submit via Sheepdog. Stores the
records to submit in `self.summary_locations` and `self.summary_clinicals`.
Args:
date_str (str): date in "%Y-%m-%d" format
existing_summary_locations (list): [<location submitter_id>, ...]
"""
existing_summary_locations = (
self.metadata_helper.get_existing_summary_locations()
)
url = f"https://idph.illinois.gov/DPHPublicInformation/api/COVIDExport/GetZip?reportDate={date_str}"
print("Getting data from {}".format(url))
with closing(self.get(url, stream=True)) as r:
Expand Down
11 changes: 7 additions & 4 deletions covid19-etl/utils/metadata_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def query_peregrine(self, query_string):
raise
try:
return response.json()
except:
except Exception:
print(f"Peregrine did not return JSON: {response.text}")
raise

Expand All @@ -195,7 +195,7 @@ async def _post_request(headers, query_string):
raise
try:
response = await response.json()
except:
except Exception:
print(f"Peregrine did not return JSON: {response.text}")
raise
return response
Expand All @@ -218,7 +218,7 @@ def query_guppy(self, query_string, variables=None):
raise
try:
return response.json()
except:
except Exception:
print(f"Guppy did not return JSON: {response.status_code} {response.text}")
raise

Expand All @@ -242,7 +242,7 @@ def download_from_guppy(self, _type, fields=None, filter=None):
raise
try:
return response.json()
except:
except Exception:
print(f"Guppy did not return JSON: {response.status_code} {response.text}")
raise

Expand Down Expand Up @@ -356,4 +356,7 @@ def get_existing_summary_locations(self):
)

location_list = query_res["data"]["location"]
assert (
type(location_list) == list
), f"Did not receive a list of locations from Guppy. Received: {query_res}"
return [location["submitter_id"] for location in location_list]

0 comments on commit 85ecef9

Please sign in to comment.