Skip to content

Commit

Permalink
Fix cityofchicago ETL (#269)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Jun 9, 2022
1 parent 2e29891 commit 7c12934
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 25 deletions.
25 changes: 12 additions & 13 deletions covid19-etl/etl/cityofchicago.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,7 @@ def parse_cityofchicago_file(
]

# parse the original file into values to be passed to Sheepdog
city_of_chicago_url = (
CITYOFCHICAGO_CDH_URL
+ "?$where=lab_report_date between '"
+ start_date
+ "' and '"
+ end_date
+ "'"
)
city_of_chicago_url = f"{CITYOFCHICAGO_CDH_URL}?$where=lab_report_date between '{start_date}' and '{end_date}'"

with closing(self.get(city_of_chicago_url, stream=True)) as r:
f = (line.decode("utf-8") for line in r.iter_lines())
Expand Down Expand Up @@ -376,7 +369,7 @@ def files_to_submissions(self):
# ETL code that reads from the data source
# and generates the data to submit
start = time.time()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()

today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)

Expand All @@ -389,10 +382,16 @@ def files_to_submissions(self):
{"country": self.country, "state": self.state, "city": self.city},
)

self.last_submission_identifier = self.metadata_helper.get_last_submission()
self.last_submission_identifier = (
self.metadata_helper.get_last_submission()
) # datetime

# The following condition is for the first entry in dataset, which is from date `2020-03-01`
if self.last_submission_identifier == None:
if self.last_submission_identifier:
self.last_submission_identifier = self.last_submission_identifier.strftime(
"%Y-%m-%d"
)
else:
# for the first entry in dataset, which is from date `2020-03-01`
self.last_submission_identifier = "2020-03-01"

print(
Expand All @@ -402,7 +401,7 @@ def files_to_submissions(self):
self.get_summary_location(summary_location_submitter_id)
self.parse_cityofchicago_file(
self.last_submission_identifier,
today.strftime("%Y-%m-%dT%H:%M:%S.%f"),
today.strftime("%Y-%m-%d"),
summary_location_submitter_id,
)
print("Done in {} secs".format(int(time.time() - start)))
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def files_to_submissions(self):
"""
start = time.time()

latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()
today = datetime.date.today()
if latest_submitted_date == today:
print("Nothing to submit: today and latest submitted date are the same.")
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph_facility.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def files_to_submissions(self):
Reads the JSON file and converts the data to Sheepdog records
"""

latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()
today = datetime.date.today()
if latest_submitted_date == today:
print("Nothing to submit: today and latest submitted date are the same.")
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph_hospital.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def files_to_submissions(self):
Reads the JSON file and converts the data to Sheepdog records
"""

latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()
today = datetime.date.today()
if latest_submitted_date == today:
print("Nothing to submit: today and latest submitted date are the same.")
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph_hospital_utilization.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def files_to_submissions(self):
"""

today = datetime.date.today()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()
print(f"Latest submitted date from guppy is {str(latest_submitted_date)}")
if latest_submitted_date == today:
print("Nothing to submit: today and latest submitted date are the same.")
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph_regional_icu_capacity.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def files_to_submissions(self):
"""

today = datetime.date.today()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()
if latest_submitted_date == today:
print("Nothing to submit: today and latest submitted date are the same.")
return
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph_vaccine.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def files_to_submissions(self):
"""

# latest_submitted_date = (
# self.metadata_helper.get_latest_submitted_date_idph()
# self.metadata_helper.get_latest_submitted_date()
# )
# if latest_submitted_date != None and latest_submitted_date == self.date:
# print(
Expand Down
2 changes: 1 addition & 1 deletion covid19-etl/etl/idph_zipcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def files_to_submissions(self):
Reads the JSON file and converts the data to Sheepdog records
"""

latest_submitted_date = self.metadata_helper.get_latest_submitted_date_idph()
latest_submitted_date = self.metadata_helper.get_latest_submitted_date()
today = datetime.date.today()
if latest_submitted_date == today:
print("Nothing to submit: today and latest submitted date are the same.")
Expand Down
5 changes: 3 additions & 2 deletions covid19-etl/tests/test_cityofchicago.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dateutil.parser import parse
import os
import csv
from etl.cityofchicago import CITYOFCHICAGO
Expand Down Expand Up @@ -29,12 +30,12 @@ class MockMetadataHelper:
def get_existing_summary_locations(self):
return []

def get_latest_submitted_date_idph(self):
def get_latest_submitted_date(self):
return None

def get_last_submission(self):
# The following returns the date of the first entry in the dataset, which is `2020-03-01`
return "2020-03-01"
return parse("2020-03-01")

etl = CITYOFCHICAGO("base_url", "access_token", "s3_bucket")
etl.get = lambda *args, **kwargs: mock_get(args)
Expand Down
7 changes: 4 additions & 3 deletions covid19-etl/utils/metadata_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,20 @@ def get_existing_data_jhu(self):

return summary_locations, latest_submitted_date

def get_latest_submitted_date_idph(self):
def get_latest_submitted_date(self):
"""
Queries Guppy for the existing `location` data.
Returns the latest submitted date as a Python "datetime.date" (parsed from a "%Y-%m-%d" string), or None when no date string is returned
"""
str_latest_submitted_date = self.get_str_latest_submitted_date_idph()
str_latest_submitted_date = self.get_str_latest_submitted_date()
if str_latest_submitted_date is not None:
latest_submitted_date = datetime.datetime.strptime(
str_latest_submitted_date, "%Y-%m-%d"
)
return latest_submitted_date.date()
return None

def get_str_latest_submitted_date_idph(self):
def get_str_latest_submitted_date(self):
print("Getting the latest summary_clinical date from Guppy...")
query_string = """query ($filter: JSON) {
location (
Expand Down Expand Up @@ -252,6 +252,7 @@ def download_from_guppy(self, _type, fields=None, filter=None):
raise

def get_last_submission(self):
"""Returns a datetime"""
query_string = (
'{ project (first: 0, dbgap_accession_number: "'
+ self.project_code
Expand Down

0 comments on commit 7c12934

Please sign in to comment.