Skip to content

Commit

Permalink
Merge pull request #2706 from ohcnetwork/sainak/handle-panchayats
Browse files Browse the repository at this point in the history
Handle panchayats while loading govt org data from csv
  • Loading branch information
vigneshhari authored Jan 5, 2025
2 parents 26f2c5e + e4a184e commit 5ee7fcc
Showing 1 changed file with 167 additions and 58 deletions.
225 changes: 167 additions & 58 deletions care/emr/management/commands/load_govt_organization_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class RowObj(NamedTuple):
district: str
local_body: str
local_body_type: str
grama_panchayat: str
ward_number: int
ward_name: str

Expand All @@ -32,6 +33,13 @@ class RowObj(NamedTuple):
dict[
int, # ward_number
str, # ward_name
]
| dict[ # if local_body_type is block_panchayat
str, # grama_panchayat
dict[
int, # ward_number
str, # ward_name
],
],
],
],
Expand All @@ -47,6 +55,7 @@ class Command(BaseCommand):
- District
- Local Body
- Local Body Type
- Grama Panchayat
- Ward Number
- Ward Name
"""
Expand All @@ -69,18 +78,23 @@ def read_csv(self, url: str) -> list[RowObj]:
"District",
"Local Body",
"Local Body Type",
"Grama Panchayat",
"Ward Number",
"Ward Name",
}
if set(reader.fieldnames) != expected_columns:
if not expected_columns.issubset(reader.fieldnames):
logger.error("Invalid CSV Columns: %s", reader.fieldnames)
raise ValueError("Invalid CSV Columns")
for row in DictReader(response.text.splitlines()):
row_obj = RowObj(
state=row["State"].strip(),
district=row["District"].strip(),
local_body=row["Local Body"].strip(),
local_body_type=row["Local Body Type"].strip(),
local_body_type=row["Local Body Type"]
.strip()
.lower()
.replace(" ", "_"),
grama_panchayat=row["Grama Panchayat"].strip(),
ward_number=int(row["Ward Number"].strip()),
ward_name=row["Ward Name"].strip(),
)
Expand All @@ -92,47 +106,112 @@ def read_csv(self, url: str) -> list[RowObj]:
raise ValueError("Duplicate rows found")
logger.info("Sorting Data")
sorted_data = sorted(
data, key=lambda x: (x.state, x.district, x.local_body, x.ward_number)
data,
key=lambda x: (
x.state,
x.district,
x.local_body,
x.local_body_type,
x.grama_panchayat,
x.ward_number,
),
)
logger.info("Rows Parsed: %s", len(sorted_data))
return sorted_data

def rows_to_dict(self, rows: list[RowObj]) -> OrgDict:
    """Convert parsed CSV rows into a nested organization mapping.

    The result has the shape::

        {
            state: {
                district: {
                    local_body: {
                        local_body_type: children
                    }
                }
            }
        }

    where ``children`` is ``{ward_number: ward_name}`` for every local body
    type except ``"block_panchayat"``, for which it is
    ``{grama_panchayat: {ward_number: ward_name}}``.

    Rows with an empty required field are logged and skipped; because the
    check relies on truthiness, a ward number of ``0`` counts as missing.
    A ward number that appears more than once under the same parent is
    dropped entirely (every occurrence), since there is no way to tell
    which row is the correct one.

    Args:
        rows: Parsed rows, as produced by ``read_csv``.

    Returns:
        The nested mapping described above.
    """
    logger.info("Converting Rows to Dict")
    data: dict = {}
    count = 0
    # Wards already discarded as duplicates. Without this, a third row with
    # the same ward number would be inserted again after the first two
    # cancelled each other out.
    dropped_wards: set[tuple] = set()
    for row in rows:
        is_block_panchayat = row.local_body_type == "block_panchayat"
        has_required_fields = all(
            [
                row.state,
                row.district,
                row.local_body,
                row.local_body_type,
                row.ward_number,
                row.ward_name,
            ]
        )
        if not has_required_fields or (
            is_block_panchayat and not row.grama_panchayat
        ):
            logger.error("Invalid Row: %s", row)
            # TODO: raise ValueError("Invalid Row") once the data is clean
            continue

        state: dict = data.setdefault(row.state, {})
        district: dict = state.setdefault(row.district, {})
        local_body: dict = district.setdefault(row.local_body, {})
        local_body_type: dict = local_body.setdefault(row.local_body_type, {})

        # Block panchayats carry an extra grama panchayat level between the
        # local body and its wards; every other type holds wards directly.
        if is_block_panchayat:
            wards: dict = local_body_type.setdefault(row.grama_panchayat, {})
            parent_name = row.grama_panchayat
        else:
            wards = local_body_type
            parent_name = None

        ward_key = (
            row.state,
            row.district,
            row.local_body,
            row.local_body_type,
            parent_name,
            row.ward_number,
        )
        if row.ward_number in wards:
            # First repeat: remove the entry stored earlier and remember it.
            logger.error("Duplicate Ward: %s", row)
            # TODO: raise ValueError("Duplicate Ward") once the data is clean
            del wards[row.ward_number]
            dropped_wards.add(ward_key)
            count -= 1
            continue
        if ward_key in dropped_wards:
            # Further repeats of a ward that was already discarded.
            logger.error("Duplicate Ward: %s", row)
            continue

        wards[row.ward_number] = row.ward_name
        count += 1
    logger.info("Rows Converted: %s", count)
    return data

def create_ward(self, state, parent, ward_number, ward_name):
    """Fetch or create the ward organization ``ward_name`` under ``parent``.

    The lookup matches on case-insensitive name, parent, the ``"govt"``
    organization type and the ward metadata (country, type and ward
    number). When nothing matches, a new organization is created from the
    defaults below, with ``state`` as its root organization. The outcome is
    logged at debug level; nothing is returned.

    Args:
        state: Organization used as ``root_org`` for a newly created ward.
        parent: Organization the ward belongs to.
        ward_number: Stored in the metadata as ``govt_org_id``.
        ward_name: Name of the ward.
    """
    ward_metadata = {
        "country": self.country,
        "govt_org_type": "ward",
        "govt_org_id": ward_number,
    }
    matching_wards = Organization.objects.filter(
        name__iexact=ward_name,
        parent=parent,
        org_type="govt",
        metadata=ward_metadata,
    )
    # Field values used only when a new ward has to be created.
    new_ward_fields = {
        "name": ward_name,
        "root_org": state,
        "parent": parent,
        "org_type": "govt",
        "system_generated": True,
        "metadata": ward_metadata,
        "meta": {"migration_id": self.timestamp},
    }
    ward, was_created = matching_wards.get_or_create(defaults=new_ward_fields)
    logger.debug(
        "Ward: %s, Created: %s, Ward ID: %s",
        ward_name,
        was_created,
        ward.id,
    )

def create_organization(self, data: OrgDict):
logger.info("Creating Organizations")
for state, districts in data.items():
metadata = {
"country": self.country,
"govt_org_type": "state",
"govt_org_children_type": "district",
}
state_obj, created = Organization.objects.filter(
name__iexact=state,
org_type="govt",
metadata={"country": "India", "govt_org_type": "state"},
name__iexact=state, org_type="govt", parent=None, metadata=metadata
).get_or_create(
defaults={
"name": state,
"org_type": "govt",
"system_generated": True,
"metadata": {"country": "India", "govt_org_type": "state"},
"metadata": metadata,
"meta": {"migration_id": self.timestamp},
},
)
Expand All @@ -143,19 +222,24 @@ def create_organization(self, data: OrgDict):
state_obj.id,
)
for district, local_bodies in districts.items():
metadata = {
"country": self.country,
"govt_org_type": "district",
"govt_org_children_type": "local_body",
}
district_obj, created = Organization.objects.filter(
name__iexact=district,
parent=state_obj,
org_type="govt",
metadata={"country": "India", "govt_org_type": "district"},
metadata=metadata,
).get_or_create(
defaults={
"name": district,
"root_org": state_obj,
"parent": state_obj,
"org_type": "govt",
"system_generated": True,
"metadata": {"country": "India", "govt_org_type": "district"},
"metadata": metadata,
"meta": {"migration_id": self.timestamp},
},
)
Expand All @@ -166,24 +250,30 @@ def create_organization(self, data: OrgDict):
district_obj.id,
)
for local_body, local_body_types in local_bodies.items():
for local_body_type, wards in local_body_types.items():
lb_type = local_body_type.lower().replace(" ", "_")
for local_body_type, children in local_body_types.items():
# children can be either ward or grama_panchayat
metadata = {
"country": self.country,
"govt_org_type": local_body_type,
}
if local_body_type == "block_panchayat":
metadata["govt_org_children_type"] = "grama_panchayat"
else:
metadata["govt_org_children_type"] = "ward"

local_body_obj, created = Organization.objects.filter(
name__iexact=local_body,
parent=district_obj,
org_type="govt",
metadata={"country": "India", "govt_org_type": lb_type},
metadata=metadata,
).get_or_create(
defaults={
"name": local_body,
"root_org": state_obj,
"parent": district_obj,
"org_type": "govt",
"system_generated": True,
"metadata": {
"country": "India",
"govt_org_type": lb_type,
},
"metadata": metadata,
"meta": {"migration_id": self.timestamp},
},
)
Expand All @@ -193,36 +283,54 @@ def create_organization(self, data: OrgDict):
created,
local_body_obj.id,
)
for ward_number, ward_name in wards.items():
ward_obj, created = Organization.objects.get_or_create(
name__iexact=ward_name,
parent=local_body_obj,
org_type="govt",
metadata={
"country": "India",
"govt_org_type": "ward",
"govt_org_id": ward_number,
},
defaults={
"name": ward_name,
"root_org": state_obj,
"parent": local_body_obj,
"org_type": "govt",
"system_generated": True,
"metadata": {
"country": "India",
"govt_org_type": "ward",
"govt_org_id": ward_number,
},
"meta": {"migration_id": self.timestamp},
},
)
logger.debug(
"Ward: %s, Created: %s, Ward ID: %s",
ward_name,
created,
ward_obj.id,
)
if local_body_type == "block_panchayat":
for grama_panchayat, wards in children.items():
metadata = {
"country": self.country,
"govt_org_type": "grama_panchayat",
"govt_org_children_type": "ward",
}
grama_panchayat_obj, created = (
Organization.objects.filter(
name__iexact=grama_panchayat,
parent=local_body_obj,
org_type="govt",
metadata=metadata,
).get_or_create(
defaults={
"name": grama_panchayat,
"root_org": state_obj,
"parent": local_body_obj,
"org_type": "govt",
"system_generated": True,
"metadata": metadata,
"meta": {"migration_id": self.timestamp},
},
)
)
logger.debug(
"Block Panchayat: %s, Created: %s, Block Panchayat ID: %s",
grama_panchayat,
created,
grama_panchayat_obj.id,
)
for ward_number, ward_name in wards.items():
self.create_ward(
state_obj,
grama_panchayat_obj,
ward_number,
ward_name,
)
else:
for ward_number, ward_name in children.items():
self.create_ward(
state_obj, local_body_obj, ward_number, ward_name
)
logger.info("Organizations Created")
ward_count = Organization.objects.filter(
org_type="govt", metadata__govt_org_type="ward"
).count()
logger.info("Total Wards: %s", ward_count)

def handle(self, *args, **options):
start_time = datetime.now(tz=UTC)
Expand All @@ -234,6 +342,7 @@ def handle(self, *args, **options):
logger.setLevel(logging.DEBUG)

self.timestamp = int(start_time.timestamp() * 1000)
self.country = "India"

csv_file_url = options["csv_file_url"]
data = self.rows_to_dict(self.read_csv(csv_file_url))
Expand Down

0 comments on commit 5ee7fcc

Please sign in to comment.