Skip to content

Commit

Permalink
Avoid calling normalise multiple times (#48)
Browse files (browse the repository at this point in the history)
Ensure lookup is using normalised name
  • Loading branch information
mcarans authored Jul 8, 2024
1 parent 8e6d69a commit e8ed02d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,7 @@ hdx-python-utilities==3.7.2
# via hdx-python-country (pyproject.toml)
humanize==4.9.0
# via frictionless
identify==2.5.36
identify==2.6.0
# via pre-commit
idna==3.7
# via requests
Expand Down
33 changes: 19 additions & 14 deletions src/hdx/location/adminlevel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -144,8 +144,9 @@ def setup_row(
self.pcodes.append(pcode)
self.pcode_to_name[pcode] = adm_name

adm_name = normalise(adm_name)
name_to_pcode = self.name_to_pcode.get(countryiso3, {})
name_to_pcode[normalise(adm_name)] = pcode
name_to_pcode[adm_name] = pcode
self.name_to_pcode[countryiso3] = name_to_pcode
self.pcode_to_iso3[pcode] = countryiso3
self.pcode_to_iso3[pcode] = countryiso3
Expand All @@ -155,7 +156,7 @@ def setup_row(
countryiso3, {}
)
name_to_pcode = name_parent_to_pcode.get(parent, {})
name_to_pcode[normalise(adm_name)] = pcode
name_to_pcode[adm_name] = pcode
name_parent_to_pcode[parent] = name_to_pcode
self.name_parent_to_pcode[countryiso3] = name_parent_to_pcode
self.pcode_to_parent[pcode] = parent
Expand Down Expand Up @@ -554,13 +555,15 @@ def fuzzy_pcode(
self,
countryiso3: str,
name: str,
normalised_name: str,
**kwargs: Any,
) -> Optional[str]:
"""Fuzzy match name to pcode
Args:
countryiso3 (str): ISO3 country code
name (str): Name to match
normalised_name (str): Normalised name
**kwargs:
parent (Optional[str]): Parent admin code
logname (str): Log using this identifying name. Defaults to not logging.
Expand Down Expand Up @@ -597,21 +600,20 @@ def fuzzy_pcode(
if logname:
self.errors.add((logname, countryiso3, parent))
return None
adm_name_lookup = normalise(name)
adm_name_lookup2 = multiple_replace(
adm_name_lookup,
alt_normalised_name = multiple_replace(
normalised_name,
self.get_admin_name_replacements(countryiso3, parent),
)
pcode = name_to_pcode.get(
adm_name_lookup, name_to_pcode.get(adm_name_lookup2)
normalised_name, name_to_pcode.get(alt_normalised_name)
)
if not pcode and name.lower() in self.admin_fuzzy_dont:
if logname:
self.ignored.add((logname, countryiso3, name))
return None
if not pcode:
for map_name in name_to_pcode:
if adm_name_lookup in map_name:
if normalised_name in map_name:
pcode = name_to_pcode[map_name]
if logname:
self.matches.add(
Expand All @@ -625,7 +627,7 @@ def fuzzy_pcode(
)
break
for map_name in name_to_pcode:
if adm_name_lookup2 in map_name:
if alt_normalised_name in map_name:
pcode = name_to_pcode[map_name]
if logname:
self.matches.add(
Expand Down Expand Up @@ -659,8 +661,8 @@ def al_transform_2(name):

matching_index = self.phonetics.match(
map_names,
adm_name_lookup,
alternative_name=adm_name_lookup2,
normalised_name,
alternative_name=alt_normalised_name,
transform_possible_names=[al_transform_1, al_transform_2],
)

Expand Down Expand Up @@ -754,25 +756,28 @@ def get_pcode(
)
return pcode, True
else:
normalised_name = normalise(name)
if parent:
name_parent_to_pcode = self.name_parent_to_pcode.get(
countryiso3
)
if name_parent_to_pcode:
name_to_pcode = name_parent_to_pcode.get(parent)
if name_to_pcode is not None:
pcode = name_to_pcode.get(name.lower())
pcode = name_to_pcode.get(normalised_name)
if pcode:
return pcode, True
else:
name_to_pcode = self.name_to_pcode.get(countryiso3)
if name_to_pcode is not None:
pcode = name_to_pcode.get(name.lower())
pcode = name_to_pcode.get(normalised_name)
if pcode:
return pcode, True
if not fuzzy_match or len(name) < fuzzy_length:
if not fuzzy_match or len(normalised_name) < fuzzy_length:
return None, True
pcode = self.fuzzy_pcode(countryiso3, name, **kwargs)
pcode = self.fuzzy_pcode(
countryiso3, name, normalised_name, **kwargs
)
return pcode, False

def output_matches(self) -> List[str]:
Expand Down

0 comments on commit e8ed02d

Please sign in to comment.