Skip to content

Commit

Permalink
Suggest name, when available, for unknown codes
Browse files Browse the repository at this point in the history
When parsing the CLDR data, we only handle language, script and
territory (which we call country) codes if they are known to our
enumdata.py tables.  When reporting the rest as unknown, in the
context of an actual locale definition (not the likely subtag data),
check whether en.xml can resolve the code for us; if it can, report
the full name it provides, as a hint to whoever's running the script
that an update to enumdata.py may be in order.

Change-Id: I9ca1d6922a91d45bc436f4b622e5557261897d7f
Reviewed-by: Thiago Macieira <[email protected]>
Reviewed-by: Lars Knoll <[email protected]>
Reviewed-by: Konstantin Ritt <[email protected]>
  • Loading branch information
ediosyncratic committed May 20, 2019
1 parent 248b675 commit b7d8169
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 5 deletions.
34 changes: 31 additions & 3 deletions util/locale_database/cldr2qlocalexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,34 @@ def skip_repeating_pattern(x):
result.append(pattern)
return result

def raiseUnknownCode(code, form, cache={}):
    """Report an unknown code, hinting at its name when one is available.

    A language, script or country code is declared unknown when
    enumdata.py does not know it.  If the en.xml found in cldr_dir
    nevertheless maps the code to a name, that name is included in the
    error message, as a hint that support for it could be added.

    First parameter, code, is the unknown code.  Second parameter,
    form, is one of 'language', 'script' or 'country' and selects
    which kind of code to look up.  Do not pass further parameters:
    the third is the look-up cache, deliberately shared between calls
    via its mutable default so that en.xml is only read once.

    Always raises xpathlite.Error; the message includes the unknown
    code's full name if one was found.

    Relies on global cldr_dir being set before it's called.
    """
    if not cache:
        # First call: populate the shared cache from en.xml.
        source = os.path.join(cldr_dir, 'en.xml')
        cache.update(xpathlite.codeMapsFromFile(source))

    known_as = cache[form].get(code)
    hint = ' - could use "%s"' % known_as if known_as else ''
    raise xpathlite.Error('unknown %s code "%s"' % (form, code) + hint)

def parse_list_pattern_part_format(pattern):
    """Rewrite the placeholders of a list pattern part.

    Each "{0}", "{1}" and "{2}" in pattern is replaced by "%1", "%2"
    and "%3" respectively; all other text is returned unchanged.
    """
    # This is a very limited parsing of the format, for list pattern
    # parts only: just the first three numbered placeholders are known.
    for index in range(3):
        pattern = pattern.replace("{%d}" % index, "%%%d" % (index + 1))
    return pattern
Expand Down Expand Up @@ -193,18 +221,18 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_

language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
raise xpathlite.Error('unknown language code "%s"' % language_code)
raiseUnknownCode(language_code, 'language')

script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
raise xpathlite.Error('unknown script code "%s"' % script_code)
raiseUnknownCode(script_code, 'script')

# we should handle fully qualified names with the territory
if not country_code:
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
raise xpathlite.Error('unknown country code "%s"' % country_code)
raiseUnknownCode(country_code, 'country')

# So we say we accept only those values that have "contributed" or
# "approved" resolution. see http://www.unicode.org/cldr/process.html
Expand Down
28 changes: 26 additions & 2 deletions util/locale_database/xpathlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,38 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
return node
return False

def codeMapsFromFile(file):
    """Extract mappings of language, script and country codes to names.

    The file shall typically be common/main/en.xml, which contains a
    localeDisplayNames element with children languages, scripts and
    territories; each element in each of these has a code as its type
    attribute and its name as element content.

    Returns a mapping with keys 'language', 'script' and 'country',
    each of which has, as value, a mapping of the relevant codes to
    names.  When a code appears more than once, an element carrying an
    alt attribute never replaces a name that has already been read.
    """
    parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
    keys = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}
    result = {}
    for src, dst in keys.items():
        child = findChild(parent, src)
        data = result[dst] = {}
        for elt in child.childNodes:
            # Nodes with no attributes at all cannot carry a code.
            attrs = elt.attributes
            if not attrs:
                continue
            # Look attributes up with get() rather than has_key(): the
            # latter is Python 2 only, while get() works on both.
            code = attrs.get('type')
            if code is None:
                continue
            key, value = code.value, elt.childNodes[0].wholeText
            # Don't over-write previously-read data for an alt form:
            if attrs.get('alt') is not None and key in data:
                continue
            data[key] = value

    return result

def findTagsInFile(file, path):
doc = parseDoc(file)

elt = doc.documentElement
tag_spec_list = path.split("/")
last_entry = None
for i in range(len(tag_spec_list)):
tag_spec = tag_spec_list[i]
for tag_spec in tag_spec_list:
tag_name = tag_spec
arg_name = 'type'
arg_value = ''
Expand Down

0 comments on commit b7d8169

Please sign in to comment.