Skip to content

Commit

Permalink
Derive C locale data from en_US, overriding minor details
Browse files Browse the repository at this point in the history
The qlocalexml.py Locale.C() had to replicate a whole lot of data that
isn't really relevant to how C differs from en_US, and every addition
to what we support required further additions to it. So pass the en_US
Locale object to the pseudoconstructor so that C can inherit from it
and only override the parts where we care about the difference.

Hand-code shortening for short Jalali month names, to match Soroush's
original contribution, and include the narrow forms in the hard-coded
data to keep the generated data unchanged (for now). Note some of the
departures from CLDR; we may want to drop these overrides later.

In the process, convert the mapping from keys to locales to
consistently use IDs for all members of the key, instead of using the
(empty) code value for (as yet unused) variant; it now gets ID 0 and
is consistent with returns from codesToIdName(). This makes life
easier for the code that now has to construct an en_US key.

Task-number: QTBUG-115158
Change-Id: I3d7acb6a4059daec1bba341fcf015c39c7a6803b
Reviewed-by: Kai Köhne <[email protected]>
  • Loading branch information
ediosyncratic committed Jun 2, 2024
1 parent 5641b17 commit 0c809fc
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 91 deletions.
4 changes: 2 additions & 2 deletions util/locale_database/cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def zoneData(self):
return alias, defaults, winIds

def readLocales(self, calendars = ('gregorian',)):
return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k
return {(k.language_id, k.script_id, k.territory_id, k.variant_id): k
for k in self.__allLocales(calendars)}

def __allLocales(self, calendars):
Expand Down Expand Up @@ -264,7 +264,7 @@ def __getLocaleData(self, scan, calendars, language, script, territory, variant)
language = names[0], language_code = language, language_id = ids[0],
script = names[1], script_code = script, script_id = ids[1],
territory = names[2], territory_code = territory, territory_id = ids[2],
variant_code = variant)
variant_code = variant, variant_id = ids[3])

firstDay, weStart, weEnd = self.root.weekData(territory)
assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
Expand Down
3 changes: 2 additions & 1 deletion util/locale_database/cldr2qlocalexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def main(argv, out, err):
writer.enumData(reader.root.englishNaming)
writer.likelySubTags(reader.likelySubTags())
writer.zoneData(*reader.zoneData()) # Locale-independent zone data.
writer.locales(reader.readLocales(args.calendars), args.calendars)
en_US = tuple(id for id, name in reader.root.codesToIdName('en', '', 'US'))
writer.locales(reader.readLocales(args.calendars), args.calendars, en_US)

writer.close(err.write)
return 0
Expand Down
134 changes: 46 additions & 88 deletions util/locale_database/qlocalexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,19 @@ def zoneData(self, alias, defaults, windowsIds):
self.__closeTag('msZoneIana')
self.__closeTag('windowsZone')

def locales(self, locales, calendars):
def locales(self, locales, calendars, en_US):
"""Write the data for each locale.
First argument, locales, is the mapping whose values are the
Locale objects, with each key being the matching tuple of
numeric IDs for language, script, territory and variant.
Second argument is a tuple of calendar names. Third is the
tuple of numeric IDs that corresponds to en_US (needed to
provide fallbacks for the C locale)."""

self.__openTag('localeList')
self.__openTag('locale')
self.__writeLocale(Locale.C(calendars), calendars)
self.__writeLocale(Locale.C(locales[en_US]), calendars)
self.__closeTag('locale')
for key in sorted(locales.keys()):
self.__openTag('locale')
Expand Down Expand Up @@ -575,97 +584,46 @@ def toXml(self, write, calendars=('gregorian',)):
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))

# Tools used by __monthNames:
def fullName(i, name): return name
def firstThree(i, name): return name[:3]
def initial(i, name): return name[:1]
def number(i, name): return str(i + 1)
def islamicShort(i, name):
if not name: return name
if name == 'Shawwal': return 'Shaw.'
words = name.split()
if words[0].startswith('Dhu'):
words[0] = words[0][:7] + '.'
elif len(words[0]) > 3:
words[0] = words[0][:3] + '.'
return ' '.join(words)
@staticmethod
def __monthNames(calendars,
known={ # Map calendar to (names, extractors...):
# TODO: do we even need these ? CLDR's root.xml seems to
# have them, complete with yeartype="leap" handling for
# Hebrew's extra.
'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December'),
# Extractor pairs, (plain, standalone)
(fullName, fullName), # long
(firstThree, firstThree), # short
(number, initial)), # narrow
'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
(fullName, fullName),
(firstThree, firstThree),
(number, initial)),
'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
(fullName, fullName),
(islamicShort, islamicShort),
(number, number)),
'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
(fullName, fullName),
(fullName, fullName),
(number, number)),
},
sizes=('long', 'short', 'narrow')):
for cal in calendars:
try:
data = known[cal]
except KeyError as e: # Need to add an entry to known, above.
e.args += ('Unsupported calendar:', cal)
raise
names, get = data[0], data[1:]
for n, size in enumerate(sizes):
yield ('_'.join((camelCase((size, 'months')), cal)),
';'.join(get[n][0](i, x) for i, x in enumerate(names)))
yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
';'.join(get[n][1](i, x) for i, x in enumerate(names)))
del fullName, firstThree, initial, number, islamicShort

@classmethod
def C(cls, calendars=('gregorian',),
days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
'Thursday', 'Friday', 'Saturday'),
quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
"""Returns an object representing the C locale."""
return cls(cls.__monthNames(calendars),
def C(cls, en_US):
"""Returns an object representing the C locale.
Required argument, en_US, is the corresponding object for the
en_US locale (or the en_US_POSIX one if we ever support
variants). The C locale inherits from this, overriding what it
may need to."""
base = en_US.__dict__.copy()
# Soroush's original contribution shortened Jalali month names
# - contrary to CLDR, which doesn't abbreviate these in
# root.xml or en.xml, although some locales do, e.g. fr_CA.
# For compatibility with that,
for k in ('shortMonths_persian', 'standaloneShortMonths_persian'):
base[k] = ';'.join(x[:3] for x in base[k].split(';'))

return cls(base,
language='C', language_code='0', languageEndonym='',
script='AnyScript', script_code='0',
territory='AnyTerritory', territory_code='0', territoryEndonym='',
groupSizes=(3, 3, 1),
decimal='.', group=',', list=';', percent='%',
zero='0', minus='-', plus='+', exp='e',
# CLDR has non-ASCII versions of these:
quotationStart='"', quotationEnd='"',
alternateQuotationStart='\'', alternateQuotationEnd='\'',
listPatternPartStart='%1, %2',
listPatternPartMiddle='%1, %2',
listPatternPartEnd='%1, %2',
listPatternPartTwo='%1, %2',
byte_unit='bytes',
byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
am='AM', pm='PM', firstDayOfWeek='mon',
weekendStart='sat', weekendEnd='sun',
alternateQuotationStart="'", alternateQuotationEnd="'",
# CLDR gives 'dddd, MMMM d, yyyy', 'M/d/yy', 'h:mm:ss Ap tttt',
# 'h:mm Ap' with non-breaking space before Ap.
longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy',
longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss',
longDays=';'.join(days),
shortDays=';'.join(d[:3] for d in days),
narrowDays='7;1;2;3;4;5;6',
standaloneLongDays=';'.join(days),
standaloneShortDays=';'.join(d[:3] for d in days),
standaloneNarrowDays=';'.join(d[:1] for d in days),
currencyIsoCode='', currencySymbol='',
currencyDisplayName='',
# CLDR has US-$ and US-style formats:
currencyIsoCode='', currencySymbol='', currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
currencyFormat='%1%2', currencyNegativeFormat='')
currencyFormat='%1%2', currencyNegativeFormat='',
# We may want to fall back to CLDR for some of these:
firstDayOfWeek='mon', # CLDR has 'sun'
exp='e', # CLDR has 'E'
listPatternPartEnd='%1, %2', # CLDR has '%1, and %2'
listPatternPartTwo='%1, %2', # CLDR has '%1 and %2'
narrowDays='7;1;2;3;4;5;6', # CLDR has letters
narrowMonths_gregorian='1;2;3;4;5;6;7;8;9;10;11;12', # CLDR has letters
standaloneNarrowMonths_persian='F;O;K;T;M;S;M;A;A;D;B;E', # CLDR has digits
# Keep these explicit, despite matching CLDR:
decimal='.', group=',', percent='%',
zero='0', minus='-', plus='+',
am='AM', pm='PM', weekendStart='sat', weekendEnd='sun')

0 comments on commit 0c809fc

Please sign in to comment.