Skip to content

Commit

Permalink
QLocaleXML: improve handling of XML-unsafe element text
Browse files Browse the repository at this point in the history
Use CDATA when outside ASCII. Share the attribute-packing code for an
open-tag in a static method. In passing, tweak a comment's text.

Change-Id: Ic8b75afc56d537a1a51d13797c737d4bfcc1f910
Reviewed-by: Mate Barany <[email protected]>
  • Loading branch information
ediosyncratic committed Jul 15, 2024
1 parent b657036 commit 85f0e26
Showing 1 changed file with 26 additions and 13 deletions.
39 changes: 26 additions & 13 deletions util/locale_database/qlocalexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,9 @@ def __isNodeNamed(elt, name, TYPE=minidom.Node.ELEMENT_NODE):
def __eltWords(elt):
child = elt.firstChild
while child:
if child.nodeType == elt.TEXT_NODE:
if child.nodeType in (elt.TEXT_NODE, elt.CDATA_SECTION_NODE):
# Note: do not strip(), as some group separators are
# non-breaking spaces, that strip() will discard.
# (non-breaking) spaces, that strip() will discard.
yield child.nodeValue
child = child.nextSibling

Expand Down Expand Up @@ -464,20 +464,22 @@ def inTag(self, tag, text, **attrs):
"""Writes an XML element with the given content.
First parameter, tag, is the element type; second, text, is the content
of its body. Any keyword parameters passed specify attributes to
of its body, which must be XML-safe (see safeInTag() for when that's
not assured). Any keyword parameters passed specify attributes to
include in the opening tag."""
if attrs:
head = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
head = f'{tag} {head}'
else:
head = tag
self.__write(f'<{head}>{text}</{tag}>')
self.__write(f'<{self.__attrJoin(tag, attrs)}>{text}</{tag}>')

def asTag(self, tag, **attrs):
"""Similar to inTag(), but with no content for the element."""
assert attrs, tag # No point to this otherwise
tail = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
self.__write(f'<{tag} {tail} />')
self.__write(f'<{self.__attrJoin(tag, attrs)} />')

def safeInTag(self, tag, text, **attrs):
    """Writes an XML element whose content isn't known to be XML-safe.

    Takes the same parameters as inTag(). Text that is pure ASCII has
    its XML-special characters escaped and is then written via inTag();
    any other text is written inside a CDATA section instead."""
    if not text.isascii():
        self.__cdataInTag(tag, text, **attrs)
        return
    escaped = self.__xmlSafe(text)
    self.inTag(tag, escaped, **attrs)

def close(self, grumble):
"""Finish writing and grumble about any issues discovered."""
Expand Down Expand Up @@ -506,10 +508,21 @@ def __printit(text):
def __complain(text):
raise Error('Attempted to write data after closing :-(')

@staticmethod
def __attrJoin(tag, attrs):
    """Returns the content of an open-tag with given tag and attributes.

    First parameter, tag, is the element type; second, attrs, is a
    mapping from attribute names to their values. With no attributes,
    the result is just the tag. Otherwise each value is formatted as
    text and escaped, so that a '&', '<', '>' or '"' in it cannot break
    the double-quoted attribute it is packed into."""
    if not attrs:
        return tag
    # Imported locally so this method does not depend on the module's
    # top-level imports.
    from xml.sax.saxutils import escape
    quoted = {'"': '&quot;'}
    tail = ' '.join(f'{k}="{escape(format(v), quoted)}"'
                    for k, v in attrs.items())
    return f'{tag} {tail}'

@staticmethod
def __xmlSafe(text):
    """Returns text with '&', '<' and '>' replaced by XML entities."""
    # One pass over the text; each special character maps straight to
    # its entity, so the '&' of an inserted entity is never re-escaped.
    entities = str.maketrans({'&': '&amp;', '<': '&lt;', '>': '&gt;'})
    return text.translate(entities)

def __cdataInTag(self, tag, text, **attrs):
    """Writes an XML element with text wrapped in a CDATA section.

    Takes the same parameters as inTag(). The text is written verbatim,
    except that any ']]>' in it is split across two adjacent CDATA
    sections, since that sequence would otherwise end the section early
    and leave the output malformed."""
    body = text.replace(']]>', ']]]]><![CDATA[>')
    self.__write(f'<{self.__attrJoin(tag, attrs)}><![CDATA[{body}]]></{tag}>')

def __enumTable(self, tag, table, code2name):
"""Writes a table of QLocale-enum-related data.
Expand All @@ -519,9 +532,9 @@ def __enumTable(self, tag, table, code2name):
type. Last is the englishNaming method of the CldrAccess being used to
read CLDR data; it is used to map ISO codes to en.xml names."""
self.__openTag(f'{tag}List')
enname, safe = code2name(tag), self.__xmlSafe
enname = code2name(tag)
for key, (name, code) in table.items():
self.inTag('naming', safe(enname(code, name)), id = key, code = code)
self.safeInTag('naming', enname(code, name), id = key, code = code)
self.__closeTag(f'{tag}List')

def __likelySubTag(self, tag, likely):
Expand Down

0 comments on commit 85f0e26

Please sign in to comment.