From 85f0e2637184fd42839bf46aa46570be12ffbef7 Mon Sep 17 00:00:00 2001
From: Edward Welbourne
Date: Thu, 4 Jul 2024 15:16:38 +0200
Subject: [PATCH] QLocaleXML: improve handling of XML-unsafe element text

Use CDATA when outside ASCII. Share the attribute-packing code for an
open-tag in a static method. In passing, tweak a comment's text.

Change-Id: Ic8b75afc56d537a1a51d13797c737d4bfcc1f910
Reviewed-by: Mate Barany
---
 util/locale_database/qlocalexml.py | 39 ++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index 4def112f80b..e6a7e70de35 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -264,9 +264,9 @@ def __isNodeNamed(elt, name, TYPE=minidom.Node.ELEMENT_NODE):
     def __eltWords(elt):
         child = elt.firstChild
         while child:
-            if child.nodeType == elt.TEXT_NODE:
+            if child.nodeType in (elt.TEXT_NODE, elt.CDATA_SECTION_NODE):
                 # Note: do not strip(), as some group separators are
-                # non-breaking spaces, that strip() will discard.
+                # (non-breaking) spaces, that strip() will discard.
                 yield child.nodeValue
             child = child.nextSibling
 
@@ -464,20 +464,22 @@ def inTag(self, tag, text, **attrs):
         """Writes an XML element with the given content.
 
         First parameter, tag, is the element type; second, text, is the content
-        of its body. Any keyword parameters passed specify attributes to
+        of its body, which must be XML-safe (see safeInTag() for when that's
+        not assured). Any keyword parameters passed specify attributes to
         include in the opening tag."""
-        if attrs:
-            head = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
-            head = f'{tag} {head}'
-        else:
-            head = tag
-        self.__write(f'<{head}>{text}</{tag}>')
+        self.__write(f'<{self.__attrJoin(tag, attrs)}>{text}</{tag}>')
 
     def asTag(self, tag, **attrs):
         """Similar to inTag(), but with no content for the element."""
         assert attrs, tag # No point to this otherwise
-        tail = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
-        self.__write(f'<{tag} {tail} />')
+        self.__write(f'<{self.__attrJoin(tag, attrs)} />')
+
+    def safeInTag(self, tag, text, **attrs):
+        """Similar to inTag(), when text isn't known to be XML-safe."""
+        if text.isascii():
+            self.inTag(tag, self.__xmlSafe(text), **attrs)
+        else:
+            self.__cdataInTag(tag, text, **attrs)
 
     def close(self, grumble):
         """Finish writing and grumble about any issues discovered."""
@@ -506,10 +508,21 @@ def __printit(text):
     def __complain(text):
         raise Error('Attempted to write data after closing :-(')
 
+    @staticmethod
+    def __attrJoin(tag, attrs):
+        # Content of open-tag with given tag and attributes
+        if not attrs:
+            return tag
+        tail = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
+        return f'{tag} {tail}'
+
     @staticmethod
     def __xmlSafe(text):
         return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 
+    def __cdataInTag(self, tag, text, **attrs):
+        self.__write(f'<{self.__attrJoin(tag, attrs)}><![CDATA[{text}]]></{tag}>')
+
     def __enumTable(self, tag, table, code2name):
         """Writes a table of QLocale-enum-related data.
 
@@ -519,9 +532,9 @@ def __enumTable(self, tag, table, code2name):
         type. Last is the englishNaming method of the CldrAccess being used to
         read CLDR data; it is used to map ISO codes to en.xml names."""
         self.__openTag(f'{tag}List')
-        enname, safe = code2name(tag), self.__xmlSafe
+        enname = code2name(tag)
         for key, (name, code) in table.items():
-            self.inTag('naming', safe(enname(code, name)), id = key, code = code)
+            self.safeInTag('naming', enname(code, name), id = key, code = code)
         self.__closeTag(f'{tag}List')
 
     def __likelySubTag(self, tag, likely):