From 0e84957979a5816d6c4e30fb076f83f4fbf4458b Mon Sep 17 00:00:00 2001
From: Dunedan <dunedan@phoenitydawn.de>
Date: Sat, 7 Sep 2024 06:38:54 +0200
Subject: [PATCH] Simplify XML parsing by iterating only once

This simplifies the XML parsing, by iterating over the DOM tree only
once. Curiously this doesn't result in significant performance gains.

As the keywords are now found in the order they appear in the
document instead of the order they are mentioned in messages.json, the
order of a few strings in the PO-templates changes as a result of
this commit.
---
 source/tools/i18n/i18n_helper/extractors.py | 70 +++++++++++----------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/source/tools/i18n/i18n_helper/extractors.py b/source/tools/i18n/i18n_helper/extractors.py
index d03f0d4594..4275ac4f5d 100644
--- a/source/tools/i18n/i18n_helper/extractors.py
+++ b/source/tools/i18n/i18n_helper/extractors.py
@@ -452,38 +452,40 @@ class XmlExtractor(Extractor):
     def extract_from_file(self, filepath):
         with open(filepath, encoding="utf-8-sig") as file_object:
             xml_document = etree.parse(file_object)
-            for keyword in self.keywords:
-                for element in xml_document.iter(keyword):
-                    lineno = element.sourceline
-                    if element.text is None:
-                        continue
 
-                    comments = []
-                    if "extractJson" in self.keywords[keyword]:
-                        json_extractor = self.get_json_extractor()
-                        json_extractor.set_options(self.keywords[keyword]["extractJson"])
-                        for message, context in json_extractor.extract_from_string(element.text):
-                            yield message, None, context, lineno, comments
-                    else:
-                        context = None
-                        if "context" in element.attrib:
-                            context = str(element.get("context"))
-                        elif "tagAsContext" in self.keywords[keyword]:
-                            context = keyword
-                        elif "customContext" in self.keywords[keyword]:
-                            context = self.keywords[keyword]["customContext"]
-                        if "comment" in element.attrib:
-                            comment = element.get("comment")
-                            comment = " ".join(
-                                comment.split()
-                            )  # Remove tabs, line breaks and unecessary spaces.
-                            comments.append(comment)
-                        if "splitOnWhitespace" in self.keywords[keyword]:
-                            for split_text in element.text.split():
-                                # split on whitespace is used for token lists, there, a
-                                # leading '-' means the token has to be removed, so it's not
-                                # to be processed here either
-                                if split_text[0] != "-":
-                                    yield str(split_text), None, context, lineno, comments
-                        else:
-                            yield str(element.text), None, context, lineno, comments
+        for element in xml_document.iter(*self.keywords.keys()):
+            keyword = element.tag
+
+            lineno = element.sourceline
+            if element.text is None:
+                continue
+
+            comments = []
+            if "extractJson" in self.keywords[keyword]:
+                json_extractor = self.get_json_extractor()
+                json_extractor.set_options(self.keywords[keyword]["extractJson"])
+                for message, context in json_extractor.extract_from_string(element.text):
+                    yield message, None, context, lineno, comments
+            else:
+                context = None
+                if "context" in element.attrib:
+                    context = str(element.get("context"))
+                elif "tagAsContext" in self.keywords[keyword]:
+                    context = keyword
+                elif "customContext" in self.keywords[keyword]:
+                    context = self.keywords[keyword]["customContext"]
+                if "comment" in element.attrib:
+                    comment = element.get("comment")
+                    comment = " ".join(
+                        comment.split()
+                    )  # Remove tabs, line breaks and unnecessary spaces.
+                    comments.append(comment)
+                if "splitOnWhitespace" in self.keywords[keyword]:
+                    for split_text in element.text.split():
+                        # split on whitespace is used for token lists, there, a
+                        # leading '-' means the token has to be removed, so it's not
+                        # to be processed here either
+                        if split_text[0] != "-":
+                            yield str(split_text), None, context, lineno, comments
+                else:
+                    yield str(element.text), None, context, lineno, comments