Check translations for spam, broken font tags and sprintf errors.
Supersedes D1674 and D3520. This still generates some false positives (particularly in the singular-plural check), but in general it is sufficient to check the bundled languages more thoroughly.
Refs #4250
Differential Revision: https://code.wildfiregames.com/D3926
This was SVN commit r25538.
This commit is contained in:
parent
768313d7db
commit
0e234dbb61
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (C) 2020 Wildfire Games.
|
||||
# Copyright (C) 2021 Wildfire Games.
|
||||
# This file is part of 0 A.D.
|
||||
#
|
||||
# 0 A.D. is free software: you can redistribute it and/or modify
|
||||
@ -16,18 +16,60 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os, re, sys
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import multiprocessing
|
||||
|
||||
from i18n_helper import l10nToolsDirectory, projectRootDirectory
|
||||
from i18n_helper import projectRootDirectory
|
||||
from i18n_helper.catalog import Catalog
|
||||
from i18n_helper.globber import getCatalogs
|
||||
|
||||
l10nFolderName = "l10n"
|
||||
|
||||
def checkTranslationsForSpam(inputFilePath):
|
||||
print(f"Checking {inputFilePath}")
|
||||
templateCatalog = Catalog.readFrom()
|
||||
VERBOSE = 0
|
||||
|
||||
|
||||
class MessageChecker:
    """Checks all messages in a catalog against a regex.

    Every match of the regex found in a translation must also be present in
    the corresponding template (source) message; anything else is reported
    on stdout.
    """

    def __init__(self, human_name, regex):
        """Create a checker.

        :param human_name: short label used in the printed reports
            (e.g. "url", "sprintf", "tag").
        :param regex: pattern source; it is compiled case-insensitively.
        """
        self.regex = re.compile(regex, re.IGNORECASE)
        self.human_name = human_name

    def check(self, inputFilePath, templateMessage, translatedCatalogs):
        """Report patterns used by translations but absent from the template.

        :param inputFilePath: path of the template file, used only in reports.
        :param templateMessage: template message; its ``id``, ``context`` and
            ``pluralizable`` attributes are read.
        :param translatedCatalogs: iterable of catalogs providing
            ``get(id, context)`` and ``locale``.
        :return: None; findings are printed.
        """
        patterns = set(self.regex.findall(
            templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id
        ))
        # Everything a translation may legitimately contain: the patterns of
        # the singular source string plus, below, those of the plural one.
        allowedPatterns = set(patterns)

        # As a sanity check, verify that the template message is coherent.
        # Note that these tend to be false positives.
        # TODO: the possible tags are usually comments, we ought be able to find them.
        if templateMessage.pluralizable:
            pluralPatterns = set(self.regex.findall(templateMessage.id[1]))
            if pluralPatterns.difference(patterns):
                print(f"{inputFilePath} - Different {self.human_name} in singular and plural source strings "
                      f"for '{templateMessage}' in '{inputFilePath}'")
            allowedPatterns |= pluralPatterns

        for translationCatalog in translatedCatalogs:
            translationMessage = translationCatalog.get(
                templateMessage.id, templateMessage.context)
            if not translationMessage:
                continue

            # Inspect every plural form, not only the first one, so that an
            # unwanted pattern cannot hide in a later form.
            translatedStrings = translationMessage.string
            if not translationMessage.pluralizable or isinstance(translatedStrings, str):
                translatedStrings = (translatedStrings,)

            translatedPatterns = set()
            for translatedString in translatedStrings:
                if translatedString:
                    translatedPatterns.update(self.regex.findall(translatedString))

            unknown_patterns = translatedPatterns.difference(allowedPatterns)
            if unknown_patterns:
                # Sorted so that the report is stable from one run to the next.
                unknownList = ", ".join("`" + x + "`" for x in sorted(unknown_patterns))
                allowedList = ", ".join("`" + x + "`" for x in sorted(allowedPatterns))
                print(f'{inputFilePath} - {translationCatalog.locale}: '
                      f'Found unknown {self.human_name} {unknownList} in the translation '
                      f'which do not match any {self.human_name} in the template: {allowedList}')
|
||||
|
||||
|
||||
def check_translations(inputFilePath):
|
||||
if VERBOSE:
|
||||
print(f"Checking {inputFilePath}")
|
||||
templateCatalog = Catalog.readFrom(inputFilePath)
|
||||
|
||||
# If language codes were specified on the command line, filter by those.
|
||||
filters = sys.argv[1:]
|
||||
@ -35,37 +77,22 @@ def checkTranslationsForSpam(inputFilePath):
|
||||
# Load existing translation catalogs.
|
||||
existingTranslationCatalogs = getCatalogs(inputFilePath, filters)
|
||||
|
||||
urlPattern = re.compile(r"https?://(?:[a-z0-9-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE)
|
||||
spam = MessageChecker("url", r"https?://(?:[a-z0-9-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
|
||||
sprintf = MessageChecker("sprintf", r"%\([^)]+\)s")
|
||||
tags = MessageChecker("tag", r"[^\\][^\\](\[[^]]+/?\])")
|
||||
|
||||
# Check that there are no spam URLs.
|
||||
# Loop through all messages in the .POT catalog for URLs.
|
||||
# For each, check for the corresponding key in the .PO catalogs.
|
||||
# If found, check that the URLs in the .PO keys are the same as those in the .POT key.
|
||||
for templateMessage in templateCatalog:
|
||||
templateUrls = set(urlPattern.findall(
|
||||
templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id
|
||||
))
|
||||
# As a sanity check, verify that the template message is coherent
|
||||
if templateMessage.pluralizable:
|
||||
pluralUrls = set(urlPattern.findall(templateMessage.id[1]))
|
||||
if pluralUrls.difference(templateUrls):
|
||||
print(f"{inputFilePath} - Different URLs in singular and plural source strings "
|
||||
f"for '{templateMessage}' in '{inputFilePath}'")
|
||||
spam.check(inputFilePath, templateMessage, existingTranslationCatalogs)
|
||||
sprintf.check(inputFilePath, templateMessage, existingTranslationCatalogs)
|
||||
tags.check(inputFilePath, templateMessage, existingTranslationCatalogs)
|
||||
|
||||
for translationCatalog in existingTranslationCatalogs:
|
||||
translationMessage = translationCatalog.get(templateMessage.id, templateMessage.context)
|
||||
if not translationMessage:
|
||||
continue
|
||||
if VERBOSE:
|
||||
print(f"Done checking {inputFilePath}")
|
||||
|
||||
translationUrls = set(urlPattern.findall(
|
||||
translationMessage.string[0] if translationMessage.pluralizable else translationMessage.string
|
||||
))
|
||||
unknown_urls = translationUrls.difference(templateUrls)
|
||||
if unknown_urls:
|
||||
print(f'{inputFilePath} - {translationCatalog.locale}: '
|
||||
f'Found unknown URL(s) {", ".join(unknown_urls)} in the translation '
|
||||
f'which do not match any of the URLs in the template: {", ".join(templateUrls)}')
|
||||
print(f"Done checking {inputFilePath}")
|
||||
|
||||
def main():
|
||||
print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” "
|
||||
@ -76,7 +103,7 @@ def main():
|
||||
if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
|
||||
foundPots += 1
|
||||
multiprocessing.Process(
|
||||
target=checkTranslationsForSpam,
|
||||
target=check_translations,
|
||||
args=(os.path.join(root, filename), )
|
||||
).start()
|
||||
if foundPots == 0:
|
Loading…
Reference in New Issue
Block a user