diff --git a/binaries/data/l10n/.tx/config b/binaries/data/l10n/.tx/config new file mode 100644 index 0000000000..9695d347cf --- /dev/null +++ b/binaries/data/l10n/.tx/config @@ -0,0 +1,8 @@ +[main] +host = https://www.transifex.com + +[0ad.engine] +file_filter = .engine.po +source_file = engine.pot +source_lang = en + diff --git a/binaries/data/l10n/messages.json b/binaries/data/l10n/messages.json new file mode 100644 index 0000000000..ffc4beb1c4 --- /dev/null +++ b/binaries/data/l10n/messages.json @@ -0,0 +1,30 @@ +[ + { + "output": "engine.pot", + "inputRoot": "../../../source", + "project": "Pyrogenesis", + "copyrightHolder": "Wildfire Games", + "rules": [ + { + "extractor": "cpp", + "filemasks": { + "includeMasks": ["**.cpp"], + "excludeMasks": ["third_party/**", "tools/**"] + }, + "options": { + "keywords": { + "Translate": [1], + "TranslatePlural": [1, 2], + "TranslateWithContext": [[1], 2], + "TranslatePluralWithContext": [[1], 2, 3], + "MarkForTranslation": [1], + "MarkForTranslationWithContext": [[1], 2] + }, + "commentTags": [ + "Translation:" + ] + } + } + ] + } +] diff --git a/binaries/data/mods/public/l10n/.tx/config b/binaries/data/mods/public/l10n/.tx/config new file mode 100644 index 0000000000..733e2ab536 --- /dev/null +++ b/binaries/data/mods/public/l10n/.tx/config @@ -0,0 +1,8 @@ +[main] +host = https://www.transifex.com + +[0ad.public] +file_filter = .public.po +source_file = public.pot +source_lang = en + diff --git a/binaries/data/mods/public/l10n/messages.json b/binaries/data/mods/public/l10n/messages.json new file mode 100644 index 0000000000..d73563d8d2 --- /dev/null +++ b/binaries/data/mods/public/l10n/messages.json @@ -0,0 +1,172 @@ +[ + { + "output": "public.pot", + "inputRoot": "..", + "project": "0 A.D. 
— Empires Ascendant", + "copyrightHolder": "Wildfire Games", + "rules": [ + { + "extractor": "javascript", + "filemasks": ["**.js"], + "options": { + "keywords": { + "translate": [1], + "translatePlural": [1, 2], + "translateWithContext": [[1], 2], + "translatePluralWithContext": [[1], 2, 3], + "markForTranslation": [1], + "markForTranslationWithContext": [[1], 2] + }, + "commentTags": [ + "Translation:" + ] + } + }, + { + "extractor": "xml", + "filemasks": ["gui/**.xml"], + "options": { + "format": "none", + "keywords": { + "translatableAttribute": { + "locationAttributes": ["id"] + }, + "translate": {} + } + } + }, + { + "extractor": "txt", + "filemasks": [ + "gui/manual/intro.txt", + "gui/manual/userreport.txt", + "gui/text/quotes.txt", + "gui/splashscreen/splashscreen.txt", + "gui/text/tips/**.txt" + ], + "options": { + "format": "none" + } + }, + { + "extractor": "json", + "filemasks": [ + "simulation/data/game_speeds.json", + "simulation/data/player_defaults.json" + ], + "options": { + "format": "none", + "keywords": [ + "Name" + ] + } + }, + { + "extractor": "json", + "filemasks": [ + "simulation/data/map_sizes.json" + ], + "options": { + "format": "none", + "keywords": [ + "Name", + "LongName" + ] + } + }, + { + "extractor": "json", + "filemasks": [ + "civs/**.json" + ], + "options": { + "format": "none", + "keywords": [ + "Name", + "Description", + "History", + "Special", + "AINames" + ] + } + }, + { + "extractor": "json", + "filemasks": [ + "maps/random/**.json" + ], + "options": { + "format": "none", + "keywords": [ + "Name", + "Description" + ] + } + }, + { + "extractor": "json", + "filemasks": [ + "simulation/ai/**.json" + ], + "options": { + "format": "none", + "keywords": [ + "name", + "description" + ] + } + }, + { + "extractor": "json", + "filemasks": [ + "simulation/data/technologies/**.json" + ], + "options": { + "format": "none", + "keywords": [ + "specificName", + "genericName", + "description", + "tooltip", + "requirementsTooltip" + ] + } + }, 
+ { + "extractor": "xml", + "filemasks": ["simulation/templates/**.xml"], + "options": { + "format": "none", + "keywords": { + "GenericName": {}, + "SpecificName": {}, + "Tooltip": {}, + "DisabledTooltip": {}, + "FormationName": {}, + "FromClass": {} + } + } + }, + { + "extractor": "xml", + "filemasks": [ + "maps/scenarios/**.xml", + "maps/skirmishes/**.xml" + ], + "options": { + "format": "none", + "keywords": { + "ScriptSettings": { + "extractJson": { + "keywords": [ + "Name", + "Description" + ] + } + } + } + } + } + ] + } +] diff --git a/source/tools/LICENSE.txt b/source/tools/LICENSE.txt index cdce0d2ef2..dc2167ab2a 100644 --- a/source/tools/LICENSE.txt +++ b/source/tools/LICENSE.txt @@ -46,6 +46,10 @@ in particular, let us know and we can try to clarify it. unspecified (FontLoader.py) IBM CPL (Packer.py) + i18n + GPLv2 + BSD (potter, fork of babel.messages; see http://babel.edgewall.org/wiki/License) + jsdebugger GPL version 2 (or later) Other - see js/lib/ace/LICENSE.TXT (js/lib/ace) diff --git a/source/tools/i18n/generateLongStringTranslations.py b/source/tools/i18n/generateLongStringTranslations.py new file mode 100644 index 0000000000..99f1d307d9 --- /dev/null +++ b/source/tools/i18n/generateLongStringTranslations.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# +# Copyright (C) 2013 Wildfire Games. +# This file is part of 0 A.D. +# +# 0 A.D. is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# 0 A.D. is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with 0 A.D. If not, see . 
def _loadTranslationCatalogs(l10nFolderPath, templateFilename, localeFilters):
    """Read the translation catalogs that belong to a given template.

    :param l10nFolderPath: folder that contains the template and its ``.po`` files
    :param templateFilename: base name of the ``.pot`` file, e.g. ``public.pot``
    :param localeFilters: locale codes to restrict loading to; an empty sequence loads every locale
    :return: a list with one catalog (as returned by ``read_po``) per matching ``.po`` file
    """
    # Translations are named "<locale>.<template>.po". ".pot" is one character longer than ".po", but the dot
    # that separates the locale code from the rest of the file name makes up for that, so the suffix to strip
    # is exactly as long as the template file name.
    poSuffix = "." + templateFilename[:-1]

    catalogs = []
    for filename in os.listdir(l10nFolderPath):
        # Skip translations of other templates and previously generated "long" files.
        if not filename.endswith(poSuffix) or filename.startswith("long"):
            continue
        locale = filename[:-len(poSuffix)]
        if not locale or (localeFilters and locale not in localeFilters):
            continue
        with codecs.open(os.path.join(l10nFolderPath, filename), 'r', 'utf-8') as fileObject:
            catalogs.append(read_po(fileObject))
    return catalogs


def _asStringTuple(string):
    """Return a message string as a tuple of forms, whether it is a plain string or a sequence of plural forms."""
    if isinstance(string, (list, tuple)):
        return tuple(string)
    return (string,)


def _useLongerStrings(longMessage, translationMessage):
    """Replace the strings of ``longMessage`` with those of ``translationMessage`` wherever the latter are longer."""
    candidates = _asStringTuple(translationMessage.string)

    if longMessage.pluralizable:
        currentSingularString, currentPluralString = longMessage.string
        candidateSingularString = candidates[0] if candidates else ""
        # There might be between 0 and infinite plural forms; only the longest one matters.
        candidatePluralString = max(candidates[1:], key=len) if len(candidates) > 1 else ""

        longestSingularString = max(currentSingularString, candidateSingularString, key=len)
        longestPluralString = max(currentPluralString, candidatePluralString, key=len)
        if (longestSingularString, longestPluralString) != (currentSingularString, currentPluralString):
            longMessage.string = (longestSingularString, longestPluralString)
    else:
        candidateString = max(candidates, key=len) if candidates else ""
        if len(candidateString) > len(longMessage.string):
            longMessage.string = candidateString


def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath, localeFilters=None):
    """Write a ``.po`` file that holds, for every message of a template, its longest known rendering.

    Every message starts out with its English text. Each existing translation found next to the template is
    then checked, and whenever a translation is longer than the string collected so far, it replaces it.

    :param inputFilePath: path of the ``.pot`` template to read
    :param outputFilePath: path of the ``.po`` file to write
    :param localeFilters: locale codes to consider; defaults to the command-line arguments (``sys.argv[1:]``),
                          and an empty sequence means "all locales"
    """
    # If language codes were specified on the command line, filter by those.
    if localeFilters is None:
        localeFilters = sys.argv[1:]

    with codecs.open(inputFilePath, 'r', 'utf-8') as fileObject:
        templateCatalog = read_po(fileObject)

    longStringCatalog = Catalog()

    # Fill catalog with English strings.
    for message in templateCatalog:
        if message.pluralizable:
            singularString, pluralString = message.id
            message.string = (singularString, pluralString)
        else:
            message.string = message.id
        longStringCatalog[message.id] = message

    # Load existing translation catalogs.
    existingTranslationCatalogs = _loadTranslationCatalogs(
        os.path.dirname(inputFilePath), os.path.basename(inputFilePath), localeFilters)

    # If any existing translation has more characters than what was collected so far, use that instead.
    # The catalog yields its stored message objects, so updating them in place is enough.
    for translationCatalog in existingTranslationCatalogs:
        for longStringCatalogMessage in longStringCatalog:
            translationMessage = translationCatalog.get(longStringCatalogMessage.id, longStringCatalogMessage.context)
            if translationMessage:
                _useLongerStrings(longStringCatalogMessage, translationMessage)

    with codecs.open(outputFilePath, 'w', 'utf-8') as fileObject:
        write_po(fileObject, longStringCatalog)


def main():
    """Generate a ``long.<template>.po`` file next to every ``.pot`` file found in an ``l10n`` folder."""
    foundPots = 0
    for root, folders, filenames in os.walk(projectRootDirectory):
        # Python 2 yields byte strings here; Python 3 already yields text.
        if isinstance(root, bytes):
            root = root.decode("utf-8")
        if os.path.basename(root) != l10nFolderName:
            continue
        for filename in filenames:
            if len(filename) > 4 and filename.endswith(".pot"):
                foundPots += 1
                # Dropping the last character turns the ".pot" extension into ".po".
                generateLongStringTranslationFromPotIntoPo(os.path.join(root, filename),
                                                           os.path.join(root, "long." + filename[:-1]))
    if foundPots == 0:
        print(u"This script did not work because no ‘.pot’ files were found.")
        print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")
        print(u"Then you can run this script to generate ‘.po’ files with the longest strings.")
PYTHON_FORMAT = re.compile(r"""(?x)
    \%
        (?:\(([\w]*)\))?
        (
            [-#0\ +]?(?:\*|[\d]+)?
            (?:\.(?:\*|[\d]+))?
            [hlL]?
        )
        ([diouxXeEfFgGcrs%])
""")

C_FORMAT = re.compile(r"""(?x)
    \%
        (\d+\$)?
        ([-+ 0#]+)?
        (v|\*(\d+\$)?v)?
        0*
        (\d+|\*(\d+\$)?)?
        (\.(\d*|\*(\d+\$)?))?
        [hlqLV]?
        ([%bcdefginopsuxDFOUX])
""")


class Message(object):
    """Representation of a single message in a catalog."""

    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
                 user_comments=(), previous_id=(), lineno=None, context=None, formatFlag=None):
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of locations where the message was found
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        :param formatFlag: the format flag to add to ``flags``; when it is `None`
                           and ``id`` is not empty, `guessFormatFlag` chooses it
        """
        self.id = id  #: The message ID
        if not string and self.pluralizable:
            string = (u'', u'')
        self.string = string  #: The message translation
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        if id and formatFlag is None:
            formatFlag = self.guessFormatFlag()
        if formatFlag:
            self.flags.add(formatFlag)
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so a
        # single text ID is never split into its characters on either version.
        if isinstance(previous_id, (bytes, type(u''))):
            self.previous_id = [previous_id]
        else:
            self.previous_id = list(previous_id)
        self.lineno = lineno
        self.context = context

    def __repr__(self):
        return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
                                        list(self.flags))

    def __cmp__(self, obj):
        """Compare Messages by ID, using the singular form of plural IDs.

        :param obj: the `Message` to compare with
        :return: ``-1``, ``0`` or ``1``
        :raise TypeError: if ``obj`` is not a `Message`
        """
        if not isinstance(obj, Message):
            raise TypeError('cannot compare a Message with %s' % type(obj).__name__)
        this = self.id[0] if self.pluralizable else self.id
        other = obj.id[0] if obj.pluralizable else obj.id
        # Equivalent to the Python 2 ``cmp`` builtin, which Python 3 lacks.
        return (this > other) - (this < other)

    # The rich comparisons return ``NotImplemented`` for foreign types so that
    # Python falls back to its default handling instead of failing obscurely.

    def __gt__(self, other):
        if not isinstance(other, Message):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __lt__(self, other):
        if not isinstance(other, Message):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __ge__(self, other):
        if not isinstance(other, Message):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def __le__(self, other):
        if not isinstance(other, Message):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __eq__(self, other):
        if not isinstance(other, Message):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, Message):
            return NotImplemented
        return self.__cmp__(other) != 0

    def clone(self):
        """Return a new `Message` built from shallow copies of this one's attributes."""
        return Message(*map(copy, (self.id, self.string, self.locations,
                                   self.flags, self.auto_comments,
                                   self.user_comments, self.previous_id,
                                   self.lineno, self.context)))

    @property
    def pluralizable(self):
        """Whether the message is pluralizable, that is, whether its ID is a
        list or a tuple.

        :type: `bool`"""
        return isinstance(self.id, (list, tuple))

    def guessFormatFlag(self):
        """ If the message contains parameters, this function returns a string with the flag that represents the format
        of those parameters.

        C-style placeholders take precedence over Python-style ones; `None` is returned when no form of the ID
        contains a placeholder.

        :type: `string`"""
        ids = self.id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        for id in ids:
            if C_FORMAT.search(id) is not None:
                return "c-format"
        for id in ids:
            if PYTHON_FORMAT.search(id) is not None:
                return "python-format"
        return None


DEFAULT_HEADER = u"""\
# Translation template for PROJECT.
# Copyright © YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
#"""


def _parse_header_datetime(value):
    """Parse a PO header date such as ``2013-01-31 12:00+0100``.

    :param value: the header value, which must end in a ``+HHMM`` or ``-HHMM`` offset
    :return: a `datetime` whose ``tzinfo`` is a `FixedOffsetTimezone` for that offset
    :raise ValueError: if the value has no trailing offset or the date part is malformed
    """
    parts = re.split(r'([+-]\d{4})$', value, maxsplit=1)
    if len(parts) != 3:
        raise ValueError('expected a date like "2013-01-31 12:00+0100" but got %r' % (value,))
    stamp, tzoffset = parts[0], parts[1]

    # Separate the offset into a sign component, hours, and minutes.
    sign = -1 if tzoffset[0] == '-' else 1
    net_mins_offset = sign * (int(tzoffset[1:3]) * 60 + int(tzoffset[3:5]))

    # Parse straight into a datetime: going through ``time.mktime`` would make
    # the result depend on the time zone of the machine running the tool.
    parsed = datetime.strptime(stamp, '%Y-%m-%d %H:%M')
    return parsed.replace(tzinfo=FixedOffsetTimezone(net_mins_offset))


class Catalog(object):
    """Representation of a message catalog."""

    def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
                 project=None, version=None, copyright_holder=None,
                 msgid_bugs_address=None, creation_date=None,
                 revision_date=None, charset='utf-8'):
        """Initialize the catalog object.

        :param locale: accepted for interface compatibility; this class does
                       not use it
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param charset: the encoding to use in the output
        """
        self.domain = domain  #: The message domain
        if header_comment is None:
            header_comment = DEFAULT_HEADER
        self._header_comment = header_comment
        self._messages = OrderedDict()

        self.project = project or 'PROJECT'  #: The project name
        self.version = version  #: The project version
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.charset = charset or 'utf-8'

        if creation_date is None:
            creation_date = datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date  #: Creation date of the template
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date  #: Last revision date of the catalog

        self.obsolete = OrderedDict()  #: Dictionary of obsolete messages
        self._num_plurals = None
        self._plural_expr = None

    def _get_header_comment(self):
        # The year comes from the revision date when there is one, and from
        # the current date otherwise.
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        else:
            year = datetime.now(LOCALTZ).strftime('%Y')
        return self._header_comment.replace('PROJECT', self.project) \
                                   .replace('YEAR', year) \
                                   .replace('ORGANIZATION', self.copyright_holder)

    def _set_header_comment(self, string):
        self._header_comment = string

    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
    The header comment for the catalog.

    The stored text is returned with the upper-case variables ``PROJECT``,
    ``YEAR`` and ``ORGANIZATION`` replaced by the project name, the year of
    the revision date (or the current year when there is no revision date)
    and the copyright holder. With the default header and
    ``Catalog(project='Foobar', copyright_holder='Foo Company')`` the first
    line reads ``# Translation template for Foobar.``.

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again.

    :type: `unicode`
    """)

    def _get_mime_headers(self):
        headers = []
        projectIdVersion = self.project
        if self.version:
            projectIdVersion += " " + self.version
        headers.append(('Project-Id-Version', projectIdVersion))
        headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
        headers.append(('POT-Creation-Date', self.creation_date.strftime('%Y-%m-%d %H:%M%z')))

        revision_date = self.revision_date
        if isinstance(revision_date, (int, float)):
            # Numbers have no ``strftime``; treat them as POSIX timestamps and
            # render them in UTC.
            revision_date = time.strftime('%Y-%m-%d %H:%M', time.gmtime(revision_date)) + '+0000'
        elif isinstance(revision_date, (datetime, time_)):
            revision_date = revision_date.strftime('%Y-%m-%d %H:%M%z')
        headers.append(('PO-Revision-Date', revision_date))

        headers.append(('MIME-Version', '1.0'))
        headers.append(('Content-Type',
                        'text/plain; charset=%s' % self.charset))
        headers.append(('Content-Transfer-Encoding', '8bit'))
        headers.append(('Generated-By', 'Potter 1.0\n'))
        return headers

    def _set_mime_headers(self, headers):
        for name, value in headers:
            name = name.lower()
            if name == 'project-id-version':
                parts = value.split(' ')
                if len(parts) > 1:
                    self.project = u' '.join(parts[:-1])
                    self.version = parts[-1]
                else:
                    # The getter writes the bare project name when there is no
                    # version, so a single token is the project.
                    self.project = value
                    self.version = None
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'content-type':
                mimetype, params = parse_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                _, params = parse_header(' ;' + value)
                try:
                    self._num_plurals = int(params.get('nplurals', 2))
                except ValueError:
                    self._num_plurals = 2
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_header_datetime(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_header_datetime(value)

    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    Reading the property returns a list of ``(name, value)`` pairs in this
    order: ``Project-Id-Version``, ``Report-Msgid-Bugs-To``,
    ``POT-Creation-Date``, ``PO-Revision-Date``, ``MIME-Version``,
    ``Content-Type``, ``Content-Transfer-Encoding`` and ``Generated-By``.

    Assigning an iterable of ``(name, value)`` pairs updates the project,
    version, bug report address, charset, plural forms and dates of the
    catalog from the headers it recognizes; other headers are ignored.

    :type: `list`
    """)

    def __contains__(self, id):
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self):
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self):
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        The first object yielded is the special ``msgid ""`` entry, built from
        the MIME headers.

        :rtype: ``iterator``"""
        buf = []
        for name, value in self.mime_headers:
            buf.append('%s: %s' % (name, value))
        yield Message(u'', '\n'.join(buf), flags=set())
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.domain)

    def __delitem__(self, id):
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id):
        """Return the message with the specified ID.

        :param id: the message ID
        :return: the message with the specified ID, or `None` if no such
                 message is in the catalog
        :rtype: `Message`
        """
        return self.get(id)

    @staticmethod
    def _parse_header_string(header_string):
        """Parse the text of the ``msgid ""`` entry into a dictionary that maps
        header names to values."""
        if str is bytes:
            # Python 2: message_from_string only works for str, not for unicode
            if not isinstance(header_string, bytes):
                header_string = header_string.encode('utf8')
            headers = message_from_string(header_string)
            return dict((name.decode('utf8'), value.decode('utf8'))
                        for name, value in headers.items())
        return dict(message_from_string(header_string).items())

    def __setitem__(self, id, message):
        """Add or update the message with the specified ID.

        If a message with that ID (and the context of ``message``) is already
        in the catalog, it is updated to include the locations, comments and
        flags of the new message, and it becomes pluralizable if the new
        message is. Assigning to the empty ID sets the MIME headers and the
        header comment of the catalog instead of storing a message.

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            current.locations = list(distinct(current.locations + message.locations))
            current.auto_comments = list(distinct(current.auto_comments + message.auto_comments))
            current.user_comments = list(distinct(current.user_comments + message.user_comments))
            current.flags |= message.flags
        elif id == '':
            # special treatment for the header message
            self.mime_headers = self._parse_header_string(message.string).items()
            self.header_comment = '\n'.join(['# %s' % comment for comment
                                             in message.user_comments])
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), \
                    'Expected sequence but got %s' % type(message.string)
            self._messages[key] = message

    def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
            user_comments=(), previous_id=(), lineno=None, context=None, formatFlag=None):
        """Add or update the message with the specified ID.

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of strings that determine where a message was found
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        :param formatFlag: the format flag of the message; see `Message`
        :return: the newly constructed message
        :rtype: `Message`
        """
        message = Message(id, string, locations, flags, auto_comments,
                          user_comments, previous_id, lineno=lineno,
                          context=context, formatFlag=formatFlag)
        self[id] = message
        return message

    def get(self, id, context=None):
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        :return: the message with the specified ID, or `None` if no such
                 message is in the catalog
        :rtype: `Message`
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id, context=None):
        """Delete the message with the specified ID and context.

        Nothing happens if the catalog has no such message.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        key = self._key_for(id, context)
        if key in self._messages:
            del self._messages[key]

    @property
    def num_plurals(self):
        """The number of plurals read from a ``Plural-Forms`` header, or ``2``
        when no such header has been set.

        :type: `int`"""
        if self._num_plurals is not None:
            return self._num_plurals
        else:
            return 2

    def _key_for(self, id, context=None):
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key
def getExtractorInstance(code, directoryPath, filemasks, options=None):
    """Create the extractor that is registered under a given name.

    :param code: name of an extractor class defined in ``potter.extractors``
    :param directoryPath: passed to the extractor class as its first argument
    :param filemasks: passed to the extractor class as its second argument
    :param options: dictionary of options passed to the extractor class; a new
                    empty dictionary is used when it is omitted
    :return: an instance of the requested extractor class
    :raise AttributeError: if ``potter.extractors`` has no attribute named ``code``
    """
    import importlib

    # A fresh dictionary per call: a shared default would leak one extractor's
    # option changes into every other extractor created without options.
    if options is None:
        options = {}
    extractorClass = getattr(importlib.import_module("potter.extractors"), code)
    return extractorClass(directoryPath, filemasks, options)
+# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided with the distribution. +# The name of the author may not be used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# This software consists of voluntary contributions made by many +# individuals. 
try:
    # Python 2: text is ``unicode``.
    stringType = unicode
except NameError:
    # Python 3: the ``unicode`` builtin does not exist, ``str`` is the text type.
    stringType = str


class Extractor(object):
    """Base class of all message extractors.

    It walks a directory tree, selects files through include/exclude masks
    and delegates the per-file work to :meth:`extractFromFile`, which every
    concrete extractor overrides.
    """

    def __init__(self, directoryPath, filemasks, options):
        """
        :param directoryPath: root directory that :meth:`run` walks.
        :param filemasks: either a list of include masks, or a dictionary
                          with an ``includeMasks`` list and an optional
                          ``excludeMasks`` list.
        :param options: extractor-specific options.
        """
        self.directoryPath = directoryPath
        self.options = options

        if isinstance(filemasks, dict):
            self.includeMasks = filemasks["includeMasks"]
            self.excludeMasks = filemasks.get("excludeMasks", [])
        else:
            self.includeMasks = filemasks
            self.excludeMasks = []

    def run(self):
        """ Extracts messages.

        :return: An iterator over ``(message, context, location, comment)`` tuples.
        :rtype: ``iterator``
        """
        directoryAbsolutePath = os.path.abspath(self.directoryPath)
        for root, folders, filenames in os.walk(directoryAbsolutePath):
            # Prune hidden/private directories by replacing the list contents
            # in place: os.walk() only honours in-place changes, and removing
            # entries while looping over the same list skips the entry that
            # follows each removed one.
            folders[:] = sorted(folder for folder in folders
                                if not folder.startswith(('.', '_')))
            filenames.sort()
            for filename in filenames:
                filename = relpath(os.path.join(root, filename).replace(os.sep, '/'), self.directoryPath)
                if any(pathmatch(filemask, filename) for filemask in self.excludeMasks):
                    continue
                for filemask in self.includeMasks:
                    if pathmatch(filemask, filename):
                        filepath = os.path.join(directoryAbsolutePath, filename)
                        for message, context, position, comments in self.extractFromFile(filepath):
                            yield message, context, filename + ":" + str(position), comments
                        # One matching mask is enough; going on would extract
                        # the same file again for every further matching mask.
                        break

    def extractFromFile(self, filepath):
        """ Extracts messages from a specific file.

        Placeholder that concrete extractors override; this base
        implementation does nothing and returns ``None``.

        :return: An iterator over ``(message, context, position, comments)`` tuples.
        :rtype: ``iterator``
        """
        pass


class javascript(Extractor):
    """ Extract messages from JavaScript source code.
    """

    empty_msgid_warning = ( '%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") '
                            'returns the header entry with meta information, not the empty string.' )

    def extractJavascriptFromFile(self, fileObject):
        """Scan the token stream of ``fileObject`` for calls to keyword functions.

        Only names listed in the ``keywords`` option are treated as
        translation functions.  String arguments joined with ``+`` are
        concatenated; non-string arguments are reported as ``None``.

        :param fileObject: readable object whose ``read()`` returns the source text.
        :return: An iterator over ``(lineno, funcname, messages, comments)``
                 tuples, where ``messages`` is a single argument or a tuple
                 of arguments and ``comments`` is the list of translator
                 comment lines attached to the call.
        """
        from potter.jslexer import tokenize, unquote_string
        funcname = message_lineno = None
        messages = []
        last_argument = None
        translator_comments = []
        concatenate_next = False
        last_token = None
        # -1: outside any keyword call, 0: directly inside the argument list
        # of a keyword call, > 0: inside nested parentheses of that call.
        call_stack = -1
        comment_tags = self.options.get('commentTags', [])
        keywords = self.options.get('keywords', {}).keys()

        for token in tokenize(fileObject.read()):
            if token.type == 'operator' and token.value == '(':
                if funcname:
                    message_lineno = token.lineno
                    call_stack += 1

            elif call_stack == -1 and token.type == 'linecomment':
                value = token.value[2:].strip()
                # A line comment directly below a collected translator comment
                # continues it, whether or not it carries a tag itself.
                if translator_comments and \
                   translator_comments[-1][0] == token.lineno - 1:
                    translator_comments.append((token.lineno, value))
                    continue

                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        translator_comments.append((token.lineno, value.strip()))
                        break

            elif token.type == 'multilinecomment':
                # only one multi-line comment may precede a translation
                translator_comments = []
                value = token.value[2:-2].strip()
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        lines = value.splitlines()
                        if lines:
                            lines[0] = lines[0].strip()
                            lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
                            for offset, line in enumerate(lines):
                                translator_comments.append((token.lineno + offset,
                                                            line))
                        break

            elif funcname and call_stack == 0:
                if token.type == 'operator' and token.value == ')':
                    # End of the keyword call: flush the pending argument and
                    # normalise the collected arguments.
                    if last_argument is not None:
                        messages.append(last_argument)
                    if len(messages) > 1:
                        messages = tuple(messages)
                    elif messages:
                        messages = messages[0]
                    else:
                        messages = None

                    # Comments don't apply unless they immediately precede the
                    # message
                    if translator_comments and \
                       translator_comments[-1][0] < message_lineno - 1:
                        translator_comments = []

                    if messages is not None:
                        yield (message_lineno, funcname, messages,
                               [comment[1] for comment in translator_comments])

                    funcname = message_lineno = last_argument = None
                    concatenate_next = False
                    translator_comments = []
                    messages = []
                    call_stack = -1

                elif token.type == 'string':
                    new_value = unquote_string(token.value)
                    if concatenate_next:
                        last_argument = (last_argument or '') + new_value
                        concatenate_next = False
                    else:
                        last_argument = new_value

                elif token.type == 'operator':
                    if token.value == ',':
                        if last_argument is not None:
                            messages.append(last_argument)
                            last_argument = None
                        else:
                            # Argument was not a string literal.
                            messages.append(None)
                        concatenate_next = False
                    elif token.value == '+':
                        concatenate_next = True

            elif call_stack > 0 and token.type == 'operator' \
                 and token.value == ')':
                call_stack -= 1

            elif funcname and call_stack == -1:
                # The keyword name was not followed by "(": not a call.
                funcname = None

            elif call_stack == -1 and token.type == 'name' and \
                 token.value in keywords and \
                 (last_token is None or last_token.type != 'name' or
                  last_token.value != 'function'):
                # A keyword name that is not a definition ("function <name>").
                funcname = token.value

            last_token = token

    def extractFromFile(self, filepath):
        """Extract the messages of one file, validated against the keyword specs.

        Each keyword maps to a spec: a list of 1-based argument indexes
        holding the message(s); an index wrapped in a list/tuple marks the
        argument that holds the context.  Calls that do not provide every
        argument named by the spec as a string literal are skipped.  An empty
        msgid is skipped after a warning is written to standard error.

        :param filepath: path of the file to read (decoded as UTF-8, a BOM is ignored).
        :return: An iterator over ``(message, context, position, comments)`` tuples.
        """
        keywords = self.options.get('keywords', {})
        with codecs.open(filepath, 'r', encoding='utf-8-sig') as fileObject:
            for lineno, funcname, messages, comments in self.extractJavascriptFromFile(fileObject):
                if funcname:
                    spec = keywords[funcname] or (1,)
                else:
                    spec = (1,)
                if not isinstance(messages, (list, tuple)):
                    messages = [messages]
                if not messages:
                    continue

                # Validate the messages against the keyword's specification
                context = None
                msgs = []
                invalid = False
                # last_index is 1 based like the keyword spec
                last_index = len(messages)
                for index in spec:
                    if isinstance(index, (list, tuple)):
                        if last_index < index[0]:
                            # Not enough arguments to hold the context
                            invalid = True
                            break
                        context = messages[index[0] - 1]
                        continue
                    if last_index < index:
                        # Not enough arguments
                        invalid = True
                        break
                    message = messages[index - 1]
                    if message is None:
                        invalid = True
                        break
                    msgs.append(message)
                if invalid:
                    continue

                # keyword spec indexes are 1 based, therefore '-1'
                if isinstance(spec[0], (tuple, list)):
                    # context-aware *gettext method
                    first_msg_index = spec[1] - 1
                else:
                    first_msg_index = spec[0] - 1
                if not messages[first_msg_index]:
                    # An empty string msgid isn't valid, emit a warning.
                    # Written with sys.stderr.write(): the "print >> stream"
                    # statement form is not available once print_function is
                    # imported from __future__.
                    where = '%s:%i' % (filepath or '(unknown)', lineno)
                    sys.stderr.write(self.empty_msgid_warning % where + '\n')
                    continue

                messages = tuple(msgs)
                if len(messages) == 1:
                    messages = messages[0]

                yield messages, context, lineno, comments


class cpp(javascript):
    """ Extract messages from C++ source code.

    Reuses the JavaScript extractor without any change.
    """
    pass


class txt(Extractor):
    """ Extract messages from plain text files.

    Every non-blank line, stripped of surrounding whitespace, is one message
    whose position is its 1-based line number.
    """

    def extractFromFile(self, filepath):
        """
        :param filepath: path of the file to read (decoded as UTF-8, a BOM is ignored).
        :return: An iterator over ``(message, None, position, [])`` tuples.
        """
        with codecs.open(filepath, "r", encoding='utf-8-sig') as fileObject:
            for lineNumber, line in enumerate(fileObject.readlines(), 1):
                line = line.strip()
                if line:
                    yield line, None, str(lineNumber), []


class json(Extractor):
    """ Extract messages from JSON files.

    The ``keywords`` option names the dictionary keys whose values are
    translatable.  The value of such a key may be a string, a list of
    strings or a dictionary of strings.
    """

    def __init__(self, directoryPath=None, filemasks=None, options=None):
        """
        :param directoryPath: root directory that ``run()`` walks.
        :param filemasks: include masks or include/exclude dictionary;
                          ``None`` means no masks.
        :param options: extractor options; ``None`` means no options.
        """
        if filemasks is None:
            filemasks = []
        if options is None:
            options = {}
        super(json, self).__init__(directoryPath, filemasks, options)
        # Path from the document root to the value currently being visited.
        self.breadcrumbs = []
        self.keywords = self.options.get("keywords", {})

    def setOptions(self, options):
        """Replace the options and re-read the ``keywords`` entry from them."""
        self.options = options
        self.keywords = self.options.get("keywords", {})

    @staticmethod
    def formatBreadcrumbs(breadcrumbs):
        """Render a breadcrumb list as text, e.g. ``["a", 0, "b"]`` as ``a[0].b``.

        Integer pieces are list indexes and are rendered in brackets; other
        pieces are dictionary keys, joined to what precedes them with a dot.
        An empty list is rendered as an empty string.
        """
        pieces = []
        for piece in breadcrumbs:
            if isinstance(piece, int):
                pieces.append("[" + str(piece) + "]")
            elif pieces:
                pieces.append("." + piece)
            else:
                pieces.append(piece)
        return "".join(pieces)

    def extractFromFile(self, filepath):
        """
        :param filepath: path of the JSON file (decoded as UTF-8, a BOM is ignored).
        :return: An iterator over ``(message, None, position, [])`` tuples
                 whose position is the formatted breadcrumb path.
        """
        # utf-8-sig, like the other extractors: a leading BOM would otherwise
        # reach the JSON parser and make it fail.
        with codecs.open(filepath, "r", encoding='utf-8-sig') as fileObject:
            for message, breadcrumbs in self.extractFromString(fileObject.read()):
                yield message, None, self.formatBreadcrumbs(breadcrumbs), []

    def extractFromString(self, string):
        """Extract the non-empty messages of a JSON document given as text.

        :return: An iterator over ``(message, breadcrumbs)`` tuples; each
                 ``breadcrumbs`` is an independent list.
        :raises Exception: if the document root is neither a list nor a dictionary.
        """
        self.breadcrumbs = []
        jsonDocument = jsonParser.loads(string)
        if isinstance(jsonDocument, list):
            found = self.parseList(jsonDocument)
        elif isinstance(jsonDocument, dict):
            found = self.parseDictionary(jsonDocument)
        else:
            raise Exception("Unexpected JSON document parent structure (not a list or a dictionary). You must extend the JSON extractor to support it.")
        for message, breadcrumbs in found:
            if message:  # Skip empty strings.
                yield message, breadcrumbs

    def parseList(self, itemsList):
        """Search the lists and dictionaries nested in a list for keywords."""
        for index, listItem in enumerate(itemsList):
            self.breadcrumbs.append(index)
            if isinstance(listItem, list):
                for message, breadcrumbs in self.parseList(listItem):
                    yield message, breadcrumbs
            elif isinstance(listItem, dict):
                for message, breadcrumbs in self.parseDictionary(listItem):
                    yield message, breadcrumbs
            del self.breadcrumbs[-1]

    def parseDictionary(self, dictionary):
        """Yield the values of keyword keys and search every other container."""
        for keyword in dictionary:
            value = dictionary[keyword]
            self.breadcrumbs.append(keyword)
            if keyword in self.keywords:
                if isinstance(value, stringType):
                    # Yield a copy: self.breadcrumbs keeps changing while the
                    # traversal goes on.
                    yield value, list(self.breadcrumbs)
                elif isinstance(value, list):
                    for message, breadcrumbs in self.extractList(value):
                        yield message, breadcrumbs
                elif isinstance(value, dict):
                    for message, breadcrumbs in self.extractDictionary(value):
                        yield message, breadcrumbs
            elif isinstance(value, list):
                for message, breadcrumbs in self.parseList(value):
                    yield message, breadcrumbs
            elif isinstance(value, dict):
                for message, breadcrumbs in self.parseDictionary(value):
                    yield message, breadcrumbs
            del self.breadcrumbs[-1]

    def extractList(self, itemsList):
        """Yield every string that is a direct item of a keyword list."""
        for index, listItem in enumerate(itemsList):
            self.breadcrumbs.append(index)
            if isinstance(listItem, stringType):
                yield listItem, list(self.breadcrumbs)
            del self.breadcrumbs[-1]

    def extractDictionary(self, dictionary):
        """Yield every string that is a direct value of a keyword dictionary."""
        for keyword in dictionary:
            self.breadcrumbs.append(keyword)
            if isinstance(dictionary[keyword], stringType):
                yield dictionary[keyword], list(self.breadcrumbs)
            del self.breadcrumbs[-1]


class xml(Extractor):
    """ Extract messages from XML files.

    The ``keywords`` option maps element names to per-element settings:
    ``extractJson`` parses the element text as JSON with the given JSON
    extractor options, ``locationAttributes`` lists attributes whose values
    are appended to the reported position.
    """

    def __init__(self, directoryPath, filemasks, options):
        super(xml, self).__init__(directoryPath, filemasks, options)
        self.keywords = self.options.get("keywords", {})
        # Created on first use by getJsonExtractor().
        self.jsonExtractor = None

    def getJsonExtractor(self):
        """Return the shared JSON extractor, creating it on first use."""
        if not self.jsonExtractor:
            self.jsonExtractor = json()
        return self.jsonExtractor

    def extractFromFile(self, filepath):
        """
        :param filepath: path of the XML file (decoded as UTF-8, a BOM is ignored).
        :return: An iterator over ``(message, None, position, [])`` tuples
                 whose position starts with the source line of the element.
        """
        from lxml import etree
        with codecs.open(filepath, "r", encoding='utf-8-sig') as fileObject:
            xmlDocument = etree.parse(fileObject)
            for keyword in self.keywords:
                settings = self.keywords[keyword]
                for element in xmlDocument.iter(keyword):
                    position = str(element.sourceline)
                    if "extractJson" in settings:
                        jsonExtractor = self.getJsonExtractor()
                        jsonExtractor.setOptions(settings["extractJson"])
                        for message, breadcrumbs in jsonExtractor.extractFromString(element.text):
                            yield message, None, position + ":" + json.formatBreadcrumbs(breadcrumbs), []
                    elif element.text is not None:
                        if "locationAttributes" in settings:
                            attributes = [element.get(attribute) for attribute in settings["locationAttributes"] if attribute in element.attrib]
                            position += " ({attributes})".format(attributes=", ".join(attributes))
                        yield element.text, None, position, []
0000000000..b215c6170e --- /dev/null +++ b/source/tools/i18n/potter/jslexer.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008-2011 Edgewall Software +# Copyright (C) 2013 Wildfire Games +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided with the distribution. +# The name of the author may not be used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs: +# • http://babel.edgewall.org/log/trunk/babel/messages +# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter + +"""A simple JavaScript 1.5 lexer which is used for the JavaScript +extractor. 
+""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +from operator import itemgetter +import re + +operators = [ + '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', + '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', + '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', + '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':' +] +operators.sort(key=lambda x: -len(x)) + +escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} + +rules = [ + (None, re.compile(r'\s+(?u)')), + (None, re.compile(r'