Remove file missed by previous commit
This was SVN commit r28125.
This commit is contained in:
parent
53e00e1953
commit
5a43f514dc
@ -1,188 +0,0 @@
|
||||
# Copyright (C) 2008-2011 Edgewall Software
|
||||
# Copyright (C) 2013-2014 Wildfire Games.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
|
||||
# following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
|
||||
# disclaimer.
|
||||
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
# The name of the author may not be used to endorse or promote products derived from this software without specific
|
||||
# prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# This software consists of voluntary contributions made by many
|
||||
# individuals. For the exact contribution history, see the revision
|
||||
# history and logs:
|
||||
# • http://babel.edgewall.org/log/trunk/babel/messages
|
||||
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/extractors/jslexer.py
|
||||
|
||||
"""A simple JavaScript 1.5 lexer which is used for the JavaScript
|
||||
extractor.
|
||||
"""
|
||||
|
||||
from operator import itemgetter
|
||||
import re
|
||||
|
||||
# All fixed-spelling operators recognised by the lexer.  The list is ordered
# longest-first so that the regex alternation built from it in `rules`
# prefers e.g. '>>>=' over '>>'.  The division operators are deliberately
# absent: '/' is ambiguous with regexp literals and is handled separately
# by `tokenize` through `division_re` / `regex_re`.
operators = sorted([
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
], key=len, reverse=True)

# Single-character escapes understood by `unquote_string`, mapped to the
# character they stand for.
escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

# (token type, compiled pattern) pairs, tried in this order by `tokenize`.
# A token type of None means the matched text is skipped without producing
# a token.
#
# Flags are passed to re.compile() rather than written as inline flags at the
# end of the pattern: trailing global flags are rejected by Python 3.11+.
rules = [
    # whitespace
    (None, re.compile(r'\s+', re.UNICODE)),
    # HTML-style comment opener, skipped up to the end of the line
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('name', re.compile(r'(\$+\w*|[^\W\d]\w*)', re.UNICODE)),
    # The hexadecimal alternative must come first; otherwise the decimal
    # alternative matches the leading '0' of '0x1F' and the hex form can
    # never be reached.
    ('number', re.compile(r'''(
        (0x[a-fA-F0-9]+) |
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)?
    )''', re.VERBOSE)),
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL))
]

# '/' or '/=' when a division operator is expected.
division_re = re.compile(r'/=?')
# A regular expression literal including its trailing flag letters.
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
# Any single line terminator; used for line counting.
line_re = re.compile(r'(\r\n|\n|\r)')
# A backslash immediately followed by a line terminator.
line_join_re = re.compile(r'\\' + line_re.pattern)
# One to four hexadecimal digits, as found after a '\u' escape.
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
|
||||
|
||||
|
||||
class Token(tuple):
    """A lexer token as produced by `tokenize`.

    A `Token` is a plain 3-tuple ``(type, value, lineno)`` whose items are
    additionally reachable through read-only attributes of the same names.
    """

    # No per-instance __dict__: a token carries nothing beyond its tuple items.
    __slots__ = ()

    def __new__(cls, type, value, lineno):
        fields = (type, value, lineno)
        return tuple.__new__(cls, fields)

    @property
    def type(self):
        return self[0]

    @property
    def value(self):
        return self[1]

    @property
    def lineno(self):
        return self[2]
|
||||
|
||||
|
||||
def indicates_division(token):
    """Tell whether a ``/`` right after `token` would be a division operator.

    For operator tokens this is true only for the closing brackets and the
    increment/decrement operators; for every other token it is true when the
    token is a name, number, string or regexp.
    """
    kind = token.type
    if kind != 'operator':
        return kind in ('name', 'number', 'string', 'regexp')
    return token.value in (')', ']', '}', '++', '--')
|
||||
|
||||
|
||||
def unquote_string(string):
    """Unquote a string with JavaScript rules.  The string has to start with
    string delimiters (``'`` or ``"``.)

    A backslash followed by a line terminator is reduced to the line
    terminator alone, the single-character escapes listed in `escapes` and
    four-digit ``\\uXXXX`` escapes are decoded, and for any other escaped
    character only the backslash is removed.  A lone backslash at the very
    end of the string body is kept as-is.

    :param string: the string literal including its surrounding quotes
    :return: a string
    """
    assert string and string[0] == string[-1] and string[0] in '"\'', \
        'string provided is not properly delimited'
    string = line_join_re.sub('\\1', string[1:-1])
    result = []
    add = result.append
    pos = 0
    end = len(string)

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # A backslash as the very last character has nothing to escape.
        # Keep it literally instead of indexing past the end of the string.
        if escape_pos + 1 >= end:
            add('\\')
            pos = end
            break

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  Try to consume up to four hexadecimal
        # characters and interpret them as a unicode code point.  If
        # that is not possible, put all the consumed characters into the
        # string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(chr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        # backslash + 'u' + four hex digits
                        pos = escape_pos + 6
                        continue
                # fewer than four digits (or undecodable): emit the
                # escape letter and the digits verbatim
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < end:
        add(string[pos:])

    return u''.join(result)
|
||||
|
||||
|
||||
def tokenize(source):
    """Tokenize a JavaScript source.

    Whitespace and HTML comment openers are skipped; everything else that
    matches one of `rules` is yielded, comments included.  Characters that
    match nothing are skipped one at a time.

    :param source: the JavaScript source text
    :return: generator of `Token` objects
    """
    may_divide = False
    pos = 0
    lineno = 1
    end = len(source)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # processed significant token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # woops. invalid syntax. jump one char ahead and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            # Comments are yielded but, like whitespace, must not influence
            # whether a following '/' is a division or starts a regexp:
            # in ``a /* c */ / b`` the slash is still a division.
            if token_type not in ('linecomment', 'multilinecomment'):
                may_divide = indicates_division(token)
            yield token
        # keep the line counter in step with every consumed line terminator,
        # including those inside skipped or multi-line tokens
        lineno += len(line_re.findall(token_value))
        pos = match.end()
|
Loading…
Reference in New Issue
Block a user