You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Enso-Bot/venv/Lib/site-packages/config/tokens.py

1226 lines
34 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright 2018-2020 by Vinay Sajip. All Rights Reserved.
#
from __future__ import unicode_literals
from string import digits
import sys
if sys.version_info[0] < 3:
import bisect
PRINTABLE_RANGES = [
(32, 126),
(161, 172),
(174, 887),
(890, 895),
(900, 906),
(908, 908),
(910, 929),
(931, 1327),
(1329, 1366),
(1369, 1375),
(1377, 1415),
(1417, 1418),
(1421, 1423),
(1425, 1479),
(1488, 1514),
(1520, 1524),
(1542, 1563),
(1566, 1756),
(1758, 1805),
(1808, 1866),
(1869, 1969),
(1984, 2042),
(2048, 2093),
(2096, 2110),
(2112, 2139),
(2142, 2142),
(2208, 2228),
(2275, 2435),
(2437, 2444),
(2447, 2448),
(2451, 2472),
(2474, 2480),
(2482, 2482),
(2486, 2489),
(2492, 2500),
(2503, 2504),
(2507, 2510),
(2519, 2519),
(2524, 2525),
(2527, 2531),
(2534, 2555),
(2561, 2563),
(2565, 2570),
(2575, 2576),
(2579, 2600),
(2602, 2608),
(2610, 2611),
(2613, 2614),
(2616, 2617),
(2620, 2620),
(2622, 2626),
(2631, 2632),
(2635, 2637),
(2641, 2641),
(2649, 2652),
(2654, 2654),
(2662, 2677),
(2689, 2691),
(2693, 2701),
(2703, 2705),
(2707, 2728),
(2730, 2736),
(2738, 2739),
(2741, 2745),
(2748, 2757),
(2759, 2761),
(2763, 2765),
(2768, 2768),
(2784, 2787),
(2790, 2801),
(2809, 2809),
(2817, 2819),
(2821, 2828),
(2831, 2832),
(2835, 2856),
(2858, 2864),
(2866, 2867),
(2869, 2873),
(2876, 2884),
(2887, 2888),
(2891, 2893),
(2902, 2903),
(2908, 2909),
(2911, 2915),
(2918, 2935),
(2946, 2947),
(2949, 2954),
(2958, 2960),
(2962, 2965),
(2969, 2970),
(2972, 2972),
(2974, 2975),
(2979, 2980),
(2984, 2986),
(2990, 3001),
(3006, 3010),
(3014, 3016),
(3018, 3021),
(3024, 3024),
(3031, 3031),
(3046, 3066),
(3072, 3075),
(3077, 3084),
(3086, 3088),
(3090, 3112),
(3114, 3129),
(3133, 3140),
(3142, 3144),
(3146, 3149),
(3157, 3158),
(3160, 3162),
(3168, 3171),
(3174, 3183),
(3192, 3199),
(3201, 3203),
(3205, 3212),
(3214, 3216),
(3218, 3240),
(3242, 3251),
(3253, 3257),
(3260, 3268),
(3270, 3272),
(3274, 3277),
(3285, 3286),
(3294, 3294),
(3296, 3299),
(3302, 3311),
(3313, 3314),
(3329, 3331),
(3333, 3340),
(3342, 3344),
(3346, 3386),
(3389, 3396),
(3398, 3400),
(3402, 3406),
(3415, 3415),
(3423, 3427),
(3430, 3445),
(3449, 3455),
(3458, 3459),
(3461, 3478),
(3482, 3505),
(3507, 3515),
(3517, 3517),
(3520, 3526),
(3530, 3530),
(3535, 3540),
(3542, 3542),
(3544, 3551),
(3558, 3567),
(3570, 3572),
(3585, 3642),
(3647, 3675),
(3713, 3714),
(3716, 3716),
(3719, 3720),
(3722, 3722),
(3725, 3725),
(3732, 3735),
(3737, 3743),
(3745, 3747),
(3749, 3749),
(3751, 3751),
(3754, 3755),
(3757, 3769),
(3771, 3773),
(3776, 3780),
(3782, 3782),
(3784, 3789),
(3792, 3801),
(3804, 3807),
(3840, 3911),
(3913, 3948),
(3953, 3991),
(3993, 4028),
(4030, 4044),
(4046, 4058),
(4096, 4293),
(4295, 4295),
(4301, 4301),
(4304, 4680),
(4682, 4685),
(4688, 4694),
(4696, 4696),
(4698, 4701),
(4704, 4744),
(4746, 4749),
(4752, 4784),
(4786, 4789),
(4792, 4798),
(4800, 4800),
(4802, 4805),
(4808, 4822),
(4824, 4880),
(4882, 4885),
(4888, 4954),
(4957, 4988),
(4992, 5017),
(5024, 5109),
(5112, 5117),
(5120, 5759),
(5761, 5788),
(5792, 5880),
(5888, 5900),
(5902, 5908),
(5920, 5942),
(5952, 5971),
(5984, 5996),
(5998, 6000),
(6002, 6003),
(6016, 6109),
(6112, 6121),
(6128, 6137),
(6144, 6157),
(6160, 6169),
(6176, 6263),
(6272, 6314),
(6320, 6389),
(6400, 6430),
(6432, 6443),
(6448, 6459),
(6464, 6464),
(6468, 6509),
(6512, 6516),
(6528, 6571),
(6576, 6601),
(6608, 6618),
(6622, 6683),
(6686, 6750),
(6752, 6780),
(6783, 6793),
(6800, 6809),
(6816, 6829),
(6832, 6846),
(6912, 6987),
(6992, 7036),
(7040, 7155),
(7164, 7223),
(7227, 7241),
(7245, 7295),
(7360, 7367),
(7376, 7414),
(7416, 7417),
(7424, 7669),
(7676, 7957),
(7960, 7965),
(7968, 8005),
(8008, 8013),
(8016, 8023),
(8025, 8025),
(8027, 8027),
(8029, 8029),
(8031, 8061),
(8064, 8116),
(8118, 8132),
(8134, 8147),
(8150, 8155),
(8157, 8175),
(8178, 8180),
(8182, 8190),
(8208, 8231),
(8240, 8286),
(8304, 8305),
(8308, 8334),
(8336, 8348),
(8352, 8382),
(8400, 8432),
(8448, 8587),
(8592, 9210),
(9216, 9254),
(9280, 9290),
(9312, 11123),
(11126, 11157),
(11160, 11193),
(11197, 11208),
(11210, 11217),
(11244, 11247),
(11264, 11310),
(11312, 11358),
(11360, 11507),
(11513, 11557),
(11559, 11559),
(11565, 11565),
(11568, 11623),
(11631, 11632),
(11647, 11670),
(11680, 11686),
(11688, 11694),
(11696, 11702),
(11704, 11710),
(11712, 11718),
(11720, 11726),
(11728, 11734),
(11736, 11742),
(11744, 11842),
(11904, 11929),
(11931, 12019),
(12032, 12245),
(12272, 12283),
(12289, 12351),
(12353, 12438),
(12441, 12543),
(12549, 12589),
(12593, 12686),
(12688, 12730),
(12736, 12771),
(12784, 12830),
(12832, 13054),
(13056, 19893),
(19904, 40917),
(40960, 42124),
(42128, 42182),
(42192, 42539),
(42560, 42743),
(42752, 42925),
(42928, 42935),
(42999, 43051),
(43056, 43065),
(43072, 43127),
(43136, 43204),
(43214, 43225),
(43232, 43261),
(43264, 43347),
(43359, 43388),
(43392, 43469),
(43471, 43481),
(43486, 43518),
(43520, 43574),
(43584, 43597),
(43600, 43609),
(43612, 43714),
(43739, 43766),
(43777, 43782),
(43785, 43790),
(43793, 43798),
(43808, 43814),
(43816, 43822),
(43824, 43877),
(43888, 44013),
(44016, 44025),
(44032, 55203),
(55216, 55238),
(55243, 55291),
(63744, 64109),
(64112, 64217),
(64256, 64262),
(64275, 64279),
(64285, 64310),
(64312, 64316),
(64318, 64318),
(64320, 64321),
(64323, 64324),
(64326, 64449),
(64467, 64831),
(64848, 64911),
(64914, 64967),
(65008, 65021),
(65024, 65049),
(65056, 65106),
(65108, 65126),
(65128, 65131),
(65136, 65140),
(65142, 65276),
(65281, 65470),
(65474, 65479),
(65482, 65487),
(65490, 65495),
(65498, 65500),
(65504, 65510),
(65512, 65518),
(65532, 65533),
(65536, 65547),
(65549, 65574),
(65576, 65594),
(65596, 65597),
(65599, 65613),
(65616, 65629),
(65664, 65786),
(65792, 65794),
(65799, 65843),
(65847, 65932),
(65936, 65947),
(65952, 65952),
(66000, 66045),
(66176, 66204),
(66208, 66256),
(66272, 66299),
(66304, 66339),
(66352, 66378),
(66384, 66426),
(66432, 66461),
(66463, 66499),
(66504, 66517),
(66560, 66717),
(66720, 66729),
(66816, 66855),
(66864, 66915),
(66927, 66927),
(67072, 67382),
(67392, 67413),
(67424, 67431),
(67584, 67589),
(67592, 67592),
(67594, 67637),
(67639, 67640),
(67644, 67644),
(67647, 67669),
(67671, 67742),
(67751, 67759),
(67808, 67826),
(67828, 67829),
(67835, 67867),
(67871, 67897),
(67903, 67903),
(67968, 68023),
(68028, 68047),
(68050, 68099),
(68101, 68102),
(68108, 68115),
(68117, 68119),
(68121, 68147),
(68152, 68154),
(68159, 68167),
(68176, 68184),
(68192, 68255),
(68288, 68326),
(68331, 68342),
(68352, 68405),
(68409, 68437),
(68440, 68466),
(68472, 68497),
(68505, 68508),
(68521, 68527),
(68608, 68680),
(68736, 68786),
(68800, 68850),
(68858, 68863),
(69216, 69246),
(69632, 69709),
(69714, 69743),
(69759, 69820),
(69822, 69825),
(69840, 69864),
(69872, 69881),
(69888, 69940),
(69942, 69955),
(69968, 70006),
(70016, 70093),
(70096, 70111),
(70113, 70132),
(70144, 70161),
(70163, 70205),
(70272, 70278),
(70280, 70280),
(70282, 70285),
(70287, 70301),
(70303, 70313),
(70320, 70378),
(70384, 70393),
(70400, 70403),
(70405, 70412),
(70415, 70416),
(70419, 70440),
(70442, 70448),
(70450, 70451),
(70453, 70457),
(70460, 70468),
(70471, 70472),
(70475, 70477),
(70480, 70480),
(70487, 70487),
(70493, 70499),
(70502, 70508),
(70512, 70516),
(70784, 70855),
(70864, 70873),
(71040, 71093),
(71096, 71133),
(71168, 71236),
(71248, 71257),
(71296, 71351),
(71360, 71369),
(71424, 71449),
(71453, 71467),
(71472, 71487),
(71840, 71922),
(71935, 71935),
(72384, 72440),
(73728, 74649),
(74752, 74862),
(74864, 74868),
(74880, 75075),
(77824, 78894),
(82944, 83526),
(92160, 92728),
(92736, 92766),
(92768, 92777),
(92782, 92783),
(92880, 92909),
(92912, 92917),
(92928, 92997),
(93008, 93017),
(93019, 93025),
(93027, 93047),
(93053, 93071),
(93952, 94020),
(94032, 94078),
(94095, 94111),
(110592, 110593),
(113664, 113770),
(113776, 113788),
(113792, 113800),
(113808, 113817),
(113820, 113823),
(118784, 119029),
(119040, 119078),
(119081, 119154),
(119163, 119272),
(119296, 119365),
(119552, 119638),
(119648, 119665),
(119808, 119892),
(119894, 119964),
(119966, 119967),
(119970, 119970),
(119973, 119974),
(119977, 119980),
(119982, 119993),
(119995, 119995),
(119997, 120003),
(120005, 120069),
(120071, 120074),
(120077, 120084),
(120086, 120092),
(120094, 120121),
(120123, 120126),
(120128, 120132),
(120134, 120134),
(120138, 120144),
(120146, 120485),
(120488, 120779),
(120782, 121483),
(121499, 121503),
(121505, 121519),
(124928, 125124),
(125127, 125142),
(126464, 126467),
(126469, 126495),
(126497, 126498),
(126500, 126500),
(126503, 126503),
(126505, 126514),
(126516, 126519),
(126521, 126521),
(126523, 126523),
(126530, 126530),
(126535, 126535),
(126537, 126537),
(126539, 126539),
(126541, 126543),
(126545, 126546),
(126548, 126548),
(126551, 126551),
(126553, 126553),
(126555, 126555),
(126557, 126557),
(126559, 126559),
(126561, 126562),
(126564, 126564),
(126567, 126570),
(126572, 126578),
(126580, 126583),
(126585, 126588),
(126590, 126590),
(126592, 126601),
(126603, 126619),
(126625, 126627),
(126629, 126633),
(126635, 126651),
(126704, 126705),
(126976, 127019),
(127024, 127123),
(127136, 127150),
(127153, 127167),
(127169, 127183),
(127185, 127221),
(127232, 127244),
(127248, 127278),
(127280, 127339),
(127344, 127386),
(127462, 127490),
(127504, 127546),
(127552, 127560),
(127568, 127569),
(127744, 128377),
(128379, 128419),
(128421, 128720),
(128736, 128748),
(128752, 128755),
(128768, 128883),
(128896, 128980),
(129024, 129035),
(129040, 129095),
(129104, 129113),
(129120, 129159),
(129168, 129197),
(129296, 129304),
(129408, 129412),
(129472, 129472),
(131072, 173782),
(173824, 177972),
(177984, 178205),
(178208, 183969),
(194560, 195101),
(917760, 917999),
]
def is_printable(c):
    """
    Tell whether the character ``c`` lies in one of ``PRINTABLE_RANGES``.

    The table holds inclusive ``(first, last)`` code point pairs in ascending
    order, so the candidate range is located with a binary search.

    :param c: A single character.
    :return: ``True`` if ``ord(c)`` falls inside one of the ranges.
    :rtype: bool
    """
    code = ord(c)
    # bisect_right gives the insertion point for (code, code); clamp it so it
    # is always a valid index into the table.
    pos = bisect.bisect_right(PRINTABLE_RANGES, (code, code))
    pos = min(pos, len(PRINTABLE_RANGES) - 1)
    first, last = PRINTABLE_RANGES[pos]
    if pos > 0 and code < first:
        # The insertion point is just past the range that could contain the
        # code point, so step back to the preceding entry.
        first, last = PRINTABLE_RANGES[pos - 1]
    return first <= code <= last
text_type = unicode
else:
unichr = chr
is_printable = lambda c: c.isprintable()
text_type = str
# Token kinds. Every kind is identified by a short string. The kinds for
# words and literals use an arbitrary representative character.
WORD = 'a'
INTEGER = '0'
FLOAT = '1'
COMPLEX = 'j'
STRING = '"'
# End of input: the kind of the token returned when no characters remain.
EOF = ''
NEWLINE = '\n'
# Punctuation and operator kinds: the identifier is the token text itself.
LCURLY = '{'
RCURLY = '}'
LBRACK = '['
RBRACK = ']'
LPAREN = '('
RPAREN = ')'
LT = '<'
GT = '>'
LE = '<='
GE = '>='
EQ = '=='
ASSIGN = '='
NEQ = '!='
# Alternative spelling of "not equal"; the tokenizer reports it as NEQ.
ALT_NEQ = '<>'
LSHIFT = '<<'
RSHIFT = '>>'
DOT = '.'
COMMA = ','
COLON = ':'
AT = '@'
PLUS = '+'
MINUS = '-'
STAR = '*'
POWER = '**'
SLASH = '/'
TILDE = '~'
SLASHSLASH = '//'
MODULO = '%'
BACKTICK = '`'
DOLLAR = '$'
# Keyword kinds: the identifier is the keyword's canonical spelling.
TRUE = 'true'
FALSE = 'false'
NONE = 'null'
# Python-style spellings, mapped to the canonical ones through PYKEYWORDS.
PYTRUE = 'True'
PYFALSE = 'False'
PYNONE = 'None'
IS = 'is'
IN = 'in'
NOT = 'not'
AND = 'and'
OR = 'or'
BITAND = '&'
BITOR = '|'
BITXOR = '^'
BITNOT = TILDE
# Characters that can start a word; the tokenizer also allows digits in the
# characters that follow the first one.
WORDCHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"
# Words that are reported with their own kind instead of as WORD.
KEYWORDS = {TRUE, FALSE, NONE, IS, IN, NOT, AND, OR}
# Characters the tokenizer treats as punctuation. '=' is absent because the
# tokenizer handles it in a separate branch.
PUNCT = ':-+*/%,.{}[]()@$<>!~&|^'
# Python-style keyword spelling -> canonical keyword.
PYKEYWORDS = {PYTRUE: TRUE, PYFALSE: FALSE, PYNONE: NONE}
# Python value carried by keyword tokens. Keywords not listed here get None
# because the tokenizer looks them up with dict.get().
KEYWORD_VALUES = {
TRUE: True,
PYTRUE: True,
FALSE: False,
PYFALSE: False,
NONE: None,
PYNONE: None,
}
# Kinds of tokens that stand for a scalar literal value.
# NOTE(review): not referenced in this module - presumably consumed by the
# parser; confirm against callers.
SCALAR_TOKENS = {STRING, INTEGER, FLOAT, COMPLEX, FALSE, TRUE, NONE}
class RecognizerError(Exception):
    """Common base class for the exception types defined in this module."""
class TokenizerError(RecognizerError):
    """Raised by the tokenizer when it meets input it cannot tokenize."""

    # Position the error refers to. The class-level default is None; the
    # tokenizer assigns a (line, column) tuple on each instance it raises.
    location = None
class Token(object):
    """
    A lexical token.

    :ivar kind: The token kind (one of the module-level kind constants).
    :ivar text: The token's text as assembled by the tokenizer.
    :ivar value: The value the token denotes, or ``None``.
    :ivar start: ``(line, column)`` where the token starts; ``None`` until
                 assigned by whoever creates the token.
    :ivar end: ``(line, column)`` where the token ends; ``None`` until
               assigned.
    """

    start = end = None

    def __init__(self, kind, text, value=None):
        self.kind = kind
        self.text = text
        self.value = value

    def __repr__(self):
        return 'Token(%s:%s:%s)' % (self.kind, self.text, self.value)

    def __eq__(self, other):
        """
        Tokens are equal when their kind and value match. The source text and
        the start/end positions are deliberately not compared.
        """
        if not isinstance(other, Token):
            return False
        return (self.kind == other.kind) and (self.value == other.value)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so without this method two
        # equal tokens would also compare as "not equal" there.
        return not self.__eq__(other)
# Single-character escape sequences: maps the character that follows a
# backslash inside a string to the character the sequence stands for.
# Numeric escapes (\x, \u, \U) are not listed here; the tokenizer decodes
# those separately.
ESCAPES = {
'a': '\a',
'b': '\b',
'f': '\f',
'n': '\n',
'r': '\r',
't': '\t',
'v': '\v',
'\\': '\\',
'\"': '\"',
'\'': '\'',
'/': '/', # http://api.nobelprize.org/v1/prize.json escapes these
}
class Tokenizer(object):
    """
    Split a character stream into :class:`Token` objects.

    The stream must provide ``read(n)`` returning text and ``readline()``
    (used to skip comments). Lines and columns are counted from 1. The
    tokenizer is its own iterator: iterating yields tokens until end of
    input.
    """

    whitespace = ' \t\r\n'
    quotes = '\'"'
    punct = PUNCT
    wordchars = WORDCHARS
    identchars = WORDCHARS + digits

    def __init__(self, stream):
        """
        :param stream: The text stream to read characters from. If it has a
                       ``filename`` attribute, that is recorded as the
                       tokenizer's ``filename``.
        """
        self.stream = stream
        # lineno/colno: position of the next character to be read.
        # charline/charcol: position of the character most recently returned
        # by get_char().
        self.lineno = self.charline = 1
        self.colno = self.charcol = 1
        self.filename = getattr(stream, 'filename', '<unknown filename>')
        self.pbchars = []  # stack of pushed-back (char, line, col) tuples
        self.pbtokens = []  # stack of pushed-back tokens

    @property
    def remaining(self):  # for debugging
        """
        The part of the stream that has not been read yet. Only works for
        streams that provide ``getvalue()`` and ``tell()``.
        """
        s = self.stream.getvalue()
        p = self.stream.tell()
        return s[p:]

    def push_back(self, c, force=False):
        """
        Push a character back so that the next get_char() returns it again.

        The character is recorded with the current ``charline``/``charcol``.
        End-of-input (an empty string) is never pushed back.

        :param c: The character to push back.
        :param force: By default, whitespace other than a newline is
                      discarded rather than pushed back, since it does not
                      matter between tokens. Pass ``True`` where whitespace
                      is significant, i.e. inside string literals.
        """
        if c and (force or (c == '\n') or (c not in self.whitespace)):
            self.pbchars.append((c, self.charline, self.charcol))

    def get_char(self):
        """
        Get the next char from the stream. Update line and column numbers
        appropriately.

        Pushed-back characters are returned (most recent first) before any
        new character is read from the stream.

        :return: The next character from the stream, or an empty string at
                 end of input.
        :rtype: str
        """
        if self.pbchars:
            t = self.pbchars.pop()
            c = t[0]
            # Rewind to the position the character was recorded with.
            self.charline = self.lineno = t[1]
            self.charcol = self.colno = t[2]
        else:
            self.charline = self.lineno
            self.charcol = self.colno
            c = self.stream.read(1)
        if c:
            if c != '\n':
                self.colno += 1
            else:
                self.lineno += 1
                self.colno = 1
        return c

    def get_token(self):
        """
        Get a token from the stream.

        Multiline string tokenizing is thanks to David Janes (BlogMatrix)

        :return: The next token, with its ``start`` and ``end`` positions
                 set. At end of input a token of kind ``EOF`` is returned.
        :rtype: Token
        :raises TokenizerError: If the input cannot be tokenized. The error's
                                ``location`` is set to a (line, column) pair.
        """
        if self.pbtokens:  # pragma: no cover
            return self.pbtokens.pop()
        stream = self.stream
        token = quoter = ''
        tt = EOF
        get_char = self.get_char

        # noinspection PyShadowingNames
        def get_number(token):
            """
            Scan the rest of a number whose first character(s) are in
            ``token`` and return (kind, text, value, endline, endcol).
            """
            tt = INTEGER
            in_exponent = False
            radix = 0  # 0 means "no radix prefix seen"
            dot_seen = token.find('.') >= 0
            last_was_digit = token[-1].isdigit()
            endline, endcol = self.charline, self.charcol
            while True:
                c = get_char()
                if c == '.':
                    dot_seen = True
                if not c:
                    break
                if c == '_':
                    # An underscore is only allowed directly after a digit.
                    if last_was_digit:
                        token += c
                        last_was_digit = False
                        endline, endcol = self.charline, self.charcol
                        continue
                    e = TokenizerError(
                        'Invalid \'_\' in number: %s' % (token + c)
                    )
                    e.location = (self.charline, self.charcol)
                    raise e
                last_was_digit = False  # unless set in one of the clauses below
                if (radix == 0) and ('0' <= c <= '9'):
                    token += c
                    last_was_digit = True
                    endline, endcol = self.charline, self.charcol
                elif (radix == 2) and ('0' <= c <= '1'):
                    token += c
                    last_was_digit = True
                    endline, endcol = self.charline, self.charcol
                elif (radix == 8) and ('0' <= c <= '7'):
                    token += c
                    last_was_digit = True
                    endline, endcol = self.charline, self.charcol
                elif (radix == 16) and (
                    ('0' <= c <= '9') or ('a' <= c <= 'f') or ('A' <= c <= 'F')
                ):
                    token += c
                    last_was_digit = True
                    endline, endcol = self.charline, self.charcol
                elif c in 'OXoxBb' and token == '0':
                    # Radix prefix: 0o / 0x / 0b
                    if c in 'Oo':
                        radix = 8
                    elif c in 'Xx':
                        radix = 16
                    else:
                        radix = 2
                    token += c
                    endline, endcol = self.charline, self.charcol
                elif c == '.':
                    if (radix != 0) or token.find('.') >= 0 or in_exponent:
                        e = TokenizerError('Invalid character in number: %c' % c)
                        e.location = (self.charline, self.charcol)
                        raise e
                    else:
                        token += c
                        endline, endcol = self.charline, self.charcol
                elif (
                    (radix == 0)
                    and (c == '-')
                    and token.find('-', 1) < 0
                    and in_exponent
                ):
                    # Sign of the exponent
                    token += c
                    endline, endcol = self.charline, self.charcol
                elif (
                    (radix == 0)
                    and (c in 'eE')
                    and (token.find('e') < 0)
                    and (token.find('E') < 0)
                    and (token[-1] != '_')
                ):
                    token += c
                    endline, endcol = self.charline, self.charcol
                    in_exponent = True
                else:
                    break
            # reached the end of any actual number part. Before checking
            # for complex, ensure that the last char wasn't an underscore.
            if token[-1] == '_':
                e = TokenizerError('Invalid \'_\' at end of number: %s' % token)
                e.location = (self.charline, self.charcol - 1)
                raise e
            if c:
                if (radix == 0) and c in 'jJ':
                    token += c
                    endline, endcol = self.charline, self.charcol
                    tt = COMPLEX
                else:
                    # The terminating character belongs to the next token,
                    # unless it is something that cannot follow a number.
                    if c != '.' and not c.isalnum():
                        self.push_back(c)
                    else:
                        e = TokenizerError('Invalid character in number: %c' % c)
                        e.location = (self.charline, self.charcol)
                        raise e
            try:
                s = token.replace('_', '')
                if radix:
                    value = int(s[2:], radix)
                elif token[-1] in 'jJ':
                    value = complex(s)
                elif in_exponent or dot_seen:
                    value = float(s)
                    tt = FLOAT
                else:
                    # A leading zero without a prefix means octal.
                    radix = 8 if s[0] == '0' else 10
                    value = int(s, radix)
            except ValueError:
                # str(token) so Unicode doesn't show u'prefix in repr
                e = TokenizerError('Badly-formed number: %r' % str(token))
                e.location = (startline, startcol)
                raise e
            return tt, token, value, endline, endcol

        # noinspection PyShadowingNames
        def parse_escapes(s):
            """
            Return ``s`` with backslash escape sequences replaced by the
            characters they denote. Raises TokenizerError for an invalid or
            incomplete escape sequence.
            """
            i = s.find('\\')
            if i < 0:
                result = s
            else:
                result = []
                failed = False
                while i >= 0:
                    n = len(s)
                    if i > 0:
                        result.append(s[:i])
                    if i + 1 >= n:
                        # A backslash with nothing after it (possible in a
                        # backtick string): report it as an invalid escape
                        # instead of failing with an IndexError below.
                        failed = True
                        break
                    c = s[i + 1]
                    if c in ESCAPES:
                        result.append(ESCAPES[c])
                        i += 2
                    elif c in 'xXuU':
                        # slen is the length of the whole sequence including
                        # the backslash and the letter: \xHH, \uHHHH or
                        # \UHHHHHHHH.
                        if c in 'xX':
                            slen = 4
                        else:
                            slen = 6 if c == 'u' else 10
                        if (i + slen) > n:
                            failed = True
                            break
                        p = s[i + 2 : i + slen]
                        try:
                            d = int(p, 16)
                            # Reject surrogates and values beyond Unicode.
                            if (0xD800 <= d <= 0xDFFF) or d >= 0x110000:
                                failed = True
                                break
                            result.append(unichr(d))
                            i += slen
                        except ValueError:
                            failed = True
                            break
                    else:
                        failed = True
                        break
                    s = s[i:]
                    i = s.find('\\')
                if failed:
                    e = TokenizerError(
                        'Invalid escape sequence at index %d: %s' % (i, s)
                    )
                    e.location = (startline, startcol)
                    raise e
                result.append(s)
                result = ''.join(result)
            return result

        value = None
        while True:
            c = get_char()
            startline = endline = self.charline
            startcol = endcol = self.charcol
            if not c:
                break
            elif c == '#':
                # Comment: discard the rest of the line and report a NEWLINE.
                stream.readline()
                self.lineno += 1
                self.colno = 1
                endline, endcol = self.lineno, self.colno - 1
                tt = token = NEWLINE
                break
            elif c == '\n':
                endline, endcol = self.lineno, self.colno - 1
                tt = token = NEWLINE
                break
            elif c == '\r':
                # Treat both CR and CR LF as a single NEWLINE.
                c = get_char()
                if c != '\n':
                    self.push_back(c)
                tt = token = NEWLINE
                endline, endcol = self.charline, self.charcol
                break
            elif c == '\\':
                # Line continuation: backslash must be followed by a newline.
                c = get_char()
                if c != '\n':
                    e = TokenizerError('Unexpected character: \\')
                    e.location = self.charline, self.charcol
                    raise e
                endline, endcol = self.charline, self.charcol
                continue
            elif c in self.whitespace:
                continue
            elif c == '`':
                token = quoter = c
                tt = BACKTICK
                endline, endcol = self.charline, self.charcol
                while True:
                    c = get_char()
                    if not c:
                        break
                    if not is_printable(c):
                        e = TokenizerError(
                            'Invalid char %c in `-string: \'%s\'' % (c, token)
                        )
                        e.location = (self.charline, self.charcol)
                        raise e
                    token += c
                    endline, endcol = self.charline, self.charcol
                    if c == quoter:
                        break
                if not c:
                    e = TokenizerError('Unterminated `-string: \'%s\'' % token)
                    e.location = (startline, startcol)
                    raise e
                break
            elif c in self.quotes:
                token = c
                endline, endcol = self.charline, self.charcol
                quote = c
                tt = STRING
                escaped = False
                multiline = False
                # Look ahead up to two characters to tell a triple-quoted
                # string from a single-quoted (possibly empty) one.
                c1 = get_char()
                c1loc = (self.charline, self.charcol)
                if c1 != quote:
                    # c1 is the first character of the string's content, so
                    # it must be kept even if it is whitespace.
                    self.push_back(c1, force=True)
                else:
                    c2 = get_char()
                    if c2 != quote:
                        self.push_back(c2)
                        if not c2:
                            self.charline, self.charcol = c1loc
                        self.push_back(c1)
                    else:
                        multiline = True
                        token += quote
                        token += quote
                # Keep the quoting string around for later
                quoter = token
                while True:
                    c = get_char()
                    if not c:
                        break
                    token += c
                    endline, endcol = self.charline, self.charcol
                    if (c == quote) and not escaped:
                        if not multiline or (
                            len(token) >= 6
                            and token.endswith(token[:3])
                            and token[-4] != '\\'
                        ):
                            break
                    if c == '\\':
                        nc = get_char()
                        if nc == '\n':
                            token = token[:-1]  # lose the backslash we added
                            continue
                        else:
                            # Still inside the string: keep whitespace so it
                            # is not silently removed from the content.
                            self.push_back(nc, force=True)
                        escaped = not escaped
                    else:
                        escaped = False
                if not c:
                    e = TokenizerError('Unterminated quoted string: %r' % token)
                    e.location = (startline, startcol)
                    raise e
                break
            elif c in self.wordchars:
                token = c
                endline, endcol = self.charline, self.charcol
                tt = WORD
                c = get_char()
                while c and (c in self.identchars):
                    token += c
                    endline, endcol = self.charline, self.charcol
                    c = get_char()
                self.push_back(c)
                # Normalise Python-style keywords (True/False/None).
                if token in PYKEYWORDS:
                    token = PYKEYWORDS[token]
                if token in KEYWORDS:
                    value = KEYWORD_VALUES.get(token)
                    tt = token
                else:
                    value = token
                break
            elif c in digits:
                tt, token, value, endline, endcol = get_number(c)
                break
            elif c == '=':
                nc = get_char()
                if nc == '=':
                    token = c + nc
                    endline, endcol = self.charline, self.charcol
                    tt = token
                else:
                    tt = token = c
                    self.push_back(nc)
                break
            elif c in self.punct:
                token = tt = c
                endline, endcol = self.charline, self.charcol
                if c == '.':
                    # A dot followed by a digit starts a number.
                    c = get_char()
                    if c:
                        if c not in digits:
                            self.push_back(c)
                        else:
                            token += c
                            tt, token, value, endline, endcol = get_number(token)
                elif c == '-':
                    # A minus followed by a digit or dot starts a number.
                    c = get_char()
                    if c:
                        if c in digits or c == '.':
                            token += c
                            tt, token, value, endline, endcol = get_number(token)
                        else:
                            self.push_back(c)
                elif token in ('<', '>', '!', '*', '/', '&', '|'):
                    # Possible two-character operator.
                    c = get_char()
                    pb = True
                    if token == '<':
                        # "c and": at end of input c is '', which would
                        # otherwise count as being "in" the string.
                        if c and c in '<>=':
                            token += c
                            endline, endcol = self.charline, self.charcol
                            tt = token if token != ALT_NEQ else NEQ
                            pb = False
                    elif token in ('&', '|') and c == token:
                        token += c
                        endline, endcol = self.charline, self.charcol
                        if c == '&':
                            tt = AND
                        else:
                            tt = OR
                        pb = False
                    elif token == '>':
                        if c and c in '>=':
                            token += c
                            endline, endcol = self.charline, self.charcol
                            tt = token
                            pb = False
                    elif token == '!':
                        if c == '=':
                            token += c
                            endline, endcol = self.charline, self.charcol
                            tt = token
                            pb = False
                        else:
                            tt = NOT
                    elif token in '*/=':
                        if c == token:
                            token += c
                            endline, endcol = self.charline, self.charcol
                            tt = token
                            pb = False
                    if pb:
                        self.push_back(c)
                break
            else:
                e = TokenizerError('Unexpected character: %r' % str(c))
                e.location = (self.charline, self.charcol)
                raise e
        if tt in (STRING, BACKTICK):
            n = len(quoter)
            assert n in (1, 3)
            assert token.startswith(quoter)
            assert token.endswith(quoter)
            try:
                value = parse_escapes(token[n:-n])
            except TokenizerError as e:
                e.location = (startline, startcol)
                raise e
        result = Token(tt, token, value)
        result.start = (startline, startcol)
        result.end = (endline, endcol)
        return result

    def __iter__(self):
        return self

    def next(self):
        """Return the next token; raise StopIteration at end of input."""
        result = self.get_token()
        if result.kind == EOF:
            raise StopIteration
        return result

    __next__ = next