mirror of https://github.com/sgoudham/Enso-Bot.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
512 lines
14 KiB
Python
512 lines
14 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2018-2020 by Vinay Sajip. All Rights Reserved.
|
|
#
|
|
|
|
from collections import OrderedDict
|
|
import functools
|
|
import io
|
|
import logging
|
|
import sys
|
|
|
|
from .tokens import *
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
open_file = functools.partial(io.open, encoding='utf-8')
|
|
|
|
|
|
class ParserError(RecognizerError):
|
|
pass
|
|
|
|
|
|
class ODict(OrderedDict):
|
|
"""
|
|
This class preserves insertion order for display purposes but is
|
|
otherwise just a dict.
|
|
"""
|
|
|
|
def __repr__(self): # pragma: no cover
|
|
result = []
|
|
for k, v in self.items():
|
|
result.append('%r: %r' % (k, v))
|
|
return '{%s}' % ', '.join(result)
|
|
|
|
|
|
class ASTNode(ODict):
|
|
start = end = None
|
|
|
|
|
|
class MappingBody(list):
|
|
start = end = None
|
|
|
|
|
|
class ListBody(list):
|
|
start = end = None
|
|
|
|
|
|
def set_positions(node, start, end):
|
|
node.start = start
|
|
node.end = end
|
|
|
|
|
|
def make_unary_expr(op, operand):
|
|
"""
|
|
This function makes an AST node for a unary expression
|
|
"""
|
|
result = ASTNode()
|
|
# The str() calls are to avoid repr ugliness in 2.x
|
|
result[str('op')] = str(op)
|
|
result[str('operand')] = operand
|
|
return result
|
|
|
|
|
|
def make_binary_expr(op, lhs, rhs):
|
|
"""
|
|
This function makes an AST node for a binary expression
|
|
"""
|
|
result = ASTNode()
|
|
# The str() calls are to avoid repr ugliness in 2.x
|
|
result[str('op')] = str(op)
|
|
result[str('lhs')] = lhs
|
|
result[str('rhs')] = rhs
|
|
return result
|
|
|
|
|
|
def invalid_index(n, pos):
|
|
raise ParserError(
|
|
'Invalid index at %s: expected 1 expression, ' 'found %d' % (pos, n)
|
|
)
|
|
|
|
|
|
TOKEN_REPRS = {
|
|
WORD: 'identifier',
|
|
NEWLINE: 'newline',
|
|
INTEGER: 'integer',
|
|
FLOAT: 'floating-point value',
|
|
COMPLEX: 'complex value',
|
|
STRING: 'string',
|
|
BACKTICK: 'backtick-string',
|
|
EOF: 'EOF',
|
|
}
|
|
|
|
|
|
def token_repr(kind):
|
|
if kind in TOKEN_REPRS:
|
|
return TOKEN_REPRS[kind]
|
|
if sys.version_info[0] == 2:
|
|
return "'%s'" % kind
|
|
return '%r' % kind
|
|
|
|
|
|
VALUE_STARTERS = {WORD, INTEGER, FLOAT, COMPLEX, STRING, BACKTICK, TRUE, FALSE, NONE}
|
|
|
|
|
|
class Parser(object):
|
|
def __init__(self, stream=None):
|
|
self.stream = stream
|
|
if stream:
|
|
self.tokenizer = Tokenizer(stream)
|
|
else:
|
|
self.tokenizer = None
|
|
self.token = None
|
|
|
|
def _make_stream(self, text):
|
|
if not isinstance(text, text_type):
|
|
text = text.decode('utf-8')
|
|
self.stream = io.StringIO(text)
|
|
self.tokenizer = Tokenizer(self.stream)
|
|
self.token = None
|
|
|
|
@property
|
|
def at_end(self):
|
|
return self.token.kind == EOF
|
|
|
|
@property
|
|
def remaining(self): # for debugging
|
|
return self.tokenizer.remaining
|
|
|
|
def advance(self):
|
|
self.token = self.tokenizer.get_token()
|
|
return self.token.kind
|
|
|
|
def expect(self, tt):
|
|
if self.token.kind != tt:
|
|
pe = ParserError(
|
|
'Expected %s, got %s at %s' % (token_repr(tt), token_repr(self.token.kind), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
result = self.token
|
|
self.advance()
|
|
return result
|
|
|
|
def consume_newlines(self):
|
|
tt = self.token.kind
|
|
while tt == NEWLINE:
|
|
tt = self.advance()
|
|
return tt
|
|
|
|
def object_key(self):
|
|
if self.token.kind == STRING:
|
|
result = self.strings()
|
|
else:
|
|
result = self.token
|
|
self.advance()
|
|
return result
|
|
|
|
def mapping_body(self):
|
|
result = MappingBody()
|
|
start = self.token.start
|
|
tt = self.consume_newlines()
|
|
if tt in (RCURLY, EOF):
|
|
end = self.token.end
|
|
set_positions(result, start, end)
|
|
return result
|
|
if tt not in (WORD, STRING):
|
|
pe = ParserError(
|
|
'Unexpected %s at %s' % (token_repr(self.token.kind), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
while tt in (WORD, STRING):
|
|
key = self.object_key()
|
|
if self.token.kind not in (COLON, ASSIGN):
|
|
pe = ParserError(
|
|
'Unexpected %s at %s'
|
|
% (token_repr(self.token.kind), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
self.advance()
|
|
self.consume_newlines()
|
|
value = self.expr()
|
|
result.append((key, value))
|
|
tt = self.token.kind
|
|
if tt in (NEWLINE, COMMA):
|
|
self.advance()
|
|
tt = self.consume_newlines()
|
|
elif tt not in (RCURLY, EOF):
|
|
pe = ParserError(
|
|
'Unexpected %s at %s'
|
|
% (token_repr(self.token.kind), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
return result
|
|
|
|
def strings(self):
|
|
result = self.token
|
|
start = result.start
|
|
s = result.value
|
|
|
|
if self.advance() == STRING:
|
|
all_text = []
|
|
|
|
while True:
|
|
all_text.append(s)
|
|
s = self.token.value
|
|
end = self.token.end
|
|
if self.advance() != STRING:
|
|
break
|
|
all_text.append(s)
|
|
s = ''.join(all_text)
|
|
result = Token(STRING, s, s)
|
|
result.start = start
|
|
result.end = end
|
|
return result
|
|
|
|
def list_body(self):
|
|
result = ListBody()
|
|
start = self.token.start
|
|
end = self.token.end
|
|
tt = self.consume_newlines()
|
|
while tt in (
|
|
LCURLY,
|
|
LBRACK,
|
|
LPAREN,
|
|
AT,
|
|
DOLLAR,
|
|
BACKTICK,
|
|
PLUS,
|
|
MINUS,
|
|
BITNOT,
|
|
INTEGER,
|
|
FLOAT,
|
|
COMPLEX,
|
|
TRUE,
|
|
FALSE,
|
|
NONE,
|
|
NOT,
|
|
STRING,
|
|
WORD,
|
|
):
|
|
value = self.expr()
|
|
end = self.token.end
|
|
result.append(value)
|
|
tt = self.token.kind
|
|
if tt not in (NEWLINE, COMMA):
|
|
break
|
|
self.advance()
|
|
tt = self.consume_newlines()
|
|
set_positions(result, start, end)
|
|
return result
|
|
|
|
def list(self):
|
|
self.expect(LBRACK)
|
|
result = self.list_body()
|
|
self.expect(RBRACK)
|
|
return result
|
|
|
|
def value(self):
|
|
tt = self.token.kind
|
|
if tt not in VALUE_STARTERS:
|
|
pe = ParserError(
|
|
'Unexpected %s when looking for value: %s'
|
|
% (token_repr(tt), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
if tt == STRING:
|
|
token = self.strings()
|
|
else:
|
|
token = self.token
|
|
self.advance()
|
|
return token
|
|
|
|
def mapping(self):
|
|
start = self.expect(LCURLY).start
|
|
result = self.mapping_body()
|
|
end = self.expect(RCURLY).end
|
|
set_positions(result, start, end)
|
|
return result
|
|
|
|
def container(self):
|
|
self.advance()
|
|
k = self.consume_newlines()
|
|
if k == LCURLY:
|
|
result = self.mapping()
|
|
elif k == LBRACK:
|
|
result = self.list()
|
|
elif k in (WORD, STRING, EOF):
|
|
result = self.mapping_body()
|
|
else:
|
|
pe = ParserError(
|
|
'Unexpected %s at %s' % (token_repr(self.token.kind), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
self.consume_newlines()
|
|
return result
|
|
|
|
def atom(self):
|
|
tt = self.token.kind
|
|
if tt == LCURLY:
|
|
result = self.mapping()
|
|
elif tt == LBRACK:
|
|
result = self.list()
|
|
elif tt in VALUE_STARTERS:
|
|
result = self.value()
|
|
elif tt == DOLLAR:
|
|
self.advance()
|
|
self.expect(LCURLY)
|
|
result = self.primary()
|
|
self.expect(RCURLY)
|
|
result = make_unary_expr(tt, result)
|
|
elif tt == LPAREN:
|
|
self.advance()
|
|
result = self.expr()
|
|
self.expect(RPAREN)
|
|
else:
|
|
# import pdb; pdb.set_trace()
|
|
pe = ParserError(
|
|
'Unexpected %s at %s' % (token_repr(self.token.kind), self.token.start)
|
|
)
|
|
pe.location = self.token.start
|
|
raise pe
|
|
return result
|
|
|
|
# noinspection PyUnboundLocalVariable
|
|
def trailer(self):
|
|
tt = self.token.kind
|
|
if tt != LBRACK:
|
|
self.expect(DOT)
|
|
result = self.expect(WORD)
|
|
else:
|
|
|
|
def get_slice_element():
|
|
lb = self.list_body()
|
|
n = len(lb)
|
|
if n != 1:
|
|
invalid_index(n, lb.start)
|
|
return lb[0]
|
|
|
|
tt = self.advance()
|
|
is_slice = False
|
|
if tt == COLON:
|
|
# it's a slice like [:xyz:abc]
|
|
start = None
|
|
is_slice = True
|
|
else:
|
|
elem = get_slice_element()
|
|
tt = self.token.kind
|
|
if tt != COLON:
|
|
result = elem
|
|
else:
|
|
start = elem
|
|
is_slice = True
|
|
if not is_slice:
|
|
tt = LBRACK
|
|
else:
|
|
step = stop = None
|
|
# at this point start is either None (if foo[:xyz]) or a
|
|
# value representing the start. We are pointing at the COLON
|
|
# after the start value
|
|
tt = self.advance()
|
|
if tt == COLON: # no stop, but there might be a step
|
|
tt = self.advance()
|
|
if tt != RBRACK:
|
|
step = get_slice_element()
|
|
elif tt != RBRACK:
|
|
stop = get_slice_element()
|
|
tt = self.token.kind
|
|
if tt == COLON:
|
|
tt = self.advance()
|
|
if tt != RBRACK:
|
|
step = get_slice_element()
|
|
result = (start, stop, step)
|
|
tt = COLON
|
|
self.expect(RBRACK)
|
|
return tt, result
|
|
|
|
def primary(self):
|
|
result = self.atom()
|
|
while self.token.kind in (LBRACK, DOT):
|
|
op, rhs = self.trailer()
|
|
result = make_binary_expr(op, result, rhs)
|
|
return result
|
|
|
|
def power(self):
|
|
result = self.primary()
|
|
if self.token.kind == POWER:
|
|
self.advance()
|
|
rhs = self.u_expr()
|
|
result = make_binary_expr(POWER, result, rhs)
|
|
return result
|
|
|
|
def u_expr(self):
|
|
tt = self.token.kind
|
|
if tt not in (PLUS, MINUS, BITNOT, AT):
|
|
result = self.power()
|
|
else:
|
|
self.advance()
|
|
result = make_unary_expr(tt, self.u_expr())
|
|
return result
|
|
|
|
def mul_expr(self):
|
|
result = self.u_expr()
|
|
while self.token.kind in (STAR, SLASH, SLASHSLASH, MODULO):
|
|
op = self.token.kind
|
|
self.advance()
|
|
rhs = self.u_expr()
|
|
result = make_binary_expr(op, result, rhs)
|
|
return result
|
|
|
|
def add_expr(self):
|
|
result = self.mul_expr()
|
|
while self.token.kind in (PLUS, MINUS):
|
|
op = self.token.kind
|
|
self.advance()
|
|
rhs = self.mul_expr()
|
|
result = make_binary_expr(op, result, rhs)
|
|
return result
|
|
|
|
def shift_expr(self):
|
|
result = self.add_expr()
|
|
while self.token.kind in (LSHIFT, RSHIFT):
|
|
op = self.token.kind
|
|
self.advance()
|
|
rhs = self.add_expr()
|
|
result = make_binary_expr(op, result, rhs)
|
|
return result
|
|
|
|
def bitand_expr(self):
|
|
result = self.shift_expr()
|
|
while self.token.kind == BITAND:
|
|
self.advance()
|
|
rhs = self.shift_expr()
|
|
result = make_binary_expr(BITAND, result, rhs)
|
|
return result
|
|
|
|
def bitxor_expr(self):
|
|
result = self.bitand_expr()
|
|
while self.token.kind == BITXOR:
|
|
self.advance()
|
|
rhs = self.bitand_expr()
|
|
result = make_binary_expr(BITXOR, result, rhs)
|
|
return result
|
|
|
|
def bitor_expr(self):
|
|
result = self.bitxor_expr()
|
|
while self.token.kind == BITOR:
|
|
self.advance()
|
|
rhs = self.bitxor_expr()
|
|
result = make_binary_expr(BITOR, result, rhs)
|
|
return result
|
|
|
|
def comp_op(self):
|
|
result = self.token.kind
|
|
tt = self.advance()
|
|
advance = False
|
|
if result == IS and tt == NOT:
|
|
result = 'is not'
|
|
advance = True
|
|
elif result == NOT and tt == IN:
|
|
result = 'not in'
|
|
advance = True
|
|
if advance:
|
|
self.advance()
|
|
return result
|
|
|
|
def comparison(self):
|
|
result = self.bitor_expr()
|
|
while self.token.kind in (LE, LT, GE, GT, EQ, NEQ, ALT_NEQ, IS, IN, NOT):
|
|
op = self.comp_op()
|
|
rhs = self.bitor_expr()
|
|
result = make_binary_expr(op, result, rhs)
|
|
return result
|
|
|
|
def not_expr(self):
|
|
if self.token.kind != NOT:
|
|
result = self.comparison()
|
|
else:
|
|
self.advance()
|
|
result = make_unary_expr(NOT, self.not_expr())
|
|
return result
|
|
|
|
def and_expr(self):
|
|
result = self.not_expr()
|
|
while self.token.kind == AND:
|
|
self.advance()
|
|
rhs = self.not_expr()
|
|
result = make_binary_expr(AND, result, rhs)
|
|
return result
|
|
|
|
def or_expr(self):
|
|
result = self.and_expr()
|
|
while self.token.kind == OR:
|
|
self.advance()
|
|
rhs = self.and_expr()
|
|
result = make_binary_expr(OR, result, rhs)
|
|
return result
|
|
|
|
expr = or_expr
|
|
|
|
def parse(self, text, rule='mapping_body'):
|
|
self._make_stream(text)
|
|
self.advance()
|
|
method = getattr(self, rule, None)
|
|
if not method:
|
|
raise ValueError('no such rule: %s' % rule)
|
|
return method()
|