# -*- coding: utf-8 -*- # # Copyright (C) 2009-2018 the sqlparse authors and contributors # <see AUTHORS file> # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause """SQL Lexer""" # This code is based on the SqlLexer in pygments. # http://pygments.org/ # It's separated from the rest of pygments to increase performance # and to allow some customizations. from sqlparse import tokens from sqlparse.keywords import SQL_REGEX from sqlparse.compat import text_type, file_types from sqlparse.utils import consume class Lexer(object): """Lexer Empty class. Leaving for backwards-compatibility """ @staticmethod def get_tokens(text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. Split ``text`` into (tokentype, text) pairs. ``stack`` is the initial stack (default: ``['root']``) """ if isinstance(text, file_types): text = text.read() if isinstance(text, text_type): pass elif isinstance(text, bytes): if encoding: text = text.decode(encoding) else: try: text = text.decode('utf-8') except UnicodeDecodeError: text = text.decode('unicode-escape') else: raise TypeError(u"Expected text or file-like object, got {!r}". format(type(text))) iterable = enumerate(text) for pos, char in iterable: for rexmatch, action in SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() elif callable(action): yield action(m.group()) consume(iterable, m.end() - pos - 1) break else: yield tokens.Error, char def tokenize(sql, encoding=None): """Tokenize sql. Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. """ return Lexer().get_tokens(sql, encoding)