import re

from pygments.lexer import RegexLexer, words
from pygments.token import (
    Punctuation,
    Text,
    Comment,
    Operator,
    Keyword,
    Name,
    String,
    Number,
    Generic,
    Token,
)


# Known clusters as ``{'type': ..., 'name': ...}`` records.  The lexer only
# reads the ``name`` field, to highlight cluster names in queries.
YQL_CLUSTERS = [
    {'type': cluster_type, 'name': cluster_name}
    for cluster_type, cluster_name in (
        ('YAMR', 'sakura'),
        ('YAMR', 'betula'),
        ('YT', 'quine'),
        ('YT', 'plato'),
        ('YT', 'freud'),
        ('YT', 'hegel'),
        ('YT', 'aristotle'),
        ('YT', 'smith'),
        ('YT', 'banach'),
        ('YT', 'hahn'),
        ('YT', 'marx'),
    )
]


# Words highlighted as keywords by the lexer (matched case-insensitively).
# Multi-word entries contain exactly one space, so they only match when the
# words are separated by a single space.  Every entry is listed once:
# repeating a word adds nothing to the alternation built from this list.
YQL_KEYWORDS = [
    'MAX',
    'LENGTH',
    'JOIN',
    'BETWEEN',
    'LIMIT',
    'PERCENTILE',
    'FROM',
    'DESC',
    'LEFT JOIN',
    'ASC',
    'WHEN',
    'COALESCE',
    'INNER JOIN',
    'INSERT INTO',
    'PROCESS',
    'PRAGMA',
    'OFFSET',
    'ELSE',
    'VALUES',
    'MEDIAN',
    'ON',
    'CAST',
    'FilePath',
    'OR',
    'NULL',
    'USE',
    'IS',
    'GROUP BY',
    'WHERE',
    'SELECT',
    'WITH',
    'CASE',
    'USING',
    'FileContent',
    'AS',
    'AVG',
    'FULL JOIN',
    'NOT',
    'MIN',
    'RIGHT JOIN',
    'AND',
    'THEN',
    'END',
    'COUNT',
    'HAVING',
    'ORDER BY',
    'REDUCE',
    'HELP',
    'QUICKSTART',
    'VERSION',
    'RESTART',
    'CLI UPDATE',
    'EXPLAIN',
    'AST',
    'PARSE',
    'PLAN',
    'SHOW',
    'DESCRIBE',
    'HISTORY',
    'CLUSTERS',
    'TABLES',
    'META',
    'SCHEME',
    'SCHEMA',
    'COLUMNS',
    'FIELDS',
    'PREVIEW',
    'QUERIES',
    'QUERY',
    'OPEN',
    'COMPILE',
    'VALIDATE',
    'OPTIMIZE',
    'RUN',
    'REMOVE',
    'DELETE',
    'OPERATIONS',
    'RESULTS',
    'PROGRESS',
    'STATUS',
    'RUNNING',
    'ATTACH',
    'FILE',
    'URL',
    'KEYWORDS',
    'AUTH',
    'PUT',
    'GET',
    'ABORT',
    'YT',
    'YAMR',
    'SUM',
    'DISTINCT',
    'REPLACE INTO',
    'DROP',
    'TABLE',
]


# Data type names; the lexer highlights these as ``Keyword.Type``.
YQL_DATA_TYPES = [
    'Bool',
    'Byte',
    'Int32',
    'Int64',
    'Uint32',
    'Uint64',
    'Float',
    'Double',
    'String',
    'Utf8',
    'Yson',
]


def produce_function(_, m):
    """Split a ``name (`` match into separate lexer tokens.

    Intended as a rule callback for a pattern that matches an identifier,
    optional whitespace and a single trailing opening bracket.

    Args:
        _: The lexer instance invoking the callback (unused).
        m: The regex match; the last character of the matched text is
            treated as the bracket.

    Returns:
        A list of ``(index, token_type, value)`` tuples covering the whole
        match: the function name, the whitespace between the name and the
        bracket (only when present), and the bracket itself.
    """
    matched = m.group()
    # ``m.start()`` is where the match begins; ``m.pos`` is merely where the
    # search was started and only coincides with it for anchored matching.
    start = m.start()

    # Keep whitespace that precedes the bracket out of the function name so
    # the name token holds nothing but the identifier.
    name = matched[:-1].rstrip()
    padding = matched[len(name):-1]

    tokens = [(start, Name.Function, name)]
    if padding:
        tokens.append((start + len(name), Text, padding))
    tokens.append((m.end() - 1, Token.Bracket, matched[-1]))
    return tokens


class YqlLexer(RegexLexer):
    """Pygments lexer for YQL.

    States:
        root: comments, string literals, namespaces, bracketed table names,
            keywords, data types, cluster names, function calls, variables,
            operators, numbers, identifiers and punctuation.
        multiline-comments: body of a ``/* ... */`` comment; the state pushes
            itself on every ``/*``, so comments nest.
        multiline-strings: body of an ``@@ ... @@`` string literal.
        table-name: body of a ``[...]`` table reference.
    """

    name = 'YQL'
    aliases = ['yql', 'magic_yql']
    filenames = ['*.yql']
    mimetypes = ['text/x-yql']

    # All patterns below are written in lower case and rely on this flag.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'--.*', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # TODO: Backslash escapes?
            # Quoted literals; a doubled quote character inside stands for itself.
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'@@', String.Other, 'multiline-strings'),
            (r'\s+', Text),
            (r'([a-z0-9]+)::', Name.Namespace),
            (r'\[', Name.Tag, 'table-name'),
            (words(YQL_KEYWORDS, suffix=r'\b'), Keyword),
            (words(YQL_DATA_TYPES, suffix=r'\b'), Keyword.Type),
            # Same trailing word boundary as the keyword rules: without it an
            # identifier that merely starts with a cluster name (for example
            # ``hahn_logs``) is split into a cluster token plus a leftover name.
            (words([c['name'] for c in YQL_CLUSTERS], suffix=r'\b'), Generic.Subheading),
            # Identifier followed by "(": tokenised by the callback.
            (r'([a-z0-9_]+)\ *\(', produce_function),
            (r'\$[a-z0-9_]+', Name.Variable),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            # 24 alphanumeric characters starting with a digit; must precede the
            # integer rule.  NOTE(review): presumably an operation/query id - confirm.
            (r'[0-9][a-z0-9]{23}', Name.Attribute),
            (r'[0-9]+', Number.Integer),
            (r'[a-z_][\w$]*', Name),
            (r'[;:()\{\},.]', Punctuation),
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            # A "/" or "*" that is not part of a comment delimiter.
            (r'[/*]', Comment.Multiline),
        ],
        'multiline-strings': [
            (r'@@', String.Other, '#pop'),
            (r'[^@]+', String.Other),
            # A single "@" that is not part of the closing "@@" belongs to the
            # string; without this rule nothing in this state matches it and it
            # is reported as an error token.
            (r'@', String.Other),
        ],
        'table-name': [
            # Closing bracket plus an optional suffix of ":", letters, digits or "_".
            (r'\]([:a-z0-9_]*)', Name.Tag, '#pop'),
            (r'\/', Name.Tag),
            (r'[^\]\/]+', Name.Tag),
        ],
    }
