#!/usr/bin/env python
# -*- coding:utf-8 -*-

'''
This is an unofficial lexer for ya make, created by gluk47@
to support easy addition of new files to Arcadia from scripts.

See YaMake.py for detailed usage.
'''

from __future__ import print_function

import ply.lex as lex
import re

# Keyword spellings grouped by the token type they are reported as.
# Every key of this dict is used as a token type name (it is added to
# `tokens`), and t_WORD retags a matched word with the first key whose tuple
# contains that word.
# NOTE(review): 'DEPENDS' is listed under both INMODULE_OPERATOR_WORDS and
# ANYWHERE_OPERATOR_WORDS, and 'LDFLAGS' under both INMODULE_OPERATOR_WORDS
# and WORD_OR_ANYWHERE_OPERATOR. Which type such a word receives depends on
# dict iteration order -- confirm the intended category.
keywords = {
    'MODULE_TYPE': (
        'PROGRAM', 'PY2_PROGRAM', 'PY3_PROGRAM',
        'LIBRARY', 'PY2_LIBRARY', 'PY23_LIBRARY', 'PY3_LIBRARY',
        'PROTO_LIBRARY', 'PY_PROTOS_FOR',
        'DLL',
        'FAT_OBJECT',
        'PY2MODULE', 'PACKAGE', 'UNION', 'BUNDLE',
        'YQL_UDF', 'YQL_UDF_TEST',
        'UDF', 'UDF_LIB',
        'BENCHMARK',
        'UNITTEST', 'UNITTEST_FOR', 'PY2TEST', 'JTEST', 'JTEST_FOR', 'EXECTEST',
        'JAVA_PROGRAM', 'JAVA_LIBRARY',
        'FUZZ',
        'SANDBOX_TASK',
        # TODO: 'PY3TEST', 'PY23TEST', 'SANDBOX_PY3_TASK',
    ),
    'INMODULE_OPERATOR_WORDS': (
        'FILES', 'PEERDIR', 'DEPENDS', 'SRCS', 'TEST_SRCS',
        'METAQUERYFILES',
        'EXTRALIBS', 'EXTRALIBS_STATIC',
        'ALLOCATOR',
        'GRPC',
        'PY_REGISTER', 'PYTHON2_ADDINCL',
        'CFLAGS', 'CONLYFLAGS', 'CXXFLAGS', 'MSVC_FLAGS', 'CUDA_NVCC_FLAGS', 'MASMFLAGS', 'LDFLAGS',
        'GENERATE_ENUM_SERIALIZATION', 'GENERATE_ENUM_SERIALIZATION_WITH_HEADER',
        'GENERATE_PY_PROTOS',
        'NO_RUNTIME', 'NO_LIBC', 'NO_PLATFORM', 'NO_SANITIZE',
        'PY_SRCS', 'PY_MAIN',
        'JAVA_SRCS', 'EXCLUDE', 'EXTERNAL_JAR', 'ANNOTATION_PROCESSOR', 'INDUCED_DEPS',
        'TEST_CWD',

        'PYTHON', 'BUILTIN_PYTHON', 'RUN_LUA', 'RUN_PROGRAM', 'RUN',
        'SYMLINK', 'RUN_JAVA_PROGRAM', 'ARCHIVE', 'ARCHIVE_ASM',
        'RESOURCE', 'EXTERNAL_RESOURCE', 'RESOURCE_FILES',
        'BASE_CODEGEN', 'STRUCT_CODEGEN', 'SPLIT_CODEGEN',
        'ACCELEO', 'GEN_SCHEEME2', 'CREATE_SVNVERSION_FOR',
        'DEB_VERSION', 'BUILDWITH_CYTHON_CPP', 'BUILDWITH_CYTHON_C',
        'BUILD_MN', 'BUILD_MN_ASM', 'BUILD_MNS',

        'PACK', 'COPY', 'CONFIGURE_FILE', 'COPY_FILE',
    ),
    'ANYWHERE_OPERATOR_WORDS': (
        'OWNER', 'LICENSE', 'VERSION',
        'ENABLE', 'DISABLE', 'DEFAULT',
        'INCLUDE', 'DATA', 'TEST_DATA',
        'DEPENDS', 'TIMEOUT',
        'FORK_TESTS', 'FORK_SUBTESTS',
        'SPLIT_FACTOR', 'FORK_TEST_FILES',
        'SIZE', 'TAG', 'REQUIREMENTS',
        'NO_LINT', 'LINT', 'NO_CHECK_IMPORTS', 'CHECK_CONFIG_H',
        'FUZZ_DICTS', 'FUZZ_OPTS',
        'NEED_REVIEW', 'CHECK_DEPENDENT_DIRS',
        'USE_PYTHON2', 'NO_WERROR', 'NO_OPTIMIZE', 'NO_DEBUG_INFO',
        'DLL_FOR',
        'BUILD_ONLY_IF', 'NO_BUILD_IF',
        'USE_PERL_LIB',
        'NO_SANITIZE_COVERAGE', 'NO_WSHADOW', 'ADDINCLSELF',
        'CHECK_JAVA_DEPS', 'UBERJAR', 'JVM_ARGS', 'UBERJAR_PATH_EXCLUDE_PREFIX',
        'SET', 'SET_APPEND',
        'RESOLVE_PROTO',

        'MESSAGE',
        'USE_ERROR_PRONE',

        # TODO: 'USE_PYTHON3',

        # deprecated:
        'WERROR',
    ),
    'OUTMODULE_OPERATOR_WORDS': (
        'RECURSE', 'RECURSE_ROOT_RELATIVE', 'RECURSE_FOR_TESTS',
        'EXTRADIR',
    ),
    'OUT': ('OUT', 'OUT_NOAUTO',),
    'WORD_OR_INMODULE_OPERATOR': ('SRCDIR', 'ADDINCL',),
    'WORD_OR_ANYWHERE_OPERATOR': ('LDFLAGS', 'NO_UTIL', 'NO_COMPILER_WARNINGS',),
    'WORD_OR_MODULE_TYPE': ('TOOL',),
}

# Keywords whose spelling doubles as their own token type name.
literal_tokens = ['IF', 'ELSE', 'FROM_SANDBOX', 'FILE', 'RENAME', 'END', 'ENDIF']

# Full list of token type names handed to ply: one per keyword category,
# one per literal keyword, plus the generic types below.
# ENDL is not listed: t_ENDL only counts lines and returns no token.
tokens = list(keywords) + literal_tokens + [
    'WORD',
    'BEGIN_BLOCK',
    'END_BLOCK',
    'COMMENT',
]

# Generate a string rule t_<CATEGORY> for every keyword category: the regex is
# simply the alternation of all spellings in that category.
for t, values in keywords.items():
    globals().update({'t_%s' % t: '|'.join(values)})

# Literal keywords get a rule that matches exactly their own spelling.
for t in literal_tokens:
    globals().update({'t_%s' % t: t})

# Punctuation and comments. A comment runs from '#' to the end of the line.
t_BEGIN_BLOCK = r'\('
t_END_BLOCK = r'\)'
t_COMMENT = r'\#[^\n]*'


def t_ENDL(t):
    r'\n+'
    # The docstring above is the rule's regex (ply convention), so notes live
    # in comments. A run of newlines advances the line counter by its length;
    # nothing is returned, so no token is produced for it.
    newline_run = t.value
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + len(newline_run)


def t_WORD(t):
    r'[^()\n #]+'
    # The docstring above is the rule's regex (ply convention), so notes live
    # in comments. A word is any run of characters other than parentheses,
    # newline, space and '#'.
    text = t.value

    # Retag the word with the first keyword category that lists it.
    for token_type, spellings in keywords.items():
        if text in spellings:
            t.type = token_type
            return t

    # Literal keywords use their own spelling as the token type.
    if text in literal_tokens:
        t.type = text

    # Anything else is returned with the type it arrived with.
    return t


# Characters skipped between tokens: carriage return, tab, form feed, space.
# Newline is not in this set; it is matched by t_ENDL, which updates lineno.
t_ignore = '\r\t\f '


def t_error(t):
    # Report the first character of the unmatched input, then step over that
    # single character so lexing can continue.
    offending_char = t.value[0]
    message = "Illegal character '%s'" % offending_char
    print(message)
    t.lexer.skip(1)


# Module-level lexer instance built by ply from the rules in this module.
# NOTE(review): none of the patterns visible in this file use '.', so
# re.DOTALL looks like a no-op here -- confirm before removing it.
lexer = lex.lex(reflags=re.UNICODE | re.DOTALL)

if __name__ == '__main__':
    # Smoke test: tokenize a small sample and print every token produced.
    data = '''
    OWNER(g:wizard)
    RECURSE(
        replace_queries_tries_fix
    )
    UNION()
    PEERDIR(
        search/wizard/data/fresh/video/replace_queries_tries_fix
    )
    FROM_SANDBOX(FILE 369682845 OUT besttitlecrc.trie)
    FROM_SANDBOX(FILE 372246889 OUT full_series_structs.trie)
    # some comment
    # FROM_SANDBOX(unused)
    FILES(
        __init__.py
        videosyn.gzt
    )
    END()
    '''

    lexer.input(data)

    # lexer.token() returns None once the input is exhausted, which ends
    # the loop.
    for tok in iter(lexer.token, None):
        print(tok)
