from __future__ import unicode_literals
import re
import logging

from enum import Enum
from flask import g

from sepelib.core import config
from infra.swatlib.rpc import request_limiter
from infra.dproxy.proto import dproxy_pb2
from infra.dproxy.proto import awacs_pb2
from infra.dproxy.src.ydb_logs import ydbutil
from infra.dproxy.src.lib.rpc import blueprint


# Blueprint carrying the RPC methods of the default YDB logs backend
# (created with the '/api/logs' path argument).
ydb_logs_service_blueprint = blueprint.HttpRpcBlueprint('rpc.ydb_logs_service',
                                                        __name__,
                                                        '/api/logs',
                                                        serialize_resp_threads_count=10)

# Blueprint carrying the same RPC methods for the OLAP YDB logs backend
# (created with the '/api/olap/logs' path argument).
olap_ydb_logs_service_blueprint = blueprint.HttpRpcBlueprint('rpc.olap_ydb_logs_service',
                                                             __name__,
                                                             '/api/olap/logs',
                                                             serialize_resp_threads_count=10)

# Blueprint carrying the awacs log RPC methods
# (created with the '/api/v2/awacs/logs/' path argument).
awacs_ydb_logs_service_blueprint = blueprint.HttpRpcBlueprint('rpc.awacs_ydb_logs_service',
                                                              __name__,
                                                              '/api/v2/awacs/logs/',
                                                              serialize_resp_threads_count=10)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Upper bound for the client-supplied `limit`; larger values are reduced to
# this number by get_limit().
MAX_LIMIT = 500

# Limit used by get_limit() when the request carries no (or a zero) `limit`.
DEFAULT_LIMIT = 50

# Matches every '.' that is not immediately preceded by a backslash; it is used
# to split a dotted JSON path into segments, so "\." keeps a literal dot inside
# a key.
# This is a deliberate simplification: a key that itself ends with a backslash
# (a path written like "a\\.b") is misread as containing an escaped dot.
# Handling that properly would need a real escape-aware parser (an FSA), which
# was judged not worth the complexity.
JSON_PATH = re.compile(r'(?<!\\)\.', re.UNICODE)

# Maps the key-type enum values of dproxy_pb2.GetSearchQuerySuggestsRequest to
# the field-name strings that the suggest handlers pass as `key_type` to
# get_search_suggests(). A key type missing from this table makes those
# handlers return an empty response.
KEYTYPE_ENUM_TO_FIELD = {
    dproxy_pb2.GetSearchQuerySuggestsRequest.LOG_LEVEL: "log_level",
    dproxy_pb2.GetSearchQuerySuggestsRequest.HOST: "host",
    dproxy_pb2.GetSearchQuerySuggestsRequest.POD: "pod",
    dproxy_pb2.GetSearchQuerySuggestsRequest.BOX: "box",
    dproxy_pb2.GetSearchQuerySuggestsRequest.WORKLOAD: "workload",
    dproxy_pb2.GetSearchQuerySuggestsRequest.CONTAINER_ID: "container_id",
    dproxy_pb2.GetSearchQuerySuggestsRequest.LOGGER_NAME: "logger_name",
    dproxy_pb2.GetSearchQuerySuggestsRequest.POD_TRANSIENT_FQDN: "pod_transient_fqdn",
    dproxy_pb2.GetSearchQuerySuggestsRequest.POD_PERSISTENT_FQDN: "pod_persistent_fqdn",
    dproxy_pb2.GetSearchQuerySuggestsRequest.NODE_FQDN: "node_fqdn",
    dproxy_pb2.GetSearchQuerySuggestsRequest.THREAD_NAME: "thread_name",
    dproxy_pb2.GetSearchQuerySuggestsRequest.REQUEST_ID: "request_id",
}

# Maps the key-type enum values of awacs_pb2.GetSearchQuerySuggestsRequest to
# the field-name strings that awacs_get_search_query_suggests() passes as
# `field` to awacs_get_search_suggests().
# The COOKIES, HEADERS and WORKFLOW entries are commented out, so a request
# with one of those key types is answered with an empty candidate list.
AwacsGetSearchQuerySuggestsRequest_ENUM_TO_FIELD = {
    awacs_pb2.GetSearchQuerySuggestsRequest.DOMAIN_: "domain",
    awacs_pb2.GetSearchQuerySuggestsRequest.UPSTREAM: "upstream",
    awacs_pb2.GetSearchQuerySuggestsRequest.CLIENT_IP: "client_ip",
    awacs_pb2.GetSearchQuerySuggestsRequest.CLIENT_PORT: "client_port",
    awacs_pb2.GetSearchQuerySuggestsRequest.HOSTNAME: "hostname",
    # awacs_pb2.GetSearchQuerySuggestsRequest.COOKIES: "cookies",
    # awacs_pb2.GetSearchQuerySuggestsRequest.HEADERS: "headers",
    awacs_pb2.GetSearchQuerySuggestsRequest.YANDEXUID: "yandexuid",
    awacs_pb2.GetSearchQuerySuggestsRequest.METHOD: "method",
    awacs_pb2.GetSearchQuerySuggestsRequest.PROCESS_TIME: "process_time",
    awacs_pb2.GetSearchQuerySuggestsRequest.REASON: "reason",
    awacs_pb2.GetSearchQuerySuggestsRequest.REQUEST: "request",
    awacs_pb2.GetSearchQuerySuggestsRequest.REQUEST_ID: "request_id",
    awacs_pb2.GetSearchQuerySuggestsRequest.STATUS: "status",
    # awacs_pb2.GetSearchQuerySuggestsRequest.WORKFLOW: "workflow",
}

# Maps the key-type enum values of awacs_pb2.GetQueryContextKeysRequest to the
# field-name strings that awacs_get_query_context_keys() passes as `field` to
# awacs_get_context_keys().
AwacsGetQueryContextKeysRequest_ENUM_TO_FIELD = {
    awacs_pb2.GetQueryContextKeysRequest.COOKIES: "cookies",
    awacs_pb2.GetQueryContextKeysRequest.HEADERS: "headers",
}


class YdbLogsConfig(Enum):
    """Config section names of the YDB logs backends.

    parse_search_query_args() appends '.db' and '.table_prefix' to the member
    value to build the config keys it reads.
    """
    # Section used by the handlers on the default ('/api/logs') blueprint.
    DEFAULT = 'ydb_logs'
    # Section used by the handlers on the OLAP ('/api/olap/logs') blueprint.
    OLAP = 'olap_ydb_logs'


def get_limit(req):
    """Return the effective result limit for a request.

    A ``req.limit`` above MAX_LIMIT is capped to MAX_LIMIT (with a warning in
    the log); a falsy limit (unset / zero) is replaced by DEFAULT_LIMIT; any
    other value is returned unchanged.

    :param req: request message exposing a numeric ``limit`` attribute
    :return: the limit to use for the query
    """
    requested = req.limit

    if requested > MAX_LIMIT:
        log.warning('limit parameter reduced from %d to %d',
                    requested, MAX_LIMIT)
        requested = MAX_LIMIT

    # Falsy means the client did not ask for a specific limit.
    return requested or DEFAULT_LIMIT


def get_timestamp_range(req):
    """Extract the optional ``(begin, end)`` bounds from ``req.timestamp_range``.

    Each bound is returned as stored in the message when it is set, and as
    ``None`` when it is not. When the request has no ``timestamp_range`` at
    all, both bounds are ``None``.

    A bound that is set but falsy (for example ``0``) is returned as is; it is
    not collapsed to ``None``.

    :param req: request message exposing ``HasField`` and ``timestamp_range``
    :return: ``(begin, end)`` tuple
    """
    if not req.HasField('timestamp_range'):
        return None, None

    tr = req.timestamp_range
    # Conditional expressions instead of `cond and x or None`, which would
    # silently drop a set-but-falsy bound.
    begin = tr.begin if tr.HasField('begin') else None
    end = tr.end if tr.HasField('end') else None
    return begin, end


def get_value_list_pb(
    req,
    basic_field_name,
    old_field_names=None,
):
    """Return the first set field among the current name and its legacy aliases.

    The fields are probed with ``req.HasField`` in order: ``basic_field_name``
    first, then every name in ``old_field_names``.

    :param req: message exposing ``HasField``
    :param basic_field_name: current name of the field
    :param old_field_names: optional iterable of legacy field names
    :return: the value of the first field that is set, or ``None`` if none is
    """
    candidates = [basic_field_name]
    candidates += old_field_names or []

    return next(
        (getattr(req, name) for name in candidates if req.HasField(name)),
        None,
    )


def get_value_list(
    req,
    basic_list_field,
    old_list_fields=None,
    old_string_field=None,
    decode=False,
):
    """Read one filter from the request as a ``(values, include, op)`` triple.

    The filter is looked up through get_value_list_pb() under
    ``basic_list_field`` and then ``old_list_fields``. When one of them is
    set, the triple is built from its ``values``, from whether its
    ``select_type`` equals ``dproxy_pb2.INCLUDE``, and from its ``op_type``
    (``None`` if the message has no such attribute).

    Otherwise a non-empty ``old_string_field`` value is returned as a
    single-element list. With nothing set the list is empty. Both of these
    fallbacks use ``include=True`` and ``op=dproxy_pb2.EQ``.

    :param decode: when true, every value is decoded from UTF-8
    :return: ``(list_of_values, include_flag, op)``
    """
    list_pb = get_value_list_pb(
        req,
        basic_field_name=basic_list_field,
        old_field_names=old_list_fields,
    )

    if list_pb is None:
        # No list field is set: fall back to the legacy single-string field.
        legacy_value = None
        if old_string_field is not None:
            legacy_value = getattr(req, old_string_field)

        if not legacy_value:
            return [], True, dproxy_pb2.EQ

        if decode:
            legacy_value = legacy_value.decode('utf-8')
        return [legacy_value], True, dproxy_pb2.EQ

    if decode:
        values = [raw.decode('utf-8') for raw in list_pb.values]
    else:
        values = list(list_pb.values)

    is_include = list_pb.select_type == dproxy_pb2.INCLUDE
    return values, is_include, getattr(list_pb, 'op_type', None)


def get_values_pb(
    req,
    basic_list_field,
    old_repeated_field_name=None,
):
    """Return the raw values of a list field, with a legacy repeated-field fallback.

    If ``basic_list_field`` is set on ``req``, its ``values`` are returned.
    Otherwise the attribute named ``old_repeated_field_name`` is returned when
    such a name is given, and an empty list when it is not.
    """
    list_pb = get_value_list_pb(
        req,
        basic_field_name=basic_list_field,
    )
    if list_pb is not None:
        return list_pb.values

    if old_repeated_field_name is None:
        return []

    return getattr(req, old_repeated_field_name)


def _unescape_dots(segment):
    """Turn every escaped dot (backslash + '.') in a path segment into a plain '.'."""
    # The literals must have the same type as the segment: on Python 3,
    # str.replace() rejects bytes arguments (and vice versa) with TypeError.
    if isinstance(segment, bytes):
        return segment.replace(b'\\.', b'.')
    return segment.replace('\\.', '.')


def parse_user_fields(user_field_list_pb):
    """Validate and parse user-defined JSON-path filters.

    This is a generator: nothing is validated until the result is iterated,
    so the ``ValueError`` cases below surface at iteration time.

    For every item, ``item.path`` is split into segments on unescaped dots
    (see JSON_PATH) and the ``\\.`` escape inside each segment is replaced by
    a plain dot.

    :param user_field_list_pb: iterable of items exposing ``path``, ``values``
        and ``select_type``
    :return: yields ``(path_segments, values, include)`` where ``values`` is a
        list copy of ``item.values`` and ``include`` tells whether
        ``item.select_type`` equals ``dproxy_pb2.INCLUDE``
    :raises ValueError: if the path contains '/' or '*', if any path segment
        is empty, or if the item has no values
    """
    for item in user_field_list_pb:
        if '/' in item.path:
            raise ValueError("JSON path cannot contain '/' symbol: {!r}".format(item.path))
        if '*' in item.path:
            raise ValueError("JSON path cannot contain '*' symbol: {!r}".format(item.path))

        path = JSON_PATH.split(item.path)
        # An empty segment means an empty path or a leading, trailing or
        # doubled separator.
        if not path or any(not field for field in path):
            raise ValueError("Invalid path in JSON: {!r}".format(item.path))

        if not item.values:
            raise ValueError("Values for path cannot be empty: {!r}".format(item.path))

        path = [_unescape_dots(path_item) for path_item in path]

        yield path, list(item.values), item.select_type == dproxy_pb2.INCLUDE


def parse_search_query_args(
    req,
    config_prefix,
):
    """Convert a log search request into keyword arguments for the logs controller.

    The table path is built by ydbutil.make_table_path_prefix() from the
    ``<prefix>.db`` and ``<prefix>.table_prefix`` config values and from the
    request's project, deploy unit and stage ids. An empty project or deploy
    unit id is replaced by 'UNKNOWN'.

    :param req: search request message
    :param config_prefix: YdbLogsConfig member selecting the config section
    :return: dict of search arguments. Every filter is a
        ``(values, include, op)`` triple from get_value_list(), except
        ``user_fields``, which is the lazy generator returned by
        parse_user_fields()
    """
    section = config_prefix.value
    db_name = config.get_value(section + '.db')
    path_fragments = (req.project_id or 'UNKNOWN', req.deploy_unit_id or 'UNKNOWN', req.stage_id)
    table_prefix = config.get_value(section + '.table_prefix')

    # Fields that need legacy aliases or other special handling.
    args = {
        'table_path': ydbutil.make_table_path_prefix(db_name, path_fragments, table_prefix),
        'limit': get_limit(req),
        'timestamp_range': get_timestamp_range(req),
        'search_patterns': get_value_list(
            req,
            'message_list',
            old_list_fields=['search_pattern_list'],
            old_string_field='search_pattern',
            decode=True,
        ),
        'containers': get_value_list(
            req,
            'container_id_list',
            old_list_fields=['container_list'],
        ),
        'user_fields': parse_user_fields(
            get_values_pb(
                req,
                'context_list',
                old_repeated_field_name='user_field_list',
            )
        ),
        'log_levels': get_value_list(req, 'log_level_list', old_string_field='log_level'),
        'continuation_token': req.continuation_token,
        'order': req.order,
    }

    # Plain list filters: (argument name, request field name).
    plain_filters = (
        ('log_levels_int', 'log_level_int_list'),
        ('hosts', 'host_list'),
        ('pods', 'pod_list'),
        ('boxes', 'box_list'),
        ('workloads', 'workload_list'),
        ('logger_names', 'logger_name_list'),
        ('pod_transient_fqdns', 'pod_transient_fqdn_list'),
        ('pod_persistent_fqdns', 'pod_persistent_fqdn_list'),
        ('node_fqdns', 'node_fqdn_list'),
        ('thread_names', 'thread_name_list'),
        ('request_ids', 'request_id_list'),
        ('stack_traces', 'stack_trace_list'),
    )
    for arg_name, field_name in plain_filters:
        args[arg_name] = get_value_list(req, field_name)

    return args


def parse_awacs_search_query_args(req):
    """Convert an awacs log search request into a dict of search arguments.

    The table path is built by ydbutil.make_table_path_prefix() from the
    'awacs_ydb_logs.db' and 'awacs_ydb_logs.table_prefix' config values and
    from the request namespace ('UNKNOWN' when the namespace is empty).

    :param req: awacs search request message
    :return: dict of search arguments. List filters are
        ``(values, include, op)`` triples from get_value_list(); ``cookies``
        and ``headers`` are the lazy generators returned by
        parse_user_fields()
    """
    path_fragments = [req.namespace or 'UNKNOWN']

    args = {
        'table_path': ydbutil.make_table_path_prefix(
            config.get_value('awacs_ydb_logs.db'),
            path_fragments,
            config.get_value('awacs_ydb_logs.table_prefix'),
        ),
        'limit': get_limit(req),
        'timestamp_range': get_timestamp_range(req),
        'order': req.order,
        'continuation_token': req.continuation_token,
        'cookies': parse_user_fields(req.cookie_fields_list),
        'headers': parse_user_fields(req.header_fields_list),
    }

    # Plain list filters: (argument name, request field name).
    list_filters = (
        ('env_types', 'env_type_list'),
        ('domains', 'domain_list'),
        ('upstreams', 'upstream_list'),
        ('client_ips', 'client_ip_list'),
        ('client_ports', 'client_port_list'),
        ('hostnames', 'hostname_list'),
        ('yandexuids', 'yandexuid_list'),
        ('methods', 'method_list'),
        ('reasons', 'reason_list'),
        ('request_ids', 'request_id_list'),
        ('requests', 'request_list'),
        ('statuses', 'status_list'),
        ('process_time_list', 'process_time_list'),
    )
    for arg_name, field_name in list_filters:
        args[arg_name] = get_value_list(req, field_name)

    return args


@ydb_logs_service_blueprint.method(
    'SearchLogEntries',
    request_type=dproxy_pb2.SearchLogEntriesRequest,
    response_type=dproxy_pb2.SearchLogEntriesResponse,
    need_authentication=True,
)
@request_limiter.limit(30)
def search(req, auth_subject):
    """Handle SearchLogEntries for the default YDB logs backend.

    Parses the request with the 'ydb_logs' config section and runs the search
    through ``g.ctx.ydb_logs_ctl.search``. The parsing and the search are
    timed in the 'search_logs_execution_time' histogram.

    :param req: dproxy_pb2.SearchLogEntriesRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: the result of ``ydb_logs_ctl.search``
    """
    timer = g.ctx.metrics_registry.get_histogram('search_logs_execution_time').timer()
    with timer:
        query_args = parse_search_query_args(req, YdbLogsConfig.DEFAULT)
        timeout = config.get_value('ydb_logs.ydb_search_timeout', None)
        response = g.ctx.ydb_logs_ctl.search(query_timeout=timeout, **query_args)

    return response


@olap_ydb_logs_service_blueprint.method(
    'SearchLogEntries',
    request_type=dproxy_pb2.SearchLogEntriesRequest,
    response_type=dproxy_pb2.SearchLogEntriesResponse,
    need_authentication=True,
)
@request_limiter.limit(30)
def olap_search(req, auth_subject):
    """Handle SearchLogEntries for the OLAP YDB logs backend.

    Parses the request with the 'olap_ydb_logs' config section and runs the
    search through ``g.ctx.olap_ydb_logs_ctl.search``. The parsing and the
    search are timed in the 'olap_search_logs_execution_time' histogram.

    :param req: dproxy_pb2.SearchLogEntriesRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: the result of ``olap_ydb_logs_ctl.search``
    """
    timer = g.ctx.metrics_registry.get_histogram('olap_search_logs_execution_time').timer()
    with timer:
        query_args = parse_search_query_args(req, YdbLogsConfig.OLAP)
        timeout = config.get_value('olap_ydb_logs.ydb_search_timeout', None)
        response = g.ctx.olap_ydb_logs_ctl.search(query_timeout=timeout, **query_args)

    return response


@awacs_ydb_logs_service_blueprint.method(
    'SearchLogEntries',
    request_type=awacs_pb2.SearchLogEntriesRequest,
    response_type=awacs_pb2.SearchLogEntriesResponse,
    need_authentication=True,
)
@request_limiter.limit(30)
def awacs_search(req, auth_subject):
    """Handle SearchLogEntries for the awacs logs backend.

    Parses the request and runs the search through
    ``g.ctx.awacs_ydb_logs_ctl.awacs_search``. The parsed arguments are passed
    as ``known_args`` and the limit is additionally passed on its own. The
    parsing and the search are timed in the
    'awacs_search_logs_execution_time' histogram.

    :param req: awacs_pb2.SearchLogEntriesRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: the result of ``awacs_ydb_logs_ctl.awacs_search``
    """
    timer = g.ctx.metrics_registry.get_histogram('awacs_search_logs_execution_time').timer()
    with timer:
        query_args = parse_awacs_search_query_args(req)
        timeout = config.get_value('awacs_ydb_logs.ydb_search_timeout', None)
        response = g.ctx.awacs_ydb_logs_ctl.awacs_search(
            known_args=query_args,
            query_timeout=timeout,
            limit=query_args['limit'],
        )

    return response


@ydb_logs_service_blueprint.method(
    'DummyAction',
    request_type=dproxy_pb2.SearchLogEntriesRequest,
    response_type=dproxy_pb2.SearchLogEntriesResponse,
    need_authentication=True,
)
def dummy(req, auth_subject):
    """Handle DummyAction: return an empty SearchLogEntriesResponse.

    The request is not inspected. Creating the response is timed in the
    'dummy_action_execution_time' histogram.

    :param req: dproxy_pb2.SearchLogEntriesRequest; not used
    :param auth_subject: supplied by the RPC layer; not used
    :return: a freshly created dproxy_pb2.SearchLogEntriesResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('dummy_action_execution_time').timer()
    with timer:
        response = dproxy_pb2.SearchLogEntriesResponse()

    return response


@ydb_logs_service_blueprint.method(
    'GetQueryContextKeys',
    request_type=dproxy_pb2.GetQueryContextKeysRequest,
    response_type=dproxy_pb2.GetQueryContextKeysResponse,
    need_authentication=True,
)
@request_limiter.limit(60)
def get_query_context_keys(req, auth_subject):
    """Handle GetQueryContextKeys for the default YDB logs backend.

    When the request carries a nested ``request``, it is parsed with the
    'ydb_logs' config section and the keys returned by
    ``g.ctx.ydb_logs_ctl.get_context_keys`` are added to the response
    candidates. Without a nested request the response stays empty. The work
    is timed in the 'get_query_context_keys_execution_time' histogram.

    :param req: dproxy_pb2.GetQueryContextKeysRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: dproxy_pb2.GetQueryContextKeysResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('get_query_context_keys_execution_time').timer()
    with timer:
        response = dproxy_pb2.GetQueryContextKeysResponse()
        if req.HasField('request'):
            known_args = parse_search_query_args(req.request, YdbLogsConfig.DEFAULT)
            timeout = config.get_value('ydb_logs.ydb_context_suggest_timeout', None)
            keys = g.ctx.ydb_logs_ctl.get_context_keys(
                known_args=known_args,
                key_prefix=req.key_prefix,
                query_timeout=timeout,
            )
            response.candidates.extend(keys)

    return response


@olap_ydb_logs_service_blueprint.method(
    'GetQueryContextKeys',
    request_type=dproxy_pb2.GetQueryContextKeysRequest,
    response_type=dproxy_pb2.GetQueryContextKeysResponse,
    need_authentication=True,
)
@request_limiter.limit(60)
def olap_get_query_context_keys(req, auth_subject):
    """Handle GetQueryContextKeys for the OLAP YDB logs backend.

    When the request carries a nested ``request``, it is parsed with the
    'olap_ydb_logs' config section and the keys returned by
    ``g.ctx.olap_ydb_logs_ctl.get_context_keys`` are added to the response
    candidates. Without a nested request the response stays empty. The work
    is timed in the 'olap_get_query_context_keys_execution_time' histogram.

    :param req: dproxy_pb2.GetQueryContextKeysRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: dproxy_pb2.GetQueryContextKeysResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('olap_get_query_context_keys_execution_time').timer()
    with timer:
        response = dproxy_pb2.GetQueryContextKeysResponse()
        if req.HasField('request'):
            known_args = parse_search_query_args(req.request, YdbLogsConfig.OLAP)
            timeout = config.get_value('olap_ydb_logs.ydb_context_suggest_timeout', None)
            keys = g.ctx.olap_ydb_logs_ctl.get_context_keys(
                known_args=known_args,
                key_prefix=req.key_prefix,
                query_timeout=timeout,
            )
            response.candidates.extend(keys)

    return response


@awacs_ydb_logs_service_blueprint.method(
    'GetQueryContextKeys',
    request_type=awacs_pb2.GetQueryContextKeysRequest,
    response_type=awacs_pb2.GetQueryContextKeysResponse,
    need_authentication=True,
)
@request_limiter.limit(60)
def awacs_get_query_context_keys(req, auth_subject):
    """Handle GetQueryContextKeys for the awacs logs backend.

    ``req.key_type`` is translated into a field name through
    AwacsGetQueryContextKeysRequest_ENUM_TO_FIELD. When the request carries a
    nested ``request``, it is parsed and the keys returned by
    ``g.ctx.awacs_ydb_logs_ctl.awacs_get_context_keys`` are added to the
    response candidates. Without a nested request the response stays empty.
    The work is timed in the 'awacs_get_query_context_keys_execution_time'
    histogram.

    :param req: awacs_pb2.GetQueryContextKeysRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: awacs_pb2.GetQueryContextKeysResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('awacs_get_query_context_keys_execution_time').timer()
    with timer:
        response = awacs_pb2.GetQueryContextKeysResponse()
        # NOTE(review): an unmapped key_type yields field=None, which is still
        # passed to the controller (the suggests handler skips the call in
        # that case) -- confirm awacs_get_context_keys copes with None.
        field = AwacsGetQueryContextKeysRequest_ENUM_TO_FIELD.get(req.key_type)
        if req.HasField('request'):
            known_args = parse_awacs_search_query_args(req.request)
            timeout = config.get_value('awacs_ydb_logs.ydb_context_suggest_timeout', None)
            keys = g.ctx.awacs_ydb_logs_ctl.awacs_get_context_keys(
                known_args=known_args,
                field=field,
                key_prefix=req.key_prefix,
                query_timeout=timeout,
            )
            response.candidates.extend(keys)

    return response


@ydb_logs_service_blueprint.method(
    'GetSearchQuerySuggests',
    request_type=dproxy_pb2.GetSearchQuerySuggestsRequest,
    response_type=dproxy_pb2.GetSearchQuerySuggestsResponse,
    need_authentication=True,
)
@request_limiter.limit(60)
def get_search_query_suggests(req, auth_subject):
    """Handle GetSearchQuerySuggests for the default YDB logs backend.

    ``req.key_type`` is translated through KEYTYPE_ENUM_TO_FIELD. For an
    unmapped key type the response stays empty. Otherwise the nested request
    is parsed with the 'ydb_logs' config section and the suggestions from
    ``g.ctx.ydb_logs_ctl.get_search_suggests`` are added to the response
    candidates. The query timeout is read from a different config key
    depending on ``req.full_search``. The work is timed in the
    'get_search_query_suggests_execution_time' histogram.

    :param req: dproxy_pb2.GetSearchQuerySuggestsRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: dproxy_pb2.GetSearchQuerySuggestsResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('get_search_query_suggests_execution_time').timer()
    with timer:
        response = dproxy_pb2.GetSearchQuerySuggestsResponse()
        key_type = KEYTYPE_ENUM_TO_FIELD.get(req.key_type)
        use_full_search = req.full_search

        if use_full_search:
            timeout_key = 'ydb_logs.ydb_full_suggest_timeout'
        else:
            timeout_key = 'ydb_logs.ydb_suggest_timeout'
        timeout = config.get_value(timeout_key, None)

        if key_type is not None:
            known_args = parse_search_query_args(req.request, YdbLogsConfig.DEFAULT)
            suggestions = g.ctx.ydb_logs_ctl.get_search_suggests(
                known_args=known_args,
                key_type=key_type,
                value_prefix=req.value_prefix,
                query_timeout=timeout,
                full_search=use_full_search,
            )
            response.candidates.extend(suggestions)

    return response


@olap_ydb_logs_service_blueprint.method(
    'GetSearchQuerySuggests',
    request_type=dproxy_pb2.GetSearchQuerySuggestsRequest,
    response_type=dproxy_pb2.GetSearchQuerySuggestsResponse,
    need_authentication=True,
)
@request_limiter.limit(60)
def olap_get_search_query_suggests(req, auth_subject):
    """Handle GetSearchQuerySuggests for the OLAP YDB logs backend.

    ``req.key_type`` is translated through KEYTYPE_ENUM_TO_FIELD. For an
    unmapped key type the response stays empty. Otherwise the nested request
    is parsed with the 'olap_ydb_logs' config section and the suggestions
    from ``g.ctx.olap_ydb_logs_ctl.get_search_suggests`` are added to the
    response candidates. The query timeout is read from a different config
    key depending on ``req.full_search``. The work is timed in the
    'olap_get_search_query_suggests_execution_time' histogram.

    :param req: dproxy_pb2.GetSearchQuerySuggestsRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: dproxy_pb2.GetSearchQuerySuggestsResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('olap_get_search_query_suggests_execution_time').timer()
    with timer:
        response = dproxy_pb2.GetSearchQuerySuggestsResponse()
        key_type = KEYTYPE_ENUM_TO_FIELD.get(req.key_type)
        use_full_search = req.full_search

        if use_full_search:
            timeout_key = 'olap_ydb_logs.ydb_full_suggest_timeout'
        else:
            timeout_key = 'olap_ydb_logs.ydb_suggest_timeout'
        timeout = config.get_value(timeout_key, None)

        if key_type is not None:
            known_args = parse_search_query_args(req.request, YdbLogsConfig.OLAP)
            suggestions = g.ctx.olap_ydb_logs_ctl.get_search_suggests(
                known_args=known_args,
                key_type=key_type,
                value_prefix=req.value_prefix,
                query_timeout=timeout,
                full_search=use_full_search,
            )
            response.candidates.extend(suggestions)

    return response


@awacs_ydb_logs_service_blueprint.method(
    'GetSearchQuerySuggests',
    request_type=awacs_pb2.GetSearchQuerySuggestsRequest,
    response_type=awacs_pb2.GetSearchQuerySuggestsResponse,
    need_authentication=True,
)
@request_limiter.limit(60)
def awacs_get_search_query_suggests(req, auth_subject):
    """Handle GetSearchQuerySuggests for the awacs logs backend.

    ``req.key_type`` is translated through
    AwacsGetSearchQuerySuggestsRequest_ENUM_TO_FIELD. For an unmapped key
    type the response stays empty. Otherwise the nested request is parsed and
    the suggestions from
    ``g.ctx.awacs_ydb_logs_ctl.awacs_get_search_suggests`` are added to the
    response candidates. The query timeout is read from a different config
    key depending on ``req.full_search``. The work is timed in the
    'awacs_get_search_query_suggests_execution_time' histogram.

    :param req: awacs_pb2.GetSearchQuerySuggestsRequest
    :param auth_subject: supplied by the RPC layer; not used by this handler
    :return: awacs_pb2.GetSearchQuerySuggestsResponse
    """
    timer = g.ctx.metrics_registry.get_histogram('awacs_get_search_query_suggests_execution_time').timer()
    with timer:
        response = awacs_pb2.GetSearchQuerySuggestsResponse()
        field = AwacsGetSearchQuerySuggestsRequest_ENUM_TO_FIELD.get(req.key_type)
        use_full_search = req.full_search

        if use_full_search:
            timeout_key = 'awacs_ydb_logs.ydb_full_suggest_timeout'
        else:
            timeout_key = 'awacs_ydb_logs.ydb_suggest_timeout'
        timeout = config.get_value(timeout_key, None)

        if field is not None:
            known_args = parse_awacs_search_query_args(req.request)
            suggestions = g.ctx.awacs_ydb_logs_ctl.awacs_get_search_suggests(
                known_args=known_args,
                field=field,
                value_prefix=req.value_prefix,
                query_timeout=timeout,
                full_search=use_full_search,
            )
            response.candidates.extend(suggestions)

    return response
