# -*- coding: utf-8 -*-

import logging
import os
import datetime

from sandbox import sdk2

from sandbox.common.types import task as ctt, resource as ctr, misc as ctm
from sandbox.common.errors import TaskFailure
from sandbox.common.utils import singleton_property

from sandbox.projects.sandbox_ci import parameters
from sandbox.projects.sandbox_ci.task import ManagersTaskMixin
from sandbox.projects.sandbox_ci.utils import flow
from sandbox.projects.sandbox_ci.utils.context import Node
from sandbox.projects.sandbox_ci.utils.process import run_process
from sandbox.projects.sandbox_ci.utils import env


# Author and owner used as filters when looking up tasks whose logs are collected
# (see `SandboxCiLogCollector._load_tasks`).
TASK_AUTHOR = 'robot-serp-bot'
TASK_OWNER = 'SANDBOX_CI_SEARCH_INTERFACES'
# Limit applied to the task lookup query of a single collector run.
TASKS_LIMIT = 1000
# Passed to `flow.parallel` together with the list of log resources to process.
MAX_PARALLEL = 20

# Logbroker settings substituted into the push-client command line
# (see `SandboxCiLogCollector._build_push_client_cmd`).
LOGBROKER_HOST = 'logbroker.yandex.net'
LOGBROKER_PORT = 2135
LOGBROKER_TOPIC = '/fei/sandbox_ci/task_logs'
LOGBROKER_TVM_SERVER_ID = 2001059
LOGBROKER_TVM_CLIENT_ID = 2011566


def load_all_in_chunks(request, chunk_size=1000):
    """
    Loads every item matched by a query, fetching it page by page.

    :param request: query object providing `limit()`, `offset()`, iteration and a `count` attribute
    :param chunk_size: page size used for each fetch
    :type chunk_size: int
    :return: items of all pages, in page order
    :rtype: list
    """
    paged = request.limit(chunk_size)

    # The first page is iterated before `count` is read from it.
    head = paged.offset(0)
    items = list(head)
    total = head.count

    # Remaining pages start at chunk_size, 2 * chunk_size, ... below the total.
    for page_start in range(chunk_size, total, chunk_size):
        items.extend(paged.offset(page_start))

    return items


class SandboxCiLogCollector(ManagersTaskMixin, sdk2.Task):
    """
    Collects `test.out.txt` logs from `TASK_LOGS` resources and pushes them to Logbroker.

    A run looks up finished/broken tasks of `TASK_AUTHOR`/`TASK_OWNER` whose id is greater than
    the id stored by the latest finished collector run, loads their `TASK_LOGS` resources and,
    for each resource containing `test.out.txt`, pipes the output of `@yandex-int/testout`
    into push-client.
    """

    class Requirements(sdk2.Requirements):
        dns = ctm.DnsType.DNS64
        disk_space = 50 * 1024

    class Parameters(sdk2.Parameters):
        with sdk2.parameters.Group('Advanced'):
            _container = parameters.environment_container()

        node_js = parameters.NodeJsParameters()

    # Named shell commands run through `self.lifecycle(<name>)`.
    lifecycle_steps = {
        'npm_install': 'npm i --production --registry=http://npm.yandex-team.ru @yandex-int/testout',
    }

    @singleton_property
    def _push_client_path(self):
        """
        Path to the data of the stable, ready `STATBOX_PUSHCLIENT` resource.

        :rtype: str
        :raises TaskFailure: when no matching resource is found
        """
        push_client_res = sdk2.Resource['STATBOX_PUSHCLIENT'].find(
            state=ctr.State.READY,
            attrs=dict(
                released=ctt.ReleaseStatus.STABLE,
            ),
        ).first()
        if not push_client_res:
            raise TaskFailure('STATBOX_PUSHCLIENT resource not found')

        return str(sdk2.ResourceData(push_client_res).path)

    # Lifecycle manager requires the following props/methods: `project_name`, `project_dir`, `working_path`, `project_path`.
    @property
    def project_name(self):
        return ''

    @property
    def project_dir(self):
        return self.working_path()

    def working_path(self, *args):
        return self.path(*args)

    def project_path(self, *args):
        return self.project_dir.joinpath(*args)

    def on_execute(self):
        """
        Task entry point: finds new log resources and pushes their logs in parallel.

        :raises TaskFailure: when the push-client resource cannot be found
        """
        self._init_environ()

        log_resources = self._prepare_log_resources()
        if not log_resources:
            logging.info('No suitable resources found')
            return

        logging.info('Found %s log resources', len(log_resources))

        # Resolve push-client before fanning out: inside `_extract_and_push_logs` a TaskFailure
        # raised by the lookup would be caught by its `except Exception` handler and only logged,
        # once per resource, leaving the task looking successful while nothing is pushed.
        push_client_path = self._push_client_path
        logging.info('Using push-client: %s', push_client_path)

        with Node(self.Parameters.node_js_version):
            self.lifecycle('npm_install')

            flow.parallel(
                lambda r: self._extract_and_push_logs(*r),
                log_resources,
                MAX_PARALLEL,
            )

    def _prepare_log_resources(self):
        """
        Pairs every found log resource with the task it belongs to.

        :rtype: list of (sdk2.Task, sdk2.Resource)
        """
        tasks = self._load_tasks()
        if not tasks:
            return []

        resources = self._load_resources(tasks)
        if not resources:
            return []

        tasks_by_id = {t.id: t for t in tasks}

        # A real list (not `map`) so that callers can take `len()` and test emptiness
        # on Python 3 as well as on Python 2.
        return [(tasks_by_id[r.task_id], r) for r in resources]

    def _load_tasks(self):
        """
        Loads tasks to process and stores the greatest seen task id in the task context.

        When a previously processed task is known, the lookup is restricted to tasks updated
        between that task's `updated` time and now, and tasks whose id is not greater than
        the previous one are dropped.

        :rtype: list of sdk2.Task
        """
        last_processed_task = self._get_last_processed_task()

        query = dict()
        if last_processed_task:
            query['updated'] = '{}..{}'.format(last_processed_task.updated.isoformat(), datetime.datetime.utcnow().isoformat())

        tasks = list(sdk2.Task.find(
            author=TASK_AUTHOR,
            owner=TASK_OWNER,
            status=(ctt.Status.Group.FINISH | ctt.Status.Group.BREAK),
            children=True,
            **query
        ).limit(TASKS_LIMIT))

        if last_processed_task:
            # List comprehension instead of `filter`: a lazy filter object is always truthy on
            # Python 3, which would defeat the emptiness check below.
            tasks = [t for t in tasks if t.id > last_processed_task.id]

        if tasks:
            self._store_last_processed_task(tasks)
        elif last_processed_task:
            # Nothing new: carry the previous marker over to this run's context.
            self._store_last_processed_task([last_processed_task])

        return tasks

    def _load_resources(self, tasks):
        """
        Loads all ready `TASK_LOGS` resources of the given tasks.

        :param tasks: tasks list
        :type tasks: list of sdk2.Task
        :rtype: list of sdk2.Resource
        """
        return load_all_in_chunks(sdk2.Resource['TASK_LOGS'].find(
            task=tasks,
            state=ctr.State.READY,
        ))

    def _extract_and_push_logs(self, task, resource):
        """
        Processes a single log resource; any error is logged and not propagated.

        :param task: sdk2.Task
        :param resource: sdk2.Resource
        """
        try:
            logging.info('Process log resource: %s (%s#%s)', resource.id, task.type, task.id)
            logs_path = sdk2.ResourceData(resource).path

            self._push_test_out_txt_log(
                path=str(logs_path / 'test.out.txt'),
                task=task,
            )
        except Exception:
            # Best effort: a failure on one resource must not stop the others.
            logging.exception('Error occurred while processing log resource %s', resource.id)

    def _push_test_out_txt_log(self, path, task):
        """
        Pipes the parsed `test.out.txt` into push-client; does nothing if the file is absent.

        :param path: path to test.out.txt
        :type path: str
        :param task: task
        :type task: sdk2.Task
        """
        if not os.path.isfile(path):
            return

        logging.info('Processing %s#%s: test.out.txt %s', task.type, task.id, path)

        # NOTE(review): both commands are joined into one shell string without quoting, so this
        # is only correct while `path` and `task.type` contain no shell metacharacters or spaces.
        run_process(['{} | {}'.format(
            ' '.join(self._build_testout_cmd(path, task)),
            ' '.join(self._build_push_client_cmd()),
        )], shell=True)

        logging.info('Processed %s#%s: test.out.txt %s', task.type, task.id, path)

    def _build_testout_cmd(self, path, task):
        """
        Builds @yandex-int/testout command to parse log file.

        :param path: log file path
        :type path: str
        :param task: task which produced log
        :type task: sdk2.Task
        :rtype: list of str
        """
        return [
            'npx @yandex-int/testout',
            path,
            '-s',
            '-l warn',
            '-o push-client',
            '--task-type={}'.format(task.type),
            '--task-id={}'.format(task.id),
        ]

    def _build_push_client_cmd(self):
        """
        Builds push-client command to push logs from stdin.

        :rtype: list of str
        """
        return [
            self._push_client_path,
            '-f',
            '--network:master-addr={}'.format(LOGBROKER_HOST),
            '--network:master-port={}'.format(LOGBROKER_PORT),
            '--network:tvm-server-id={}'.format(LOGBROKER_TVM_SERVER_ID),
            '--network:tvm-client-id={}'.format(LOGBROKER_TVM_CLIENT_ID),
            '--network:proto=pq',
            '--topic="{}"'.format(LOGBROKER_TOPIC),
            '--stdin',
        ]

    def _init_environ(self):
        """
        Exports environment variables obtained via `env.from_vault` and logs the environment.
        """
        env.export(env.from_vault(self))
        env.log(self)

    def _store_last_processed_task(self, tasks):
        """
        Stores the greatest id among the given tasks in `Context.last_task_id`.

        :param tasks: tasks (must be non-empty)
        :type tasks: list of sdk2.Task
        """
        self.Context.last_task_id = max(t.id for t in tasks)

    def _get_last_processed_task(self):
        """
        Returns the task referenced by `last_task_id` of the latest finished collector run.

        :rtype: sdk2.Task or None
        """
        task = SandboxCiLogCollector.find(
            status=ctt.Status.Group.FINISH,
        ).order(-sdk2.Task.id).first()

        # The strict `int` check also rejects a run that stored no id
        # (presumably an unset context field yields a non-int placeholder - TODO confirm).
        if not task or type(task.Context.last_task_id) is not int:
            return None

        return sdk2.Task.find(id=task.Context.last_task_id).first()
