#!/usr/bin/python
# -*- coding: utf-8 -*-
import collections
import json
import logging
import os
import textwrap

from sandbox import sdk2
from sandbox import sandboxsdk
from sandbox import common
from sandbox.common.errors import TaskFailure
from sandbox.common.types.client import Tag
from sandbox.common.types.task import Status
from sandbox.projects import resource_types
from sandbox.sdk2.helpers import subprocess
from sandbox.projects.websearch.begemot.tasks.BegemotYT.CoredumpBacktrace import CoredumpBacktrace
from sandbox.projects.websearch.begemot.tasks.BegemotYT.common import CommonYtParameters, utc_from_now
import sandbox.projects.websearch.begemot.resources as br
from sandbox.projects.common.wizard import utils as wizard_utils


class BegemotMapper(sdk2.Task):
    yt, yt_client, env = None, None, None
    logger = logging.getLogger('WIZARD_REDUCER')
    logger.setLevel(logging.DEBUG)

    class Parameters(CommonYtParameters):
        # Task inputs. The YT connection settings, input/eventlog tables, output path and wait time
        # that the task reads (yt_proxy, yt_pool, input_table, ...) are not declared here;
        # presumably they come from CommonYtParameters -- confirm against that class.

        # Selects which mapper binary is run; the sub-groups below are shown per selected value.
        with sdk2.parameters.RadioGroup('Service to test', required=True) as service:
            service.values['begemot'] = service.Value(value='Begemot', default=True)
            service.values['wizard'] = service.Value(value='Wizard')

        with service.value['wizard']:
            wizard_mapper = sdk2.parameters.Resource(
                'Wizard mapper binary',
                resource_type=resource_types.WIZARDYT,
                required=True,
            )
            wizard_config = sdk2.parameters.Resource(
                'Wizard config',
                resource_type=resource_types.WIZARD_CONFIG,
                required=True,
            )

        with service.value['begemot']:
            begemot_mapper = sdk2.parameters.Resource(
                'Begemot mapper binary',
                resource_type=br.BEGEMOT_YT_MAPPER,
                required=True,
            )
            # The label used to duplicate the one of begemot_mapper, which made the two
            # resource pickers indistinguishable.
            eventlog_mapper = sdk2.parameters.Resource(
                'Eventlog mapper binary',
                resource_type=br.BEGEMOT_YT_EVENTLOG_MAPPER,
                required=True,
            )
            begemot_config = sdk2.parameters.String('begemot_config in app_host context')

        shard = sdk2.parameters.Resource(
            'Shard paths file',
            # resource_types=br.BEGEMOT_CYPRESS_SHARD,  # FIXME: invalid argument (SANDBOX-6404)
            required=True
        )
        fresh = sdk2.parameters.Resource('Fresh paths file', resource_type=br.BEGEMOT_CYPRESS_SHARD)
        results_store_time = sdk2.parameters.Integer('Days to store answers and mapped requests', default=3)
        max_failed_job_count = sdk2.parameters.Integer('Max failed job count', default=3)
        # NOTE(review): job_count is not read by any method of this task; confirm whether the
        # mapper's --job_count argument is supposed to take its value from here.
        job_count = sdk2.parameters.Integer('Yt job count', default=1)
        threads = sdk2.parameters.Integer('Number of threads', default=5)
        columns = sdk2.parameters.String('Columns for direct mode. Query column, then region column', default='')
        # Output parameters, filled in by the task while it runs.
        with sdk2.parameters.Output:
            answers = sdk2.parameters.String('Begemot answers')
            errors = sdk2.parameters.String('Begemot error answers')
            empty = sdk2.parameters.String('Begemot empty answers')
            eventlog_mapper_finished = sdk2.parameters.Bool('Eventlog mapper finished', default=False)
            eventlog_mapper_result = sdk2.parameters.String('Eventlog mapper output table')
            eventlog_contains_errors = sdk2.parameters.Bool('Eventlog contains errors', default=False)

    class Requirements(sdk2.Task.Requirements):
        # Host resources requested for the task (units are defined by the Sandbox SDK;
        # presumably megabytes for disk_space/ram -- confirm).
        disk_space = 15000
        ram = 8000
        cores = 1
        environments = [sandboxsdk.environments.PipEnvironment('yandex-yt', version='0.10.8')]
        # client_tags is assigned exactly once. A preceding `client_tags = Tag.GENERIC` was
        # unconditionally overwritten by this line, so it never took effect and was dropped.
        client_tags = wizard_utils.ALL_SANDBOX_HOSTS_TAGS & ~wizard_utils.BEGEMOT_INVALID_HARDWARE

        class Caches(sdk2.Requirements.Caches):
            pass  # means that task do not use any shared caches

    class Context(sdk2.Task.Context):
        # Row count of the errors table; written by on_execute once the operation has finished
        # and displayed in the task footer.
        errors_count = 0
        # Switched to True by on_execute after the mapper has been started; header() renders the
        # operation link only when it is set.
        wait_mode = False

    def get_mapper(self):
        """Return the mapper binary resource that corresponds to the selected service."""
        params = self.Parameters
        return params.wizard_mapper if params.service == 'wizard' else params.begemot_mapper

    def get_eventlog_mapper(self):
        """Return the resource passed in the ``eventlog_mapper`` task parameter."""
        params = self.Parameters
        return params.eventlog_mapper

    def create_output(self):
        """
        Create the ``output_path`` map node in YT.

        The node gets an ``expiration_time`` attribute computed from ``results_store_time``;
        whether an already existing node is tolerated is controlled by the ``ignore_existing`` parameter.
        """
        params = self.Parameters
        node_attributes = {'expiration_time': utc_from_now(params.results_store_time)}
        self.yt_client.create(
            'map_node',
            params.output_path,
            recursive=True,
            ignore_existing=params.ignore_existing,
            attributes=node_attributes,
        )

    def on_execute(self):
        """
        Start the mapper on the first run, then check the state of its YT operation on every wake-up.

        First run (``run_mapper`` memoize stage): the mapper binary is launched, the operation id it
        reports through a pipe is stored in the context and the task is suspended with ``sdk2.WaitTime``.
        Subsequent runs: the operation state is requested; the task keeps waiting while the operation
        is not finished, and fails if the operation finished unsuccessfully or left rows in the
        errors table.

        :raises sdk2.WaitTime: to suspend the task until the next state check
        :raises sdk2.WaitTask: while the coredump backtrace subtask is running
        :raises TaskFailure: when the operation failed or the errors table is not empty
        """
        import yt.wrapper as yt
        self.yt = yt
        token = sdk2.Vault.data(self.Parameters.yt_token_vault_owner, self.Parameters.yt_token_vault_name)
        self.yt_client = yt.YtClient(self.Parameters.yt_proxy, token)
        # Environment handed to the mapper subprocesses.
        self.env = os.environ.copy()
        self.env['YT_TOKEN'] = token
        self.env['YT_PROXY'] = self.Parameters.yt_proxy
        self.env['YT_POOL'] = self.Parameters.yt_pool
        self.logger.info('wait_mode = {}'.format(self.Context.wait_mode))

        with self.memoize_stage.run_mapper:
            self._start_mapper()

        self.logger.info('Checking mapper operation state')
        try:
            operation = yt.Operation(self.Context.operation_id, client=self.yt_client)
            # The state request is kept inside the try block so that a proxy outage at this
            # point also leads to a retry instead of failing the task.
            state = operation.get_state()
        except yt.YtProxyUnavailable as x:
            # The exception class is taken from the locally imported yt module: the bare name
            # is not imported anywhere in this file and used to raise NameError here.
            self.logger.warning('YtProxyUnavailable: %s', x)
            raise sdk2.WaitTime(self.Parameters.wait_time)

        if state.is_unsuccessfully_finished():
            self._fail_unsuccessful_operation(operation)
        elif state.is_finished():
            self.logger.info('Reduce operation finished')
            self.Context.errors_count = yt.row_count(self.Parameters.errors, client=self.yt_client)
            # Any row in the errors table is a failure; the former `> 1` check let a single
            # error row pass although the failure message says "not empty".
            if self.Context.errors_count > 0:
                self.Context.rules_errors = self.parse_error_table(self.Parameters.errors)
                self.Parameters.eventlog_contains_errors = True
                raise TaskFailure('Error table is not empty')
        else:
            self.logger.info('Reduce operation not finished')
            raise sdk2.WaitTime(self.Parameters.wait_time)

    def _start_mapper(self):
        """Launch the mapper binary, store the id of the operation it reports and suspend the task."""
        yt = self.yt
        self.create_output()
        input_table = self.Parameters.input_table or self.get_processed_eventlog()
        shard = str(sdk2.ResourceData(self.Parameters.shard).path)
        self.Parameters.answers = yt.ypath_join(self.Parameters.output_path, 'answers')
        self.Parameters.empty = self.Parameters.answers + '.empty'
        self.Parameters.errors = self.Parameters.answers + '.error'
        self.logger.info('Downloaded resources')
        # The write end of the pipe is handed to the mapper via --pipe_fd; the operation id is
        # read back from the read end after the mapper process has exited.
        r, w = os.pipe()
        args = [
            str(sdk2.ResourceData(self.get_mapper()).path),
            '--input', input_table,
            '--output', self.Parameters.answers,
            '--cypress_shard_file', shard,
            '--threads', str(self.Parameters.threads),
            '--pipe_fd', str(w),
            '--max_failed_job_count', str(self.Parameters.max_failed_job_count),
        ]
        if self.Parameters.columns:
            args.extend(['--direct', self.Parameters.columns])
        if self.Parameters.service == 'wizard':
            config = str(sdk2.ResourceData(self.Parameters.wizard_config).path)
            args.extend(['--config_file', config])
        else:
            args.extend(['--shard', self.Parameters.shard.shard_name])
            # NOTE(review): the job count is a literal here and the `job_count` task parameter
            # is ignored; confirm which of the two is intended before changing it.
            args.extend(['--job_count', '5'])
        if self.Parameters.begemot_config:
            args.extend(['--begemot_config', self.Parameters.begemot_config])
        if self.Parameters.fresh:
            args.extend(['--cypress_fresh_file', str(sdk2.ResourceData(self.Parameters.fresh).path)])
        with sdk2.helpers.ProcessLog(self, logger='mapper') as pl:
            pl.logger.propagate = 1
            # close_fds=False: the child needs the pipe descriptor passed in --pipe_fd.
            subprocess.check_call(args, stdout=pl.stdout, stderr=subprocess.STDOUT, env=self.env, close_fds=False)
        os.close(w)
        self.logger.debug('Trying to read operation id from pipe')
        with os.fdopen(r) as operation_id_pipe:
            # readline() keeps the line terminator; strip it so that it does not end up in
            # YT requests and in the operation URL.
            operation_id = operation_id_pipe.readline().strip()
        if not operation_id:
            raise TaskFailure('Mapper did not report an operation id')
        self.Context.operation_id = operation_id
        self.logger.debug('Read operation id = {}'.format(self.Context.operation_id))
        self.Context.wait_mode = True
        raise sdk2.WaitTime(self.Parameters.wait_time)

    def _fail_unsuccessful_operation(self, operation):
        """Publish the error, coredump backtrace and stderr of a failed operation, then fail the task."""
        yt = self.yt
        with self.memoize_stage.show_operation_error:
            error = operation.get_attributes(fields=['result'])['result']['error']
            human_readable_error = BegemotMapper.parse_operation_error(error)
            self.set_info('Operation failed with error:\n{}'.format(json.dumps(human_readable_error, indent=2)))

        jobs = yt.list_jobs(self.Context.operation_id, job_state='failed', client=self.yt_client)['jobs']
        if not jobs:
            raise TaskFailure('Operation failed, but no failed jobs were found. Check operation error here %s' % operation.url)

        jobs_with_coredump = [job for job in jobs if job.get('core_infos')]
        if jobs_with_coredump:
            # First pass: spawn the subtask that extracts a backtrace and wait for it.
            with self.memoize_stage.get_coredump_backtrace:
                self.Context.gdb_output = resource_types.OTHER_RESOURCE(self, 'Mapper backtrace', 'core_bt.txt').id
                self.Context.read_coredump_task = CoredumpBacktrace(
                    self, binary=self.get_mapper(),
                    coredump_table=self.Parameters.answers + '.core',
                    job_id=jobs_with_coredump[0]['id'],
                    coredump_size=jobs_with_coredump[0]['core_infos'][0]['size'],
                    gdb_output=self.Context.gdb_output,
                    yt_proxy=self.Parameters.yt_proxy,
                    yt_token_vault_name=self.Parameters.yt_token_vault_name,
                    yt_token_vault_owner=self.Parameters.yt_token_vault_owner,
                ).enqueue().id
                raise sdk2.WaitTask(self.Context.read_coredump_task, Status.Group.FINISH | Status.Group.BREAK)
            # Second pass: the subtask is over, show what it produced.
            task = CoredumpBacktrace.find(id=self.Context.read_coredump_task, children=True).first()
            if task.status != Status.SUCCESS:
                raise TaskFailure('Read coredump task failed')
            gdb_output = sdk2.ResourceData(sdk2.Resource.find(id=self.Context.gdb_output).first())
            self.set_info('Backtrace:\n\n%s' % gdb_output.path.read_text())
        stderr = yt.get_job_stderr(self.Context.operation_id, jobs[0]['id'], client=self.yt_client).read()
        self.set_info('Stderr:\n\n%s' % stderr)
        self.Context.failed_begemot_stderr = stderr
        raise TaskFailure('Reducer finished unsuccessfully')

    def get_processed_eventlog(self):
        """
        Return the table with the eventlog mapper output for the ``eventlog_table`` parameter.

        If another BegemotMapper task with the same eventlog table and mapper resource has
        ``eventlog_mapper_finished`` set and its result table still exists, that table is reused
        and its ``expiration_time`` attribute is refreshed; otherwise the eventlog mapper is run now.

        :return: value of the ``eventlog_mapper_result`` output parameter
        :raises TaskFailure: if the ``eventlog_table`` parameter is empty
        """
        params = self.Parameters
        if not params.eventlog_table:
            raise TaskFailure('Either input_table or eventlog_table must be specified')
        self.logger.info('Trying to find finished task with input = {}'.format(params.eventlog_table))

        if params.service == 'wizard':
            mapper_id = params.wizard_mapper.id
        else:
            mapper_id = params.begemot_mapper.id
        search_input = {
            'eventlog_table': params.eventlog_table,
            '%s_mapper' % params.service: mapper_id,
        }
        donor = sdk2.Task.find(
            task_type=BegemotMapper,
            input_parameters=search_input,
            output_parameters={'eventlog_mapper_finished': True},
            children=True,
        ).first()

        reusable = donor is not None and self.yt_client.exists(donor.Parameters.eventlog_mapper_result)
        if reusable:
            self.logger.info('Found finished task with mapper, id = {}'.format(donor.id))
            params.eventlog_mapper_result = donor.Parameters.eventlog_mapper_result
            # Keep the reused table alive for as long as this task's results are stored.
            new_expiration = utc_from_now(params.results_store_time)
            self.yt_client.set_attribute(params.eventlog_mapper_result, 'expiration_time', new_expiration)
            self.logger.info(
                'Updated ttl for table {} to {}'.format(params.eventlog_mapper_result, new_expiration)
            )
        else:
            self.logger.info("Previous task not found or its eventlog mapper result doesn't exist")
            params.eventlog_mapper_result = self.run_eventlog_mapper()

        params.eventlog_mapper_finished = True
        self.logger.info('Eventlog mapper result = {}'.format(params.eventlog_mapper_result))
        return params.eventlog_mapper_result

    def parse_error_table(self, err_table):
        """
        Count the rows of ``err_table`` per value of their ``rule`` field.

        :param err_table: path of the YT table to read
        :return: ``collections.Counter`` mapping a rule to the number of rows that mention it
        """
        table_rows = self.yt_client.read_table(err_table, format='json')
        return collections.Counter(record["rule"] for record in table_rows)

    def run_eventlog_mapper(self):
        """
        Run the eventlog mapper binary over the ``eventlog_table`` parameter.

        The output table ``mapped_eventlog`` is created under ``output_path`` with an
        ``expiration_time`` derived from ``results_store_time`` before the binary is started.

        :return: path of the output table
        """
        params = self.Parameters
        self.logger.info('Starting mapper')
        result_table = self.yt.ypath_join(params.output_path, 'mapped_eventlog')
        table_attributes = {'expiration_time': utc_from_now(params.results_store_time)}
        self.yt_client.create('table', result_table, recursive=True, attributes=table_attributes)

        binary_path = str(sdk2.ResourceData(self.get_eventlog_mapper()).path)
        command = [binary_path, '--input', params.eventlog_table, '--output', result_table]
        with sdk2.helpers.ProcessLog(self, logger='eventlog_mapper') as process_log:
            process_log.logger.propagate = 1
            subprocess.check_call(command, stdout=process_log.stdout, stderr=subprocess.STDOUT, env=self.env)

        self.logger.info('Finished mapper, result: {}'.format(result_table))
        return result_table

    @staticmethod
    def parse_operation_error(error):
        """
        Reduce an operation error to its human-readable messages.

        :param error: dict with the operation error; nested errors are read from ``inner_errors``
        :return: dict-like object holding only ``message`` and, when there are nested errors,
                 ``inner_errors`` with the same reduction applied recursively
        """
        summary = collections.defaultdict(list)
        summary['message'] = error['message']
        for nested in error.get('inner_errors', []):
            summary['inner_errors'].append(BegemotMapper.parse_operation_error(nested))
        return summary

    def get_table_url(self, table_path):
        """Build the YT web interface navigation link for ``table_path`` on the ``yt_proxy`` cluster."""
        proxy = self.Parameters.yt_proxy
        return "https://yt.yandex-team.ru/{}/#page=navigation&path={}".format(proxy, table_path)

    def get_operation_url(self):
        """Build the link to the details page of the operation whose id is stored in the context."""
        url_template = 'http://{}.yt.yandex.net/#page=operation&mode=detail&id={}&tab=details'
        return url_template.format(self.Parameters.yt_proxy, self.Context.operation_id)

    @sdk2.header()
    def header(self):
        """Return an HTML link to the current operation, or None while ``wait_mode`` is not set."""
        if not self.Context.wait_mode:
            return None
        operation_url = self.get_operation_url()
        return '<a target="_blank" href="{}">Current operation</a>'.format(operation_url)

    @sdk2.footer()
    def footer(self):
        """Return HTML links to the answers, empty and errors tables, with the error count in the last one."""
        params = self.Parameters
        template = '\n'.join((
            '<a target="_blank" href=\'{}\'>Answers</a><br/>',
            '<a target="_blank" href=\'{}\'>Empty</a><br/>',
            '<a target="_blank" href=\'{}\'>Errors: {}</a><br/>',
        ))
        answers_url = self.get_table_url(params.answers)
        empty_url = self.get_table_url(params.empty)
        errors_url = self.get_table_url(params.errors)
        return template.format(answers_url, empty_url, errors_url, self.Context.errors_count).strip()
