# -*- coding: utf-8 -*-

import json
import datetime
import pytz

from sandbox import sdk2
from sandbox.common import errors
from sandbox.common.types.task import Status
from sandbox.projects.common import link_builder
from sandbox.sandboxsdk import environments

from sandbox.projects.websearch.performance_report.CalculateAppHostPerformanceReport import CalculateAppHostPerformanceReport


class UpdateAppHostPerformanceReport(sdk2.Task):
    """
        Task that updates the report with statistics on AppHost source timings.

        For every requested hour that has an event-log table in YT and is not
        yet present in the Statface report (or when recalculation is forced),
        a CalculateAppHostPerformanceReport subtask is launched. Results of
        finished subtasks are uploaded to the Statface report.
    """

    # Root YT directory; hourly tables are looked up under "1h", daily ones under "1d".
    APPHOST_EVENT_LOG_PATH = '//logs/apphost-event-log'

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        disk_space = 50  # 50 Mb
        environments = [
            environments.PipEnvironment("requests", version="2.18.4", custom_parameters=["--upgrade-strategy only-if-needed"]),
            environments.PipEnvironment('python-statface-client', version="0.154.0", custom_parameters=["--upgrade-strategy only-if-needed"]),
            environments.PipEnvironment('yandex-yt'),
        ]

        class Caches(sdk2.Requirements.Caches):
            pass  # do not use any shared caches

    class Parameters(sdk2.Task.Parameters):
        with sdk2.parameters.RadioGroup("Hours range type") as hours_range_type:
            hours_range_type.values['last_hours'] = hours_range_type.Value('Last N hours', default=True)
            hours_range_type.values['range'] = hours_range_type.Value('Select range')

            with hours_range_type.value['last_hours']:
                hours = sdk2.parameters.Integer('Number of last hours to calculate performance report', required=True)
            with hours_range_type.value['range']:
                start_hour = sdk2.parameters.String('Start hour to process in "YYYY-mm-dd HH:00:00" format', required=True)
                end_hour = sdk2.parameters.String('End hour to process in "YYYY-mm-dd HH:00:00" format', required=True)

        statface_report_path = sdk2.parameters.String('Statface report path', required=True)
        force_recalculate = sdk2.parameters.Bool('Recalculate already processed hours', default=False)

        yt_pool = sdk2.parameters.String('YT pool')
        yt_token_owner = sdk2.parameters.String('YT_TOKEN owner', required=True)
        statface_token_owner = sdk2.parameters.String('STATFACE_TOKEN owner', required=True)

    class Context(sdk2.Task.Context):
        # Ids of subtasks that were observed in a BREAK-group status.
        broken_subtask_ids = []

    def get_hours(self):
        """
            Build the list of datetimes (one per hour) that should be processed.

            In 'last_hours' mode the list starts at the current Europe/Moscow
            time and steps back one hour at a time, `Parameters.hours` entries
            in total. In 'range' mode the list goes from `Parameters.start_hour`
            to `Parameters.end_hour` inclusive, as naive datetimes parsed with
            the '%Y-%m-%d %H:%M:%S' format.

            :return: list of datetime.datetime objects
            :raises ValueError: if the range type is neither 'last_hours' nor 'range'
        """
        range_type = self.Parameters.hours_range_type
        one_hour = datetime.timedelta(hours=1)

        if range_type == 'last_hours':
            now = datetime.datetime.now(pytz.timezone('Europe/Moscow'))
            return [now - one_hour * shift for shift in range(self.Parameters.hours)]

        if range_type == 'range':
            time_format = '%Y-%m-%d %H:%M:%S'
            start_hour = datetime.datetime.strptime(self.Parameters.start_hour, time_format)
            end_hour = datetime.datetime.strptime(self.Parameters.end_hour, time_format)
            # "+ 1" makes the end hour itself part of the range.
            hours_count = int((end_hour - start_hour).total_seconds() / 60 / 60) + 1
            return [start_hour + one_hour * shift for shift in range(hours_count)]

        # Previously this fell through and returned None, which only failed
        # later with an obscure error; fail explicitly instead.
        raise ValueError('Unknown hours range type: {!r}'.format(range_type))

    def hour_log_name(self, time):
        """
            Return the hourly table name for `time`: date and hour, with
            minutes and seconds forced to zero ('YYYY-mm-ddTHH:00:00').
        """
        return time.strftime('%Y-%m-%dT%H:00:00')

    def day_log_name(self, time):
        """
            Return the daily table name for `time` ('YYYY-mm-dd').
        """
        return time.strftime('%Y-%m-%d')

    def get_statface_report(self, report_path):
        """
            Return the Statface report object for `report_path`.

            The OAuth token is read from the Vault record named 'STATFACE_TOKEN'
            owned by `Parameters.statface_token_owner`; the client is created
            for the production Statface host.
        """
        import statface_client
        statface_token = sdk2.Vault.data(self.Parameters.statface_token_owner, name='STATFACE_TOKEN')
        sf_client = statface_client.StatfaceClient(host=statface_client.STATFACE_PRODUCTION, oauth_token=statface_token)
        return sf_client.get_report(report_path)

    def on_execute(self):
        """
            Launch calculation subtasks, wait for them and upload their results.

            Stage 1 (memoized): select hours that have a source table in YT and
            are missing from the Statface report (all of them when
            `force_recalculate` is set) and enqueue one subtask per hour.
            Stage 2 (re-entered while subtasks are running): upload reports of
            finished subtasks and wait for the remaining ones.

            :raises sdk2.WaitTask: while some subtasks are still running
            :raises errors.TaskFailure: if any subtask ended in a BREAK-group status
        """
        import statface_client
        from yt.wrapper import YtClient

        # NOTE: the stage name is misspelled, but it is a memoization key and
        # is kept as is so that already started tasks keep working.
        with self.memoize_stage.launch_caclulation:
            hours = self.get_hours()

            yt_token = sdk2.Vault.data(self.Parameters.yt_token_owner, name='YT_TOKEN')
            yt_client = YtClient('hahn', yt_token)

            def required_table_exist(time):
                # Either the hourly or the daily table is enough.
                hour_table_path = '{}/1h/{}'.format(self.APPHOST_EVENT_LOG_PATH, self.hour_log_name(time))
                day_table_path = '{}/1d/{}'.format(self.APPHOST_EVENT_LOG_PATH, self.day_log_name(time))
                return yt_client.exists(hour_table_path) or yt_client.exists(day_table_path)

            # The set removes duplicated hour names.
            self.Context.unprocessed_hours = list({
                self.hour_log_name(hour) for hour in hours if required_table_exist(hour)
            })

            report = self.get_statface_report(self.Parameters.statface_report_path)

            def report_is_calculated(time):
                # `time` is an hour name ('...T...'); the report is queried with a space separator.
                report_time = time.replace('T', ' ')
                data = report.download_data(scale=statface_client.constants.HOURLY_SCALE, date_min=report_time, date_max=report_time)
                return len(data) > 0

            self.Context.unprocessed_hours = [
                hour for hour in self.Context.unprocessed_hours
                if self.Parameters.force_recalculate or not report_is_calculated(hour)
            ]
            self.Context.calc_report_subtasks = dict()
            for hour in self.Context.unprocessed_hours:
                self.Context.calc_report_subtasks[hour] = CalculateAppHostPerformanceReport(
                    self,
                    description='Calculate AppHost Performance Report for {}'.format(hour.replace('T', ' ')),
                    kill_timeout=8 * 3600,
                    hour=hour,
                    yt_pool=self.Parameters.yt_pool,
                    yt_token_owner=self.Parameters.yt_token_owner,
                ).enqueue().id

        # The task is woken up when any subtask finishes (wait_all=False), so
        # this stage may be entered up to once per subtask plus the first pass.
        with self.memoize_stage.wait_tasks(len(self.Context.calc_report_subtasks) + 1):
            processed_hours = []
            working_subtask_ids = []

            for hour in self.Context.unprocessed_hours:
                task = sdk2.Task[self.Context.calc_report_subtasks[hour]]
                if task.status in Status.Group.FINISH:
                    processed_hours.append(hour)
                elif task.status in Status.Group.BREAK:
                    # The hour of a broken subtask stays in unprocessed_hours and
                    # is seen again on every pass; record its id only once.
                    if task.id not in self.Context.broken_subtask_ids:
                        self.Context.broken_subtask_ids.append(task.id)
                else:
                    working_subtask_ids.append(task.id)

            for hour in processed_hours:
                self.Context.unprocessed_hours.remove(hour)

            if processed_hours:
                self.set_info('Done: {}'.format(', '.join(
                    link_builder.task_link(self.Context.calc_report_subtasks[hour], hour)
                    for hour in processed_hours
                )), do_escape=False)

                report = []
                for hour in processed_hours:
                    calc_task_id = self.Context.calc_report_subtasks[hour]
                    calc_task = sdk2.Task[calc_task_id]
                    report_file_path = str(sdk2.ResourceData(calc_task.Parameters.report).path)
                    # Close the file deterministically instead of leaking the handle.
                    with open(report_file_path, 'r') as report_file:
                        report += json.load(report_file)
                sf_report = self.get_statface_report(self.Parameters.statface_report_path)
                sf_report.upload_data(scale=statface_client.constants.HOURLY_SCALE, data=report)

            if working_subtask_ids:
                raise sdk2.WaitTask(working_subtask_ids, Status.Group.FINISH | Status.Group.BREAK, wait_all=False)

        if self.Context.broken_subtask_ids:
            raise errors.TaskFailure('Broken subtasks: {}'.format(
                ', '.join(str(task_id) for task_id in self.Context.broken_subtask_ids)
            ))
