# -*- coding: utf-8 -*-

import json
import os

from sandbox import sdk2

from sandbox.common.types.task import Semaphores
from sandbox.sdk2.helpers import subprocess

from sandbox.projects.websearch.clickdaemon import resources
from sandbox.projects.websearch.clickdaemon.tasks.redir_log_loss import parameters as params
from sandbox.projects.websearch.params import ResourceWithLastReleasedValueByDefault


class CalculateRedirLogLoss(sdk2.Task):
    """
        Computes hourly redir-log losses against the L7 balancer access log.

        For every selected request method the task launches the
        ``redir_log_loss calc-loss`` tool as a separate process (the processes
        run concurrently), collects the JSON statistics each of them writes and
        publishes the merged list as a ``RedirLogLossStats`` resource.
        When both GET and POST are selected, one extra aggregated row is
        appended to the list.
    """

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        disk_space = 50  # 50 Mb

        class Caches(sdk2.Requirements.Caches):
            pass  # do not use any shared caches

    class Parameters(sdk2.Task.Parameters):
        # Binary of the loss calculation tool; it is executed in on_execute.
        redir_log_loss_tool = ResourceWithLastReleasedValueByDefault(
            'Redir Log Loss tool (yweb/webdaemons/clickdaemon/tools/redir_log_loss)',
            resource_type=resources.RedirLogLossTool,
            required=True,
        )
        # Values are used as keys of params.LOG_TYPES when the command is built.
        log_types = params.log_types()
        date = sdk2.parameters.String('Log table name in yyyy-mm-ddThh:00:00 format', required=True)
        # One tool process is started per checked method type.
        with sdk2.parameters.CheckGroup('Requests method type') as method_type:
            method_type.values.GET = method_type.Value('GET', checked=True)
            method_type.values.POST = method_type.Value('POST', checked=True)

        yt_cluster = sdk2.parameters.String('YT cluster', default='hahn')
        # Exported to the tool through the YT_POOL environment variable when set.
        yt_pool = sdk2.parameters.String('YT pool')
        # Owner of the Vault record named 'YT_TOKEN'.
        yt_token_owner = sdk2.parameters.String('YT_TOKEN owner', required=True)

        # When switched off, the corresponding '--no-*' flag is passed to the tool.
        process_unclassified = sdk2.parameters.Bool('Use unclassified req-ids table if possible', default=True)
        store_unclassified = sdk2.parameters.Bool('Store unclassified req-ids table in YT if possible', default=True)

        # Only the keys (paths) are passed to the tool; the scale values are not read by this task.
        redir_logs_dict = sdk2.parameters.Dict(
            "Redir logs list (path: scale)", default={},
            description="scale types: 1d, 1h, 30min. path will be prefixed with //logs/ in code"
        )

        with sdk2.parameters.Output:
            stats = sdk2.parameters.Resource('Result statistics', resource_type=resources.RedirLogLossStats)

    def on_enqueue(self):
        """
            Acquires a semaphore named after the sorted log types and the YT pool.

            The pool part falls back to the token owner when no pool is given.
        """
        pool = self.Parameters.yt_pool or self.Parameters.yt_token_owner
        semaphore_name = 'process_redir_logs_{}_{}'.format('-'.join(sorted(self.Parameters.log_types)), pool)
        # NOTE(review): capacity=2 presumably caps the number of tasks holding
        # this semaphore at the same time -- confirm against Sandbox docs.
        self.Requirements.semaphores = Semaphores(
            acquires=[
                Semaphores.Acquire(
                    name=semaphore_name,
                    capacity=2
                ),
            ],
        )

    def _build_command(self, tool_path, method_type, output_file, yt_token):
        """
            Builds the argv list of one ``calc-loss`` invocation.

            :param tool_path: path to the redir_log_loss executable
            :param method_type: request method passed as ``--method-type``
            :param output_file: file name passed as ``--output-file``
            :param yt_token: token passed as ``--yt-token``
            :return: list of command line arguments
        """
        parameters = self.Parameters
        report_keys = [params.LOG_TYPES[log_type]['report_key'] for log_type in parameters.log_types]

        # NOTE(review): the token is passed through argv and is therefore visible
        # in the process list; switch to an environment variable or a file if the
        # tool supports it -- TODO confirm.
        cmd = [
            tool_path, 'calc-loss',
            '--date', parameters.date,
            '--method-type', method_type,
            '--log-types', ','.join(report_keys),
            '--output-file', output_file,
            '--yt-token', yt_token,
        ]
        if parameters.yt_cluster:
            cmd += ['--yt-cluster', parameters.yt_cluster]
        if not parameters.process_unclassified:
            cmd.append('--no-process-unclassified')
        if not parameters.store_unclassified:
            cmd.append('--no-store-unclassified')

        if parameters.redir_logs_dict:
            redir_tables = parameters.redir_logs_dict.keys()
        else:
            # Default table list: cypress roots with the leading "//logs/" part cut off.
            prefix_length = len("//logs/")
            redir_tables = [
                redir_log['cypress_root'][prefix_length:]
                for redir_log in params.LOG_TYPES['redir_log']['logs']
            ]
        cmd += ['--redir-log-tables', ','.join(redir_tables)]
        return cmd

    def on_execute(self):
        """
            Runs the tool once per selected method type and publishes the merged stats.

            :raises RuntimeError: if any of the tool processes exits with a non-zero code
        """
        yt_token = sdk2.Vault.data(self.Parameters.yt_token_owner, name='YT_TOKEN')
        redir_log_loss_tool_path = str(sdk2.ResourceData(self.Parameters.redir_log_loss_tool).path)

        # The environment is the same for every process, so build it once.
        env = os.environ.copy()
        if self.Parameters.yt_pool:
            env['YT_POOL'] = self.Parameters.yt_pool

        # Start all processes first so that they run concurrently.
        processes = {}
        stat_files = {}
        for method_type in self.Parameters.method_type:
            stat_files[method_type] = 'stats-{}-{}'.format(self.Parameters.date, method_type)
            cmd = self._build_command(
                redir_log_loss_tool_path, method_type, stat_files[method_type], yt_token,
            )
            processes[method_type] = subprocess.Popen(cmd, env=env)

        # Wait for every process before reporting a failure, so that no child is
        # left running behind a failed task.
        failures = {}
        for method_type, proc in processes.items():
            exit_code = proc.wait()
            if exit_code != 0:
                failures[method_type] = exit_code
        if failures:
            # The command line is deliberately not included: it contains the YT token.
            raise RuntimeError('redir_log_loss calc-loss failed for method type(s): {}'.format(
                ', '.join(
                    '{} (exit code {})'.format(method_type, failures[method_type])
                    for method_type in sorted(failures)
                )
            ))

        stats = []
        for method_type in processes:
            with open(stat_files[method_type], 'r') as stat_file:
                stats += json.load(stat_file)

        if set(('POST', 'GET')) == set(self.Parameters.method_type):
            # Extra row with per-key sums over all collected rows.
            aggregated_stats = {
                'fielddate': self.Parameters.date.replace('T', ' '),
                'method': '\tRequest Method\t',
            }
            # NOTE(review): a key is accumulated only while its running total is
            # an int, so the pre-seeded string keys above are never touched; a
            # total that turns into a non-int (e.g. float) stops accumulating --
            # confirm this is intended for the stats schema.
            for stat in stats:
                for key, value in stat.items():
                    running_total = aggregated_stats.setdefault(key, 0)
                    if isinstance(running_total, int):
                        aggregated_stats[key] = running_total + value
            stats.append(aggregated_stats)

        report_filename = 'loss-{}-{}-{}'.format(
            self.Parameters.date,
            '-'.join(self.Parameters.method_type),
            '-'.join(self.Parameters.log_types),
        )
        self.Parameters.stats = resources.RedirLogLossStats(
            self, self.Parameters.description, report_filename, ttl=90,
        )
        # Close the file explicitly so that the report is fully flushed to disk.
        with open(str(self.Parameters.stats.path), 'w') as report_file:
            json.dump(stats, report_file, indent=2)
