# -*- coding: utf-8 -*-
"""
Generation and release to Sandbox of phits databases of a given type.
"""
import json
import logging
import tempfile

import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
from sandbox import sdk2
from sandbox.common.errors import TaskFailure
from sandbox.common.types.task import Semaphores
from sandbox.projects.advq.AdvqGenPhitsIndex import AdvqGenPhitsIndex
from sandbox.projects.advq.artifacts import PHITS_GENERATOR
from sandbox.projects.advq.common import validate_arcadia_rev, YT_MINIMAL_REV
from sandbox.projects.advq.common.parameters import PhitsParameters, releaseTo_params
from sandbox.projects.advq.common.yt_utils import get_yt_env_from_parameters
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sdk2 import ResourceData
from sandbox.sdk2.task import WaitTask
from sandbox.sdk2.helpers import subprocess as sp

# Name template of the semaphore acquired by AdvqGenPhitsIndices.on_enqueue;
# `phits_type` is substituted with the task's `advq_phits_type` parameter.
SEMAPHORE_GENERATION_NAME_TEMPLATE = 'advq_phits_db_generation_{phits_type}'

# Minimal Arcadia revision of the binaries resource accepted by this task
# (checked with validate_arcadia_rev in on_execute).
GEN_PHITS_INDICES_MIN_ARCADIA_REVISION = 3560259  # ADVQ-1794: --released arg was added


class AdvqGenPhitsIndices(sdk2.Task):
    """
    Generate phits databases of one phits type for a list of ADVQ dbs.

    The task asks the phits generator binary (``list-gen-db``) which tables
    have to be generated for every db, then spawns one AdvqGenPhitsIndex child
    task per table.  Children with epoch 0 are enqueued all at once; children
    with a non-zero epoch are enqueued one by one, in ascending
    (epoch, date) order, each one only after the previous one has succeeded.
    """

    class Requirements(sdk2.Requirements):
        environments = (
            PipEnvironment("yandex-yt"),
            PipEnvironment("yandex-yt-yson-bindings-skynet")
        )
        client_tags = ctc.Tag.LINUX_TRUSTY & ctc.Tag.IPV6
        disk_space = 3 * 1024
        ram = 1 * 1024
        cores = 1

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(PhitsParameters):
        # TODO(monoid) get dbs from some kind of const file instead
        dbs = sdk2.parameters.List("List of ADVQ dbs", required=True)
        chunk_num = sdk2.parameters.Integer("Number of chunks to generate", required=True, default=1)
        periods_limit = sdk2.parameters.Integer("Take only this number of latest tables (0 for all)")

        release_new_resource, releaseTo = releaseTo_params()

        child_timeout = sdk2.parameters.Integer("Child timeout, hours", default=3)

        advq_yt_prefix = sdk2.parameters.String("YT prefix instead of //home/advq (for testing proposes)")
        # The {type}, {dbname} and {epodate} placeholders are filled in per child task.
        phits_tmp_prefix = sdk2.parameters.String("YT phits tmp prefix", required=True,
                                                  default="//home/advq/advq/{type}/{dbname}/phits-gen/tmp/phits_{type}_{dbname}_{epodate}")
        ttl = sdk2.parameters.Integer("TTL for released databases (days, always; 0 for inf)", default=60, required=True)
        delta_ttl = sdk2.parameters.Integer("TTL for released delta databases (days, always; 0 for inf)",
                                            default=730, required=True)
        dry_run = sdk2.parameters.Bool("Dry run")

    def on_enqueue(self):
        """Acquire a capacity-1 semaphore whose name depends on the phits type."""
        self.Requirements.semaphores = Semaphores(
            acquires=[
                Semaphores.Acquire(
                    name=SEMAPHORE_GENERATION_NAME_TEMPLATE.format(phits_type=self.Parameters.advq_phits_type),
                    capacity=1)
            ],
            release=(
                ctt.Status.Group.BREAK, ctt.Status.Group.FINISH
            )
        )
        return super(AdvqGenPhitsIndices, self).on_enqueue()

    @staticmethod
    def _has_failed(status):
        """Return True if `status` is FAILURE or belongs to the BREAK group."""
        return status in ctt.Status.Group.BREAK or status == ctt.Status.FAILURE

    def _list_tables_to_generate(self):
        """
        Ask the phits generator which tables must be generated for every db.

        Runs ``<generator> list-gen-db <phits_type> <db> [--released <releaseTo>]``
        for each db from the `dbs` parameter and parses its stdout as JSON.

        :return: dict mapping db name to the list of table descriptions,
            sorted by (advq_epoch, advq_date) ascending.
        """
        # The binaries and the YT environment are needed by this listing step
        # only, so they are prepared here rather than on every on_execute entry.
        binaries_data = ResourceData(self.Parameters.advq_build_binaries)
        generator_path = str(binaries_data.path.joinpath(PHITS_GENERATOR))
        env = get_yt_env_from_parameters(self.Parameters)

        if self.Parameters.release_new_resource and self.Parameters.releaseTo is not None:
            released_args = ['--released', self.Parameters.releaseTo]
        else:
            released_args = []

        config_file = None
        try:
            if self.Parameters.advq_yt_prefix:
                # Override the ADVQ prefix through a temporary config file that
                # is passed to the generator via ADVQ_CONFIG_FILE.
                import yaml
                config_file = tempfile.NamedTemporaryFile()
                yaml.safe_dump({
                    'ADVQ_PREFIX': self.Parameters.advq_yt_prefix
                }, config_file.file)
                config_file.flush()
                env['ADVQ_CONFIG_FILE'] = config_file.name

            phits_tables = {}
            with sdk2.helpers.ProcessLog(self, logger=logging.getLogger("advq-phits-list-dbs-to-gen")) as pl:
                for advq_db in self.Parameters.dbs:
                    list_json_str = sp.check_output(
                        [generator_path, 'list-gen-db', self.Parameters.advq_phits_type, advq_db
                         ] + released_args,
                        timeout=600, stderr=pl.stdout, env=env)
                    to_generate = json.loads(list_json_str)
                    # Sort so that deltas are ordered by ascending epoch: this is
                    # the order in which they are generated one after another.
                    to_generate.sort(key=lambda rec: (rec['advq_epoch'], rec['advq_date']))
                    if self.Parameters.periods_limit:
                        # NOTE(review): the parameter label says "latest tables", but
                        # after the ascending sort this keeps the FIRST (oldest)
                        # entries.  Behaviour is left unchanged; confirm the intent.
                        to_generate = to_generate[:self.Parameters.periods_limit]
                    phits_tables[advq_db] = to_generate
            return phits_tables
        finally:
            # The config file is only read by the subprocesses above; close
            # (and thereby delete) it instead of leaving it to the GC.
            if config_file is not None:
                config_file.close()

    def _create_generation_subtasks(self, release_to, child_kill_timeout):
        """
        Create one AdvqGenPhitsIndex child per table stored in Context.phits_tables.

        Children with epoch 0 are enqueued immediately; children with a
        non-zero epoch are only created and are enqueued later, one at a time.
        In dry-run mode nothing is created, only an info message is written.

        :param release_to: value passed to children as `releaseTo` (may be None).
        :param child_kill_timeout: kill timeout of a child task, in seconds.
        :return: tuple (epoch-0 tasks, non-zero-epoch tasks), both in creation order.
        """
        base_tasks = []
        epoch_tasks = []
        for advq_db in self.Parameters.dbs:
            for desc in self.Context.phits_tables[advq_db]:
                if self.Parameters.dry_run:
                    self.set_info("DRY_RUN: running task for {!r}".format(desc))
                    continue

                epoch = desc['advq_epoch']
                is_delta = desc['advq_is_delta']
                epd = desc['advq_epodate']
                if is_delta:
                    # Deltas are usually small; splitting them into chunks
                    # would only slow the processing down.
                    chunk_num = 1
                    ttl = self.Parameters.delta_ttl
                else:
                    chunk_num = self.Parameters.chunk_num
                    ttl = self.Parameters.ttl

                task = AdvqGenPhitsIndex(
                    self,
                    description="Generation of phits chunks for {}_{}_{}".format(
                        self.Parameters.advq_phits_type, advq_db, epd),
                    yt_proxy=self.Parameters.yt_proxy,
                    yt_token_vault_user=self.Parameters.yt_token_vault_user,
                    yt_token_vault_name=self.Parameters.yt_token_vault_name,
                    advq_phits_type=self.Parameters.advq_phits_type,
                    chunk_num=chunk_num,
                    advq_build_binaries=self.Parameters.advq_build_binaries,
                    advq_db=advq_db,
                    advq_date=desc['advq_date'],
                    advq_epoch=epoch,
                    advq_epodate=epd,
                    advq_is_delta=is_delta,
                    input_table=desc['input_table'],
                    phits_tmp_prefix=self.Parameters.phits_tmp_prefix.format(
                        type=self.Parameters.advq_phits_type,
                        dbname=advq_db,
                        epodate=epd,
                    ),
                    ttl=ttl,
                    kill_timeout=child_kill_timeout,
                    releaseTo=release_to,
                )
                if epoch == 0:
                    base_tasks.append(task)
                    task.enqueue()
                else:
                    # Tasks with a non-zero epoch are started separately,
                    # in the proper order.
                    epoch_tasks.append(task)
        return base_tasks, epoch_tasks

    def on_execute(self):
        """
        Drive the generation: list tables, spawn children, wait, run deltas in order.

        The method raises WaitTask to wait for children and relies on
        memoize_stage guards so that already completed stages are skipped
        when it is entered again.

        :raises TaskFailure: if an epoch-0 child failed, was broken or has not
            finished by the end of the wait, or if a delta child did not end
            in SUCCESS/RELEASED.
        :raises WaitTask: to wait for the children that were just enqueued.
        """
        validate_arcadia_rev(self.Parameters.advq_build_binaries, [
            GEN_PHITS_INDICES_MIN_ARCADIA_REVISION, YT_MINIMAL_REV
        ])

        with self.memoize_stage.available_phits_tables(commit_on_entrance=False):
            self.Context.phits_tables = self._list_tables_to_generate()

        if self.Parameters.release_new_resource:
            release_to = self.Parameters.releaseTo
        else:
            release_to = None

        child_kill_timeout = int(self.Parameters.child_timeout) * 60 * 60
        wait_timeout = 3 * child_kill_timeout

        with self.memoize_stage.generate_subtasks:
            base_tasks, epoch_tasks = self._create_generation_subtasks(release_to, child_kill_timeout)
            self.Context.task_ids_to_release_base = [int(task_obj) for task_obj in base_tasks]
            self.Context.task_ids_to_release_epoch = [int(task_obj) for task_obj in epoch_tasks]
            # Wait for all epoch-0 (base) tasks.
            if self.Context.task_ids_to_release_base:
                raise WaitTask(
                    self.Context.task_ids_to_release_base,
                    statuses=(ctt.Status.Group.FINISH + ctt.Status.Group.BREAK),
                    wait_all=True,
                    timeout=wait_timeout
                )

        invalid_tasks = []
        for task_id in self.Context.task_ids_to_release_base:
            task = sdk2.Task[task_id]
            # Besides explicit failures, reject children that are still not
            # finished: the wait above has a timeout, and a child that is
            # still queued or running must not be treated as generated.
            if self._has_failed(task.status) or task.status not in ctt.Status.Group.FINISH:
                invalid_tasks.append(task)
        if invalid_tasks:
            self.set_info("ERROR: Some generation task failed: {!r}".format(invalid_tasks))
            raise TaskFailure("Some generation task failed", invalid_tasks)

        # Deltas.  They are started one at a time so that they are generated in
        # the proper order, from the lowest epoch to the highest.  This is
        # needed, in particular, for phits to be generated correctly.
        with self.memoize_stage.init_deltas_generation:
            self.Context.delta_gen_current_task = None

        current_delta_id = self.Context.delta_gen_current_task
        if current_delta_id is not None:
            task = sdk2.Task[current_delta_id]
            if self._has_failed(task.status):
                self.set_info("ERROR: Delta generation task failed: #{!r}".format(
                    current_delta_id))
                raise TaskFailure("Delta generation task failed", current_delta_id)
            elif task.status not in (ctt.Status.SUCCESS, ctt.Status.RELEASED):
                self.set_info("ERROR: Delta generation task #{!r} has unexpected status {}".format(
                    current_delta_id, task.status
                ))
                raise TaskFailure(
                    "Delta generation task has unexpected status",
                    (current_delta_id, task.status),
                )

        # Everything so far has finished successfully; start the next delta.
        if self.Context.task_ids_to_release_epoch:
            self.Context.delta_gen_current_task = self.Context.task_ids_to_release_epoch.pop(0)
            sdk2.Task[self.Context.delta_gen_current_task].enqueue()
            raise WaitTask(
                [self.Context.delta_gen_current_task],
                statuses=(ctt.Status.Group.FINISH + ctt.Status.Group.BREAK),
                wait_all=True,
                timeout=wait_timeout,
            )
