# -*- coding: utf-8 -*-
import json
import logging
import tempfile
import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
from sandbox import sdk2
from sandbox.common.types.task import Semaphores
from sandbox.projects.advq.artifacts import SUMHITS_GENERATOR, ADVQ_SUMHITS_FROM_PHITS, ADVQ_SUMHITS_MERGE_HITS, \
    ADVQ_SUMHITS_DB_GENERATOR
from sandbox.projects.advq.common import AdvqBroadmatchDatabases, get_sumhits_resource_class, BROADMATCH_RENORM_DICT_FILENAME
from sandbox.projects.advq.common.list_res import list_sumhits_tables, sandbox_list_sumhits_dbs
from sandbox.projects.advq.common.parameters import PhitsParameters, releaseTo_params, convert_ttl
from sandbox.projects.advq.common.yt_utils import setup_yt_from_parameters, get_yt_env_from_parameters
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sdk2.resource import ResourceData
from sandbox.sdk2.helpers import subprocess as sp


# Template for the per-database semaphore name acquired in
# AdvqSumhitsGenIndices.on_enqueue.  ``phits_type`` and ``dbname`` come from the
# task parameters; ``parallel`` is the literal 'parallel' when parallel
# generation is enabled and an empty string otherwise (so the name then ends
# with a trailing underscore).
SEMAPHORE_GENERATION_NAME_TEMPLATE = 'advq_sumhits_db_generation_{phits_type}_{dbname}_{parallel}'


class AdvqSumhitsGenIndices(sdk2.Task):
    """
    Build the most recent available sumhits index from already prepared tables.

    For every database listed in ``Parameters.dbs`` the task looks up the
    newest sumhits table, checks whether resources for that date already
    exist and, if not, runs the sumhits generator binary and registers every
    produced chunk as a resource.
    """

    class Requirements(sdk2.Requirements):
        environments = (
            PipEnvironment("yandex-yt"),
            PipEnvironment("yandex-yt-yson-bindings-skynet")
        )
        client_tags = ctc.Tag.LINUX_TRUSTY & ctc.Tag.IPV6
        disk_space = 3 * 1024
        ram = 1 * 1024
        cores = 16

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(PhitsParameters):
        # TODO(monoid) get dbs from some kind of const file instead
        dbs = sdk2.parameters.List("List of ADVQ dbs", required=True, default=['rus'])
        total_chunks = sdk2.parameters.Integer("Number of chunks to generate", required=True, default=1)

        release_new_resource, releaseTo = releaseTo_params()
        period_size = sdk2.parameters.Integer("Number of days/hours/30mins", required=True)
        advq_yt_prefix = sdk2.parameters.String("YT prefix instead of //home/advq (for testing proposes)", default=None)
        bnorm_resource = sdk2.parameters.LastReleasedResource("ADVQ broadmatch database",
                                                              resource_type=AdvqBroadmatchDatabases)
        generate_parallel = sdk2.parameters.Bool("Generate sumhits parallelly", default=False)
        ttl = sdk2.parameters.Integer("TTL for release (0 for 'inf')", default=30, required=True)

    def on_enqueue(self):
        """Request one semaphore per database before the task is enqueued."""
        # The semaphore name depends on phits_type, dbname and on whether
        # parallel generation is requested.
        parallel_suffix = 'parallel' if self.Parameters.generate_parallel else ''
        semaphore_names = [
            SEMAPHORE_GENERATION_NAME_TEMPLATE.format(
                phits_type=self.Parameters.advq_phits_type,
                dbname=dbname,
                parallel=parallel_suffix,
            )
            for dbname in self.Parameters.dbs
        ]
        # Each semaphore is acquired with capacity=1 and released when the
        # task reaches a status from the BREAK or FINISH groups.
        self.Requirements.semaphores = Semaphores(
            acquires=[Semaphores.Acquire(name=name, capacity=1) for name in semaphore_names],
            release=(ctt.Status.Group.BREAK, ctt.Status.Group.FINISH)
        )
        return super(AdvqSumhitsGenIndices, self).on_enqueue()

    def on_execute(self):
        """Generate sumhits index resources for every configured database."""
        setup_yt_from_parameters(self.Parameters)

        # The "mini" phits type reads tables from a fixed directory; for all
        # other types the default of list_sumhits_tables is used.
        sumhits_dir_template = None
        if self.Parameters.advq_phits_type == "mini":
            sumhits_dir_template = 'advq/normal/rus/sumhits-mini'

        for dbname in self.Parameters.dbs:
            self._generate_for_db(dbname, sumhits_dir_template)

    def _generate_for_db(self, dbname, sumhits_dir_template):
        """Generate and register the newest sumhits index for a single database.

        Does nothing when the database has no sumhits tables or when resources
        for the newest date already exist.  A database without tables no
        longer stops processing of the remaining databases.
        """
        available_tables = list(
            list_sumhits_tables(
                self.Parameters.advq_phits_type,
                dbname,
                prefix=self.Parameters.advq_yt_prefix,
                sumhits_dir_template=sumhits_dir_template
            ))

        if not available_tables:
            # Nothing to generate for this database; the caller moves on to
            # the next one.
            return

        max_date, max_epoch, _max_table = max(available_tables)

        # Generate from the freshest date.  If resources already exist for
        # that date there is nothing to do.  The epoch is ignored for now:
        # deltas will be rolled out by the next generation (the generator
        # utility picks them up by itself).
        available_resources = sandbox_list_sumhits_dbs(
            self.Parameters.advq_phits_type,
            dbs=[dbname],
            release_type=self.Parameters.releaseTo,
            period_size=self.Parameters.period_size,
            extra_attrs={'advq_date': max_date}
        )[dbname]

        logging.debug("Available resources for date %r: %r", max_date, available_resources)

        if available_resources:
            return

        # Epoch 0 is addressed by the bare date, any other epoch as "<date>-<epoch>".
        if max_epoch == 0:
            max_epodate = max_date
        else:
            max_epodate = '{}-{}'.format(max_date, max_epoch)

        advq_binaries_resource_path = ResourceData(self.Parameters.advq_build_binaries)
        cmd = self._build_command(advq_binaries_resource_path.path, dbname, max_epodate)
        env = get_yt_env_from_parameters(self.Parameters)

        config_file = None
        try:
            if self.Parameters.advq_yt_prefix:
                # Pass the custom YT prefix to the generator through a
                # temporary YAML config referenced by ADVQ_CONFIG_FILE.  The
                # file has to stay open until the subprocess has finished,
                # because NamedTemporaryFile removes it on close.
                config_file = tempfile.NamedTemporaryFile()
                import yaml
                yaml.safe_dump({
                    'ADVQ_PREFIX': self.Parameters.advq_yt_prefix
                }, config_file.file)
                config_file.flush()
                env['ADVQ_CONFIG_FILE'] = config_file.name

            self._run_and_register(cmd, env, dbname)
        finally:
            # Close (and thereby delete) the temporary config deterministically
            # instead of leaking it until garbage collection.
            if config_file is not None:
                config_file.close()

    def _build_command(self, binaries_path, dbname, epodate):
        """Return the generator command line (list of strings) for one database.

        ``binaries_path`` is the path object of the synced ADVQ binaries
        resource, ``epodate`` is the value passed to ``--date``.
        """
        if self.Parameters.bnorm_resource:
            bnorm_resource_path = ResourceData(self.Parameters.bnorm_resource)
            bnorm_args = ['--bnorm-db-path', str(bnorm_resource_path.path.joinpath(BROADMATCH_RENORM_DICT_FILENAME))]
        else:
            bnorm_args = []

        cmd = [
            str(binaries_path.joinpath(SUMHITS_GENERATOR)),
            'sumhits',
            '--advq-sumhits-from-phits-binary', str(binaries_path.joinpath(ADVQ_SUMHITS_FROM_PHITS)),
            '--advq-sumhits-merge-hits-binary', str(binaries_path.joinpath(ADVQ_SUMHITS_MERGE_HITS)),
            '--advq-generate-sumhits-index-binary', str(binaries_path.joinpath(ADVQ_SUMHITS_DB_GENERATOR)),
            '--index-chunks', str(self.Parameters.total_chunks),
            '--use-prebuilt-sumhits',
            '--date', epodate,
        ] + bnorm_args + [
            self.Parameters.advq_phits_type, dbname,
        ]

        if self.Parameters.generate_parallel:
            cmd += ['--generate-sumhits-indexes-parallel']

        return cmd

    def _run_and_register(self, cmd, env, dbname):
        """Run the generator and register every chunk it reports as a resource.

        The generator's stdout is parsed as JSON; its stderr is redirected to
        the task process log.  Raises ``subprocess.CalledProcessError`` (via
        ``check_output``) when the generator exits with a non-zero status.
        """
        with sdk2.helpers.ProcessLog(self, logger=logging.getLogger("advq-sumhits-gen-index")) as pl:
            data_str = sp.check_output(cmd, stderr=pl.stdout, env=env)
            data = json.loads(data_str)
            resource_class = get_sumhits_resource_class(self.Parameters.advq_phits_type)
            # Chunk numbers stored in the resource attributes are 1-based.
            for idx, chunk_path in enumerate(data['chunks'], 1):
                res = resource_class(
                    self,
                    description="Sumhits db chunk {}".format(chunk_path),
                    path=chunk_path,
                    advq_phits_type=self.Parameters.advq_phits_type,
                    advq_db=dbname,
                    advq_date=data['date'],
                    advq_epoch=data['epoch'],
                    advq_epodate=data['epodate'],
                    advq_start_date=data['start_date'],
                    advq_last_date=data['last_date'],
                    advq_is_delta=bool(data.get('is_delta', False)),
                    # Prefer the period size reported by the generator and
                    # fall back to the task parameter.
                    period_size=data.get('period_size', self.Parameters.period_size),
                    advq_total_chunks=self.Parameters.total_chunks,
                    advq_chunk=idx,
                    binaries_arcadia_revision=self.Parameters.advq_build_binaries.arcadia_revision,
                    ttl=convert_ttl(self.Parameters.ttl),
                )
                if self.Parameters.release_new_resource:
                    res.released = self.Parameters.releaseTo

                ResourceData(res).ready()
