# -*- coding: utf-8 -*-
"""
Генерация индексов хроносрезов для ADVQ7 (mrkit-таблицы из //home/advq/advq/{db}/weeklyhits
и //home/advq/advq/{db}/monthlyhits.

От обычной генерации отличается лишь форматом входной таблицы и схемой именования таблиц.
Создаваемые ресурсы совместимы с новым генератором хроносрезов, и для генерации используется тот же лок.
"""
import re
import logging
import six

import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
from sandbox import sdk2
from sandbox.common.errors import TaskFailure
from sandbox.common.types.task import Semaphores
from sandbox.projects.advq.AdvqLegacyGenChronoIndex import AdvqLegacyGenChronoIndex, CHRONO_TYPE_MONTH, CHRONO_TYPE_WEEK
from sandbox.projects.advq.common import AdvqLegacyNormalChronoDatabaseChunk
from sandbox.projects.advq.common.list_res import DEFAULT_ADVQ_PREFIX, sandbox_list_chrono_dbs
from sandbox.projects.advq.common.parameters import releaseTo_params, PhitsParameters
from sandbox.projects.advq.common.yt_utils import setup_yt_from_parameters
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sdk2.task import WaitTask

# Default kill timeout for child tasks, in seconds.
CHRONO_INDEX_TIMEOUT = 48 * 60 * 60  # A build usually takes about 24h, so we allow 48h.

# Name template of the semaphore acquired by the task; formatted with the phits type.
SEMAPHORE_GENERATION_NAME_TEMPLATE = 'advq_chrono_db_generation_{phits_type}_legacy'
# YT directory with legacy chrono tables; chrono_type is substituted right before the 'lyhits' suffix.
LEGACY_CHRONO_YT_PATH = '{advq_prefix}/advq/{advq_db}/{chrono_type}lyhits'

# TODO(monoid) these values are currently ignored; only the `limit` parameter matters, and it is small for now.
BUILDING_LIMIT = {
    CHRONO_TYPE_WEEK: 52,
    CHRONO_TYPE_MONTH: 12,
}


# Chrono table name: six digits (group 1, used as the date), optionally followed by '-' and a
# decimal number (group 2, used as the epoch).
CHRONO_TABLE_RE = re.compile('^([0-9]{6})(?:-([0-9]+))?$')


class AdvqLegacyGenChronoIndices(sdk2.Task):
    """
    Spawn AdvqLegacyGenChronoIndex child tasks for legacy chrono tables that are not built yet.

    For every chrono type listed in BUILDING_LIMIT and every db from the ``dbs`` parameter the task
    lists the tables of the legacy YT directory, keeps the newest ``limit`` (date, epoch) pairs,
    drops the pairs already reported by ``sandbox_list_chrono_dbs`` and starts one child task per
    remaining table.  It then waits for all children and fails if any of them ended up in a BREAK
    status or in FAILURE.
    """

    class Requirements(sdk2.Requirements):
        environments = (
            PipEnvironment("yandex-yt"),
            PipEnvironment("yandex-yt-yson-bindings-skynet")
        )
        client_tags = ctc.Tag.LINUX_TRUSTY & ctc.Tag.IPV6
        cores = 1
        disk_space = 3 * 1024

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(PhitsParameters):
        dbs = sdk2.parameters.List("List of ADVQ dbs")  # TODO(monoid) get dbs from some kind of const file instead
        release_new_resource, releaseTo = releaseTo_params()
        advq_yt_prefix = sdk2.parameters.String("YT prefix instead of //home/advq (for testing proposes)")
        limit = sdk2.parameters.Integer("Limit", default=2)
        child_kill_timeout = sdk2.parameters.Integer("Child kill timeout, in seconds", default=CHRONO_INDEX_TIMEOUT)
        ttl = sdk2.parameters.Integer("TTL for released chunks (days, always; 0 for inf)", default=720, required=True)

    def on_enqueue(self):
        """Acquire the generation semaphore (capacity 1) before the task is enqueued."""
        # Set up a semaphore whose name depends on the phits type; the value is the same as the one
        # used by the new chrono slice generation.
        self.Requirements.semaphores = Semaphores(
            acquires=[
                Semaphores.Acquire(
                    name=SEMAPHORE_GENERATION_NAME_TEMPLATE.format(phits_type=self.Parameters.advq_phits_type),
                    capacity=1)
            ],
            release=(
                ctt.Status.Group.BREAK, ctt.Status.Group.FINISH
            )
        )
        return super(AdvqLegacyGenChronoIndices, self).on_enqueue()

    def on_execute(self):
        """
        Start the missing generation tasks, wait for them and check their final statuses.

        Raises:
            WaitTask: once, to suspend this task until every spawned child finishes or breaks.
            TaskFailure: if at least one child ended up in a BREAK status or in FAILURE.
        """
        import yt.wrapper as yt

        setup_yt_from_parameters(self.Parameters)

        with self.memoize_stage.gen_tasks(commit_on_entrance=False):
            self.Context.tasks_to_wait = self._spawn_generation_tasks(yt)

        with self.memoize_stage.wait_for_children:
            # No children are spawned when every available chrono slice is already built;
            # in that case there is nothing to wait for.
            if self.Context.tasks_to_wait:
                raise WaitTask(self.Context.tasks_to_wait,
                               statuses=(ctt.Status.Group.FINISH + ctt.Status.Group.BREAK),
                               wait_all=True)

        failed_tasks = []
        for task_id in self.Context.tasks_to_wait:
            task = sdk2.Task[task_id]
            if task.status in ctt.Status.Group.BREAK or task.status == ctt.Status.FAILURE:
                failed_tasks.append(task)
        if failed_tasks:
            logging.error("Some generation task failed: %r", failed_tasks)
            raise TaskFailure("Some generation task failed", failed_tasks)

    @staticmethod
    def _list_chrono_tables(yt_client, chrono_tables_dir):
        """
        List chrono tables of a YT directory.

        Args:
            yt_client: the ``yt.wrapper`` module (or an object with a compatible ``list`` method).
            chrono_tables_dir: YT path of the directory to list.

        Returns:
            A list of ``(date, epoch, table_name, table_path)`` tuples, in listing order, for nodes
            of type 'table' whose names match CHRONO_TABLE_RE; ``epoch`` is 0 when the name has no
            epoch suffix.
        """
        found = []
        for tbl in yt_client.list(chrono_tables_dir, attributes=['type'], sort=True):
            if tbl.attributes['type'] != 'table':
                continue
            m = CHRONO_TABLE_RE.match(str(tbl))
            if not m:
                continue
            found.append((m.group(1), int(m.group(2) or 0), str(tbl), chrono_tables_dir + '/' + tbl))
        return found

    def _spawn_generation_tasks(self, yt_client):
        """
        Create and enqueue a child task for every chrono table that still has to be built.

        Args:
            yt_client: the ``yt.wrapper`` module used to list the YT directories.

        Returns:
            The list of ids of the enqueued child tasks (empty if nothing has to be built).
        """
        advq_yt_prefix = self.Parameters.advq_yt_prefix or DEFAULT_ADVQ_PREFIX
        # The release target does not depend on the chrono type or db, so it is computed once.
        release_to = self.Parameters.releaseTo if self.Parameters.release_new_resource else None
        phits_type = 'normal'  # legacy dbs are based on 'normal' phits.

        task_ids = []
        # Only the keys of BUILDING_LIMIT are used: the per-type counts are not supported yet,
        # the `limit` parameter is applied instead.
        for chrono_type in BUILDING_LIMIT:
            sandbox_chrono_dbs = sandbox_list_chrono_dbs(
                phits_type, self.Parameters.dbs, release_to, chrono_type,
                res_class=AdvqLegacyNormalChronoDatabaseChunk,
            )

            for advq_db in self.Parameters.dbs:
                chrono_tables_dir = LEGACY_CHRONO_YT_PATH.format(advq_prefix=advq_yt_prefix, advq_db=advq_db,
                                                                 chrono_type=chrono_type)
                tables_with_epodates = self._list_chrono_tables(yt_client, chrono_tables_dir)
                epodates = set((date, epoch) for date, epoch, _, _ in tables_with_epodates)

                # Apply the limit (newest pairs first), since count is not supported yet.
                if self.Parameters.limit:
                    epodates = frozenset(sorted(epodates, reverse=True)[:self.Parameters.limit])

                existing_sandbox_epochs = frozenset((date, epoch)
                                                    for date, epoch in six.iterkeys(sandbox_chrono_dbs[advq_db]))
                logging.debug("Existing sandbox resources epochs: %r", existing_sandbox_epochs)
                logging.debug("Available tables epodates: %r", epodates)

                to_build = epodates - existing_sandbox_epochs

                for date, epoch, epodate, input_tbl in tables_with_epodates:
                    if (date, epoch) not in to_build:
                        continue
                    task = AdvqLegacyGenChronoIndex(
                        self,
                        description=("Generate chrono index for {}_{}_{}".format(
                            chrono_type, advq_db, epodate)),
                        kill_timeout=self.Parameters.child_kill_timeout,
                        yt_proxy=self.Parameters.yt_proxy,
                        yt_token_vault_user=self.Parameters.yt_token_vault_user,
                        yt_token_vault_name=self.Parameters.yt_token_vault_name,
                        advq_phits_type=phits_type,
                        advq_build_binaries=self.Parameters.advq_build_binaries,
                        advq_chrono_type=chrono_type,
                        advq_db=advq_db,
                        date=date,
                        epoch=epoch,
                        epodate=epodate,
                        input_table=input_tbl,
                        ttl=self.Parameters.ttl,
                        releaseTo=release_to,
                    )
                    task_ids.append(task.id)
                    task.enqueue()

        return task_ids
