# -*- coding: utf-8 -*-
"""
Build a binary index of chrono slices from legacy tables.
"""
import tempfile
from pipes import quote
import logging
import subprocess

from sandbox import sdk2
import sandbox.sdk2.helpers
import sandbox.common.types.client as ctc
from sandbox.common.errors import TaskFailure
from sandbox.common.types.task import Semaphores
from sandbox.projects.advq.artifacts import MRKIT_LEGACY_READER, CHRONO_DB_GENERATOR
from sandbox.projects.advq.common import MRKIT_WITH_DEBUG_REV, validate_arcadia_rev, \
    AdvqLegacyNormalChronoDatabaseChunk, SHELL_COMMAND_PREFIX
from sandbox.projects.advq.common.parameters import PhitsParameters, convert_ttl
from sandbox.projects.advq.common.yt_utils import get_yt_env_from_parameters
from sandbox.sdk2.resource import ResourceData

# Identifiers of the supported chrono periods; they are offered as the
# choices of the "Chrono period" task parameter.
CHRONO_TYPE_MONTH = 'month'
CHRONO_TYPE_WEEK = 'week'

# Pattern of the semaphore name acquired by the task; ``{type}`` and
# ``{chrono_type}`` are substituted with ``str.format`` when the task is enqueued.
SEMAPHORE_GENERATION_NAME_TEMPLATE = (
    'advq_chrono_db_generation_single_'
    '{type}_{chrono_type}_legacy'
)


class AdvqLegacyGenChronoIndex(sdk2.Task):
    """
    Generate a single-chunk chrono database file from a legacy table.

    The task runs the ``MRKIT_LEGACY_READER`` binary over ``<input_table>.total``
    to dump totals into a temporary file, then pipes the reader's output for
    the input table into the ``CHRONO_DB_GENERATOR`` binary.  The generator
    output file is registered as an ``AdvqLegacyNormalChronoDatabaseChunk``
    resource.  Only the ``normal`` phits type is accepted.
    """

    class Requirements(sdk2.Task.Requirements):
        client_tags = (ctc.Tag.GENERIC |
                       ctc.Tag.LINUX_TRUSTY | ctc.Tag.IPV6)
        # These are the maximum memory and disk requirements: normal_rus, 1 chunk.
        # For the other variants a smaller amount is set in on_enqueue.
        disk_space = 128 * 1024
        ram = 128 * 1024

    class Parameters(PhitsParameters):
        with sdk2.parameters.RadioGroup("Chrono period", required=True) as advq_chrono_type:
            advq_chrono_type.values[CHRONO_TYPE_MONTH] = advq_chrono_type.Value(value=CHRONO_TYPE_MONTH)
            advq_chrono_type.values[CHRONO_TYPE_WEEK] = advq_chrono_type.Value(value=CHRONO_TYPE_WEEK)
        advq_db = sdk2.parameters.String("db (rus, tur-robots, etc)", required=True)

        date = sdk2.parameters.String("Database week number or month number (YYYYNN)", required=True)
        epoch = sdk2.parameters.Integer("Epoch", required=True, default=0)
        epodate = sdk2.parameters.String("Epodate string (bare date or {date}-{epoch})", required=True)
        input_table = sdk2.parameters.String("Input table", required=True)
        releaseTo = sdk2.parameters.String("Release status", required=False, default=None)
        ttl = sdk2.parameters.Integer("TTL for released chunks (days, always; 0 for inf)", default=720, required=True)

        # advq_phits_type is not declared here (presumably it comes from
        # PhitsParameters -- confirm); on_execute accepts only 'normal'.

    def on_enqueue(self):
        """
        Prepare scheduling requirements before the task is queued.

        Calls ``validate_arcadia_rev`` for the binaries resource, sets the
        semaphore to acquire and reduces disk/RAM requirements to a quarter
        for every (phits_type, db) pair other than ('normal', 'rus').
        """
        validate_arcadia_rev(self.Parameters.advq_build_binaries, [MRKIT_WITH_DEBUG_REV])

        # Set the semaphore whose name depends on phits_type and chrono_type.
        self.Requirements.semaphores = Semaphores(
            acquires=[
                Semaphores.Acquire(
                    name=SEMAPHORE_GENERATION_NAME_TEMPLATE.format(
                        type=self.Parameters.advq_phits_type, chrono_type=self.Parameters.advq_chrono_type),
                    capacity=4)
            ],
        )
        if (self.Parameters.advq_phits_type, self.Parameters.advq_db) != ('normal', 'rus'):
            # Explicit floor division: the values stay integers regardless of
            # the interpreter version or ``from __future__ import division``.
            # NOTE(review): if on_enqueue runs more than once for the same task,
            # the division is applied again each time -- confirm this is acceptable.
            self.Requirements.disk_space //= 4
            self.Requirements.ram //= 4

        super(AdvqLegacyGenChronoIndex, self).on_enqueue()

    def on_execute(self):
        """
        Build the chrono database chunk and mark the resource as ready.

        Raises:
            TaskFailure: if phits type is not ``normal`` or one of the
                required string parameters is empty.
            subprocess.CalledProcessError: if one of the shell commands
                exits with a non-zero status.
        """
        if self.Parameters.advq_phits_type != 'normal':
            raise TaskFailure("Only phits_type normal is valid for legacy chrono")

        if not all([self.Parameters.advq_db,
                    self.Parameters.advq_chrono_type,
                    self.Parameters.date,
                    self.Parameters.epodate,
                    self.Parameters.input_table]):
            # When a task is created by another task, nothing checks that all
            # required parameters are set, so we have to do it ourselves.
            raise TaskFailure("Some required parameters are empty")
        chrono_db_prefix = '{chrono_type}lyhits_{type}_{db}_{epodate}'.format(
            type=self.Parameters.advq_phits_type,
            chrono_type=self.Parameters.advq_chrono_type,
            db=self.Parameters.advq_db,
            epodate=self.Parameters.epodate,
        )
        # Legacy always has a single chunk, but the chunk fields are still filled in properly.
        chunk = 1
        total_chunks = 1

        env = get_yt_env_from_parameters(self.Parameters)

        binaries = ResourceData(self.Parameters.advq_build_binaries)

        chrono_chunk_filename = '{}.{}.{}.db'.format(chrono_db_prefix, chunk, total_chunks)

        res = AdvqLegacyNormalChronoDatabaseChunk(
            task=self,
            path=chrono_chunk_filename,
            advq_phits_type=self.Parameters.advq_phits_type,
            advq_chrono_type=self.Parameters.advq_chrono_type,
            advq_db=self.Parameters.advq_db,
            advq_date=self.Parameters.date,
            advq_epoch=self.Parameters.epoch,
            advq_epodate=self.Parameters.epodate,
            description=("{}: {!r} from {!r}".format(
                self.Parameters.description,
                chrono_chunk_filename,
                self.Parameters.input_table)),
            advq_chunk=chunk,
            advq_total_chunks=total_chunks,
            advq_is_delta=False,
            advq_input_table=self.Parameters.input_table,
            ttl=convert_ttl(self.Parameters.ttl)
        )
        if self.Parameters.releaseTo:
            res.released = self.Parameters.releaseTo

        res_path = str(res.path)

        mrkit_legacy_reader = str(binaries.path.joinpath(MRKIT_LEGACY_READER))
        chrono_db_generator = str(binaries.path.joinpath(CHRONO_DB_GENERATOR))

        # Hit columns requested from the reader; the same columns are read
        # from the totals table.
        HITS_FIELDS = (
            'RegionHits',
            'PhoneHits',
            'TabletHits'
        )
        # Value of the generator's --channels option.
        CHANNELS = 'all,phone,tablet'
        # Full column list for the main table: OrigSanitized followed by the hit columns.
        FIELDS = ('OrigSanitized',) + HITS_FIELDS

        with sandbox.sdk2.helpers.ProcessLog(self, logger=logging.getLogger("advq-chrono-db-generate")) as pl:
            # Save totals to a file.
            totals_table = self.Parameters.input_table + '.total'
            # The temporary file is removed when this ``with`` block exits, so
            # the generator, which receives the file by name, must run inside it.
            with tempfile.NamedTemporaryFile(prefix=chrono_db_prefix + '.total.') as totals_file:
                subprocess.check_call(SHELL_COMMAND_PREFIX + [
                    "{mrkit_legacy_reader} --debug {total_tbl} {total_fields}".format(
                        mrkit_legacy_reader=quote(mrkit_legacy_reader),
                        total_tbl=quote(totals_table),
                        total_fields=' '.join(quote(fld) for fld in HITS_FIELDS)
                    )],
                    stdout=totals_file.file,
                    stderr=pl.stdout,
                    env=env,
                )
                totals_file.file.flush()

                # Run the database generation.
                # NOTE(review): a failure of the reader on the left side of the
                # pipe is reported by check_call only if SHELL_COMMAND_PREFIX
                # turns on ``pipefail`` -- confirm.
                subprocess.check_call(SHELL_COMMAND_PREFIX + [
                    ("{mrkit_legacy_reader} {tbl} {fields} | "
                     "{chrono_db_generator} {advq_db} {period} {date} --total-file {inp_totals_file}"
                     " --sorted --channels {channels} --output {out_file_path}"
                     ).format(
                        mrkit_legacy_reader=quote(mrkit_legacy_reader),
                        tbl=quote(self.Parameters.input_table),
                        fields=' '.join(quote(fld) for fld in FIELDS),
                        chrono_db_generator=quote(chrono_db_generator),
                        advq_db=quote(self.Parameters.advq_db),
                        period=quote(self.Parameters.advq_chrono_type),
                        # The generator's date argument is the epodate string.
                        date=quote(self.Parameters.epodate),
                        inp_totals_file=quote(totals_file.name),
                        channels=quote(CHANNELS),
                        out_file_path=quote(res_path),
                    )],
                    stderr=pl.stdout,
                    env=env,
                )

            ResourceData(res).ready()
