# -*- coding: utf-8 -*-
"""
Create a volume of the binary chrono-slice index from non-legacy tables.
"""
from __future__ import print_function
import os
import json
import tempfile
import logging
import subprocess
from pipes import quote
import pathlib2

from sandbox import sdk2
import sandbox.sdk2.helpers
import sandbox.common.types.client as ctc
from sandbox.common.errors import TaskError, TaskFailure
from sandbox.projects.advq.artifacts import CHRONO_DB_GENERATOR
from sandbox.projects.advq.common import get_chrono_resource_class, SHELL_COMMAND_PREFIX
from sandbox.projects.advq.common.parameters import PhitsParameters, convert_ttl
from sandbox.projects.advq.common.yt_utils import get_yt_env_from_parameters, setup_yt_from_parameters
from sandbox.sdk2.resource import ResourceData
from sandbox.sandboxsdk.environments import PipEnvironment


# Values offered by the "Chrono period" radio group (the ``advq_chrono_type``
# task parameter); the chosen value is also used as the prefix of the
# generated database name ("weeklyhits_..." / "monthlyhits_...").
CHRONO_TYPE_WEEK = 'week'
CHRONO_TYPE_MONTH = 'month'


class AdvqGenChronoChunk(sdk2.Task):
    """Generate one chunk of a chrono database and publish it as a resource.

    The task runs the ``CHRONO_DB_GENERATOR`` binary over the
    ``[start_index, end_index]`` range of ``input_table``, using the totals
    stored in the table's ``@advq_totals`` attribute, and registers the
    produced file as a chrono resource owned by the parent task.
    """

    class Requirements(sdk2.Task.Requirements):
        client_tags = ctc.Tag.LINUX_TRUSTY & ctc.Tag.IPV6
        disk_space = 32 * 1024
        ram = 32 * 1024
        cores = 1
        environments = (
            PipEnvironment("yandex-yt"),
            PipEnvironment("yandex-yt-yson-bindings-skynet")
        )

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(PhitsParameters):
        with sdk2.parameters.RadioGroup("Chrono period", required=True) as advq_chrono_type:
            advq_chrono_type.values[CHRONO_TYPE_MONTH] = advq_chrono_type.Value(value=CHRONO_TYPE_MONTH)
            advq_chrono_type.values[CHRONO_TYPE_WEEK] = advq_chrono_type.Value(value=CHRONO_TYPE_WEEK)
        advq_db = sdk2.parameters.String("db (rus, tur-robots, etc)", required=True)

        date = sdk2.parameters.String("Database week number or month number (YYYYNN)", required=True)
        epoch = sdk2.parameters.Integer("Epoch", required=True, default=0)
        epodate = sdk2.parameters.String(
            "Epodate string (bare date or {date}-{epoch} or {date}-delta{epoch})",
            required=True
        )
        is_delta = sdk2.parameters.Bool("Table is delta", required=True)
        input_table = sdk2.parameters.String("Input table", required=True)
        chunk_number = sdk2.parameters.Integer("Chunk number, could be 1..chunks_total", required=True)
        chunks_total = sdk2.parameters.Integer("Total number chunks", required=True)
        start_index = sdk2.parameters.Integer("Start index to generate from input_table", required=True)
        end_index = sdk2.parameters.Integer("End index to generate from input_table", required=True)
        releaseTo = sdk2.parameters.String("Release attribute value", required=False)
        ttl = sdk2.parameters.Integer("TTL for released chunks (days, always; 0 for inf)", default=720, required=True)

    def on_enqueue(self):
        """Delegate to the base implementation; no extra enqueue-time logic."""
        super(AdvqGenChronoChunk, self).on_enqueue()

    def _validate_parameters(self):
        """Check required parameters and the is_delta/epodate consistency.

        Raises:
            TaskFailure: if a required parameter is None, or if the
                ``is_delta`` flag disagrees with the presence of the
                substring 'delta' in ``epodate``.
        """
        params = self.Parameters
        required_values = [
            params.advq_db,
            params.advq_chrono_type,
            params.date,
            params.epodate,
            params.input_table,
            params.chunk_number,
            params.chunks_total,
            params.start_index,
            params.end_index,
        ]
        if None in required_values:
            # When a task is created by another task, nobody checks that all
            # required parameters are set, so we have to do it ourselves.
            raise TaskFailure("Some required parameters are empty")

        epodate_has_delta = 'delta' in params.epodate
        if params.is_delta and not epodate_has_delta:
            raise TaskFailure("is_delta flag is set but epodate doesn't contain 'delta'")
        if not params.is_delta and epodate_has_delta:
            raise TaskFailure("is_delta flag is not set but epodate contains 'delta'")

    def on_execute(self):
        """Run the generator for this chunk and publish the resulting file.

        Raises:
            TaskFailure: on missing or inconsistent parameters.
            TaskError: if the input table or its ``@advq_totals`` attribute
                does not exist.
            subprocess.CalledProcessError: if the generator exits with a
                non-zero status (re-raised after being recorded in the
                task info).
        """
        # The YT client is only importable at execution time, via the
        # PipEnvironment entries declared in Requirements.
        import yt.wrapper as yt
        import yt.logger as yt_logger
        yt_logger.LOGGER.setLevel(logging.DEBUG)

        params = self.Parameters
        res_class = get_chrono_resource_class(params.advq_phits_type)

        self._validate_parameters()

        chrono_db_prefix = '{chrono_type}lyhits_{type}_{db}_{epodate}'.format(
            type=params.advq_phits_type,
            chrono_type=params.advq_chrono_type,
            db=params.advq_db,
            epodate=params.epodate,
        )

        env = get_yt_env_from_parameters(params)
        setup_yt_from_parameters(params)

        binaries = ResourceData(params.advq_build_binaries)

        output_dir = pathlib2.Path('output')
        output_dir.mkdir()

        output_file_base = output_dir.joinpath(chrono_db_prefix)

        chrono_db_generator = str(binaries.path.joinpath(CHRONO_DB_GENERATOR))

        # Keys of the @advq_totals attribute, written to the totals file in
        # this order; CHANNELS is passed to the generator via --channels.
        REGION_HITS_FIELDS = (
            'RegionHits',
            'PhoneRegionHits',
            'TabletRegionHits',
        )
        CHANNELS = 'all,phone,tablet'

        if not yt.exists(params.input_table):
            raise TaskError("Input table {!r} doesn't exist".format(params.input_table))

        with sandbox.sdk2.helpers.ProcessLog(self, logger=logging.getLogger("advq-chrono-db-generate")) as pl:
            totals_attr = params.input_table + '/@advq_totals'
            if not yt.exists(totals_attr):
                raise TaskError(
                    "Failed to find totals for chrono table {!r}".format(params.input_table))
            totals = yt.get(totals_attr)

            # Save totals to a file that the generator reads via --total-file.
            with tempfile.NamedTemporaryFile(prefix=chrono_db_prefix + '.total.') as totals_file:
                # str() keeps string values intact and makes numeric values
                # joinable (a bare join would raise TypeError on numbers).
                totals_fields = [str(totals[field]) for field in REGION_HITS_FIELDS]
                print('\t'.join(totals_fields), file=totals_file.file)
                # The generator opens the file by name, so the data must be
                # flushed to disk before it starts.
                totals_file.file.flush()

                extra_args = []
                command = (
                    "{chrono_db_generator} {advq_db} {period} {date} --total-file {inp_totals_file}"
                    " --generate-parallel --input-table {input_table} --volume-number {chunk_num}"
                    " --start-index {start_index} --end-index {end_index} --sorted"
                    " --channels {channels} --output {out_file_path} {extra_args}"
                ).format(
                    chrono_db_generator=quote(chrono_db_generator),
                    advq_db=quote(params.advq_db),
                    period=quote(params.advq_chrono_type),
                    date=quote(params.epodate),
                    input_table=quote(params.input_table),
                    # chunk_number is 1-based, --volume-number gets it 0-based.
                    chunk_num=quote(str(params.chunk_number - 1)),
                    start_index=quote(str(params.start_index)),
                    end_index=quote(str(params.end_index)),
                    inp_totals_file=quote(totals_file.name),
                    channels=quote(CHANNELS),
                    out_file_path=quote(str(output_file_base)),
                    extra_args=' '.join(quote(arg) for arg in extra_args),
                )
                # Run the database generation; its stdout is parsed as JSON
                # below, its stderr goes to the process log.
                try:
                    chunks_json = subprocess.check_output(
                        SHELL_COMMAND_PREFIX + [command],
                        stderr=pl.stdout,
                        env=env,
                    )
                except subprocess.CalledProcessError as ex:
                    # ex.message is always empty for CalledProcessError (and
                    # is gone in Python 3); str(ex) carries the command and
                    # its exit status.
                    self.set_info(str(ex))
                    raise
                logging.info("Got chunks: %r", chunks_json)
                # The parsed value is used below as the path of the file to
                # move into the resource.
                chunk_path = json.loads(chunks_json)

        chrono_chunk_filename = '{}.{}.{}.db'.format(chrono_db_prefix,
                                                     params.chunk_number,
                                                     params.chunks_total)

        # The resource is created with task=self.parent, not this task.
        res = res_class(
            task=self.parent,
            path=chrono_chunk_filename,
            advq_phits_type=params.advq_phits_type,
            advq_chrono_type=params.advq_chrono_type,
            advq_db=params.advq_db,
            advq_date=params.date,
            advq_epoch=params.epoch,
            advq_epodate=params.epodate,
            advq_is_delta=params.is_delta,
            description=("{}: {!r} from {!r}".format(
                params.description,
                chrono_chunk_filename,
                params.input_table)),
            advq_chunk=params.chunk_number,
            advq_total_chunks=params.chunks_total,
            advq_input_table=params.input_table,
            ttl=convert_ttl(params.ttl),
        )

        os.rename(chunk_path, str(res.path))

        ResourceData(res).ready()
