# -*- coding: utf-8 -*-
import logging
import datetime

from sandbox import sdk2
from sandbox.common.types.task import Semaphores
from sandbox.projects.advq.artifacts import PKZ_DB_GENERATOR
from sandbox.projects.advq.common import PkzDatabaseChunk, AdvqGenerationBinaries
from sandbox.projects.advq.common.list_res import list_any_tables, pkz_list_dbs
from sandbox.projects.advq.common.parameters import YtParameters, convert_ttl, releaseTo_params
from sandbox.projects.advq.common.yt_utils import setup_yt_from_parameters, get_yt_env_from_parameters
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sdk2.resource import ResourceData

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Number of bytes in one mebibyte; used to size the chunked YT downloads.
MB = 2 ** 20

# Template of the semaphore name; ``{db}`` is filled with the task's ``db``
# parameter, so the semaphore is separate for every database.
SEMAPHORE_NAME = "advq_pkz_gen_indices_{db}"


class PkzGenIndices(sdk2.Task):
    """Generate missing pkz databases on YT and publish their chunks as resources.

    For every date that has an input table under ``pkz_tables_yt_prefix`` but no
    generated database under ``pkz_dbs_yt_prefix`` (and that is not older than
    ``days_count`` days), the task runs the ``PKZ_DB_GENERATOR`` binary, then
    downloads each produced chunk file from YT into a ``PkzDatabaseChunk``
    resource.  Optionally a YT link to every chunk is created under
    ``symbolic_link_path``.
    """

    class Requirements(sdk2.Requirements):
        # YT client packages.  ``yt.wrapper`` is imported inside the methods
        # rather than at module level -- presumably because these packages
        # only become available at execution time (TODO confirm).
        environments = (
            PipEnvironment("yandex-yt"),
            PipEnvironment("yandex-yt-yson-bindings-skynet")
        )

    class Parameters(sdk2.Task.Parameters):
        # Resource holding the generator binary (``PKZ_DB_GENERATOR`` is looked
        # up inside the resource directory).
        advq_generation_binaries = sdk2.parameters.Resource(
            "ADVQ generation resource", resource_type=AdvqGenerationBinaries, required=True)
        # Database name; substituted for ``{db}`` in the YT prefixes below and
        # in the semaphore name.
        db = sdk2.parameters.String("Database(rus, tur)", required=True)
        # Passed to the generator and used as the number of chunk files that
        # are downloaded for every date (chunks are numbered from 1).
        total_chunks = sdk2.parameters.Integer("Number of chunks to generate", required=True, default=2)
        release_new_resource, releaseTo = releaseTo_params()
        pkz_tables_yt_prefix = sdk2.parameters.String("YT prefix of pkz tables", default="//home/advq/advq/{db}/pkz")
        pkz_dbs_yt_prefix = sdk2.parameters.String("YT prefix of pkz dbs", default="//home/advq/advq/offline_pkz/{db}")
        ttl = sdk2.parameters.Integer("TTL for release (0 for 'inf')", default=30, required=True)
        # Dates older than ``today - days_count`` days are skipped.
        days_count = sdk2.parameters.Integer("Days to consider", default=28)
        # When non-empty, a YT link to every chunk file is created in this
        # directory.
        symbolic_link_path = sdk2.parameters.String(
            "Adds symbolic link to resulting databases(needed because rus databases needed in offline_pkz)",
            default="//home/advq/advq/offline_pkz",
            required=False,
        )
        yt_params = YtParameters

    def on_enqueue(self):
        """Request a capacity-1 semaphore named after the target database."""
        self.Requirements.semaphores = Semaphores(
            acquires=[
                Semaphores.Acquire(
                    name=SEMAPHORE_NAME.format(db=self.Parameters.db),
                    capacity=1
                ),
            ],
        )

    def _download_yt_file(self, yt_path, local_path, chunk_size=100 * MB):
        """Copy the YT file ``yt_path`` to ``local_path`` in ``chunk_size`` pieces.

        :param yt_path: path of the file on YT.
        :param local_path: path object providing ``open()`` (the resource path).
        :param chunk_size: number of bytes requested from YT per read.

        The local file is opened once and truncated, so a stale file left at
        the same path is replaced instead of being appended to.  Reading stops
        at the first empty response.
        """
        import yt.wrapper as yt

        offset = 0
        # Opening before the first read guarantees the file exists even when
        # the YT file is empty.
        with local_path.open('wb') as out:
            while True:
                buf = yt.read_file(yt_path, length=chunk_size, offset=offset).read()
                if not buf:
                    break
                out.write(buf)
                offset += chunk_size

    def on_execute(self):
        """Build databases for all missing recent dates and publish their chunks."""
        import yt.wrapper as yt
        setup_yt_from_parameters(self.Parameters.yt_params)
        env = get_yt_env_from_parameters(self.Parameters)

        binaries_data = sdk2.ResourceData(self.Parameters.advq_generation_binaries)
        generator_path = str(binaries_data.path.joinpath(PKZ_DB_GENERATOR))

        dbname = self.Parameters.db
        total_chunks = self.Parameters.total_chunks
        tables_prefix = self.Parameters.pkz_tables_yt_prefix.format(db=dbname)
        yt_dir = self.Parameters.pkz_dbs_yt_prefix.format(db=dbname)
        # Oldest date that is still processed.
        earliest_date = datetime.date.today() - datetime.timedelta(self.Parameters.days_count)

        # The first element of every listed item is used as the date key
        # (expected in ``%Y%m%d`` form, see the parsing below).
        available_tables = [item[0] for item in list_any_tables(tables_prefix)]
        dates_to_build = set(available_tables) - set(pkz_list_dbs(yt_dir))

        # Sorted so that the processing order is reproducible (oldest first).
        for date in sorted(dates_to_build):
            if datetime.datetime.strptime(date, "%Y%m%d").date() < earliest_date:
                continue

            input_table = tables_prefix + "/" + date

            with sdk2.helpers.ProcessLog(self, logger=logging.getLogger(PKZ_DB_GENERATOR)) as pl:
                sdk2.helpers.subprocess.check_call(
                    [
                        generator_path,
                        input_table,
                        yt_dir + "/pkz-" + date,
                        str(total_chunks),
                    ],
                    env=env, stdout=pl.stdout, stderr=pl.stderr
                )

            for chunk in range(1, total_chunks + 1):
                output_chunk_file = "pkz-{date}.{chunk}.db".format(date=date, chunk=chunk)
                yt_chunk_path = yt_dir + "/" + output_chunk_file

                resource = PkzDatabaseChunk(
                    task=self,
                    description="Pkz db chunk {}".format("pkz-" + date),
                    path='pkz_{}_{}_{}_{}.db'.format(dbname, date, chunk, total_chunks),
                    pkz_db=dbname,
                    pkz_date=date,
                    pkz_total_chunks=total_chunks,
                    pkz_chunk=chunk,
                    pkz_input_table=input_table,
                    arcadia_revision=self.Parameters.advq_generation_binaries.arcadia_revision,
                    ttl=convert_ttl(self.Parameters.ttl)
                )
                if self.Parameters.release_new_resource:
                    resource.released = self.Parameters.releaseTo

                resource_data = sdk2.ResourceData(resource)
                self._download_yt_file(yt_chunk_path, resource_data.path)

                if self.Parameters.symbolic_link_path:
                    yt.link(yt_chunk_path, self.Parameters.symbolic_link_path + "/" + output_chunk_file)

                resource_data.ready()
