import os

import sandbox.common.types.task as ctt

from sandbox import sdk2
from sandbox.sandboxsdk import environments

from sandbox.projects.ydo import execute_cmd
from sandbox.projects.ydo import ydo_releasers

from sandbox.projects.geosearch.CleanupYtFolder import clean_folder
from sandbox.projects.ydo.backend.YdoSaasSearchProxy.resource_types import YdoSaasSearchProxyWord2Vec
from sandbox.projects.ydo.backup.LinkTables import YdoBackupLinkTables


class YdoCalcOrdersFactorsDataExecutable(sdk2.Resource):
    """Resource type for the calc_orders_factors_data executable.

    Accepted by the ``calc_orders_factors_data_executable_resource`` parameter
    of ``YdoCalcOrdersFactorsDataTask``, which runs the resource's file.
    """
    # The resource can be released; the allowed releasers come from sandbox.projects.ydo.
    releasable = True
    releasers = ydo_releasers


class YdoCalcOrdersFactorsDataTask(sdk2.Task):
    """Run the calc_orders_factors_data executable and post-process its YT backups.

    ``on_execute`` performs the following steps, each wrapped in its own
    ``memoize_stage`` context:

    1. run the executable resource, passing the task parameters as CLI flags;
    2. call ``clean_folder`` for the docs, states and states-all backup
       directories;
    3. start a ``YdoBackupLinkTables`` subtask (see ``link``) and wait for it.
    """

    class Requirements(sdk2.Requirements):
        # The YT python client is needed by ``link`` (``import yt.wrapper``).
        environments = [
            environments.PipEnvironment('yandex-yt'),
        ]
        cores = 1

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        # Binary that is executed in the ``run_cmd`` stage.
        calc_orders_factors_data_executable_resource = sdk2.parameters.Resource(
            "calc_orders_factors_data_executable_resource",
            resource_type=YdoCalcOrdersFactorsDataExecutable,
            required=True,
        )

        # The ``text`` entry inside this resource is passed as --word2vec_model.
        word2vec_model_resource = sdk2.parameters.Resource(
            "word2vec_model_resource",
            resource_type=YdoSaasSearchProxyWord2Vec,
            required=True,
        )

        # The three backup directories below are passed to the executable,
        # then cleaned with ``clean_folder`` and finally handed to ``link``.
        docs_backup_path = sdk2.parameters.String("Path to directory where to store table with SAAS docs")

        states_backup_path = sdk2.parameters.String("Path to directory where to store table with state with category_id != 'all'")

        states_all_backup_path = sdk2.parameters.String("Path to directory where to store table with state with category_id == 'all'")

        # Inputs of the executable.
        db_dump_path = sdk2.parameters.String("Path to directory with db dump")

        squeeze_path = sdk2.parameters.String("Path to directory with squeezes about workers on orders")

        service_squeeze = sdk2.parameters.String("Path to directory with data about actions on service")

        dssm_model_path = sdk2.parameters.String("Path to dssm model (on YT)")

        dssm_vector_size = sdk2.parameters.Integer("Size of dssm vector")

        # Delivery settings forwarded to the executable as-is.
        factors_data_service_name = sdk2.parameters.String("SAAS service name")

        factors_data_ctype = sdk2.parameters.String("SAAS service ctype")

        logbroker_host = sdk2.parameters.String("Logbroker host")

        self_tvm_id = sdk2.parameters.String("Task tvm id")

        logbroker_tvm_id = sdk2.parameters.String("Logbroker tvm id")

        # Vault secret name; its value is exported as the TVM_KEY env variable.
        tvm_key_secret_name = sdk2.parameters.String("Name of secret with tvm key")

        rate_limit = sdk2.parameters.String("Limit on pushed docs per second")

    def link(self, paths):
        """Start a ``YdoBackupLinkTables`` subtask for ``paths`` and wait for it.

        For every directory in ``paths`` the greatest listed entry (as ordered
        by ``max``) whose path does not end with ``current`` is mapped to
        ``<directory>/current``; the resulting mapping is passed to the subtask
        as ``yt_tables``.

        :param paths: iterable of YT directory paths on the ``hahn`` cluster.
        :raises ValueError: if a directory has no entry other than ``current``.
        :raises sdk2.WaitTask: always, once the subtask has been enqueued.
        """
        # Imported lazily: the package is provided by the PipEnvironment
        # declared in Requirements.
        import yt.wrapper as yt
        yt.config['token'] = sdk2.Vault.data(self.owner, 'yt-token')
        yt.config['proxy']['url'] = 'hahn'

        tables_to_link = {}
        for path in paths:
            candidates = [p for p in yt.list(path, absolute=True) if not p.endswith('current')]
            if not candidates:
                # Without this check ``max`` fails with an error that does not
                # say which directory is empty.
                raise ValueError('No tables to link in {}'.format(path))
            tables_to_link[max(candidates)] = os.path.join(path, 'current')

        link_task = YdoBackupLinkTables(
            self,
            description='Link tables for task {}'.format(self.id),
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            yt_host='hahn',
            yt_vault_token='yt-token',
            yt_tables=tables_to_link
        )
        link_task.enqueue()

        # NOTE(review): the subtask's final status is not inspected after the
        # wait is over - confirm that this is intended.
        raise sdk2.WaitTask([link_task.id], ctt.Status.Group.SUCCEED + ctt.Status.Group.SCHEDULER_FAILURE, wait_all=True)

    def on_execute(self):
        """Run the executable, clean the backup directories and link ``current``.

        The last stage calls ``link``, which raises ``sdk2.WaitTask``.
        """
        # Secrets are read from Vault under the task owner and handed to the
        # executable through environment variables.
        env = os.environ.copy()
        env['YT_LOG_LEVEL'] = 'DEBUG'
        env['YT_PROXY'] = 'hahn.yt.yandex.net'
        env['YT_TOKEN'] = sdk2.Vault.data(self.owner, 'yt-token')
        env['YQL_TOKEN'] = sdk2.Vault.data(self.owner, 'YQL_TOKEN')
        env['TVM_KEY'] = sdk2.Vault.data(self.owner, self.Parameters.tvm_key_secret_name)

        with self.memoize_stage.run_cmd:
            executable_path = sdk2.ResourceData(self.Parameters.calc_orders_factors_data_executable_resource).path
            word2vec_path = sdk2.ResourceData(self.Parameters.word2vec_model_resource).path / 'text'
            cmd = [
                str(executable_path),
                '--docs_backup', self.Parameters.docs_backup_path,
                '--states_backup', self.Parameters.states_backup_path,
                '--states_all_backup', self.Parameters.states_all_backup_path,
                '--dump_from_yt', self.Parameters.db_dump_path,
                '--workers_on_orders_squeeze', self.Parameters.squeeze_path,
                '--service_squeeze', self.Parameters.service_squeeze,
                '--word2vec_model', str(word2vec_path),
                '--dssm_model', self.Parameters.dssm_model_path,
                '--dssm_vector_size', str(self.Parameters.dssm_vector_size),
                '--factors_data_service_name', self.Parameters.factors_data_service_name,
                '--factors_data_ctype', self.Parameters.factors_data_ctype,
                '--logbroker_host', self.Parameters.logbroker_host,
                '--self_tvm_id', self.Parameters.self_tvm_id,
                '--logbroker_tvm_id', self.Parameters.logbroker_tvm_id,
                '--rate_limit', self.Parameters.rate_limit,
            ]
            execute_cmd(cmd, "calc_orders_factors_data", "calc_orders_factors_data failed", env=env)

        with self.memoize_stage.clean_docs_folder:
            clean_folder(self, self.Parameters.docs_backup_path)

        with self.memoize_stage.clean_states_folder:
            clean_folder(self, self.Parameters.states_backup_path)

        with self.memoize_stage.clean_states_all_folder:
            clean_folder(self, self.Parameters.states_all_backup_path)

        with self.memoize_stage.link_to_current_docs:
            backup_paths = (
                self.Parameters.docs_backup_path,
                self.Parameters.states_backup_path,
                self.Parameters.states_all_backup_path,
            )
            self.link(backup_paths)
