import os
import logging
import shutil

from sandbox import sdk2
from sandbox.projects.common import utils
from sandbox.projects.suggest.dicts import SuggestDictTask
from sandbox.projects.suggest.resource_types import SuggestBaseForge
from sandbox.projects.suggest.resource_types import SuggestTailFragmentsPreparator
from sandbox.projects.suggest.resource_types import SuggestTailDictBuilder
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.process import run_process


class BuildTailSuggestDict(sdk2.Task, SuggestDictTask):
    """Build suggest tail dictionary from base export.

    Steps performed by ``on_execute``:
      1. ``run_tail_source`` runs the forge ``tail-source`` subcommand with a
         local ``data`` directory as its output path.
      2. ``run_tail_fragments_preparator`` (only when ``build_tail_word_index``
         is set) runs the fragments preparator and dumps the resulting
         ``frags`` YT table into ``data/frags``.
      3. ``run_tail_data_builder`` runs the tail dict builder with a local
         ``dict`` directory as ``--out-dir``.
      4. ``create_dict_info`` / ``publish_dict`` (not defined in this class;
         presumably provided by ``SuggestDictTask``) are called with the
         ``dict`` directory.

    NOTE(review): every external command is passed to ``run_process`` as a
    list together with ``shell=True``; how ``run_process`` combines the two is
    not visible here -- confirm that arguments containing spaces or shell
    metacharacters are handled as intended.
    """

    class Requirements(sdk2.Requirements):
        disk_space = 300 * 1024  # 300 gb
        ram = 230 * 1024  # 230 gb
        environments = (
            environments.PipEnvironment("yandex-yt-yson-bindings-skynet", version='0.3.32-0'),
            environments.PipEnvironment('yandex-yt'),
        )

    class Parameters(sdk2.Task.Parameters):
        dictionary_name = sdk2.parameters.String('Dictionary name', default='')
        dictionary_description = sdk2.parameters.String('Description', default='Base dictionary')
        autodeploy = sdk2.parameters.Bool('Autodeploy', default=False)
        export_table_path = sdk2.parameters.String('Export table path', default='')
        yt_cluster = sdk2.parameters.String('YT cluster', default='hahn')
        yt_token_vault_name = sdk2.parameters.String('YT token vault record', default='SUGGEST_YT_TOKEN')
        serp_size = sdk2.parameters.Integer('Serp size for suggest_tail_builder', default=10)
        max_range_node_count = sdk2.parameters.Integer('Max index count in node of range type', default=0)
        user_sessions_date = sdk2.parameters.String('User sessions date in format YYYY-MM-DD. It will be used if last query timestamp is not set', default='')
        last_query_ts = sdk2.parameters.Integer('Last query timestamp.', default=0)
        start_ts = sdk2.parameters.Integer('Start timestamp for whole build dict process', default=0)
        period = sdk2.parameters.Integer('Period for tail dict in days', default=0)
        dictionary_source_yt_path = sdk2.parameters.String('Dictionary source yt path', default='')
        build_tail_word_index = sdk2.parameters.Bool('Build tail word index', default=False)

    @staticmethod
    def get_forge():
        """Return the result of syncing the last stable ``SuggestBaseForge`` resource."""
        return utils.sync_last_stable_resource(SuggestBaseForge)

    @staticmethod
    def get_tail_data_builder():
        """Return the result of syncing the last stable ``SuggestTailDictBuilder`` resource."""
        return utils.sync_last_stable_resource(SuggestTailDictBuilder)

    @staticmethod
    def get_tail_fragments_preparator():
        """Return the result of syncing the last stable ``SuggestTailFragmentsPreparator`` resource."""
        return utils.sync_last_stable_resource(SuggestTailFragmentsPreparator)

    def run_tail_source(self):
        """Run the forge ``tail-source`` subcommand.

        When the ``dictionary_source_yt_path`` parameter is empty, a path
        derived from the dictionary name and the task id is used instead.
        A ``data`` directory is created in the current working directory and
        passed to forge as ``--output-path``.

        Returns:
            Tuple ``(dictionary_source_yt_path, data_path)``.
        """
        dictionary_source_yt_path = self.Parameters.dictionary_source_yt_path
        if not dictionary_source_yt_path:
            dictionary_source_yt_path = \
                '//home/suggest-prod/dictionary_source/{0}/{1}'.format(self.Parameters.dictionary_name, self.id)

        command = [
            self.get_forge(), 'tail-source',
            '--cluster', self.Parameters.yt_cluster,
            '--export-table', self.Parameters.export_table_path,
            '--dictionary-source-yt-path', dictionary_source_yt_path,
            # Integer parameter: stringify explicitly, like --max-node-range below.
            '--period', str(self.Parameters.period),
        ]

        data_path = os.path.join(os.getcwd(), 'data')
        os.makedirs(data_path)
        command += ['--output-path', data_path]

        run_process(command, log_prefix="forge", wait=True, shell=True)

        return dictionary_source_yt_path, data_path

    def run_tail_fragments_preparator(self, dictionary_source_yt_path, data_path):
        """Build the ``frags`` YT table and dump it to ``<data_path>/frags``.

        Args:
            dictionary_source_yt_path: YT directory; its ``ready`` and
                ``frags`` children are passed to the preparator binary.
            data_path: local directory in which the ``frags`` file is written.
        """
        yt_proxy = self.Parameters.yt_cluster
        yt_token = sdk2.Vault.data(self.Parameters.yt_token_vault_name)
        env = {
            'DEF_MR_SERVER': yt_proxy,
            'MR_RUNTIME': 'YT',
            'YT_PROXY': yt_proxy,
            'YT_TOKEN': yt_token,
        }

        # These are YT paths, not filesystem paths, so they are joined with '/'.
        ready_table = dictionary_source_yt_path + '/ready'
        frags_table = dictionary_source_yt_path + '/frags'

        command = [
            self.get_tail_fragments_preparator(),
            ready_table,
            frags_table,
            '--cluster', yt_proxy,
        ]

        # make frags table from ready table
        run_process(command, log_prefix="fragments_selector", wait=True, shell=True, environment=env)

        # read frags table and save it to file, one tab-separated row per line
        self.setup_yt_client(yt_proxy, yt_token)
        with open(os.path.join(data_path, 'frags'), 'w') as frags_file:
            for row in self.yt_client.read_table(frags_table):
                frags_file.write('{}\t{}\t{}\n'.format(row['key'], row['subkey'], row['value']))

    def run_tail_data_builder(self, data_path, dict_path):
        """Run the tail dict builder over the files in ``data_path``.

        When ``build_tail_word_index`` is set, the builder is first run with
        ``--fragments``; it is then always run with ``--groups`` and
        ``--ntop``. Both runs use ``dict_path`` as ``--out-dir``.

        Args:
            data_path: local directory holding the ``ready``, ``groups`` and
                (optionally) ``frags`` input files.
            dict_path: local output directory for the builder.
        """
        # Resolve the builder once so both invocations use the same binary.
        builder = self.get_tail_data_builder()
        ready_file = os.path.join(data_path, 'ready')

        # make word index for tail dict
        if self.Parameters.build_tail_word_index:
            # suggest_tail_builder $DAT/ready --fragments $DAT/frags --out-dir $BINDAT --version 3
            command = [
                builder,
                ready_file,
                '--fragments', os.path.join(data_path, 'frags'),
                '--out-dir', dict_path,
                '--version', '3',
            ]

            run_process(command, log_prefix="tail_word_index_dict", wait=True, shell=True)

        # suggest_tail_builder $DAT/ready --groups $DAT/groups --out-dir $BINDAT --version 3 --ntop $NTOP
        command = [
            builder,
            ready_file,
            '--groups', os.path.join(data_path, 'groups'),
            '--out-dir', dict_path,
            '--version', '3',
            # Integer parameter: stringify explicitly, like --max-node-range below.
            '--ntop', str(self.Parameters.serp_size),
        ]

        if self.Parameters.max_range_node_count > 0:
            command += ["--max-node-range", str(self.Parameters.max_range_node_count)]

        # make tail binary dict
        run_process(command, log_prefix="tail_dict", wait=True, shell=True)

    def on_execute(self):
        """Task entry point: prepare sources, build the binary dict and publish it."""
        # Export YT credentials through the process environment.
        os.environ['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_token_vault_name)
        os.environ['YT_PROXY'] = self.Parameters.yt_cluster

        # read tail export table, create tail ready, groups tables, upload them to data_path as files
        dictionary_source_yt_path, data_path = self.run_tail_source()
        # create frags source table, upload this table to data_path as file
        if self.Parameters.build_tail_word_index:
            self.run_tail_fragments_preparator(dictionary_source_yt_path, data_path)

        # create directory for binary dicts
        dict_path = os.path.join(os.getcwd(), 'dict')
        os.makedirs(dict_path)

        # make tail binary dict
        self.run_tail_data_builder(data_path, dict_path)

        self.create_dict_info(self.Parameters.dictionary_name,
                              dict_path,
                              self.Parameters.start_ts,
                              self.Parameters.last_query_ts,
                              self.Parameters.user_sessions_date)

        self.publish_dict(self.Parameters.dictionary_name,
                          self.Parameters.dictionary_description,
                          dict_path,
                          autodeploy=self.Parameters.autodeploy)
