import os
import logging
import shutil

from sandbox import sdk2
from sandbox.projects.common import utils
from sandbox.projects.suggest.dicts import SuggestDictTask
from sandbox.projects.suggest.resource_types import SuggestBaseForge
from sandbox.projects.suggest.resource_types import SuggestBaseMrDataBuilder
from sandbox.projects.suggest.resource_types import SuggestPornoDictFilters
from sandbox.projects.suggest.resource_types import SuggestPornoTrie
from sandbox.projects.suggest.resource_types import SuggestPornoTrieBuilder
from sandbox.sandboxsdk.process import run_process


class BuildBaseSuggestDict(sdk2.Task, SuggestDictTask):
    """ Build suggest dictionary from base export for web, video or images

    Pipeline implemented by on_execute():
      1. ``forge source`` prepares the dictionary source on YT (and, for the
         local builder, dumps it into ./data).
      2. ``forge stats`` is optionally started in the background.
      3. The dictionary is built either with the map-reduce data builder or
         with run_data_builder() (not defined in this class; presumably
         inherited from SuggestDictTask).
      4. Optionally a requests trie is built and a porno trie is patched in.
      5. The dictionary is described and published via create_dict_info() and
         publish_dict() (also defined outside this class).

    All external commands are passed to run_process() as lists together with
    shell=True; list items may be ints/floats taken directly from parameters.
    NOTE(review): the pre-quoted Solomon URL in run_forge_stats() suggests
    run_process joins the list into one shell string - confirm before
    changing how arguments are quoted.
    """

    class Requirements(sdk2.Requirements):
        disk_space = 32 * 1024
        ram = 128 * 1024

    class Parameters(sdk2.Task.Parameters):
        dictionary_name = sdk2.parameters.String('Dictionary name', default='')
        dictionary_description = sdk2.parameters.String('Description', default='Base dictionary')
        autodeploy = sdk2.parameters.Bool('Autodeploy', default=False)
        export_table_path = sdk2.parameters.String('Export table path', default='')
        yt_cluster = sdk2.parameters.String('YT cluster', default='hahn')
        yt_token_vault_name = sdk2.parameters.String('YT token vault record', default='SUGGEST_YT_TOKEN')
        serp_size = sdk2.parameters.Integer('Serp size for suggest-data-builder', default=10)
        stop_word_count = sdk2.parameters.Integer('Stop-word-count', default=300000)
        stop_prefix_count = sdk2.parameters.Integer('Stop-prefix-count', default=150000)
        misspell_frequency_threshold = sdk2.parameters.Float('Misspell-frequency-threshold', default=0.5)
        user_sessions_date = sdk2.parameters.String('User sessions date in format YYYY-MM-DD. It will be used if last query timestamp is not set', default='')
        last_query_ts = sdk2.parameters.Integer('Last query timestamp.', default=0)
        start_ts = sdk2.parameters.Integer('Start timestamp for whole build dict process', default=0)
        patch_porno_trie = sdk2.parameters.Bool('Patch base dict with porno trie from last touch dict', default=False)
        porno_trie_name = sdk2.parameters.String('Porno trie name resource', default='touch.porno.trie')
        reformulations_table = sdk2.parameters.String('Reformulations table path', default='')
        compute_stats = sdk2.parameters.Bool('Compute stats for solomon', default=True)
        dictionary_source_yt_path = sdk2.parameters.String('Dictionary source yt path', default='')
        write_factors = sdk2.parameters.Bool('Write factors from export table to dictionary', default=False)
        use_mr_data_builder = sdk2.parameters.Bool('Use map-reduce data builder', default=False)
        build_requests_trie = sdk2.parameters.Bool('Build requests trie', default=False)
        requests_trie_suffix = sdk2.parameters.String('Suffix for requests trie', default='.requests.trie')
        use_freqs_without_subtract = sdk2.parameters.Bool('Use region frequencies without subtract', default=False)
        use_porno_filters = sdk2.parameters.Bool('Use porno filters for porno trie', default=False)

    @staticmethod
    def get_forge():
        """ Sync the last stable SuggestBaseForge resource and return what the sync call returns """
        return utils.sync_last_stable_resource(SuggestBaseForge)

    @staticmethod
    def get_mr_data_builder():
        """ Sync the last stable SuggestBaseMrDataBuilder resource and return what the sync call returns """
        return utils.sync_last_stable_resource(SuggestBaseMrDataBuilder)

    @staticmethod
    def get_porno_trie_builder():
        """ Sync the last stable SuggestPornoTrieBuilder resource and return what the sync call returns """
        return utils.sync_last_stable_resource(SuggestPornoTrieBuilder)

    @staticmethod
    def _yt_child_path(base_path, name):
        """ Return ``base_path`` + '/' + ``name`` without doubling a trailing slash of ``base_path`` """
        if not base_path.endswith('/'):
            base_path += '/'
        return base_path + name

    def run_forge_source(self):
        """ Run ``forge source`` and return ``(dictionary_source_yt_path, data_path)``.

        The YT path comes from the ``dictionary_source_yt_path`` parameter or,
        when that is empty, is derived from the dictionary name and task id.
        ``data_path`` is a freshly created ./data directory for the local
        builder and an empty string when the map-reduce builder is used.
        """
        dictionary_source_yt_path = self.Parameters.dictionary_source_yt_path
        if not dictionary_source_yt_path:
            dictionary_source_yt_path = \
                '//home/suggest-prod/dictionary_source/{0}/{1}'.format(self.Parameters.dictionary_name, self.id)

        command = [self.get_forge(), 'source',
                   '--cluster', self.Parameters.yt_cluster,
                   '--export-table', self.Parameters.export_table_path,
                   '--dictionary-source-yt-path', dictionary_source_yt_path,
                   '--misspell-frequency-threshold', self.Parameters.misspell_frequency_threshold
                   ]

        if self.Parameters.reformulations_table:
            command += ['--refs-table', self.Parameters.reformulations_table]

        # Local output is requested only for the old (non map-reduce) builder.
        data_path = ''
        if not self.Parameters.use_mr_data_builder:
            data_path = os.path.join(os.getcwd(), 'data')
            os.makedirs(data_path)
            command += ['--output-path', data_path]

        run_process(command, log_prefix="forge", wait=True, shell=True)

        return dictionary_source_yt_path, data_path

    def run_forge_stats(self, dictionary_source_yt_path):
        """ Start ``forge stats`` for the given dictionary source without waiting for it (wait=False) """
        # The URL literal carries its own single quotes ('?' and '&' are shell metacharacters).
        solomon_url = "\'https://solomon.yandex.net/api/v2/push?project=suggest&service=dict_stats&cluster=main\'"
        # NOTE(review): the token is placed on the command line below - check that it cannot leak into logs.
        solomon_token = sdk2.Vault.data("SUGGEST_SOLOMON_TOKEN")

        stats_command = [self.get_forge(), 'stats',
                         '--cluster', self.Parameters.yt_cluster,
                         '--source-path', dictionary_source_yt_path,
                         '--solomon-push-url', solomon_url,
                         '--solomon-token', solomon_token,
                         '--timestamp', self.Parameters.last_query_ts,
                         '--dict-name', self.Parameters.dictionary_name
                         ]

        run_process(stats_command, log_prefix="forge-stats", wait=False, shell=True)

    def run_old_data_builder(self, data_path, dict_prefix):
        """ Build the dictionary from the local forge output in ``data_path`` via run_data_builder().

        Optional inputs are passed as empty strings when they are disabled by
        parameters ('weights-without-subtract', export table) or when the
        'refs' file is absent from ``data_path``.
        """
        queries_path = os.path.join(data_path, 'queries')
        groups_path = os.path.join(data_path, 'groups')
        streams_path = os.path.join(data_path, 'weights')

        streams_without_subtract_path = ''
        if self.Parameters.use_freqs_without_subtract:
            streams_without_subtract_path = os.path.join(data_path, 'weights-without-subtract')

        refs_path = os.path.join(data_path, 'refs')
        if not os.path.exists(refs_path):
            refs_path = ''

        export_table = self.Parameters.export_table_path if self.Parameters.write_factors else ''

        self.run_data_builder(dict_prefix,
                              queries_path,
                              groups_path,
                              streams_path,
                              data_path='',
                              word_index=True,
                              top_size=self.Parameters.serp_size,
                              thread_count=32,
                              refs_path=refs_path,
                              export_table=export_table,
                              stop_word_count=self.Parameters.stop_word_count,
                              stop_prefix_count=self.Parameters.stop_prefix_count,
                              streams_without_subtract_path=streams_without_subtract_path,
                              )

    def run_porno_trie_builder(self, data_path, dict_prefix, requests_trie_suffix, porno_filters_path):
        """ Run the porno trie builder on ``data_path``/queries, writing to ``dict_prefix`` + ``requests_trie_suffix``.

        ``porno_filters_path`` is appended as a third argument unless it is None.
        """
        queries_path = os.path.join(data_path, 'queries')
        requests_trie_path = dict_prefix + requests_trie_suffix
        command = [self.get_porno_trie_builder(),
                   queries_path,
                   requests_trie_path
                   ]
        if porno_filters_path is not None:
            logging.info('use porno filters for filtering queries')
            command += [porno_filters_path]
        run_process(command, log_prefix="porno_trie_builder", wait=True, shell=True)

    def run_mr_data_builder(self, dictionary_source_yt_path, dict_prefix):
        """ Run the map-reduce data builder over the queries/groups/weights tables of the dictionary source """
        queries_table = self._yt_child_path(dictionary_source_yt_path, 'queries')
        groups_table = self._yt_child_path(dictionary_source_yt_path, 'groups')
        weights_table = self._yt_child_path(dictionary_source_yt_path, 'weights')

        command = [self.get_mr_data_builder(),
                   '--cluster', self.Parameters.yt_cluster,
                   '--queries-table', queries_table,
                   '--groups-table', groups_table,
                   '--weights-table', weights_table,
                   '--dict-prefix', dict_prefix,
                   '--max-suggests-count', self.Parameters.serp_size,
                   '--part-length', 3,
                   '--stop-word-count', self.Parameters.stop_word_count,
                   '--stop-prefix-count', self.Parameters.stop_prefix_count,
                   '--use-cloud-nodes'
                   ]

        # Pass the option only when a table is configured, as run_forge_source()
        # does for '--refs-table': an empty value would leave the flag without
        # an argument of its own.
        if self.Parameters.reformulations_table:
            command += ['--reformulations-table', self.Parameters.reformulations_table]

        run_process(command, log_prefix="mr_data_builder", wait=True, shell=True)

    def _link_porno_filters(self):
        """ Symlink the porno filters resource to ./suggest_filters and return the path to its config.xml """
        porno_filters = sdk2.Resource.find(
            resource_type=SuggestPornoDictFilters,
            state='READY'
        ).first()
        if porno_filters is None:
            raise RuntimeError('No READY SuggestPornoDictFilters resource found')

        resource_path = str(sdk2.ResourceData(porno_filters).path)
        logging.info('resource path: %s', resource_path)
        # make link to directory for using relative paths in config
        resource_link_path = os.path.join(os.getcwd(), 'suggest_filters')
        os.symlink(resource_path, resource_link_path)
        return os.path.join(resource_link_path, "config.xml")

    def _patch_porno_trie(self, dict_prefix):
        """ Replace ``dict_prefix`` + '.porno.trie' with the most recently created matching SuggestPornoTrie resource """
        porno_trie_name = self.Parameters.porno_trie_name
        # NOTE(review): only three resources are fetched and the newest of them
        # is taken; whether find() already returns the latest ones first is not
        # visible here.
        candidates = list(sdk2.Resource.find(
            resource_type=SuggestPornoTrie,
            state='READY',
            attrs={'name': porno_trie_name}
        ).limit(3))
        if not candidates:
            raise RuntimeError(
                'No READY SuggestPornoTrie resource with name {0!r} found'.format(porno_trie_name))

        last = sorted(candidates, key=lambda x: x.created)[-1]
        resource_path = str(sdk2.ResourceData(last).path)
        logging.info('resource path: %s', resource_path)

        dst_porno_trie = dict_prefix + '.porno.trie'
        if os.path.exists(dst_porno_trie):
            os.remove(dst_porno_trie)
        shutil.copyfile(resource_path, dst_porno_trie)

    def on_execute(self):
        """ Task entry point: prepare the source, build the dictionary into ./dict and publish it.

        Raises RuntimeError when a required porno filters / porno trie
        resource cannot be found.
        """
        # Exported through the environment so that child processes inherit the YT settings.
        os.environ['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_token_vault_name)
        os.environ['YT_PROXY'] = self.Parameters.yt_cluster

        dictionary_source_yt_path, data_path = self.run_forge_source()

        if self.Parameters.compute_stats:
            self.run_forge_stats(dictionary_source_yt_path)

        dict_path = os.path.join(os.getcwd(), 'dict')
        os.makedirs(dict_path)
        dict_prefix = os.path.join(dict_path, 'dict')

        if self.Parameters.use_mr_data_builder:
            self.run_mr_data_builder(dictionary_source_yt_path, dict_prefix)
        else:
            self.run_old_data_builder(data_path, dict_prefix)

        if self.Parameters.build_requests_trie:
            porno_filters_path = None
            if self.Parameters.use_porno_filters:
                porno_filters_path = self._link_porno_filters()
            # NOTE(review): data_path is '' when use_mr_data_builder is set, so
            # the builder is then given the relative path 'queries' - confirm
            # that this combination of parameters is supported.
            self.run_porno_trie_builder(data_path, dict_prefix, self.Parameters.requests_trie_suffix, porno_filters_path)

        self.create_dict_info(self.Parameters.dictionary_name,
                              dict_path,
                              self.Parameters.start_ts,
                              self.Parameters.last_query_ts,
                              self.Parameters.user_sessions_date)

        if self.Parameters.patch_porno_trie:
            self._patch_porno_trie(dict_prefix)

        self.publish_dict(self.Parameters.dictionary_name,
                          self.Parameters.dictionary_description,
                          dict_path,
                          autodeploy=self.Parameters.autodeploy)
