# coding: utf-8

import os
import gevent
import json
import time

from sandbox import sdk2
from sandbox.projects.common import utils
from sandbox.projects.resource_types import SUGGEST_DICT as SUGGEST_DICT
from sandbox.projects.suggest.resource_types import SuggestDataBuilderExecutable, SuggestSimpleDataBuilderExecutable
from sandbox.sandboxsdk.process import run_process
from sandbox.projects.geobase.Geodata5BinStable.resource import GEODATA5BIN_STABLE


class SuggestDictTask:

    @staticmethod
    def get_data_builder():
        """Sync the last stable ``SuggestDataBuilderExecutable`` resource.

        Returns whatever ``utils.sync_last_stable_resource`` returns; within
        this class that value is used as the first element of the command line
        built in ``run_data_builder``.
        """
        resource_type = SuggestDataBuilderExecutable
        return utils.sync_last_stable_resource(resource_type)

    @staticmethod
    def get_simple_data_builder():
        """Sync the last stable ``SuggestSimpleDataBuilderExecutable`` resource.

        Returns whatever ``utils.sync_last_stable_resource`` returns; within
        this class that value is used as the first element of the command line
        built in ``run_simple_data_builder``.
        """
        resource_type = SuggestSimpleDataBuilderExecutable
        return utils.sync_last_stable_resource(resource_type)

    @staticmethod
    def get_geodata():
        """Sync the last stable ``GEODATA5BIN_STABLE`` resource.

        Returns whatever ``utils.sync_last_stable_resource`` returns; within
        this class that value is passed as ``--geobase-path`` by
        ``run_simple_data_builder``.
        """
        resource_type = GEODATA5BIN_STABLE
        return utils.sync_last_stable_resource(resource_type)

    def run_data_builder(self,
                         dict_prefix,
                         queries_path,
                         groups_path,
                         streams_path,
                         data_path='',
                         word_index=False,
                         top_size=10,
                         thread_count=8,
                         refs_path='',
                         export_table='',
                         stop_word_count=300000,
                         stop_prefix_count=150000,
                         streams_without_subtract_path=''):
        """Run the data builder executable with the given inputs.

        The executable comes from ``get_data_builder()``. Mandatory arguments
        are always passed; optional ones are appended only when their value is
        truthy.

        :param dict_prefix: value for ``-o``.
        :param queries_path: value for ``-r``.
        :param groups_path: value for ``-g``.
        :param streams_path: value for ``-s``.
        :param data_path: value for ``-d`` (omitted when empty).
        :param word_index: when truthy, the ``-A`` switch is added.
        :param top_size: value for ``-c``.
        :param thread_count: value for ``-T``.
        :param refs_path: value for ``-f`` (omitted when empty).
        :param export_table: value for ``--export-table`` (omitted when empty).
        :param stop_word_count: value for ``--stop-word-count``.
        :param stop_prefix_count: value for ``--stop-prefix-count``.
        :param streams_without_subtract_path: value for
            ``--streams-without-subtract`` (omitted when empty).
        """
        argv = [self.get_data_builder()]
        argv.extend(['-o', dict_prefix])
        argv.extend(['-r', queries_path])
        argv.extend(['-g', groups_path])
        argv.extend(['-s', streams_path])
        argv.extend(['-c', top_size])
        argv.extend(['-T', thread_count])
        argv.extend(['-R', 0])  # -R is always passed as 0
        argv.extend(['--stop-word-count', stop_word_count])
        argv.extend(['--stop-prefix-count', stop_prefix_count])

        # Optional arguments, in the order they must appear on the command line.
        optional_args = (
            (data_path, ['-d', data_path]),
            (word_index, ['-A']),
            (refs_path, ['-f', refs_path]),
            (export_table, ['--export-table', export_table]),
            (streams_without_subtract_path,
             ['--streams-without-subtract', streams_without_subtract_path]),
        )
        for wanted, tokens in optional_args:
            if wanted:
                argv.extend(tokens)

        # NOTE(review): a list (with int items) is handed over together with
        # shell=True; how run_process joins/quotes it is not visible here.
        run_process(argv, log_prefix="data_builder", wait=True, shell=True)

    def publish_dict(self, name, description, path, autodeploy=False, ttl='inf'):
        """Create a ``SUGGEST_DICT`` resource for ``path`` and mark it ready.

        :param name: passed to the resource as ``name``.
        :param description: resource description (second positional argument).
        :param path: passed to the resource as ``path``.
        :param autodeploy: when truthy, ``autodeploy='yes'`` is set on the
            resource; otherwise the attribute is not passed at all.
        :param ttl: passed to the resource as ``ttl``.
        """
        resource_attrs = {'ttl': ttl}
        if autodeploy:
            resource_attrs['autodeploy'] = 'yes'

        resource = SUGGEST_DICT(self, description, path=path, name=name, **resource_attrs)
        resource_data = sdk2.ResourceData(resource)
        resource_data.ready()

    def create_dict_info(self, name, path, start_ts, last_query_ts=0, user_sessions_date=''):
        if not name or not start_ts:
            return

        dict_info = {}
        dict_info['type'] = name
        dict_info['build_start_ts'] = start_ts
        dict_info['build_finish_ts'] = int(time.time())

        if last_query_ts:
            dict_info['last_query_ts'] = last_query_ts
        elif user_sessions_date:
            user_sessions_ts = int(time.mktime(time.strptime(user_sessions_date, '%Y-%m-%d')))
            dict_info['last_query_ts'] = user_sessions_ts + 3600*23 + 3599

        dict_info_path = os.path.join(path, 'dict-info.txt')
        with open(dict_info_path, 'w') as dict_info_file:
            json.dump(dict_info, dict_info_file)
            dict_info_file.write('\n')

    # See https://ml.yandex-team.ru/thread/yt/167477611142886638/
    def setup_yt_client(self, proxy, token):
        """Create a YT client and a JSON format object and store them on ``self``.

        Sets ``self.yt_client`` (a ``yt.YtClient`` built with the pickling and
        parallel-read configuration below) and ``self.yt_format`` (a
        ``yt.JsonFormat`` with ``encode_utf8`` disabled).

        :param proxy: passed to ``YtClient`` as ``proxy``.
        :param token: passed to ``YtClient`` as ``token``.
        """
        import yt.wrapper as yt

        # gevent's signal.py is added to the pickled archive explicitly.
        gevent_dir = os.path.dirname(gevent.__file__)
        gevent_signal_file = (os.path.join(gevent_dir, 'signal.py'), 'gevent/signal.py')

        pickling_config = {
            'python_binary': '/skynet/python/bin/python',
            'additional_files_to_archive': [gevent_signal_file],
        }
        read_parallel_config = {
            'enable': True,
            'max_thread_count': 56,
        }
        client_config = {
            'pickling': pickling_config,
            'read_parallel': read_parallel_config,
        }

        self.yt_client = yt.YtClient(proxy=proxy, token=token, config=client_config)
        self.yt_format = yt.JsonFormat(attributes={'encode_utf8': False})

    def run_simple_data_builder(self,
                                dict_prefix,
                                source_path,
                                yt_cluster,
                                store_data=False,
                                word_index=False,
                                candidates_count=10,
                                thread_count=1,
                                stop_word_count=300000,
                                stop_prefix_count=150000,
                                write_aliases=False,
                                word_index_prefix=0,
                                enable_regional_frequencies=False,
                                enable_regional_corrected_frequencies=False,
                                min_regional_weight=0,
                                require_geodata=False):
        """Run the simple data builder executable with the given inputs.

        The executable comes from ``get_simple_data_builder()``.

        :param dict_prefix: value for ``--output-prefix``.
        :param source_path: value for ``--source-path``.
        :param yt_cluster: value for ``--cluster``.
        :param store_data: when falsy, ``--no-data`` is added.
        :param word_index: when falsy, ``--no-word-index`` is added.
        :param candidates_count: value for ``--candidates-count``.
        :param thread_count: value for ``--jobs-count``.
        :param stop_word_count: value for ``--stop-word-count``.
        :param stop_prefix_count: value for ``--stop-prefix-count``.
        :param write_aliases: when truthy, ``--write-aliases`` is added.
        :param word_index_prefix: value for ``--word-index-prefix``; passed
            only when ``word_index`` is truthy and the value is positive.
        :param enable_regional_frequencies: when truthy,
            ``--regional-frequencies`` is added.
        :param enable_regional_corrected_frequencies: when truthy,
            ``--regional-corrected-frequencies`` is added.
        :param min_regional_weight: value for ``--min-regional-weight``;
            passed only when positive.
        :param require_geodata: when truthy, the result of ``get_geodata()``
            is passed as ``--geobase-path``.
        """
        argv = [
            self.get_simple_data_builder(),
            '--cluster', yt_cluster,
            '--output-prefix', dict_prefix,
            '--source-path', source_path,
            '--candidates-count', candidates_count,
            '--jobs-count', thread_count,
            '--stop-word-count', stop_word_count,
            '--stop-prefix-count', stop_prefix_count,
        ]

        # Optional arguments, in the order they must appear on the command line.
        optional_args = (
            (not store_data, ['--no-data']),
            (not word_index, ['--no-word-index']),
            (write_aliases, ['--write-aliases']),
            (word_index and word_index_prefix > 0,
             ['--word-index-prefix', word_index_prefix]),
            (enable_regional_frequencies, ['--regional-frequencies']),
            (enable_regional_corrected_frequencies, ['--regional-corrected-frequencies']),
            (min_regional_weight > 0, ['--min-regional-weight', min_regional_weight]),
        )
        for wanted, tokens in optional_args:
            if wanted:
                argv.extend(tokens)

        # Geodata is synced only on demand, so it stays out of the table above.
        if require_geodata:
            argv.extend(['--geobase-path', self.get_geodata()])

        # NOTE(review): a list (with int items) is handed over together with
        # shell=True; how run_process joins/quotes it is not visible here.
        run_process(argv, log_prefix="simple_data_builder", wait=True, shell=True)
