# -*- coding: utf-8 -*
import os
import datetime

from sandbox import sdk2

from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.paths import get_logs_folder

from sandbox.projects import resource_types
from sandbox.projects.geosearch.tools import load_index
from sandbox.projects.geosearch.tools.database_notifications import NotifyGeosearchDuty


class CalcSimilarOrgsHypotheses(NotifyGeosearchDuty, sdk2.Task):
    """
        Calculates hypotheses for geosearch similar orgs.

        Runs the "sprav hypotheses calcer" executable over a downloaded
        sharded index (or over an Altay snapshot path in YT mode) and
        publishes the outcome as a Sandbox resource: either a text file with
        the hypotheses themselves, or a text file holding the name of the
        output table when ``output_table`` is set.
    """

    class Requirements(sdk2.Task.Requirements):
        cores = 24
        ram = 80000
        disk_space = 200 * 1024

        # The YT client is imported lazily in set_ttl().
        environments = (environments.PipEnvironment('yandex-yt'),)

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.task.Parameters):
        notify_parameters = NotifyGeosearchDuty.Parameters()
        # Executable that is launched in on_execute().
        sprav_hypos_calcer = sdk2.parameters.Resource('Sprav hypotheses calcer: ',
                                                      resource_type=resource_types.SIMILAR_ORGS_HYPOS_SPRAV_EXECUTABLE,
                                                      required=True)
        index_shardmap = sdk2.parameters.Resource('Geobasesearch index shardmap',
                                                  resource_type=resource_types.ADDRS_BUSINESS_SHARDMAP)
        # Optional filters, passed to the calcer as -r / -g when set.
        rubric_list = sdk2.parameters.Resource('Accepted rubrics: ',
                                               resource_type=resource_types.GEOSEARCH_RUBRIC_ID_LIST)
        geoid_list = sdk2.parameters.Resource('Accepted geoids: ',
                                              resource_type=resource_types.GEOSEARCH_GEOID_LIST)
        # Passed as -d; a falsy value (0) means the flag is omitted.
        max_distance = sdk2.parameters.Integer('Max distance for similar orgs: ',
                                               default_value=3000)
        dump_poi = sdk2.parameters.Bool('Dump poi orgs',
                                        default_value=True)
        # When non-empty the calcer writes to this table instead of stdout.
        # Every occurrence of 'today' is replaced with the current date (YYYYMMDD).
        output_table = sdk2.parameters.String('Output table: ',
                                              default_value='')
        mr_server = sdk2.parameters.String('Mr server: ',
                                           default_value='hahn')
        # Only used together with output_table.
        ext_hypotheses_table = sdk2.parameters.String('Ext hypotheses table: ',
                                                      default_value='')
        # YT mode takes effect only when altay_path is non-blank.
        yt_mode = sdk2.parameters.Bool('Use yt mode: ',
                                       default_value=False)
        altay_path = sdk2.parameters.String('Altay snapshot path: ',
                                            default_value='')
        ttl_days = sdk2.parameters.Integer('Output YT table TTL in days',
                                           default_value=7)

    def create_resource(self, descr, name, res_type):
        """
        Create a resource of type ``res_type`` owned by this task and mark it ready.

        :param descr: resource description
        :param name: path of the file backing the resource
        :param res_type: resource type used to look up the resource class
        """
        resource = sdk2.Resource[res_type]
        current_resource = resource(self, descr, name)
        data = sdk2.ResourceData(current_resource)
        data.ready()

    def set_ttl(self, tbl_name):
        """
        Set the ``expiration_time`` attribute of the output table.

        Does nothing when the ``ttl_days`` parameter is None. Requires
        ``self.yt_token`` to be set beforehand (on_execute() does that when an
        output table is requested).

        :param tbl_name: table name; unless it starts with '//', it is resolved
            against //home/geosearch/similar_orgs/hypotheses/from_backa/
        """
        if self.Parameters.ttl_days is None:
            return
        # Imported here because the package comes from the task's pip environment.
        import yt.wrapper as yt
        yt.config['token'] = self.yt_token
        # NOTE(review): the proxy is fixed to hahn while the calcer itself is
        # pointed at Parameters.mr_server -- confirm the two cannot diverge.
        yt.config['proxy']['url'] = 'hahn.yt.yandex.net'
        if not tbl_name.startswith('//'):
            full_output_path = '//home/geosearch/similar_orgs/hypotheses/from_backa/{0}'.format(tbl_name)
        else:
            full_output_path = tbl_name
        # NOTE(review): this is a naive local-time timestamp; confirm how YT
        # interprets ISO strings without a timezone (presumably as UTC).
        ts = datetime.datetime.now()
        ts += datetime.timedelta(days=self.Parameters.ttl_days)
        yt.set_attribute(full_output_path, 'expiration_time', ts.isoformat())

    def on_execute(self):
        """
        Build the calcer command line, run it and publish the result resource.

        With ``output_table`` set, the calcer writes to that table; the table
        name is saved into a text file published as
        SIMILAR_ORGS_HYPOTHESES_TABLE and the table gets a TTL. Otherwise the
        calcer's stdout is captured into a file published as
        SIMILAR_ORGS_HYPOTHESES. In both cases stderr goes to
        ``process_dump.err`` in the task logs folder.
        """
        from_sprav = str(sdk2.ResourceData(self.Parameters.sprav_hypos_calcer).path)

        index = './index'
        files = ['companies.pbs', 'factors.pbs', 'rubrics.pbs', 'features.pbs', 'address_storage.mms']
        # NOTE(review): the index is downloaded even when YT mode later replaces
        # it with the snapshot path -- confirm whether that is still needed.
        shard_count = load_index.download_sharded_index(self.Parameters.index_shardmap, index, files)

        # Creating hypotheses from sprav
        exec_params = [from_sprav]

        if shard_count != 1:
            exec_params.extend(['--shard-count', str(shard_count)])

        if self.Parameters.rubric_list:
            exec_params.extend(['-r', str(sdk2.ResourceData(self.Parameters.rubric_list).path)])

        if self.Parameters.geoid_list:
            exec_params.extend(['-g', str(sdk2.ResourceData(self.Parameters.geoid_list).path)])

        if self.Parameters.max_distance:
            exec_params.extend(['-d', str(self.Parameters.max_distance)])

        if self.Parameters.dump_poi:
            exec_params.append('--poi')

        output_table = self.Parameters.output_table
        if 'today' in output_table:
            output_table = output_table.replace('today', datetime.datetime.now().strftime('%Y%m%d'))
        if output_table:
            # The token is kept on the instance because set_ttl() needs it too.
            self.yt_token = sdk2.Vault.data('GEOMETA-SEARCH', 'yt-token')
            os.environ['YT_TOKEN'] = self.yt_token
            os.environ['YT_PREFIX'] = '//home/geosearch/'
            exec_params.extend(['--table', output_table])
            exec_params.extend(['-s', self.Parameters.mr_server])

            ext_hyps = self.Parameters.ext_hypotheses_table
            if ext_hyps:
                exec_params.extend(['--ext-hypotheses', ext_hyps])

        if self.Parameters.yt_mode:
            snapshot = self.Parameters.altay_path.strip()
            if snapshot:
                # In YT mode the positional argument is the snapshot path
                # instead of the local index directory.
                index = snapshot
                exec_params.append('--yt')

        # The index (or snapshot path) is always the last positional argument.
        exec_params.append(index)

        proc_err = os.path.join(get_logs_folder(), 'process_dump.err')
        if output_table:
            # Context managers guarantee the log file is closed even if the
            # process fails.
            with open(proc_err, 'w') as err_log:
                run_process(exec_params, stderr=err_log)
            result_name = './similar_orgs_hypotheses_table.txt'
            with open(result_name, 'w') as output:
                output.write(output_table)
            self.set_ttl(output_table)
            self.create_resource('similar orgs hypotheses table', result_name, resource_types.SIMILAR_ORGS_HYPOTHESES_TABLE)
        else:
            result_name = './similar_orgs_hypotheses.txt'
            # Close the captured stdout file before it is registered as a resource.
            with open(result_name, 'w') as output, open(proc_err, 'w') as err_log:
                run_process(exec_params, stdout=output, stderr=err_log)
            self.create_resource('similar orgs hypotheses', result_name, resource_types.SIMILAR_ORGS_HYPOTHESES)
