# -*- coding: utf-8 -*-

import os
import logging
import subprocess
from datetime import datetime, timedelta

from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.paths import get_logs_folder
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.projects.geosuggest import resources


class BuildGeosuggestToponymsData(sdk2.Task):
    '''
        Build data for geosuggest toponyms index from YT geocoder export
        (//home/maps/geocoder/geosrc/latest_state/)

        The task creates a temporary build directory and an output directory
        in YT, runs the `toponyms_mapper_bin` binary from the builder resource
        against them, and stores the paths of the resulting tables in the
        task context.
    '''
    class Requirements(sdk2.Task.Requirements):
        cores = 1
        ram = 8192

        # yt.wrapper is imported lazily inside make_dir(), so it is provided
        # through a pip environment rather than a module-level import.
        environments = (environments.PipEnvironment('yandex-yt', use_wheel=True),)

        # NOTE(review): empty Caches subclass; presumably declares that the
        # task does not need any shared caches - confirm against Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.task.Parameters):
        # Four hours.
        kill_timeout = 4 * 60 * 60
        geosuggest_builder = sdk2.parameters.Resource('Geosuggest builder binaries',
                                                      resource_type=resources.GEO_SUGGEST_SANDBOX_BIN)
        yt_cluster = sdk2.parameters.String('YT cluster',
                                            default_value='hahn')
        yt_pool = sdk2.parameters.String('YT pool')
        geocoder_export_path = sdk2.parameters.String('YT-path to geocoder export',
                                                      required=True,
                                                      default_value='//home/maps/geocoder/geosrc/latest_state')
        global_factors_table_path = sdk2.parameters.String('YT-path to global factors table',
                                                           required=True,
                                                           default_value='//home/qreg/geosuggest/production_data/doc_factors/clicks_and_shows')
        # NOTE(review): 'toponym_weightd' looks like a possible typo of
        # 'toponym_weights' - confirm the actual table name before changing it.
        weights_table_path = sdk2.parameters.String('YT-path to toponym weights table',
                                                    required=True,
                                                    default_value='//home/qreg/geosuggest/prepared/toponym_weightd')
        output_dir_path = sdk2.parameters.String('YT-path to output dir',
                                                 required=True)
        ttl_days = sdk2.parameters.Integer('Build artifacts TTL in days',
                                           default_value=3)
        custom_pov_path = sdk2.parameters.String('Path to the custom POV tables, if needed')

    def get_binary(self):
        '''
            Return the local filesystem path of the builder resource as a string.
        '''
        return str(sdk2.ResourceData(self.Parameters.geosuggest_builder).path)

    def make_dir(self, parent_dir_path):
        '''
            Create a per-task directory under `parent_dir_path` in YT.

            The directory is named `geosuggest_toponym_index_build_<task id>`.
            When the `ttl_days` parameter is set, the `expiration_time`
            attribute of the directory is set to now + `ttl_days` days.

            Relies on `self.yt_token`, which is assigned in run().

            :param parent_dir_path: YT path of the parent directory
            :return: YT path of the created directory
        '''
        import yt.wrapper as yt
        yt.config['token'] = self.yt_token
        # Use the same cluster that is passed to the builder binary via
        # `--cluster`, so the directories exist where the binary looks for them.
        yt.config['proxy']['url'] = self.Parameters.yt_cluster
        dir_path = os.path.join(parent_dir_path, 'geosuggest_toponym_index_build_{}'.format(self.id))

        yt.mkdir(dir_path, recursive=None)
        if self.Parameters.ttl_days is not None:
            # Explicit UTC timestamp ('Z' suffix) keeps the TTL independent of
            # the timezone configured on the host that runs the task.
            expires_at = datetime.utcnow() + timedelta(days=self.Parameters.ttl_days)
            yt.set_attribute(dir_path, 'expiration_time', expires_at.isoformat() + 'Z')
        return dir_path

    def run(self):
        '''
            Run `toponyms_mapper_bin` and return the YT output directory path.

            The YT token is read from the Vault and passed to the subprocess
            through the `YT_TOKEN` environment variable; `YT_POOL` is set when
            the `yt_pool` parameter is non-empty. Subprocess stdout and stderr
            are written to `toponyms_data_prepare.log` in the task logs folder.

            :return: YT path of the output directory
            :raises SandboxTaskFailureError: if the binary exits with a
                non-zero code
        '''
        proc_env = os.environ.copy()
        # Stored on the instance because make_dir() needs it as well.
        self.yt_token = sdk2.Vault.data('GEO_SUGGEST', 'yt_token')
        proc_env['YT_TOKEN'] = self.yt_token
        if self.Parameters.yt_pool:
            proc_env['YT_POOL'] = self.Parameters.yt_pool
        binary = os.path.join(self.get_binary(), 'toponyms_mapper_bin')
        build_dir = self.make_dir('//tmp')
        output_dir = self.make_dir(self.Parameters.output_dir_path)
        cmd = [
            binary,
            '--cluster', self.Parameters.yt_cluster,
            '--geocoder_export', self.Parameters.geocoder_export_path,
            '--global_factors_table', self.Parameters.global_factors_table_path,
            '--weights_table', self.Parameters.weights_table_path,
            '--build_dir', build_dir,
            '--output_dir', output_dir,
        ]

        if self.Parameters.custom_pov_path:
            cmd.extend(['--custom_pov_path', self.Parameters.custom_pov_path])

        logging.info('Running: %s', cmd)
        log_file_path = os.path.join(get_logs_folder(), 'toponyms_data_prepare.log')
        with open(log_file_path, 'w') as log_file:
            try:
                subprocess.check_call(cmd,
                                      env=proc_env,
                                      stdout=log_file,
                                      stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError:
                # Keep the traceback in the task log, then fail the task with
                # a short message.
                logging.exception('%s command failed', cmd)
                raise SandboxTaskFailureError('Building toponyms index data failed')
        return output_dir

    def on_execute(self):
        '''
            Task entry point: run the build and store the paths of the
            resulting tables in the task context.
        '''
        output_dir = self.run()
        self.Context.toponyms_data_table = os.path.join(output_dir, 'toponyms_index_data')
        self.Context.org2addr_data_table = os.path.join(output_dir, 'org2addr_toponyms_data')
