# -*- coding: utf-8 -*-

import logging
import os

import sandbox.common.types.task as ctt
from sandbox import sdk2

from sandbox.projects.ydo import (
    ydo_releasers,
    get_now_utc,
    execute_cmd,
    YdoFeaturesJson,
)

from sandbox.projects.ydo.rubrics_merger.YdoRubricsMerger import YdoMergedRubricsSmallDump

from sandbox.projects.common.geosearch.utils import unpack_file
from sandbox.projects.geobase.Geodata6BinStable.resource import GEODATA6BIN_STABLE
from sandbox.projects.geosearch.CleanupYtFolder import clean_folder
from sandbox.projects.resource_types import GEODATATZDATA_STABLE
from sandbox.projects.ydo.backup.LinkTables import YdoBackupLinkTables

from sandbox.sandboxsdk import environments


# Resource types
class YdoJsonIndexerExecutable(sdk2.Resource):
    """
        Generate table with json documents for SAAS
    """
    # Resource type of the indexer binary. YdoJsonIndexer accepts a resource of
    # this type as its `indexer` parameter and puts the resource's local path
    # first in the command it hands to execute_cmd.
    executable = True
    releasable = True
    # `ydo_releasers` is imported from sandbox.projects.ydo.
    releasers = ydo_releasers


class YdoJsonIndexer(sdk2.Task):
    '''Task that creates SAAS index and uploads it'''

    class Parameters(sdk2.Parameters):
        # Task inputs. create_index() turns most of them into command-line
        # options of the indexer executable; the option each parameter maps to
        # is noted next to it.

        # Executable to run; its local path is the first element of the command.
        indexer = sdk2.parameters.Resource(
            'indexer executable',
            resource_type=YdoJsonIndexerExecutable,
            required=True
        )

        # NOTE(review): presumably seconds (86400 s = 24 h) -- confirm against the SDK.
        kill_timeout = 86400

        # Passed as `--dump_from_yt`.
        yt_backup_dir = sdk2.parameters.String('Path to database backup on YT', required=True)

        # Used as the `kps=` part of both output table names.
        saas_namespace = sdk2.parameters.String('SAAS namespace (kps)', required=True)

        # Passed as `--additional_factors` when non-empty.
        additional_factors = sdk2.parameters.String('Path to table with additional factors', required=False)

        # When non-empty, adds `--sprav_dir <value> --sprav_on_yt`.
        sprav_dir = sdk2.parameters.String('Path to sprav directory', required=False)

        # Passed as `--sprav_to_workers_path` when non-empty.
        sprav_to_workers_path = sdk2.parameters.String('Path to table with sprav_to_workers table', required=False)

        # Directory that receives the timestamped output tables and the
        # `current_index` / `current_kv` link targets; also the folder handed
        # to clean_folder() at the end of the task.
        res_dir = sdk2.parameters.String('Index resulting directory', required=True)

        # Passed as `--geobase_file`.
        geobase_snapshot = sdk2.parameters.Resource(
            'Geobase 6 snapshot',
            resource_type=GEODATA6BIN_STABLE,
            required=True
        )

        # Unpacked into the current working directory before indexing; the
        # indexer is then pointed at `<cwd>/zones_bin` via `--geodata_tz_location`.
        geodata_tz_snapshot = sdk2.parameters.Resource(
            'Geodata tz',
            resource_type=GEODATATZDATA_STABLE,
            required=True
        )

        # Passed as `--features_json_path`.
        features_json = sdk2.parameters.Resource(
            'Features',
            resource_type=YdoFeaturesJson,
            required=True
        )

        # Optional; passed as `--rubrics_json` when set.
        rubrics_json = sdk2.parameters.Resource(
            'merged rubrics.json',
            resource_type=YdoMergedRubricsSmallDump,
            required=False
        )

        # Adds `--sprav_only`.
        sprav_only = sdk2.parameters.Bool('Sprav only mode', default=False)

        # Adds `--docdoc_mode` (note: the option name differs from the parameter name).
        docdoc_only = sdk2.parameters.Bool('DocDoc only mode', default=False)

        # Adds `--removed_workers_only`.
        removed_workers_only = sdk2.parameters.Bool('Removed workers only mode', default=False)

        # Adds `--data_for_update_addresses_with_sprav_on_yt` together with the
        # two path options below. The paths are passed through as-is, with no
        # check that they are non-empty.
        update_addresses_with_sprav = sdk2.parameters.Bool('Change worker\'s address to org or chain address', default=False)

        # Value of `--workers_to_sprav_companies_result_path` (only used with update_addresses_with_sprav).
        workers_to_sprav_companies_result_path = sdk2.parameters.String('Path to result of workers_to_sprav_companies querry', required=False)

        # Value of `--altay_company_to_chain_path` (only used with update_addresses_with_sprav).
        altay_company_to_chain_path = sdk2.parameters.String('Path to sprav company_to_chain table', required=False)

        # Split on whitespace and appended to the command; quoting is not interpreted.
        additional_cmd_options = sdk2.parameters.String('Additional command line options')

        # Exported to the indexer as YT_LOG_LEVEL; 'INFO' is used when empty.
        yt_log_level = sdk2.parameters.String('Yt log level', required=False)

        # Adds `--unpublish_passport_duplicates`.
        unpublish_passport_duplicates = sdk2.parameters.Bool('Unpublish workers with passport duplicates', default=False)

        # Adds `--filter_far_areas_sprav_linked --far_areas_max_distance <N>`.
        filter_far_areas_sprav_linked = sdk2.parameters.Bool('Filter areas far from sprav linked addresses', default=False)

        # Only sent when filter_far_areas_sprav_linked is enabled.
        # NOTE(review): distance units are not visible here -- confirm with the indexer.
        far_areas_max_distance = sdk2.parameters.Integer('Max distance from addresses for areas of sprav linked workers', default=200000)

        # Adds `--cut_areas_sprav_not_linked --areas_sprav_not_linked_max_distance <N>`.
        cut_areas_sprav_not_linked = sdk2.parameters.Bool('Cut areas of orgs not linked with sprav', default=False)

        # Only sent when cut_areas_sprav_not_linked is enabled.
        # NOTE(review): distance units are not visible here -- confirm with the indexer.
        areas_sprav_not_linked_max_distance = sdk2.parameters.Integer('Max distance between areas of not sprav linked orgs', default=200000)

        # Adds `--clear_linked_sprav_online_orgs_addresses`.
        clear_linked_sprav_online_orgs_addresses = sdk2.parameters.Bool('Remove addresses, areas and main_address from workers linked with sprav online orgs', default=False)

    class Requirements(sdk2.Requirements):
        # Execution requirements of the task.

        # The `yandex-yt` pip package is requested for the task environment.
        environments = [
            environments.PipEnvironment('yandex-yt'),
        ]
        cores = 1
        # NOTE(review): presumably MiB (60 * 1024 = 60 GiB) -- confirm against the SDK.
        disk_space = 60 * 1024

        # NOTE(review): empty override of the SDK's Caches requirements;
        # presumably declares that no shared caches are needed -- confirm.
        class Caches(sdk2.Requirements.Caches):
            pass

    def create_index(self):
        """Run the indexer executable and remember where its output tables are.

        Prepares the process environment (YT token from the Vault, YT proxy,
        YT log level), unpacks the geodata tz resource into the current
        working directory, stores the index timestamp and both output table
        paths in ``self.Context`` and then launches the indexer through
        ``execute_cmd`` with options derived from the task parameters.

        Expects ``self.yt_host`` to be set beforehand (``on_execute`` does so).
        """
        params = self.Parameters

        # Environment of the indexer process: a copy of ours plus YT settings.
        token = sdk2.Vault.data(self.owner, 'yt-token')
        indexer_env = dict(os.environ)
        yt_proxy = '{}.yt.yandex.net'.format(self.yt_host)
        indexer_env.update({
            'YT_TOKEN': token,
            'YT_PROXY': yt_proxy,
            'YT_LOG_LEVEL': params.yt_log_level or 'INFO',
        })

        # The tz data is unpacked into cwd; the indexer reads `zones_bin` from there.
        tz_archive = sdk2.ResourceData(params.geodata_tz_snapshot).path
        unpack_file(str(tz_archive), str(sdk2.Path.cwd()))

        # Output table names carry the start timestamp and the SAAS namespace.
        self.Context.index_ts = get_now_utc()
        name_fields = dict(ts=self.Context.index_ts, kps=params.saas_namespace)
        self.Context.out_ranking_table = os.path.join(
            params.res_dir, '{ts}_ranking_kps={kps}'.format(**name_fields)
        )
        self.Context.out_kv_table = os.path.join(
            params.res_dir, '{ts}_kv_kps={kps}'.format(**name_fields)
        )

        logging.info('Indexing...')

        # Mandatory part of the command line.
        cmd = [
            str(sdk2.ResourceData(params.indexer).path),
            '--dump_from_yt', params.yt_backup_dir,
            '--write_to_yt',
            '--write_proxy', yt_proxy,
            '--index_res_path', self.Context.out_ranking_table,
            '--kv_res_path', self.Context.out_kv_table,
            '--geobase_file', str(sdk2.ResourceData(params.geobase_snapshot).path),
            '--geodata_tz_location', str(sdk2.Path.cwd().joinpath('zones_bin')),
            '--features_json_path', str(sdk2.ResourceData(params.features_json).path),
        ]

        # Optional inputs; the order of the options is kept stable on purpose.
        if params.rubrics_json:
            cmd.extend(['--rubrics_json', str(sdk2.ResourceData(params.rubrics_json).path)])
        if params.additional_cmd_options:
            cmd.extend(params.additional_cmd_options.split())
        if params.additional_factors:
            cmd.extend(['--additional_factors', params.additional_factors])
        if params.sprav_dir:
            cmd.extend(['--sprav_dir', params.sprav_dir, '--sprav_on_yt'])
        if params.sprav_to_workers_path:
            cmd.extend(['--sprav_to_workers_path', params.sprav_to_workers_path])

        # Plain on/off mode switches.
        mode_switches = (
            (params.sprav_only, '--sprav_only'),
            (params.docdoc_only, '--docdoc_mode'),
            (params.removed_workers_only, '--removed_workers_only'),
        )
        for enabled, option in mode_switches:
            if enabled:
                cmd.append(option)

        # Switches that bring their own arguments along.
        if params.update_addresses_with_sprav:
            cmd.extend([
                '--data_for_update_addresses_with_sprav_on_yt',
                '--workers_to_sprav_companies_result_path', params.workers_to_sprav_companies_result_path,
                '--altay_company_to_chain_path', params.altay_company_to_chain_path,
            ])
        if params.filter_far_areas_sprav_linked:
            cmd.extend([
                '--filter_far_areas_sprav_linked',
                '--far_areas_max_distance', str(params.far_areas_max_distance),
            ])
        if params.cut_areas_sprav_not_linked:
            cmd.extend([
                '--cut_areas_sprav_not_linked',
                '--areas_sprav_not_linked_max_distance',
                str(params.areas_sprav_not_linked_max_distance),
            ])

        if params.clear_linked_sprav_online_orgs_addresses:
            cmd.append('--clear_linked_sprav_online_orgs_addresses')
        if params.unpublish_passport_duplicates:
            cmd.append('--unpublish_passport_duplicates')

        execute_cmd(cmd, 'ydo_indexer', 'Failed to create index', env=indexer_env)

    def link(self):
        """Start a YdoBackupLinkTables subtask for the fresh tables and wait for it.

        The subtask receives a mapping from the ranking and kv tables recorded
        in ``self.Context`` to ``current_index`` and ``current_kv`` under the
        result directory. After enqueueing it, the method always raises
        ``sdk2.WaitTask`` for the subtask id, so it never returns normally.
        """
        res_dir = self.Parameters.res_dir

        # Freshly built table -> fixed "current" name inside the result directory.
        tables_to_link = {}
        tables_to_link[self.Context.out_ranking_table] = os.path.join(res_dir, 'current_index')
        tables_to_link[self.Context.out_kv_table] = os.path.join(res_dir, 'current_kv')

        subtask_args = dict(
            description='Link tables for task {}'.format(self.id),
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            yt_host=self.yt_host,
            yt_vault_token='yt-token',
            yt_tables=tables_to_link,
        )
        subtask = YdoBackupLinkTables(self, **subtask_args)
        subtask.enqueue()

        # Statuses the wait is registered for: SUCCEED and SCHEDULER_FAILURE groups.
        awaited_statuses = ctt.Status.Group.SUCCEED + ctt.Status.Group.SCHEDULER_FAILURE
        raise sdk2.WaitTask([subtask.id], awaited_statuses, wait_all=True)

    def on_execute(self):
        """Task entry point: build the index, link the new tables, clean up.

        The work is split into three ``memoize_stage`` blocks. ``link()``
        raises ``sdk2.WaitTask``, so this method is presumably entered again
        once the subtask finishes, with already-passed stages skipped --
        TODO confirm against the SDK's ``memoize_stage`` semantics.
        """
        # Set on every entry (not inside a stage) because both create_index()
        # and link() read it from the instance.
        self.yt_host = 'hahn'

        with self.memoize_stage.firststage:
            self.create_index()

        with self.memoize_stage.link_to_current:
            self.link()

        # NOTE(review): the final status of the link subtask is not inspected
        # before cleanup -- confirm that this is intended.
        with self.memoize_stage.clean_folder:
            clean_folder(self, self.Parameters.res_dir, yt_host=self.yt_host, history_size=4)

        logging.info('Done')
