# -*- coding: utf-8 -*-

import os
import shutil
import logging
import distutils
import subprocess
from time import sleep

from sandbox import sdk2
from sandbox.projects.common.nanny import nanny
from sandbox.sandboxsdk.paths import get_logs_folder
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.projects.common.geosearch.utils import unpack_file
from sandbox.projects.weather import WEATHER_SPECIAL_LOCATIONS
from sandbox.projects.geosuggest import resources as geosuggest_resources
from sandbox.projects.geobase.Geodata5BinStable import resource as geobase_resources
from sandbox.projects.geosearch.AddrsBaseDatabaseYtBuild import _processess_finished


# Region ids handed to the helper binaries started by the task below: each id
# is passed as a `--geoid` argument to the disputed-fullpaths tool and as a
# `--region` argument to the twins-table tool.
DISPUTED_REGIONS = [
    977,    # Crimea
    20536,  # Donetsk Region
    20540   # Luhansk Region
]


class GeosuggestIndexPostprocessing(nanny.ReleaseToNannyTask2, sdk2.Task):
    '''
        Geosuggest index packing and postprocessing
    '''

    class Parameters(sdk2.task.Parameters):
        # Inputs of the task: sandbox resources and YT table paths whose
        # contents are copied into (or used to build) the resulting data pack.
        geosuggest_static_data = sdk2.parameters.Resource(
            'Geosuggest daemon static data',
            resource_type=geosuggest_resources.GEO_SUGGEST_STATIC_DATA,
        )
        geodata5bin_stable = sdk2.parameters.Resource(
            'geodata5.bin',
            resource_type=geobase_resources.GEODATA5BIN_STABLE,
        )
        toponyms_export_data = sdk2.parameters.String(
            'Toponyms export data table path',
        )
        toponyms_osm_export_data = sdk2.parameters.String(
            'Toponyms for OSM export data table path',
        )
        toponyms_export_ua_data = sdk2.parameters.String(
            'Toponyms for UA pov export data table path',
        )
        prepared_ru_pov_tables = sdk2.parameters.String(
            'RU POV on geocoder export',
        )
        prepared_ua_pov_tables = sdk2.parameters.String(
            'UA POV on geocoder export',
        )
        geosuggest_misspells = sdk2.parameters.Resource(
            'Misspells tries',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_MISSPELLS,
        )
        altay_export = sdk2.parameters.String(
            'Altay export YT path',
            required=True,
        )
        org2addr_toponyms_table = sdk2.parameters.String(
            'Toponyms table YT path',
            required=True,
        )
        orgs_weights_table = sdk2.parameters.String(
            'Weights table path',
            default_value='//home/qreg/geosuggest/prepared/org2addr_weights',
        )
        geosuggest_org_merged = sdk2.parameters.Resource(
            'Merged organizations info in binary format',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_ORG_MERGED_BIN,
        )
        geosuggest_osm_org_merged = sdk2.parameters.Resource(
            'Merged OSM organizations info in binary format',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_OSM_ORG_MERGED_BIN,
        )
        geosuggest_search_groups = sdk2.parameters.Resource(
            'search groups index',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_SEARCH_GROUPS,
        )
        geosuggest_transport = sdk2.parameters.Resource(
            'Transport data',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_TRANSPORT,
        )
        geosuggest_weather_special_locations = sdk2.parameters.Resource(
            'special_locations.json',
            resource_type=WEATHER_SPECIAL_LOCATIONS,
        )
        prefixtops_full = sdk2.parameters.Resource(
            'Full prefixtop',
            resource_type=geosuggest_resources.GEO_SUGGEST_PREFIXTOPS,
        )
        prefixtops_taxi_world = sdk2.parameters.Resource(
            'Taxi prefixtop for new countries',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_PREFIXTOPS_TAXI_WORLD,
        )
        search_history_bin = sdk2.parameters.Resource(
            'geo_search_dictionary/bin',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_GEO_SEARCH_HISTORY_BIN,
        )
        geosearch_ngrams = sdk2.parameters.Resource(
            'geo_search ngrams (compiled trie)',
            resource_type=geosuggest_resources.MAPS_GEO_SUGGEST_GEO_SEARCH_NGRAMS_BIN,
        )

        # Geosuggest daemon parameters
        geosuggest_sandbox_binaries = sdk2.parameters.Resource(
            'Geosuggest sandbox binaries pack',
            resource_type=geosuggest_resources.GEO_SUGGEST_SANDBOX_BIN,
        )
        geosuggest_top_rubrics_by_regions = sdk2.parameters.Resource(
            'Top rubrics by regions',
            resource_type=geosuggest_resources.GEO_SUGGEST_TOP_RUBRICS_BY_REGIONS_TSV,
        )
        geosuggest_top_rubrics_config = sdk2.parameters.Resource(
            'Top rubrics config',
            resource_type=geosuggest_resources.GEO_SUGGEST_TOP_RUBRICS_CONFIG_JSON,
        )
        foreign_trie = sdk2.parameters.Resource(
            'Foreign trie',
            resource_type=geosuggest_resources.GEO_SUGGEST_FOREIGN_TRIE,
        )

        # YT access settings used by the helper processes.
        yt_pool = sdk2.parameters.String(
            'YT pool to use',
        )
        yt_proxy = sdk2.parameters.String(
            'YT proxy to use',
            default_value='hahn',
        )

    class Requirements(sdk2.Task.Requirements):
        # Host resources requested for a run of this task.
        # NOTE(review): units are not visible in this file - presumably `ram`
        # and `disk_space` are in MiB (i.e. 140 GiB / 120 GiB); confirm
        # against the sdk2 documentation.
        cores = 32
        ram = 143360
        disk_space = 122880

        # Empty override of the base Caches requirements.
        # NOTE(review): presumably declares that the task needs no shared
        # caches - confirm against the sdk2 documentation.
        class Caches(sdk2.Requirements.Caches):
            pass

    def recursive_chmod(self, path, mode):
        """Apply ``mode`` to every directory and file found below ``path``.

        The tree is traversed with ``os.walk``; the ``path`` entry itself is
        left untouched.

        Parameters:
        path (str): root of the tree to process
        mode (int): permission bits passed to ``os.chmod``
        """
        for parent, subdir_names, file_names in os.walk(path):
            # Within each visited directory: sub-directories first, then files.
            for entry_name in subdir_names + file_names:
                os.chmod(os.path.join(parent, entry_name), mode)

    def create_subdir(self, file_path):
        """Make sure the directory part of ``file_path`` exists.

        Missing intermediate directories are created as well. Nothing is
        created for the last path component itself.

        Parameters:
        file_path (str): path whose parent directory should exist; a path
                         without a directory part (a bare file name) needs
                         nothing and is ignored
        """
        dirname = os.path.dirname(file_path)
        # os.makedirs('') raises, so a bare file name must be skipped.
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)

    def copy_resource_file(self, resource_id, resource_file_path, current_path):
        """Copy one file belonging to a resource into the pack directory.

        Parameters:
        resource_id: resource handed to ``sdk2.ResourceData`` to get its
                     local path
        resource_file_path (str): file path relative to the resource
                                  directory, or, when the resource is a
                                  single file, relative to the directory
                                  that contains it
        current_path (str): destination path relative to ``self.pack``
        """
        synced_path = str(sdk2.ResourceData(resource_id).path)
        logging.info('Copying %s' % synced_path)
        # A directory resource is searched directly; for a file resource the
        # requested file is looked up next to it.
        base_dir = synced_path if os.path.isdir(synced_path) else os.path.dirname(synced_path)
        source = os.path.join(base_dir, resource_file_path)
        destination = os.path.join(self.pack, current_path)
        self.create_subdir(destination)
        shutil.copyfile(source, destination)

    def copy_resource_dir(self, resource_id, resource_dir_path, current_path):
        """Copy a directory of a resource into the pack directory.

        Parameters:
        resource_id: resource handed to ``sdk2.ResourceData`` to get its
                     local path
        resource_dir_path (str): directory path relative to the resource
                                 root ('' means the whole resource)
        current_path (str): destination directory relative to ``self.pack``
        """
        # `import distutils` alone does not load the `dir_util` submodule, so
        # import it explicitly instead of relying on some other module having
        # done it earlier.
        import distutils.dir_util

        local_resource_path = str(sdk2.ResourceData(resource_id).path)
        logging.info('Copying %s', local_resource_path)
        resource_path = os.path.join(local_resource_path, resource_dir_path)
        geosuggest_resource = os.path.join(self.pack, current_path)
        self.create_subdir(geosuggest_resource)
        distutils.dir_util.copy_tree(resource_path, geosuggest_resource)

    def unpack_resource(self, resource_id, dst_path):
        """Unpack a resource into a location inside the pack directory.

        Parameters:
        resource_id: resource handed to ``sdk2.ResourceData`` to get its
                     local path
        dst_path (str): destination relative to ``self.pack``
        """
        archive_path = str(sdk2.ResourceData(resource_id).path)
        unpack_file(archive_path, os.path.join(self.pack, dst_path))

    def get_geobase_path(self):
        """Return the path of geodata5.bin inside the pack directory."""
        geodata_rel_path = 'geobase/geodata5.bin'
        return os.path.join(self.pack, geodata_rel_path)

    def prepare_toponyms_from_yt(self,
                                 toponyms_export,
                                 tag,
                                 output_dir,
                                 disable_geobase_toponyms=False):
        """Starts the process that generates a toponym index.

        The process is only started here; the caller is responsible for
        waiting for it to finish.

        Parameters:
        toponyms_export (str): YT path to the prepared table with toponyms
        tag (str): short human-readable tag, will be used in the
                   suffix of the process log file
        output_dir (str): path to the result toponym index
        disable_geobase_toponyms (bool): when True, the
                   '--disable-geobase-toponyms' flag is added to the command

        Returns:
        subprocess.Popen: handle of the started process

        Raises:
        SandboxTaskFailureError: if the process could not be started
        """
        geobase_path = self.get_geobase_path()
        cmd = [
            self.prepare_data_bin,
            'toponyms_yt',
            '-s', self.Parameters.yt_proxy,
            '-t', toponyms_export,
            '-d', self.pack + '/',
            '-o', output_dir,
            '-g', geobase_path,
            '--config', os.path.join(self.pack, 'daemon.conf'),
            '--create-search-index'
        ]
        if disable_geobase_toponyms:
            cmd.append('--disable-geobase-toponyms')
        log_file_path = os.path.join(get_logs_folder(), 'toponyms_prepare_%s.log' % tag)
        with open(log_file_path, 'w') as log_file:
            logging.info('Running: %s', cmd)
            try:
                # The argument list is passed as is (no shell), so values with
                # spaces or shell metacharacters reach the binary unchanged.
                # The child holds its own copy of the log descriptor, so it
                # keeps writing after `log_file` is closed on return.
                return subprocess.Popen(
                    cmd,
                    env=self.proc_env,
                    stdout=log_file,
                    stderr=subprocess.STDOUT
                )
            except OSError as err:
                # Popen reports a failure to start the executable via OSError;
                # CalledProcessError is only raised by check_call/check_output.
                msg = '%s command failed: %s' % (cmd, err)
                logging.error(msg)
                raise SandboxTaskFailureError(msg)

    def prepare_disputed_fullpaths_from_yt(self):
        """Starts the process that writes 'disputed-fullpaths.jsonl' into the pack.

        The tool receives the UA POV toponyms export table, the geobase file
        from the pack and one '--geoid' argument per entry of
        DISPUTED_REGIONS. The process is only started here; the caller is
        responsible for waiting for it to finish.

        Returns:
        subprocess.Popen: handle of the started process

        Raises:
        SandboxTaskFailureError: if the process could not be started
        """
        cmd = [
            self.prepare_disputed_fullpaths,
            '--toponym-index', self.Parameters.toponyms_export_ua_data,
            '--geobase', self.get_geobase_path(),
            '--output', os.path.join(self.pack, 'disputed-fullpaths.jsonl')
        ]
        for region in DISPUTED_REGIONS:
            cmd.extend(['--geoid', str(region)])

        log_file_path = os.path.join(get_logs_folder(), 'prepare_disputed_fullpaths.log')
        with open(log_file_path, 'w') as log_file:
            logging.info('Running: %s', cmd)
            try:
                # The argument list is passed as is (no shell), so values with
                # spaces or shell metacharacters reach the binary unchanged.
                # The child holds its own copy of the log descriptor, so it
                # keeps writing after `log_file` is closed on return.
                return subprocess.Popen(
                    cmd,
                    env=self.proc_env,
                    stdout=log_file,
                    stderr=subprocess.STDOUT
                )
            except OSError as err:
                # Popen reports a failure to start the executable via OSError;
                # CalledProcessError is only raised by check_call/check_output.
                msg = '%s command failed: %s' % (cmd, err)
                logging.error(msg)
                raise SandboxTaskFailureError(msg)

    def prepare_twins_table_from_yt(self):
        """Starts the process that writes 'twins-table.txt' into the pack.

        The tool receives the 'toponyms' tables of the RU POV and UA POV
        prepared exports and one '--region' argument per entry of
        DISPUTED_REGIONS. The process is only started here; the caller is
        responsible for waiting for it to finish.

        Returns:
        subprocess.Popen: handle of the started process

        Raises:
        SandboxTaskFailureError: if the process could not be started
        """
        cmd = [
            self.prepare_twins_table,
            '--input1', os.path.join(self.Parameters.prepared_ru_pov_tables, 'toponyms'),
            '--input2', os.path.join(self.Parameters.prepared_ua_pov_tables, 'toponyms'),
            '--output', os.path.join(self.pack, 'twins-table.txt'),
        ]
        for region in DISPUTED_REGIONS:
            cmd.extend(['--region', str(region)])
        log_file_path = os.path.join(get_logs_folder(), 'prepare_twins_table.log')
        with open(log_file_path, 'w') as log_file:
            logging.info('Running: %s', cmd)
            try:
                # The argument list is passed as is (no shell), so values with
                # spaces or shell metacharacters reach the binary unchanged.
                # The child holds its own copy of the log descriptor, so it
                # keeps writing after `log_file` is closed on return.
                return subprocess.Popen(
                    cmd,
                    env=self.proc_env,
                    stdout=log_file,
                    stderr=subprocess.STDOUT
                )
            except OSError as err:
                # Popen reports a failure to start the executable via OSError;
                # CalledProcessError is only raised by check_call/check_output.
                msg = '%s command failed: %s' % (cmd, err)
                logging.error(msg)
                raise SandboxTaskFailureError(msg)

    def prepare_oid2addr(self):
        """Starts the 'oid2addr' step of the prepare-data binary.

        Inputs are the Altay company snapshot table, the weights table and
        the org2addr toponyms table (all YT paths from the task parameters)
        plus the geobase file from the pack. Outputs are 'oid2addr.tsv' and
        'chains_by_cities.tsv' inside the pack and './companies2.not_joined'
        in the working directory. The process is only started here; the
        caller is responsible for waiting for it to finish.

        Returns:
        subprocess.Popen: handle of the started process

        Raises:
        SandboxTaskFailureError: if the process could not be started
        """
        geobase_path = self.get_geobase_path()
        merged_companies_path = os.path.join(self.pack, 'oid2addr.tsv')
        not_merged_companies_path = './companies2.not_joined'
        chains_by_cities_path = os.path.join(self.pack, 'chains_by_cities.tsv')

        cmd = [
            self.prepare_data_bin,
            'oid2addr',
            '-s', self.Parameters.yt_proxy,
            '-c', os.path.join(self.Parameters.altay_export, 'snapshot/company'),
            '-w', self.Parameters.orgs_weights_table,
            '-t', self.Parameters.org2addr_toponyms_table,
            '-m', merged_companies_path,
            '-n', not_merged_companies_path,
            '--geobase', geobase_path,
            '--chains_by_cities', chains_by_cities_path,
        ]
        log_file_path = os.path.join(get_logs_folder(), 'oid2addr_prepare.log')
        with open(log_file_path, 'w') as log_file:
            logging.info('Running: %s', cmd)
            try:
                # The argument list is passed as is (no shell), so values with
                # spaces or shell metacharacters reach the binary unchanged.
                # The child holds its own copy of the log descriptor, so it
                # keeps writing after `log_file` is closed on return.
                return subprocess.Popen(
                    cmd,
                    env=self.proc_env,
                    stdout=log_file,
                    stderr=subprocess.STDOUT
                )
            except OSError as err:
                # Popen reports a failure to start the executable via OSError;
                # CalledProcessError is only raised by check_call/check_output.
                msg = '%s command failed: %s' % (cmd, err)
                logging.error(msg)
                raise SandboxTaskFailureError(msg)

    def prepare_prefix_tops(self):
        """Run the 'prefix_tops' step of the prepare-data binary on the pack.

        The call blocks until the binary exits; its output goes to
        'prefix_tops_prepare.log' in the task logs folder. A non-zero exit
        code surfaces as ``subprocess.CalledProcessError``.
        """
        command = [self.prepare_data_bin, 'prefix_tops', self.pack]
        log_path = os.path.join(get_logs_folder(), 'prefix_tops_prepare.log')
        with open(log_path, 'w') as log_sink:
            logging.info('Running: %s', command)
            subprocess.check_call(command, stdout=log_sink, stderr=subprocess.STDOUT)

    def prepare_shard_info(self):
        """Run the 'shard_info' step of the prepare-data binary.

        The result is written to 'shard-info.pb.txt' inside the pack. The
        call blocks until the binary exits; its output goes to
        'shard_info_prepare.log' in the task logs folder. A non-zero exit
        code surfaces as ``subprocess.CalledProcessError``.
        """
        shard_info_path = os.path.join(self.pack, 'shard-info.pb.txt')
        command = [self.prepare_data_bin, 'shard_info', '--output', shard_info_path]
        log_path = os.path.join(get_logs_folder(), 'shard_info_prepare.log')
        with open(log_path, 'w') as log_sink:
            logging.info('Running: %s', command)
            subprocess.check_call(command, stdout=log_sink, stderr=subprocess.STDOUT)

    def on_execute(self):
        """Assemble the geosuggest data pack and publish it as a resource.

        Steps, in order:
        1. copy the static data resource into './pack/' and continue working
           in its 'data' sub-directory (``self.pack``);
        2. copy / unpack the input resources into the pack;
        3. start the YT based helper processes (toponym index, disputed
           fullpaths, twins table, oid2addr and, if requested, the OSM
           toponym index) and wait until they are finished;
        4. run the 'prefix_tops' and 'shard_info' steps;
        5. create a GEO_SUGGEST_DATA resource from the pack directory and
           mark it ready.
        """
        # `import distutils` alone does not load the `dir_util` submodule, so
        # import it explicitly instead of relying on some other module having
        # done it earlier.
        import distutils.dir_util

        # --- 1. static data is the base layout of the pack ---
        self.pack = './pack/'
        static_data_path = str(sdk2.ResourceData(self.Parameters.geosuggest_static_data).path)
        distutils.dir_util.copy_tree(static_data_path, self.pack)
        self.pack = os.path.join(self.pack, 'data')
        # `0o777` is the octal spelling accepted by both Python 2.6+ and 3.
        self.recursive_chmod(self.pack, 0o777)

        # --- 2. input resources ---
        self.copy_resource_dir(self.Parameters.geosuggest_org_merged,
                               'full',
                               'organizations/')
        self.copy_resource_dir(self.Parameters.geosuggest_osm_org_merged,
                               'full',
                               'organizations/')
        self.copy_resource_file(self.Parameters.geosuggest_transport,
                                'transport.json',
                                'transport.json')
        self.copy_resource_dir(self.Parameters.geosuggest_misspells,
                               '',
                               'misspells/')
        if self.Parameters.geosuggest_weather_special_locations is not None:
            self.copy_resource_file(self.Parameters.geosuggest_weather_special_locations,
                                    'special_locations.json',
                                    'special_locations.json')
        self.copy_resource_dir(self.Parameters.search_history_bin,
                               '',
                               'geo_search_dictionary/')
        self.copy_resource_dir(self.Parameters.geosearch_ngrams,
                               '',
                               'geo_search_history/')
        self.copy_resource_file(self.Parameters.geodata5bin_stable,
                                'geodata5.bin',
                                'geobase/geodata5.bin')
        self.copy_resource_file(self.Parameters.geosuggest_search_groups,
                                'search_groups',
                                'search_groups')
        self.copy_resource_file(self.Parameters.geosuggest_top_rubrics_by_regions,
                                'top_rubrics_by_regions.tsv',
                                'top_rubrics_by_regions.tsv')
        if self.Parameters.geosuggest_top_rubrics_config:
            self.copy_resource_file(self.Parameters.geosuggest_top_rubrics_config,
                                    'top_rubrics_config.json',
                                    'top_rubrics_config.json')
        self.unpack_resource(self.Parameters.prefixtops_taxi_world, '')
        self.unpack_resource(self.Parameters.prefixtops_full, '')
        self.unpack_resource(self.Parameters.foreign_trie, '')

        # --- helper binaries shipped in the sandbox binaries pack ---
        sandbox_binaries_pack = sdk2.ResourceData(self.Parameters.geosuggest_sandbox_binaries).path
        self.prepare_data_bin = str(sandbox_binaries_pack / 'suggest_prepare_data')
        self.prepare_disputed_fullpaths = str(sandbox_binaries_pack / 'prepare_disputed_fullpaths')
        self.prepare_twins_table = str(sandbox_binaries_pack / 'prepare_twins_table')

        # --- environment of the helper processes: YT credentials and pool ---
        yt_token = sdk2.Vault.data('GEO_SUGGEST', 'yt_token')
        yt_pool = self.Parameters.yt_pool
        self.proc_env = os.environ.copy()
        self.proc_env['YT_TOKEN'] = yt_token
        if yt_pool:
            self.proc_env['YT_POOL'] = yt_pool

        # --- 3. start the helper processes; they run concurrently ---
        sync_procs = {}
        prep_toponyms_proc = self.prepare_toponyms_from_yt(
            toponyms_export=self.Parameters.toponyms_export_data,
            tag='geocoder',
            output_dir=os.path.join(self.pack, 'toponyms_bin'),
            disable_geobase_toponyms=False)
        prep_disputed_fullpaths_proc = self.prepare_disputed_fullpaths_from_yt()
        prep_twins_table_proc = self.prepare_twins_table_from_yt()
        prep_oid2addr_proc = self.prepare_oid2addr()
        sync_procs.update({
            'prepare_toponyms_export': prep_toponyms_proc,
            'prepare_oid2add': prep_oid2addr_proc,
            'prepare_disputed_fullpaths': prep_disputed_fullpaths_proc,
            'prepare_twins_table': prep_twins_table_proc
        })
        # The OSM toponym index is optional and built only when its export
        # table is given.
        if self.Parameters.toponyms_osm_export_data:
            proc = self.prepare_toponyms_from_yt(
                toponyms_export=self.Parameters.toponyms_osm_export_data,
                tag='osm',
                output_dir=os.path.join(self.pack, 'toponyms_osm_bin'),
                disable_geobase_toponyms=True)
            sync_procs['prepare_toponyms_osm_export'] = proc
        # Poll once a minute until the helper reports all processes finished.
        while not _processess_finished(sync_procs):
            logging.info("Waiting for toponyms index and oid2addr build finish")
            sleep(60)

        # --- 4. steps that run after the helper processes are done ---
        self.prepare_prefix_tops()
        self.prepare_shard_info()

        # --- 5. publish the pack directory as the resulting resource ---
        index_resource = sdk2.Resource[geosuggest_resources.GEO_SUGGEST_DATA]
        current_index_resource = index_resource(self,
                                                'GEO_SUGGEST_DATA',
                                                self.pack)
        index_data = sdk2.ResourceData(current_index_resource)
        index_data.ready()

    def on_release(self, params):
        """Run the inherited release logic, then mark the released resources.

        Parameters:
        params (dict): release parameters; 'release_status' is read from it
                       and the resources are marked with ttl=90
        """
        parent = super(GeosuggestIndexPostprocessing, self)
        parent.on_release(params)
        release_status = params['release_status']
        self.mark_released_resources(release_status, ttl=90)
