import os
import subprocess

from sandbox import sdk2
from sandbox.common.types.resource import State as ResourceState
from sandbox.common.types.task import Status as TaskStatus
from sandbox.common.utils import get_task_link
from sandbox.projects.common.juggler import jclient
from sandbox.projects.geobase.Geodata5BinStable import GEODATA5BIN_STABLE
from sandbox.projects.mapsearch.BuildMapsDatabaseAdvert2 import MapsDatabaseAdvert2

from sandbox.projects.geoadv_ranking import resources as resource_types
from sandbox.projects.geoadv_ranking.lib import CommonSimilarAdvertParameters, AbstractSimilarAdvertTask
from sandbox.projects.geoadv_ranking.lib.parameters import get_path_to_resource


class SimilarAdvertsIndexBuild(AbstractSimilarAdvertTask):
    """
    Builds index of similar adverts for each organization from addrs base, see GEOADVDEV-857

    The task exports blacklist files from Arcadia, assembles the command line
    of the indexer binary (``build-advert-index`` mode) and runs it, writing
    the process output to the ``index_build`` process log. Optionally a
    Juggler event with the final task status is sent on finish.
    """
    class Parameters(CommonSimilarAdvertParameters):
        # Indexer binary; optional, the actual path is resolved via
        # get_path_to_resource() in on_execute().
        indexer_executable = sdk2.parameters.LastReleasedResource(
            'Executable binary for building similar adverts index',
            resource_type=resource_types.SimilarAdvertsIndexer,
            state=(ResourceState.READY,),
            required=False,
        )
        # Passed to the indexer as --result-directory.
        output_directory = sdk2.parameters.String(
            'Path to YT directory to put results',
            required=True,
        )

        # Passed to the indexer as --top-size.
        top_size = sdk2.parameters.Integer(
            'Maximum amount of competitors to put in index for each org.',
            required=True,
            default=10,
        )

        # Juggler notification switch; host/service are only declared
        # for the case when the switch is turned on.
        send_events_to_juggler = sdk2.parameters.Bool(
            "Send event to juggler after task's finish",
            required=True,
            default=False,
        )
        with send_events_to_juggler.value[True]:
            juggler_host = sdk2.parameters.String(
                'Juggler host',
                required=True,
            )
            juggler_service = sdk2.parameters.String(
                'Juggler service',
                required=True,
            )

        # Passed to the indexer as --org-data.
        addrs_base_export_table = sdk2.parameters.String(
            'Path to table with org data exported from addrs base.',
            required=True,
        )
        # Passed to the indexer as --rubric-data.
        rubric_data_table = sdk2.parameters.String(
            'Path to table with rubric data exported from addrs base.',
            required=True,
        )
        # Ranking formula; its path (resolved via get_path_to_resource())
        # is passed to the indexer as --model.
        ranking_model = sdk2.parameters.Resource(
            'Model to use for competitor candidates ranking.',
            resource_type=resource_types.MapsCompetitorsFormula,
            state=(ResourceState.READY,),
            default=None,
        )

        # When set, the --apply-geosearch-blacklist flag is added to the command line.
        apply_geosearch_blacklist = sdk2.parameters.Bool(
            'Whether to filter orgs by geosearch pairs of blacklisted rubrics.',
            required=True,
            default=False,
        )

    # Not referenced in this class body; presumably consumed by
    # AbstractSimilarAdvertTask - verify against the base class.
    YT_CLUSTER = 'hahn'
    DEFAULT_SEMAPHORE_NAME = 'similar_adverts_index_build'

    def _send_data_to_juggler_if_needed(self, status):
        """
        Send a Juggler event describing the task outcome, if notifications are enabled.

        :param status: final task status; SUCCESS is reported as 'OK',
                       anything else as 'CRIT'.
        """
        if not self.Parameters.send_events_to_juggler:
            return

        host = self.Parameters.juggler_host
        service = self.Parameters.juggler_service
        if status == TaskStatus.SUCCESS:
            event_status = 'OK'
        else:
            event_status = 'CRIT'

        # The task link is used as the event description.
        jclient.send_events_to_juggler(host, service, event_status, get_task_link(self.id))

    def on_finish(self, _, status):
        """
        Report the outcome to Juggler for SUCCESS and FAILURE statuses only.

        :param _: unused first positional argument.
        :param status: status the task has finished with.
        """
        reportable_statuses = (TaskStatus.SUCCESS, TaskStatus.FAILURE)
        if status not in reportable_statuses:
            return
        self._send_data_to_juggler_if_needed(status)

    def on_execute(self):
        """
        Prepare input data, assemble the indexer command line and run the indexer.

        Raises subprocess.CalledProcessError if the indexer exits with a non-zero code.
        """
        params = self.Parameters

        # Blacklist JSON files are taken from Arcadia trunk into a local directory.
        blacklists_dir = os.path.realpath('blacklists_dir')
        sdk2.svn.Arcadia.export(
            'arcadia:/arc/trunk/arcadia/geoproduct/advert_ranking/competitors/blacklists',
            blacklists_dir,
        )

        advert_dir = self._sync_stable_resource(MapsDatabaseAdvert2)

        # The indexer is started as a child process and inherits these variables.
        os.environ['YT_TOKEN'] = self._get_yt_token()
        os.environ['YT_LOG_LEVEL'] = 'DEBUG'

        indexer_binary = get_path_to_resource(
            params.indexer_executable,
            resource_types.SimilarAdvertsIndexer,
        )
        command = [
            indexer_binary,
            'build-advert-index',
            '--result-directory', params.output_directory,
            '--top-size', str(params.top_size),
        ]

        # Options that point to files inside the exported blacklists directory.
        blacklist_options = (
            ('--rubric-blacklist', 'rubric_blacklist.json'),
            ('--nosimilar-list', 'no_similar_list.json'),
            ('--sensitive-rubrics', 'sensitive_rubrics.json'),
        )
        for option, file_name in blacklist_options:
            command += [option, os.path.join(blacklists_dir, file_name)]

        command += ['--advert-data', os.path.join(advert_dir, 'advert.pb.bin')]

        if params.apply_geosearch_blacklist:
            command.append('--apply-geosearch-blacklist')

        command += [
            '--org-data', params.addrs_base_export_table,
            '--rubric-data', params.rubric_data_table,
            '--geobase', self._sync_stable_resource(GEODATA5BIN_STABLE),
            '--model', get_path_to_resource(params.ranking_model, resource_types.MapsCompetitorsFormula),
        ]

        # stderr is merged into stdout so that the whole output lands in one log.
        with sdk2.helpers.ProcessLog(self, logger='index_build') as process_log:
            subprocess.check_call(command, stdout=process_log.stdout, stderr=subprocess.STDOUT)
