# -*- coding: utf-8 -*-

import os

from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import process

from sandbox.common import errors
from sandbox.common.types.task import Status
from sandbox.projects.websearch.upper.fast_data.ExecutionTimeTracker import ExecutionTimeTracker

# Not referenced in the visible part of this file; the name suggests a prefix for
# operation identifiers -- TODO confirm against the other users of this module.
OPERATION_ID_PREFIX = 'noapache-fast-data-test'
# Not referenced in the visible part of this file; presumably the HTTP endpoint of
# the "Scraper over YT" API on the hahn cluster -- TODO confirm.
SCRAPER_OVER_YT_API = 'http://soyproxy.yandex.net/hahn/soy_api'
# YT cluster proxy: exported as the YT_PROXY environment variable for the helper
# scripts and passed as ``proxy`` when a YtClient is constructed.
YT_PROXY = 'hahn.yt.yandex.net'


class CompilePreparedQueries(ExecutionTimeTracker):
    """
        Pumps the prepared requests through a service.

        The actual work is delegated to the ``build_soy_table.py`` script that lives
        next to this module. The script is started with the skynet python interpreter
        and receives the YT proxy and token through the ``YT_PROXY`` / ``YT_TOKEN``
        environment variables.
    """

    class Requirements(sdk2.Task.Requirements):
        # Pip packages requested for the task environment.
        environments = [
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
        ]

    class Parameters(sdk2.Task.Parameters):
        kill_timeout = 180
        service = sdk2.parameters.String(
            'Service uri',
            default='https://hamster.yandex.ru',
            required=True,
        )
        # NOTE(review): declared as a String although the default is an int;
        # the value is coerced with str() before it is put on the command line.
        requests_limit = sdk2.parameters.String(
            'Number of requests to make for each handle',
            default=100,
            required=True,
        )
        input_path = sdk2.parameters.String(
            'Cypress dir with prepared requests',
            default='//home/search-runtime/smorivan/last_splitted',
            required=True,
        )
        output_path = sdk2.parameters.String(
            'Output table with compiled requests',
            default='//home/search-runtime/smorivan/soy_input',
            required=True,
        )
        features = sdk2.parameters.List(
            'Features to calculate for these requests',
            default=['numdocs', 'direct', 'query'],
            required=True,
        )
        yt_token_name = sdk2.parameters.String(
            'YT token name',
            default='yt_token',
            required=True,
        )

    class Context(ExecutionTimeTracker.Context):
        pass

    @property
    def stage_name(self):
        """Name of this stage; presumably consumed by ExecutionTimeTracker -- TODO confirm."""
        return 'compile_prepared_queries'

    def on_execute(self):
        """
            Run ``build_soy_table.py`` as a subprocess and wait for it to finish.

            The command line is built from the task parameters; everything that
            follows ``--features`` is the list of requested features.
            ``run_process`` is called with ``check=True``.
        """
        environment = os.environ.copy()
        environment['YT_PROXY'] = YT_PROXY
        # The token is read from Vault and handed to the script via the environment only.
        environment['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_token_name)

        table_builder = os.path.join(os.path.dirname(__file__), 'build_soy_table.py')
        cmd = [
            '/skynet/python/bin/python',
            table_builder,
            '--service', self.Parameters.service,
            '--input-path', self.Parameters.input_path,
            '--output-path', self.Parameters.output_path,
            # Every argv item has to be a string, while the parameter default is an int.
            '--limit', str(self.Parameters.requests_limit),
            '--features',
        ]
        # list() keeps the concatenation valid for any sequence type of the parameter value.
        cmd += list(self.Parameters.features)

        process.run_process(
            cmd=cmd,
            log_prefix='table-builder',
            wait=True,
            check=True,
            environment=environment,
        )


class PreparePopularQueries(ExecutionTimeTracker):
    """
        Prepares popular queries for the last 24 hours.

        ``on_execute`` goes through the following memoized stages:
          1. ``prepare_queries``  - runs ``prepare_queries.py`` over the ``source`` directory;
          2. ``compile_queries``  - enqueues one ``CompilePreparedQueries`` subtask per entry
                                    of ``services`` and waits for all of them;
          3. ``check_compile``    - fails the task unless every subtask succeeded;
          4. ``merge``            - for every entry of ``additional_requests`` that lists extra
                                    tables, runs ``merge_compiled_tables.py``.
        Afterwards ``prepared_symlink`` is pointed at the freshly prepared directory
        and old prepared directories are removed.
    """

    class Requirements(sdk2.Task.Requirements):
        # Pip packages requested for the task environment.
        environments = [
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
        ]

    class Parameters(sdk2.Task.Parameters):
        kill_timeout = 4800  # 3600 sometimes was not enough
        source = sdk2.parameters.String(
            'Directory with raw requests',
            default='//home/search-functionality/mt_squeeze/v31',
            required=True
        )
        dest = sdk2.parameters.String(
            'Output directory',
            default='//home/search-runtime/smorivan',
            required=True
        )
        splitted_symlink = sdk2.parameters.String(
            'Symlink to directory with requests splitted by handle',
            default='//home/search-runtime/smorivan/last_splitted',
            required=True,
        )
        prepared_symlink = sdk2.parameters.String(
            'Last prepared requests symlink',
            default='//home/search-runtime/smorivan/last_prepared',
            required=True
        )

        keep_last_n = sdk2.parameters.Integer(
            'Number of task results to keep',
            default=100,
            required=True,
        )
        services = sdk2.parameters.Dict(
            'Services uri to soy input table name mapping',
            default={'https://noapache-fast-data.hamster.yandex.ru': 'soy_noapache_fast_data'},
            required=True,
        )
        requests_limit = sdk2.parameters.String(
            'Number of requests to make for each handle',
            default=1000,
            required=True,
        )
        features = sdk2.parameters.List(
            'Features to calculate for prepared requests',
            default=['numdocs', 'direct', 'query'],
            required=True,
        )
        additional_requests = sdk2.parameters.Dict(
            'Additional tables with prepared requests, format: {basic_table: comma-separated-list-of-tables}',
            default={'soy_noapache_fast_data': ''},
        )

        yt_token_name = sdk2.parameters.String('YT token name', default='yt_token')

    class Context(ExecutionTimeTracker.Context):
        pass

    @property
    def stage_name(self):
        """Name of this stage; presumably consumed by ExecutionTimeTracker -- TODO confirm."""
        return 'prepare_popular_queries'

    def _yt_environment(self):
        """
            Return a copy of the process environment extended with ``YT_PROXY`` and ``YT_TOKEN``.

            The environment is rebuilt on every call instead of being stored in the
            task context, so the token is never persisted there.
        """
        environment = os.environ.copy()
        environment['YT_PROXY'] = YT_PROXY
        environment['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_token_name)
        return environment

    def _yt_client(self):
        """Create a YT client for ``YT_PROXY`` authorized with the token taken from Vault."""
        # Imported inside the method: presumably the package is only importable once the
        # task environment (see Requirements) is prepared -- TODO confirm.
        from yt import wrapper as yw
        return yw.YtClient(proxy=YT_PROXY, token=sdk2.Vault.data(self.Parameters.yt_token_name))

    def on_execute(self):
        """
            Prepare, compile and (optionally) merge the request tables, then publish them.

            Raises ``sdk2.WaitTask`` after the compile subtasks are enqueued and
            ``errors.TaskFailure`` if any of them did not finish successfully.
        """
        module_dir = os.path.dirname(__file__)

        with self.memoize_stage.prepare_queries:
            self.set_info('Prepare queries')
            process.run_process(
                cmd=[
                    '/skynet/python/bin/python',
                    os.path.join(module_dir, 'prepare_queries.py'),
                    '--source', self.Parameters.source,
                    '--dest', self.Parameters.dest,
                    '--symlink', self.Parameters.splitted_symlink,
                ],
                log_prefix='prepare-queries',
                wait=True,
                check=True,
                environment=self._yt_environment(),
            )

        with self.memoize_stage.compile_queries:
            # The directory depends only on the task id, so it is computed once,
            # independently of how many services are configured.
            self.Context.prepared_dir = '{}/prepared/{}'.format(self.Parameters.dest, self.id)
            subtask_ids = []
            for service, soy_input_table_name in self.Parameters.services.items():
                subtask = CompilePreparedQueries(
                    self,
                    service=service,
                    requests_limit=self.Parameters.requests_limit,
                    input_path=self.Parameters.splitted_symlink,
                    output_path='{}/{}'.format(self.Context.prepared_dir, soy_input_table_name),
                    features=self.Parameters.features,
                    yt_token_name=self.Parameters.yt_token_name,
                )
                subtask_ids.append(subtask.enqueue().id)
            # Assigned in one go rather than mutated in place inside the context.
            self.Context.compile_queries_task_ids = subtask_ids
            self.set_info('Compile soy tables')
            raise sdk2.WaitTask(self.Context.compile_queries_task_ids, Status.Group.FINISH | Status.Group.BREAK)

        with self.memoize_stage.check_compile:
            if any(sdk2.Task[task_id].status not in Status.Group.SUCCEED
                   for task_id in self.Context.compile_queries_task_ids):
                raise errors.TaskFailure('Failed to compile prepared requests')

        with self.memoize_stage.merge:
            environment = self._yt_environment()
            table_merger = os.path.join(module_dir, 'merge_compiled_tables.py')
            self.set_info('Merge tables')
            for base, additionals in self.Parameters.additional_requests.items():
                # A real list (not ``filter``) so that the emptiness check below stays
                # correct regardless of the interpreter version.
                extra_tables = [name for name in (additionals or '').replace(' ', '').split(',') if name]
                if not extra_tables:
                    continue
                extra_paths = ['{}/{}'.format(self.Parameters.dest, name) for name in extra_tables]
                # NOTE(review): ``base`` is passed as a bare table name while the additional
                # tables are prefixed with ``dest``; confirm that merge_compiled_tables.py
                # resolves it as intended.
                process.run_process(
                    cmd=[
                        '/skynet/python/bin/python',
                        table_merger,
                        '--output-path', '{}/{}'.format(self.Context.prepared_dir, base),
                        '--input-paths',
                        base,
                    ] + extra_paths,
                    log_prefix='tables-merger-{}'.format(base),
                    wait=True,
                    check=True,
                    environment=environment,
                )

        client = self._yt_client()
        # Publish the result: re-point the symlink at the directory of this run.
        client.link(self.Context.prepared_dir, self.Parameters.prepared_symlink, force=True)
        self._remove_old(client)

    def _remove_old(self, client=None):
        """
            Remove all but the ``keep_last_n`` most recently modified prepared directories.

            Directories that currently hold locks are left untouched and are not counted.
            Nothing is removed when ``keep_last_n`` is less than 2.

            :param client: YT client to use; a new one is created when omitted.
        """
        keep_last_n = self.Parameters.keep_last_n
        if keep_last_n < 2:  # keep previous results in case TestPopularQueries is using them
            return
        if client is None:
            client = self._yt_client()

        nodes = client.list(
            '{}/prepared'.format(self.Parameters.dest),
            absolute=True,
            attributes=['locks', 'modification_time'],
        )
        # A list comprehension keeps ``.sort`` and slicing available on any interpreter version.
        unlocked = [node for node in nodes if not node.attributes['locks']]

        from prepare_queries import modification_time
        unlocked.sort(key=modification_time)
        for node in unlocked[:-keep_last_n]:
            # The nodes are directories that contain the compiled tables,
            # so they have to be removed recursively.
            client.remove(node, recursive=True)
