# -*- coding: utf-8 -*-

import os
import logging

from sandbox.projects import resource_types

from sandbox.projects.common.base_search_quality import settings as bss
from sandbox.projects.common import cms
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils
from sandbox.projects.common.search import bugbanner
from sandbox.projects.images.models import resources as images_models_resources
from sandbox.projects.images.idx_ops import resources as images_idx_ops_resources

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import parameters as sp


class IdxOpsExecutableParameter(sp.ResourceSelector):
    """
        idx_ops binary
    """
    name = 'idx_ops_resource_id'
    description = 'idx_ops binary'
    # Two resource types are accepted: the one declared in resource_types
    # and the one declared in the images idx_ops resources module
    resource_type = (
        resource_types.IDX_OPS_EXECUTABLE,
        images_idx_ops_resources.IDX_OPS_IMAGES_EXECUTABLE
    )
    required = True


class SearchDatabaseResourceParameter(sp.ResourceSelector):
    """
        Search shard
    """
    name = 'db_resource_id'
    description = 'Search database resource'
    # Both SEARCH_DATABASE and IMAGES_SEARCH_DATABASE resources are accepted
    resource_type = (
        resource_types.SEARCH_DATABASE,
        resource_types.IMAGES_SEARCH_DATABASE,
    )
    required = True


class RankingUrlsMapParameter(sp.ResourceSelector):
    """
        Map of rated URLs (primus-JudTier-*-urls.tsv.gz)
    """
    # `required` is not set here; the task syncs this resource and passes it
    # to idx_ops via --ranking-urls-map only when a value is given
    name = 'ranking_urls_map'
    description = 'Ranking URLs canonization table'
    resource_type = resource_types.JUDTIER_DUPS_TABLE


class ModelsParameter(sp.ResourceSelector):
    """
        MatrixNet models
    """
    # `required` is not set here; the task passes the synced archive to
    # idx_ops via --models only when a value is given
    name = 'models_resource_id'
    description = 'Dynamic models archive'
    resource_type = (
        resource_types.DYNAMIC_MODELS_ARCHIVE,
        images_models_resources.IMAGES_DYNAMIC_MODELS_ARCHIVE,
    )


class RequestsParameter(sp.ResourceSelector):
    """
        Requests (requests.tsv.gz)
    """
    # The synced file is passed to idx_ops as a positional argument
    name = 'requests_resource_id'
    description = 'Requests from FML'
    resource_type = resource_types.FML_REQUESTS_TABLE
    required = True


class RatingsParameter(sp.ResourceSelector):
    """
        Ratings (ratings.tsv.gz)
    """
    # The synced file is passed to idx_ops as a positional argument
    name = 'ratings_resource_id'
    description = 'Ratings from FML'
    resource_type = resource_types.FML_RATINGS_TABLE
    required = True


class UseUrlsFromIndexParameter(sp.SandboxBoolParameter):
    """
        Load urls from index instead of url.dat
    """
    # When enabled, the task adds --urls-from-index to the idx_ops command line
    name = 'use_urls_from_index'
    description = 'Use urls from index'
    default_value = False


class SaveFactorNamesAndBorders(sp.SandboxBoolParameter):
    """
        Save factor_names.txt and factor_borders.txt
    """
    # When enabled, the task creates both resources on enqueue and fills them
    # from the output of `idx_ops factor_names` / `idx_ops factor_borders`
    name = 'store_factor_info'
    description = 'Save factor_names.txt and factor_borders.txt'
    default_value = False


class IndexAnnBetaMode(sp.SandboxBoolParameter):
    """
        Enables calculation of experimental stream features
    """
    # When enabled, the task adds --indexann-beta to the idx_ops command lines
    name = 'indexann_beta_enabled'
    description = 'Enable experimental stream text machine features calculation'
    default_value = False


class TextMachineLimitsMonitor(sp.SandboxBoolParameter):
    """
        Enables counting the number of hits loaded from indexes and reached TextMachine
    """
    # When enabled, the task adds `--cgi-params &pron=qbundlelimitsmonitor`
    # to the idx_ops command line
    name = 'tm_limits_monitors_enabled'
    description = 'Enable text-machine limits monitors'
    default_value = False


class AdditionalRunParamsParameter(sp.SandboxStringParameter):
    """
        Additional command-line parameters for the idx_ops run
    """
    # A non-empty value is split into separate arguments and appended to the
    # idx_ops command line
    name = 'additional_params'
    description = 'Additional command-line parameters'
    default_value = ""


# Group label shared (via the `group` attribute) by the testing-mode parameters below
_testing_modes = 'Testing options'


class RecheckResultsParameter(sp.SandboxBoolParameter):
    """
        Run the process twice and compare the results
        (in order to detect instability)
    """
    name = 'recheck_results'
    description = 'Recheck results'
    default_value = True
    group = _testing_modes


class TestTrashParameter(sp.SandboxBoolParameter):
    """
        Test the trash pools (by ironpeter@) used for fitting L1/L2 formulas
    """
    name = 'test_trash_pools'
    description = 'Test trash pools'
    default_value = True
    group = _testing_modes


class TestProtoDocTextParameter(sp.SandboxBoolParameter):
    """
        Test collecting a pool with document texts (SEARCH-480)
    """
    name = 'collect_doc_texts'
    description = 'Test doc text collection'
    default_value = True
    group = _testing_modes


class TestProtoStreamHits(sp.SandboxBoolParameter):
    """
        Test collecting a pool with per-stream lingboosting hits.
        Works when lingboosting CGI parameters are present (&qbundle=, &pron=qbundleiter).
    """
    name = 'collect_stream_hits'
    description = 'Test stream hits collection'
    default_value = False
    group = _testing_modes


class CheckProtobufModeParameter(sp.SandboxBoolParameter):
    """
        Run idx_ops in protobuf mode
    """
    name = 'check_protobuf'
    description = 'Check protobuf mode'
    default_value = True
    group = _testing_modes


class CheckProtobufFinalFormatParameter(sp.SandboxBoolParameter):
    """
        Run idx_ops in protobuf final format mode
    """
    name = 'check_final_format'
    description = 'Check protobuf final format'
    default_value = False
    group = _testing_modes


class SaveOutputParameter(sp.SandboxBoolParameter):
    """
        Save the output
    """
    name = 'save_output'
    description = 'Save output'
    default_value = False
    group = _testing_modes


class RunIdxOpsEstFeatures(bugbanner.BugBannerTask):
    """
        Runs idx_ops in ``estfeatures2f`` mode on the given search shard, requests and ratings.
        The output can be saved for further analysis (the Save output option)
    """

    type = 'RUN_IDX_OPS_EST_FEATURES'

    # Disk reservation comes from the shared base search quality settings
    execution_space = bss.RESERVED_SPACE
    # 96 << 10 == 98304; presumably megabytes, i.e. 96 GiB -- TODO confirm the unit
    required_ram = 96 << 10

    input_parameters = (
        IdxOpsExecutableParameter,
        SearchDatabaseResourceParameter,
        RankingUrlsMapParameter,
        ModelsParameter,
        RequestsParameter,
        RatingsParameter,
        UseUrlsFromIndexParameter,
        IndexAnnBetaMode,
        TextMachineLimitsMonitor,
        AdditionalRunParamsParameter,
        SaveFactorNamesAndBorders,
        RecheckResultsParameter,
        CheckProtobufModeParameter,
        CheckProtobufFinalFormatParameter,
        TestTrashParameter,
        TestProtoDocTextParameter,
        TestProtoStreamHits,
        SaveOutputParameter,
    )

    def initCtx(self):
        # debug versions are slow. TODO: optimize
        self.ctx['kill_timeout'] = 6 * 60 * 60

    def on_enqueue(self):
        """
            Creates the output resources requested by the task options and
            stores their ids in the context under the ``out_*`` keys.
        """
        SandboxTask.on_enqueue(self)

        if utils.get_or_default(self.ctx, SaveFactorNamesAndBorders):
            names_resource = self._create_resource(
                self.descr + ': factor_names.txt',
                'factor_names.txt',
                resource_types.FACTOR_NAMES_TXT,
            )
            self.ctx['out_factor_names'] = names_resource.id

            borders_resource = self._create_resource(
                self.descr + ': factor_borders.txt',
                'factor_borders.txt',
                resource_types.FACTOR_BORDERS_TXT,
            )
            self.ctx['out_factor_borders'] = borders_resource.id

        # The pool resources below are only needed when the output is saved
        if not utils.get_or_default(self.ctx, SaveOutputParameter):
            return

        tsv_resource = self._create_resource(
            self.descr + ": tsv file with result",
            'pool.tsv',
            resource_types.IDX_OPS_EST_FEATURES_TSV_OUTPUT,
            arch='any',
        )
        self.ctx['out_tsv_pool'] = tsv_resource.id

        if not utils.get_or_default(self.ctx, CheckProtobufFinalFormatParameter):
            return

        final_resource = self._create_resource(
            self.descr + ": result in proto-final format",
            'pool-final.pb',
            resource_types.IDX_OPS_EST_FEATURES_PROTOBUF_FINAL_OUTPUT,
            arch='any',
        )
        self.ctx['out_final_pool'] = final_resource.id

    def on_execute(self):
        """
            Syncs the input resources and runs idx_ops in every mode enabled
            by the task options: the TSV run (optionally repeated and compared
            for stability), the protobuf run, the protobuf final format run and
            the trash pool / doc texts / stream hits runs.
        """
        # IMGDEVOPS-361
        os.environ["MKL_CBWR"] = "COMPATIBLE"

        self.add_bugbanner(bugbanner.Banners.SearchTests)

        os.chdir(self.abs_path())

        idx_ops_path = self.sync_resource(self.ctx[IdxOpsExecutableParameter.name])
        ratings_path = self.sync_resource(self.ctx[RatingsParameter.name])
        requests_path = self.sync_resource(self.ctx[RequestsParameter.name])
        db_path = self.sync_resource(self.ctx[SearchDatabaseResourceParameter.name])
        save_output = utils.get_or_default(self.ctx, SaveOutputParameter)

        input_files = dict(
            idx_ops=idx_ops_path,
            ratings=ratings_path,
            requests=requests_path,
            db=db_path,
        )

        # The ranking urls map is optional
        urls_map_id = utils.get_or_default(self.ctx, RankingUrlsMapParameter)
        if urls_map_id:
            input_files['ranking_urls_map'] = self.sync_resource(urls_map_id)

        if utils.get_or_default(self.ctx, SaveFactorNamesAndBorders):
            self._save_factors_info(input_files['idx_ops'])

        tsv_path = self.abs_path() + '/result1.tsv'
        if save_output:
            # will be set to ready after all further checks
            tsv_path = self._read_resource(self.ctx['out_tsv_pool'], sync=False).abs_path()

        # In the TSV runs below save_output is False for `tsv_path`: either it
        # already points into the resource file or saving was not requested
        if utils.get_or_default(self.ctx, RecheckResultsParameter):
            recheck_path = self.abs_path() + '/result2.tsv'
            # use different thread count to raise entropy
            self._run_idx_ops(input_files, tsv_path, thread_count=17, save_output=False)
            self._run_idx_ops(input_files, recheck_path, thread_count=23, save_output=save_output)
            self._compare_results(tsv_path, recheck_path)
        else:
            self._run_idx_ops(input_files, tsv_path, thread_count=18, save_output=False)

        if utils.get_or_default(self.ctx, CheckProtobufModeParameter):
            self._run_idx_ops(
                input_files, 'result.pb',
                thread_count=18,
                use_protobuf=True,
                save_output=save_output,
            )

        if utils.get_or_default(self.ctx, CheckProtobufFinalFormatParameter):
            final_path = 'result-final.pb'
            save_in_new_resource = save_output
            if 'out_final_pool' in self.ctx:
                # write straight into the already created resource, no new one is needed
                final_path = self._read_resource(self.ctx['out_final_pool'], sync=False).abs_path()
                save_in_new_resource = False

            self._run_idx_ops(
                input_files, final_path,
                thread_count=18,
                use_protobuf=True,
                use_final_format=True,
                save_output=save_in_new_resource,
            )

        # The remaining test modes differ only in the controlling parameter,
        # the output file name and one mode flag
        final_format_tests = (
            (TestTrashParameter, 'result-trash.pb', {'trash_pool': True}),
            (TestProtoDocTextParameter, 'result-doc-texts.pb', {'collect_doc_texts': True}),
            (TestProtoStreamHits, 'result-stream-hits.pb', {'collect_stream_hits': True}),
        )
        for parameter, output_name, mode_flag in final_format_tests:
            if not utils.get_or_default(self.ctx, parameter):
                continue
            self._run_idx_ops(
                input_files, output_name,
                thread_count=18,
                use_protobuf=True,
                use_final_format=True,
                save_output=save_output,
                **mode_flag
            )

    def _save_factors_info(self, idx_ops_path):
        """
            Writes the stdout of ``idx_ops factor_names`` and ``idx_ops factor_borders``
            into the resources whose ids are stored in the context, then marks
            both resources ready.
        """
        names_resource = self._read_resource(self.ctx['out_factor_names'], sync=False)
        borders_resource = self._read_resource(self.ctx['out_factor_borders'], sync=False)

        extra_args = []
        if utils.get_or_default(self.ctx, IndexAnnBetaMode):
            extra_args.append('--indexann-beta')

        dumps = (
            (names_resource, 'factor_names', 'run_idx_ops factornames'),
            (borders_resource, 'factor_borders', 'run_idx_ops factorborders'),
        )
        for resource, mode, log_prefix in dumps:
            with open(resource.abs_path(), "w") as out_file:
                process.run_process(
                    [idx_ops_path, mode] + extra_args,
                    log_prefix=log_prefix,
                    check=True,
                    stdout=out_file,
                )

        # Mark ready only after both dumps have been written
        names_resource.mark_ready()
        borders_resource.mark_ready()

    @staticmethod
    def _get_options(input_files):
        """
            Returns what idx_ops prints to stderr for ``estfeatures2f --help``.

            The text is used for feature detection, because different branches
            need different options (base/pre-stable-25 has old idx_ops)
        """
        options_path = 'options.txt'

        with open(options_path, 'w') as help_output:
            help_command = [input_files['idx_ops'], 'estfeatures2f', '--help']
            process.run_process(
                help_command,
                stderr=help_output,
                outputs_to_one_file=False,
                log_prefix='run_idx_ops-get-version',
                check=False,
            )

        return fu.read_file(options_path)

    def _make_def_args(self, input_files, thread_count):
        """
            Builds the part of the idx_ops ``estfeatures2f`` command line that
            is common to all runs.

            :param input_files: dict of synced input paths; ``idx_ops`` is required,
                ``ranking_urls_map`` is used when present
            :param thread_count: value passed to ``--threads``
            :return: list of command-line arguments starting with the idx_ops binary path
        """
        args = [
            input_files['idx_ops'],
            'estfeatures2f',
            '--str-sort',
            '--threads', str(thread_count),
            '-g', 'domain',
            '--tier', bss.DEFAULT_TIER,  # does not matter for us
        ]
        if utils.get_or_default(self.ctx, UseUrlsFromIndexParameter):
            args.append('--urls-from-index')
        if utils.get_or_default(self.ctx, IndexAnnBetaMode):
            args.append('--indexann-beta')
        if utils.get_or_default(self.ctx, TextMachineLimitsMonitor):
            args.append('--cgi-params')
            args.append('&pron=qbundlelimitsmonitor')
        models = utils.get_or_default(self.ctx, ModelsParameter)
        if models:
            args += ['--models', self.sync_resource(models)]
        if 'ranking_urls_map' in input_files:
            args += ['--ranking-urls-map', input_files['ranking_urls_map']]
        additional_params = utils.get_or_default(self.ctx, AdditionalRunParamsParameter)
        if additional_params:
            # split() without a separator drops the empty strings that
            # split(" ") produced for leading, trailing or repeated spaces;
            # those would otherwise reach idx_ops as empty arguments
            args += additional_params.split()

        return args

    def _run_idx_ops(
        self, input_files, result_name,
        thread_count=1,
        trash_pool=False,
        collect_doc_texts=False,
        collect_stream_hits=False,
        use_protobuf=False,
        use_final_format=False,
        save_output=False,
    ):
        """
            Runs idx_ops ``estfeatures2f`` once and puts the result into ``result_name``.

            In TSV mode the raw output goes to ``result_name + '.tmp'`` and is then
            sorted into ``result_name``, whose line count is checked. In protobuf
            mode the output is written to ``result_name`` directly.
            The raw output is registered as a resource when the run exits with
            a non-zero code or when ``save_output`` is set.
            Protobuf runs are skipped when the idx_ops help text mentions ``--disk``.
        """
        # final format implies protobuf, it does not make sense for TSV
        use_protobuf = use_protobuf or use_final_format
        tsv_mode = not use_protobuf

        # TSV runs write to a temp name first (the output should be sorted)
        raw_name = result_name + '.tmp' if tsv_mode else result_name

        help_text = self._get_options(input_files)

        if '--disk' in help_text:
            # this idx_ops version writes its output via --disk and has no protobuf support
            if use_protobuf:
                logging.info(
                    'This idx_ops version does not support protobuf pools. '
                    'This mode will NOT be tested')
                return
            output_flag = '--disk'
        else:
            # default trunk supports output option
            output_flag = '--output'

        cmd = self._make_def_args(input_files, thread_count)

        if 'DEBUG' in self.descr:
            # limit to speed up
            cmd.extend(["--head", "20000"])

        output_type = resource_types.IDX_OPS_EST_FEATURES_PROTOBUF_OUTPUT
        if tsv_mode:
            cmd.append('--tsv-output')  # aka -h
            output_type = resource_types.IDX_OPS_EST_FEATURES_TSV_OUTPUT

        if use_final_format:
            if '--sorted-final-format' in help_text:
                cmd.append('--sorted-final-format')
            else:
                cmd.append('--final-format')
            output_type = resource_types.IDX_OPS_EST_FEATURES_PROTOBUF_FINAL_OUTPUT

        if trash_pool:
            cmd.extend(['--trash-rating', '-0.5', '--trash-ratio', '0.3'])

        if collect_doc_texts:
            cmd.append('--collect-doc-texts')

        if collect_stream_hits:
            cmd.append('--collect-stream-hits')

        cmd.extend([output_flag, raw_name])
        cmd.extend([input_files['ratings'], input_files['requests'], input_files['db']])

        run_env = os.environ.copy()
        run_env["MKL_CBWR"] = "COMPATIBLE"
        proc = process.run_process(cmd, log_prefix='run_idx_ops', check=False, environment=run_env)

        if proc.returncode != 0 or save_output:
            # register the raw output first, so that it is attached even for a failed run
            self.create_resource(
                'idx_ops output for ' + raw_name, raw_name, output_type,
                arch='any',
            )
            eh.ensure(proc.returncode == 0, 'idx_ops run failed')

        if use_protobuf:
            return

        with open(result_name, 'w') as sorted_output:
            process.run_process(
                ['sort', raw_name],
                stdout=sorted_output,
                log_prefix='sort',
            )

        # get line count to catch various oddities
        wc_proc = process.run_process(['wc', '-l', result_name], outs_to_pipe=True)
        wc_stdout, _ = wc_proc.communicate()
        line_count = int(wc_stdout.split()[0])
        eh.ensure(line_count > 0, 'Output is empty')
        eh.ensure(line_count < 150000, 'Too many lines in micropool: {}'.format(line_count))

    def _compare_results(self, result1, result2):
        """
            Compares two (sorted) idx_ops outputs with ``diff``.

            Returns silently when the files are identical. When they differ,
            the diff (written to ``result.diff`` in the task directory) is
            registered as a resource and the check fails with
            'idx_ops output is not stable'. When ``diff`` itself fails,
            the check fails with a separate message instead of reporting
            a false instability.

            :param result1: path to the first output
            :param result2: path to the second output
        """
        diff_name = self.abs_path() + '/result.diff'

        with open(diff_name, 'w') as diff:
            proc = process.run_process(
                [
                    'diff',
                    result1,
                    result2,
                ],
                stdout=diff,
                log_prefix='diff',
                check=False,
            )

        if proc.returncode == 0:
            return

        # diff exits with 1 when the inputs differ; any other non-zero status
        # means diff could not do the comparison (e.g. an unreadable file)
        if proc.returncode != 1:
            eh.check_failed(
                'diff exited with code {}, idx_ops outputs were not compared'.format(proc.returncode)
            )

        self.create_resource(
            'idx_ops_unstable_diff', diff_name, resource_types.OTHER_RESOURCE,
            arch='any',
        )
        eh.check_failed('idx_ops output is not stable')

    def __get_shard_name(self, cms_configuration, instance_tag_name):
        """
            Returns the first shard name that CMS reports for the given
            configuration and instance tag. ``eh.ensure`` is called with
            the shard list as the condition before the first item is taken.
        """
        search_message = "Search for shard: cms_configuration={}, instance_tag_name={}".format(
            cms_configuration, instance_tag_name
        )
        logging.info(search_message)

        found_shards = cms.get_cms_shards(
            instance_tag_name=instance_tag_name,
            cms_configuration=cms_configuration,
        )
        not_found_message = "Could not find any shards in CMS for tags {}".format(instance_tag_name)
        eh.ensure(found_shards, not_found_message)
        return found_shards[0]


# Module-level alias of the task class
# (presumably the name the Sandbox task loader looks up -- TODO confirm)
__Task__ = RunIdxOpsEstFeatures
