#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import sys
import json
import logging

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sdk2.helpers import subprocess
from sandbox.projects.websearch.begemot.tasks.BegemotYT.common import CommonYtParameters, utc_from_now
from sandbox.projects.common import file_utils as fu
from sandbox.common.errors import TaskFailure

# Answer keys that are always compared; the task passes them (plus any
# user-supplied additional keys) to cache_guess.py via the --keys argument.
# Reference for the list: search/common/cphash.cpp : const TCpHashFunctor commonAttrs[]
DEFAULT_PRIMARY_KEY = ['pron', 'spamflt', 'qtree', 'relev']


def get_relev_changing_rules():
    """Return the "Relev" section of the schema printed by the stable Begemot binary.

    Looks up the first READY ``BEGEMOT_EXECUTABLE`` resource released as
    ``stable``, runs it with ``--print-bgschema`` and parses its stdout as JSON.

    Returns:
        The value stored under the ``"Relev"`` key of the parsed schema, or an
        empty dict when running the binary or reading the schema fails (the
        failure is only written to the debug log).
    """
    resource = sdk2.Resource["BEGEMOT_EXECUTABLE"].find(state='READY', attrs={'released': 'stable'}).first()
    binary_path = str(sdk2.ResourceData(resource).path)
    command = [binary_path, '--print-bgschema']
    try:
        raw_schema = subprocess.check_output(command)
        return json.loads(raw_schema)["Relev"]
    except Exception as err:
        # Best effort: callers get an empty mapping when the schema is unavailable.
        logging.debug("Cannot read bgschema. ({})".format(err))
        return {}


def get_diff_causes(diff_rules, answers_count, multiplier=1.0, diff_perc=0.1):
    """Build a human-readable report of the rules and factors that caused the diff.

    The complete report (every rule and every field) is written to the debug
    log; the returned text contains only the entries whose share reaches
    ``diff_perc``.

    Args:
        diff_rules: iterable of records, each a mapping with the keys
            ``'rule'`` (label printed in the report), ``'count'`` (number of
            affected answers) and ``'fields'`` (sequence of items where
            ``item[0]`` is the label and ``item[1]`` is the count).
            An empty value or ``None`` means that nothing changed.
        answers_count: total number of answers; the denominator of every share.
        multiplier: factor applied to every count before the share is computed.
        diff_perc: minimal share, in percent, for an entry to be included in
            the returned text.

    Returns:
        The report as a string, or a fixed message when there are no rules or
        when no entry reaches ``diff_perc``.

    Raises:
        ZeroDivisionError: if ``diff_rules`` is non-empty and ``answers_count`` is 0.
    """
    if not diff_rules:
        return "No factors changed"

    header = "Rules caused diff (see full list in debug.log):"
    # Collect lines in lists and join once instead of re-joining a growing string.
    info_lines = [header]
    debug_lines = [header]

    for record in diff_rules:
        # Keep this exact operand order: it determines the printed float value.
        perc = record['count'] * 100.0 * multiplier / answers_count
        line = "{}: {}%. Factors changed:".format(record['rule'], perc)
        debug_lines.extend(["", line])
        if perc >= diff_perc:
            info_lines.extend(["", line])

        for field in record['fields']:
            perc = field[1] * 100.0 * multiplier / answers_count
            line = "\t- {} : {}%".format(field[0], perc)
            debug_lines.append(line)
            if perc >= diff_perc:
                info_lines.append(line)

    logging.debug("\n".join(debug_lines))
    # Only the header is present when nothing passed the threshold.
    if len(info_lines) > 1:
        return "\n".join(info_lines)
    return "No factors with significant diff (see full list in debug.log)"


class MiddleSearchCacheHitGuess2(sdk2.Task):
    """Compare two sets of Begemot answers with cache_guess.py and report the results.

    The task runs the ``cache_guess.py`` script located next to this file as a
    subprocess, parses the JSON it prints to stdout, fills the output
    parameters and posts a summary of the diff to the task info.
    """
    __logger = logging.getLogger('TASK_LOGGER')
    __logger.setLevel(logging.DEBUG)

    class Parameters(sdk2.Parameters):
        # Inputs forwarded to the tool as --old / --new.
        begemot_answers_old = sdk2.parameters.String('Begemot answers old', required=True)
        begemot_answers_new = sdk2.parameters.String('Begemot answers new', required=True)
        full_check = sdk2.parameters.Bool('Count full diff', required=True, default=False, description='If false, check cache affecting keys + additional keys only')
        detailed = sdk2.parameters.Bool('Print detailed diff', required=True, default=True)
        # These two are only offered when the detailed mode is switched on.
        with detailed.value[True]:
            limit = sdk2.parameters.Integer('Limit of analyzed diff rows, 0 if unlimited', default=0, required=True)
            fail_on_error = sdk2.parameters.Bool('Fail task if detailed_diff failed', required=True, default=False)
        # Appended to DEFAULT_PRIMARY_KEY when building the --keys argument.
        keys = sdk2.parameters.List('Additional keys to compare', sdk2.parameters.String, default=[])
        output_path = CommonYtParameters.output_path()
        yt_proxy = CommonYtParameters.yt_proxy()
        yt_pool = CommonYtParameters.yt_pool()
        yt_token_vault_owner = CommonYtParameters.yt_token_vault_owner()
        yt_token_vault_name = CommonYtParameters.yt_token_vault_name()
        # Used to compute the expiration_time attribute of the output node.
        results_store_time = sdk2.parameters.Integer('Days to store results', default=3)
        # Number of times the tool is started before the task gives up.
        retries = sdk2.parameters.Integer('Cache guess tool retries', default=3)
        with sdk2.parameters.Output:
            answers_diff = sdk2.parameters.Float('Share of answers with changed values')
            cache_guess_old = sdk2.parameters.Float('Share of repeated answers old')
            cache_guess_new = sdk2.parameters.Float('Share of repeated answers new')

    class Requirements(sdk2.Requirements):
        disk_space = 20 * 1024
        ram = 1024
        environments = [PipEnvironment('yandex-yt', version='0.8.49')]

    def on_execute(self):
        """Run the cache guess tool and publish its results.

        Steps:
          1. Build the command line for ``cache_guess.py`` from the task parameters.
          2. Create the YT output node with an expiration time.
          3. Run the tool, starting it up to ``retries`` times.
          4. Parse the JSON printed by the tool, fill the output parameters and
             post the summary and the diff causes to the task info.

        Raises:
            TaskFailure: if the tool did not finish successfully in any attempt.
        """
        # Imported here because the package is provided by the PipEnvironment requirement.
        import yt.wrapper as yt
        self.__logger.info('Getting tasks results')
        cache_guess_tool = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'cache_guess.py')
        response_parser = sdk2.Resource["BEGEMOT_RESPONSE_PARSER"].find(state='READY').first()
        relev_map = get_relev_changing_rules()
        args = [
            sys.executable, cache_guess_tool,
            '--old', self.Parameters.begemot_answers_old,
            '--new', self.Parameters.begemot_answers_new,
            '--keys', json.dumps(DEFAULT_PRIMARY_KEY + self.Parameters.keys),
            '--output_path', self.Parameters.output_path,
            '--yt_proxy', self.Parameters.yt_proxy,
            '--yt_pool', self.Parameters.yt_pool,
            '--rp_path', str(sdk2.ResourceData(response_parser).path),
            '--relev_map', json.dumps(relev_map),
        ]
        if self.Parameters.full_check:
            args.append('--full')
        if self.Parameters.detailed:
            args.append('--detailed')
            if self.Parameters.limit:
                args.extend(['--limit', str(self.Parameters.limit)])
            if self.Parameters.fail_on_error:
                args.append('--fail_on_error')

        # The token goes to the child process through the environment, not the command line.
        token = sdk2.Vault.data(self.Parameters.yt_token_vault_owner, self.Parameters.yt_token_vault_name)
        env = os.environ.copy()
        env['YT_TOKEN'] = token
        yt_client = yt.YtClient(self.Parameters.yt_proxy, token)
        yt_client.create(
            'map_node', self.Parameters.output_path, recursive=True, force=True,
            attributes={'expiration_time': utc_from_now(self.Parameters.results_store_time)},
        )

        retries = self.Parameters.retries
        # Kept outside the except clause: the exception name bound by "except ... as"
        # is not available after the clause on Python 3, and is never bound at all
        # when the loop body does not run (retries == 0).
        last_returncode = None
        with sdk2.helpers.ProcessLog(self, logger='cache_guess_tool') as pl:
            for attempt in range(retries):
                try:
                    result = subprocess.check_output(args, stderr=pl.stderr, env=env)
                    break
                except subprocess.CalledProcessError as e:
                    last_returncode = e.returncode
                    # Show the tail of the tool's stderr together with the remaining attempts.
                    self.set_info('\n'.join(fu.read_lines(str(pl.stderr.path))[-10:] + ["", "Retries left: {}".format(retries - attempt - 1)]))
            else:
                # Reached only when no attempt ended with "break".
                raise TaskFailure('Cache guess tool failed with code {} after {} retries'.format(last_returncode, retries))

        self.__logger.info('Result: {}'.format(result))
        result_json = json.loads(result)
        self.Parameters.answers_diff = self.Context.answers_diff = result_json['answers_diff']
        self.Parameters.cache_guess_old = result_json['cache_guess_old']
        self.Parameters.cache_guess_new = result_json['cache_guess_new']

        self.set_info("Resp diff: {}%; cachehit: {}% → {}%".format(
            self.Parameters.answers_diff * 100,
            self.Parameters.cache_guess_old * 100,
            self.Parameters.cache_guess_new * 100,
        ))

        # The whole tool output is stored, not only its 'diff_rules' entry.
        self.Context.diff_rules = result_json

        # NOTE(review): the next check is a separate "if", so with detailed mode off
        # the 'diff_parsed' branch below still runs - confirm this is intended.
        if not self.Parameters.detailed:
            self.set_info("Detailed diff mode is off. Look for diff in output table")
        if not result_json['diff_parsed']:
            self.set_info("Zero diff.")
        else:
            ans_count = result_json['answers_count']
            # With a row limit the counts are scaled by answers_diff_count / limit,
            # never by less than 1.0.
            multiplier = max(1.0, float(result_json['answers_diff_count']) / self.Parameters.limit) if self.Parameters.limit else 1.0
            self.set_info(get_diff_causes(result_json['diff_rules'], ans_count, multiplier))
