import logging

from infra.ya_salt.lib import jugglerutil
from infra.ya_salt.lib import rusage
from infra.ya_salt.lib import statusutil
from infra.ya_salt.lib import envutil
from infra.ya_salt.lib import constants
from .iface import IReporter

# Signal and check names reported by this module.
#
# NOTE(review): the "_thhh" / "_tmmm" suffixes are part of the signal names
# consumed by the metrics backend (signals are pushed via the YASM client in
# Monitoring.notify); presumably they select the aggregation rules - confirm
# against the YASM naming conventions before touching them.

# Host manager metrics
METRIC_APT_UPDATE_OK = 'hostman-apt-update-ok_thhh'
METRIC_SERVER_INFO_OK = 'hostman-server-info-ok_thhh'
METRIC_APT_UPDATE_DURATION = 'hostman-apt-update-duration_thhh'
METRIC_YASM_UPDATE_OK = 'hostman-yasm-agent-update-ok_thhh'
METRIC_KERNEL_OK = 'hostman-kernel-ok_thhh'
METRIC_KERNEL_NEED_REBOOT = 'hostman-kernel-need-reboot_thhh'
METRIC_HOSTMAN_READY = 'hostman-ready_thhh'
# NOTE: the constant name is misspelled ("NODEIFO"), the signal name is not.
# The name is kept as is because other modules may import it.
METRIC_NODEIFO_OK = 'hostman-nodeinfo-ok_thhh'
METRIC_PORTO_DAEMON_MANAGER_OK = 'hostman-porto-daemon-manager-ok_thhh'
# Overall salt metrics
METRIC_SALT_INIT_FAIL = 'salt-init-fail_thhh'
METRIC_SALT_COMPILE_FAIL = 'salt-compile-fail_thhh'
# The only signal here using the "_tmmm" suffix; its value is a count of
# components, not a 0/1 flag (see extract_salt_signals).
METRIC_SALT_COMPONENTS_INIT_FAIL = 'salt-components-init-fail_tmmm'
METRIC_SALT_EXECUTE_ALLOWED = 'salt-execute-allowed_thhh'
METRIC_SALT_EXECUTE_FAIL = 'salt-execute-fail_thhh'
METRIC_SALT_REVISION_OK = 'salt-revision-ok_thhh'
# Salt timings
METRIC_SALT_EXECUTE_TIME = "salt-last-exec-seconds_thhh"
METRIC_HOSTCTL_EXECUTE_TIME = "hostctl-exec-seconds_thhh"
# Salt states results
METRIC_SALT_OK = "salt-ok-count_thhh"
METRIC_SALT_CHANGED = "salt-changed-count_thhh"
METRIC_SALT_FAIL = "salt-fail-count_thhh"
# Salt revision timestamp
METRIC_SALT_TIMESTAMP = "salt-timestamp_thhh"
# Gencfg related
METRIC_GENCFG_GROUPS_COUNT = 'hostman-gencfg-groups-count_thhh'
# Juggler check names (used as the service name of the pushed checks)
CHECK_NEED_REBOOT = "need_reboot_kernel"
CHECK_HOSTMAN_READY = "hostman-ready"
CHECK_HOSTMAN_SALT = "hostman-salt"
CHECK_HOSTMAN_APT = "hostman-apt"

# Module-wide logger; uses a fixed name rather than __name__.
log = logging.getLogger('mon-reporter')


def ready_and_need_reboot(kernel_status):
    """
    Tell whether the kernel status reports that a reboot is needed.

    Returns True only when the ``need_reboot`` condition has status 'True'.

    NOTE(review): despite the function name, no separate "ready to reboot"
    condition is inspected here; presumably readiness is already folded into
    ``need_reboot`` by whoever fills the status - confirm.
    """
    reboot_condition = kernel_status.need_reboot
    return reboot_condition.status == 'True'


def get_hostman_ready_err(status):
    """
    Return a readiness error message, or None when the host is ready.

    For now readiness is defined by a single condition: the initial setup
    status. Anything other than status 'True' is reported as an error that
    embeds the condition message.
    """
    setup_condition = status.initial_setup_passed
    if setup_condition.status != 'True':
        return "Initial setup not passed: {}".format(setup_condition.message)
    return None


def is_market_module_run(result):
    """
    Tell whether a state result looks like a hand-run salt module of Market.

    Market has a lot of states which execute a salt module by hand, so every
    run is reported as "changed", which confuses monitoring and SRE. Market
    guys (maxk@) are aware of it, but the task is buried in their backlog,
    so such results are recognized here by state id and comment in order to
    be filtered out of metrics.

    Returns True only for results whose id starts with 'market.' and whose
    comment matches one of the known patterns.
    """
    # The comment is looked at only for market states.
    if not result.id.startswith('market.'):
        return False
    comment = result.comment
    return (
        comment.startswith('Module function ')
        or (comment.startswith('Command ') and comment.endswith(' run'))
        or comment == 'Target was already mounted. Updated the entry in the fstab.'
    )


def extract_component_signals(status):
    """
    Build per-key "ok" and "changed" flag signals from executed states.

    Every key of the failed mapping yields ('<key>-ok_thhh', 0 or 1): 0 when
    its count is above zero, 1 otherwise. Every key of the changed mapping
    yields ('<key>-changed_thhh', 1 or 0): 1 when its count is above zero.
    "ok" signals come first, "changed" signals follow.

    NOTE(review): keys are presumably salt component names, judging by the
    helper name - confirm in statusutil.

    :return: list of (signal name, value) tuples
    """
    _, changed_by_key, failed_by_key = \
        statusutil.get_executed_states_results_by_component(status.salt_components)
    signals = []
    for key, failed_count in failed_by_key.items():
        signals.append(('{}-ok_thhh'.format(key), 0 if failed_count > 0 else 1))
    for key, changed_count in changed_by_key.items():
        signals.append(('{}-changed_thhh'.format(key), 1 if changed_count > 0 else 0))
    return signals


def extract_salt_signals(status):
    """
    Build the overall salt run signals from host status.

    Reads ``status.salt`` for run-level conditions and timings and
    ``status.salt_components`` for executed states/components results.

    :return: list of (signal name, value) tuples
    """
    salt_status = status.salt
    components = status.salt_components

    # Mutable holder updated by the callbacks below while components
    # are being enumerated.
    seen = {'orly_denied': False, 'init_failed': 0}

    def _detect_orly_denial(result):
        # A component which was not applied because of the
        # "too many operations in progress" message is treated as denied by orly.
        applied = result.applied
        if applied.status != 'True' and 'too many operations in progress' in applied.message:
            seen['orly_denied'] = True

    def _count_init_failure(result):
        if result.initialized.status != 'True':
            seen['init_failed'] += 1

    statusutil.enumerate_executed_components(components, _detect_orly_denial)
    statusutil.enumerate_executed_components(components, _count_init_failure)

    def _not_market_module_run(result):
        # Market "module run" states report changes on every run,
        # see is_market_module_run for details.
        return not is_market_module_run(result)

    ok_count, changed_count, fail_count = statusutil.get_executed_states_results(
        components,
        changed_filter=_not_market_module_run,
    )

    compile_fail = int(salt_status.compilation_ok.status != 'True')
    exec_fail = int(salt_status.execution_ok.status != 'True')
    # Do not report execution as failed if orly denied,
    # thus we can distinguish it from other problems.
    # Proper fix should be done, e.g. adding robot readable reason field
    # to condition object and checking it, perhaps generating another signal.
    # E.g. Condition(status='False', reason='OrlyNotAllowed').
    if seen['orly_denied']:
        exec_fail = 0

    run_seconds = salt_status.last_run_duration.ToMilliseconds() / 1000.0
    return [
        (METRIC_SALT_EXECUTE_TIME, run_seconds),
        (METRIC_SALT_INIT_FAIL, 0),  # Not failed if we managed to get here
        (METRIC_SALT_COMPILE_FAIL, compile_fail),
        (METRIC_SALT_EXECUTE_FAIL, exec_fail),
        (METRIC_SALT_OK, ok_count),
        (METRIC_SALT_FAIL, fail_count),
        (METRIC_SALT_CHANGED, changed_count),
        (METRIC_SALT_TIMESTAMP, salt_status.revision_timestamp.seconds),
        (METRIC_SALT_COMPONENTS_INIT_FAIL, seen['init_failed']),
    ]


def extract_salt_rev_status(spec, status):
    """
    Build the "salt revision ok" signal.

    :return: single (signal name, value) tuple; value is 1 when the revision
        in spec equals the revision in status, 0 otherwise
    """
    revision_matches = spec.revision == status.revision
    return METRIC_SALT_REVISION_OK, (1 if revision_matches else 0)


def extract_status_signals(status):
    """
    Build apt, server info and yasm agent signals from host status.

    :return: list of (signal name, value) tuples
    """
    def _flag(condition):
        # Condition status is a string, only 'True' counts as success.
        return 1 if condition.status == 'True' else 0

    apt = status.apt
    return [
        (METRIC_APT_UPDATE_OK, _flag(apt.last_update_ok)),
        (METRIC_APT_UPDATE_DURATION, apt.last_update_duration.seconds),
        (METRIC_SERVER_INFO_OK, _flag(status.server_info.last_update_ok)),
        (METRIC_YASM_UPDATE_OK, _flag(status.yasm.agent_package.last_update_ok)),
    ]


def extract_gencfg_signal(spec):
    """
    Build the signal with the number of gencfg groups listed in spec.

    :return: list with a single (signal name, value) tuple
    """
    groups_count = len(spec.gencfg_groups)
    return [(METRIC_GENCFG_GROUPS_COUNT, groups_count)]


def extract_hostctl_signals(status):
    """
    Build the hostctl execution time signal, in seconds.

    :return: list with a single (signal name, value) tuple
    """
    elapsed_ms = status.hostctl.execution_time.ToMilliseconds()
    # Reported in (fractional) seconds.
    return [(METRIC_HOSTCTL_EXECUTE_TIME, elapsed_ms / 1000.0)]


def check_kernel(spec, status):
    """
    Build the "need reboot" juggler check.

    The check is CRIT when the kernel status says a reboot is needed; its
    description is then "<kernel version in status> => <kernel version in spec>".
    Otherwise the check is OK.
    """
    if not ready_and_need_reboot(status.kernel):
        return jugglerutil.Check.make_walle(CHECK_NEED_REBOOT, 'OK', 'OK')
    description = "{} => {}".format(status.kernel.version, spec.kernel.version)
    return jugglerutil.Check.make_walle(CHECK_NEED_REBOOT, "CRIT", description)


def check_apt(status):
    """
    Build the apt juggler check from the last apt update condition.

    The check is OK when the condition status is 'True'; otherwise it is CRIT
    and carries the condition message as description.
    """
    update_condition = status.apt.last_update_ok
    if update_condition.status == 'True':
        return jugglerutil.Check.make_walle(CHECK_HOSTMAN_APT, 'OK', 'OK')
    # Only the first 100 characters of the message go to the description.
    description = update_condition.message[:100]
    return jugglerutil.Check.make_walle(CHECK_HOSTMAN_APT, 'CRIT', description)


def check_hostman(status):
    """
    Build the "hostman ready" juggler check.

    For now readiness is a simple condition: the initial setup status.
    The check is OK when there is no readiness error, WARN when the host is
    awaiting reboot and CRIT otherwise.
    """
    setup_condition = status.initial_setup_passed
    err = get_hostman_ready_err(status)
    if err is None:
        # Message is usually "OK"
        return jugglerutil.Check.make_walle(CHECK_HOSTMAN_READY, "OK", setup_condition.message)
    # Weird condition to avoid Wall-E false-positive monitoring flapping.
    # Reboot runs AFTER monitoring report, so Wall-E has time to fire up "hostman-ready" CRIT.
    # Not treating status as OK to avoid broken hosts appear in production.
    awaiting_reboot = setup_condition.message == constants.AWAITING_REBOOT_STATUS
    level = "WARN" if awaiting_reboot else "CRIT"
    return jugglerutil.Check.make_walle(CHECK_HOSTMAN_READY, level, err)


def check_salt(salt, salt_components):
    """
    Build the salt juggler check.

    Init, compilation and execution conditions are inspected in that order;
    the first one with status 'False' makes the check CRIT with its message.
    If none of them is 'False', the check is CRIT when there are failed
    components and OK otherwise.

    NOTE(review): conditions are compared with 'False' here, so any other
    status value (not only 'True') passes through - presumably intentional,
    confirm before aligning with the "!= 'True'" checks used for metrics.
    """
    def _crit(description):
        return jugglerutil.Check.make_walle(CHECK_HOSTMAN_SALT, "CRIT", description)

    if salt.init_ok.status == 'False':
        return _crit("Init failed: {}".format(salt.init_ok.message))
    if salt.compilation_ok.status == 'False':
        return _crit("Compilation failed: {}".format(salt.compilation_ok.message))
    if salt.execution_ok.status == 'False':
        return _crit("Execution failed: {}".format(salt.execution_ok.message))

    failed = statusutil.failed_components_names(salt_components)
    if not failed:
        return jugglerutil.Check.make_walle(CHECK_HOSTMAN_SALT, "OK", "OK")

    # Description looks like:
    # "Failed states: 21 [yandex-wh-watcher@, yandex-rtc-nodeinfo, ...]"
    # or, when there are at most two names:
    # "Failed states: 2 [yandex-wh-watcher@, yandex-rtc-nodeinfo]"
    if len(failed) > 2:
        names = "[{}, ...]".format(', '.join(failed[:2]))
    else:
        names = "[{}]".format(', '.join(failed))
    return _crit("Failed states: {} {}".format(len(failed), names))


def extract_juggler_checks(spec, status):
    """
    Build all juggler checks for the host.

    :return: list of checks in fixed order: kernel, apt, hostman, salt
    """
    checks = []
    checks.append(check_kernel(spec, status))
    checks.append(check_apt(status))
    checks.append(check_hostman(status))
    checks.append(check_salt(status.salt, status.salt_components))
    return checks


class Monitoring(IReporter):
    """
    Reporter which pushes host status as YASM signals and juggler checks.
    """

    def __init__(self, spec, client, pkg_man):
        """
        :param spec: host spec; salt, kernel, gencfg groups, wall-e tags/project
            and env type are read from it
        :param client: object with ``push_local(signals, tags=...)`` used to send signals
        :param pkg_man: object whose ``metrics().to_yasm()`` yields extra signals
        """
        self.spec = spec
        self.client = client
        self.pkg_man = pkg_man

    def extract_signals(self, status):
        """
        Collect every signal to be pushed for the given status.

        :return: list of signals: resource usage first, then salt, hostctl,
            components, status, kernel, package manager, gencfg and readiness
        """
        signals = list(rusage.get_rusage_metrics())
        signals += extract_salt_signals(status)
        signals += extract_hostctl_signals(status)
        signals.append(extract_salt_rev_status(self.spec.salt, status.salt))
        signals += extract_component_signals(status)
        signals += extract_status_signals(status)

        kernel_ok = 1 if status.kernel.ready.status == 'True' else 0
        signals.append((METRIC_KERNEL_OK, kernel_ok))

        signals += self.pkg_man.metrics().to_yasm()
        signals += extract_gencfg_signal(self.spec)

        need_reboot = 1 if ready_and_need_reboot(status.kernel) else 0
        signals.append((METRIC_KERNEL_NEED_REBOOT, need_reboot))

        hostman_ready = 1 if get_hostman_ready_err(status) is None else 0
        signals.append((METRIC_HOSTMAN_READY, hostman_ready))
        return signals

    def extract_ctype(self):
        """
        extracts environment value as ctype from wall-e tags
        example: rtc.stage-prestable -> prestable, rtc.stage-experiment -> experiment
        if stage cannot be taken from wall-e tags (an error is returned or
        there is no stage), spec.env_type is used as fallback value
        :return: str ctype
        """
        stage, err = envutil.stage_from_tags(self.spec.walle_tags)
        if err is None and stage is not None:
            return stage
        fallback = self.spec.env_type
        log.warning('Cannot determine correct stage, using "{}" as stage: {}'.format(fallback, err))
        return fallback

    def notify(self, status):
        """
        Push signals and then juggler checks built from the given status.

        Push errors are only logged, so a failed signals push does not
        prevent the juggler checks from being sent.
        """
        signals = self.extract_signals(status)
        tags = {
            'ctype': self.extract_ctype(),
            'prj': self.spec.walle_project,
        }
        log.info('Sending signals...')
        yasm_err = self.client.push_local(signals, tags=tags)
        if yasm_err is not None:
            log.error("YASM push failed: {}".format(yasm_err))

        checks = extract_juggler_checks(self.spec, status)
        for check in checks:
            log.info('Push juggler event {}: {}'.format(check.service, check.status))
        juggler_err = jugglerutil.push(checks)
        if juggler_err is not None:
            log.error("Juggler push failed: {}".format(juggler_err))
