"""Adapters from Juggler check/event data to wall-e checks and health data."""

import json
import logging
import operator
import typing as tp
from collections import namedtuple
from itertools import groupby

from fastjsonschema import JsonSchemaException

from sepelib.core.exceptions import Error
from walle.expert.checks import get_check_period, get_check_accuracy, get_check_max_possible_delay
from walle.expert.constants import (
    CHECK_AGE_TIMEOUT,
    JUGGLER_PASSTHROUGH_TIME,
    JUGGLER_PUSH_PERIOD,
    HW_WATCHER_CHECK_ACCURACY,
    ACTIVE_CHECK_SUSPECTED_PERIOD,
)
from walle.expert.schemes import METADATA_VALIDATORS, DEFAULT_METADATA_VALIDATOR
from walle.expert.types import CheckType, CheckStatus, get_walle_check_type
from walle.host_health import HealthCheck
from walle.hosts import Host, HealthStatus
from walle.models import timestamp
from walle.stats import stats_manager as stats
from walle.util.gevent_tools import gevent_idle_iter
from walle.util.host_health import get_human_reasons
from walle.util.misc import filter_dict_keys

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# A single check reason: a plain dict describing the state of one check.
Reason = tp.Dict
# Mapping of check type to the reason dict of that check.
Reasons = tp.Dict[str, Reason]

# Raw per-host record: the host FQDN together with the list of its stored check documents.
HostHealthStatus = namedtuple("HostHealthStatus", ["fqdn", "checks"])


class HostHealth(tp.NamedTuple):
    """Health of a single host together with the data it was computed from."""

    # Aggregated health status object of the host.
    health: HealthStatus
    # Per-check reasons the health status was built from.
    current_reasons: Reasons
    # Time at which this health snapshot was produced.
    event_time: float
    # Decoded check documents of the host.
    checks: tp.List[tp.Dict]

    def most_fresh_walle_check_age(self, target_check_status) -> tp.Optional[float]:
        """Return the age of the most recent status change among matching checks.

        Only checks whose status equals `target_check_status` and whose type is listed in
        `CheckType.ALL_DMC_TYPES` are considered. Returns None when no check matches.
        """
        matching_mtimes = []
        for check in self.checks:
            if check["status"] != target_check_status:
                continue
            if check["type"] not in CheckType.ALL_DMC_TYPES:
                continue
            matching_mtimes.append(check["status_mtime"])

        if not matching_mtimes:
            return None

        return timestamp() - max(matching_mtimes)


class JugglerCheckError(Error):
    """Base class for errors related to checks received from Juggler."""


class InvalidCheck(JugglerCheckError):
    """Raised when a check received from Juggler does not pass validation."""

    def __init__(self, message="Got invalid check from Juggler"):
        super(InvalidCheck, self).__init__(message)


def get_host_health_reasons(host, decision_maker) -> Reasons:
    """Return health reasons for a single host.

    Stored checks are fetched for `host.name`, the ones enabled in `decision_maker.checks`
    are decoded and then converted to reasons, taking `host.checks_min_time` into account.

    Returns an empty dict (not None) when no check data is stored for the host.
    """
    decision_makers = {host.name: decision_maker}

    # Data is requested for exactly one host name and is grouped by fqdn,
    # so at most one record is expected; only the first one is used.
    for host_status in gevent_idle_iter(_fetch_health_data([host.name])):
        _, checks = _decode_host_health_data(host_status, decision_makers)
        return get_health_status_reasons(checks, decision_maker.checks, host.checks_min_time)

    return {}


def get_health_reasons_for_hosts(hostname_to_decision_maker, checks_min_times):
    """Return a mapping of host name to health reasons for every host that has stored checks.

    `hostname_to_decision_maker` maps host names to objects exposing enabled checks via `.checks`;
    `checks_min_times` maps host names to the minimal accepted check time of that host.
    """
    hostnames = list(hostname_to_decision_maker.keys())
    fetched = _fetch_health_data(hostnames)

    result = {}
    for host_status in gevent_idle_iter(fetched):
        fqdn, decoded_checks = _decode_host_health_data(host_status, hostname_to_decision_maker)
        result[fqdn] = get_health_status_reasons(
            decoded_checks,
            hostname_to_decision_maker[fqdn].checks,
            checks_min_times[fqdn],
        )

    return result


class _ProjectCfgCache:
    """Builds health objects, caching project flags (infiniband, tor link rule) per host name."""

    def __init__(self):
        self._ib_cache = {}
        self._tor_link_cache = {}

    def create_health_object(self, host_name, current_reasons):
        """Build a HealthStatus for the host from its current check reasons."""
        health = HealthStatus()
        health.reasons = get_human_reasons(current_reasons) or None
        health.check_statuses = self._create_check_statuses(host_name, current_reasons)

        if not health.reasons:
            status = HealthStatus.STATUS_OK
        elif CheckStatus.FAILED in health.check_statuses.values():
            status = HealthStatus.STATUS_FAILURE
        else:
            status = HealthStatus.STATUS_BROKEN
        health.status = status

        return health

    def _project_flags(self, host_name):
        """Return (has_infiniband, has_tor_link_rule) for the host's project, loading them on a cache miss."""
        has_infiniband = self._ib_cache.get(host_name)
        has_tor_link_rule = self._tor_link_cache.get(host_name)

        if has_infiniband is None or has_tor_link_rule is None:
            host = Host.get_by_name(host_name, fields=("project",))
            project = host.get_project(fields=("tags",))
            has_infiniband = self._ib_cache[host_name] = project.has_infiniband()
            has_tor_link_rule = self._tor_link_cache[host_name] = project.has_tor_link_rule()

        return has_infiniband, has_tor_link_rule

    def _create_check_statuses(self, host_name, current_reasons):
        """Map wall-e check types to statuses, skipping checks that do not apply to the host's project."""
        # TODO: temporary crutch from WALLE-3905
        has_infiniband, has_tor_link_rule = self._project_flags(host_name)

        statuses = {}
        for check_type, reason in current_reasons.items():
            # Infiniband checks only count for infiniband projects,
            # the tor link check only counts for projects with the tor link rule.
            if (check_type in CheckType.ALL_IB and not has_infiniband) or (
                check_type == CheckType.TOR_LINK and not has_tor_link_rule
            ):
                continue
            statuses[get_walle_check_type(check_type)] = reason["status"]

        return statuses


def get_health_for_hosts(hostname_to_decision_maker) -> tp.Dict[str, HostHealth]:
    """Return a mapping of host name to HostHealth for every host that has stored checks.

    All returned entries share the same `event_time`, taken once after the checks are fetched.
    Check statistics are reported for every processed host.
    """
    hostnames = list(hostname_to_decision_maker.keys())
    health_data = _fetch_health_data(hostnames)

    event_time = timestamp()
    cfg_cache = _ProjectCfgCache()

    result = {}
    for host_status in gevent_idle_iter(health_data):
        fqdn, decoded_checks = _decode_host_health_data(host_status, hostname_to_decision_maker)

        reasons = get_health_status_reasons(decoded_checks, hostname_to_decision_maker[fqdn].checks)
        _report_check_statistics(reasons)

        result[fqdn] = HostHealth(
            health=cfg_cache.create_health_object(fqdn, reasons),
            current_reasons=reasons,
            event_time=event_time,
            checks=decoded_checks,
        )

    return result


def _fetch_health_data(hostnames):
    """Load stored checks of the given hosts and return them as a list of HostHealthStatus, one per fqdn."""
    wanted_fields = (
        "status",
        "type",
        "timestamp",
        "status_mtime",
        "metadata",
        "fqdn",
        "effective_timestamp",
    )
    projection = dict.fromkeys(wanted_fields, 1)
    projection["_id"] = 0

    query = {"fqdn": {"$in": hostnames}}
    # The result is sorted by fqdn because groupby only merges adjacent items with equal keys.
    cursor = HealthCheck.get_collection().find(query, projection, sort=[("fqdn", 1)])

    grouped = []
    for fqdn, host_checks in groupby(cursor, operator.itemgetter("fqdn")):
        grouped.append(HostHealthStatus(fqdn, list(host_checks)))
    return grouped


def _decode_host_health_data(host_status, hostname_to_decision_makers):
    """Decode stored checks of one host, keeping only the check types enabled for it.

    Returns a `(host_name, checks)` tuple; falsy decoding results are dropped.
    """
    host_name = host_status.fqdn
    enabled_checks = hostname_to_decision_makers[host_name].checks

    decoded_checks = []
    for raw_check in host_status.checks:
        if raw_check["type"] not in enabled_checks:
            continue
        decoded = _decode_check_from_health_checks(host_name, raw_check)
        if decoded:
            decoded_checks.append(decoded)

    return host_name, decoded_checks


def _decode_check_from_health_checks(host_name, check):
    """Decode and validate metadata of a stored check, updating the check dict in place.

    On validation failure the check gets status `CheckStatus.INVALID` and an `invalid_reason`;
    otherwise it gets the validated metadata and a computed `stale_timestamp`.
    The same (mutated) check dict is returned in both cases.
    """
    decoded = decode_metadata(check.get("metadata"))
    check_type, check_status = check["type"], check["status"]
    validator, preprocessor = get_metadata_validator(check_type, check_status)

    try:
        decoded = _validate_metadata(host_name, check_type, check_status, decoded, validator, preprocessor)
    except InvalidCheck as error:
        # Keep the decoded (but not validated) metadata next to the failure description.
        check.update(metadata=decoded, status=CheckStatus.INVALID, invalid_reason=str(error))
        return check

    stale_at = get_stale_timestamp(check_type, check_status, check["timestamp"], decoded)
    check.update(metadata=decoded, stale_timestamp=stale_at)
    return check


def decode_metadata(check_description):
    """Parse a check description as JSON.

    Returns None for a falsy description and the parsed value for valid JSON.
    A string that is not valid JSON is returned unchanged; for any other input
    the parsing error is re-raised.
    """
    if not check_description:
        return None

    try:
        parsed = json.loads(check_description)
    except (ValueError, TypeError):
        if isinstance(check_description, str):
            # just use the string as is. validate_metadata should log it if it's wrong.
            return check_description
        raise

    return parsed


def _validate_metadata(host_name, check_type, check_status, metadata, validator, preprocessor):
    """Validate decoded check metadata and return it (or None when no validator is given).

    Raises InvalidCheck when the metadata is a string reporting a timeout or a non-zero exit,
    or when one of the validation steps fails.
    """
    failure_markers = ("timed out after", "exited with code")
    if isinstance(metadata, str) and any(marker in metadata for marker in failure_markers):
        message = "%s: check {} invalid: %s".format(check_type)
        log.debug(message, host_name, metadata)
        raise InvalidCheck(message % (host_name, metadata))

    if validator is None:
        # metadata is not needed or not available.
        return None

    validated = validate_metadata_common(host_name, check_type, metadata, validator, preprocessor)

    if check_type in CheckType.ALL_HW_WATCHER:
        _validate_metadata_hw_watcher(host_name, check_type, check_status, validated)

    return validated


def validate_metadata_common(host_name, check_type, metadata, validator, preprocessor):
    """Run the optional preprocessor and then the validator over the metadata.

    Returns the (possibly preprocessed) metadata. Raises InvalidCheck when the preprocessor
    raises any exception or the validator raises JsonSchemaException.
    """
    if preprocessor is not None:
        try:
            metadata = preprocessor(metadata)
        except Exception as error:
            template = "%s: check {} metadata preprocessor error (%s): %s".format(check_type)
            log.debug(template, host_name, metadata, str(error))
            raise InvalidCheck(template % (host_name, metadata, str(error))) from error

    try:
        validator(metadata)
    except JsonSchemaException as error:
        template = "%s: check {} metadata validation error (%s): %s".format(check_type)
        log.debug(template, host_name, metadata, str(error))
        raise InvalidCheck(template % (host_name, metadata, str(error))) from error
    else:
        return metadata


def _validate_metadata_hw_watcher(host_name, check_type, check_status, metadata):
    """Check hw-watcher specific metadata requirements; raise InvalidCheck when they are not met.

    Metadata must be present unless the check status is `CheckStatus.MISSING`, and present
    metadata must carry a timestamp.
    """
    template = "%s: logical error during metadata validation: unsupported {}/%s hw-watcher check, {}."

    if metadata is None:
        # Metadata is a data from hw-watcher. It is required for these checks.
        problem = "metadata missing" if check_status != CheckStatus.MISSING else None
    elif _hw_watcher_metadata_timestamp(metadata) is None:
        problem = "timestamp field missing"
    else:
        problem = None

    if problem is None:
        return

    message = template.format(check_type, problem)
    log.debug(message, host_name, check_status)
    raise InvalidCheck(message % (host_name, check_status))


def get_metadata_validator(check_type, check_status):
    """Return the metadata validator entry registered for the check type and status.

    When nothing is registered, `(None, None)` is returned for active checks and for statuses
    listed in `CheckStatus.ALL_WITHOUT_METADATA`; otherwise the default validator entry is returned.
    """
    try:
        validators_by_status = METADATA_VALIDATORS[check_type]
        return validators_by_status[check_status]
    except KeyError:
        metadata_not_expected = (
            check_type in CheckType.ALL_ACTIVE or check_status in CheckStatus.ALL_WITHOUT_METADATA
        )
        return (None, None) if metadata_not_expected else DEFAULT_METADATA_VALIDATOR


def get_stale_timestamp(check_type, check_status, receive_timestamp, metadata):
    """Return the timestamp after which the check is considered stale.

    Stale checks are not used as is when computing health `reasons`.
    """
    default_deadline = receive_timestamp + CHECK_AGE_TIMEOUT
    max_delay = get_check_max_possible_delay(check_type)

    if check_status in {CheckStatus.MISSING, CheckStatus.INVALID}:
        return default_deadline  # we ignore this check anyway

    if check_type in CheckType.ALL_HW_WATCHER:
        # hw-watcher checks use data from hw-watcher agent which is being run by system cron.
        # So these checks can stale even if we receive them regularly. Check timestamp from metadata.
        source_timestamp = _hw_watcher_metadata_timestamp(metadata)
    elif check_type not in CheckType.ALL:
        # this is a passive check from automation plot. It must have timestamp in its metadata.
        source_timestamp = metadata["timestamp"]
    elif check_type in CheckType.ALL_NETMON:
        return receive_timestamp + max_delay
    else:
        return default_deadline

    # Checks with their own timestamp go stale at whichever deadline comes first.
    return min(source_timestamp + max_delay, default_deadline)


def get_effective_timestamp(check_type, check_status, status_mtime, receive_timestamp, metadata):
    """Return the timestamp at which the check result is considered to have been actually obtained."""
    if check_status in {CheckStatus.MISSING, CheckStatus.INVALID}:
        return receive_timestamp

    if check_type in CheckType.ALL_HW_WATCHER:
        # Only hw watcher checks have timestamp built into metadata (currently).
        reported_at = _hw_watcher_metadata_timestamp(metadata)
        return reported_at - HW_WATCHER_CHECK_ACCURACY

    if check_type in CheckType.ALL:
        # Calculate 'probable' check time.
        return _probable_effective_timestamp(check_type, status_mtime, receive_timestamp)

    # this is a passive check from automation plot. It must have timestamp in its metadata.
    return metadata["timestamp"] - get_check_accuracy(check_type)


def _hw_watcher_metadata_timestamp(metadata):
    """Return the timestamp stored in hw-watcher metadata, or None when there is none.

    A single `result` entry takes precedence; otherwise the smallest timestamp
    among the `results` entries is used.
    """
    # Only hw watcher checks have timestamp built into metadata (currently).
    if "result" in metadata:
        return metadata["result"]["timestamp"]

    results = metadata.get("results")
    if results:
        return min(entry["timestamp"] for entry in results.values())

    return None


def _probable_effective_timestamp(check_type, status_mtime, receive_timestamp):
    """Estimate the effective timestamp of a check
    (script start time for passive checks, first ping for active checks).

    Two estimates are computed: one from the status mtime, which represents the event time
    more accurately, and one from the receive timestamp, which is more recent.
    The later of the two is returned.
    """
    period = get_check_period(check_type)
    accuracy = get_check_accuracy(check_type)

    from_status_mtime = status_mtime - JUGGLER_PASSTHROUGH_TIME - accuracy
    from_receive_time = receive_timestamp - JUGGLER_PUSH_PERIOD - JUGGLER_PASSTHROUGH_TIME - 2 * period - accuracy

    return max(from_status_mtime, from_receive_time)


def update_check_status_mtime(check, prev_state):
    """Keep the earlier status mtime while the check status has not changed.

    When `prev_state` has a status mtime, the same status as `check` and that mtime is older
    than the one in `check`, the older value is written into `check` and returned.
    Otherwise `check` is left untouched and its own status mtime is returned.
    """
    previous_mtime = prev_state.get("status_mtime")
    current_mtime = check["status_mtime"]

    if not previous_mtime:
        return current_mtime
    if prev_state["status"] != check["status"]:
        return current_mtime
    if not (previous_mtime < current_mtime):
        return current_mtime

    check["status_mtime"] = previous_mtime
    return previous_mtime


def get_health_status_reasons(checks, enabled_checks, check_min_time=None) -> Reasons:
    """Build reasons of the current host health status from its checks.

    Checks whose type is not enabled are skipped. A check that is neither missing nor invalid
    has its status replaced with STALED, UNSURE or SUSPECTED (in that order of precedence)
    when the corresponding condition holds. Enabled checks without any data get a VOID reason.

    `check_min_time` can be used to accept only checks that were processed starting from the specified time.
    It is used to get reasons for host healing, but not used for UI.
    """
    now = timestamp()

    reasons = {}
    for check in checks:
        check_type = check["type"]
        if check_type not in enabled_checks:
            continue

        overrides = {}
        if check["status"] not in {CheckStatus.MISSING, CheckStatus.INVALID}:
            # Use only checks that are recent enough.
            if _is_stale(check, now):
                overrides["status"] = CheckStatus.STALED
            elif _is_unsure(check, check_min_time):
                overrides["status"] = CheckStatus.UNSURE
            elif _is_suspected(check, now):
                overrides["status"] = CheckStatus.SUSPECTED

        reasons[check_type] = _check_status_reason(check, **overrides)

    # Every enabled check that produced no reason shares the same VOID placeholder dict.
    void_reason = {"status": CheckStatus.VOID}
    for check_type in enabled_checks:
        if check_type not in reasons:
            reasons[check_type] = void_reason

    return reasons


def _is_stale(check, cur_time):
    """Tell whether the check's stale timestamp is already earlier than `cur_time`."""
    stale_at = check["stale_timestamp"]
    return stale_at < cur_time


def _is_suspected(check, cur_time):
    """Tell whether an active check changed its status within the last ACTIVE_CHECK_SUSPECTED_PERIOD."""
    if check["type"] not in CheckType.ALL_ACTIVE:
        return False

    return check["status_mtime"] > cur_time - ACTIVE_CHECK_SUSPECTED_PERIOD


def _is_unsure(check, check_min_time):
    """Tell whether the check's effective timestamp is earlier than `check_min_time` (when one is given)."""
    if check_min_time is None:
        return False

    return check["effective_timestamp"] < check_min_time


# Keys of a check dict that are kept when the check is converted to a reason
# (passed as `allowed_keys` to `filter_dict_keys` in `_check_status_reason`).
_CHECK_REASON_KEYS = (
    "status",
    "metadata",
    "timestamp",
    "status_mtime",
    "effective_timestamp",
    "stale_timestamp",
    "invalid_reason",
)


def _check_status_reason(check, **overrides):
    """Build a reason from a copy of the check with `overrides` applied, keeping only the reason keys."""
    merged = dict(check)
    merged.update(overrides)
    return filter_dict_keys(merged, allowed_keys=_CHECK_REASON_KEYS)


def _report_check_statistics(current_reasons):
    """Increment per-status and per-check-type/status counters for every reason."""
    for check_type, reason in current_reasons.items():
        status = reason["status"]
        counters = (
            ("dmc", "checks", status, "count"),
            ("dmc", "checks", check_type, status, "count"),
        )
        for counter in counters:
            stats.increment_counter(counter)
