"""Rules for hw_watcher memory check."""

import logging

from walle import restrictions
from walle.admin_requests.constants import RequestTypes
from walle.expert.decision import Decision
from walle.expert.failure_types import FailureType
from walle.expert.rules.base import CheckRuleInterface
from walle.expert.rules.escalation import (
    EscalationRules,
    EscalationPoint,
    Predicate,
    action_match,
    automatic_profile_not_supported,
    platform_not_supported,
    limit_reached,
    escalate_to_deactivate,
    task_has_not_helped,
    escalate_to_highload_test,
)
from walle.expert.rules.hw_watcher_rules.util import get_eine_code
from walle.expert.rules.utils import repair_hardware_params
from walle.expert.types import WalleAction, CheckType, HwWatcherCheckStatus
from walle.operations_log.constants import Operation

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# The single entry matches the ``redeploy_on_ecc_failure`` keyword argument of
# ``CheckMemory.__init__``.
# NOTE(review): presumably the list of configurable rule parameters; the
# consumer of this list is not visible in this module - confirm against callers.
MEMORY_RULES_PARAMS = ["redeploy_on_ecc_failure"]


@Predicate
def _is_memory_change_failure(decision):
    """Tell whether *decision* carries a slot and an UNKNOWN hw-watcher status.

    Returns ``False`` when ``decision.params`` has no ``"slot"`` key; otherwise
    returns the result of comparing the decision's ``"status"`` parameter with
    ``HwWatcherCheckStatus.UNKNOWN``.
    """
    if "slot" not in decision.params:
        return False
    return decision.get_param("status") == HwWatcherCheckStatus.UNKNOWN


def _change_memory_operation_params(decision):
    return {"slot": decision.get_param("slot", -1)}


def _get_extra_restrictions(params):
    if "redeploy" in params and params["redeploy"]:
        return [restrictions.AUTOMATED_REDEPLOY]
    return []


def escalate_ecc_failed_to_repair_hardware(decision, reason):
    """Escalate an ECC decision to ``WalleAction.REPAIR_HARDWARE``.

    Sets ``decision.params["operation"]`` to the CHANGE_MEMORY operation type
    (mutating the decision's params in place), then escalates with the
    AUTOMATED_MEMORY_CHANGE restriction plus whatever
    ``_get_extra_restrictions`` derives from the decision's params.

    Returns whatever ``decision.escalate`` returns.
    """
    decision.params["operation"] = Operation.CHANGE_MEMORY.type
    required_restrictions = [
        restrictions.AUTOMATED_MEMORY_CHANGE,
        *_get_extra_restrictions(decision.params),
    ]
    return decision.escalate(
        WalleAction.REPAIR_HARDWARE,
        reason=reason,
        restrictions=required_restrictions,
    )


class CheckMemory(CheckRuleInterface):
    """Rule for the hw_watcher memory check (``CheckType.MEMORY``).

    ``apply`` looks at the ``"ecc"`` and ``"mem"`` entries of the check result
    and turns a reported failure into a ``Decision``:

    * ECC result with status UNKNOWN or FAILED -> ``WalleAction.REPAIR_MEMORY``
    * mem result FAILED with a single ``size``/``numa``/``speed`` reason ->
      ``WalleAction.REPAIR_HARDWARE`` (CHANGE_MEMORY operation)
    * anything else -> healthy decision
    """

    def __init__(self, redeploy_on_ecc_failure=True, **kwargs):
        """Store the rule configuration.

        :param redeploy_on_ecc_failure: passed as the ``redeploy`` parameter of
            decisions built for ECC failures.
        :param kwargs: accepted and ignored.
        """
        self.redeploy_on_ecc_failure = redeploy_on_ecc_failure

    check_type = CheckType.MEMORY

    escalation_rules = EscalationRules(
        # REPAIR_MEMORY decisions are escalated to REPAIR_HARDWARE (memory change)
        # via escalate_ecc_failed_to_repair_hardware, for three different reasons.
        EscalationPoint(
            predicate=action_match(WalleAction.REPAIR_MEMORY),
            reason=platform_not_supported(Operation.REPAIR_MEMORY),
            action=escalate_ecc_failed_to_repair_hardware,
        ),
        EscalationPoint(
            predicate=action_match(WalleAction.REPAIR_MEMORY),
            reason=task_has_not_helped(Operation.REPAIR_MEMORY.host_status, "Repair memory cold didn't help"),
            action=escalate_ecc_failed_to_repair_hardware,
        ),
        EscalationPoint(
            predicate=action_match(WalleAction.REPAIR_MEMORY),
            reason=limit_reached("max_repaired_memory", Operation.REPAIR_MEMORY, _change_memory_operation_params),
            action=escalate_ecc_failed_to_repair_hardware,
        ),
        # REPAIR_HARDWARE decisions are handed to escalate_to_highload_test when a
        # CHANGE_MEMORY limit is reached; the first point additionally requires
        # the _is_memory_change_failure predicate.
        EscalationPoint(
            predicate=action_match(WalleAction.REPAIR_HARDWARE) & _is_memory_change_failure,
            reason=limit_reached(
                "max_change_memory_failures", Operation.CHANGE_MEMORY, _change_memory_operation_params
            ),
            action=escalate_to_highload_test,
        ),
        EscalationPoint(
            predicate=action_match(WalleAction.REPAIR_HARDWARE),
            reason=limit_reached("max_changed_memory", Operation.CHANGE_MEMORY, _change_memory_operation_params),
            action=escalate_to_highload_test,
        ),
        # PROFILE decisions are handed to escalate_to_deactivate.
        EscalationPoint(
            predicate=action_match(WalleAction.PROFILE),
            reason=automatic_profile_not_supported,
            action=escalate_to_deactivate,
        ),
        EscalationPoint(
            predicate=action_match(WalleAction.PROFILE),
            reason=limit_reached("max_memory_profiles", Operation.PROFILE),
            action=escalate_to_deactivate,
        ),
    )

    def apply(self, host, check_result) -> Decision:
        """Make a decision from the hw_watcher memory check result.

        :param host: not used by this rule.
        :param check_result: check result; ``check_result["metadata"]["results"]``
            must contain the ``"ecc"`` entry, and the ``"mem"`` entry is read
            when the ECC result does not produce a decision.
        :return: a repair decision for a recognised failure, otherwise a
            healthy decision.
        :raises KeyError: if an expected key is missing from the check result.
        """
        hw_watcher_results = check_result["metadata"]["results"]

        # ECC problems are examined first and win over "mem" problems.
        ecc_result = hw_watcher_results["ecc"]
        if ecc_result["status"] in (HwWatcherCheckStatus.UNKNOWN, HwWatcherCheckStatus.FAILED):
            eine_code = get_eine_code(ecc_result)
            if eine_code and "DIMM_UE" in eine_code:
                return self._decision_ecc_failed_DIMM_UE(ecc_result, check_result)
            return self._decision_ecc_failed(ecc_result, check_result)

        mem_result = hw_watcher_results["mem"]
        if mem_result["status"] == HwWatcherCheckStatus.FAILED:
            failure_type = self._get_mem_failure_type(mem_result)
            if failure_type == "size":
                return self._decision_mem_size_failed(mem_result, check_result)
            if failure_type == "numa":
                return self._decision_mem_numa_failed(mem_result, check_result)
            if failure_type == "speed":
                return self._decision_mem_speed_failed(mem_result, check_result)
            # Any other (or undetermined) failure type falls through to healthy.

        return Decision.healthy("The host doesn't have any known memory errors.", checks=[CheckType.MEMORY])

    def _build_ecc_decision(self, ecc_result, failure_check_info, request_type):
        """Build the REPAIR_MEMORY decision shared by both ECC failure flavours.

        :param ecc_result: the ``"ecc"`` hw_watcher result; its ``"reason"``,
            ``"comment"``, ``"slot"`` and ``"status"`` keys are read.
        :param failure_check_info: passed through to the decision unchanged.
        :param request_type: admin request type put into the decision params.
        """
        reason = "hw-watcher: {reason} {comment}".format(
            reason=" ".join(ecc_result["reason"]), comment=ecc_result["comment"]
        )

        params = repair_hardware_params(
            redeploy=self.redeploy_on_ecc_failure,
            request_type=request_type,
            operation_type=Operation.REPAIR_MEMORY.type,
            slot=ecc_result["slot"],
            status=ecc_result["status"],
            errors=ecc_result["reason"],
            eine_code=get_eine_code(ecc_result),
            reboot=True,
        )
        return Decision(
            WalleAction.REPAIR_MEMORY,
            reason,
            params,
            checks=[CheckType.MEMORY],
            failure_type=FailureType.MEM_ECC,
            restrictions=[restrictions.AUTOMATED_MEMORY_REPAIR] + _get_extra_restrictions(params),
            failure_check_info=failure_check_info,
        )

    def _decision_ecc_failed(self, ecc_result, failure_check_info):
        """Build the decision for an ECC failure (CORRUPTED_MEMORY request type)."""
        return self._build_ecc_decision(ecc_result, failure_check_info, RequestTypes.CORRUPTED_MEMORY.type)

    def _decision_ecc_failed_DIMM_UE(self, ecc_result, failure_check_info):
        """Build the decision for an ECC failure whose eine code contains ``DIMM_UE``.

        Same as ``_decision_ecc_failed`` except for the UNCORRECTABLE_ERRORS
        request type.
        """
        return self._build_ecc_decision(ecc_result, failure_check_info, RequestTypes.UNCORRECTABLE_ERRORS.type)

    @staticmethod
    def _get_mem_failure_type(mem_result):
        """Extract the failure type from a ``"mem"`` hw_watcher result.

        :return: the part of the reason string before the first ``":"`` when
            ``mem_result["reason"]`` holds exactly one entry; ``None`` when the
            ``"reason"`` key is missing or holds any other number of entries.
        """
        if "reason" not in mem_result:
            return None
        if len(mem_result["reason"]) == 1:
            reason_string = mem_result["reason"][0]
            reason_failure_type = reason_string.split(":")[0]
            return reason_failure_type
        return None

    @staticmethod
    def _decision_mem_size_failed(mem_result, failure_check_info):
        """Build the REPAIR_HARDWARE decision for a memory size mismatch.

        Reads ``"reason"``, ``"comment"``, ``"needed_mem"`` and ``"real_mem"``
        from *mem_result*; *failure_check_info* is passed through unchanged.
        """
        reason = "hw-watcher: {reason} {comment}".format(
            reason=" ".join(mem_result["reason"]), comment=mem_result["comment"]
        )

        params = repair_hardware_params(
            request_type=RequestTypes.INVALID_MEMORY_SIZE.type,
            operation_type=Operation.CHANGE_MEMORY.type,
            expected=mem_result["needed_mem"],
            real=mem_result["real_mem"],
            eine_code=get_eine_code(mem_result),
            reboot=True,
        )
        return Decision(
            WalleAction.REPAIR_HARDWARE,
            reason,
            params,
            checks=[CheckType.MEMORY],
            failure_type=FailureType.MEM_SIZE,
            restrictions=[restrictions.AUTOMATED_MEMORY_CHANGE],
            failure_check_info=failure_check_info,
        )

    @staticmethod
    def _build_mem_change_decision(mem_result, failure_check_info, request_type, failure_type):
        """Build the REPAIR_HARDWARE decision shared by the numa and speed failures.

        :param mem_result: the ``"mem"`` hw_watcher result; its ``"reason"`` key is read.
        :param failure_check_info: passed through to the decision unchanged.
        :param request_type: admin request type put into the decision params.
        :param failure_type: failure type recorded on the decision.
        """
        reason = "hw-watcher: {reason}".format(reason=" ".join(mem_result["reason"]))

        params = repair_hardware_params(
            request_type=request_type,
            eine_code=get_eine_code(mem_result),
            operation_type=Operation.CHANGE_MEMORY.type,
            reboot=True,
        )
        return Decision(
            WalleAction.REPAIR_HARDWARE,
            reason,
            params,
            checks=[CheckType.MEMORY],
            failure_type=failure_type,
            restrictions=[restrictions.AUTOMATED_MEMORY_CHANGE],
            failure_check_info=failure_check_info,
        )

    @staticmethod
    def _decision_mem_numa_failed(mem_result, failure_check_info):
        """Build the REPAIR_HARDWARE decision for a ``numa`` memory failure."""
        return CheckMemory._build_mem_change_decision(
            mem_result,
            failure_check_info,
            RequestTypes.INVALID_NUMA_MEMORY_NODES_SIZES.type,
            FailureType.MEM_NUMA,
        )

    @staticmethod
    def _decision_mem_speed_failed(mem_result, failure_check_info):
        """Build the REPAIR_HARDWARE decision for a ``speed`` memory failure."""
        return CheckMemory._build_mem_change_decision(
            mem_result,
            failure_check_info,
            RequestTypes.LOW_MEMORY_SPEED.type,
            FailureType.MEM_SPEED,
        )
