"""Rules for hw_watcher disk check."""

import logging

from walle import restrictions
from walle.admin_requests.constants import RequestTypes
from walle.clients.eine import ProfileMode
from walle.expert.decision import Decision
from walle.expert.failure_types import FailureType
from walle.expert.rules.base import CheckRuleInterface
from walle.expert.rules.escalation import (
    EscalationRules,
    EscalationPoint,
    Predicate,
    action_match,
    automatic_profile_not_supported,
    limit_reached,
    escalate_to_deactivate,
    escalate_to_redeploy,
)
from walle.expert.rules.hw_watcher_rules.util import get_reason_from_hw_watcher, get_eine_code
from walle.expert.rules.utils import repair_hardware_params
from walle.expert.types import WalleAction, CheckType, HwWatcherCheckStatus
from walle.operations_log.constants import Operation
from walle.util.misc import drop_none

# Module-level logger named after this module.
log = logging.getLogger(__name__)


class DISK_EINE_CODE:
    """Eine code strings that the disk rules in this module look for.

    The values are compared against what ``get_eine_code`` returns for a
    hw_watcher result, so they are runtime identifiers and must stay verbatim.
    """

    # Mapped to FailureType.SSD_PERF_RAND_TOO_LOW by _get_disk_failure_type_params.
    SSD_PERF_RAND_TOO_LOW = "SSD_PERF_RAND_TOO_LOW"
    # Mapped to FailureType.DISK_CONNECTIVITY.
    ATA_BUS_ERROR = "ATA_BUS_ERROR"
    # Mapped to FailureType.DISK_PERFORMANCE.
    # NOTE(review): spelled "TO_LOW" (unlike SSD_PERF_RAND_TOO_LOW); presumably mirrors
    # the code name emitted upstream — confirm before "fixing" the spelling.
    DRIVE_PERF_RAND_TO_LOW = "DRIVE_PERF_RAND_TO_LOW"
    # Mapped to FailureType.DISK_BAD_BLOCKS.
    ATA_IO_ERROR = "ATA_IO_ERROR"
    # Mapped to FailureType.DISK_NVME_MISSING.
    NVME_MISSING = "NVME_MISSING"
    # Mapped to FailureType.DISK_NVME_LINK_DEGRADED; also makes a change-disk action
    # require restrictions.AUTOMATED_PROFILE (see _change_disk_extra_restrictions).
    NVME_LINK_DEGRADED = "NVME_LINK_DEGRADED"


# Presumably the names of the tunables accepted by the disk rules in this module;
# the list is not used in this file, so verify against its consumers.
# NOTE(review): "redeploy_on_bad_block" is singular here, while CheckDisk and
# CheckDiskBadBlocks take the keyword ``redeploy_on_bad_blocks`` — confirm whether
# the mismatch is intentional.
DISK_RULES_PARAMS = [
    "redeploy_on_bad_block",
    "redeploy_on_no_info",
    "redeploy_on_unreserved_disk",
    "redeploy_on_unknown_disk",
    "reboot_on_performance",
]


def _change_disk_extra_restrictions(params):
    """Collect the restrictions a change-disk action additionally depends on.

    * ``restrictions.AUTOMATED_PROFILE`` is added when the eine codes include
      NVME_LINK_DEGRADED, or when ``params`` has neither "slot" nor "serial".
    * ``restrictions.AUTOMATED_REDEPLOY`` is added when ``params["redeploy"]`` is truthy.

    :param params: change-disk params dict; must contain the "redeploy" key.
    :returns: list of restriction names (possibly empty).
    :raises KeyError: if "redeploy" is missing from ``params``.
    """
    link_degraded = DISK_EINE_CODE.NVME_LINK_DEGRADED in params.get("eine_code", [])
    disk_not_identified = not ("slot" in params or "serial" in params)

    needed = []
    if link_degraded or disk_not_identified:
        needed.append(restrictions.AUTOMATED_PROFILE)
    if params["redeploy"]:
        needed.append(restrictions.AUTOMATED_REDEPLOY)
    return needed


@Predicate
def _is_change_disk_via_profiling(decision):
    """Predicate for escalation rule. Return true if decision params have neither "slot" nor "serial".

    Judging by the name, such a disk change is carried out through host profiling.
    """
    disk_params = decision.params
    return "slot" not in disk_params and "serial" not in disk_params


@Predicate
def _for_nvme_missing_code(decision):
    """Predicate for escalation rule. Return true if decision's eine codes include NVME_MISSING."""
    reported_codes = decision.params.get("eine_code", [])
    return DISK_EINE_CODE.NVME_MISSING in reported_codes


@Predicate
def _for_ssd_perf_rand_too_low_code(decision):
    """Predicate for escalation rule. Return true if decision's eine codes include SSD_PERF_RAND_TOO_LOW."""
    reported_codes = decision.params.get("eine_code", [])
    return DISK_EINE_CODE.SSD_PERF_RAND_TOO_LOW in reported_codes


def _slot_params_getter(decision):
    """If decision contains slot num, count only operations with this slot value in params"""
    params = None
    slot = decision.params.get("slot")
    if slot is not None:
        params = {"slot": slot}
    return params


def _get_hw_watcher_reason_result(check_result):
    metadata = check_result["metadata"]
    if "result" in metadata:
        hw_watcher_result = metadata["result"]

        hw_watcher_reasons = hw_watcher_result["reason"]
        if "disk2replace" in hw_watcher_result:
            hw_watcher_reasons = hw_watcher_reasons + hw_watcher_result["disk2replace"]["reason"]

        return get_reason_from_hw_watcher("Disk", hw_watcher_reasons), hw_watcher_result
    return "Disk check failed: {}".format(metadata["reason"]), None


def escalate_to_change_disk_and_redeploy(decision, reason):
    """Escalate ``decision`` to CHANGE_DISK with ``params["redeploy"]`` forced to True.

    The params are shallow-copied before the flag is set, so the decision being
    escalated keeps its own params untouched; only the escalated decision sees
    ``redeploy=True``.

    :param decision: decision to escalate; its ``params`` must be a mapping.
    :param reason: escalation reason passed through to ``decision.escalate``.
    :returns: whatever ``decision.escalate`` returns.
    """
    params = dict(decision.params)
    params["redeploy"] = True
    # Restrictions are computed after the flag is set, so AUTOMATED_REDEPLOY is included.
    extra_restrictions = _change_disk_extra_restrictions(params)
    return decision.escalate(WalleAction.CHANGE_DISK, reason=reason, restrictions=extra_restrictions, params=params)


def escalate_to_change_disk(decision, reason):
    """Escalate ``decision`` to CHANGE_DISK, reusing its params as they are.

    :param decision: decision to escalate.
    :param reason: escalation reason passed through to ``decision.escalate``.
    :returns: whatever ``decision.escalate`` returns.
    """
    current_params = decision.params
    return decision.escalate(
        WalleAction.CHANGE_DISK,
        reason=reason,
        restrictions=_change_disk_extra_restrictions(current_params),
        params=current_params,
    )


def _get_disk_params(hw_watcher_result):
    """Build decision params describing the disk hw_watcher asks to replace.

    The input is only read: a slot of ``-1`` is treated as "no slot" on a local
    value instead of being rewritten inside ``hw_watcher_result``.

    :param hw_watcher_result: hw_watcher result mapping; "disk2replace" is optional.
    :returns: the params passed through ``drop_none`` (expected to strip None-valued
        keys, which is what the later "slot"/"serial" presence checks rely on).
        "redeploy" is always present and set to False.
    """
    disk_info = hw_watcher_result.get("disk2replace", {})

    slot = disk_info.get("slot")
    if slot == -1:
        slot = None

    return drop_none(
        {
            "slot": slot,
            "type": disk_info.get("disk_type"),
            "model": disk_info.get("model"),
            "serial": disk_info.get("serial"),
            "shelf_inv": disk_info.get("shelf_inv"),
            "errors": disk_info.get("reason"),
            "redeploy": False,
            "diskperformance": disk_info.get("diskperformance"),
            "eine_code": get_eine_code(hw_watcher_result),
        }
    )


def _get_disk_failure_type_params(hw_watcher_result):
    """Classify a hw_watcher disk result into a failure type plus decision params.

    Resolution order:

    1. No hw_watcher result at all -> DISK_NO_CHECK_INFO with ``{"redeploy": True}``.
    2. Any error starting with "cable: " -> DISK_CONNECTIVITY.
    3. The first known eine code found, checked in a fixed priority order.
    4. No "disk2replace" section, or neither slot nor serial known -> DISK_UNKNOWN.
    5. Everything else -> DISK_COMMON.

    :param hw_watcher_result: hw_watcher result mapping, or None.
    :returns: ``(failure_type, params)``.
    """
    if hw_watcher_result is None:
        return FailureType.DISK_NO_CHECK_INFO, {"redeploy": True}

    params = _get_disk_params(hw_watcher_result)

    all_errors = hw_watcher_result["reason"] + params.get("errors", [])
    if any(error.startswith("cable: ") for error in all_errors):
        return FailureType.DISK_CONNECTIVITY, params

    eine_code = get_eine_code(hw_watcher_result)
    if eine_code:
        # Order matters: the first code found decides the failure type.
        failure_type_by_code = (
            (DISK_EINE_CODE.ATA_BUS_ERROR, FailureType.DISK_CONNECTIVITY),
            (DISK_EINE_CODE.DRIVE_PERF_RAND_TO_LOW, FailureType.DISK_PERFORMANCE),
            (DISK_EINE_CODE.ATA_IO_ERROR, FailureType.DISK_BAD_BLOCKS),
            (DISK_EINE_CODE.NVME_MISSING, FailureType.DISK_NVME_MISSING),
            (DISK_EINE_CODE.NVME_LINK_DEGRADED, FailureType.DISK_NVME_LINK_DEGRADED),
            (DISK_EINE_CODE.SSD_PERF_RAND_TOO_LOW, FailureType.SSD_PERF_RAND_TOO_LOW),
        )
        for code, failure_type in failure_type_by_code:
            if code in eine_code:
                return failure_type, params

    # no disk info is a different problem
    if "disk2replace" not in hw_watcher_result:
        return FailureType.DISK_UNKNOWN, {"redeploy": True}
    if not ("slot" in params or "serial" in params):
        return FailureType.DISK_UNKNOWN, params

    return FailureType.DISK_COMMON, params


def decision_redeploy(host, reason, params, failure_type, failure_check_info):
    """Return a REDEPLOY decision for the disk check, or a healthy one if redeploy is restricted.

    :param host: host the restrictions are checked against.
    :param reason: decision reason.
    :param params: decision params.
    :param failure_type: failure type recorded on the decision.
    :param failure_check_info: check result recorded on the decision.
    """
    try:
        restrictions.check_restrictions(host, restrictions.AUTOMATED_REDEPLOY)
    except restrictions.OperationRestrictedError:
        verdict = Decision.healthy("Automated redeploy is restricted for the host.", checks=[CheckType.DISK])
    else:
        verdict = Decision(
            WalleAction.REDEPLOY,
            reason=reason,
            checks=[CheckType.DISK],
            failure_type=failure_type,
            params=params,
            failure_check_info=failure_check_info,
        )
    return verdict


def decision_profile(
    host, reason, params, failure_type, failure_check_info, host_restrictions=(restrictions.AUTOMATED_PROFILE,)
):
    """Return a PROFILE decision for the disk check, or a healthy one if profiling is restricted.

    :param host: host the restrictions are checked against.
    :param reason: decision reason.
    :param params: decision params.
    :param failure_type: failure type recorded on the decision.
    :param failure_check_info: check result recorded on the decision.
    :param host_restrictions: restrictions passed to ``check_restrictions`` for the host.
    """
    try:
        restrictions.check_restrictions(host, restrictions=host_restrictions)
    except restrictions.OperationRestrictedError:
        verdict = Decision.healthy("Automated profile is restricted for the host.", checks=[CheckType.DISK])
    else:
        verdict = Decision(
            WalleAction.PROFILE,
            reason=reason,
            checks=[CheckType.DISK],
            failure_type=failure_type,
            params=params,
            failure_check_info=failure_check_info,
        )
    return verdict


def decision_reboot(host, reason, params, failure_check_info):
    """Return a REBOOT decision for a disk performance failure, or a healthy one if reboot is restricted.

    The failure type is always ``FailureType.DISK_PERFORMANCE``.

    :param host: host the restrictions are checked against.
    :param reason: decision reason.
    :param params: decision params.
    :param failure_check_info: check result recorded on the decision.
    """
    try:
        # NOTE(review): this checks restrictions.REBOOT while the message below (and
        # CheckDiskCable) talk about automated reboot — confirm this is the intended one.
        restrictions.check_restrictions(host, restrictions.REBOOT)
    except restrictions.OperationRestrictedError:
        verdict = Decision.healthy("Automated reboot is restricted for the host.", checks=[CheckType.DISK])
    else:
        verdict = Decision(
            WalleAction.REBOOT,
            reason=reason,
            checks=[CheckType.DISK],
            failure_type=FailureType.DISK_PERFORMANCE,
            params=params,
            failure_check_info=failure_check_info,
        )
    return verdict


def _decision_change_disk(host, reason, params, failure_type, failure_check_info):
    """Return a CHANGE_DISK decision, or a healthy one if any required restriction is set on the host.

    ``restrictions.AUTOMATED_DISK_CHANGE`` is checked first, then every restriction
    returned by ``_change_disk_extra_restrictions(params)``, in order. The extra
    restrictions are also recorded on the resulting decision.

    :param host: host the restrictions are checked against.
    :param reason: decision reason.
    :param params: change-disk params; must contain the "redeploy" key.
    :param failure_type: failure type recorded on the decision.
    :param failure_check_info: check result recorded on the decision.
    """

    def is_restricted(restriction):
        try:
            restrictions.check_restrictions(host, restriction)
        except restrictions.OperationRestrictedError:
            return True
        return False

    def skip(message):
        return Decision.healthy(message, checks=[CheckType.DISK])

    if is_restricted(restrictions.AUTOMATED_DISK_CHANGE):
        return skip("Automated disk repair is restricted for the host.")

    extra_restrictions = _change_disk_extra_restrictions(params)
    for restriction in extra_restrictions:
        if is_restricted(restriction):
            return skip("{} is restricted for the host.".format(restriction))

    return Decision(
        WalleAction.CHANGE_DISK,
        reason=reason,
        params=params,
        checks=[CheckType.DISK],
        failure_type=failure_type,
        restrictions=extra_restrictions,
        failure_check_info=failure_check_info,
    )


class CheckDisk(CheckRuleInterface):
    """Disk rule covering every failure type not handled by a dedicated rule in this module.

    Depending on the failure type and the constructor flags, ``apply`` answers with
    CHANGE_DISK, PROFILE, a deactivate decision or "healthy".
    """

    check_type = CheckType.DISK

    def __init__(
        self,
        redeploy_on_bad_blocks=True,
        redeploy_on_no_info=True,
        redeploy_on_unreserved_disk=True,
        redeploy_on_unknown_disk=True,
        reboot_on_performance=True,
        **kwargs,
    ):
        """Store the rule flags; any extra keyword arguments are accepted and ignored."""
        self.redeploy_on_bad_blocks = redeploy_on_bad_blocks
        self.redeploy_on_no_info = redeploy_on_no_info
        self.redeploy_on_unreserved_disk = redeploy_on_unreserved_disk
        self.redeploy_on_unknown_disk = redeploy_on_unknown_disk
        self.reboot_on_performance = reboot_on_performance

    escalation_rules = EscalationRules(
        # PROFILE for NVME_MISSING -> redeploy once the "max_nvme_missing_profiles" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.PROFILE) & _for_nvme_missing_code,
            reason=limit_reached("max_nvme_missing_profiles", Operation.PROFILE, params=_slot_params_getter),
            action=escalate_to_redeploy,
        ),
        # REDEPLOY -> change disk with redeploy once the redeployment limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.REDEPLOY),
            reason=limit_reached("max_nvme_missing_redeployments", Operation.REDEPLOY, params=_slot_params_getter),
            action=escalate_to_change_disk_and_redeploy,
        ),
        # CHANGE_DISK for an identified disk (slot or serial known) -> deactivate on limit.
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK) & ~_is_change_disk_via_profiling,
            reason=limit_reached("max_changed_disks", Operation.CHANGE_DISK, params=_slot_params_getter),
            action=escalate_to_deactivate,
        ),
        # NOTE(review): the two points below share the same predicate; how they interact
        # depends on how EscalationRules evaluates its points, which is not visible here.
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK) & _is_change_disk_via_profiling,
            reason=automatic_profile_not_supported,
            action=escalate_to_deactivate,
        ),
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK) & _is_change_disk_via_profiling,
            reason=limit_reached(
                "max_disk_fixes_via_profiling", Operation.PROFILE, params={"modes": ProfileMode.DISK_RW_TEST}
            ),
            action=escalate_to_deactivate,
        ),
    )

    def apply(self, host, check_result):
        """Turn a disk check result into a decision for ``host``.

        Failure types not listed below (or disabled by the constructor flags in the
        way the conditions spell out) yield a healthy decision.
        """
        reason, hw_watcher_result = _get_hw_watcher_reason_result(check_result)
        failure_type, params = _get_disk_failure_type_params(hw_watcher_result)

        def change_disk(disk_params):
            return _decision_change_disk(host, reason, disk_params, failure_type, check_result)

        def deactivate():
            return Decision.deactivate(reason, failure_type=failure_type)

        if failure_type == FailureType.DISK_COMMON:
            # Redeploy only when enabled and hw_watcher reported the UNKNOWN status.
            params["redeploy"] = self.redeploy_on_unreserved_disk and (
                hw_watcher_result["status"] == HwWatcherCheckStatus.UNKNOWN
            )
            return change_disk(params)

        if failure_type == FailureType.DISK_NVME_MISSING:
            params["profile"] = True
            return decision_profile(host, reason, params, failure_type, check_result)

        if failure_type == FailureType.DISK_NVME_LINK_DEGRADED:
            return change_disk(params)

        if failure_type == FailureType.DISK_UNKNOWN:
            if not self.redeploy_on_unknown_disk:
                return deactivate()
            return change_disk({"redeploy": True})

        # Bad blocks / performance reach here only when the softer action is disabled.
        if failure_type == FailureType.DISK_BAD_BLOCKS and not self.redeploy_on_bad_blocks:
            return change_disk(params)

        if failure_type == FailureType.DISK_PERFORMANCE and not self.reboot_on_performance:
            return change_disk(params)

        if failure_type == FailureType.DISK_NO_CHECK_INFO:
            if not self.redeploy_on_no_info:
                return deactivate()
            return change_disk(params)

        return Decision.healthy("Host is healthy.")


class CheckDiskCable(CheckRuleInterface):
    """Disk rule reacting to DISK_CONNECTIVITY failures with a disk cable repair request."""

    check_type = CheckType.DISK

    escalation_rules = EscalationRules(
        # REPAIR_HARDWARE -> deactivate once the "max_disk_cable_repairs" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.REPAIR_HARDWARE),
            reason=limit_reached("max_disk_cable_repairs", Operation.REPAIR_DISK_CABLE, params=_slot_params_getter),
            action=escalate_to_deactivate,
        )
    )

    def apply(self, host, check_result):
        """Return a REPAIR_HARDWARE decision for connectivity failures, healthy otherwise."""
        reason, hw_watcher_result = _get_hw_watcher_reason_result(check_result)
        failure_type, params = _get_disk_failure_type_params(hw_watcher_result)

        if failure_type != FailureType.DISK_CONNECTIVITY:
            return Decision.healthy("Host is healthy.")
        return self._decision_repair_cable(reason, params, check_result)

    @staticmethod
    def _decision_repair_cable(reason, params, failure_check_info):
        """Build the REPAIR_HARDWARE decision for a bad disk cable.

        The disk params are merged into the repair params produced by
        ``repair_hardware_params`` and "reboot" is set to True on the result.
        """
        repair_params = repair_hardware_params(
            request_type=RequestTypes.BAD_DISK_CABLE.type,
            operation_type=Operation.REPAIR_DISK_CABLE.type,
            power_on_before_repair=False,
            **params,
        )
        repair_params["reboot"] = True

        required_restrictions = [restrictions.AUTOMATED_DISK_CABLE_REPAIR, restrictions.AUTOMATED_REBOOT]
        return Decision(
            WalleAction.REPAIR_HARDWARE,
            reason=reason,
            params=repair_params,
            checks=[CheckType.DISK],
            failure_type=FailureType.DISK_CONNECTIVITY,
            restrictions=required_restrictions,
            failure_check_info=failure_check_info,
        )


class CheckDiskBadBlocks(CheckRuleInterface):
    """Disk rule reacting to DISK_BAD_BLOCKS failures with a redeploy (when enabled)."""

    check_type = CheckType.DISK

    def __init__(self, redeploy_on_bad_blocks=True, **kwargs):
        """Store the rule flag; any extra keyword arguments are accepted and ignored."""
        self.redeploy_on_bad_blocks = redeploy_on_bad_blocks

    escalation_rules = EscalationRules(
        # REDEPLOY -> change disk with redeploy once the "max_ata_io_error_redeployments" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.REDEPLOY),
            reason=limit_reached("max_ata_io_error_redeployments", Operation.REDEPLOY, params=_slot_params_getter),
            action=escalate_to_change_disk_and_redeploy,
        ),
        # CHANGE_DISK -> deactivate once the "max_changed_disks" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK),
            reason=limit_reached("max_changed_disks", Operation.CHANGE_DISK, params=_slot_params_getter),
            action=escalate_to_deactivate,
        ),
    )

    def apply(self, host, check_result):
        """Return a redeploy decision for bad blocks when enabled, healthy otherwise."""
        reason, hw_watcher_result = _get_hw_watcher_reason_result(check_result)
        failure_type, params = _get_disk_failure_type_params(hw_watcher_result)

        if failure_type != FailureType.DISK_BAD_BLOCKS or not self.redeploy_on_bad_blocks:
            return Decision.healthy("Host is healthy.")

        params["redeploy"] = True
        return decision_redeploy(host, reason, params, failure_type, check_result)


class CheckSsdPerfLow(CheckRuleInterface):
    """Disk rule reacting to SSD_PERF_RAND_TOO_LOW failures with a DANGEROUS_HIGHLOAD_TEST profile."""

    check_type = CheckType.DISK

    escalation_rules = EscalationRules(
        # PROFILE for SSD_PERF_RAND_TOO_LOW -> change disk with redeploy once the limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.PROFILE) & _for_ssd_perf_rand_too_low_code,
            reason=limit_reached("max_ssd_perf_rand_too_low_profiles", Operation.PROFILE, params=_slot_params_getter),
            action=escalate_to_change_disk_and_redeploy,
        ),
        # CHANGE_DISK -> deactivate once the "max_changed_disks" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK),
            reason=limit_reached("max_changed_disks", Operation.CHANGE_DISK, params=_slot_params_getter),
            action=escalate_to_deactivate,
        ),
    )

    def apply(self, host, check_result):
        """Return a profile decision for SSD_PERF_RAND_TOO_LOW, healthy otherwise.

        Besides AUTOMATED_PROFILE the host must also allow
        AUTOMATED_PROFILE_WITH_FULL_DISK_CLEANUP for the profile to be prescribed.
        """
        reason, hw_watcher_result = _get_hw_watcher_reason_result(check_result)
        failure_type, params = _get_disk_failure_type_params(hw_watcher_result)

        if failure_type != FailureType.SSD_PERF_RAND_TOO_LOW:
            return Decision.healthy("Host is healthy.")

        params["profile"] = True
        params["profile_mode"] = ProfileMode.DANGEROUS_HIGHLOAD_TEST
        required_restrictions = [
            restrictions.AUTOMATED_PROFILE,
            restrictions.AUTOMATED_PROFILE_WITH_FULL_DISK_CLEANUP,
        ]
        return decision_profile(
            host,
            reason,
            params,
            failure_type,
            check_result,
            host_restrictions=required_restrictions,
        )


class CheckDiskPerformance(CheckRuleInterface):
    """Disk rule reacting to DISK_PERFORMANCE failures with a reboot (when enabled)."""

    check_type = CheckType.DISK

    def __init__(self, reboot_on_performance=True, **kwargs):
        """Store the rule flag; any extra keyword arguments are accepted and ignored."""
        self.reboot_on_performance = reboot_on_performance

    escalation_rules = EscalationRules(
        # REBOOT -> change disk once the "max_host_reboots" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.REBOOT),
            reason=limit_reached("max_host_reboots", Operation.REBOOT, params=_slot_params_getter),
            action=escalate_to_change_disk,
        ),
        # CHANGE_DISK -> deactivate once the "max_changed_disks" limit check fires.
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK),
            reason=limit_reached("max_changed_disks", Operation.CHANGE_DISK, params=_slot_params_getter),
            action=escalate_to_deactivate,
        ),
    )

    def apply(self, host, check_result):
        """Return a reboot decision for performance failures when enabled, healthy otherwise."""
        reason, hw_watcher_result = _get_hw_watcher_reason_result(check_result)
        failure_type, params = _get_disk_failure_type_params(hw_watcher_result)

        if failure_type != FailureType.DISK_PERFORMANCE or not self.reboot_on_performance:
            return Decision.healthy("Host is healthy.")
        return decision_reboot(host, reason, params, check_result)


class CheckDiskSmartCodesRule(CheckRuleInterface):
    """Disk rule prescribing a disk change when a known DRIVE_SMART_* eine code is reported."""

    # Eine code -> failure type recorded on the resulting decision.
    _DRIVE_SMART_CODES = {
        "DRIVE_SMART_RSEC": FailureType.DISK_DRIVE_SMART_RSEC_ERROR,
        "DRIVE_SMART_REVENT": FailureType.DISK_DRIVE_SMART_REVENT_ERROR,
        "DRIVE_SMART_OFFUNC": FailureType.DISK_DRIVE_SMART_OFFUNC_ERROR,
        "DRIVE_SMART_PEND_SEC": FailureType.DISK_DRIVE_SMART_PEND_SEC_ERROR,
        "DRIVE_SMART_DISK_FAILING": FailureType.DISK_DRIVE_SMART_DISK_FAILING_ERROR,
        "DRIVE_SMART_OPEN_FAILED": FailureType.DISK_DRIVE_SMART_OPEN_FAILED_ERROR,
        "DRIVE_SMART_SELFTEST_FAILED": FailureType.DISK_DRIVE_SMART_SELFTEST_FAILED_ERROR,
        "DRIVE_SMART_UNK_ERROR": FailureType.DISK_DRIVE_SMART_UNK_ERROR_ERROR,
    }

    check_type = CheckType.DISK

    escalation_rules = EscalationRules(
        # CHANGE_DISK for an identified disk (slot or serial known) -> deactivate on limit.
        EscalationPoint(
            predicate=action_match(WalleAction.CHANGE_DISK) & ~_is_change_disk_via_profiling,
            reason=limit_reached("max_changed_disks", Operation.CHANGE_DISK, params=_slot_params_getter),
            action=escalate_to_deactivate,
        ),
    )

    def apply(self, host, check_result):
        """Return a change-disk decision for the first known SMART code, healthy otherwise."""
        reason, hw_watcher_result = _get_hw_watcher_reason_result(check_result)
        if hw_watcher_result is None:
            # NOTE(rocco66): it will be investigated later in other rules
            return Decision.healthy("Host is healthy.")

        reported_codes = get_eine_code(hw_watcher_result)
        for code in reported_codes or ():
            failure_type = CheckDiskSmartCodesRule._DRIVE_SMART_CODES.get(code)
            if failure_type:
                params = _get_disk_params(hw_watcher_result)
                return _decision_change_disk(host, reason, params, failure_type, check_result)

        return Decision.healthy("Host is healthy.")
