"""Decision-making center.

Accepts host health status change events and makes a decision what to do with the host.
"""

import logging
from functools import partial

import walle.errors
import walle.fsm_stages.common
import walle.host_fsm.control
import walle.host_status
import walle.projects
import walle.tasks
import walle.util.host_health
import walle.util.limits
import walle.util.misc
import walle.util.tasks
from sepelib.core.exceptions import Error
from walle import audit_log, authorization, restrictions
from walle.clients.eine import ProfileMode
from walle.errors import OperationRestrictedError, InvalidHostStateError
from walle.expert import juggler
from walle.expert.automation import AutomationDisabledError
from walle.expert.decision import Decision
from walle.expert.decisionmakers import get_decision_maker
from walle.expert.types import WalleAction
from walle.hosts import Host, HostState, HostStatus, TaskType
from walle.models import timestamp
from walle.statbox.contexts import host_context, host_failure_context
from walle.statbox.loggers import dmc_logger, failures_logger
from walle.util import notifications
from walle.util.misc import drop_none, StopWatch

log = logging.getLogger(__name__)


# String identifiers for failure origins.
# NOTE(review): neither constant is referenced in the visible part of this module -- presumably they are consumed
# by other modules; confirm before renaming or removing.
FAILURE_ORIGIN_EXPERT = "expert"
FAILURE_ORIGIN_DNS = "dns"


class _ActionRejectedError(Error):
    """Module-private error type derived from the common ``Error`` base class.

    NOTE(review): nothing in the visible part of this module raises or catches it -- confirm it is still needed.
    """


def is_host_healthy(host, checks=None):
    """Tell whether a fast decision for the host comes out as ``WalleAction.HEALTHY``.

    ``checks`` is passed through to ``make_decision`` unchanged; the decision is always requested with
    ``fast=True``.
    """
    return make_decision(host, checks, fast=True).action == WalleAction.HEALTHY


def make_decision(host, checks=None, fast=False) -> Decision:
    """Produce a decision for the host from its current health reasons.

    A decision maker is obtained for the host's project (and the optional ``checks``), the host's health reasons
    are collected, and the decision maker is asked for a decision. When no reasons are available, a ``wait``
    decision is returned instead.
    """
    # TODO(rocco66): remove "fast" argument?
    maker = get_decision_maker(host.get_project(), checks)
    health_reasons = get_host_reasons(host, maker)

    if health_reasons:
        return maker.make_decision(host, health_reasons, fast=fast)

    return Decision.wait("Health information is not available.")


def get_host_reasons(host, decision_maker=None, checks=None) -> juggler.Reasons:
    """Fetch health reasons for the host via ``juggler.get_host_health_reasons``.

    When ``decision_maker`` is not supplied, one is obtained for the host's project and the given ``checks``;
    ``checks`` is ignored if a decision maker is passed in.
    """
    maker = decision_maker if decision_maker is not None else get_decision_maker(host.get_project(), checks)
    return juggler.get_host_health_reasons(host, maker)


def dmc_allowed(host):
    """Return True if the host state is in ``HostState.ALL_DMC`` and its status is in ``HostStatus.ALL_STEADY``.

    In other words: DMC is not allowed for FREE hosts and hosts that are being processed by FSM.
    """
    if host.state not in HostState.ALL_DMC:
        return False
    return host.status in HostStatus.ALL_STEADY


def handle_decision(host, decision, human_reasons, decision_maker):
    """Act on a decision made for the host and return a human readable outcome message.

    * ``WAIT`` -- nothing is done, the decision reason is returned.
    * ``HEALTHY`` -- ``_force_ready_status`` is invoked for the host, then the decision reason is returned.
    * anything else -- the failure is passed to ``handle_failure``; if that raises ``AutomationDisabledError``,
      the error is logged and reported in the returned message instead of being propagated.
    """
    if decision.action in (WalleAction.WAIT, WalleAction.HEALTHY):
        if decision.action == WalleAction.HEALTHY:
            _force_ready_status(host, decision)
        return decision.reason

    try:
        return handle_failure(decision_maker.automation(), host, decision, human_reasons)
    except AutomationDisabledError as disabled:
        log.info("Host %s failure has not been processed: %s", host.human_name(), str(disabled))
        return "Host failure has not been processed: {}".format(str(disabled))


def handle_failure(automation, host, decision, human_reasons):
    """Process an automatically detected host failure and schedule the action chosen for it.

    The decision is written to the log and to statbox first. Hosts whose status is not ``HostStatus.READY`` are
    skipped. If checking the decision's restrictions against the host raises ``OperationRestrictedError``, the
    decision is replaced by a deactivation decision. The failure is then registered through
    ``automation.register_automated_failure`` and the action is scheduled with ``authorization.ISSUER_WALLE``
    as the issuer.

    :param automation: object whose ``register_automated_failure`` is called before scheduling.
    :param host: host the decision was made for.
    :param decision: decision to execute.
    :param human_reasons: strings that are joined into the log and statbox messages.
    :returns: a human readable message -- either why no action was taken, or the failure text that was handed
        to ``schedule_action``.
    """
    log.info("%s: Decision has been made for the host: %s.", host.human_id(), decision)

    statbox_reasons = ",".join(human_reasons) or None
    statbox = dmc_logger(
        walle_action="dmc_handle_failure",
        dmc_decision=decision.action,
        dmc_reasons=statbox_reasons,
        **host_context(host, "checks_min_time")
    )
    statbox.log()

    log.info(
        "%s: Handling failure of the host with '%s' status: %s...",
        host.human_id(),
        host.status,
        ", ".join(human_reasons),
    )

    if host.status != HostStatus.READY:
        # currently we just ignore these hosts.
        # In future we will store the decision to host and show the `heal host` button.
        log.info(
            "%s: Action '%s' was not performed for host because it is in '%s' status.",
            host.human_id(),
            decision.action,
            host.status,
        )
        skipped_message = "Action '{}' has not been performed for host because it is in '{}' status."
        return skipped_message.format(decision.action, host.status)

    try:
        restrictions.check_restrictions(host, decision.get_restrictions())
    except OperationRestrictedError as restriction_error:
        log.info(
            "Host %s: %s Action %s can not be performed: %s. Deactivating host.",
            host.human_name(),
            decision.reason,
            decision.action,
            str(restriction_error),
        )

        # The requested action is forbidden for this host: fall back to deactivating it.
        decision = Decision.deactivate(
            "{} Action '{}' can not be performed: {}. Deactivating host.".format(
                decision.reason, decision.action, restriction_error
            )
        )

    with StopWatch("Global Heal Lock. Total"):
        automation.register_automated_failure(host, decision, automatic_healing=True)

    human_error = "Host health check failed: " + decision.reason
    schedule_action(host, decision, human_error, issuer=authorization.ISSUER_WALLE, statbox=statbox)

    failures = failures_logger()
    failures.log(**host_failure_context(host, host.get_project(), decision, timestamp(), timestamp()))
    return human_error


def handle_user_executed(host, decision, human_reasons, decision_maker, issuer, ignore_maintenance=False, reason=None):
    """Execute a decision for the host on behalf of a user.

    ``WAIT`` and ``HEALTHY`` decisions need no action and only yield their reason. For any other decision the
    failure is logged (log and statbox), registered through ``automation.register_failure`` and the action is
    scheduled with the given ``issuer``.

    :param host: host the decision was made for.
    :param decision: decision to execute.
    :param human_reasons: strings that are joined into the log and statbox messages.
    :param decision_maker: provides the automation object via ``automation()``.
    :param issuer: who requested the action; passed to ``schedule_action``.
    :param ignore_maintenance: passed to ``schedule_action`` unchanged.
    :param reason: optional user supplied text that is prepended to the resulting message.
    :returns: the decision reason when nothing had to be done, otherwise the failure text handed to
        ``schedule_action``.
    """
    log.info("%s: Decision has been made for the host: %s.", host.human_id(), decision)

    nothing_to_do = {WalleAction.WAIT, WalleAction.HEALTHY}
    if decision.action in nothing_to_do:
        return decision.reason

    automation = decision_maker.automation()

    statbox_reasons = ",".join(human_reasons) or None
    statbox = dmc_logger(
        walle_action="dmc_handle_failure",
        dmc_decision=decision.action,
        dmc_reasons=statbox_reasons,
        **host_context(host, "checks_min_time")
    )
    statbox.log()

    log.info(
        "%s: Handling failure of the host with '%s' status by request from %s: %s...",
        host.human_id(),
        host.status,
        issuer,
        ", ".join(human_reasons),
    )

    automation.register_failure(host, decision)

    human_error = _mk_nice_reason_for_user_executed_action(decision.reason, reason)
    schedule_action(host, decision, human_error, ignore_maintenance=ignore_maintenance, issuer=issuer, statbox=statbox)

    failures = failures_logger()
    failures.log(**host_failure_context(host, host.get_project(), decision, timestamp(), timestamp()))
    return human_error


def _mk_nice_reason_for_user_executed_action(decision_reason, user_reason=None):
    strings = []
    msg = "Host health check failed:"
    if user_reason:
        if not user_reason.endswith("."):
            user_reason += ":"
            msg = msg.lower()

        strings.append(user_reason)
    strings.extend([msg, decision_reason])
    return " ".join(strings)


def handle_monitoring(automation, host, decision, reasons):
    """Handle a decision made for a host while its current task is monitoring it.

    The decision is logged to the log and to statbox. ``HEALTHY`` and ``WAIT`` decisions are returned as is.
    For other decisions:

    * if the host task is not an automated task, a failure decision is returned and nothing is scheduled;
    * if the task has ``enable_auto_healing`` unset, the failure is registered through
      ``automation.register_automated_failure`` (which can raise ``AutomationDisabledError``);
    * if the decision's restrictions are violated for the host, a failure decision is returned;
    * otherwise the action is scheduled with the task owner as the issuer and the decision is returned.

    :param automation: object whose ``register_automated_failure`` may be called.
    :param host: host being monitored; its ``task`` attribute is used.
    :param decision: decision made for the host.
    :param reasons: health reasons, converted for logging by ``walle.util.host_health.get_reasons_for_log``.
    :returns: the original decision or a failure decision.
    """
    log.info("%s: Decision has been made for the host: %s.", host.human_id(), decision)

    human_reasons = walle.util.host_health.get_reasons_for_log(reasons)

    statbox_reasons = ",".join(human_reasons) or None
    statbox = dmc_logger(
        walle_action="dmc_handle_monitoring",
        dmc_decision=decision.action,
        dmc_reasons=statbox_reasons,
        **host_context(
            host, "checks_min_time", "healthy_checks_min_time", "active_check_min_time", "passive_check_min_time"
        )
    )
    statbox.log()

    no_action_needed = (WalleAction.HEALTHY, WalleAction.WAIT)
    if decision.action in no_action_needed:
        # no further actions needed.
        return decision

    human_error = "Host health check failed: " + decision.reason
    if not host.task.is_automated_task():
        # Don't schedule action, just let the task fail.
        return Decision.failure(human_error)

    if not host.task.enable_auto_healing:
        # Check automation settings for automation tasks, but not for tasks scheduled by user.
        # this can throw AutomationDisabledError error
        automation.register_automated_failure(host, decision)

    try:
        restrictions.check_restrictions(host, decision.get_restrictions())
    except OperationRestrictedError as restriction_error:
        return Decision.failure(
            "{} Action '{}' can not be performed: {}".format(decision.reason, decision.action, restriction_error)
        )

    log.info(
        "%s: Handling failure of the host with '%s' status: %s...",
        host.human_id(),
        host.status,
        ", ".join(human_reasons),
    )

    schedule_action(host, decision, human_error, from_monitoring=True, issuer=host.task.owner, statbox=statbox)
    return decision


def schedule_action(
    host, decision, error_msg, issuer, statbox=None, from_monitoring=None, ignore_cms=None, ignore_maintenance=None
):
    """Schedule the task that implements ``decision.action`` for the host.

    The action is mapped to one of the ``walle.tasks.schedule_*`` functions (some of them pre-configured with a
    profile mode) and that function is called with the scheduling options. On success the event is written to
    statbox with ``walle_action="dmc_schedule_action"``.

    :param host: host to schedule the task for.
    :param decision: decision to execute; its ``action``, ``reason`` and (for ``PROFILE``) the ``profile_mode``
        parameter are used.
    :param error_msg: failure text; only used as ``dmc_reasons`` when a statbox logger has to be created here.
    :param issuer: issuer of the scheduled task.
    :param statbox: statbox logger to report to; a new one is created when omitted.
    :param from_monitoring: passed to the scheduling function as ``from_current_task``; a truthy value also
        turns on ``ignore_maintenance``.
    :param ignore_cms: passed to the scheduling function.
    :param ignore_maintenance: passed to the scheduling function.
    :raises InvalidHostStateError: propagated unchanged from the scheduling function.
    :raises Error: if there is no scheduling function for the action, or if scheduling failed with any other
        ``Error`` (the original error is attached as the cause).
    """
    # Imported at call time rather than at module level -- presumably to avoid an import cycle; TODO confirm.
    from walle.tasks import (
        schedule_reboot,
        schedule_profile,
        schedule_redeploy,
        schedule_report_task,
        schedule_hardware_repairing,
        schedule_bmc_reset,
        schedule_disk_change,
        schedule_rack_repair,
        schedule_deactivation,
        schedule_rack_overheat_repair,
    )

    schedulers = {
        WalleAction.REBOOT: schedule_reboot,
        WalleAction.REDEPLOY: schedule_redeploy,
        WalleAction.PROFILE: partial(
            schedule_profile, profile_mode=decision.get_param("profile_mode", ProfileMode.HIGHLOAD_TEST)
        ),
        WalleAction.REPORT_FAILURE: schedule_report_task,
        WalleAction.REPAIR_CPU: partial(schedule_profile, profile_mode=ProfileMode.HIGHLOAD_TEST),
        WalleAction.REPAIR_REBOOTS: partial(schedule_redeploy, custom_profile_mode=ProfileMode.EXTRA_HIGHLOAD_TEST),
        WalleAction.REPAIR_MEMORY: partial(schedule_profile, profile_mode=ProfileMode.BASIC_TEST),
        WalleAction.RESET_BMC: schedule_bmc_reset,
        WalleAction.CHANGE_DISK: schedule_disk_change,
        WalleAction.REPAIR_HARDWARE: schedule_hardware_repairing,
        WalleAction.REPAIR_RACK_FAILURE: schedule_rack_repair,
        WalleAction.REPAIR_RACK_OVERHEAT: schedule_rack_overheat_repair,
        WalleAction.DEACTIVATE: schedule_deactivation,
        WalleAction.DISK_RW_REDEPLOY: partial(
            schedule_redeploy, custom_profile_mode=ProfileMode.DISK_RW_TEST  # used only from terminator
        ),
    }

    # Guard only the lookup, so that a KeyError always means "no scheduler for this action". The KeyError
    # itself carries no extra information, hence the suppressed context.
    try:
        schedule_task = schedulers[decision.action]
    except KeyError:
        raise Error("Unexpected host healing action: {}.", decision.action) from None

    # drop_none presumably strips None-valued entries, so unset options are not passed on; verify in walle.util.misc.
    kwargs = drop_none(
        {
            "issuer": issuer,
            "task_type": TaskType.AUTOMATED_HEALING,
            "host": host,
            "from_current_task": from_monitoring,
            "ignore_maintenance": ignore_maintenance or from_monitoring,
            "decision": decision,
            "reason": decision.reason,
            "ignore_cms": ignore_cms,
        }
    )

    try:
        # TODO: we may want to upgrade the CMS task here somehow instead of simply replacing it.
        schedule_task(**kwargs)
    except InvalidHostStateError:
        # Callers get this one as is; every other Error is wrapped below.
        raise
    except Error as e:
        raise Error("Unable to schedule host healing operation: {}", e) from e

    if statbox is None:
        statbox = dmc_logger(walle_action="dmc_handle_failure", dmc_decision=decision.action, dmc_reasons=error_msg)
    statbox.log(
        walle_action="dmc_schedule_action",
        **host_context(
            host, "checks_min_time", "healthy_checks_min_time", "active_check_min_time", "passive_check_min_time"
        )
    )


def handle_restricted_automated_action(automation, host, human_error, cancel=None):
    """Deactivate a host whose restrictions prevented an automated action.

    The deactivation reason is ``human_error`` followed by the comma separated list of the host's restrictions
    (an empty list when the host has none); the work itself is delegated to ``handle_host_deactivation``.
    """
    applied_restrictions = ", ".join(host.restrictions or [])
    deactivation_reason = "{} Restrictions applied: {}.".format(human_error, applied_restrictions)
    handle_host_deactivation(automation, host, deactivation_reason, cancel=cancel)


# TODO: This is almost equal to host deactivation task. The only difference that it's guaranteed to be instant
# operation. Can we merge the operations?
def handle_host_deactivation(automation, host, reason, cancel=None):
    """Instantly switch the host to the ``dead`` status and close its current task, if any.

    The failure is registered through ``automation.register_automated_failure`` first. The status is then
    changed inside a deactivation audit log entry, with ``authorization.ISSUER_WALLE`` as the issuer, and the
    host task is unset by the same update. Afterwards the task the host had before the change is either marked
    as failed (``cancel`` is None) or reported as cancelled with ``cancel`` as the reason, and a dead host
    notification is sent.

    :param automation: object whose ``register_automated_failure`` is called.
    :param host: host to deactivate.
    :param reason: human readable reason, used in the log, audit log, status change and notification.
    :param cancel: when not None, the task is treated as cancelled with this reason instead of failed.
    :raises Error: if the status change fails with any ``Error`` -- including ``InvalidHostStateError`` raised
        when ``host.modify`` reports that the update was not applied. The original error is attached as the
        cause.
    """
    from walle.util.tasks import on_finished_task

    automation.register_automated_failure(host, Decision.deactivate(reason))

    # Snapshot taken before the status change: task bookkeeping and the notification below use it.
    orig_host = host.copy()
    status = HostStatus.DEAD
    issuer = authorization.ISSUER_WALLE

    log.warning("%s: %s Consider it dead.", host.human_id(), reason)

    try:
        with audit_log.on_deactivate_host(issuer, host.project, host.inv, host.name, host.uuid, reason) as audit_entry:
            audit_entry.instant = True

            # The update is conditional on the state, status and task id seen here -- presumably a guard
            # against concurrent changes of the host; TODO confirm against Host.modify.
            query = dict(state=host.state, status=host.status)
            if host.task is not None:
                query.update(task__task_id=host.task.task_id)

            update_kwargs = Host.set_status_kwargs(
                host.state, status, issuer, audit_entry.id, downtime=False, reason=reason
            )

            if not host.modify(query, unset__task=True, **update_kwargs):
                raise InvalidHostStateError(host)
    except Error as e:
        raise Error("Failed to change host status to '{}': {}", status, e) from e

    if orig_host.task is not None:
        if cancel is None:
            audit_log.fail_task(orig_host.task, reason)
            on_finished_task(orig_host)
        else:
            walle.util.tasks.on_task_cancelled(issuer, orig_host, reason=cancel)

    notifications.on_dead_host(orig_host, reason)


def _force_ready_status(host, decision):
    """Force the host back to the ``ready`` status if it is ``dead`` and that status was set by Wall-E.

    Hosts in any other status, or whose status author is not ``authorization.ISSUER_WALLE``, are left
    untouched. The decision reason is used as the reason of the status change.
    """
    if host.status != HostStatus.DEAD or host.status_author != authorization.ISSUER_WALLE:
        return

    log.info(
        "%s: Decision has been made for the host: %s. Forcing host status to 'ready'...", host.human_id(), decision
    )

    walle.host_status.force_status(
        authorization.ISSUER_WALLE,
        host,
        HostStatus.READY,
        reason=decision.reason,
        only_from_current_status_id=True,
    )
    log.info("%s: host status has been forced to 'ready': %s.", host.human_id(), decision.reason)
