"""Contains all logic for handling host monitoring after processing a task."""

import logging

import walle.host_health
import walle.util.host_health
from sepelib.core import config, constants
from sepelib.core.exceptions import LogicalError
from walle import audit_log
from walle import hbf_drills
from walle.expert import constants as expert_constants, dmc, juggler
from walle.expert.constants import MONITORING_TIMEOUT
from walle.expert.decisionmakers import get_decision_maker
from walle.expert.types import WalleAction
from walle.fsm_stages.common import (
    register_stage,
    get_current_stage,
    commit_stage_changes,
    fail_current_stage,
    complete_current_stage,
    MONITORING_PERIOD,
)
from walle.hosts import HostState
from walle.models import timestamp
from walle.stages import Stages
from walle.util.optional import Either
from walle.util.tasks import on_finished_task
from walle.clients import dmc as dmc_client

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Value passed as `check_after` by `MonitorStageHandler._wait_more` when a wait has already timed out
# but the stage has not yet exceeded `_ULTIMATE_WAIT_TIMEOUT` (10 * constants.MINUTE_SECONDS).
_ERROR_MONITORING_PERIOD = 10 * constants.MINUTE_SECONDS
"""We increase monitoring period if checks are missing for too long and nobody seems to care."""

# Value passed to `stage.timed_out(...)` in `MonitorStageHandler._wait_more`; when that call reports
# a timeout the stage is failed instead of being re-scheduled (2 * constants.DAY_SECONDS).
_ULTIMATE_WAIT_TIMEOUT = 2 * constants.DAY_SECONDS
"""If nobody wants it then don't burn the fuel on it. Give up waiting for checks after this timeout."""


class MonitorStageHandler:
    """Stage handler that monitors a host's health after a task has been processed.

    An instance is bound to one host and to that host's current stage (as returned by
    `get_current_stage`). A call to `handle` performs a single monitoring iteration: it either
    finishes the stage, hands the host over to DMC, or schedules another check for later.
    """

    def __init__(self, host):
        self.host = host
        self.stage = get_current_stage(host)

    @classmethod
    def as_handler(cls):
        """Return a callable that takes a host, builds a handler for it and runs `handle`."""

        def run_for_host(host):
            handler = cls(host)
            return handler.handle()

        return run_for_host

    @property
    def task_checks(self):
        """The stage's "checks" parameter converted to a set."""
        configured_checks = self.stage.get_param("checks")
        return set(configured_checks)

    @property
    def extra_monitoring_timeout(self):
        """The stage's "monitoring_timeout" parameter; 0 is passed as the default."""
        return self.stage.get_param("monitoring_timeout", 0)

    @property
    def checks_for_use(self):
        """The stage's "checks_for_use" parameter; None is passed as the default."""
        return self.stage.get_param("checks_for_use", None)

    def handle(self):
        """Perform one monitoring iteration for the host.

        The stage is completed right away when the expert system is off for this host or when the
        decision based on the task's checks says the host is healthy. Before the waiting timeout
        expires the stage simply waits; after it expires the decision is deferred to DMC.
        """
        if not self._check_expert_enabled():
            return complete_current_stage(self.host)

        # The HBF drill wait is currently switched off:
        # if self._wait_for_hbf_drill():
        #     return

        decision_maker = get_decision_maker(self.host.get_project())
        health_reasons = juggler.get_host_health_reasons(self.host, decision_maker)
        if not health_reasons:
            return self._on_health_not_available()

        task_params = dmc_client.AlternateDecisionParams(checks=self.task_checks, checks_for_use=self.checks_for_use)
        all_checks_params = dmc_client.DecisionParams(checks=decision_maker.checks)
        task_decision, all_checks_decision = dmc_client.get_decisions_from_handler(
            self.host, task_params, all_checks_params
        )

        if task_decision.action == WalleAction.HEALTHY:
            return complete_current_stage(self.host)

        if not self._check_waiting_timeout(self.host.checks_min_time):
            # Still within the allowed waiting window: just look again later.
            self._wait_more("Waiting for the host to become alive.")
            return None

        if task_decision.action != WalleAction.WAIT:
            # The task's checks produced a definite verdict, so DMC is expected to close the task.
            # NOTE(review): `or_throw` presumably raises LogicalError when DMC did not - confirm in Either.
            task_closed = self._finish_with_dmc(decision_maker, task_decision, health_reasons)
            return Either(task_closed).or_throw(LogicalError)

        # The checks we were asked to monitor are missing. Let DMC look at all checks to find the
        # cause, and fall back to waiting with a deadline if it does not close the task.
        task_closed = self._finish_with_dmc(decision_maker, all_checks_decision, health_reasons)
        return Either(task_closed).or_else(lambda: self._on_monitoring_timeout(health_reasons))

    def _check_expert_enabled(self):
        """Return a truthy value when the expert system is enabled in config and the host is not in FREE state."""
        expert_system_on = config.get_value("expert_system.enabled")
        host_is_free = self.host.state == HostState.FREE
        return expert_system_on and not host_is_free

    def _on_health_not_available(self):
        """No health reasons are available yet: wait more, with a deadline.

        The deadline is counted from the later of the host's rename time and state time (or from
        whichever of the two is set) and lasts `expert_constants.HOST_ADOPTION_TIMEOUT`.
        """
        renamed_at, assigned_at = self.host.rename_time, self.host.state_time

        if renamed_at is None or assigned_at is None:
            waiting_since = renamed_at or assigned_at
        else:
            waiting_since = max(renamed_at, assigned_at)

        if waiting_since is None:
            deadline_passed = False
        else:
            deadline_passed = timestamp() - waiting_since >= expert_constants.HOST_ADOPTION_TIMEOUT

        self._wait_more(
            message="Waiting for Juggler to catch the host.",
            timeout_error="Juggler failed to catch the host.",
            timed_out=deadline_passed,
        )

    def _on_health_outdated(self):
        """Health data is outdated: wait for the next monitoring period."""
        self._wait_more("Waiting for some health data from Juggler.")

    def _wait_more(self, message, timeout_error=None, timed_out=None):
        """Postpone the stage and look at the host again later.

        :param message: status message stored with the stage
        :param timeout_error: error to report once the wait is considered timed out
        :param timed_out: truthy when the caller's own deadline has already passed

        Without `timed_out` the stage is re-checked after `MONITORING_PERIOD`. With it, the stage
        is failed if it has exceeded `_ULTIMATE_WAIT_TIMEOUT`; otherwise the error is recorded and
        the stage is re-checked after the longer `_ERROR_MONITORING_PERIOD`.
        """
        if not timed_out:
            commit_stage_changes(self.host, check_after=MONITORING_PERIOD, status_message=message)
            return None

        if self.stage.timed_out(_ULTIMATE_WAIT_TIMEOUT):
            return fail_current_stage(self.host, error=timeout_error)

        return commit_stage_changes(
            self.host, check_after=_ERROR_MONITORING_PERIOD, error=timeout_error, status_message=message
        )

    def _check_waiting_timeout(self, availability_start_time):
        """Return True when the current time has reached the end of the waiting window.

        The window starts at `availability_start_time` and lasts `MONITORING_TIMEOUT` plus the
        stage's extra monitoring timeout.
        """
        now = timestamp()
        worry_after = availability_start_time + MONITORING_TIMEOUT + self.extra_monitoring_timeout
        return now >= worry_after

    def _on_monitoring_timeout(self, reasons):
        """The monitoring timeout is exceeded: keep waiting, but with a deadline and an error message."""
        readable_reasons = walle.util.host_health.get_human_reasons(reasons, check_types=self.checks_for_use)
        failure_summary = ", ".join(readable_reasons)

        self._wait_more(
            message="Waiting for the host to become alive.",
            timeout_error="Host health check failed: {}. Monitoring timeout exceeded.".format(failure_summary),
            timed_out=True,
        )

    def _finish_with_dmc(self, decision_maker, decision, reasons):
        """Hand the decision over to DMC and wrap up the current task according to its verdict.

        :return: False when DMC decides to wait, True when the current task has been closed
        """
        # DMC may schedule a new task on the host, so take a copy first: it is what we use
        # to finish off the old task afterwards.
        host_before_dmc = self.host.copy()
        verdict = dmc.handle_monitoring(decision_maker.automation(), self.host, decision, reasons)
        action = verdict.action

        if action == WalleAction.WAIT:
            return False

        if action == WalleAction.HEALTHY:
            complete_current_stage(self.host)
            return True

        if action == WalleAction.FAILURE:
            # The host is not healthy and no healing action can be scheduled. Mark it as dead.
            fail_current_stage(self.host, verdict.reason)
            return True

        # Failing the stage would register the host's death, which is not wanted here:
        # the host is still being processed. No database changes are needed either,
        # because DMC has already made them.
        audit_log.fail_task(host_before_dmc.task, error=verdict.reason)
        on_finished_task(host_before_dmc, keep_downtime=True)
        return True

    def _wait_for_hbf_drill(self):
        """
        Delay the stage while the host takes part in an HBF drill. The delay is one monitoring
        period only, because a drill can be cancelled at any time.
        :return: True if host is in active drill and was delayed else False
        """
        drills = hbf_drills.drills_cache.get()
        inclusion_reason = drills.get_host_inclusion_reason(self.host)
        if inclusion_reason is None:
            return False

        self._wait_more("Waiting for HBF drill to finish: {}".format(inclusion_reason))
        return True


# NB: the monitor stage completes the task, so for now it must be the last stage of a task.
# Registers the callable produced by `MonitorStageHandler.as_handler()` as the handler for `Stages.MONITOR`.
register_stage(Stages.MONITOR, MonitorStageHandler.as_handler())
