import collections
import typing
import time

import dataclasses

from infra.rtc_sla_tentacles.backend.lib.clickhouse import database
from infra.rtc_sla_tentacles.backend.lib.clickhouse.client import ClickhouseClient
from infra.rtc_sla_tentacles.backend.lib.config.interface import ConfigInterface
from infra.rtc_sla_tentacles.backend.lib.reroll_history import history

from infra.rtc_sla_tentacles.backend.lib.metrics.types import SloType


# Time-to-live for monitoring data; the expression evaluates to 330
# (5 min 30 s if the unit is seconds — NOTE(review): the unit is not shown in
# this file, confirm at the place where the constant is consumed).
# Results in semi-NO_DATA warning when update from CH is stalled.
MONITORING_DATA_TTL = 60 * 5 + 30


@dataclasses.dataclass
class Reroll:
    """Flat record describing one redeployment or reallocation session.

    The metrics classes in this module build these from history sessions and
    keep them, converted with ``dataclasses.asdict``, in their
    ``complete_*_sessions`` lists.
    """
    # Session start timestamp (filled from the history session's ``start_ts``).
    start_ts: int
    # End timestamp (filled from the history session's ``end_of_active_phase``).
    end_ts: int
    # Taskgroup ids attached to the session — presumably Nanny taskgroups,
    # judging by the ``nanny_current_taskgroup_ids`` fields; TODO confirm.
    taskgroup_ids: typing.List[str]
    # Snapshot ids attached to the session — presumably Nanny snapshots; TODO confirm.
    snapshot_ids: typing.List[str]
    # Set to True by ``get_current_reroll`` for the session that is still running.
    is_in_progress: bool = False


@dataclasses.dataclass
class BasicTentaclesMetrics:
    """Common base for the per-SLO metric containers of this module.

    Declares no fields. ``is_slo_ok`` has to be overridden by subclasses,
    while ``get_yasm_signals`` defaults to an empty mapping.
    """

    def is_slo_ok(self, limit=None) -> bool:
        """Tell whether the SLO holds; the base implementation always raises.

        Args:
            limit: optional threshold override; unused here.

        Raises:
            NotImplementedError: always, with the concrete class name in the message.
        """
        class_name = self.__class__.__name__
        raise NotImplementedError(f"is_slo_ok not implemented for {class_name}")

    def get_yasm_signals(self) -> typing.Dict[str, typing.Union[int, float]]:
        """Return a signal-name to value mapping; empty unless overridden."""
        no_signals: typing.Dict[str, typing.Union[int, float]] = {}
        return no_signals


@dataclasses.dataclass
class MetricsTentaclesAvailability(BasicTentaclesMetrics):
    """Availability counters of a tentacles group plus the monitoring threshold.

    Excluded tentacles are counted separately and take no part in the
    percentage calculation.
    """

    number_of_available_tentacles: int = 0
    number_of_unavailable_tentacles: int = 0
    number_of_excluded_tentacles: int = 0
    # Lower bound (in percent) used by ``is_slo_ok`` when no explicit limit is given.
    monitoring_min_percent_of_available_tentacles: int = 0

    @property
    def number_of_not_excluded_tentacles(self):
        """Sum of available and unavailable tentacles."""
        available = self.number_of_available_tentacles
        unavailable = self.number_of_unavailable_tentacles
        return available + unavailable

    @property
    def percent_of_available_tentacles(self):
        """Share of available tentacles among the non-excluded ones, 0..100.

        Returns 0 when there are no non-excluded tentacles at all.
        """
        considered = self.number_of_not_excluded_tentacles
        if considered:
            return self.number_of_available_tentacles / considered * 100
        return 0

    def get_yasm_signals(self) -> typing.Dict[str, typing.Union[int, float]]:
        """Return availability counters, percentage and threshold keyed by signal name."""
        signals = {}
        signals["number_of_available_tentacles_axxv"] = self.number_of_available_tentacles
        signals["number_of_not_excluded_for_available_tentacles_axxv"] = self.number_of_not_excluded_tentacles
        signals["percent_of_available_tentacles_axxv"] = self.percent_of_available_tentacles
        signals["monitoring_min_percent_of_available_tentacles_axxx"] = (
            self.monitoring_min_percent_of_available_tentacles
        )
        return signals

    def is_slo_ok(self, limit=None) -> bool:
        """Check that the available percentage is not below the threshold.

        Args:
            limit: threshold in percent. Any falsy value (``None`` and also
                ``0``) falls back to the configured monitoring minimum.
        """
        threshold = limit if limit else self.monitoring_min_percent_of_available_tentacles
        return self.percent_of_available_tentacles >= threshold


@dataclasses.dataclass
class MetricsTentaclesTimestampResourceFreshness(BasicTentaclesMetrics):
    """Timestamp-resource freshness counters together with redeployment state.

    Holds how many tentacles carry a fresh timestamp resource, data about the
    last successful redeployment, the list of complete sessions and the
    session currently in progress (if any). The ``monitoring_*_border`` values
    are subtracted from ``int(time.time())``, i.e. they are durations in seconds.
    """

    # Freshness counters; excluded tentacles take no part in the percentage.
    number_of_tentacles_with_fresh_ts_resource: int = 0
    number_of_tentacles_with_stalled_ts_resource: int = 0
    number_of_excluded_tentacles: int = 0
    monitoring_min_percent_of_tentacles_with_fresh_ts_resource: int = 0

    # Last successful redeployment; 0 means "not known".
    last_success_redeployment_end_ts: int = 0
    last_success_duration: int = 0
    monitoring_last_success_redeployment_border: int = 0

    # Complete sessions stored as dicts with the keys of ``Reroll``.
    complete_redeployment_sessions: list = dataclasses.field(default_factory=list)

    redeployment_in_progress: bool = False

    # Session that is running right now; 0 / empty when there is none.
    current_redeployment_start_ts: int = 0
    current_redeployment_end_ts: int = 0
    monitoring_current_redeployment_border: int = 0
    nanny_current_taskgroup_ids: list = dataclasses.field(default_factory=list)
    nanny_current_snapshot_ids: list = dataclasses.field(default_factory=list)

    def get_complete_rerolls(self) -> typing.List[Reroll]:
        """Convert the stored complete-session dicts back into ``Reroll`` objects."""
        return [
            Reroll(
                start_ts=record["start_ts"],
                end_ts=record["end_ts"],
                taskgroup_ids=record["taskgroup_ids"],
                snapshot_ids=record["snapshot_ids"],
            )
            for record in self.complete_redeployment_sessions
        ]

    def get_current_reroll(self) -> typing.Optional[Reroll]:
        """Return the running redeployment as a ``Reroll``, or ``None`` if idle."""
        if not self.redeployment_in_progress:
            return None
        return Reroll(
            start_ts=self.current_redeployment_start_ts,
            end_ts=self.current_redeployment_end_ts,
            taskgroup_ids=self.nanny_current_taskgroup_ids,
            snapshot_ids=self.nanny_current_snapshot_ids,
            is_in_progress=True,
        )

    @property
    def number_of_not_excluded_tentacles(self):
        """Sum of tentacles with fresh and with stalled timestamp resource."""
        fresh = self.number_of_tentacles_with_fresh_ts_resource
        stalled = self.number_of_tentacles_with_stalled_ts_resource
        return fresh + stalled

    @property
    def percent_of_tentacles_with_fresh_ts_resource(self):
        """Share of fresh tentacles among non-excluded ones, 0..100 (0 when there are none)."""
        considered = self.number_of_not_excluded_tentacles
        if considered:
            return self.number_of_tentacles_with_fresh_ts_resource / considered * 100
        return 0

    def get_yasm_signals(self) -> typing.Dict[str, typing.Union[int, float]]:
        """Return freshness signals plus age/duration signals for known sessions.

        Age and duration entries appear only when the underlying value is non-zero.
        """
        now = int(time.time())
        signals = {
            "number_of_tentacles_with_fresh_ts_resource_axxv": self.number_of_tentacles_with_fresh_ts_resource,
            "number_of_not_excluded_for_tentacles_with_fresh_ts_resource_axxv": self.number_of_not_excluded_tentacles,
            "percent_of_tentacles_with_fresh_ts_resource_axxv": self.percent_of_tentacles_with_fresh_ts_resource,
            "monitoring_min_percent_of_tentacles_with_fresh_ts_resource_axxx":
                self.monitoring_min_percent_of_tentacles_with_fresh_ts_resource,
        }

        current_started_at = self.current_redeployment_start_ts
        if current_started_at:
            signals["current_redeployment_age_axxx"] = now - current_started_at

        last_finished_at = self.last_success_redeployment_end_ts
        if last_finished_at:
            signals["last_success_redeployment_age_axxx"] = now - last_finished_at

        last_duration = self.last_success_duration
        if last_duration:
            signals["last_complete_redeployment_in_progress_duration_sec__axxv"] = last_duration
        return signals

    def is_slo_ok(self, limit=None) -> bool:
        """Check freshness percentage and both redeployment age borders.

        The SLO is broken when the fresh percentage is below the threshold,
        when the last successful redeployment ended longer ago than its
        border, or when the current redeployment started longer ago than its
        border. Timestamps equal to 0 skip the corresponding check.

        Args:
            limit: threshold in percent. Any falsy value (``None`` and also
                ``0``) falls back to the configured monitoring minimum.
        """
        threshold = limit if limit else self.monitoring_min_percent_of_tentacles_with_fresh_ts_resource
        if self.percent_of_tentacles_with_fresh_ts_resource < threshold:
            return False

        now = int(time.time())
        last_finished_at = self.last_success_redeployment_end_ts
        if last_finished_at and now - self.monitoring_last_success_redeployment_border > last_finished_at:
            return False

        current_started_at = self.current_redeployment_start_ts
        if current_started_at and now - self.monitoring_current_redeployment_border > current_started_at:
            return False
        return True


@dataclasses.dataclass
class MetricsTentaclesReallocation(BasicTentaclesMetrics):
    """Reallocation session state and the monitoring borders applied to it.

    The ``monitoring_*_border`` values are subtracted from
    ``int(time.time())``, i.e. they are durations in seconds.
    """

    # Last successful reallocation; 0 means "not known".
    last_success_reallocation_end_ts: int = 0
    monitoring_last_success_reallocation_border: int = 0
    last_success_duration: int = 0

    # Complete sessions stored as dicts with the keys of ``Reroll``.
    complete_reallocation_sessions: list = dataclasses.field(default_factory=list)

    reallocation_in_progress: bool = False

    # Session that is running right now; 0 / empty when there is none.
    current_reallocation_start_ts: int = 0
    current_reallocation_end_ts: int = 0
    monitoring_current_reallocation_border: int = 0
    nanny_current_taskgroup_ids: list = dataclasses.field(default_factory=list)
    nanny_current_snapshot_ids: list = dataclasses.field(default_factory=list)

    def get_complete_rerolls(self) -> typing.List[Reroll]:
        """Convert the stored complete-session dicts back into ``Reroll`` objects."""
        return [
            Reroll(
                start_ts=record["start_ts"],
                end_ts=record["end_ts"],
                taskgroup_ids=record["taskgroup_ids"],
                snapshot_ids=record["snapshot_ids"],
            )
            for record in self.complete_reallocation_sessions
        ]

    def get_current_reroll(self) -> typing.Optional[Reroll]:
        """Return the running reallocation as a ``Reroll``, or ``None`` if idle."""
        if not self.reallocation_in_progress:
            return None
        return Reroll(
            start_ts=self.current_reallocation_start_ts,
            end_ts=self.current_reallocation_end_ts,
            taskgroup_ids=self.nanny_current_taskgroup_ids,
            snapshot_ids=self.nanny_current_snapshot_ids,
            is_in_progress=True,
        )

    def get_yasm_signals(self) -> typing.Dict[str, typing.Union[int, float]]:
        """Return age/duration signals; each one only when its source value is non-zero."""
        now = int(time.time())
        signals = {}

        current_started_at = self.current_reallocation_start_ts
        if current_started_at:
            signals["current_reallocation_age_axxx"] = now - current_started_at

        last_finished_at = self.last_success_reallocation_end_ts
        if last_finished_at:
            signals["last_success_reallocation_age_axxx"] = now - last_finished_at

        last_duration = self.last_success_duration
        if last_duration:
            signals["last_complete_reallocation_in_progress_duration_sec__axxv"] = last_duration
        return signals

    def is_slo_ok(self, limit=None) -> bool:
        """Check both reallocation age borders.

        The SLO is broken when the last successful reallocation ended longer
        ago than its border, or when the current reallocation started longer
        ago than its border. Timestamps equal to 0 skip the corresponding
        check. ``limit`` is accepted for interface compatibility and ignored.
        """
        now = int(time.time())

        last_finished_at = self.last_success_reallocation_end_ts
        if last_finished_at and now - self.monitoring_last_success_reallocation_border > last_finished_at:
            return False

        current_started_at = self.current_reallocation_start_ts
        if current_started_at and now - self.monitoring_current_reallocation_border > current_started_at:
            return False
        return True


@dataclasses.dataclass
class MetricsTentaclesYpUnusedNodes(BasicTentaclesMetrics):
    """Counters of unused nodes and the monitoring threshold for them."""

    unused_nodes_count: int
    total_nodes_count: int
    # Despite the "min" in its name, ``is_slo_ok`` uses this value as an upper
    # bound: the SLO holds while the unused percentage does not exceed it.
    monitoring_min_percent_of_unused_nodes: int
    unused_nodes: typing.List[str] = dataclasses.field(default_factory=list)

    def get_percent_of_unused_nodes(self):
        """Return the share of unused nodes among all nodes, 0..100.

        Returns 0 when ``total_nodes_count`` is zero instead of raising
        ``ZeroDivisionError``, matching how the other percentage helpers in
        this module treat an empty denominator.
        """
        if not self.total_nodes_count:
            return 0
        return self.unused_nodes_count / self.total_nodes_count * 100

    def get_yasm_signals(self) -> typing.Dict[str, typing.Union[int, float]]:
        """Return total and unused node counters keyed by signal name."""
        return {
            "total_nodes_count_axxv": self.total_nodes_count,
            "unused_nodes_count_axxv": self.unused_nodes_count,
        }

    def is_slo_ok(self, limit=None) -> bool:
        """Check that the unused-node percentage does not exceed the threshold.

        ``limit`` is accepted for interface compatibility and ignored; the
        configured ``monitoring_min_percent_of_unused_nodes`` is always used.
        """
        return self.get_percent_of_unused_nodes() <= self.monitoring_min_percent_of_unused_nodes


@dataclasses.dataclass
class MetricsTentaclesSchedulingErrors(BasicTentaclesMetrics):
    """Counters of scheduling errors on up nodes and the monitoring threshold."""

    scheduling_errors_on_up_nodes_count: int
    total_up_nodes_count: int
    # Despite the "min" in its name, ``is_slo_ok`` uses this value as an upper
    # bound: the SLO holds while the error percentage does not exceed it.
    monitoring_min_percent_of_scheduling_errors: int

    def get_percent_of_up_nodes_with_scheduling_errors(self):
        """Return the share of up nodes that have scheduling errors, 0..100.

        Returns 0 when ``total_up_nodes_count`` is zero instead of raising
        ``ZeroDivisionError``, matching how the other percentage helpers in
        this module treat an empty denominator.
        """
        if not self.total_up_nodes_count:
            return 0
        return self.scheduling_errors_on_up_nodes_count / self.total_up_nodes_count * 100

    def is_slo_ok(self, limit=None) -> bool:
        """Check that the scheduling-error percentage does not exceed the threshold.

        ``limit`` is accepted for interface compatibility and ignored; the
        configured ``monitoring_min_percent_of_scheduling_errors`` is always used.
        """
        return self.get_percent_of_up_nodes_with_scheduling_errors() <= self.monitoring_min_percent_of_scheduling_errors


@dataclasses.dataclass
class NonSloMetrics:
    """Pod counters that are exported as signals but have no SLO check."""

    total_pods_count: int = 0
    hq_pods_count: int = 0
    juggler_pods_count: int = 0

    def get_yasm_signals(self) -> typing.Dict[str, typing.Union[int, float]]:
        """Return the three pod counters keyed by their signal names."""
        signals: typing.Dict[str, typing.Union[int, float]] = {}
        signals["allocator_pods_count_axxv"] = self.total_pods_count
        signals["hq_pods_count_axxv"] = self.hq_pods_count
        signals["juggler_pods_count_axxv"] = self.juggler_pods_count
        return signals


@dataclasses.dataclass
class AllZoneMetrics:
    """All metric containers of a single allocation zone.

    The two trailing sections are optional and default to ``None``; they are
    skipped during serialisation and signal export when not set.
    """

    availability: MetricsTentaclesAvailability
    redeployed_on_time: MetricsTentaclesTimestampResourceFreshness
    reallocation: MetricsTentaclesReallocation
    non_slo_metrics: NonSloMetrics
    yp_unused_nodes: MetricsTentaclesYpUnusedNodes = None
    scheduling_errors: MetricsTentaclesSchedulingErrors = None

    # Plain class attribute (no annotation, hence not a dataclass field):
    # the key under which ``non_slo_metrics`` is stored by ``to_dict``.
    NON_SLO_METRICS_FIELD_NAME = "non_slo_metrics"

    def to_dict(self):
        """Serialise every present section with ``dataclasses.asdict``.

        Keys are ``SloType`` members for SLO sections and
        ``NON_SLO_METRICS_FIELD_NAME`` for the non-SLO counters.
        """
        serialized = {
            SloType.availability: dataclasses.asdict(self.availability),
            SloType.redeployed_on_time: dataclasses.asdict(self.redeployed_on_time),
            SloType.reallocation: dataclasses.asdict(self.reallocation),
            AllZoneMetrics.NON_SLO_METRICS_FIELD_NAME: dataclasses.asdict(self.non_slo_metrics),
        }

        unused_nodes = self.yp_unused_nodes
        if unused_nodes:
            serialized[SloType.unused_nodes] = dataclasses.asdict(unused_nodes)

        scheduling_errors = self.scheduling_errors
        if scheduling_errors:
            serialized[SloType.scheduling_errors] = dataclasses.asdict(scheduling_errors)
        return serialized

    @staticmethod
    def from_dict(dict_repr):
        """Rebuild an ``AllZoneMetrics`` from the mapping produced by ``to_dict``.

        The four mandatory sections must be present; the optional ones are
        restored only when their entry exists and is non-empty.
        """
        restored = AllZoneMetrics(
            availability=MetricsTentaclesAvailability(**dict_repr[SloType.availability]),
            redeployed_on_time=MetricsTentaclesTimestampResourceFreshness(**dict_repr[SloType.redeployed_on_time]),
            reallocation=MetricsTentaclesReallocation(**dict_repr[SloType.reallocation]),
            non_slo_metrics=NonSloMetrics(**dict_repr[AllZoneMetrics.NON_SLO_METRICS_FIELD_NAME]),
        )

        unused_nodes_repr = dict_repr.get(SloType.unused_nodes)
        if unused_nodes_repr:
            restored.yp_unused_nodes = MetricsTentaclesYpUnusedNodes(**unused_nodes_repr)

        scheduling_errors_repr = dict_repr.get(SloType.scheduling_errors)
        if scheduling_errors_repr:
            restored.scheduling_errors = MetricsTentaclesSchedulingErrors(**scheduling_errors_repr)
        return restored

    def get_yasm_signals(self):
        """Yield ``(signal_name, value)`` pairs from every present section, in field order."""
        yield from self.availability.get_yasm_signals().items()
        yield from self.redeployed_on_time.get_yasm_signals().items()
        yield from self.reallocation.get_yasm_signals().items()
        yield from self.non_slo_metrics.get_yasm_signals().items()

        unused_nodes = self.yp_unused_nodes
        if unused_nodes:
            yield from unused_nodes.get_yasm_signals().items()

        scheduling_errors = self.scheduling_errors
        if scheduling_errors:
            yield from scheduling_errors.get_yasm_signals().items()


class MetricsProviderException(Exception):
    """Exception type declared by this module; not raised in the code visible here."""


def _add_reallocation_info(
    allocation_zone: str,
    reallocation_metrics: MetricsTentaclesReallocation,
    clickhouse_client: ClickhouseClient,
    config_interface: ConfigInterface,
):
    """Fill ``reallocation_metrics`` in place from reallocation history and config.

    Args:
        allocation_zone: zone name; passed to the history as ``nanny_service_name``.
        reallocation_metrics: object to mutate.
        clickhouse_client: client handed over to the history object.
        config_interface: source of the reallocation monitoring settings.

    Besides the declared fields, an ad-hoc ``allocation_zone`` attribute is set
    on the metrics object; it is not a dataclass field, so
    ``dataclasses.asdict`` does not include it.
    """
    reallocation_history = history.ReallocationHistory(
        nanny_service_name=allocation_zone,
        config_interface=config_interface,
        clickhouse_client=clickhouse_client,
        end_ts=int(time.time()),
        wait_for_end_ts=False,
    )

    reallocation_metrics.allocation_zone = allocation_zone

    last_success = reallocation_history.get_last_success_active_session()
    if last_success:
        reallocation_metrics.last_success_reallocation_end_ts = last_success.end_of_active_phase
        reallocation_metrics.last_success_duration = last_success.in_progress_duration

    finished_sessions = reallocation_history.get_complete_sessions()
    if finished_sessions:
        # Sessions are stored as plain dicts with the keys of ``Reroll``.
        reallocation_metrics.complete_reallocation_sessions.extend(
            dataclasses.asdict(
                Reroll(
                    start_ts=finished.start_ts,
                    end_ts=finished.end_of_active_phase,
                    taskgroup_ids=finished.taskgroup_ids,
                    snapshot_ids=finished.snapshot_ids,
                )
            )
            for finished in finished_sessions
        )

    running = reallocation_history.get_current_active_session()
    if running:
        reallocation_metrics.reallocation_in_progress = True
        reallocation_metrics.current_reallocation_start_ts = running.start_ts
        reallocation_metrics.current_reallocation_end_ts = running.end_of_active_phase
        reallocation_metrics.nanny_current_taskgroup_ids = running.taskgroup_ids
        reallocation_metrics.nanny_current_snapshot_ids = running.snapshot_ids

    # Borders are configured in minutes and stored multiplied by 60.
    monitoring_settings = config_interface.get_tentacles_group_reallocation_settings(allocation_zone)["monitoring"]
    stalled_after_min = monitoring_settings["reallocation_stalled_after_min"]
    reallocation_metrics.monitoring_current_reallocation_border = stalled_after_min * 60
    maximum_age_min = monitoring_settings["last_reallocation_maximum_age_min"]
    reallocation_metrics.monitoring_last_success_reallocation_border = maximum_age_min * 60


def _add_redeployment_info(
    allocation_zone: str,
    redeployed_on_time_metrics: MetricsTentaclesTimestampResourceFreshness,
    clickhouse_client: ClickhouseClient,
    config_interface: ConfigInterface,
):
    """Fill ``redeployed_on_time_metrics`` in place from redeployment history and config.

    Args:
        allocation_zone: zone name; passed to the history as ``nanny_service_name``.
        redeployed_on_time_metrics: object to mutate.
        clickhouse_client: client handed over to the history object.
        config_interface: source of the redeployment monitoring settings.

    Besides the declared fields, an ad-hoc ``allocation_zone`` attribute is set
    on the metrics object; it is not a dataclass field, so
    ``dataclasses.asdict`` does not include it.
    """
    redeployment_history = history.RedeploymentHistory(
        nanny_service_name=allocation_zone,
        config_interface=config_interface,
        clickhouse_client=clickhouse_client,
        end_ts=int(time.time()),
        wait_for_end_ts=False,
    )

    redeployed_on_time_metrics.allocation_zone = allocation_zone

    last_success = redeployment_history.get_last_success_active_session()
    if last_success:
        redeployed_on_time_metrics.last_success_redeployment_end_ts = last_success.end_of_active_phase
        redeployed_on_time_metrics.last_success_duration = last_success.in_progress_duration

    finished_sessions = redeployment_history.get_complete_sessions()
    if finished_sessions:
        # Sessions are stored as plain dicts with the keys of ``Reroll``.
        redeployed_on_time_metrics.complete_redeployment_sessions.extend(
            dataclasses.asdict(
                Reroll(
                    start_ts=finished.start_ts,
                    end_ts=finished.end_of_active_phase,
                    taskgroup_ids=finished.taskgroup_ids,
                    snapshot_ids=finished.snapshot_ids,
                )
            )
            for finished in finished_sessions
        )

    running = redeployment_history.get_current_active_session()
    if running:
        redeployed_on_time_metrics.redeployment_in_progress = True
        redeployed_on_time_metrics.current_redeployment_start_ts = running.start_ts
        redeployed_on_time_metrics.current_redeployment_end_ts = running.end_of_active_phase
        redeployed_on_time_metrics.nanny_current_taskgroup_ids = running.taskgroup_ids
        redeployed_on_time_metrics.nanny_current_snapshot_ids = running.snapshot_ids

    # Borders are configured in minutes and stored multiplied by 60.
    monitoring_settings = config_interface.get_tentacles_group_redeployment_settings(allocation_zone)["monitoring"]
    stalled_after_min = monitoring_settings["redeployment_stalled_after_min"]
    redeployed_on_time_metrics.monitoring_current_redeployment_border = stalled_after_min * 60
    maximum_age_min = monitoring_settings["last_redeployment_maximum_age_min"]
    redeployed_on_time_metrics.monitoring_last_success_redeployment_border = maximum_age_min * 60


def get_all_allocation_zones_metrics(
    config_interface: ConfigInterface,
    clickhouse_client: ClickhouseClient,
) -> typing.Dict[str, AllZoneMetrics]:
    """Collect metrics of every allocation zone found at the last timestamp.

    Rows of the tentacles table for the last timestamp are grouped by zone and
    flag columns; the group sizes are added to the per-zone counters. After
    that every zone gets its monitoring limits and redeployment info, and —
    when the zone config has ``reallocation_settings`` — reallocation info.

    Returns:
        Mapping from zone name to ``AllZoneMetrics``. The object is a
        ``collections.defaultdict``, so looking up an unknown zone creates an
        empty entry rather than raising ``KeyError``.
    """

    def _empty_zone_metrics() -> AllZoneMetrics:
        # Fresh, zero-initialised containers for a zone seen for the first time.
        return AllZoneMetrics(
            MetricsTentaclesAvailability(),
            MetricsTentaclesTimestampResourceFreshness(),
            MetricsTentaclesReallocation(),
            NonSloMetrics(),
        )

    result = collections.defaultdict(_empty_zone_metrics)

    grouping_columns = (
        "nanny_service_name",
        "slo_availablilty_available",
        "slo_availablilty_excluded",
        "slo_redeployment_fresh_ts_resource",
        "slo_redeployment_excluded",
        "is_hq_data_present",
        "is_juggler_data_present",
    )
    target_columns_str = ", ".join(grouping_columns)
    table_name = database.Tentacle.table_name()
    last_timestamp = database.get_last_timestamp()
    query = f"""
        (
            SELECT {target_columns_str}, COUNT() as num
            FROM {table_name}
            WHERE ts = {last_timestamp}
            GROUP BY {target_columns_str}
        )
    """

    for row in clickhouse_client.select(query):
        zone_metrics = result[row.nanny_service_name.name]
        pods_in_group = row.num

        pod_counters = zone_metrics.non_slo_metrics
        pod_counters.total_pods_count += pods_in_group
        if row.is_hq_data_present:
            pod_counters.hq_pods_count += pods_in_group
        if row.is_juggler_data_present:
            pod_counters.juggler_pods_count += pods_in_group

        availability = zone_metrics.availability
        if row.slo_availablilty_excluded:
            availability.number_of_excluded_tentacles += pods_in_group
        elif row.slo_availablilty_available:
            availability.number_of_available_tentacles += pods_in_group
        else:
            availability.number_of_unavailable_tentacles += pods_in_group

        freshness = zone_metrics.redeployed_on_time
        if row.slo_redeployment_excluded:
            freshness.number_of_excluded_tentacles += pods_in_group
        elif row.slo_redeployment_fresh_ts_resource:
            freshness.number_of_tentacles_with_fresh_ts_resource += pods_in_group
        else:
            freshness.number_of_tentacles_with_stalled_ts_resource += pods_in_group

    limits_config = config_interface.tentacles_groups_config
    for allocation_zone, zone_metrics in result.items():
        availability = zone_metrics.availability
        availability.monitoring_min_percent_of_available_tentacles = (
            limits_config.get_availability_limit(allocation_zone)
        )

        freshness = zone_metrics.redeployed_on_time
        freshness.monitoring_min_percent_of_tentacles_with_fresh_ts_resource = (
            limits_config.get_redeployment_limit(allocation_zone)
        )
        _add_redeployment_info(allocation_zone, freshness, clickhouse_client, config_interface)

        # Reallocation info is collected only for zones that configure it.
        zone_config = config_interface.get_allocation_zone_config(allocation_zone)
        if not zone_config.get("reallocation_settings"):
            continue
        _add_reallocation_info(
            allocation_zone, zone_metrics.reallocation,
            clickhouse_client, config_interface,
        )
    return result
