import collections
import concurrent.futures
import dataclasses
import typing

from infra.rtc_sla_tentacles.backend.lib.clickhouse.client import ClickhouseClient

from infra.rtc_sla_tentacles.backend.lib.clickhouse import database
from infra.rtc_sla_tentacles.backend.lib.harvesters.base import Harvester
from infra.rtc_sla_tentacles.backend.lib.harvesters import juggler as mongo_juggler
from infra.rtc_sla_tentacles.backend.lib.harvesters import walle as mongo_walle
from infra.rtc_sla_tentacles.backend.lib.harvesters import yp_lite_allocation
from infra.rtc_sla_tentacles.backend.lib.clickhouse.manage_columns import get_diff_between_tables


# Harvester types that ClickhouseDumper.extract loads through _load_per_allocation_zone_data;
# the loaded data is stored on the same-named attribute of LastSnapshots.
PER_ALLOCATION_ZONE_HARVESTER_DEPS = ["gencfg", "yp_lite_allocation", "hq", "juggler"]

# YP node HFSM state for which _create_instance_row sets excluded_from_slo_by_allocator.
YP_HFSM_STATE_PREPARE_MAINTENANCE = "prepare_maintenance"


@dataclasses.dataclass
class LastSnapshots:
    """Container for the latest harvester snapshot data gathered by ``ClickhouseDumper.extract``."""

    # flat list of serialized Wall-E hosts, concatenated from every walle snapshot
    walle: list = dataclasses.field(default_factory=list)

    # per allocation zone
    # (the loaders key each of these by label.harvester_name and store a list of records per key)
    juggler: dict = dataclasses.field(default_factory=dict)
    hq: dict = dataclasses.field(default_factory=dict)
    gencfg: dict = dataclasses.field(default_factory=dict)
    yp_lite_allocation: dict = dataclasses.field(default_factory=dict)


class ClickhouseDumper(Harvester):
    harvester_type = "clickhouse_dumper"

    def _resolve_diff_of_db_and_data(self, client, rows, table_name):
        removed_fields = []

        appended, removed, modified = get_diff_between_tables(client, table_name)
        if appended or modified:
            self.logger.info("following columns will be excluded from dump:")
        if appended:
            self.logger.info("  new: %s", appended)
        if modified:
            self.logger.info("  modified: %s", modified)
        for category in (appended, modified):
            removed_fields.extend(category)
        for row in rows:
            for field in removed_fields:
                row._writable_fields.pop(field[0], None)

    def _load_walle(self, last_snapshots, stats) -> None:
        with self.time_it("loaded walle"):
            self.logger.debug("loading walle")
            extracted_data = []

            labels = self._snapshot_manager.get_last_snapshot_labels_from_harvester("walle")
            for label in labels:
                snapshot = self._snapshot_manager.read_snapshot(label)
                extracted_data.extend(snapshot.data[label.data_list_path])

            stats["walle"] = int(min(label.ts for label in labels))
            last_snapshots.walle = extracted_data

    def _load_per_allocation_zone_data(self, harvester_type, last_snapshots, stats) -> None:
        with self.time_it(f"loaded {harvester_type}"):
            self.logger.debug("loading %s", harvester_type)
            extracted_data = collections.defaultdict(list)

            labels = self._snapshot_manager.get_last_snapshot_labels_from_harvester(harvester_type)
            for label in labels:
                snapshot = self._snapshot_manager.read_snapshot(label)
                if harvester_type == "gencfg":
                    snapshot_data = snapshot.data
                elif harvester_type == "yp_lite_allocation":
                    pods_info = snapshot.data.get("pods_info")
                    if pods_info:
                        snapshot_data = [yp_lite_allocation.PodInfo(*i) for i in pods_info]
                    else:
                        # TODO(rocco66): remove it after relase
                        snapshot_data = snapshot.data["pods"]
                else:
                    snapshot_data = snapshot.data[label.data_list_path]

                extracted_data[label.harvester_name].extend(snapshot_data)

            stats[harvester_type] = int(min(label.ts for label in labels) if labels else 0)
            setattr(last_snapshots, harvester_type, extracted_data)

    def extract(self, ts: int):
        last_snapshots = LastSnapshots()
        stats = {}

        with concurrent.futures.ThreadPoolExecutor(max_workers=len(PER_ALLOCATION_ZONE_HARVESTER_DEPS) + 1) as executor:
            results = [
                executor.submit(self._load_per_allocation_zone_data, harvester_type, last_snapshots, stats)
                for harvester_type in PER_ALLOCATION_ZONE_HARVESTER_DEPS
            ] + [executor.submit(self._load_walle, last_snapshots, stats)]
        try:
            for result in results:
                result.result(timeout=30)
        except Exception as exc:
            for result in results:
                result.cancel()
            raise exc
        return last_snapshots, stats

    def _create_walle_row(self, ts, walle_info: mongo_walle.WalleHost):
        deploy_engines = walle_info.deploy_engines
        return database.WalleHost(
            ts=ts,
            fqdn=walle_info.fqdn,
            tentacles_location=_get_tentacles_location(walle_info),
            rtc_stage=walle_info.rtc_stage,
            deploy_gencfg=("gencfg" in deploy_engines),
            deploy_yp_lite=("yp" in deploy_engines),
            project=walle_info.walle_project,
            location_short_datacenter_name=walle_info.walle_location_short_datacenter_name,
            operation_state=walle_info.walle_operation_state,
            state=walle_info.walle_state,
            status=walle_info.walle_status,
            health_status=walle_info.walle_health_status,
            health_fail_reasons=walle_info.walle_health_fail_reasons,
            scenario_id=walle_info.scenario_id,
            ticket=walle_info.ticket,
        )

    def _process_yp_zone(self, ts, allocation_zone_id, yp_raw_allocator_data,
                         hq_raw_data, juggler_raw_data, walle_data: typing.Dict[str, mongo_walle.WalleHost]):
        hq_index = {i["spec"]["node_name"]: i for i in hq_raw_data}
        # NOTE(rocco66): e.g. fqdn='atrprp5vpdsgfxnv.man-pre.yp-c.yandex.net', yp_pod = 'atrprp5vpdsgfxnv'
        juggler_checks_index = {c["fqdn"][:c["fqdn"].index(".")]: c for c in juggler_raw_data}
        if isinstance(yp_raw_allocator_data[0], yp_lite_allocation.PodInfo):
            for pod_info in yp_raw_allocator_data:
                try:
                    hq_info = hq_index.get(pod_info.node_id)
                    juggler_info = juggler_checks_index.get(pod_info.pod_id)
                    walle_info = walle_data.get(pod_info.node_id)
                    yield self._create_instance_row(
                        pod_info.pod_id, pod_info.node_id, ts, allocation_zone_id, hq_info, walle_info, juggler_info,
                        deploy_yp_lite=True,
                        yp_scheduling_error=pod_info.scheduling_error,
                        yp_node_hfsm_state=pod_info.node_hfsm_state,
                    )
                except Exception:
                    self.logger.exception("Uncaught error while tentacle row creation")
        else:
            # TODO(rocco66): remove after release
            for yp_node, yp_pod in yp_raw_allocator_data:
                try:
                    hq_info = hq_index.get(yp_node)
                    juggler_info = juggler_checks_index.get(yp_pod)
                    walle_info = walle_data.get(yp_node)
                    yield self._create_instance_row(
                        yp_pod, yp_node, ts, allocation_zone_id, hq_info, walle_info, juggler_info,
                        deploy_yp_lite=True,
                    )
                except Exception:
                    self.logger.exception("Uncaught error while tentacle row creation")

    def _process_gencfg_zone(self, ts, allocation_zone_id, gencfg_nodes,
                             hq_raw_data, juggler_raw_data, walle_data: typing.Dict[str, mongo_walle.WalleHost]):
        hq_index = {i["spec"]["node_name"]: i for i in hq_raw_data}
        juggler_checks_index = {c["fqdn"]: c for c in juggler_raw_data}
        for dom0_fqdn in gencfg_nodes:
            try:
                hq_info = hq_index.get(dom0_fqdn)
                juggler_info = juggler_checks_index.get(dom0_fqdn)
                walle_info = walle_data.get(dom0_fqdn)
                yield self._create_instance_row(
                    dom0_fqdn, dom0_fqdn, ts, allocation_zone_id, hq_info, walle_info, juggler_info,
                    deploy_yp_lite=False,
                )
            except Exception:
                self.logger.exception("Uncaught error while tentacle row creation")

    def _create_instance_row(self, pod_id, dom0_fqdn, ts, allocation_zone_id,
                             hq_info, walle_info, juggler_info, deploy_yp_lite,
                             yp_scheduling_error="", yp_node_hfsm_state=""):
        if walle_info:
            excluded_from_slo_by_walle = _is_excluded_by_walle(walle_info)
        else:
            excluded_from_slo_by_walle = True
            # TODO(rocco66): stats?

        excluded_from_slo_by_allocator = yp_node_hfsm_state == YP_HFSM_STATE_PREPARE_MAINTENANCE

        fresh_ts_resource = False
        reachable = False
        visible = False
        present = _get_monitoring_timestamp_age_present(juggler_info)
        if present:
            visible = _get_monitoring_timestamp_age_visible(juggler_info)
        if visible:
            reachable = _get_monitoring_timestamp_age_reachable(juggler_info)
        if reachable:
            fresh_ts_resource = _get_monitoring_timestamp_age_fresh_ts_resource(juggler_info)

        slo_availability_excluded = (
            not present
            or not visible
            or excluded_from_slo_by_walle
            or excluded_from_slo_by_allocator
        )
        slo_availablilty_available = reachable

        slo_redeployment_excluded = slo_availability_excluded or not reachable
        slo_redeployment_fresh_ts_resource = fresh_ts_resource

        return database.Tentacle(
            ts=ts,
            pod_id=pod_id,
            fqdn=dom0_fqdn,
            tentacles_location=_get_tentacles_location(walle_info) if walle_info else "",
            rtc_stage=walle_info.rtc_stage if walle_info else "",
            deploy_gencfg=not deploy_yp_lite,
            deploy_yp_lite=deploy_yp_lite,
            yp_scheduling_error=yp_scheduling_error,
            yp_node_hfsm_state=yp_node_hfsm_state,
            juggler_status=juggler_info and _juggler_status_from_mongo(juggler_info.get("status")),
            juggler_state_kind=juggler_info and _juggler_state_kind_from_mongo(juggler_info.get("state_kind")),
            juggler_description=juggler_info and _juggler_description_from_mongo(juggler_info.get("description")),
            is_juggler_data_present=bool(juggler_info),
            nanny_service_name=allocation_zone_id,
            hq_id=hq_info and hq_info["meta"]["id"],
            hq_hostname=hq_info and hq_info["spec"]["hostname"],
            is_hq_data_present=bool(hq_info),
            excluded_from_slo_by_walle=excluded_from_slo_by_walle,
            excluded_from_slo_by_allocator=excluded_from_slo_by_allocator,
            walle_project=walle_info.walle_project if walle_info else "",
            walle_location_short_datacenter_name=walle_info.walle_location_short_datacenter_name if walle_info else "",
            walle_operation_state=walle_info.walle_operation_state if walle_info else "",
            walle_state=walle_info.walle_state if walle_info else "",
            walle_status=walle_info.walle_status if walle_info else "",
            walle_health_status=walle_info.walle_health_status if walle_info else "",
            walle_health_fail_reasons=walle_info.walle_health_fail_reasons if walle_info else [],
            walle_scenario_id=walle_info.scenario_id if walle_info else 0,
            walle_ticket=walle_info.ticket if walle_info else "",
            monitoring_timestamp_age_present=present,
            monitoring_timestamp_age_visible=visible,
            monitoring_timestamp_age_reachable=reachable,
            monitoring_timestamp_age_fresh_ts_resource=fresh_ts_resource,
            slo_availablilty_excluded=slo_availability_excluded,
            slo_availablilty_available=slo_availablilty_available,
            slo_redeployment_excluded=slo_redeployment_excluded,
            slo_redeployment_fresh_ts_resource=slo_redeployment_fresh_ts_resource,
        )

    def transform(self, ts: int, data_stats) -> typing.Tuple[typing.Any, typing.Any]:
        """Convert the extracted snapshots into ClickHouse rows and insert them.

        Three kinds of rows are written, in this order: tentacle rows (one per
        gencfg node and per YP pod), Wall-E host rows and a single iteration row
        carrying the per-harvester freshness values.

        Args:
            ts: timestamp stored in every row built by this call.
            data_stats: pair whose first item is the ``LastSnapshots`` and whose
                second item is the stats dict, as returned by ``extract``.

        Returns:
            ``(meta, {})`` where ``meta["written"]`` is the number of tentacle rows built.
        """
        data: LastSnapshots = data_stats[0]

        # Wall-E hosts are deserialized once and indexed by fqdn for the per-zone lookups below
        walle_data = {}
        with self.time_it("walle deserialization"):
            for serialized_walle_host in data.walle:
                walle_host = mongo_walle.WalleHost(*serialized_walle_host)
                walle_data[walle_host.fqdn] = walle_host

        instances_rows = []
        with self.time_it("tentacles rows creation"):
            # NOTE: data.hq / data.juggler are indexed directly; _load_per_allocation_zone_data stores
            # defaultdict(list) there, so a zone without hq or juggler data yields an empty list
            for allocation_zone_id, gencfg_nodes in data.gencfg.items():
                instances_rows.extend(self._process_gencfg_zone(
                    ts,
                    allocation_zone_id,
                    gencfg_nodes,
                    data.hq[allocation_zone_id],
                    data.juggler[allocation_zone_id],
                    walle_data,
                ))
            for allocation_zone_id, yp_data in data.yp_lite_allocation.items():
                instances_rows.extend(self._process_yp_zone(
                    ts,
                    allocation_zone_id,
                    yp_data,
                    data.hq[allocation_zone_id],
                    data.juggler[allocation_zone_id],
                    walle_data,
                ))

        with self.time_it("walle rows creation"):
            walle_rows = []
            for walle_info in walle_data.values():
                try:
                    walle_rows.append(self._create_walle_row(ts, walle_info))
                except Exception:
                    # a host that cannot be converted is logged and skipped
                    self.logger.exception("Uncaught error while walle row creation")

        client = ClickhouseClient(self.config_interface)
        # strip the columns reported as new or modified for each table before inserting
        self._resolve_diff_of_db_and_data(client, instances_rows, database.Tentacle.table_name())
        self._resolve_diff_of_db_and_data(client, walle_rows, database.WalleHost.table_name())

        self.logger.debug("now pushing %d rows to tentacles tables", len(instances_rows))
        with self.time_it("tentacles info inserting"):
            client.insert(instances_rows)

        self.logger.debug("now pushing %d rows to walle table", len(walle_rows))
        with self.time_it("walle info inserting"):
            client.insert(walle_rows)

        # one iteration row per run, holding the freshness value recorded for every harvester
        stats = data_stats[1]
        iterations = [database.Iteration(
            ts=ts,
            juggler_data_freshness=stats["juggler"],
            walle_data_freshness=stats["walle"],
            hq_data_freshness=stats["hq"],
            yp_lite_allocation_data_freshness=stats["yp_lite_allocation"],
            gencfg_data_freshness=stats["gencfg"],
        )]
        self._resolve_diff_of_db_and_data(client, iterations, database.Iteration.table_name())
        client.insert(iterations)
        meta = {"written": len(instances_rows)}
        return meta, {}


# Check descriptions with which a FLAPPING check still counts as reachable
# (see _get_monitoring_timestamp_age_reachable).
_FLAPPING_DESCRIPTIONS_THAT_ARE_OK = [
    mongo_juggler.Description.OK,
    mongo_juggler.Description.TIMESTAMP_TOO_OLD,
    mongo_juggler.Description.HTTP_CONNECT_ERROR
]

# State kinds with which a check counts as visible (see _get_monitoring_timestamp_age_visible).
_VISIBLE_STATE_KINDS = [
    mongo_juggler.StateKind.ACTUAL,
    mongo_juggler.StateKind.FLAPPING
]


def _is_excluded_by_walle(dom0_info: mongo_walle.WalleHost) -> bool:
    return (
        dom0_info.walle_state in ("probation", "free")
        or dom0_info.walle_operation_state == "decommissioned"
    )


# Wall-E short datacenter name -> tentacles location; "iva" and "myt" both map to "msk".
# Names missing here are passed through unchanged by _get_tentacles_location.
_DC_MAPPING = {
    "iva": "msk",
    "myt": "msk",
    "man": "man",
    "vla": "vla",
    "sas": "sas"
}


def _get_tentacles_location(walle_info: mongo_walle.WalleHost):
    """Return the tentacles location for a Wall-E host.

    The host's short datacenter name is translated through ``_DC_MAPPING``;
    a name that has no entry there is returned as is.
    """
    short_dc_name = walle_info.walle_location_short_datacenter_name
    if short_dc_name in _DC_MAPPING:
        return _DC_MAPPING[short_dc_name]
    return short_dc_name


def _juggler_status_from_mongo(status: mongo_juggler.Status) -> database.juggler_status_enum:
    if status is None:
        return
    mongo_enum = mongo_juggler.Status
    result_enum = database.juggler_status_enum
    if status == mongo_enum.OK:
        return result_enum.ok
    elif status == mongo_enum.WARN:
        return result_enum.warn
    elif status == mongo_enum.CRIT:
        return result_enum.crit
    else:
        return result_enum.other


def _juggler_state_kind_from_mongo(state_kind: mongo_juggler.StateKind) -> database.juggler_state_kind_enum:
    if state_kind is None:
        return
    mongo_enum = mongo_juggler.StateKind
    result_enum = database.juggler_state_kind_enum
    if state_kind == mongo_enum.ACTUAL:
        return result_enum.actual
    elif state_kind == mongo_enum.DOWNTIME_FORCE_OK:
        return result_enum.downtime_force_ok
    elif state_kind == mongo_enum.FLAPPING:
        return result_enum.flapping
    elif state_kind == mongo_enum.NO_DATA_FORCE_CRIT:
        return result_enum.no_data_force_crit
    else:
        return result_enum.other


def _juggler_description_from_mongo(description: mongo_juggler.Description) -> database.juggler_description_enum:
    if description is None:
        return
    mongo_enum = mongo_juggler.Description
    result_enum = database.juggler_description_enum
    if description == mongo_enum.OK:
        return result_enum.ok
    elif description == mongo_enum.TIMESTAMP_TOO_OLD:
        return result_enum.timestamp_too_old
    elif description == mongo_enum.RESOURCE_ERROR:
        return result_enum.resource_error
    elif description == mongo_enum.ICMPPING_DOWN:
        return result_enum.icmp_down
    elif description == mongo_enum.INVALID_HOSTNAME:
        return result_enum.invalid_hostname
    elif description == mongo_enum.HTTP_CONNECT_ERROR:
        return result_enum.http_connect_error
    elif description == mongo_enum.NO_DATA:
        return result_enum.no_data
    else:
        return result_enum.other


def _get_monitoring_timestamp_age_present(instance_check_data: dict) -> bool:
    return bool(instance_check_data)


def _get_monitoring_timestamp_age_visible(instance_check_data: dict) -> bool:
    """Tell whether the check's ``state_kind`` is one of ``_VISIBLE_STATE_KINDS``."""
    state_kind = instance_check_data.get("state_kind")
    return state_kind in _VISIBLE_STATE_KINDS


def _get_monitoring_timestamp_age_reachable(instance_check_data: dict) -> bool:
    """Tell whether the check data shows the instance as reachable.

    That is the case when any of the following holds:
      * the check is ACTUAL with status OK;
      * the timestamp resource is stalled, i.e.
        ``_get_monitoring_timestamp_age_fresh_ts_resource`` returns ``False``;
      * the check is FLAPPING with a description from
        ``_FLAPPING_DESCRIPTIONS_THAT_ARE_OK``.
    """
    state_kind = instance_check_data.get("state_kind")
    status = instance_check_data.get("status")
    description = instance_check_data.get("description")
    kinds = mongo_juggler.StateKind

    is_actual_ok = state_kind == kinds.ACTUAL and status == mongo_juggler.Status.OK
    is_ts_resource_stalled = not _get_monitoring_timestamp_age_fresh_ts_resource(instance_check_data)
    is_tolerated_flapping = (
        state_kind == kinds.FLAPPING
        and description in _FLAPPING_DESCRIPTIONS_THAT_ARE_OK
    )

    return is_actual_ok or is_ts_resource_stalled or is_tolerated_flapping


def _get_monitoring_timestamp_age_fresh_ts_resource(instance_check_data: dict) -> bool:
    """Tell whether the instance's timestamp resource is fresh.

    The resource counts as stalled only when the check is ACTUAL, its status is
    CRIT and its description is TIMESTAMP_TOO_OLD; in every other case it is
    reported as fresh.
    """
    is_stalled = (
        instance_check_data.get("state_kind") == mongo_juggler.StateKind.ACTUAL
        and instance_check_data.get("status") == mongo_juggler.Status.CRIT
        and instance_check_data.get("description") == mongo_juggler.Description.TIMESTAMP_TOO_OLD
    )
    return not is_stalled
