import logging
from functools import partial

import luigi

import radius_metrics_mr as radius_mr
from crypta.graph.v1.python.data_imports.import_logs import graph_watch_log
from crypta.graph.v1.python.infra.radius.export_radius_log import FilterByRadius

from crypta.graph.v1.python.lib.luigi import base_luigi_task
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.matching.yuid_matching import graph_yuid_info
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import utils
from crypta.graph.v1.python.utils import yt_clients

# Module-level logger shared by the helper functions and the Luigi task below.
logger = logging.getLogger("radius_metrics")


def enrich_with_ua_and_filter(yuid_table, out_table, workdir, yuid_all_dict):
    """Join the rows of ``yuid_table`` with user-agent data from ``yuid_all_dict``.

    The yuids found in ``yuid_table`` are read into memory and used to
    map-filter ``yuid_all_dict`` into a temporary table under ``workdir``.
    The full dictionary is deliberately not reduced, for performance reasons.
    Both tables are then sorted by ``yuid`` and reduced together with
    ``radius_mr.reduce_add_ua_profile`` into ``out_table``.

    :param yuid_table: path of the table whose rows carry a ``yuid`` column
    :param out_table: path of the table that receives the reduce output
    :param workdir: path prefix for the temporary table; it is concatenated
        as-is, so it is expected to end with a path separator
    :param yuid_all_dict: input handed to the filtering map operation
    """
    client = yt_clients.get_yt_client()

    # The whole yuid set lives in local memory and is bound into the mapper via partial().
    wanted_yuids = {row["yuid"] for row in client.read_table(yuid_table, raw=False)}
    ua_subset_table = workdir + "yuid_ua_tmp"
    keep_wanted_yuids = partial(radius_mr.filter_by_yuids, yuids=wanted_yuids)
    client.run_map(keep_wanted_yuids, yuid_all_dict, ua_subset_table)

    # Merge the filtered table onto itself, asking YT to combine its chunks.
    client.run_merge(ua_subset_table, ua_subset_table, spec={"combine_chunks": True})

    # Both reduce inputs have to be sorted by the reduce key first.
    mr.sort_all([yuid_table, ua_subset_table], sort_by="yuid")
    client.run_reduce(
        radius_mr.reduce_add_ua_profile,
        [yuid_table, ua_subset_table],
        out_table,
        reduce_by="yuid",
    )


def prepare_yuid_radius_data(radius_logs, watch_logs, workdir, yuid_dict):
    """Build the yuid / radius-login tables under ``workdir``.

    Steps, in order:

    1. Sort watch logs and radius logs by ``(ip, timestamp)`` in parallel and
       reduce them by ``ip`` with ``radius_mr.join_logs_by_ip_and_ts`` into
       ``yuid_rlogin_raw``.
    2. Re-sort by ``(yuid, timestamp)`` and resolve login conflicts per yuid
       into ``yuid_rlogin`` (conflicting rows go to ``yuid_rlogin_raw_conflict``).
    3. Add user-agent info via ``enrich_with_ua_and_filter`` into
       ``yuid_rlogin_final``.
    4. Split logins with ``radius_mr.remove_large_rlogins`` (``max_count=100``)
       into ``yuid_rlogin_final_stable`` and ``yuid_rlogin_final_large``.

    :param radius_logs: radius log tables used as sort input
    :param watch_logs: watch log tables used as sort input
    :param workdir: path prefix for every produced table; it is concatenated
        as-is, so it is expected to end with a path separator
    :param yuid_dict: source of user-agent info, forwarded to
        ``enrich_with_ua_and_filter``
    """
    client = yt_clients.get_yt_client()

    # All intermediate and final tables live directly under workdir.
    watch_sorted = workdir + "watch_log"
    radius_sorted = workdir + "radius_log"
    raw_links = workdir + "yuid_rlogin_raw"
    raw_conflicts = workdir + "yuid_rlogin_raw_conflict"
    links = workdir + "yuid_rlogin"
    links_with_ua = workdir + "yuid_rlogin_final"
    stable_links = workdir + "yuid_rlogin_final_stable"
    large_links = workdir + "yuid_rlogin_final_large"

    # Join radius and watchlog: both sorts are started asynchronously and awaited together.
    pending_sorts = [
        client.run_sort(watch_logs, watch_sorted, sort_by=["ip", "timestamp"], sync=False),
        client.run_sort(radius_logs, radius_sorted, sort_by=["ip", "timestamp"], sync=False),
    ]
    utils.wait_all(pending_sorts)
    client.run_reduce(
        radius_mr.join_logs_by_ip_and_ts,
        [watch_sorted, radius_sorted],
        [raw_links],
        sort_by=["ip", "timestamp"],
        reduce_by="ip",
    )

    # Sometimes radius login events may be missing, thus we count only first login as more robust
    client.run_sort(raw_links, sort_by=["yuid", "timestamp"])
    client.run_reduce(
        radius_mr.remove_login_conflicts_by_hits_freq,
        raw_links,
        [links, raw_conflicts],
        sort_by=["yuid", "timestamp"],
        reduce_by="yuid",
    )

    # Enrich external data with internal dicts:
    # add user-agent info and keep only records with user agent
    enrich_with_ua_and_filter(links, links_with_ua, workdir, yuid_dict)

    # Avoid inner-Yandex botnets. May be skipped and filtered later at final step
    client.run_sort(links_with_ua, sort_by="login")
    drop_large_logins = partial(radius_mr.remove_large_rlogins, max_count=100)
    client.run_reduce(
        drop_large_logins,
        links_with_ua,
        [stable_links, large_links],
        reduce_by="login",
    )


def get_available_log_tables(date, ndays, watch_log_table_name):
    """Collect the existing radius-log and watch-log tables for recent dates.

    Dates are taken from ``mr.list_dates_before`` for both the radius log
    folder and the output folder; only dates present in both are used. For
    each such date the expected table path is built and kept only if the
    table exists in YT.

    :param date: reference date handed to ``mr.list_dates_before``
    :param ndays: number of days handed to ``mr.list_dates_before``
    :param watch_log_table_name: path of the watch-log table relative to the
        per-date folder inside ``config.YT_OUTPUT_FOLDER``
    :returns: tuple ``(radius_logs, watch_logs)`` of two lists of table
        paths. Both are empty when the radius log folder is missing or no
        date is shared; either may also be empty when none of the expected
        tables exists.
    """
    # Input data
    yt_client = yt_clients.get_yt_client()
    # The folder is checked without its last character (presumably a trailing "/").
    if not yt_client.exists(config.RADIUS_LOG_YT_FOLDER[:-1]):
        logger.error("No radius log data is available. Stopping execution")
        return [], []

    # Logs from Radius auth server. Contains proven IP-login linkage by time
    radius_log_dates = mr.list_dates_before(config.RADIUS_LOG_YT_FOLDER, date, ndays)
    # Watch logs. Contains IP-yuid linkage by time. Mined from fingerprints and already filtered by radius log ips
    watch_log_dates = mr.list_dates_before(config.YT_OUTPUT_FOLDER, date, ndays)

    available_dates = sorted(set(radius_log_dates).intersection(watch_log_dates))
    if not available_dates:
        logger.error("No data is available to calculate metrics. Stopping execution")
        return [], []

    # Real lists are built on purpose: on Python 3 `filter` yields a one-shot
    # iterator that the log call below would exhaust (leaving nothing for the
    # caller) and that is always truthy, defeating the caller's emptiness check.
    radius_candidates = [config.RADIUS_LOG_YT_FOLDER + dt + "/radius_log" for dt in available_dates]
    radius_logs = [table for table in radius_candidates if yt_client.exists(table)]
    logger.info("Radius log tables for %d days: %s", ndays, "\n".join(radius_logs))

    watch_candidates = [config.YT_OUTPUT_FOLDER + dt + "/" + watch_log_table_name for dt in available_dates]
    watch_logs = [table for table in watch_candidates if yt_client.exists(table)]
    logger.info("Watch log %s tables for %d days: %s", watch_log_table_name, ndays, "\n".join(watch_logs))

    return radius_logs, watch_logs


class PrepareYuidRadiusData(base_luigi_task.BaseTask, yt_clients.YtClientMixin):
    """Luigi task that prepares the yuid / radius-login tables for one date.

    The work itself is done by ``prepare_yuid_radius_data``; this class wires
    up the upstream tasks, the working directory and the output target.
    """

    # Date the task is run for; also used to build the working directory path.
    date = luigi.Parameter()

    def requires(self):
        """Return the upstream tasks: watch log import, yuid info and radius filter."""
        watch_log_import = graph_watch_log.ImportWatchLogDayTask(date=self.date, run_date=self.date)
        yuid_info = graph_yuid_info.YuidInfoMonth(self.date)
        radius_filter = FilterByRadius(date=self.date)
        return [watch_log_import, yuid_info, radius_filter]

    def run(self):
        """Create the working directory and build the tables when logs are available.

        When no radius or watch log tables are found, an error is logged and
        nothing else is produced.
        """
        # Only the columns needed downstream are read; "id" is exposed as "yuid".
        ua_source = self.yt.TablePath(
            "{prefix}yandexuid/yuid_with_all_info".format(prefix=config.CRYPTA_IDS_STORAGE),
            columns=("yuid", "ua_profile", "ip_activity_type"),
            rename_columns={"id": "yuid"},
        )

        task_dir = "{prefix}{date}/yuid_rlogin/".format(prefix=config.RADIUS_METRICS_YT_FOLDER, date=self.date)
        mr.mkdir(task_dir)

        store_days = int(config.STORE_DAYS)
        radius_tables, watch_tables = get_available_log_tables(
            self.date, store_days, "raw_links/watch_log_filtered_by_radius"
        )
        if not (radius_tables and watch_tables):
            logger.error("Can't prepare yuid radius data")
            return

        prepare_yuid_radius_data(radius_tables, watch_tables, task_dir, ua_source)

    def output(self):
        """Return the target for the final ``yuid_rlogin_final_stable`` table."""
        stable_table = "{prefix}{date}/yuid_rlogin/yuid_rlogin_final_stable".format(
            prefix=config.RADIUS_METRICS_YT_FOLDER, date=self.date
        )
        return [yt_luigi.YtTarget(stable_table)]


def reduce_rlogin(login_key, recs):
    """Classify one login group by comparing the yuids of its left and right rows.

    Rows are partitioned with ``mr.split_left_right``. One summary record (a
    copy of ``login_key`` plus ``@table_index``) is yielded per group:

    * 0 -- both sides present, each has yuids the other lacks
    * 1 -- both sides present, only the left side has extra yuids
    * 2 -- both sides present, only the right side has extra yuids
    * 3 -- both sides present with identical yuid sets
    * 4 -- only left rows
    * 5 -- only right rows

    For case 2, one record per extra right-side yuid (``login``, ``yuid``,
    ``ua_profile``) is additionally yielded to table 6 before the summary.
    Nothing is yielded when both sides are empty.

    :param login_key: reduce key; must contain ``login``
    :param recs: iterable of rows with ``yuid`` and ``ua_profile`` fields
    """
    rows = list(recs)
    # Later rows win when a yuid occurs more than once.
    profile_by_yuid = {row["yuid"]: row["ua_profile"] for row in rows}

    left_rows, right_rows = mr.split_left_right(rows)
    summary = dict(login_key)

    if not (left_rows and right_rows):
        # One-sided group: nothing to compare, just report which side exists.
        if left_rows:
            summary["@table_index"] = 4
            yield summary
        elif right_rows:
            summary["@table_index"] = 5
            yield summary
        return

    left_yuids = {row["yuid"] for row in left_rows}
    right_yuids = {row["yuid"] for row in right_rows}
    only_left = left_yuids.difference(right_yuids)
    only_right = right_yuids.difference(left_yuids)

    if only_right and not only_left:
        # Emit the yuids seen only on the right side before the summary record.
        for yuid in only_right:
            yield {"login": login_key["login"], "yuid": yuid, "ua_profile": profile_by_yuid[yuid], "@table_index": 6}

    # (left has extras, right has extras) -> output table of the summary record
    table_by_diff = {
        (True, True): 0,
        (True, False): 1,
        (False, True): 2,
        (False, False): 3,
    }
    summary["@table_index"] = table_by_diff[(bool(only_left), bool(only_right))]
    yield summary
