from collections import defaultdict

import luigi

from crypta.graph.v1.python.matching.device_matching.perfect.config import merge_configs
from crypta.graph.v1.python.matching.device_matching.perfect.enrich.device_yuid_perfect_enrich import (
    join_info_async_op,
    EnrichToDevidYuidPairs,
)
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.matching.device_matching.indevice_utils import get_device_matching_type_multi_source
from crypta.graph.v1.python.matching.yuid_matching import graph_dict
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import utils
from crypta.graph.v1.python.utils import yt_clients


def get_devid_all_shema():
    """
    Builds the column-name -> type-name mapping passed as the schema of the
    ``devid_yuid_all`` table.

    On top of the fixed columns, every pair type from
    ``config.DEVID_PAIR_TYPES_PERFECT`` contributes a ``<source_type>_dates``
    column; the passport-oauth, account-manager and vmetro source types also
    get a ``<source_type>_match_chain`` column.

    :return: dict of column name to type name
    """
    columns = {
        "devid": "string",
        "yuid": "string",
        "source_types": "any",
        "dates": "any",
        "last_ts": "int64",
        "uuid_ua_profile": "string",
        "yuid_ua_profile": "string",
        "source_match_type": "string",
        "ua_match_type": "string",
        "yuid_browser": "string",
        "yuid_logins": "any",
        "yuid_useragent": "string",
    }

    # sources that are matched through another identifier
    chained_source_types = (
        config.ID_SOURCE_TYPE_PASSPORT_OAUTH,
        config.ID_SOURCE_TYPE_ACCOUNT_MANAGER,
        config.ID_SOURCE_TYPE_VMETRO,
    )

    for pair in config.DEVID_PAIR_TYPES_PERFECT:
        source_type = pair.source_type
        columns[source_type + "_dates"] = "any"
        if source_type in chained_source_types:
            columns[source_type + "_match_chain"] = "any"

    return columns


def reduce_to_devid_yuid_dict(devid_yuid_key, recs):
    """
    Reducer: folds all records of one (devid, yuid) key into a single record.

    :param devid_yuid_key: reduce key; its items are copied into the output record
    :param recs: records of that key; each must contain "source_type" and may
        contain "dates", "match_chain", "uuid_ua_profile" and "last_ts"
    :return: yields exactly one record with
        - "source_types": list of distinct source types seen,
        - "dates": sorted union of all dates,
        - "<source_type>_dates": sorted dates per source type,
        - "<source_type>_match_chain": merged match chain per source type,
        - "uuid_ua_profile": the last truthy-or-not value seen, written only if truthy,
        - "last_ts": minimum of the non-null "last_ts" values, or None if there are none.
    """
    out_rec = dict(devid_yuid_key)
    source_types = set()
    uuid_ua_profile = None
    last_tss = []
    dates = set()
    dates_per_source = defaultdict(set)
    match_chains_per_source = defaultdict(dict)

    for r in recs:
        source_type = r["source_type"]
        source_types.add(source_type)

        if "dates" in r:
            source_dates = r["dates"]
            dates_per_source[source_type].update(source_dates)
            dates.update(source_dates)

        if "match_chain" in r:
            match_chains_per_source[source_type].update(r["match_chain"])

        if "uuid_ua_profile" in r:
            uuid_ua_profile = r["uuid_ua_profile"]

        # The list used to stay empty forever, which made "last_ts" always None;
        # collect the per-record values so the aggregation below is reachable.
        # NOTE(review): assumes input "last_ts" values are integers - confirm.
        if "last_ts" in r and r["last_ts"] is not None:
            last_tss.append(r["last_ts"])

    if uuid_ua_profile:
        out_rec["uuid_ua_profile"] = uuid_ua_profile

    if last_tss:
        out_rec["last_ts"] = min(last_tss)  # tss are negative numbers
    else:
        out_rec["last_ts"] = None

    # .items() instead of .iteritems(): works on both Python 2 and Python 3
    for source_type, source_dates in dates_per_source.items():
        out_rec[source_type + "_dates"] = sorted(source_dates)

    for source_type, match_chain in match_chains_per_source.items():
        out_rec[source_type + "_match_chain"] = match_chain

    out_rec["dates"] = sorted(dates)

    out_rec["source_types"] = list(source_types)
    yield out_rec


def map_check_match_type(rec):
    """
    Mapper: stores the pair returned by ``get_device_matching_type_multi_source``
    (called with the record and its "source_types") into the record's
    "source_match_type" and "ua_match_type" fields and yields the record.
    """
    match_types = get_device_matching_type_multi_source(rec, rec["source_types"])
    rec["source_match_type"], rec["ua_match_type"] = match_types
    yield rec


def get_best_indevice_devids_for_yuid(same_ua_recs, n_recs):
    """
    Keeps the records of at most ``n_recs`` devids, chosen by source priority.

    A devid's rank is the smallest index of any of its source types in
    ``config.DEVID_PERFECT_INDEV_PRIORITY``; ties are broken by the devid
    value itself, only to make the ordering deterministic.

    :param same_ua_recs: in-device recs; each has "devid" and an iterable "source_types"
    :param n_recs: maximum number of distinct devids to keep
    :return: tuple ``(kept_recs, thrown_recs)``; when all recs share a single
        devid the input is returned unchanged together with an empty list
    :raises ValueError: if a source type is missing from the priority list
    """
    devid_sources = defaultdict(set)
    for rec in same_ua_recs:
        devid_sources[rec["devid"]].update(rec["source_types"])

    if len(devid_sources) == 1:
        # only a single devid, return all of its recs
        return same_ua_recs, []

    # Key-based sort is equivalent to the former cmp-based one (priority first,
    # then devid), runs on both Python 2 and 3, and computes the priority once
    # per devid instead of once per comparison.
    def priority_key(devid_and_sources):
        devid, sources = devid_and_sources
        best_priority = min(config.DEVID_PERFECT_INDEV_PRIORITY.index(s) for s in sources)
        return best_priority, devid

    ranked = sorted(devid_sources.items(), key=priority_key)
    best_devids = {devid for devid, _ in ranked[:n_recs]}

    best_devid_recs = [r for r in same_ua_recs if r["devid"] in best_devids]
    thrown_recs = [r for r in same_ua_recs if r["devid"] not in best_devids]

    return best_devid_recs, thrown_recs


def reduce_best_devids_per_yuid(yuid_key, recs):
    """
    Reducer: routes the records of one yuid to output tables via "@table_index".

    - 3: records whose "devid" is None;
    - 0 / 2: records whose "ua_match_type" differs from
      ``config.UA_PROFILE_MATCHES`` - 0 while the number of distinct devids
      among them is within ``config.default_devids_per_id_limit``, 2 otherwise;
    - 0 / 1: records with a matching ua profile - 0 for the devids selected by
      ``get_best_indevice_devids_for_yuid``, 1 for the rest, which also get a
      "best_devid" field describing the selected devids and their sources.

    :param yuid_key: reduce key (not used by the body)
    :param recs: records of the yuid
    :return: yields every input record exactly once, tagged with "@table_index"
    """
    all_recs = list(recs)

    indevice_recs = []
    crossdevice_recs = []
    crossdevice_devids = set()
    for row in all_recs:
        if row["devid"] is None:
            row["@table_index"] = 3
            yield row
            continue
        if row["ua_match_type"] == config.UA_PROFILE_MATCHES:
            indevice_recs.append(row)
            continue
        crossdevice_devids.add(row["devid"])
        crossdevice_recs.append(row)

    # cross-device: only the number of linked devices is limited
    if crossdevice_recs:
        within_limit = len(crossdevice_devids) <= config.default_devids_per_id_limit
        crossdevice_table = 0 if within_limit else 2
        for row in crossdevice_recs:
            row["@table_index"] = crossdevice_table
            yield row

    # in-device: keep the best devids, send the others to a separate table
    if not indevice_recs:
        return

    kept_recs, dropped_recs = get_best_indevice_devids_for_yuid(
        indevice_recs, config.default_devids_per_id_limit
    )

    kept_devid_sources = defaultdict(set)  # debug info attached to dropped recs
    for row in kept_recs:
        kept_devid_sources[row["devid"]].update(row["source_types"])
        row["@table_index"] = 0
        yield row

    for row in dropped_recs:
        row["best_devid"] = utils.default_to_regular(kept_devid_sources)
        row["@table_index"] = 1
        yield row


def make_devid_yuid_dict(enriched_month_tables, workdir, yuid_with_all):
    """
    Builds ``<workdir>devid_yuid_all`` from the enriched month tables.

    Steps, in order:
    1. reduce the input tables by (devid, yuid) into ``devid_yuid_all_tmp``;
    2. sort it by yuid and run ``join_info_async_op`` against ``yuid_with_all``
       into ``devid_yuid_all_tmp_2`` (mapping ua_profile, browser,
       login_fp_dates and ua to their ``yuid_*`` columns);
    3. map ``map_check_match_type`` over the joined table in place and re-sort it;
    4. inside a transaction: recreate ``devid_yuid_all`` with a strict schema,
       reduce by yuid into the result table and the three side tables, sort the
       result by devid and merge chunks of the side tables;
    5. drop both temporary tables.

    :param enriched_month_tables: input table paths for the first reduce
    :param workdir: path prefix (used by plain concatenation) for all tables created here
    :param yuid_with_all: path of the table joined by yuid
    """
    yt_client = yt_clients.get_yt_client()

    merged_tmp = workdir + "devid_yuid_all_tmp"
    joined_tmp = workdir + "devid_yuid_all_tmp_2"

    yt_client.run_reduce(reduce_to_devid_yuid_dict, enriched_month_tables, merged_tmp, reduce_by=["devid", "yuid"])

    # add yuid ua profile
    yt_client.run_sort(merged_tmp, sort_by="yuid")

    yuid_columns = {
        "ua_profile": "yuid_ua_profile",
        "browser": "yuid_browser",
        "login_fp_dates": "yuid_logins",
        "ua": "yuid_useragent",
    }
    join_op = join_info_async_op(
        source_table=merged_tmp,
        join_table=yuid_with_all,
        target_table=joined_tmp,
        join_by="yuid",
        join_columns_mapping=yuid_columns,
    )
    utils.wait_all([join_op])

    yt_client.run_map(map_check_match_type, joined_tmp, joined_tmp)
    yt_client.run_sort(joined_tmp, sort_by="yuid", spec={"combine_chunks": True})

    result_table = workdir + "devid_yuid_all"
    indev_rejects = workdir + "bad_yuid_indev_multi_devid"
    crdev_rejects = workdir + "bad_yuid_crdev_multi_devid"

    with yt_client.Transaction() as tr:
        mr.create_table_with_schema(
            result_table,
            schema=get_devid_all_shema(),
            transaction=tr,
            strict=True,
            recreate_if_exists=True,
        )

        # output order must match the "@table_index" values set by the reducer
        reduce_outputs = [
            result_table,
            indev_rejects,
            crdev_rejects,
            workdir + "nullable_devid",
        ]
        yt_client.run_reduce(
            reduce_best_devids_per_yuid,
            joined_tmp,
            reduce_outputs,
            reduce_by="yuid",
            memory_limit=5 * 1024 * 1024 * 1024,
        )

        yt_client.run_sort(result_table, sort_by=["devid"])

        # NOTE(review): the *_not_found and *_oom tables are not created in this
        # function - presumably by join_info_async_op; confirm.
        tables_to_merge = [
            indev_rejects,
            crdev_rejects,
            workdir + "devid_yuid_all_tmp_2_not_found",
            workdir + "devid_yuid_all_tmp_2_oom",
        ]
        mr.merge_chunks_all(tables_to_merge)

    for tmp_table in (merged_tmp, joined_tmp):
        mr.drop(tmp_table)


class DeviceYuidsMergePerfectDictMonthTask(yt_luigi.BaseYtTask):
    """
    Task that builds ``devid_yuid_all`` in the per-date "perfect" folder and
    copies it into ``device_matching/`` under the dicts folder.
    """

    date = luigi.Parameter()
    tags = ["v1"]

    def input_folders(self):
        """Folders the task reads from: the per-date perfect folder and the dicts folder."""
        perfect_dir = config.INDEVICE_YT_FOLDER + self.date + "/perfect/"
        return {"device_perfect": perfect_dir, "dict": config.GRAPH_YT_DICTS_FOLDER}

    def output_folders(self):
        """Folders the task writes to; same locations as the input folders."""
        perfect_dir = config.INDEVICE_YT_FOLDER + self.date + "/perfect/"
        return {"device_perfect": perfect_dir, "dict": config.GRAPH_YT_DICTS_FOLDER}

    def requires(self):
        """Upstream tasks for the same date."""
        yuid_dicts_task = graph_dict.YuidAllIdDictsTask(self.date)
        enrich_task = EnrichToDevidYuidPairs(self.date)
        return [yuid_dicts_task, enrich_task]

    def before_run(self):
        """Creates the output folders."""
        perfect_dir = self.out_f("device_perfect")
        mr.mkdir(perfect_dir)
        dict_dir = self.out_f("dict") + "device_matching/"
        mr.mkdir(dict_dir)

    def run(self):
        """Builds the table in the perfect folder, then copies it to the dicts folder and stamps the date."""
        perfect_dir = self.out_f("device_perfect")
        dict_dir = self.out_f("dict") + "device_matching/"

        # one enriched month table per merge config
        month_tables = []
        for merge_config in merge_configs:
            month_tables.append(
                self.in_f("device_perfect") + "devid_month/devid_yuid_" + merge_config.source_type
            )

        make_devid_yuid_dict(month_tables, perfect_dir, self.in_f("dict") + "yuid_with_all")

        published_table = dict_dir + "devid_yuid_all"
        self.yt.copy(perfect_dir + "devid_yuid_all", published_table, force=True)
        mr.set_generate_date(published_table, self.date)

    def output(self):
        """Targets: the table in the perfect folder and the dated copy in the dicts folder."""
        perfect_target = yt_luigi.YtTarget(self.out_f("device_perfect") + "devid_yuid_all")
        dict_target = yt_luigi.YtDateTarget(self.out_f("dict") + "device_matching/devid_yuid_all", self.date)
        return [perfect_target, dict_target]
