from collections import defaultdict

import luigi

from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.matching.device_matching.app_metrica import app_metrica_to_old_formats
from crypta.graph.v1.python.matching.yuid_matching import graph_merge_month
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import utils


def join_yuid_devid_by_mac(mac_key, recs):
    """Reducer: pair the devid seen on a mac with every yuid seen on the same mac.

    Records are split into devid-side and yuid-side lists by ``mr.split_left_right``
    (presumably by input table order: devid_mac first, yuid_mac second -- confirm
    against mr_utils).

    :param mac_key: reduce key; only ``mac_key["mac"]`` is read (on the OOM path).
    :param recs: records sharing that mac.
    :return: generator of output rows. On ``mr.OomLimitException`` a single
        diagnostic row ``{"mac", "recs_count"}`` goes to output table 1; otherwise one
        devid-yuid row per distinct yuid goes to the default output table.
    """
    try:
        devid_recs, yuid_recs = mr.split_left_right(recs)
    except mr.OomLimitException as oom:
        # Report the oversized key to the second output table instead of joining it.
        yield {"mac": mac_key["mac"], "recs_count": oom.recs_count, "@table_index": 1}
        return

    # A pair can only be produced when both sides are present for this mac.
    if not (devid_recs and yuid_recs):
        return

    # only one devid should correspond to mac
    devid_rec = devid_recs[0]
    devid = devid_rec["devid"]
    mac = devid_rec["mac"]
    # abs(): NOTE(review) ts looks like it may be stored negated -- confirm with the devid_mac producer.
    devid_mac_last_date = utils.ts_to_datetime_str(abs(devid_rec["ts"]))
    match_chain = {config.ID_TYPE_MAC: {config.ID_SOURCE_TYPE_MOBILE_METRIKA: {mac: {devid_mac_last_date: 1}}}}

    # yuid -> date -> number of yuid-mac records seen on that date
    yuid_dates = defaultdict(lambda: defaultdict(int))
    for yuid_rec in yuid_recs:
        yuid_dates[yuid_rec["yuid"]][yuid_rec["id_date"]] += 1

    # .items() instead of the Python 2-only .iteritems(): same pairs, works on both interpreters.
    for yuid, dates in yuid_dates.items():
        source_type = config.ID_SOURCE_TYPE_VMETRO  # TODO: source type shouldn't be hardcoded here
        yield {
            "devid": devid,
            "yuid": yuid,
            "source_type": source_type,
            "match_chain": match_chain,
            "last_ts": devid_rec["ts"],
            "dates": utils.default_to_regular(dates),
        }


def to_mac_key(rec):
    """Mapper: copy ``id_value`` into a ``mac`` field and emit the same record.

    The incoming record is modified in place and yielded as-is (not copied).
    """
    rec.update(mac=rec["id_value"])
    yield rec


def filter_multi_mac_yuids(yuid_key, recs):
    """Reducer: route a yuid's records by how many distinct macs they carry.

    Records go to output table 0 when exactly one distinct ``mac`` is present among
    them, and to output table 1 otherwise. Every record returned by
    ``mr.take_till_oom`` is emitted; the second value it returns is not used here
    (presumably the count of records left unread -- confirm against mr_utils).

    :param yuid_key: reduce key (unused).
    :param recs: records for a single yuid, each with a ``mac`` field.
    """
    loaded, _unused_rest = mr.take_till_oom(recs)
    distinct_macs = {item["mac"] for item in loaded}

    target_table = 0 if len(distinct_macs) == 1 else 1

    for item in loaded:
        item["@table_index"] = target_table
        yield item


def filter_multi_devid_macs(mac_key, recs):
    """Reducer: route a mac's records by how many distinct devids they carry.

    Keys whose mac length reaches ``config.YT_KEY_SIZE_LIMIT`` produce no output.
    Otherwise records go to output table 0 when exactly one distinct ``devid`` is
    present among them, and to output table 1 otherwise. The second value returned
    by ``mr.take_till_oom`` is not used here (presumably the count of records left
    unread -- confirm against mr_utils).

    :param mac_key: reduce key; ``mac_key["mac"]`` is checked against the size limit.
    :param recs: records for a single mac, each with a ``devid`` field.
    """
    mac_is_too_long = len(mac_key["mac"]) >= config.YT_KEY_SIZE_LIMIT
    if mac_is_too_long:
        return

    loaded, _unused_rest = mr.take_till_oom(recs)
    distinct_devids = {item["devid"] for item in loaded}

    target_table = 0 if len(distinct_devids) == 1 else 1

    for item in loaded:
        item["@table_index"] = target_table
        yield item


class JoinDevidYuidByMac(yt_luigi.BaseYtTask):
    """Task that builds the devid-yuid table for the vmetro source by joining on mac."""

    date = luigi.Parameter()
    tags = ["v1"]

    def input_folders(self):
        """Return the named input folders: raw yuid tables and the dicts folder itself."""
        return {
            "yuid_raw_month": config.GRAPH_YT_DICTS_FOLDER + "yuid_raw/",
            "dict": config.GRAPH_YT_DICTS_FOLDER,
        }

    def output_folders(self):
        """Return the named output folder, which is specific to ``self.date``."""
        return {
            "indevice_month": config.INDEVICE_YT_FOLDER + self.date + "/perfect/devid_raw_month/",
        }

    def requires(self):
        """Return the upstream tasks this join depends on."""
        # devid-mac dict
        devid_mac_task = app_metrica_to_old_formats.ConvertAppMetricaDictsToOldFormats(self.date)
        # yuid-mac vmetro
        yuid_mac_task = graph_merge_month.IncrementalDayAndDumpMergeTask(self.date)
        return [devid_mac_task, yuid_mac_task]

    def before_run(self):
        """Create the ``vmetro`` working folder under the output folder."""
        mr.mkdir(self.out_f("indevice_month") + "vmetro")

    def run(self):
        """Filter yuid-mac pairs, join them with devid-mac by mac, and sort the result."""
        out_dir = self.out_f("indevice_month")

        yuid_mac_src = (
            self.in_f("yuid_raw_month") + "yuid_with_" + config.ID_TYPE_MAC + "_" + config.ID_SOURCE_TYPE_VMETRO
        )
        devid_mac_src = self.in_f("dict") + "devid_mac"

        single_mac_yuids = out_dir + "vmetro/yuid_mac"
        multi_mac_yuids = out_dir + "vmetro/yuid_multi_mac"
        joined = out_dir + "devid_yuid_" + config.ID_SOURCE_TYPE_VMETRO
        joined_oom = joined + "_oom"

        # mac-yuid should be 1-n relationship to use it in matching
        self.yt.run_map_reduce(
            to_mac_key,
            filter_multi_mac_yuids,
            yuid_mac_src,
            [single_mac_yuids, multi_mac_yuids],
            reduce_by="yuid",
        )

        # assume devid_mac sorted
        self.yt.run_sort(single_mac_yuids, sort_by="mac")
        self.yt.run_reduce(
            join_yuid_devid_by_mac,
            [devid_mac_src, single_mac_yuids],
            [joined, joined_oom],
            reduce_by="mac",
        )

        self.yt.run_sort(joined, sort_by=[config.ID_TYPE_DEVID, config.ID_TYPE_YUID])

        # The filtered yuid-mac table is only an intermediate for the join.
        mr.drop(single_mac_yuids)

    def output(self):
        """Return the target for the joined devid-yuid table."""
        joined = self.out_f("indevice_month") + "devid_yuid_" + config.ID_SOURCE_TYPE_VMETRO
        return yt_luigi.YtTarget(joined)


def map_dump(rec):
    """Mapper: expand a dump record into one yuid-mac row per date.

    Records with a missing/empty ``mac_normalized`` or the all-zero mac are skipped.
    The mac is lower-cased before being written to ``id_value``.

    :param rec: record with ``mac_normalized``, ``yuid`` and an iterable ``dates``.
    :return: generator of rows with ``id_count`` fixed to 1 and the vmetro source type.
    """
    normalized = rec.get("mac_normalized")
    if not normalized or normalized == "000000000000":
        return

    mac_value = normalized.lower()
    yuid_value = rec["yuid"]
    for day in rec["dates"]:
        row = {
            "yuid": yuid_value,
            "id_date": day,
            "id_value": mac_value,
            "id_count": 1,
            "id_type": config.ID_TYPE_MAC,
            "source_type": config.ID_SOURCE_TYPE_VMETRO,
        }
        yield row
