from functools import partial

import luigi

from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr, utils
from crypta.graph.v1.python.v2.soup.soup_tables import SoupDailyLogTable, SoupDailyTable

from crypta.graph.soup.config.python import (  # N811 # noqa
    EDGE_TYPE as edges,
    ID_TYPE as ids,
    LOG_SOURCE as log_source,
    SOURCE_TYPE as source_type,
)


def map_dit_cookie_matching(rec):
    """Mapper: re-key one cookie-matching row for the join on ``dit_cookie``.

    Reads ``ext_id``, ``yuid`` and ``timestamp`` from ``rec`` and yields a single
    dict holding them under ``dit_cookie``, ``yuid`` and ``ts``. Any other field
    of ``rec`` is dropped. A missing field raises ``KeyError``.
    """
    dit_cookie = rec["ext_id"]
    yuid = rec["yuid"]
    ts = rec["timestamp"]
    yield {"dit_cookie": dit_cookie, "yuid": yuid, "ts": ts}


def match_dit_cookie(_dit_cookie_key, recs, yuid_ditid_ditmskdmp):
    """Reducer: link the latest yuid seen for a dit cookie to its DIT ids.

    :param _dit_cookie_key: reduce key; only its ``dit_cookie`` field is read,
        and only when reporting an OOM.
    :param recs: all rows sharing that key, coming from both reduce inputs.
    :param yuid_ditid_ditmskdmp: edge type handed to ``SoupDailyTable.make_rec``.

    Yields, for every email/phone row (only if at least one yuid row exists):

    * a soup record built with ``table_index=1``;
    * when the row carries a truthy ``phone_md5``, a plain dict with no explicit
      table index (presumably routed to the first output table - TODO confirm).

    If ``mr.split_left_right`` raises ``mr.OomLimitException``, a single
    diagnostic row tagged ``"@table_index": 2`` is yielded and nothing else.
    """
    try:
        dit_rows, cookie_rows = mr.split_left_right(recs)
    except mr.OomLimitException as oom:
        yield {"dit_cookie": _dit_cookie_key["dit_cookie"], "oom": oom.recs_count, "@table_index": 2}
        return

    # No yuid for this cookie: nothing to link.
    if not cookie_rows:
        return

    def _row_ts(row):
        return row["ts"]

    # The yuid row with the greatest ``ts`` wins.
    yuid = max(cookie_rows, key=_row_ts)["yuid"]

    for dit_row in dit_rows:
        dit_id = dit_row["dit_id"]
        row_date = dit_row["date"]

        yield SoupDailyTable.make_rec(yuid, dit_id, yuid_ditid_ditmskdmp, row_date, None, 1, table_index=1)

        phone_md5 = dit_row.get("phone_md5")
        if not phone_md5:
            continue

        yield {
            "yuid": yuid,
            "id_orig_value": "UNKNOWN",
            "id_date": row_date,
            "id_count": 1,
            "id_type": config.ID_TYPE_PHONE,
            "id_value": phone_md5,
            "source_type": config.ID_SOURCE_TYPE_DITMSK,
        }


def map_to_soup(rec, emailhash_ditid_ditmskdmp, phonehash_ditid_ditmskdmp):
    """Mapper: turn one email/phone row into soup records.

    :param rec: input row; ``email_md5`` and ``phone_md5`` are optional,
        ``dit_id`` and ``date`` are read whenever one of the hashes is truthy.
    :param emailhash_ditid_ditmskdmp: edge type used for the email hash record.
    :param phonehash_ditid_ditmskdmp: edge type used for the phone hash record.

    A truthy ``email_md5`` yields a record with ``table_index=0``; a truthy
    ``phone_md5`` yields a record with ``table_index=1``. The email record, if
    any, always comes first. Rows with neither hash yield nothing.
    """
    # (hash field, edge type, output table index), in emission order.
    routes = (
        ("email_md5", emailhash_ditid_ditmskdmp, 0),
        ("phone_md5", phonehash_ditid_ditmskdmp, 1),
    )

    for hash_field, edge_type, out_index in routes:
        hashed_id = rec.get(hash_field)
        if not hashed_id:
            continue
        yield SoupDailyTable.make_rec(
            hashed_id, rec["dit_id"], edge_type, rec["date"], None, 1, table_index=out_index
        )


def get_not_processed_tables(in_dir, last_processed):
    """List the tables of ``in_dir`` that still have to be processed.

    :param in_dir: folder passed to ``mr.ls`` with ``absolute_path=False``.
    :param last_processed: name of the last processed table, or a falsy value
        when nothing has been processed yet.
    :return: the full ``mr.ls`` listing when ``last_processed`` is falsy,
        otherwise a list of the entries that compare strictly greater than
        ``last_processed``.
    """
    listed = mr.ls(in_dir, absolute_path=False)
    if not last_processed:
        return listed

    # assume tables have date-related names like ts, so plain comparison
    # of names follows chronological order.
    return [name for name in listed if name > last_processed]


class ImportDitMskTask(yt_luigi.BaseYtTask):
    """Import not-yet-processed DIT MSK email/phone tables into daily soup tables.

    Outputs are three daily soup tables (email hash - dit id, phone hash - dit id,
    yuid - dit id) plus the ``yuid_with_phone_<source>`` table under ``yuid_raw``.
    """

    date = luigi.Parameter()
    run_date = luigi.Parameter()

    # Luigi resources claimed while the task runs: one unit of this lock.
    resources = {"import_dit_msk_lock": 1}

    def __init__(self, *args, **kwargs):
        """Create the log-table tracker and the three daily soup table handles."""
        super(ImportDitMskTask, self).__init__(*args, **kwargs)

        def daily_soup_table(left_id_type):
            # Every edge of this task ends in a DIT id and shares source and log source.
            edge_type = edges.get_edge_type(left_id_type, ids.DIT_ID, source_type.DIT_MSK, log_source.DITMSK)
            return SoupDailyTable(edge_type, self.date)

        self.dit_msk_log_table = SoupDailyLogTable(log_source.DITMSK, self.date)
        self.emailhash_ditid_ditmsk = daily_soup_table(ids.EMAIL_MD5)
        self.phonehash_ditid_ditmsk = daily_soup_table(ids.PHONE_MD5)
        self.yuid_ditid_ditmsk = daily_soup_table(ids.YANDEXUID)

    def input_folders(self):
        """Return the input folder paths keyed by alias."""
        day_folder = config.DIT_MSK_DAY_FOLDER
        folders = {}
        folders["dit_emails"] = day_folder + "raw-schematized/emails/"
        folders["dit_phones"] = day_folder + "raw-schematized/phones/"
        folders["dit_cookie_matching"] = config.COOKIE_MATCHING_FOLDER + "ditmsk/state"
        return folders

    def output_folders(self):
        """Return the per-date output folder paths keyed by alias."""
        dated_root = config.YT_OUTPUT_FOLDER + self.date
        return {
            "raw_links": dated_root + "/raw_links/",
            "yuid_raw": dated_root + "/yuid_raw/",
        }

    def output(self):
        """Return the date target of the yuid-with-phone table followed by the soup targets."""
        targets = [
            soup_table.as_target()
            for soup_table in (self.emailhash_ditid_ditmsk, self.phonehash_ditid_ditmsk, self.yuid_ditid_ditmsk)
        ]

        v1_table = self.out_f("yuid_raw") + "yuid_with_phone_" + config.ID_SOURCE_TYPE_DITMSK
        targets.insert(0, yt_luigi.YtDateTarget(v1_table, self.date))
        return targets

    def before_run(self):
        """Create both output folders."""
        for folder_alias in ("raw_links", "yuid_raw"):
            mr.mkdir(self.out_f(folder_alias))

    def run(self):
        """Process every email/phone table newer than the last processed one.

        With nothing new to process, the outputs are only created (without
        recreating existing soup tables) and finalized. Otherwise the cookie
        matching state is joined with the new email/phone rows by ``dit_cookie``,
        soup tables are filled, temporary tables are dropped and the newest
        processed table name is stored.
        """
        soup_tables = (self.emailhash_ditid_ditmsk, self.phonehash_ditid_ditmsk, self.yuid_ditid_ditmsk)

        with self.yt.Transaction() as transaction:
            last_processed = self.dit_msk_log_table.get_last_processed_log_table()
            # Only the emails folder is listed: assume phones are synced.
            pending = get_not_processed_tables(self.in_f("dit_emails"), last_processed)

            v1_out_table = self.out_f("yuid_raw") + "yuid_with_phone_" + config.ID_SOURCE_TYPE_DITMSK

            if not pending:
                self.yt.create_table(v1_out_table, ignore_existing=True)
                mr.set_generate_date(v1_out_table, self.date)

                for soup_table in soup_tables:
                    soup_table.create(transaction, recreate_if_exists=False)
                SoupDailyTable.finalize_all(list(soup_tables))
                return

            raw_links = self.out_f("raw_links")
            yuid_dit_cookie_table = raw_links + "yuid_dit_cookie"
            email_phone_dit_table = raw_links + "email_phone_dit"

            self.yt.run_map(map_dit_cookie_matching, self.in_f("dit_cookie_matching"), yuid_dit_cookie_table)

            # All pending email tables first, then the same-named phone tables.
            sort_inputs = [
                self.in_f(folder_alias) + table_name
                for folder_alias in ("dit_emails", "dit_phones")
                for table_name in pending
            ]

            # Both sorts are started asynchronously and awaited together.
            cookie_sort = self.yt.run_sort(yuid_dit_cookie_table, sort_by="dit_cookie", sync=False)
            email_phone_sort = self.yt.run_sort(
                sort_inputs,
                email_phone_dit_table,
                sort_by="dit_cookie",
                spec={"schema_inference_mode": "from_output"},
                sync=False,
            )
            utils.wait_all([cookie_sort, email_phone_sort])

            soup_emails, soup_phones, soup_yuids = [soup_table.create(transaction) for soup_table in soup_tables]

            # Output order matters: the reducer addresses outputs by table index.
            cookie_reducer = partial(match_dit_cookie, yuid_ditid_ditmskdmp=self.yuid_ditid_ditmsk.edge_type)
            self.yt.run_reduce(
                cookie_reducer,
                [email_phone_dit_table, yuid_dit_cookie_table],
                [v1_out_table, soup_yuids, raw_links + "yuid_dit_cookie_oom"],
                reduce_by=["dit_cookie"],
            )

            # don't care about duplicates
            soup_mapper = partial(
                map_to_soup,
                emailhash_ditid_ditmskdmp=self.emailhash_ditid_ditmsk.edge_type,
                phonehash_ditid_ditmskdmp=self.phonehash_ditid_ditmsk.edge_type,
            )
            self.yt.run_map(soup_mapper, email_phone_dit_table, [soup_emails, soup_phones])

            # Temporary join inputs are no longer needed.
            mr.drop(yuid_dit_cookie_table)
            mr.drop(email_phone_dit_table)

            SoupDailyTable.finalize_all(list(soup_tables))

            self.dit_msk_log_table.set_last_processed_log_table(max(pending))

            for target in self.output():
                mr.set_generate_date(target.table, self.date)
