import logging

import luigi

import graph_vertices_base
import graph_vertices_pretty
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.matching.device_matching.perfect.device_yuid_perfect_by_source import (
    DeviceYuidsPerfectBySource,
)
from crypta.graph.v1.python.matching.pairs import graph_pairs_black_list
from crypta.graph.v1.python.matching.pairs import graph_pairs
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr

# Module-level logger; its name is the fixed string "graph_vertices" rather than __name__.
logger = logging.getLogger("graph_vertices")


def map_vertices_cid_yuid(rec):
    """Route a vertex record to the yuid or devid output table.

    The fields named "d" and "c" are read out of ``rec["value"]`` with
    ``mr.get_field_value``. The "d" field selects the output:

    * ``"y"`` -> ``{"yuid": rec["key"], "cid": <c>}`` written to table 0;
    * ``"d"`` -> ``{"devid": rec["key"], "cid": <c>}`` written to table 1;
    * anything else -> no output row.

    Presumably "c" carries the crypta id of the vertex -- confirm against the
    producer of the input table.
    """
    packed_value = rec["value"]
    id_kind = mr.get_field_value("d", packed_value)
    crypta_id = mr.get_field_value("c", packed_value)

    if id_kind == "y":
        id_column, table_index = "yuid", 0
    elif id_kind == "d":
        id_column, table_index = "devid", 1
    else:
        # Unknown marker: emit nothing for this record.
        return

    yield {id_column: rec["key"], "cid": crypta_id, "@table_index": table_index}


def reduce_cid_limit(key, recs):
    """Pass a reduce group through unchanged, or drop it entirely if it is too big.

    Records are buffered first and emitted only once the whole group has been
    read, so an oversized group produces no output at all. The group is
    abandoned when a record arrives while more than
    ``config.FINAL_MERGE_LIMIT`` records are already buffered, which means
    groups of up to ``FINAL_MERGE_LIMIT + 1`` records are emitted.

    NOTE(review): the strict ``>`` lets one record more than the limit through;
    confirm whether that is intended.

    ``key`` is not used.
    """
    buffered = []
    # ``already_buffered`` always equals len(buffered) at the time of the check.
    for already_buffered, rec in enumerate(recs):
        if already_buffered > config.FINAL_MERGE_LIMIT:
            return
        buffered.append(rec)

    for rec in buffered:
        yield rec


class GraphVerticesExact(graph_vertices_base.BaseVerticesTask):
    """Vertices task built on the merged device-yuid and exact yuid-yuid pair tables.

    ``run`` merges every table returned by ``get_pairs_tables`` into a single
    ``yuid_pairs`` table inside the task's vertices folder and hands that table
    to ``self.vertices_pipeline`` (not defined in this class).
    """

    date = luigi.Parameter()
    vertices_type = luigi.Parameter()
    yuid_pairs_folder = luigi.Parameter()
    tags = ["v1"]

    def __init__(self, date, vertices_type="exact", yuid_pairs_folder="pairs/"):
        """Build the vertices config for ``vertices_type`` and initialise the base task.

        ``vertices_type`` is used both as the config name and, with a trailing
        slash, as its folder argument.
        """
        super(GraphVerticesExact, self).__init__(
            graph_vertices_base.VerticesConfig(vertices_type + "/", vertices_type, date, producing_task=self),
            date=date,
            vertices_type=vertices_type,
            yuid_pairs_folder=yuid_pairs_folder,
        )

    def input_folders(self):
        """Return the base input folders extended with the two pair folders for the config date."""
        folders = super(GraphVerticesExact, self).input_folders()
        # yuid pairs come from the configurable sub-folder, devid pairs always from "pairs/".
        folders.update(
            yuid_pairs=config.YT_OUTPUT_FOLDER + self.vertices_config.date + "/" + self.yuid_pairs_folder,
            devid_pairs=config.YT_OUTPUT_FOLDER + self.vertices_config.date + "/pairs/",
        )
        return folders

    def requires(self):
        """Return the base requirements plus the tasks producing the pair tables and the black list."""
        inherited = super(GraphVerticesExact, self).requires()
        config_date = self.vertices_config.date
        pair_producers = [
            graph_pairs.GraphPairs(config_date),  # exact yuid-yuid pairs
            DeviceYuidsPerfectBySource(config_date),  # exact devid-yuid pairs
            graph_pairs_black_list.GraphPairsBlackList(date=self.date),
        ]
        return inherited + pair_producers

    def get_pairs_tables(self, yuid_pairs_folder, devid_pairs_folder):
        """List the pair table paths: all device-yuid tables first, then the exact yuid-yuid tables."""
        tables = []
        for pair_name in config.DEVID_PAIRS_NAMES_ALL:
            tables.append(devid_pairs_folder + "dev_yuid_pairs_" + pair_name)
        for pair in config.YUID_PAIR_TYPES_EXACT:
            tables.append(yuid_pairs_folder + "yuid_pairs_" + pair.id_type)
        return tables

    def before_run(self):
        """Create the output vertices folder."""
        vertices_folder = self.out_f("vertices_folder")
        mr.mkdir(vertices_folder)

    def run(self):
        """Merge all pair tables into ``yuid_pairs`` and run the vertices pipeline on the result."""
        out_dir = self.out_f("vertices_folder")
        source_tables = self.get_pairs_tables(self.in_f("yuid_pairs"), self.in_f("devid_pairs"))

        merged_table = out_dir + "yuid_pairs"
        merge_spec = {"combine_chunks": True, "schema_inference_mode": "from_output"}
        self.yt.run_merge(source_tables, merged_table, spec=merge_spec)

        self.vertices_pipeline(merged_table, out_dir)


def filter_no_login_pairs(rec):
    """Keep only pairs that are not classified as login pairs.

    A ``"y_y"`` record counts as a login pair when all sources of either yuid
    are login source types, or when its ``id_type`` is ``config.ID_TYPE_LOGIN``.
    A ``"d_y"`` record counts as a login pair when its ``source_type`` is a
    login source type. Records with any other ``pair_type`` are never login
    pairs.

    Non-login records are emitted with ``"@table_index"`` set to 0; login pairs
    produce no output.
    """
    login_source_types = {
        config.ID_SOURCE_TYPE_SOCIAL,
        config.ID_SOURCE_TYPE_YAMONEY,
        config.ID_SOURCE_TYPE_TICKETS,
        config.ID_SOURCE_TYPE_PASSPORT,
        config.ID_SOURCE_TYPE_PASSPORT_SENSITIVE,
        config.ID_SOURCE_TYPE_PASSPORT_DUMP,
        config.ID_SOURCE_TYPE_PASSPORT_SERVER,
        config.ID_SOURCE_TYPE_PASSPORT_OAUTH,  # d_y
    }

    def only_login_sources(sources):
        # True when every source is a login source (also true for an empty collection).
        for source in sources:
            if source not in login_source_types:
                return False
        return True

    def fp_only(sources):
        # True when every source is the FP source type (also true for an empty collection).
        return all(source == config.ID_SOURCE_TYPE_FP for source in sources)

    pair_type = rec["pair_type"]
    is_login_pair = False

    if pair_type == "y_y":
        one_side_login_only = only_login_sources(rec["yuid1_sources"]) or only_login_sources(
            rec["yuid2_sources"]
        )
        has_login_id_type = rec["id_type"] == config.ID_TYPE_LOGIN
        # NOTE(review): this flag can only be true when has_login_id_type is true,
        # so it never changes the outcome below; confirm whether a different
        # id_type was intended here.
        email_from_login = has_login_id_type and (
            fp_only(rec["yuid1_sources"]) or fp_only(rec["yuid2_sources"])
        )
        is_login_pair = one_side_login_only or email_from_login or has_login_id_type
    elif pair_type == "d_y":
        is_login_pair = rec["source_type"] in login_source_types

    if is_login_pair:
        return

    rec["@table_index"] = 0
    yield rec


class GraphVerticesNoLogin(graph_vertices_base.BaseVerticesTask):
    """Vertices task built only on pairs that pass ``filter_no_login_pairs``.

    ``run`` maps every pair table through ``filter_no_login_pairs`` into a
    single ``pairs_no_login`` table, merges it, and hands it to
    ``self.vertices_pipeline`` (not defined in this class).
    """

    date = luigi.Parameter()

    def __init__(self, date):
        """Initialise the base task with a ``no_login`` vertices config for ``date``."""
        super(GraphVerticesNoLogin, self).__init__(
            graph_vertices_base.VerticesConfig("no_login/", "no_login", date, producing_task=self),
            date=date,
        )

    def input_folders(self):
        """Return the base input folders extended with the pairs folder for the config date."""
        folders = super(GraphVerticesNoLogin, self).input_folders()
        folders.update(pairs=config.YT_OUTPUT_FOLDER + self.vertices_config.date + "/pairs/")
        return folders

    def requires(self):
        """Return the base requirements plus the tasks producing the pair tables and the black list."""
        inherited = super(GraphVerticesNoLogin, self).requires()
        config_date = self.vertices_config.date
        pair_producers = [
            graph_pairs.GraphPairs(config_date),  # exact yuid-yuid pairs
            DeviceYuidsPerfectBySource(config_date),  # exact devid-yuid pairs
            graph_pairs_black_list.GraphPairsBlackList(date=self.date),
        ]
        return inherited + pair_producers

    def get_pairs_tables(self, pairs_folder):
        """List the pair table paths: all device-yuid tables first, then the exact yuid-yuid tables."""
        tables = []
        for pair_name in config.DEVID_PAIRS_NAMES_ALL:
            tables.append(pairs_folder + "dev_yuid_pairs_" + pair_name)
        for pair in config.YUID_PAIR_TYPES_EXACT:
            tables.append(pairs_folder + "yuid_pairs_" + pair.id_type)
        return tables

    def before_run(self):
        """Create the output vertices folder."""
        vertices_folder = self.out_f("vertices_folder")
        mr.mkdir(vertices_folder)

    def run(self):
        """Filter out login pairs, merge the filtered table, and run the vertices pipeline on it."""
        out_dir = self.out_f("vertices_folder")
        source_tables = self.get_pairs_tables(self.in_f("pairs"))

        filtered_table = out_dir + "pairs_no_login"
        self.yt.run_map(filter_no_login_pairs, source_tables, filtered_table)
        mr.merge(filtered_table)

        self.vertices_pipeline(filtered_table, out_dir)


class CopyVerticesToDict(yt_luigi.BaseYtTask):
    """Publish the vertices and edges of ``exact_task`` into the dictionaries folder.

    Both a "new format" (tables under ``matching/``) and an "old format"
    (``exact_vertices`` and the cid lookup tables) are produced, and the
    resulting tables are stamped with the task date.
    """

    date = luigi.Parameter()
    exact_task = luigi.TaskParameter()
    tags = ["v1"]

    def input_folders(self):
        """Return the vertices folder of ``exact_task`` as the only input folder."""
        vertices_folder = self.exact_task.vertices_config.get_vertices_folder()
        return {"vertices_exact": vertices_folder}

    def output_folders(self):
        """Return the dictionaries folder as the only output folder."""
        return {"dict": config.GRAPH_YT_DICTS_FOLDER}

    def requires(self):
        """Depend on the task that produces the exact vertices."""
        return [self.exact_task]

    def before_run(self):
        """Create the dictionaries folder and its ``matching`` sub-folder."""
        dict_root = self.out_f("dict")
        for folder in (dict_root, dict_root + "matching"):
            mr.mkdir(folder)

    def run(self):
        """Copy, sort and convert the exact vertices/edges into the dictionary tables."""
        source_dir = self.in_f("vertices_exact")
        dict_root = self.out_f("dict")

        source_vertices = source_dir + "vertices"
        old_format_vertices = dict_root + "exact_vertices"
        yuid_cid_unlimited = dict_root + "exact_yuid_cid_no_limit"
        devid_cid_unlimited = dict_root + "exact_devid_cid_no_limit"

        # new format
        mr.copy(source_vertices, dict_root + "matching/exact_vertices_by_key")
        mr.copy(source_dir + "edges", dict_root + "matching/exact_edges")
        self.yt.run_sort(
            source_vertices,
            dict_root + "matching/exact_vertices_by_crypta_id",
            sort_by="crypta_id",
        )

        # old format exact
        self.yt.run_map(
            graph_vertices_pretty.map_vertices_yt_to_yamr_format,
            source_vertices,
            old_format_vertices,
        )
        self.yt.run_sort(old_format_vertices, sort_by="key")

        # Split old-format vertices into yuid->cid and devid->cid tables, then
        # pass each cid group through reduce_cid_limit.
        self.yt.run_map(
            map_vertices_cid_yuid,
            old_format_vertices,
            [yuid_cid_unlimited, devid_cid_unlimited],
        )
        mr.sort_all([yuid_cid_unlimited, devid_cid_unlimited], sort_by="cid")
        self.yt.run_reduce(
            reduce_cid_limit,
            [yuid_cid_unlimited, devid_cid_unlimited],
            [dict_root + "exact_yuid_cid", dict_root + "exact_devid_cid"],
            reduce_by="cid",
        )

        # NOTE(review): "edges" is stamped here although this task does not write
        # dict_root + "edges" and does not list it in output() -- confirm that the
        # table is produced elsewhere.
        stamped_tables = (
            "matching/exact_vertices_by_key",
            "matching/exact_vertices_by_crypta_id",
            "matching/exact_edges",
            "edges",
            "exact_yuid_cid",
            "exact_devid_cid",
            "exact_vertices",
        )
        for table_name in stamped_tables:
            mr.set_generate_date(dict_root + table_name, self.date)

    def output(self):
        """Yield a date target for each dictionary table this task is expected to produce."""
        produced_tables = (
            "matching/exact_vertices_by_key",
            "matching/exact_vertices_by_crypta_id",
            "matching/exact_edges",
            "exact_yuid_cid",
            "exact_devid_cid",
            "exact_vertices",
        )
        for table_name in produced_tables:
            yield yt_luigi.YtDateTarget(self.out_f("dict") + table_name, self.date)
