import luigi

from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.matching.device_matching.app_metrica.account_manager import AccountManagerUpdateDictTask
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import utils
from crypta.graph.v1.python.utils import yt_clients


def map_yuid_email_md5(rec):
    """Mapper: turn one yuid/email row into a row keyed by the email hash.

    Reads ``id_value``, ``yuid`` and the ``<email type>_sources`` /
    ``<email type>_dates`` columns of ``rec`` and yields a single output row.
    ``id_hash`` is whatever ``utils.md5`` returns for the email value; the
    value itself is stored unchanged in both ``id_value`` and ``id_orig_value``.
    """
    email = rec["id_value"]
    email_hash = utils.md5(email)

    out_row = {
        "id_hash": email_hash,
        "id_value": email,
        "id_orig_value": email,
        "id_type": config.ID_TYPE_EMAIL,
        # only the sources recorded for this particular email value
        "source_types": rec[config.ID_TYPE_EMAIL + "_sources"][email],
        # the dates column is passed through as is (not narrowed to this value)
        "id_dates": rec[config.ID_TYPE_EMAIL + "_dates"],
        "yuid": rec["yuid"],
    }
    yield out_row


def map_yuid_phone_md5(rec):
    """Mapper: turn one yuid/phone row into a row keyed by the phone hash.

    The phone ``id_value`` is already an md5, so it is used directly as
    ``id_hash``. The original (unhashed) value is looked up in the
    ``<phone type>_orig`` column and appended in parentheses to form
    ``id_orig_value``.
    """
    # the phone id arrives already hashed
    phone_hash = rec["id_value"]
    # hash -> original value mapping lives in a dedicated column
    orig_by_hash = rec[config.ID_TYPE_PHONE + "_orig"]
    readable_value = phone_hash + "(" + orig_by_hash[phone_hash] + ")"

    out_row = {
        "id_hash": phone_hash,
        "id_value": phone_hash,
        "id_orig_value": readable_value,
        "id_type": config.ID_TYPE_PHONE,
        # only the sources recorded for this particular phone hash
        "source_types": rec[config.ID_TYPE_PHONE + "_sources"][phone_hash],
        # the dates column is passed through as is (not narrowed to this value)
        "id_dates": rec[config.ID_TYPE_PHONE + "_dates"],
        "yuid": rec["yuid"],
    }
    yield out_row


def count_rest(recs):
    """Exhaust the iterable ``recs`` and return the number of items it produced.

    For a partially consumed iterator only the remaining items are counted.
    """
    return sum(1 for _ in recs)


def reduce_devid_yuids(id_key, recs):
    """Reducer: pair device ids with yuids that share one ``id_hash``/``id_type``.

    ``recs`` are all rows of the group described by ``id_key``. A row that has
    a ``"deviceid"`` key contributes a device id; every other row is kept as a
    yuid row.

    Rows are routed by ``"@table_index"``:
      * 0 - one row per (device id, yuid row) combination;
      * 1 - a single stats row for a group that exceeded a per-id limit;
      * 2 - one row per matched group whose id type is phone;
      * 3 - one row per matched group of any other id type.

    Nothing is emitted for a group that lacks either device ids or yuid rows.
    """
    devices = set()
    yuid_rows = []

    for row in recs:
        if "deviceid" not in row:
            yuid_rows.append(row)
        else:
            devices.add(row["deviceid"])

        too_many_devices = len(devices) > config.default_devids_per_id_limit
        if too_many_devices or len(yuid_rows) > config.default_yuids_per_id_limit:
            # Over the limit: no pairs are produced for this group. Stats are
            # written only when both sides were seen so far.
            if devices and yuid_rows:
                overlimit_stats = {
                    "id_hash": id_key["id_hash"],
                    "yuids_count": len(yuid_rows),
                    "devids_count": len(devices),
                    # drains the remaining rows of the group to count them
                    "rest_count": count_rest(recs),
                    "@table_index": 1,
                }
                yield overlimit_stats
            return

    if not (yuid_rows and devices):
        return

    id_hash = id_key["id_hash"]
    id_type = id_key["id_type"]

    # per-type stats: phones go to table 2, everything else to table 3
    if id_type == config.ID_TYPE_PHONE:
        stats_table = 2
    else:
        stats_table = 3
    yield {"id_hash": id_hash, "id_type": id_type, "@table_index": stats_table}

    for yuid_row in yuid_rows:
        id_value = yuid_row["id_value"]
        id_orig_value = yuid_row["id_orig_value"]
        # identical for every device, so resolved once per yuid row
        dates = yuid_row["id_dates"][id_value]

        for devid in devices:
            # in fact, sources of phone or mail should be here instead of
            # the account manager source type
            chain = {id_type: {config.ID_SOURCE_TYPE_ACCOUNT_MANAGER: {id_orig_value: dates}}}
            pair_row = {
                "devid": devid,
                "yuid": yuid_row["yuid"],
                "id_hash": id_hash,
                "id_value": id_value,
                "dates": dates,
                "id_type": id_type,
                "source_type": config.ID_SOURCE_TYPE_ACCOUNT_MANAGER,
                "yuid_sources": yuid_row["source_types"],
                "match_chain": chain,
                "@table_index": 0,
            }
            yield pair_row


def run_mr_am_pairs(dict_folder, am_date_folder, devid_raw_folder):
    """Build the account-manager devid-yuid pair table and return its path.

    Steps, in order:
      1. Two map operations (started with ``sync=False`` and passed to
         ``utils.wait_all``) write hashed email and phone tables into
         ``am_date_folder``.
      2. The account-manager dict table and both hashed tables are sorted by
         ``id_hash``, ``id_type``.
      3. ``reduce_devid_yuids`` is run over them, writing the pair table into
         ``devid_raw_folder`` and three stats tables into ``am_date_folder``.
      4. Output tables go through ``mr.merge_chunks_all`` and the two
         temporary hashed tables are dropped.

    All folder arguments are concatenated with table names directly, so they
    are expected to already end with a path separator.
    """
    # TODO: move hash calculation to yuid_with_all
    client = yt_clients.get_yt_client()

    am_dict_table = dict_folder + "account_manager"  # devid -> id_hash
    email_hash_table = am_date_folder + "yuid_md5email"  # yuid -> id_hash
    phone_hash_table = am_date_folder + "yuid_md5phone"  # yuid -> id_hash

    email_map_op = client.run_map(
        map_yuid_email_md5,
        dict_folder + "yuid_with_id_" + config.ID_TYPE_EMAIL,
        email_hash_table,
        sync=False,
    )
    phone_map_op = client.run_map(
        map_yuid_phone_md5,
        dict_folder + "yuid_with_id_" + config.ID_TYPE_PHONE,
        phone_hash_table,
        sync=False,
    )
    utils.wait_all([email_map_op, phone_map_op])

    # devid - yuid pairs
    join_columns = ("id_hash", "id_type")
    mr.sort_all([am_dict_table, email_hash_table, phone_hash_table], list(join_columns))

    pair_table = devid_raw_folder + "devid_yuid_" + config.ID_SOURCE_TYPE_ACCOUNT_MANAGER
    # the position in this list is the "@table_index" used by the reducer
    out_tables = [
        pair_table,  # for later merge in devid-yuid dicts
        am_date_folder + "am_pairs_overlimit",  # stats
        am_date_folder + "am_phones_final_unique",  # stats
        am_date_folder + "am_emails_final_unique",  # stats
    ]

    client.run_reduce(
        reduce_devid_yuids,
        [am_dict_table, email_hash_table, phone_hash_table],
        out_tables,
        reduce_by=list(join_columns),
    )

    mr.merge_chunks_all(out_tables)

    # the hashed tables are only intermediate inputs of the reduce
    for temporary_table in (email_hash_table, phone_hash_table):
        mr.drop(temporary_table)

    return pair_table


class AccountManagerPhoneEmailPairsTask(yt_luigi.BaseYtTask):
    """Luigi task producing the account-manager devid-yuid pair table for ``date``.

    Delegates the actual work to ``run_mr_am_pairs`` and stamps the resulting
    table with the task date.
    """

    date = luigi.Parameter()
    tags = ["v1"]

    def input_folders(self):
        """Return the named input folders; only the graph dicts folder is used."""
        folders = {"dict": config.GRAPH_YT_DICTS_FOLDER}
        return folders

    def workdir(self):
        """Return the per-date folder for intermediate and stats tables."""
        date_root = config.YT_OUTPUT_FOLDER + self.date
        return date_root + "/mobile/account_manager/"

    def output_folders(self):
        """Return the named output folders; only the per-date raw devid folder is used."""
        date_root = config.INDEVICE_YT_FOLDER + self.date
        return {"devid_raw_month": date_root + "/perfect/devid_raw_month/"}

    def requires(self):
        """Return the upstream tasks, both parameterised with this task's date."""
        # imported here rather than at module level - presumably to avoid a
        # circular import; confirm before moving it
        from crypta.graph.v1.python.matching.yuid_matching import graph_dict

        am_dict_task = AccountManagerUpdateDictTask(self.date)
        yuid_dicts_task = graph_dict.YuidAllIdBySourceDictsTask(self.date)
        return [am_dict_task, yuid_dicts_task]

    def before_run(self):
        """Create the work folder and the output folder."""
        work_folder = self.workdir()
        mr.mkdir(work_folder)
        pairs_folder = self.out_f("devid_raw_month")
        mr.mkdir(pairs_folder)

    def run(self):
        """Build the pair table and set its generate date to the task date."""
        produced_table = run_mr_am_pairs(
            dict_folder=self.in_f("dict"),
            am_date_folder=self.workdir(),
            devid_raw_folder=self.out_f("devid_raw_month"),
        )
        mr.set_generate_date(produced_table, self.date)

    def output(self):
        """Return a date target for the pair table written by ``run``."""
        pairs_folder = self.out_f("devid_raw_month")
        pair_table = pairs_folder + "devid_yuid_" + config.ID_SOURCE_TYPE_ACCOUNT_MANAGER
        return yt_luigi.YtDateTarget(pair_table, self.date)
