import json
from functools import partial

import luigi

from crypta.graph.v1.python.matching.device_matching.app_metrica.account_manager_decoder.decoder import decode
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.v2 import ids
from crypta.graph.v1.python.v2.soup.soup_tables import SoupDumpTable

from crypta.graph.soup.config.python import (  # N811 # noqa
    ID_TYPE as id_type,
    SOURCE_TYPE as source_type,
    LOG_SOURCE as log_source,
    EDGE_TYPE as edges,
)

# One-character prefix per id type: parse_am_events keeps the decoded account
# entries whose first element starts with this prefix and strips the prefix
# off before emitting the value.
AM_ACCOUNT_TYPES = {config.ID_TYPE_PHONE: "a", config.ID_TYPE_EMAIL: "b"}

# NOTE(review): not referenced anywhere in the visible part of this file;
# confirm whether it is still used elsewhere before relying on it.
OOM_LIMIT = 100


def parse_am_events(rec, date):
    """Map one log record to account-manager e-mail / phone rows.

    Records without a device id are skipped. For records whose ``EventName``
    starts with ``"AM_System AM info"`` the ``EventValue`` payload is decoded
    and every distinct account entry is emitted as a row: e-mails go to output
    table 0, phones to output table 1 (all e-mail rows are yielded first).

    The payload is decoded once per record; the decode step is the expensive
    part of this mapper, so it is not repeated per account type.

    :param rec: input record (dict-like) from the account-manager log table.
    :param date: date string stored in the ``date`` field of every output row.
    :return: generator of output row dicts carrying ``@table_index``.
    """

    def decode_accounts():
        """Return ``(emails, phones)`` sets decoded from the record payload.

        Any failure while parsing or decoding yields two empty sets, i.e. a
        malformed payload produces no rows at all (best-effort parsing).
        """
        event_name = rec.get("EventName", "")
        if not event_name.startswith("AM_System AM info"):
            return set(), set()

        try:
            encrypted = json.loads(rec.get("EventValue", ""))["a"]
            mmetric_device_id = rec.get("DeviceID", "")
            decrypted = decode(encrypted, mmetric_device_id)

            # First element of every account entry: "<type prefix><value>".
            tagged_values = [pair[0] for pair in json.loads(decrypted)["a"]]

            email_prefix = AM_ACCOUNT_TYPES[config.ID_TYPE_EMAIL]
            phone_prefix = AM_ACCOUNT_TYPES[config.ID_TYPE_PHONE]
            emails = {v[1:] for v in tagged_values if v.startswith(email_prefix)}
            phones = {v[1:] for v in tagged_values if v.startswith(phone_prefix)}
        except Exception:
            # Deliberately best-effort: broken payloads are dropped silently.
            # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
            # and SystemExit still propagate.
            return set(), set()

        return emails, phones

    device_id = rec[ids.CRYPTA_DEVICE_ID]
    device_type = rec.get("id_type") or "gaid"  # keep compatibility with old tables
    if not device_id:
        # skip empty
        return

    ts = int(rec.get("_logfeller_timestamp", rec.get("timestamp")))

    def make_row(id_hash, id_kind, table_index):
        """Build one output row for the given id value and output table."""
        return {
            "id_hash": id_hash,
            "id_type": id_kind,
            "source_type": config.ID_SOURCE_TYPE_ACCOUNT_MANAGER,
            "deviceid": device_id,
            "device_type": device_type,
            "ts": ts,
            "date": date,
            "@table_index": table_index,
        }

    emails, phones = decode_accounts()

    for email in emails:
        yield make_row(email, config.ID_TYPE_EMAIL, 0)

    for phone in phones:
        yield make_row(phone, config.ID_TYPE_PHONE, 1)


def merge_day_to_dict(key, recs):
    """Reduce all rows sharing ``key`` into a single dictionary row.

    Collects every date found either in a row's ``history`` list or in its
    ``date`` field, and yields one row made of the key fields plus:

    * ``history`` - the distinct dates in ascending order (``[]`` if none);
    * ``last_date`` - the greatest date, or ``None`` when there are no dates.
    """
    seen_dates = set()
    for row in recs:
        # A missing / empty history contributes nothing.
        seen_dates.update(row.get("history") or ())

        day = row.get("date")
        if day:
            seen_dates.add(day)

    ordered = sorted(seen_dates)

    merged = dict(key)
    merged["history"] = ordered
    merged["last_date"] = ordered[-1] if ordered else None
    yield merged


def map_am_to_soup(rec, gaid_email_am, gaid_phone_am, oaid_email_am, oaid_phone_am):
    """Turn one account-manager dictionary row into a soup record.

    The (device type, id type) pair of the row selects the edge type and the
    output table index; a missing device type is treated as ``"gaid"``.
    Rows with any other combination produce nothing.
    """
    routes = {
        ("gaid", "email"): (gaid_email_am, 0),
        ("gaid", "phone"): (gaid_phone_am, 1),
        ("oaid", "email"): (oaid_email_am, 2),
        ("oaid", "phone"): (oaid_phone_am, 3),
    }

    route_key = (rec.get("device_type") or "gaid", rec.get("id_type"))
    if route_key not in routes:
        return

    edge_type, table_index = routes[route_key]
    yield SoupDumpTable.make_rec(
        rec["deviceid"], rec["id_hash"], edge_type, rec.get("history"), table_index
    )


class AccountManagerUpdateDictTask(yt_luigi.BaseYtTask):
    """Update the account-manager dictionary for one day and dump it to soup.

    ``run`` performs three steps:

    1. decode the day's ``am_log`` table into per-id-type day tables
       (e-mails and phones);
    2. merge those day tables with the existing ``account_manager``
       dictionary (when it exists) and store the result both in the day
       folder and in the dictionary folder;
    3. map the dictionary into four soup tables: gaid/oaid x email/phone.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(AccountManagerUpdateDictTask, self).__init__(*args, **kwargs)
        self.gaid_email_am = SoupDumpTable(
            edges.get_edge_type(
                id_type.GAID, id_type.EMAIL_MD5, source_type.ACCOUNT_MANAGER, log_source.METRIKA_MOBILE_LOG
            ),
            self.date,
        )
        self.gaid_phone_am = SoupDumpTable(
            edges.get_edge_type(
                id_type.GAID, id_type.PHONE_MD5, source_type.ACCOUNT_MANAGER, log_source.METRIKA_MOBILE_LOG
            ),
            self.date,
        )
        self.oaid_email_am = SoupDumpTable(
            edges.get_edge_type(
                id_type.OAID, id_type.EMAIL_MD5, source_type.ACCOUNT_MANAGER, log_source.METRIKA_MOBILE_LOG
            ),
            self.date,
        )
        self.oaid_phone_am = SoupDumpTable(
            edges.get_edge_type(
                id_type.OAID, id_type.PHONE_MD5, source_type.ACCOUNT_MANAGER, log_source.METRIKA_MOBILE_LOG
            ),
            self.date,
        )

    def _soup_tables(self):
        """Return the four soup tables in output-table order.

        The order must match the table indexes emitted by ``map_am_to_soup``
        (0: gaid/email, 1: gaid/phone, 2: oaid/email, 3: oaid/phone).
        """
        return [self.gaid_email_am, self.gaid_phone_am, self.oaid_email_am, self.oaid_phone_am]

    def input_folders(self):
        """Return the named input folders: the dictionary folder and the day folder."""
        return {
            "dict": config.GRAPH_YT_DICTS_FOLDER,
            "am_day": config.YT_OUTPUT_FOLDER + self.date + "/mobile/account_manager/",
        }

    def output_folders(self):
        """Return the named output folders (same locations as the input folders)."""
        return {
            "dict": config.GRAPH_YT_DICTS_FOLDER,
            "am_day": config.YT_OUTPUT_FOLDER + self.date + "/mobile/account_manager/",
        }

    def requires(self):
        """Depend on the AppMetrica day import for the same date."""
        # Imported locally, as in the original code.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from crypta.graph.v1.python.data_imports.import_logs import app_metrica_day

        return app_metrica_day.ImportAppMetrikaDayTask(date=self.date, run_date=self.date)

    def before_run(self):
        """Create the output folders."""
        mr.mkdir(self.out_f("am_day"))
        mr.mkdir(self.out_f("dict"))

    def run(self):
        """Decode the day log, merge it into the dictionary and dump soup tables."""
        in_am_log_table = self.in_f("am_day") + "am_log"
        in_am_dict_table = self.in_f("dict") + "account_manager"

        email_am_day_table = self.out_f("am_day") + "am_device_md5" + config.ID_TYPE_EMAIL
        phone_am_day_table = self.out_f("am_day") + "am_device_md5" + config.ID_TYPE_PHONE

        # decoding is CPU-intensive, let's put 1K records per job
        # Floor division keeps job_count an integer on Python 3 as well.
        job_count = self.yt.row_count(in_am_log_table) // 1000 + 1
        self.yt.run_map(
            partial(parse_am_events, date=self.date),
            in_am_log_table,
            [email_am_day_table, phone_am_day_table],
            job_count=job_count,
        )

        am_join_tables = [email_am_day_table, phone_am_day_table]
        if mr.exists(in_am_dict_table):
            # if no dict exist, we will just create it, otherwise append to it
            am_join_tables.append(in_am_dict_table)

        am_key = ["id_hash", "id_type", "deviceid", "device_type", "source_type"]
        mr.sort_all(am_join_tables, sort_by=am_key)
        self.yt.run_reduce(
            merge_day_to_dict,
            am_join_tables,
            self.out_f("am_day") + "account_manager_dict",  # use day folder for backup
            reduce_by=am_key,
        )

        mr.copy(self.out_f("am_day") + "account_manager_dict", self.out_f("dict") + "account_manager")
        mr.set_generate_date(self.out_f("dict") + "account_manager", self.date)

        soup_tables = self._soup_tables()
        with self.yt.Transaction() as tr:
            self.yt.run_map(
                partial(
                    map_am_to_soup,
                    gaid_email_am=self.gaid_email_am.edge_type,
                    gaid_phone_am=self.gaid_phone_am.edge_type,
                    oaid_email_am=self.oaid_email_am.edge_type,
                    oaid_phone_am=self.oaid_phone_am.edge_type,
                ),
                self.in_f("dict") + "account_manager",
                [table.create(tr) for table in soup_tables],
            )

            SoupDumpTable.finalize_all(soup_tables)

    def output(self):
        """Return the targets of this task: day dict, main dict and all four soup tables."""
        # Every soup table is listed exactly once (the oaid/email table was
        # previously missing while oaid/phone was listed twice).
        soup_targets = [t.as_target() for t in self._soup_tables()]

        return [
            yt_luigi.YtTarget(self.out_f("am_day") + "account_manager_dict", self.date),
            yt_luigi.YtDateTarget(self.out_f("dict") + "account_manager", self.date),
        ] + soup_targets
