import os
import re
from functools import partial

import luigi

from crypta.graph.v1.python.data_imports.day_aggregate import (
    reduce_device_log_events_day,
    finalize_device_yuid_day_tables,
)
from crypta.graph.v1.python.lib.luigi.yt_luigi import BaseYtTask, ExternalInput, YtTarget
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.v2.soup.soup_tables import SoupDailyLogTable

from crypta.graph.soup.config.python import (  # N811 # noqa
    EDGE_TYPE as edges,
    ID_TYPE as ids,
    LOG_SOURCE as log_source,
    SOURCE_TYPE as source_type,
)

# Matches `<name>=<value>` pairs where <name> is one of idfa-gps-adid, googlead-id or advertising_id.
# Group 1 captures the parameter name, group 2 the value (ASCII letters, digits and hyphens).
# Applied to the "ExtPostBack" field in get_device_ids.
additional_gaid_regex = re.compile(r"(idfa-gps-adid|googlead-id|advertising_id)=([a-zA-Z0-9-]+)")


def get_device_ids(rec):
    """Yield ``(orig_id_name, device_id)`` pairs found in a postclick record.

    The explicit fields are checked first, in this order: ``IDFA`` (yielded as
    ``"idfa"``), ``OAID`` (``"oaid"``) and ``GoogleAdID`` (``"gaid"``); only
    truthy values are yielded.

    When neither ``IDFA`` nor ``GoogleAdID`` is set and ``ExtPostBack`` is
    non-empty, every match of ``additional_gaid_regex`` in ``ExtPostBack`` is
    yielded as well, using the matched parameter name as ``orig_id_name``.
    """
    explicit_fields = (("idfa", "IDFA"), ("oaid", "OAID"), ("gaid", "GoogleAdID"))

    seen = {}
    for label, field_name in explicit_fields:
        value = rec.get(field_name)
        seen[label] = value
        if value:
            yield label, value

    postback = rec.get("ExtPostBack")
    # The ExtPostBack fallback is skipped as soon as IDFA or GoogleAdID is present.
    if seen["idfa"] or seen["gaid"] or not postback:
        return

    for param_name, param_value in additional_gaid_regex.findall(postback):
        yield param_name, param_value


def map_postclick_log(rec, yuid_oaid_postclick, yuid_gaid_postclick, yuid_idfa_postclick):
    """Map one postclick log record onto the four mapper output tables.

    Output routing (via ``@table_index`` / ``table_index``):

    * 0 -- one device/yuid row per device id returned by ``get_device_ids``;
    * 1 -- the matching soup record built by ``SoupDailyLogTable.make_rec``;
    * 2 -- the input record itself when ``UserID`` equals ``CryptaID``;
    * 3 -- the input record with an ``error`` field when any exception is raised.

    Records with an empty ``UserID`` produce no output. The input ``rec`` is
    modified in place when it is routed to table 2 or 3.

    :param rec: postclick log record (dict-like).
    :param yuid_oaid_postclick: edge type used for ``"oaid"`` ids.
    :param yuid_gaid_postclick: edge type used for every id name other than
        ``"idfa"`` and ``"oaid"``.
    :param yuid_idfa_postclick: edge type used for ``"idfa"`` ids.
    """
    try:
        # Read first on purpose: a missing ClickTime sends the record to the error table
        # even when UserID is empty.
        click_time = rec["ClickTime"]
        yuid = str(rec.get("UserID") or "")
        crypta_id = str(rec.get("CryptaID") or "")

        if yuid and yuid == crypta_id:
            rec["@table_index"] = 2
            yield rec
        elif yuid:
            edge_type_by_name = {"idfa": yuid_idfa_postclick, "oaid": yuid_oaid_postclick}

            for id_name, device_id in get_device_ids(rec):
                edge_type = edge_type_by_name.get(id_name, yuid_gaid_postclick)

                pair_row = {
                    "source_type": config.ID_SOURCE_TYPE_POSTCLICK,
                    config.ID_TYPE_DEVID: device_id,
                    config.ID_TYPE_YUID: yuid,
                    "orig_id_name": id_name,
                    "match_type": rec.get("MatchType"),
                    "ts": abs(click_time),
                    "@table_index": 0,
                }
                yield pair_row

                yield SoupDailyLogTable.make_rec(
                    yuid, device_id, edge_type, ts=abs(click_time), table_index=1
                )

    except Exception as err:
        # Best-effort mapper: anything that fails is kept in the error table instead of
        # failing the whole operation.
        rec["error"] = str(err)
        rec["@table_index"] = 3
        yield rec


class ImportPostclicksDailyTask(BaseYtTask):
    """Import one day of the postclick log into a device/yuid day table and the soup log.

    ``run`` maps the day's postclick table with ``map_postclick_log`` and then
    sorts and reduces the resulting device/yuid rows into a single table under
    the ``devid_raw`` output folder.
    """

    # Day being imported; used as a path component for the input, workdir and outputs.
    date = luigi.Parameter()
    # Compared with ``date`` in ``output``: soup daily tables are listed as targets
    # only when the two are equal.
    run_date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportPostclicksDailyTask, self).__init__(*args, **kwargs)
        self.soup_log = SoupDailyLogTable(log_source.POSTCLICK_LOG, self.date)

    def input_folders(self):
        """Return the mapping of input folder keys to paths."""
        return {"postclick": config.LOG_FOLDERS["postclick"]}

    def workdir(self):
        """Return the per-day working folder (with a trailing slash) for intermediate tables."""
        return os.path.join(config.INDEVICE_YT_FOLDER, self.date, "postclick") + "/"

    def output_folders(self):
        """Return the mapping of output folder keys to paths (each with a trailing slash)."""
        return {"devid_raw": os.path.join(config.INDEVICE_YT_FOLDER, self.date, "perfect", "devid_raw_day") + "/"}

    def requires(self):
        """Depend on the postclick log table for ``self.date``."""
        return [ExternalInput(os.path.join(self.in_f("postclick"), self.date))]

    def before_run(self):
        """Create the working and output folders and the soup log directory."""
        workdir = self.workdir()
        mr.mkdir(workdir)
        mr.mkdir(self.out_f("devid_raw"))

        self.soup_log.ensure_dir()

    def _devid_raw_table(self):
        """Return the full path of the reduced device/yuid day table.

        Single source of truth for the path written by ``run`` and checked by ``output``.
        """
        table_name = "%s_%s_%s" % (
            config.ID_TYPE_DEVID,
            config.ID_TYPE_YUID,
            config.ID_SOURCE_TYPE_POSTCLICK,
        )
        # The output folder already ends with "/", so plain concatenation yields the table path.
        return self.out_f("devid_raw") + table_name

    def run(self):
        """Map the day's postclick log, then sort and reduce the device/yuid rows."""
        workdir = self.workdir()

        postclick_log = os.path.join(self.in_f("postclick"), self.date)
        out_tmp_t = workdir + "postback_tmp"

        mapper = partial(
            map_postclick_log,
            yuid_oaid_postclick=edges.get_edge_type(
                ids.YANDEXUID, ids.OAID, source_type.APP_INSTALL_TRACKER, log_source.POSTCLICK_LOG
            ),
            yuid_gaid_postclick=edges.get_edge_type(
                ids.YANDEXUID, ids.GAID, source_type.APP_INSTALL_TRACKER, log_source.POSTCLICK_LOG
            ),
            yuid_idfa_postclick=edges.get_edge_type(
                ids.YANDEXUID, ids.IDFA, source_type.APP_INSTALL_TRACKER, log_source.POSTCLICK_LOG
            ),
        )

        # The position of each output table must match the table index emitted by
        # map_postclick_log: 0 = device/yuid rows, 1 = soup records,
        # 2 = records with UserID == CryptaID, 3 = records that raised an error.
        self.yt.run_map(
            mapper,
            postclick_log,
            [out_tmp_t, self.soup_log.create(), workdir + "crypta_id_postclicks", workdir + "bad_postclicks"],
        )

        self.soup_log.prepare_daily_tables_from_log()

        sort_columns = [config.ID_TYPE_DEVID, config.ID_TYPE_YUID, "ts"]
        self.yt.run_sort(out_tmp_t, sort_by=sort_columns)

        out_t = self._devid_raw_table()
        self.yt.run_reduce(
            partial(reduce_device_log_events_day, dt=self.date, source_type=config.ID_SOURCE_TYPE_POSTCLICK),
            out_tmp_t,
            out_t,
            sort_by=sort_columns,
            reduce_by=[config.ID_TYPE_DEVID, config.ID_TYPE_YUID],
        )

        finalize_device_yuid_day_tables([out_t])

    def output(self):
        """Return the targets of this task.

        Always contains the reduced device/yuid day table (allowed to be empty);
        the soup daily tables are added only when ``date`` equals ``run_date``.
        """
        if self.date == self.run_date:
            soup_out_tables = self.soup_log.daily_tables_targets()
        else:
            soup_out_tables = []

        # The helper already returns the complete table path. Joining it onto the folder
        # again only worked because os.path.join drops earlier components when a later
        # one is absolute.
        return [YtTarget(self._devid_raw_table(), allow_empty=True)] + soup_out_tables
