from collections import defaultdict
from functools import partial

import luigi

from crypta.graph.v1.python.matching.device_matching.perfect import device_yuid_perfect_dict
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.matching.device_matching import indevice_utils
from crypta.graph.v1.python.matching.pairs import graph_pair_utils
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import uat_utils
from crypta.graph.v1.python.utils import yt_clients

# Cap (3000) on the number of yuids stored per (source, match type) bucket of a
# single DevidYuids instance; DevidYuids.add_rec ignores a record for a bucket
# that has already reached this size.
# Judging by the name this guards against running out of memory — TODO confirm.
OOM_CHECK = 30 * 100


class DevidYuids:
    """Accumulator of the yuids seen in the records of one devid.

    Records are fed in with ``add_rec``; the ``yield_*`` methods then expose
    the collected yuids grouped by ``(source, match_type)``.
    """

    def __init__(self):
        # yuid -> set of sources the yuid was stored for
        self.yuids_sources = defaultdict(set)
        # (source, match_type) -> set of yuids
        self.yuids_by_source_matchtype = defaultdict(set)
        # yuid -> non-empty "yuid_browser" value of the last stored record
        self.yuids_browser = {}
        # yuid -> list of keys of the record's "yuid_logins" mapping
        self.yuids_logins = {}
        # yuid -> uat_utils.Ua built from the record's "yuid_useragent"
        self.yuids_ua = {}

    def add_rec(self, rec):
        """Register one input record.

        :param rec: mapping with mandatory ``yuid`` and ``source_types`` keys
            and optional ``yuid_browser``, ``yuid_logins`` and
            ``yuid_useragent`` keys.
        """
        yuid = rec["yuid"]
        browser = rec.get("yuid_browser", "")
        logins = rec.get("yuid_logins", {})
        ua = rec.get("yuid_useragent", "")
        for source in rec["source_types"]:
            match_type = indevice_utils.get_device_matching_type_single_source(rec, source)

            yuids = self.yuids_by_source_matchtype[(source, match_type)]
            if len(yuids) >= OOM_CHECK:
                # The bucket is full: skip the record for this source entirely.
                continue

            yuids.add(yuid)
            self.yuids_sources[yuid].add(source)
            if browser:
                self.yuids_browser[yuid] = browser
            if logins:
                self.yuids_logins[yuid] = list(logins.keys())
            if ua:
                self.yuids_ua[yuid] = uat_utils.Ua(ua)

    def yield_source_over_limit(self):
        """Yield the source of every bucket whose de-duplicated yuid count exceeds its limit.

        The limit is ``yuids_per_devid_limit`` of the source's entry in
        ``config.DEVID_PAIR_TYPES_PERFECT_DICT``, or 5 when that entry is falsy.
        A source is yielded once per offending ``(source, match_type)`` bucket,
        so the same source may appear more than once.
        """
        # items() (rather than iteritems()) works on both Python 2 and Python 3.
        for (source, _matchtype), yuids in self.yuids_by_source_matchtype.items():
            pair_type = config.DEVID_PAIR_TYPES_PERFECT_DICT[source]
            graph_hat_limit = pair_type.yuids_per_devid_limit if pair_type else 5
            if self.calc_yuids_without_ua_duplicates(yuids, self.yuids_ua) > graph_hat_limit:
                yield source

    def calc_yuids_without_ua_duplicates(self, yuids, uas):
        """Count yuids, collapsing those that look like one browser at a newer version.

        Yuids with a user agent are grouped by their UA profile with the version
        (the last ``|``-separated field) removed.  Inside a group every yuid
        that can be paired with a not-yet-taken strictly greater version is
        treated as a duplicate.  Yuids without a user agent always count.

        :param yuids: iterable of yuids.
        :param uas: mapping yuid -> object providing ``to_ua_profile()``.
        :return: number of yuids left after removing the duplicates.
        """
        yuids_without_ua = 0
        versions_by_ua = defaultdict(list)
        for yuid in yuids:
            ua = uas.get(yuid)
            if not ua:
                yuids_without_ua += 1
                continue
            profile = ua.to_ua_profile()
            version = profile.split("|")[-1]
            # NOTE(review): replace() drops every occurrence of the version
            # text from the profile, not only the trailing field.
            versions_by_ua[profile.replace(version, "")].append([version, False])

        distinct_yuids = 0
        for versions in versions_by_ua.values():
            # Each entry is [version, taken]; versions are compared as strings.
            versions.sort(key=lambda entry: entry[0])
            duplicates = 0
            for version, _ in versions:
                newer_index = self.get_index_new_version(versions, version)
                if newer_index:
                    versions[newer_index][1] = True
                    duplicates += 1
            distinct_yuids += len(versions) - duplicates
        return distinct_yuids + yuids_without_ua

    def get_index_new_version(self, versions, current_version):
        """Return the index of the first untaken entry newer than ``current_version``.

        :param versions: list of ``[version, taken]`` entries.
        :param current_version: version to compare against.
        :return: index of the first entry with a greater version and a falsy
            ``taken`` flag, or 0 when there is none.  In a list sorted by
            version index 0 can never be a real match, which is what lets the
            caller use 0 as "not found".
        """
        for index, (ver, used) in enumerate(versions):
            if ver > current_version and not used:
                return index
        return 0

    def yield_yuid_source_matchtype_no_limit(self):
        """Yield ``(yuid, source, matchtype, yuid_sources)`` for every stored yuid."""
        for (source, matchtype), yuids in self.yuids_by_source_matchtype.items():
            for yuid in yuids:
                yield yuid, source, matchtype, self.yuids_sources[yuid]

    def yield_yuid_source_matchtype(self):
        """Yield ``(yuid, source, matchtype, yuid_sources)`` with the per-source limit applied.

        A bucket within the limit is yielded as is.  A bucket over the limit is
        cut down to its last ``limit`` yuids when the source is listed in
        ``config.DEVID_PERFECT_INDEV_PRIORITY`` and dropped completely otherwise.
        """
        for (source, matchtype), yuids in self.yuids_by_source_matchtype.items():
            pair_type = config.DEVID_PAIR_TYPES_PERFECT_DICT[source]
            graph_hat_limit = pair_type.yuids_per_devid_limit if pair_type else 5
            if len(yuids) <= graph_hat_limit:
                pass
            elif source in config.DEVID_PERFECT_INDEV_PRIORITY:
                # Yield last (by timestamp) hat_limit yuids, see CRYPTAIS-670 for details
                # NOTE(review): with a limit of 0 the slice [-0:] keeps every yuid.
                yuids = sorted(yuids, key=lambda x: x[-10:])[-graph_hat_limit:]
            else:
                yuids = []
            for yuid in yuids:
                yield yuid, source, matchtype, self.yuids_sources[yuid]


def reduce_dev_yuid_by_source(devid_key, recs, pair_table_indexes, date):
    """Reducer: turn all records of one devid into rows for several output tables.

    Output tables are addressed through the ``@table_index`` field.  With
    ``n = len(pair_table_indexes)`` the layout is:

    * ``pair_table_indexes[(source, match_type)]`` - pair rows of a regular match type;
    * ``n``     - ``key``/``subkey``/``value`` rows for limited INDEVICE pairs;
    * ``n + 1`` - INDEVICE rows with browser/login details, no limit applied;
    * ``n + 2`` - ``devid``/``source`` rows for sources over their yuid limit;
    * ``n + 3`` - pair rows with match type ``config.UA_PROFILE_NOT_MATCHES``;
    * ``n + 4`` - pair rows of the remaining ``config.NO_MATCH_TYPES``.

    :param devid_key: reduce key, a mapping with a ``devid`` entry.
    :param recs: iterable of records sharing that devid.
    :param pair_table_indexes: mapping ``(source, match_type)`` -> table index.
    :param date: value written to the ``date`` field of the no-limit rows.
    """
    devid = devid_key["devid"]
    if devid is None:
        return

    first_extra_index = len(pair_table_indexes)
    indevice_kv_index = first_extra_index
    no_limit_index = first_extra_index + 1
    over_limit_index = first_extra_index + 2
    ua_mismatch_index = first_extra_index + 3
    other_no_match_index = first_extra_index + 4

    collected = DevidYuids()
    for record in recs:
        collected.add_rec(record)

    # 1. Every INDEVICE yuid, regardless of the per-source limit.
    for yuid, source, match_type, _ in collected.yield_yuid_source_matchtype_no_limit():
        if match_type != config.INDEVICE:
            continue
        user_agent = collected.yuids_ua.get(yuid)
        is_inapp_browser = user_agent.is_webview() if user_agent is not None else False
        yield {
            "devid": devid,
            "yuid": yuid,
            "date": date,
            "yuid_browser": collected.yuids_browser.get(yuid, ""),
            "yuid_logins": collected.yuids_logins.get(yuid, []),
            "is_inapp_browser": is_inapp_browser,
            "source": source,
            "@table_index": no_limit_index,
        }

    # 2. Sources for which this devid has too many yuids.
    for source in collected.yield_source_over_limit():
        yield {"devid": devid, "source": source, "@table_index": over_limit_index}

    # 3. Pairs that passed the per-source limit.
    for yuid, source, match_type, yuid_sources in collected.yield_yuid_source_matchtype():
        if match_type not in config.NO_MATCH_TYPES:
            table_index = pair_table_indexes[(source, match_type)]
        elif match_type == config.UA_PROFILE_NOT_MATCHES:
            table_index = ua_mismatch_index
        else:
            table_index = other_no_match_index
        yield graph_pair_utils.devid_yuid_pair_rec(devid, yuid, source, match_type, yuid_sources, table_index)

        if match_type == config.INDEVICE:
            yield {
                "key": devid,
                "subkey": yuid,
                "value": "perfect=1\tsource=%s" % source,
                "@table_index": indevice_kv_index,
            }


def run_devid_perfect_pairs_by_source(basedir, devid_all_table, workdir, pairs_output_folder, date):
    """Sort the devid table, reduce it into pair tables and copy the result to ``basedir``.

    The reduce output list is the per ``(source, match_type)`` pair tables
    followed by five extra tables.  Their order must stay in sync with the
    ``@table_index`` values produced by ``reduce_dev_yuid_by_source``.

    :param basedir: folder prefix receiving the final copies.
    :param devid_all_table: input table; it is sorted in place by ``devid``.
    :param workdir: folder prefix for the intermediate tables.
    :param pairs_output_folder: folder prefix for the ``dev_yuid_pairs_*`` tables.
    :param date: date value handed over to the reducer.
    """
    source_matchtype_pairs = [
        (pair.source_type, match_type)
        for pair in config.DEVID_PAIR_TYPES_PERFECT
        for match_type in pair.match_types
    ]

    out_pairs_tables = []
    perfect_pair_types_indexes = {}
    for position, (source_type, match_type) in enumerate(source_matchtype_pairs):
        out_pairs_tables.append(pairs_output_folder + "dev_yuid_pairs_" + source_type + "_" + match_type)
        perfect_pair_types_indexes[(source_type, match_type)] = position

    yt_client = yt_clients.get_yt_client()
    yt_client.run_sort(devid_all_table, sort_by="devid")

    perfect_table = workdir + "dev_yuid_perfect"
    workdir_tables = [
        perfect_table,
        workdir + "dev_yuid_indevice_perfect_no_limit_tmp",
        workdir + "bad_devid_perfect",
        pairs_output_folder + "dev_yuid_pairs_" + config.UA_PROFILE_NOT_MATCHES,
        pairs_output_folder + "dev_yuid_pairs_" + config.NOT_FOUND_YUID_UA,
    ]

    reducer = partial(reduce_dev_yuid_by_source, pair_table_indexes=perfect_pair_types_indexes, date=date)
    yt_client.run_reduce(
        reducer,
        devid_all_table,
        out_pairs_tables + workdir_tables,
        reduce_by="devid",
    )

    for tables in (out_pairs_tables, workdir_tables):
        mr.merge_chunks_all(tables)

    # The same table is published under three names in basedir.
    for target_name in ("dev_yuid", "dev_yuid_info_ua", "dev_yuid_perfect_unperfect"):
        yt_client.copy(perfect_table, basedir + target_name, force=True)


class DeviceYuidsPerfectBySource(yt_luigi.BaseYtTask):
    """Luigi task that runs ``run_devid_perfect_pairs_by_source`` for one date.

    It reads ``devid_yuid_all`` from the date's ``perfect/`` in-device folder
    and declares the pair tables plus two in-device tables as its targets.
    """

    date = luigi.Parameter()
    tags = ["v1"]

    def input_folders(self):
        """Return the input folder mapping: the date's ``perfect/`` in-device folder."""
        perfect_folder = config.INDEVICE_YT_FOLDER + self.date + "/perfect/"
        return {"indevice_perfect": perfect_folder}

    def output_folders(self):
        """Return the output folder mapping (in-device perfect, in-device base, pairs)."""
        folders = {}
        folders["indevice_perfect"] = config.INDEVICE_YT_FOLDER + self.date + "/perfect/"
        folders["indevice_base"] = config.INDEVICE_YT_FOLDER + self.date + "/"
        folders["pairs"] = config.GRAPH_YT_OUTPUT_FOLDER + self.date + "/pairs/"
        return folders

    def requires(self):
        """Depend on the monthly perfect-dict merge task for the same date."""
        upstream = device_yuid_perfect_dict.DeviceYuidsMergePerfectDictMonthTask(self.date)
        return [upstream]

    def before_run(self):
        """Create the output folders that the job writes into."""
        for folder_key in ("indevice_perfect", "pairs"):
            mr.mkdir(self.out_f(folder_key))

    def run(self):
        """Run the pair-building job over the ``devid_yuid_all`` input table."""
        run_devid_perfect_pairs_by_source(
            devid_all_table=self.in_f("indevice_perfect") + "devid_yuid_all",
            basedir=self.out_f("indevice_base"),
            workdir=self.out_f("indevice_perfect"),
            pairs_output_folder=self.out_f("pairs"),
            date=self.date,
        )

    def output(self):
        """Return the YT targets: pair tables first, then the two in-device tables."""
        perfect_folder = self.out_f("indevice_perfect")
        pairs_folder = self.out_f("pairs")

        table_paths = [pairs_folder + "dev_yuid_pairs_" + name for name in config.DEVID_PAIRS_NAMES_PERFECT]
        table_paths.append(perfect_folder + "dev_yuid_perfect")
        table_paths.append(perfect_folder + "dev_yuid_indevice_perfect_no_limit_tmp")

        return [yt_luigi.YtTarget(path) for path in table_paths]
