"""
Here we are trying to process email-login tables
"""
from crypta.graph.fuzzy.lib.luiger import BaseTask, DateParameter
from crypta.graph.fuzzy.lib.common import cached_property
from yt.wrapper import create_table_switch, with_context as yt_with_context, OperationsTracker, yson
from extract_all_emails_task import ExtractAllEmailsAndLoginsTask

import crypta.graph.fuzzy.lib.config as conf


@yt_with_context
def reduce_all_matched_logins(keys, records, context):
    """
    Collect all different emails that share the same login.

    Nothing is emitted when the group has more than 20 records or fewer than
    two distinct emails. Otherwise the reducer writes:

    * to output table 0 - one summary row with the login, the email -> yuids
      mapping and ``howmany`` (the number of records read, not the number of
      distinct emails);
    * to output table 1 - one row for every unordered pair of distinct emails,
      with emails ordered so that ``email_1`` sorts before ``email_2``.

    :param keys: reduce key; only ``keys["login"]`` is read
    :param records: records of the group; ``email`` and ``yuids`` are read
    :param context: passed in because of ``yt_with_context``; not used here
    """
    max_records = 20
    login = keys["login"]
    all_emails = dict()
    counter = 0
    for record in records:
        counter += 1
        # a repeated email overwrites the yuids stored for it earlier
        all_emails[record["email"]] = record["yuids"]
        if counter > max_records:
            # too many records for a single login: skip the whole group
            return
    if len(all_emails) > 1:
        yield create_table_switch(0)
        yield {"login": login, "all_emails": all_emails, "howmany": counter}
        # items() (not the Python-2-only iteritems()) keeps this runnable on
        # both Python 2 and Python 3
        sorted_all_emails = sorted(all_emails.items(), key=lambda pair: pair[0])
        for i, email_pair_first in enumerate(sorted_all_emails):
            for email_pair_second in sorted_all_emails[i + 1 :]:
                yield create_table_switch(1)
                yield {
                    "login": login,
                    "email_1": email_pair_first[0],
                    "yuids_1": email_pair_first[1],
                    "email_2": email_pair_second[0],
                    "yuids_2": email_pair_second[1],
                }


def extract_yuids_mapper(record):
    """
    Emit every yuid-yuid pair for the two [yuids] groups of a record.

    For each combination of a yuid from ``yuids_1`` and a yuid from
    ``yuids_2`` one row is produced. The pair is ordered by the numeric value
    of the yuid, and each email stays attached to its own yuid.

    :param record: row with ``login``, ``email_1``, ``email_2``, ``yuids_1``
        and ``yuids_2``
    """
    login = record["login"]
    email_1 = record["email_1"]
    email_2 = record["email_2"]
    yuids_1 = record["yuids_1"]
    yuids_2 = record["yuids_2"]
    for yuid_1 in yuids_1:
        for yuid_2 in yuids_2:
            # Order by int(yuid): extract_yuids_reducer compares the int()
            # values and drops rows where left >= right, so ordering by the
            # raw value would lose pairs whenever that order differs from the
            # numeric one (e.g. digit strings of different length).
            # NOTE(review): assumes yuids are integers or digit strings, as
            # the reducer's int() conversion already requires - confirm.
            sorted_group = sorted(((yuid_1, email_1), (yuid_2, email_2)), key=lambda x: int(x[0]))
            yield {
                conf.Constants.YUID_LEFT: sorted_group[0][0],
                conf.Constants.YUID_RIGHT: sorted_group[1][0],
                "email_left": sorted_group[0][1],
                "email_right": sorted_group[1][1],
                "login": login,
            }


def extract_yuids_reducer(key, records):
    """
    Merge all rows of one (left yuid, right yuid) key into a single row.

    Both yuids are converted to ``YsonUint64``. Keys whose left yuid is not
    strictly smaller than the right one produce no output. Otherwise a single
    row is emitted whose ``match`` field maps every login of the group to its
    ``email_left`` / ``email_right`` pair; for a repeated login the last
    record wins.

    :param key: reduce key holding the left and the right yuid
    :param records: rows of the group; ``login``, ``email_left`` and
        ``email_right`` are read
    """
    left_field = conf.Constants.YUID_LEFT
    right_field = conf.Constants.YUID_RIGHT
    left_yuid = yson.YsonUint64(int(key[left_field]))
    right_yuid = yson.YsonUint64(int(key[right_field]))
    if left_yuid >= right_yuid:
        return
    emails_by_login = {
        row["login"]: {"email_left": row["email_left"], "email_right": row["email_right"]}
        for row in records
    }
    yield {left_field: left_yuid, right_field: right_yuid, "match": emails_by_login}


class ProcessExtractedLoginsTask(BaseTask):
    """
    Task that processes the all_email_logins table into several derived tables
    """

    date = DateParameter()

    def requires(self):
        """
        task that has to be done before this one can run
        """
        return ExtractAllEmailsAndLoginsTask(date=self.date)

    def output(self):
        """
        targets produced by this task
        """
        yuid_pairs_table = conf.Paths.sources.emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN
        target = self.yt.targets.table_is_actual(yuid_pairs_table, self.date.isoformat())
        return [target]

    @cached_property
    def source_type(self):
        return conf.SourceTypes.EMAIL_LOGIN

    @cached_property
    def destination(self):
        return conf.Paths.sources.emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN

    @cached_property
    def destination_schema(self):
        return conf.Paths.sources.emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN_SCHEMA

    def _run(self):
        """
        Pipeline over the all_email_logins table:
        sort it by login,
        reduce every login group into a grouped table and an email pairs table,
        sort both of them,
        build the yuid pairs table from the email pairs with a map-reduce and sort it,
        then stamp every produced table with the generation date
        """
        emails = conf.Paths.sources.emails

        self.yt.run_sort(
            emails.ALL_EMAIL_LOGINS_TABLE,
            emails.ALL_EMAILS_SORTED_BY_LOGIN,
            sort_by=["login"],
        )

        # both reduce outputs are recreated from scratch with a strict schema
        reduce_outputs = (
            (emails.ALL_EMAILS_GROUPED_BY_LOGIN, emails.ALL_EMAILS_GROUPED_BY_LOGIN_SCHEMA),
            (emails.ALL_EMAIL_LOGINS_PAIRS_TABLE, emails.ALL_EMAIL_LOGINS_PAIRS_TABLE_SCHEMA),
        )
        for table, schema in reduce_outputs:
            self.yt.create_table_with_schema(table, schema, strict=True, recreate_if_exists=True)

        self.yt.run_reduce(
            reduce_all_matched_logins,
            emails.ALL_EMAILS_SORTED_BY_LOGIN,
            [table for table, _ in reduce_outputs],
            reduce_by=["login"],
        )

        # the two sorts are started with sync=False and awaited together
        pending_sorts = [
            self.yt.run_sort(emails.ALL_EMAILS_GROUPED_BY_LOGIN, sort_by=["howmany"], sync=False),
            self.yt.run_sort(emails.ALL_EMAIL_LOGINS_PAIRS_TABLE, sort_by=["email_1", "email_2"], sync=False),
        ]
        tracker = OperationsTracker()
        for operation in pending_sorts:
            tracker.add(operation)
        tracker.wait_all()

        self.yt.create_table_with_schema(
            emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN,
            emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN_SCHEMA,
            strict=True,
            recreate_if_exists=True,
        )

        yuid_pair_key = [conf.Constants.YUID_LEFT, conf.Constants.YUID_RIGHT]
        self.yt.run_map_reduce(
            extract_yuids_mapper,
            extract_yuids_reducer,
            emails.ALL_EMAIL_LOGINS_PAIRS_TABLE,
            emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN,
            reduce_by=yuid_pair_key,
        )

        self.yt.run_sort(
            emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN,
            sort_by=[conf.Constants.YUID_LEFT, conf.Constants.YUID_RIGHT],
        )

        # mark every table written above with the date it was generated for
        stamped_tables = (
            emails.ALL_EMAILS_SORTED_BY_LOGIN,
            emails.ALL_EMAILS_GROUPED_BY_LOGIN,
            emails.ALL_EMAIL_LOGINS_PAIRS_TABLE,
            emails.ALL_YUID_PAIRS_FROM_EMAIL_LOGIN,
        )
        for table in stamped_tables:
            self.yt.set(table + "/@generate_date", self.date.isoformat())
