# coding=utf-8

import luigi
import yt.wrapper as yt

from data_imports.import_dumps.graph_passport_dump import ImportPassportStatboxHeavyDict
from lib.luigi import yt_luigi
from matching.yuid_matching.enrich.org_emails_classify import OrgEmailsClassifyTask
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDumpTable


def reduce_join_puid_yuid(puid_key, recs):
    """
    Reducer that expands puid-keyed id records into yuid-keyed ones.

    ``recs`` is split with ``mr.split_left_right`` into puid-id records and
    puid-yuid records (presumably by input table order - confirm in mr_utils).
    Every puid-id record is emitted once per yuid known for the same puid,
    with the ``yuid`` field added, into output table 0.

    If the puid-id record has no ``id_date`` of its own, the activity dates
    are borrowed from the FP login activity stored in the ``match_chain`` of
    the puid-yuid record: one output record is emitted per login date.

    Every emitted record is a fresh dict. The source record is never mutated,
    otherwise a date borrowed for one yuid would look like the record's own
    date for all following yuids of the same puid and would be emitted
    instead of the dates of those yuids.

    :param puid_key: reduce key (mapping), copied into the error record on overflow
    :param recs: records of both input tables for this key
    :return: generator of output records; table 1 receives a single error
             record when ``mr.split_left_right`` raises ``mr.OomLimitException``
    """
    try:
        puid_id_recs, puid_yuid_recs = mr.split_left_right(recs)
    except mr.OomLimitException as oom:
        err_out = {'recs_count': oom.recs_count, '@table_index': 1}
        err_out.update(puid_key)
        yield err_out
        return

    for puid_id_rec in puid_id_recs:
        # decided once on the untouched source record, see docstring
        has_own_date = bool(puid_id_rec.get('id_date'))

        for puid_yuid_rec in puid_yuid_recs:
            out_rec = dict(puid_id_rec)
            out_rec['yuid'] = puid_yuid_rec['yuid']
            out_rec['@table_index'] = 0

            if has_own_date:
                yield out_rec
                continue

            # puid_id source is dict and doesn't contain activity dates, let's borrow activity from login
            login_activity = puid_yuid_rec['match_chain'][config.ID_TYPE_LOGIN][config.ID_SOURCE_TYPE_FP]
            # Dates of the first login only. The default protects from StopIteration on empty
            # activity, which would otherwise silently stop the whole reducer for this key.
            login_dates = next(iter(login_activity.values()), ())
            for login_date in login_dates:
                dated_rec = dict(out_rec)
                dated_rec['id_date'] = login_date
                yield dated_rec


def join_login_to_puid(puid_key, recs):
    """
    Reducer that replaces puid with login in direct FP yuid-puid pairs.

    ``recs`` is split with ``mr.split_left_right`` into yuid-puid pairs and
    puid-login dictionary records. When both sides are present, every pair is
    rewritten using the first dictionary record and emitted to table 0.
    On ``mr.OomLimitException`` a single error record goes to table 1.

    :param puid_key: reduce key (mapping), copied into the error record
    :param recs: records of both input tables for this key
    """
    try:
        fp_pairs, login_rows = mr.split_left_right(recs)
        if not (fp_pairs and login_rows):
            # nothing to join for this key
            return

        dict_row = login_rows[0]
        for pair in fp_pairs:
            pair.update({
                'puid': dict_row['id_value'],  # for stats
                'id_value': dict_row['login'],
                # hack: the pair is masked as login_fp so that login unsplicing
                # logic in graph_dict can stay untouched
                'id_type': config.ID_TYPE_LOGIN,
                'source_type': config.ID_SOURCE_TYPE_FP,
                '@table_index': 0,
            })
            yield pair

    except mr.OomLimitException as oom:
        overflow_rec = {'recs_count': oom.recs_count, '@table_index': 1}
        overflow_rec.update(puid_key)
        yield overflow_rec


def join_pasport_puid_yuid(in_f, workdir, puid_dict, pair_types):
    """
    Join every ``puid_with_<pair_type>`` table with the puid-yuid dictionary.

    All source tables are sorted by ``puid`` first, then one asynchronous
    reduce per pair type is started and all of them are awaited together.

    :param in_f: folder prefix of the ``puid_with_*`` source tables
    :param workdir: folder prefix for ``yuid_puid_with_*`` results and ``*_oom`` error tables
    :param puid_dict: path of the puid-yuid dictionary table (not sorted here)
    :param pair_types: names of the pair types to process
    """
    puid_tables = [in_f + 'puid_with_' + pair_type for pair_type in pair_types]
    mr.sort_all(puid_tables, sort_by='puid')

    running_ops = []
    for pair_type in pair_types:
        sources = [in_f + 'puid_with_' + pair_type, puid_dict]
        destinations = [workdir + 'yuid_puid_with_' + pair_type,
                        workdir + 'yuid_with_' + pair_type + '_oom']
        running_ops.append(
            yt.run_reduce(reduce_join_puid_yuid, sources, destinations,
                          reduce_by='puid', sync=False))

    utils.wait_all(running_ops)


def join_passport_login_to_puid(in_f, out_f, puid_login_dict):
    """
    Turn direct FP yuid-puid pairs into yuid-login pairs.

    The pairs table is sorted by ``id_value`` and reduced together with the
    puid-login dictionary by ``join_login_to_puid``.

    :param in_f: folder prefix of the source ``yuid_with_*`` table
    :param out_f: folder prefix for the resulting login table and its ``_oom`` table
    :param puid_login_dict: path of the puid-login dictionary, assumed to be already sorted
    """
    direct_pairs_table = in_f + 'yuid_with_' + config.ID_TYPE_PUID + '_' + config.ID_SOURCE_TYPE_FP
    yt.run_sort(direct_pairs_table, sort_by='id_value')  # assume dict sorted

    login_table = out_f + 'yuid_with_' + config.ID_TYPE_LOGIN + '_' + config.ID_SOURCE_TYPE_PASSPORT_SERVER
    yt.run_reduce(join_login_to_puid,
                  [direct_pairs_table, puid_login_dict],
                  [login_table, login_table + '_oom'],
                  reduce_by='id_value')


def reduce_yuid_login(login_key, recs):
    """
    Reducer that builds login-yuid matching from yuid-puid FP pairs.

    ``recs`` is split with ``mr.split_left_right`` into dictionary records
    (carrying ``login``) and yuid-puid records. Output tables by index:

    * 0 - ``key``/``subkey``/``value`` record (login, 'py', yuid)
    * 1 - login-yuid record with match chain
    * 2 - same as 1, only when the number of yuid records for the key does
      not exceed the strict limit configured for the puid pair type
    * 3 - error record, no dictionary record for the key
    * 4 - error record, ``mr.OomLimitException`` during split

    :param login_key: reduce key; its ``id_value`` is written to error records
    :param recs: records of both input tables for this key
    """
    try:
        dict_rows, pair_rows = mr.split_left_right(recs)
    except mr.OomLimitException as oom:
        yield {'login': login_key['id_value'], 'recs_count': oom.recs_count, '@table_index': 4}
        return

    if not dict_rows:
        yield {'login': login_key['id_value'], 'err': 'no puid', '@table_index': 3}
        return

    strict_limit = config.YUID_PAIR_TYPES_DICT[config.ID_TYPE_PUID].yuids_per_id_strict_limit
    login = dict_rows[0]['login']

    dates_column = config.ID_TYPE_PUID + '_' + config.ID_SOURCE_TYPE_FP + '_dates'
    pair_count = len(pair_rows)

    for row in pair_rows:
        yuid = row['yuid']
        puid = row['id_value']
        # match chain keeps the dates on which this exact puid was seen with the yuid
        chain = {config.ID_TYPE_PUID: {config.ID_SOURCE_TYPE_FP: {puid: row[dates_column][puid]}}}

        yield {'key': login, 'subkey': 'py', 'value': yuid}
        yield {'login': login, 'yuid': yuid, 'match_chain': chain, '@table_index': 1}
        if pair_count <= strict_limit:
            yield {'login': login, 'yuid': yuid, 'match_chain': chain, '@table_index': 2}


class ExpandPuidYuidMatching(yt_luigi.BaseYtTask):
    """
    Many services report only puid-some_id pairs.
    This task enriches such pairs with yuids using the puid-yuid matching
    taken from the previous day.
    """
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        def source(id_type, source_type, has_yuid_pairs):
            # table name suffix plus the "source also has yuid-X pairs" flag
            return id_type + '_' + source_type, has_yuid_pairs

        # The flag tells whether the source provides both puid-X and yuid-X pairs.
        # If it does, the yuid-puid-X table built by this task has to be merged
        # into the yuid-X table already created by graph_merge_month.
        self.puid_sources = [
            source(config.ID_TYPE_PHONE, config.ID_SOURCE_TYPE_PASSPORT, False),
            source(config.ID_TYPE_PHONE, config.ID_SOURCE_TYPE_PASSPORT_SENSITIVE, False),
            source(config.ID_TYPE_PHONE, config.ID_SOURCE_TYPE_PASSPORT_DUMP, False),

            source(config.ID_TYPE_PHONE, config.ID_SOURCE_TYPE_SOCIAL, False),
            source(config.ID_TYPE_EMAIL, config.ID_SOURCE_TYPE_SOCIAL, False),

            source(config.ID_TYPE_PHONE, config.ID_SOURCE_TYPE_YAMONEY, True),
            source(config.ID_TYPE_EMAIL, config.ID_SOURCE_TYPE_YAMONEY, True),
            source(config.ID_TYPE_YAMONEY_ACCOUNT, config.ID_SOURCE_TYPE_YAMONEY, True),
            source(config.ID_TYPE_YAMONEY_CARD_TOKEN, config.ID_SOURCE_TYPE_YAMONEY, True),
            source(config.ID_TYPE_YAMONEY_INTERNAL, config.ID_SOURCE_TYPE_YAMONEY, True),

            # disabled sources:
            # source(config.ID_TYPE_INSTAGRAM_ID, config.ID_SOURCE_TYPE_INSTAGRAM_POCHTA, False),
            # source(config.ID_TYPE_INSTAGRAM_LOGIN, config.ID_SOURCE_TYPE_INSTAGRAM_POCHTA, False),
        ]
        if config.HAS_YANDEX_TICKETS == 'yes':
            self.puid_sources += [
                source(config.ID_TYPE_EMAIL, config.ID_SOURCE_TYPE_TICKETS, True),
                source(config.ID_TYPE_PHONE, config.ID_SOURCE_TYPE_TICKETS, True),
            ]

        # tables that are created by this task itself rather than expanded from puid sources
        self.this_task_produces = [
            source(config.ID_TYPE_LOGIN, config.ID_SOURCE_TYPE_PASSPORT_SERVER, False),
        ]
        super(ExpandPuidYuidMatching, self).__init__(*args, **kwargs)

    def input_folders(self):
        dicts_folder = config.GRAPH_YT_DICTS_FOLDER
        return {
            'dict': dicts_folder,
            'yuid_raw_month': dicts_folder + 'yuid_raw/',
        }

    def output_folders(self):
        return {'yuid_raw_month': config.GRAPH_YT_DICTS_FOLDER + 'yuid_raw/'}

    def requires(self):
        # imported locally, as in the rest of the pipeline modules
        from data_imports.import_dumps import graph_passport_dump
        from data_imports.import_dumps.social import graph_social_auth
        from data_imports.import_dumps import graph_yamoney
        from data_imports.import_dumps import graph_tickets
        from data_imports.import_logs import graph_import_fp
        from data_imports.import_logs.webvisor import graph_webvisor
        from matching.yuid_matching import graph_merge_month

        dependencies = [
            yt_luigi.YesterdayDictInput('puid_yuid_yt'),
            yt_luigi.YesterdayDictInput('passport/puid_login'),
            graph_merge_month.IncrementalDayAndDumpMergeTask(self.date),
            graph_merge_month.FullMonthYuidMergeTask(self.date),
            graph_passport_dump.ImportPassportDump(self.date),
            graph_social_auth.ImportSocialAuthDump(self.date),
            graph_yamoney.ImportYandexMoneyDump(self.date),
            graph_import_fp.ImportFPDayTask(date=self.date, run_date=self.date),
            graph_webvisor.ImportWebvisorTask(date=self.date, run_date=self.date),
        ]
        if config.HAS_YANDEX_TICKETS == 'yes':
            dependencies.append(graph_tickets.ImportYandexTicketsDump(self.date))
        return dependencies

    def run(self):
        raw_in = self.in_f('yuid_raw_month')
        dicts = self.in_f('dict')
        raw_out = self.out_f('yuid_raw_month')

        # yuid-puid is resolved at month merge level, not at day level:
        # this way yesterday's puid-yuid dict can be used and unsplicing stays possible
        source_names = [name for name, _ in self.puid_sources]
        join_pasport_puid_yuid(raw_in, raw_out, dicts + 'puid_yuid_yt', source_names)

        # direct FP puid-yuid pairs are a special case
        join_passport_login_to_puid(raw_in, raw_out, dicts + 'passport/puid_login')

        for source_name, has_yuid_sources in self.puid_sources:
            expanded_table = raw_out + 'yuid_puid_with_' + source_name
            final_table = raw_out + 'yuid_with_' + source_name

            merge_input = expanded_table
            if has_yuid_sources and yt.exists(final_table):
                # the id came from both yuid and puid sources, keep both
                # TODO: duplicates between yuid and puid sources are possible here, think of reduce
                merge_input = [expanded_table, final_table]
            yt.run_merge(merge_input, final_table)

            # the expanded table is temporary
            mr.drop(expanded_table)

        for produced_name, _ in self.puid_sources + self.this_task_produces:
            mr.set_generate_date(raw_out + 'yuid_with_' + produced_name, self.date)

    def output(self):
        table_prefix = self.out_f('yuid_raw_month') + 'yuid_with_'
        targets = []
        for produced_name, _ in self.puid_sources + self.this_task_produces:
            targets.append(yt_luigi.YtDateTarget(table_prefix + produced_name, self.date))
        return targets


def reduce_yuid_puid(login_key, recs):
    """
    Reducer that builds puid-yuid matching from yuid-login FP pairs.

    ``recs`` is split with ``mr.split_left_right`` into login-puid dictionary
    records and yuid-login records. Output tables by index:

    * 0 - ``key``/``subkey``/``value`` record (puid, 'py', yuid)
    * 1 - puid-yuid record with match chain and number of yuids for the login
    * 2 - same as 1, only when that number does not exceed the strict limit
      configured for the login pair type
    * 3 - error record, no dictionary record for the login
    * 4 - error record, ``mr.OomLimitException`` during split

    :param login_key: reduce key; its ``id_value`` is written to error records
    :param recs: records of both input tables for this key
    """
    try:
        dict_rows, pair_rows = mr.split_left_right(recs)
    except mr.OomLimitException as oom:
        yield {'login': login_key['id_value'], 'recs_count': oom.recs_count, '@table_index': 4}
        return

    if not dict_rows:
        yield {'login': login_key['id_value'], 'err': 'no puid', '@table_index': 3}
        return

    strict_limit = config.YUID_PAIR_TYPES_DICT[config.ID_TYPE_LOGIN].yuids_per_id_strict_limit

    puid = dict_rows[0]['puid']
    yuids_count = len(pair_rows)
    dates_column = config.ID_TYPE_LOGIN + '_' + config.ID_SOURCE_TYPE_FP + '_dates'

    for row in pair_rows:
        yuid = row['yuid']
        login = row['id_value']
        # match chain keeps the dates on which this exact login was seen with the yuid
        chain = {config.ID_TYPE_LOGIN: {config.ID_SOURCE_TYPE_FP: {login: row[dates_column][login]}}}

        yield {'key': puid, 'subkey': 'py', 'value': yuid}

        matched = {'puid': puid, 'yuid': yuid, 'match_chain': chain, 'yuids_count': yuids_count}
        full_rec = dict(matched)
        full_rec['@table_index'] = 1
        yield full_rec

        if yuids_count <= strict_limit:
            limited_rec = dict(matched)
            limited_rec['@table_index'] = 2
            yield limited_rec


def map_login_puid(rec):
    """
    Mapper that turns a record with ``login`` and ``uid`` into lookup records.

    Records where either field is missing or empty produce nothing.
    Otherwise three records are emitted: login -> puid (default table),
    puid -> login (table 1) and a soup record built by
    ``SoupDumpTable.make_rec`` (last argument 2 is presumably the table index).

    :param rec: input record (mapping)
    """
    login, puid = rec.get('login', ''), rec.get('uid', '')
    if not (login and puid):
        return

    yield {'id_value': login, 'puid': puid}
    yield {'id_value': puid, 'login': login, '@table_index': 1}
    yield SoupDumpTable.make_rec(puid, login, soup_config.puid_login_passport, [], 2)


class PuidYuidMappingTask(yt_luigi.BaseYtTask):
    """
    Builds puid-yuid dictionaries by joining the passport login-puid
    dictionary with FP yuid-login pairs.
    """
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(PuidYuidMappingTask, self).__init__(*args, **kwargs)
        # created after the base init because it needs the resolved date parameter
        self.soup_storage = SoupDumpTable(soup_config.puid_login_passport, self.date)

    def input_folders(self):
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def output_folders(self):
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def requires(self):
        return [
            OrgEmailsClassifyTask(self.date),
            ImportPassportStatboxHeavyDict(self.date),
        ]

    def run(self):
        login_puid_table = self.in_f('dict') + 'passport/login_puid'
        yuid_login_table = (self.in_f('dict') + 'yuid_with_id_' +
                            config.ID_TYPE_LOGIN + '_' + config.ID_SOURCE_TYPE_FP)

        out_folder = self.out_f('dict')
        mr.mkdir(out_folder + 'passport')

        # output order must match table indexes used by reduce_yuid_puid
        reduce_outputs = [out_folder + name for name in ('puid_yuid',
                                                         'puid_yuid_yt',
                                                         'puid_yuid_limit',
                                                         'passport/puid_not_found',
                                                         'passport/puid_yuid_oom')]
        sort_specs = [
            ('puid_yuid', ['key', 'subkey']),
            ('puid_yuid_yt', 'puid'),
            ('puid_yuid_limit', 'puid'),
        ]

        with yt.Transaction():
            # join yuids
            yt.run_reduce(reduce_yuid_puid,
                          [login_puid_table, yuid_login_table],
                          reduce_outputs,
                          reduce_by='id_value')

            sort_ops = []
            for table_name, sort_columns in sort_specs:
                sort_ops.append(yt.run_sort(out_folder + table_name, sort_by=sort_columns, sync=False))
            utils.wait_all(sort_ops)

        for target in self.output():
            mr.set_generate_date(target.table, self.date)

    def output(self):
        out_folder = self.out_f('dict')
        return [yt_luigi.YtDateTarget(out_folder + table_name, self.date)
                for table_name in ('puid_yuid', 'puid_yuid_yt', 'puid_yuid_limit')]
