#!/usr/bin/python
# coding=utf-8
import itertools

import luigi
import yt.wrapper as yt

from lib.luigi import yt_luigi
from matching.device_matching.app_metrica import app_metrica_to_old_formats
from matching.device_matching.perfect.config import merge_configs_daily, merge_configs_monthly, merge_configs
from matching.device_matching.perfect.enrich import devid_mac_vmetro, devid_email_phone_am
from matching.device_matching.perfect.merge_month_perfect import DeviceYuidsMergeMonthTask
from matching.yuid_matching.enrich import puid_yuid_passport
from rtcconf import config
from rtcconf.config import ID_TYPE_DEVID, ID_TYPE_UUID, ID_TYPE_PUID, ID_TYPE_YUID
from utils import mr_utils as mr
from utils import utils

# Upper bound on the number of records handled in memory for a single join key
# (30 * 100 = 3000). It is passed as oom_limit to mr.split_left_right and used
# as the cap on buffered source records in reduce_join_info.
OOM_CHECK = 30 * 100


def reduce_join_yuid_by_puid(key, recs):
    """Join-reduce step: cross-join left and right records sharing one puid.

    Every left record is combined with every right record; the output row is
    a copy of the left record with 'yuid' and 'match_chain' taken from the
    right record. The match chain additionally gets a passport puid entry
    for the current key.

    :param key: reduce key; must contain 'puid'.
    :param recs: records for this key. They are split into two groups by
        mr.split_left_right (presumably by input table - confirm in mr_utils).
    :return: generator of output rows. If the key has more records than
        OOM_CHECK allows, a single diagnostic row with the key columns and
        'recs_count' is emitted to output table 1 and nothing is joined.
    """
    try:
        uuid_puid_recs, puid_yuid_recs = mr.split_left_right(recs, oom_limit=OOM_CHECK)
    except mr.OomLimitException as oom:
        # Too many records for this key: report it instead of joining.
        err_out = {'recs_count': oom.recs_count, '@table_index': 1}
        err_out.update(key)
        yield err_out
        return

    for uuid_puid_rec in uuid_puid_recs:
        for puid_yuid_rec in puid_yuid_recs:
            out_rec = dict(uuid_puid_rec)

            # Work on a copy: the same puid_yuid_rec is reused for every left
            # record, so updating its match_chain in place would modify the
            # input record and make all output rows share one dict object.
            match_chain = dict(puid_yuid_rec['match_chain'])
            # NOTE(review): this replaces any existing ID_TYPE_PUID entry of the
            # chain wholesale rather than merging into it - confirm intended.
            match_chain[config.ID_TYPE_PUID] = {
                config.ID_SOURCE_TYPE_PASSPORT: {
                    key['puid']: {}  # date? what date?
                }
            }

            out_rec['match_chain'] = match_chain
            out_rec['yuid'] = puid_yuid_rec['yuid']
            yield out_rec


def join_info_async_op(source_table, join_table, target_table, join_by, join_columns_mapping):
    """Start an asynchronous join-reduce that copies info columns into source rows.

    :param source_table: table whose records get enriched.
    :param join_table: info table; read as a foreign table restricted to the
        join_by column plus the keys of join_columns_mapping.
    :param target_table: output for enriched records. Records that arrive
        while no info record is known go to target_table + '_not_found';
        records beyond the OOM_CHECK buffer cap go to target_table + '_oom'.
    :param join_by: name of the join key column.
    :param join_columns_mapping: dict {info column: output column}.
    :return: result of yt.run_join_reduce called with sync=False.
    """
    yamr_format = False

    def reduce_join_info(_, recs):
        found_info = None
        buffered = []
        for row in recs:
            # info table should go first
            from_info_table = row['@table_index'] == 0
            if from_info_table and ('good' not in row or utils.is_true(row.get('good'))):
                found_info = row
                continue

            # NOTE(review): an info-table row that fails the 'good' check also
            # reaches the branches below and is routed like a source row.
            if not found_info:
                row['@table_index'] = 1
                yield row
                continue

            if len(buffered) >= OOM_CHECK:
                row['@table_index'] = 2
                yield row
                continue

            buffered.append(row)

        if not buffered:
            return

        # Collect the non-empty info values under their output column names.
        extra_columns = {}
        for src_column, dst_column in join_columns_mapping.iteritems():
            if yamr_format:
                value = mr.get_field_value(src_column, found_info['value'])
            else:
                value = found_info.get(src_column)
            if value:
                extra_columns[dst_column] = value

        for row in buffered:
            row.update(extra_columns)
            row['@table_index'] = 0
            yield row

    info_columns = [join_by] + join_columns_mapping.keys()
    foreign_info_table = yt.TablePath(join_table,
                                      columns=info_columns,
                                      attributes={'foreign': True})

    input_tables = [foreign_info_table, source_table]
    output_tables = [target_table, target_table + '_not_found', target_table + '_oom']

    return yt.run_join_reduce(reduce_join_info, input_tables, output_tables,
                              join_by=join_by, sync=False)


def join_devid_ua_profile_by_devid(devid_yuid_month_tables, dev_info_dict):
    """Add 'uuid_ua_profile' to every devid-yuid table by joining on 'devid'.

    One asynchronous join per state is started, writing to
    '<current_table>_ua'. After all joins have finished, each state is
    switched to its '_ua' table.

    :param devid_yuid_month_tables: states exposing current_table and
        change_current_table().
    :param dev_info_dict: info table providing the 'ua_profile' column.
    :return: list of values returned by change_current_table() per state.
    """
    suffix = '_ua'

    running_ops = []
    for state in devid_yuid_month_tables:
        running_ops.append(join_info_async_op(
            source_table=state.current_table,
            join_table=dev_info_dict,
            target_table=state.current_table + suffix,
            join_by='devid',
            join_columns_mapping={'ua_profile': 'uuid_ua_profile'},
        ))
    utils.wait_all(running_ops)

    switched = []
    for state in devid_yuid_month_tables:
        switched.append(state.change_current_table(state.current_table + suffix))
    return switched


def join_devid_ua_profile_by_uuid(uuid_yuid_month_tables, uuid_dev_info_dict):
    """Add 'devid' and 'uuid_ua_profile' to every uuid-yuid table by joining on 'uuid'.

    One asynchronous join per state is started, writing to
    '<current_table>_devid_ua'. After all joins have finished, each state is
    switched to its '_devid_ua' table.

    :param uuid_yuid_month_tables: states exposing current_table and
        change_current_table().
    :param uuid_dev_info_dict: info table providing 'ua_profile' and 'devid'.
    :return: list of values returned by change_current_table() per state.
    """
    suffix = '_devid_ua'

    running_ops = []
    for state in uuid_yuid_month_tables:
        running_ops.append(join_info_async_op(
            source_table=state.current_table,
            join_table=uuid_dev_info_dict,
            target_table=state.current_table + suffix,
            join_by='uuid',
            join_columns_mapping={'ua_profile': 'uuid_ua_profile', 'devid': 'devid'},
        ))
    utils.wait_all(running_ops)

    switched = []
    for state in uuid_yuid_month_tables:
        switched.append(state.change_current_table(state.current_table + suffix))
    return switched


def join_devid_by_devidhash(devidhash_yuid_month_tables, devid_hash_dict):
    """Add 'devid' to every devidhash-yuid table by joining on 'devidhash'.

    The current tables are first sorted by 'devidhash', then one asynchronous
    join per state is started, writing to '<current_table>_devid'. After all
    joins have finished, each state is switched to its '_devid' table.

    :param devidhash_yuid_month_tables: states exposing current_table and
        change_current_table().
    :param devid_hash_dict: info table providing the 'devid' column.
    :return: list of values returned by change_current_table() per state.
    """
    suffix = '_devid'

    tables_to_sort = [state.current_table for state in devidhash_yuid_month_tables]
    mr.sort_all(tables_to_sort, sort_by='devidhash')

    running_ops = []
    for state in devidhash_yuid_month_tables:
        running_ops.append(join_info_async_op(
            source_table=state.current_table,
            join_table=devid_hash_dict,
            target_table=state.current_table + suffix,
            join_by='devidhash',
            join_columns_mapping={'devid': 'devid'},
        ))
    utils.wait_all(running_ops)

    switched = []
    for state in devidhash_yuid_month_tables:
        switched.append(state.change_current_table(state.current_table + suffix))
    return switched


def join_yuid_by_puid(puid_month_tables, puid_yuid_table):
    """Join puid-keyed pair tables with the puid-yuid dict to obtain yuid pairs.

    The current tables are sorted by 'puid', then one asynchronous join-reduce
    per state is started with puid_yuid_table as the foreign input. Joined
    rows go to '<current_table>_yuid', over-limit keys to
    '<current_table>_yuid_oom'. Each state is switched to its '_yuid' table
    right after its operation is started.

    :param puid_month_tables: states exposing current_table and
        change_current_table(); modified in place.
    :param puid_yuid_table: path of the puid-yuid dictionary table.
    :return: the same puid_month_tables object, after all operations finished.
    """
    foreign_dict = yt.TablePath(puid_yuid_table, attributes={'foreign': True})

    mr.sort_all([st.current_table for st in puid_month_tables], sort_by='puid')

    pending = []
    for st in puid_month_tables:
        joined_table = st.current_table + '_yuid'
        operation = yt.run_join_reduce(
            reduce_join_yuid_by_puid,
            [st.current_table, foreign_dict],
            [joined_table, joined_table + '_oom'],
            join_by='puid',
            sync=False,
        )
        pending.append(operation)
        st.change_current_table(joined_table)

    utils.wait_all(pending)
    return puid_month_tables


class MergeState(object):
    """Couples a merge config with the table that currently holds its data."""

    def __init__(self, merge_config, current_table):
        self.current_table = current_table
        self.merge_config = merge_config

    def change_current_table(self, current_table):
        """Point the state at another table and return the state itself."""
        self.current_table = current_table
        return self

    @staticmethod
    def tables_by_source(merge_states):
        """Yield (source_type, [current_table, ...]) pairs ordered by source_type."""
        def source_of(state):
            return state.merge_config.source_type

        # groupby only merges adjacent items, hence the sort by the same key.
        ordered_states = sorted(merge_states, key=source_of)
        for source_type, same_source in itertools.groupby(ordered_states, key=source_of):
            tables = [state.current_table for state in same_source]
            yield source_type, tables

    def __repr__(self):
        return ': '.join((self.merge_config.source_type, self.current_table))


def enrich_to_devid_yuid(dict_folder, workdir, out_dir):
    """Convert the monthly pair tables into devid-yuid tables, one per source type.

    Steps, in order:
      1. puid-uuid and puid-devid tables are joined with the puid-yuid dict;
      2. the results are added to the existing uuid-yuid / devid-yuid tables;
      3. both sets are sorted by their key and joined with the device info
         dicts to add the ua profile (and devid for uuid tables);
      4. enriched tables are grouped by source type and sorted by
         (devid, yuid) into out_dir + 'devid_yuid_<source_type>'.

    :param dict_folder: path prefix of the dict tables; table names are
        appended to it directly.
    :param workdir: path prefix of the input tables; config table names are
        appended to it directly.
    :param out_dir: path prefix of the output tables; 'devid_yuid_<source_type>'
        is appended to it directly.
    """
    # TODO: REWRITE!!!

    def states_for(configs, id1, id2):
        # One MergeState per config describing an (id1, id2) pair.
        return [MergeState(cfg, workdir + cfg.table_name())
                for cfg in configs
                if cfg.id1 == id1 and cfg.id2 == id2]

    puid_yuid_dict = dict_folder + 'puid_yuid_limit'

    # enrich different types of pairs to uuid -> yuid and devid -> yuid
    uuid_via_puid = join_yuid_by_puid(
        states_for(merge_configs_daily, ID_TYPE_PUID, ID_TYPE_UUID),
        puid_yuid_dict
    )
    devid_via_puid = join_yuid_by_puid(
        states_for(merge_configs_daily, ID_TYPE_PUID, ID_TYPE_DEVID),
        puid_yuid_dict
    )

    # TODO: no devid_hash anymore
    # devidhash_yuid_month_tables = [MergeState(m, workdir + m.table_name())
    #                                for m in merge_configs_daily
    #                                if m.id1 == ID_TYPE_DEVID_HASH]
    # devid_track_yuid_tables = join_devid_by_devidhash(devidhash_yuid_month_tables, workdir + 'devid_hash')

    # all already existing uuid-yuid pairs plus the newly created (enriched) ones
    uuid_states = states_for(merge_configs_daily, ID_TYPE_UUID, ID_TYPE_YUID)
    uuid_states.extend(uuid_via_puid)

    # all already existing devid-yuid pairs plus the newly created (enriched) ones
    devid_states = states_for(merge_configs_daily + merge_configs_monthly,
                              ID_TYPE_DEVID, ID_TYPE_YUID)
    # devid_states.extend(devid_track_yuid_tables)
    devid_states.extend(devid_via_puid)

    # join uuid_dev_info for two purposes:
    # 1) add devid to uuid
    # 2) add uuid ua profile to separate indevice/crossdevice later
    mr.sort_all([state.current_table for state in uuid_states], sort_by='uuid')
    mr.sort_all([state.current_table for state in devid_states], sort_by='devid')

    enriched_states = []
    enriched_states.extend(
        join_devid_ua_profile_by_uuid(uuid_states, dict_folder + 'uuid_dev_info_yt'))
    enriched_states.extend(
        join_devid_ua_profile_by_devid(devid_states, dict_folder + 'dev_info_yt'))

    # clean copy of all enriched table to final devid_yuid_X tables
    final_sorts = [
        yt.run_sort(tables,
                    out_dir + 'devid_yuid_' + source_type,
                    sort_by=['devid', 'yuid'],
                    sync=False)
        for source_type, tables in MergeState.tables_by_source(enriched_states)
    ]
    utils.wait_all(final_sorts)


class EnrichToDevidYuidPairs(yt_luigi.BaseYtTask):
    """Luigi task producing the final 'devid_month/devid_yuid_<source_type>' tables.

    Reads the raw monthly pair tables from '<perfect>/devid_raw_month/' and the
    graph dicts, and writes the enriched devid-yuid tables to
    '<perfect>/devid_month/' via enrich_to_devid_yuid().
    """
    date = luigi.Parameter()

    def input_folders(self):
        return {
            'device_perfect': config.INDEVICE_YT_FOLDER + self.date + '/perfect/',
            'dict': config.GRAPH_YT_DICTS_FOLDER,
        }

    def output_folders(self):
        return {
            'device_perfect': config.INDEVICE_YT_FOLDER + self.date + '/perfect/',
        }

    def requires(self):
        # there are only two tasks that produce devid-yuid pairs
        # using some intermediate identifier
        manual_devid_yuid_expansion_tasks = [
            devid_email_phone_am.AccountManagerPhoneEmailPairsTask(self.date),  # using email and phone hashes
            devid_mac_vmetro.JoinDevidYuidByMac(self.date),  # using mac address
        ]

        dict_tasks = [
            puid_yuid_passport.PuidYuidMappingTask(self.date),  # puid-yuid dict
            app_metrica_to_old_formats.ConvertAppMetricaDictsToOldFormats(self.date),  # uuid-devid dict
        ]

        # noinspection PyTypeChecker
        return [DeviceYuidsMergeMonthTask(self.date)] + manual_devid_yuid_expansion_tasks + dict_tasks

    def run(self):
        devid_raw_month = self.in_f('device_perfect') + 'devid_raw_month/'
        devid_month = self.out_f('device_perfect') + 'devid_month/'
        mr.mkdir(devid_month)

        # the only table that doesn't have appropriate sorting by default
        devid_hash_tmp = devid_raw_month + 'devid_hash'
        yt.run_sort(self.in_f('dict') + 'devid_hash', devid_hash_tmp, sort_by='devidhash')

        try:
            enrich_to_devid_yuid(self.in_f('dict'), devid_raw_month, devid_month)
        finally:
            # Remove the temporary sorted copy even when enrichment fails,
            # so a failed run does not leave it behind in the work folder.
            mr.drop(devid_hash_tmp)

    def output(self):
        # One target per configured source type.
        return [yt_luigi.YtTarget(self.out_f('device_perfect') + 'devid_month/devid_yuid_' + m.source_type)
                for m in merge_configs]


if __name__ == '__main__':
    # Manual debug run against a fixed testing folder.
    yt.config.set_proxy(config.MR_SERVER)
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)

    workdir = '//home/crypta/testing/state/indevice/2017-12-01/perfect/devid_raw_month/'
    mr.mkdir(workdir)

    # enrich_to_devid_yuid() appends 'devid_yuid_<source_type>' directly to
    # out_dir, so it has to be a folder path ending with '/'; without the
    # slash the tables would be named '<workdir>outdevid_yuid_<source_type>'.
    out_dir = workdir + 'out/'
    mr.mkdir(out_dir)

    enrich_to_devid_yuid(config.GRAPH_YT_DICTS_FOLDER, workdir, out_dir)


