from functools import partial

import luigi
import yt.wrapper as yt

from fuzzy_heuristic import match_by_ip_merge_month
from fuzzy_ml import classify
from lib.luigi import yt_luigi
from matching.device_matching.app_metrica import app_metrica_month
from matching.pairs import graph_pair_utils
from perfect import device_yuid_perfect_by_source
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from v2 import ids
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDumpTable

# NOTE(review): this constant (evaluates to 3000) is not referenced anywhere in the
# visible part of this file. The name suggests a threshold for guarding reducers
# against out-of-memory situations - confirm where it is used, or remove it.
OOM_CHECK = 30 * 100


def _non_perfect_score(rec):
    """Return the ``(probability, hits)`` ranking tuple of a non-perfect record.

    Records whose ``type`` field is ``'mn'`` are ranked by their ``prob`` field;
    every other record is ranked by its ``hit`` field. Because the probability is
    the first tuple element, any 'mn' record with a positive probability outranks
    every hit-based record.
    """
    value = rec['value']
    if mr.get_field_value('type', value) == 'mn':
        return float(mr.get_field_value('prob', value)), 0
    return 0, int(mr.get_field_value('hit', value))


def reduce_perfect_non_perfect(key, recs, date):
    """Pick the main yuid for one device id and route links to the output tables.

    Rows are routed with ``@table_index``:

    * 0 - the single main yuid of the device (best perfect record, otherwise the
      best non-perfect one);
    * 1 - yuids of the perfect records that differ from the chosen best perfect
      record, or the best non-perfect yuid when no best perfect record exists;
    * 3 - ``{'id', 'id_type', 'yuid', 'weight'}`` row for a fuzzy main yuid;
    * 4 - ``{'devid', 'yuid', 'date', 'source'}`` row for a fuzzy main yuid.

    For a fuzzy main yuid a row built by ``graph_pair_utils.devid_yuid_pair_rec``
    is yielded as well; its last argument ``2`` is presumably the output table
    index - TODO confirm against that helper.

    :param key: reduce key; ``key['key']`` is the device id.
    :param recs: iterable of records with ``subkey`` (the yuid) and ``value``
        (read through ``mr.get_field_value``).
    :param date: value stored in the ``date`` field of table-4 rows.
    :raises ValueError: if a record has no ``perfect`` field.
    """
    devid = key['key']
    perfect_recs = []
    best_non_perfect_rec = None
    max_score = (0, 0)  # (probability, hits) of the best non-perfect rec so far

    for rec in recs:
        perfect = mr.get_field_value('perfect', rec['value'])
        if not perfect:
            raise ValueError('No perfect field')

        if perfect == '1':
            perfect_recs.append(rec)
            continue

        # Prefer ml-unperfect if possible, otherwise the common unperfect.
        # The comparison is strict, so a record scoring (0, 0) is never selected.
        score = _non_perfect_score(rec)
        if score > max_score:
            max_score = score
            best_non_perfect_rec = rec

    # Select MAIN yandex id
    best_perfect_rec = None
    if perfect_recs:
        # Source types come in decreasing trust level, so the first type that has
        # any record wins; see also CRYPTAIS-501.
        for indevice_perfect_type in config.DEVID_PERFECT_INDEV_PRIORITY:
            for rec in perfect_recs:
                if mr.get_field_value('source', rec['value']) == indevice_perfect_type:
                    best_perfect_rec = rec
                    break
            if best_perfect_rec is not None:
                break

        # Perfect links go to the dev_yuid table.
        # NOTE(review): records equal to the chosen best perfect rec are excluded
        # here, so its yuid reaches table 1 only if a different perfect record
        # carries the same subkey - confirm this is intended.
        perfect_yuids = {rec['subkey'] for rec in perfect_recs if rec != best_perfect_rec}
        for perfect_yuid in perfect_yuids:
            yield {'key': devid, 'subkey': perfect_yuid, 'value': '', '@table_index': 1}

    best_single_rec = best_perfect_rec if best_perfect_rec is not None else best_non_perfect_rec
    if best_single_rec is None:
        # Neither a prioritised perfect record nor a positively scored non-perfect
        # record exists: there is no main yuid to emit. Previously this path
        # crashed with a TypeError on ``None['subkey']``.
        return

    yuid = best_single_rec['subkey']
    # Best goes to dev_yuid_info_ua table
    yield {'key': devid, 'subkey': yuid, 'value': '', '@table_index': 0}

    if best_perfect_rec is None:
        # No perfect main yuid: the best non-perfect one also goes to dev_yuid ...
        yield {'key': devid, 'subkey': yuid, 'value': '', '@table_index': 1}

        # ... and the device is tracked as fuzzy.
        yield graph_pair_utils.devid_yuid_pair_rec(devid, yuid, config.ID_SOURCE_TYPE_FUZZY,
                                                   config.INDEVICE, [], 2)
        # The weight is the probability part of the score (0 for hit-based recs).
        yield {'id': devid, 'id_type': ids.CRYPTA_DEVICE_ID, 'yuid': yuid,
               'weight': max_score[0], '@table_index': 3}

        # add record to dev_yuid_indevice_unperfect_no_limit_tmp
        yield {'devid': devid, 'yuid': yuid, 'date': date,
               'source': config.ID_SOURCE_TYPE_FUZZY + '_' + config.INDEVICE,
               '@table_index': 4}


def reduce_perfect_non_perfect_yuid(_, recs):
    """Keep only perfect links of a yuid when it has any, otherwise its unperfect ones.

    Records are split by their ``perfect`` field (``'1'`` or ``'0'``); records with
    any other value are dropped. Every yielded record is tagged with
    ``@table_index`` 0.

    :param _: reduce key (unused).
    :param recs: iterable of records whose ``value`` holds the ``perfect`` field.
    """
    with_perfect = []
    without_perfect = []
    for candidate in recs:
        flag = mr.get_field_value('perfect', candidate['value'])
        if flag == '1':
            with_perfect.append(candidate)
        elif flag == '0':
            without_perfect.append(candidate)

    # Any perfect link of the yuid suppresses all of its unperfect links.
    selected = with_perfect if with_perfect else without_perfect
    for candidate in selected:
        candidate['@table_index'] = 0
        yield candidate


def reduce_to_soup(devid_key, recs):
    """Turn fuzzy devid-yuid rows into soup records, split by the device OS.

    ``recs`` is split by ``mr.split_left_right`` into device-info rows and fuzzy
    rows. The ``os`` of the first device-info row selects the soup edge type;
    devices that are neither ``'android'`` nor ``'ios'``, and keys missing either
    side, produce nothing.

    :param devid_key: reduce key (unused).
    :param recs: joined rows of both input tables for one key.
    """
    info_side, fuzzy_side = mr.split_left_right(recs)
    if not (info_side and fuzzy_side):
        return

    device_os = info_side[0]['os']
    if device_os == 'android':
        edge_type = soup_config.devid_yuid_andoroid_fuzzy_indev
        out_index = 0
    elif device_os == 'ios':
        edge_type = soup_config.devid_yuid_ios_fuzzy_indev
        out_index = 1
    else:
        return

    # The last argument of make_rec is presumably the output table index
    # (0 - android, 1 - ios) - TODO confirm against SoupDumpTable.make_rec.
    for pair in fuzzy_side:
        yield SoupDumpTable.make_rec(pair['id'], pair['yuid'], edge_type, [], out_index)


def run_add_fuzzy_devid_yuid_pairs(perfect_dir, fuzzy_dir, dev_info_dict, workdir,
                                   pairs_out_dir, soup_android_out_table, soup_ios_out_table, date):
    """Run the YT operations that mix perfect and fuzzy devid-yuid links.

    Steps, in order:

    1. sort the perfect and both unperfect input tables by ``subkey``;
    2. reduce them by ``subkey`` with ``reduce_perfect_non_perfect_yuid`` into
       ``workdir + 'dev_yuid_perfect_unperfect'`` and sort that table by ``key``;
    3. reduce it by ``key`` with ``reduce_perfect_non_perfect`` into five output
       tables (see the inline comments);
    4. sort ``dev_yuid_info_ua`` and the temporary soup table concurrently;
    5. reduce the device-info dictionary together with the temporary soup table
       into the android and ios soup tables with ``reduce_to_soup``.

    :param perfect_dir: folder prefix containing ``dev_yuid_perfect``.
    :param fuzzy_dir: folder prefix containing ``dev_yuid_unperfect`` and
        ``dev_yuid_unperfect_ml``.
    :param dev_info_dict: path of the device-info table joined in step 5.
    :param workdir: folder prefix for intermediate and result tables.
    :param pairs_out_dir: folder prefix for the fuzzy pairs table.
    :param soup_android_out_table: object whose ``table_path()`` is the android
        soup output.
    :param soup_ios_out_table: object whose ``table_path()`` is the ios soup output.
    :param date: passed to ``reduce_perfect_non_perfect`` as its ``date`` argument.
    """
    # The same three paths are used for sorting and for the reduce below.
    # Previously the reduce read the perfect table from workdir + 'perfect/...',
    # which only matched the sorted table by caller convention.
    perfect_table = perfect_dir + 'dev_yuid_perfect'
    unperfect_table = fuzzy_dir + 'dev_yuid_unperfect'
    unperfect_ml_table = fuzzy_dir + 'dev_yuid_unperfect_ml'

    mr.sort_all([perfect_table, unperfect_table, unperfect_ml_table], sort_by='subkey')

    # if yuid has any perfect link, throw all its fuzzy links
    yt.run_reduce(reduce_perfect_non_perfect_yuid,
                  [perfect_table, unperfect_table, unperfect_ml_table],
                  workdir + 'dev_yuid_perfect_unperfect', reduce_by='subkey')

    yt.run_sort(workdir + 'dev_yuid_perfect_unperfect', sort_by='key')

    fuzzy_pairs_out_t = pairs_out_dir + 'dev_yuid_pairs_' + config.ID_SOURCE_TYPE_FUZZY + '_' + config.INDEVICE

    soup_tmp = workdir + 'dev_yuid_fuzzy_to_soup'
    # The position in the output list is the '@table_index' the reducer yields.
    yt.run_reduce(
        partial(reduce_perfect_non_perfect, date=date),
        workdir + 'dev_yuid_perfect_unperfect',
        [
            workdir + 'dev_yuid_info_ua',  # main yuid (single best perfect or fuzzy)
            workdir + 'dev_yuid',  # all perfect recs or single fuzzy if no perfect found
            fuzzy_pairs_out_t,  # fuzzy month
            soup_tmp,
            workdir + 'fuzzy/dev_yuid_indevice_unperfect_no_limit_tmp'  # fuzzy all
        ],
        reduce_by='key'
    )

    # Both sorts are started asynchronously and awaited together.
    utils.wait_all([
        yt.run_sort(workdir + 'dev_yuid_info_ua', sort_by='key', sync=False),
        yt.run_sort(soup_tmp, sort_by=ids.storage_sort_key, sync=False)
    ])

    yt.run_reduce(reduce_to_soup,
                  [dev_info_dict,
                   soup_tmp],
                  [soup_android_out_table.table_path(),
                   soup_ios_out_table.table_path()],
                  reduce_by=ids.storage_sort_key)


class DevidYuidMixPerfectFuzzy(yt_luigi.BaseYtTask):
    """Luigi task mixing perfect and fuzzy in-device devid-yuid links for one date.

    It runs ``run_add_fuzzy_devid_yuid_pairs`` on the dated in-device folders,
    finalizes both soup tables and publishes the resulting ``dev_yuid`` table as
    the ``dev_yuid_indevice`` dictionary.
    """
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(DevidYuidMixPerfectFuzzy, self).__init__(*args, **kwargs)
        # Soup dump tables receiving the android / ios fuzzy in-device edges.
        self.soup_android_out_table = SoupDumpTable(soup_config.devid_yuid_andoroid_fuzzy_indev, self.date)
        self.soup_ios_out_table = SoupDumpTable(soup_config.devid_yuid_ios_fuzzy_indev, self.date)

    def input_folders(self):
        """Return the input folder prefixes keyed by logical name."""
        dated_indevice = config.INDEVICE_YT_FOLDER + self.date
        return {
            'indevice_perfect': dated_indevice + '/perfect/',
            'indevice_fuzzy': dated_indevice + '/fuzzy/',
            'ids_storage': config.CRYPTA_IDS_STORAGE
        }

    def output_folders(self):
        """Return the output folder prefixes keyed by logical name."""
        return {
            'indevice': config.INDEVICE_YT_FOLDER + self.date + '/',
            'pairs': config.GRAPH_YT_OUTPUT_FOLDER + self.date + '/pairs/',
            'dict': config.GRAPH_YT_DICTS_FOLDER
        }

    def requires(self):
        """Return the upstream tasks of the same date this task depends on."""
        return [
            classify.IndeviceClassifyTask(self.date),
            app_metrica_month.AppMetricaDictMergeMonthTask(self.date),
            match_by_ip_merge_month.DeviceYuidsMergeFuzzyDictMonthTask(self.date),
            device_yuid_perfect_by_source.DeviceYuidsPerfectBySource(self.date)
        ]

    def run(self):
        """Create the output folders, run the mixing pipeline and publish results."""
        indevice_dir = self.out_f('indevice')
        pairs_dir = self.out_f('pairs')
        dict_dir = self.out_f('dict')

        for folder in (indevice_dir + 'perfect', indevice_dir + 'fuzzy', pairs_dir):
            mr.mkdir(folder)

        run_add_fuzzy_devid_yuid_pairs(
            perfect_dir=self.in_f('indevice_perfect'),
            fuzzy_dir=self.in_f('indevice_fuzzy'),
            dev_info_dict=self.in_f('ids_storage') + 'device_id/app_metrica_month',
            workdir=indevice_dir,
            pairs_out_dir=pairs_dir,
            soup_android_out_table=self.soup_android_out_table,
            soup_ios_out_table=self.soup_ios_out_table,
            date=self.date
        )

        # Both soup tables are finalized inside one transaction, android first.
        with yt.Transaction() as tr:
            for soup_table in (self.soup_android_out_table, self.soup_ios_out_table):
                soup_table.finalize(tr)

        self.map_dev_yuid_indevice_to_dict(
            from_table=indevice_dir + 'dev_yuid',
            to_table=dict_dir + 'dev_yuid_indevice'
        )

    def map_dev_yuid_indevice_to_dict(self, from_table, to_table):
        """Copy ``from_table`` into the schematized dictionary table ``to_table``.

        Each ``key``/``subkey`` row becomes a ``devid``/``yuid`` row; the result is
        sorted by ``devid`` and stamped with this task's date.
        """
        def map_to_dict(rec):
            yield {'devid': rec['key'], 'yuid': rec['subkey']}

        dict_schema = {
            'devid': 'string',
            'yuid': 'string',
        }

        with yt.Transaction() as transaction:
            # The target table is recreated from scratch on every run.
            mr.create_table_with_schema(
                table=to_table,
                schema=dict_schema,
                transaction=transaction,
                strict=True,
                recreate_if_exists=True
            )
            yt.run_map(map_to_dict, from_table, to_table, ordered=True)
            yt.run_sort(to_table, sort_by='devid')

            mr.set_generate_date(to_table, self.date)

    def output(self):
        """Return the targets of this task, passed through ``log_yt_targets_check``."""
        indevice_dir = self.out_f('indevice')
        pairs_dir = self.out_f('pairs')
        dict_dir = self.out_f('dict')

        # Tables checked with a plain YtTarget.
        plain_tables = [
            pairs_dir + 'dev_yuid_pairs_' + config.ID_SOURCE_TYPE_FUZZY + '_' + config.INDEVICE,
            indevice_dir + 'dev_yuid',
            indevice_dir + 'dev_yuid_info_ua',
            indevice_dir + 'fuzzy/dev_yuid_indevice_unperfect_no_limit_tmp',
        ]
        # Dictionary tables checked against this task's date.
        dated_tables = [dict_dir + 'dev_yuid_indevice']

        targets = [yt_luigi.YtTarget(path) for path in plain_tables]
        targets.extend(yt_luigi.YtDateTarget(path, self.date) for path in dated_tables)
        targets.append(self.soup_ios_out_table.as_target())
        targets.append(self.soup_android_out_table.as_target())

        # noinspection PyTypeChecker
        return self.log_yt_targets_check(targets)


# Manual entry point: runs the task once for a hard-coded date using luigi's
# local scheduler.
if __name__ == '__main__':
    yt.config.set_proxy("hahn")
    # The reducers in this file route rows with '@table_index'; presumably this
    # format setting is what makes yt honour it - TODO confirm.
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)

    task = DevidYuidMixPerfectFuzzy('2018-07-12')

    luigi.build([task], local_scheduler=True)
