from functools import partial

import luigi
import yt.wrapper as yt

from utils import mr_utils as mr
from utils import utils
from lib.luigi import yt_luigi
from matching.device_matching.device_yuid_mix_perfect_fuzzy import DevidYuidMixPerfectFuzzy
from matching.device_matching.perfect.device_yuid_perfect_by_source import DeviceYuidsPerfectBySource
from rtcconf import config


def simple_join_info(_, recs, fields):
    """Reducer: copy selected fields from the info record onto the other records.

    Records with ``@table_index == 0`` are treated as the info record and are
    not emitted; if several are seen, the most recent one is used.  Every
    other record gets ``rec[field_to] = info[field_from]`` for each entry of
    ``fields`` whose ``field_from`` is present in the info record, has its
    ``@table_index`` reset to 0 and is yielded.  Records seen before any info
    record are yielded without the extra fields.

    :param _: reduce key (unused).
    :param recs: iterable of record dicts, each carrying ``@table_index``.
    :param fields: mapping ``{field_in_info_record: field_in_output_record}``.
    :return: generator of the non-info records (mutated in place).
    """
    info_rec = None
    for rec in recs:
        if rec['@table_index'] == 0:
            info_rec = rec.copy()
            continue
        if info_rec:
            # items() instead of the Python 2-only iteritems(): behaves the
            # same on Python 2 and keeps the reducer usable on Python 3.
            for field_from, field_to in fields.items():
                if field_from in info_rec:
                    rec[field_to] = info_rec[field_from]
        # All output goes to the first (only) output table.
        rec['@table_index'] = 0
        yield rec


def reduce_add_mmetric_devid(_key, recs):
    """Reducer: fan each record out over the mmetric devids of its key.

    A record with ``@table_index == 0`` supplies the comma-separated
    ``mmetric_devids`` string for the current key and is not emitted.  Every
    later record is emitted once per mmetric devid, with ``mmetric_devid`` set
    and ``@table_index`` reset to 0.  Records seen while no mmetric devids are
    known are dropped.

    :param _key: reduce key (unused).
    :param recs: iterable of record dicts, each carrying ``@table_index``.
    :return: generator of new record dicts, one per (record, mmetric devid).
    """
    mmetric_devids = []
    for rec in recs:
        if rec['@table_index'] == 0:
            mmetric_devids = rec['mmetric_devids'].split(',')
            continue
        for mmetric_devid in mmetric_devids:
            # Emit an independent copy per devid: re-yielding the same dict
            # while mutating it makes every previously yielded reference show
            # the last mmetric_devid if the consumer holds on to the records.
            out_rec = dict(rec)
            out_rec['mmetric_devid'] = mmetric_devid
            out_rec['@table_index'] = 0
            yield out_rec


def merge_no_limit(_key, recs):
    """Reducer: collapse all records of a key into one with merged sources.

    The output is a copy of the first record of the group with
    ``@table_index`` set to 0 and ``source`` replaced by the comma-joined
    union of the comma-separated ``source`` values of every record in the
    group.  Sources are emitted in sorted order so the result is
    deterministic.  An empty group yields nothing.

    :param _key: reduce key (unused).
    :param recs: iterable of record dicts.
    :return: generator yielding at most one merged record.
    """
    first_rec = None
    sources = set()
    for rec in recs:
        if first_rec is None:
            first_rec = rec.copy()
        source = rec.get('source', '')
        if source:
            sources.update(source.split(','))
    if first_rec is None:
        # Nothing to merge; avoid indexing into None below.
        return
    first_rec['@table_index'] = 0
    # sorted(): set iteration order is arbitrary and may differ between runs.
    first_rec['source'] = ','.join(sorted(sources))
    yield first_rec


# no_limit_type = perfect | unperfect
def run_dev_yuid_indevice_no_limit(workdir, in_dict_folder, out_dict_folder, date, no_limit_type):
    """Merge new "no limit" devid-yuid records into the dictionary table.

    Steps, in order:

    1. Sort ``dev_info_yt`` from ``in_dict_folder`` into ``workdir`` and sort
       the new-records table in place, both by ``devid``.
    2. Reduce the two by ``devid`` with ``reduce_add_mmetric_devid``.
    3. Sort the result and, if it exists, the current dictionary table by
       ``mmetric_devid, yuid, date``.
    4. Inside one transaction: recreate the output tables, reduce with
       ``merge_no_limit``, sort the result by ``devid`` into the final table,
       then set its ``_format`` attribute and generate date.
    5. Drop the intermediate tables.

    All folder arguments are concatenated directly with table names, so they
    must already end with the path separator.

    :param workdir: folder holding ``dev_yuid_indevice_<type>_no_limit_tmp``;
        also used for intermediate tables.
    :param in_dict_folder: folder with ``dev_info_yt`` and the current dictionary.
    :param out_dict_folder: folder receiving ``dev_yuid_indevice_<type>_no_limit``.
    :param date: value passed to ``mr.set_generate_date`` for the output table.
    :param no_limit_type: substituted into the table names.
    """
    merge_order = ('mmetric_devid', 'yuid', 'date')

    fresh_recs = workdir + 'dev_yuid_indevice_%s_no_limit_tmp' % no_limit_type
    fresh_recs_expanded = workdir + 'dev_yuid_indevice_%s_no_limit_tmp_tmp' % no_limit_type
    current_dict = in_dict_folder + 'dev_yuid_indevice_%s_no_limit' % no_limit_type
    current_dict_sorted = in_dict_folder + 'dev_yuid_indevice_%s_no_limit_tmp' % no_limit_type
    result_dict = out_dict_folder + 'dev_yuid_indevice_%s_no_limit' % no_limit_type
    result_unsorted = result_dict + '_not_prepared_output'
    dev_info_sorted = workdir + 'dev_info_yt'

    # Both reduce inputs have to be sorted by the reduce key first.
    utils.wait_all([
        yt.run_sort(in_dict_folder + 'dev_info_yt', dev_info_sorted, sort_by='devid'),
        yt.run_sort(fresh_recs, sort_by='devid')
    ])

    yt.run_reduce(
        reduce_add_mmetric_devid,
        [dev_info_sorted, fresh_recs],
        fresh_recs_expanded,
        reduce_by='devid'
    )

    yt.run_sort(fresh_recs_expanded, sort_by=list(merge_order))

    # The existing dictionary (when present) goes first in the merge inputs.
    merge_inputs = [fresh_recs_expanded]
    if mr.exists(current_dict):
        yt.run_sort(current_dict, current_dict_sorted, sort_by=list(merge_order))
        merge_inputs = [current_dict_sorted] + merge_inputs

    out_schema = {
        'devid': 'string',
        'mmetric_devid': 'string',
        'date': 'string',
        'source': 'string',
        'yuid': 'string',
        'yuid_browser': 'string',
        'yuid_logins': 'any',
    }

    with yt.Transaction() as tx:
        # Final table, declared as sorted by devid.
        mr.create_table_with_schema(
            result_dict,
            out_schema,
            tx,
            recreate_if_exists=True,
            sorted_by=['devid']
        )
        # Staging table for the unsorted merge output.
        mr.create_table_with_schema(
            result_unsorted,
            out_schema,
            tx,
            recreate_if_exists=True
        )
        yt.run_reduce(
            merge_no_limit,
            merge_inputs,
            result_unsorted,
            sort_by=list(merge_order),
            reduce_by=['mmetric_devid', 'yuid']
        )

        yt.run_sort(result_unsorted, result_dict, sort_by=['devid'])

        yt.set_attribute(result_dict, '_format', 'yson')
        mr.set_generate_date(result_dict, date)

    # Clean up intermediates; the new-records table itself is kept.
    for leftover in (dev_info_sorted, fresh_recs_expanded, current_dict_sorted, result_unsorted):
        mr.drop(leftover)


class UpdateYuidDevidIndeviceAllDict(yt_luigi.BaseYtTask):
    """Luigi task that rebuilds both "no limit" devid-yuid dictionary tables.

    It joins browser/login info onto the new unperfect records and then runs
    ``run_dev_yuid_indevice_no_limit`` for the perfect and unperfect variants.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """Return the named input folders: per-date indevice folders and the dict folder."""
        perfect_folder = config.INDEVICE_YT_FOLDER + self.date + '/perfect/'
        fuzzy_folder = config.INDEVICE_YT_FOLDER + self.date + '/fuzzy/'
        return {
            'indevice_perfect': perfect_folder,
            'indevice_fuzzy': fuzzy_folder,
            'dict': config.GRAPH_YT_DICTS_FOLDER
        }

    def output_folders(self):
        """Return the named output folders (only the dict folder)."""
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def requires(self):
        """Return the upstream tasks for the same date."""
        upstream = (DevidYuidMixPerfectFuzzy, DeviceYuidsPerfectBySource)
        return [task_cls(self.date) for task_cls in upstream]

    def run(self):
        """Enrich the new unperfect records, then update both dictionaries."""
        perfect_dir = self.in_f('indevice_perfect')
        fuzzy_dir = self.in_f('indevice_fuzzy')
        dict_dir = self.in_f('dict')

        result_dir = self.out_f('dict')

        # Add browsers/logins to the new unperfect no-limit records; the
        # reduce writes back to the table it reads from.
        unperfect_new = fuzzy_dir + 'dev_yuid_indevice_unperfect_no_limit_tmp'
        yt.run_sort(unperfect_new, sort_by='yuid')
        join_yuid_info = partial(
            simple_join_info,
            fields={'browser': 'yuid_browser', 'login_fp_dates': 'yuid_logins'}
        )
        yt.run_reduce(
            join_yuid_info,
            [dict_dir + 'yuid_with_all', unperfect_new],
            unperfect_new,
            reduce_by='yuid'
        )

        for folder, kind in ((perfect_dir, 'perfect'), (fuzzy_dir, 'unperfect')):
            run_dev_yuid_indevice_no_limit(folder, dict_dir, result_dir, self.date, kind)

    def output(self):
        """Return date targets for the two resulting dictionary tables."""
        result_dir = self.out_f('dict')
        table_names = (
            'dev_yuid_indevice_perfect_no_limit',
            'dev_yuid_indevice_unperfect_no_limit',
        )
        table_paths = [result_dir + name for name in table_names]
        return [yt_luigi.YtDateTarget(path, self.date) for path in table_paths]
