import luigi
import yt.wrapper as yt
from utils import mr_utils as mr
from lib.luigi import yt_luigi
from rtcconf import config
from matching.device_matching.device_yuid_mix_perfect_fuzzy import DevidYuidMixPerfectFuzzy
from matching.device_matching.perfect.device_yuid_perfect_by_source import DeviceYuidsPerfectBySource
from matching.pairs import graph_pairs


def validate_black_list(rec):
    """Mapper: route a black-list record to the valid or the rejected table.

    A record counts as valid when both its ``pair_type`` and ``st_number``
    fields are present and truthy.

    :param rec: record (dict-like) read from the black-list table.
    :return: generator yielding the same record object, mutated in place so
        that ``"@table_index"`` is 0 for a valid record and 1 otherwise.
    """
    is_valid = bool(rec.get("pair_type")) and bool(rec.get("st_number"))
    # Output 0 receives well-formed records, output 1 collects the rest.
    rec["@table_index"] = 0 if is_valid else 1
    yield rec


def remove_pairs_from_black_list(key, recs):
    """Reducer: drop every pair of a key group that is black-listed.

    Only the first record of the group is inspected. If its ``pair_type`` is
    ``"black_list"``, the whole group is replaced by a single summary record
    for output table 0. Otherwise every record is passed through unchanged.

    :param key: reduce key of the group; copied as-is into the summary record.
    :param recs: iterable of the records in the group (fully materialised).
    :return: generator of output records.
    """
    rows = list(recs)

    if rows[0]["pair_type"] != "black_list":
        # Group is not black-listed: emit its records untouched.
        for row in rows:
            yield row
        return

    # Collect the sources of the pairs being removed; records without a
    # truthy "pair_source" (e.g. the black-list entry itself) are skipped.
    sources = [row["pair_source"] for row in rows if row.get("pair_source")]
    yield {
        "key": key,
        "list_removes": sources or ["Pair not found!"],
        "@table_index": 0,
    }


class GraphPairsBlackList(yt_luigi.BaseYtTask):
    """Luigi task that applies the pairs black list to the daily pair tables.

    When the black-list dictionary table has rows, pairs whose key is
    black-listed are removed from the yuid-yuid and device-yuid pair tables
    (rewritten in place), and a summary of what was removed is written to a
    separate table.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """Return the YT paths read by this task, keyed by logical name."""
        day_root = config.YT_OUTPUT_FOLDER + self.date
        return {
            "black_list_folder": day_root + "/pairs/black_list",
            "black_list_table": config.GRAPH_YT_DICTS_FOLDER + "pairs_black_list",
        }

    def output_folders(self):
        """Return the YT paths (or path prefixes) written by this task."""
        day_root = config.YT_OUTPUT_FOLDER + self.date
        return {
            "yuid_pairs_prefix": day_root + "/pairs/yuid_pairs_",
            "dev_yuid_pairs_prefix": day_root + "/pairs/dev_yuid_pairs_",
            "removed_pairs_table": day_root + "/pairs/black_list/removed_pairs_from_black_list",
            "bad_black_list_table": day_root + "/pairs/black_list/bad_black_list_recs",
        }

    def requires(self):
        """Upstream tasks for the same date that must complete first."""
        upstream = (
            graph_pairs.GraphPairs,
            DevidYuidMixPerfectFuzzy,
            DeviceYuidsPerfectBySource,
        )
        return [task_cls(self.date) for task_cls in upstream]

    def _pairs_tables(self):
        """List the pair tables handled by the task: yuid-yuid, then device-yuid."""
        yuid_prefix = self.out_f("yuid_pairs_prefix")
        dev_prefix = self.out_f("dev_yuid_pairs_prefix")
        tables = [yuid_prefix + pair.id_type for pair in config.YUID_PAIR_TYPES_EXACT]
        tables.extend(dev_prefix + pair_name for pair_name in config.DEVID_PAIRS_NAMES_ALL)
        return tables

    def run(self):
        """Filter black-listed keys out of every pair table.

        Does nothing when the black-list table has no rows.
        """
        # TODO CRYPTR-298: use black list for fuzzy pairs as well

        black_list = self.in_f("black_list_table")
        if not yt.row_count(black_list):
            return

        pairs_tables = self._pairs_tables()
        # The black list is the first reduce input and the removal summary the
        # first reduce output; the pair tables follow in the same order on both
        # sides, so each pair table is rewritten in place.
        reduce_inputs = [black_list] + pairs_tables
        reduce_outputs = [self.out_f("removed_pairs_table")] + pairs_tables

        mr.mkdir(self.in_f("black_list_folder"))

        # Validate the black list in place: well-formed records go back into
        # the black-list table, the rest into the "bad records" table.
        yt.run_map(validate_black_list,
                   black_list,
                   [black_list, self.out_f("bad_black_list_table")])

        # Every reduce input has to be sorted by the reduce key.
        yt.run_sort(black_list, sort_by='key')
        for table in pairs_tables:
            yt.run_sort(table, sort_by="key")

        yt.run_reduce(remove_pairs_from_black_list,
                      reduce_inputs,
                      reduce_outputs,
                      reduce_by="key")
        mr.merge_chunks_all(reduce_outputs)

    def output(self):
        """Return one target per pair table; empty tables are accepted."""
        return [yt_luigi.YtTarget(path, allow_empty=True) for path in self._pairs_tables()]


if __name__ == '__main__':
    # Script entry point: point the YT client at a fixed cluster proxy.
    yt.config.set_proxy('hahn.yt.yandex.net')
    # NOTE(review): dt and workdir are assigned but never used in the visible
    # code — presumably leftovers from a manual debug run; confirm before
    # relying on or removing them.
    dt = '2016-08-10'
    workdir = '//home/crypta/team/gengo/CRYPTAPRIVATE-10'
