"""
Currently this is used for metrica_socket, but in the future it can be used
to calculate info for other sources.
"""
from collections import defaultdict

import luigi
import yt.wrapper as yt

from data_imports.import_logs.watch_log.graph_watch_log import ImportWatchLogDayTask
from lib import graphite_sender
from lib.luigi import yt_luigi
from matching.device_matching.device_yuid_mix_perfect_fuzzy import DevidYuidMixPerfectFuzzy
from matching.device_matching.perfect.device_yuid_perfect_by_source import DeviceYuidsPerfectBySource
from rtcconf import config
from utils import mr_utils as mr


def split_key_mapper(record):
    """
    Split the ``key`` column ("<devid>_<yuid>") into separate columns.

    Yields one row with ``devid``, ``yuid`` and the pair attributes copied
    from ``record``. Rows whose key does not split into exactly two parts
    on ``_`` are dropped. A missing ``key`` defaults to ``'_'``, which
    splits into two empty strings and is therefore still emitted.
    """
    pieces = record.get('key', '_').split('_')
    if len(pieces) != 2:
        return

    row = {'devid': pieces[0], 'yuid': pieces[1]}
    # string attributes fall back to an empty string when absent
    for column in ('match_type', 'pair_source', 'pair_type', 'source_type'):
        row[column] = record.get(column, '')
    row['yuid_sources'] = record.get('yuid_sources', [])
    yield row


def join_reducer(_, records):
    """
    Compare every row of table 0 with the rows of table 1 in the same group.

    Rows are routed by their ``@table_index``: index 0 rows are the ones a
    result is produced for, index 1 rows are what they are compared against;
    any other index is ignored.

    For each index-0 row one record is yielded with:
      * ``sole_indev`` - 1 if every index-1 row has ``yuid_sources`` equal
        to ``[source_type]`` of the index-0 row, else 0;
      * ``uniq`` - 1 unless some index-1 row with different ``yuid_sources``
        also has a different ``yuid``, else 0;
      * ``source_type`` - copied from the index-0 row.
    """
    primary_rows, foreign_rows = [], []
    by_table_index = {0: primary_rows, 1: foreign_rows}
    for row in records:
        bucket = by_table_index.get(row['@table_index'])
        if bucket is not None:
            bucket.append(row)

    for primary in primary_rows:
        source_type = primary["source_type"]
        yuid = primary["yuid"]
        # rows that are known from something other than just this source
        from_other_sources = [
            row for row in foreign_rows
            if row["yuid_sources"] != [source_type]
        ]
        # only those rows are checked for a conflicting yuid
        conflicting = [
            row for row in from_other_sources
            if row["yuid"] != yuid
        ]
        yield {
            "uniq": int(not conflicting),
            "sole_indev": int(not from_other_sources),
            "source_type": source_type,
        }


@yt.aggregator
def calc_mapper(records):
    """
    Pre-aggregate ``uniq`` and ``sole_indev`` per ``source_type``.

    Consumes the whole ``records`` iterator, summing both counters for each
    source type seen, then yields one row per source type with the partial
    sums.
    """
    # source_type -> [uniq sum, sole_indev sum]
    partial_sums = {}
    for row in records:
        counters = partial_sums.setdefault(row["source_type"], [0, 0])
        counters[0] += row["uniq"]
        counters[1] += row["sole_indev"]

    for source_type, (uniq_sum, sole_indev_sum) in partial_sums.items():
        yield {
            "uniq": uniq_sum,
            "sole_indev": sole_indev_sum,
            "source_type": source_type,
        }


def calc_reducer(keys, records):
    """
    Sum the ``uniq`` and ``sole_indev`` columns over one ``source_type`` group.

    ``keys`` supplies the group's ``source_type``; a single row with the two
    totals and that source type is yielded (zeros when ``records`` is empty).
    """
    source_type = keys["source_type"]
    counted_fields = ("uniq", "sole_indev")
    totals = {"uniq": 0, "sole_indev": 0}
    for row in records:
        for field in counted_fields:
            totals[field] += row[field]
    yield {
        "uniq": totals["uniq"],
        "sole_indev": totals["sole_indev"],
        "source_type": source_type,
    }


class NewIndevicePairsStat(yt_luigi.BaseYtTask):
    """
    Joins the metrica-sockets devid-yuid tables with the in-device pair
    tables by devid, sums the ``uniq`` / ``sole_indev`` flags per
    source_type and sends the totals to graphite.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """
        input folders for the given date; the keys are the names that
        ``run`` passes to ``self.in_f``
        """
        devid_raw_day_folder = config.INDEVICE_YT_FOLDER + self.date + "/perfect/devid_raw_day/"
        pairs_folder = config.GRAPH_FOLDER + self.date + "/pairs/"
        return {
            'devid_row': devid_raw_day_folder,
            'pairs': pairs_folder,
        }

    @staticmethod
    def output_table():
        """
        path of the local marker file for this task
        """
        return config.LOCAL_OUTPUT_FOLDER + 'new-indevice-pairs-stat'

    def output(self):
        """
        the task is complete when the marker target for ``date`` exists
        """
        marker = yt_luigi.TodayFileTarget(self.output_table(), self.date)
        return [marker]

    def requires(self):
        """
        upstream tasks, all for the same date
        """
        return [
            DevidYuidMixPerfectFuzzy(date=self.date),
            DeviceYuidsPerfectBySource(date=self.date),
            ImportWatchLogDayTask(date=self.date, run_date=self.date),
        ]

    def run(self):
        """
        build the statistics inside one YT transaction, push them to
        graphite and mark the task as done
        """
        sockets_tables = [
            self.in_f("devid_row") + table_name
            for table_name in (
                "devid_yuid_metrica_sockets_android",
                "devid_yuid_metrica_sockets_ios",
            )
        ]

        # keep only the dev_yuid_pairs_*_indev tables of the pairs folder;
        # the folder path is listed without its trailing character
        indev_tables = []
        for table_name in yt.list(self.in_f("pairs")[:-1]):
            is_indev_pairs = (
                table_name.startswith("dev_yuid_pairs_")
                and table_name.endswith("_indev")
            )
            if is_indev_pairs:
                indev_tables.append(self.in_f("pairs") + table_name)

        with yt.Transaction() as _,\
                yt.TempTable() as sockets_merged,\
                yt.TempTable() as indev_merged,\
                yt.TempTable() as indev_split,\
                yt.TempTable() as joined,\
                yt.TempTable() as totals:
            yt.concatenate(sockets_tables, sockets_merged)
            yt.concatenate(indev_tables, indev_merged)
            yt.run_map(split_key_mapper, indev_merged, indev_split)
            mr.sort_all([indev_split, sockets_merged], sort_by='devid')

            # the split in-device table is the foreign side of the join
            foreign_indev = yt.TablePath(indev_split, attributes={'foreign': True})

            yt.run_join_reduce(
                join_reducer,
                [sockets_merged, foreign_indev],
                joined,
                reduce_by=['devid'],
                join_by=['devid'],
            )

            yt.run_map_reduce(
                calc_mapper,
                calc_reducer,
                joined,
                totals,
                reduce_by=['source_type'],
            )

            # two metrics per source type: <source_type>.uniq / .sole_indev
            metrics = {}
            for row in yt.read_table(totals):
                prefix = row["source_type"]
                metrics[prefix + '.uniq'] = row["uniq"]
                metrics[prefix + '.sole_indev'] = row["sole_indev"]

            graphite_sender.to_graphite_sender_batch(
                'graph_precision_all',
                metrics.items(),
                self.date
            )

            yt_luigi.TodayFileTarget.done(self.output_table(), self.date)


if __name__ == "__main__":
    import os

    # join_reducer reads '@table_index', so table indexes must be delivered
    # with the rows
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)
    mr_server = os.getenv('RTCRYPTA_MR_SERVER')
    yt.config.set_proxy(mr_server)

    @yt_luigi.BaseYtTask.event_handler(luigi.Event.START)
    def on_task_start(_):
        # reset the global YT state at the start of every task
        yt_luigi.reset_global_yt_state()

    # manual run for a fixed date
    tasks = [NewIndevicePairsStat('2018-08-19')]
    luigid_port = int(config.LUIGID_PORT)
    _ = luigi.build(tasks, workers=10, scheduler_port=luigid_port)
